42 #include "../../App/ApplicationTools.h" 54 const std::string& prefix)
throw (
Exception):
57 hiddenAlphabet_(hiddenAlphabet),
58 transitionMatrix_(transitionMatrix),
59 emissionProbabilities_(emissionProbabilities),
64 backLikelihoodUpToDate_(
false),
73 if (!hiddenAlphabet)
throw Exception(
"RescaledHmmLikelihood: null pointer passed for HmmStateAlphabet.");
74 if (!transitionMatrix)
throw Exception(
"RescaledHmmLikelihood: null pointer passed for HmmTransitionMatrix.");
75 if (!emissionProbabilities)
throw Exception(
"RescaledHmmLikelihood: null pointer passed for HmmEmissionProbabilities.");
76 if (!hiddenAlphabet_->worksWith(transitionMatrix->getHmmStateAlphabet()))
77 throw Exception(
"RescaledHmmLikelihood: HmmTransitionMatrix and HmmEmissionProbabilities should point toward the same HmmStateAlphabet object.");
78 if (!hiddenAlphabet_->worksWith(emissionProbabilities->getHmmStateAlphabet()))
79 throw Exception(
"RescaledHmmLikelihood: HmmTransitionMatrix and HmmEmissionProbabilities should point toward the same HmmStateAlphabet object.");
80 nbStates_ = hiddenAlphabet_->getNumberOfStates();
81 nbSites_ = emissionProbabilities_->getNumberOfPositions();
84 addParameters_(hiddenAlphabet_->getParameters());
85 addParameters_(transitionMatrix_->getParameters());
86 addParameters_(emissionProbabilities_->getParameters());
89 likelihood_.resize(nbSites_ * nbStates_);
91 scales_.resize(nbSites_);
99 bool alphabetChanged = hiddenAlphabet_->matchParametersValues(pl);
100 bool transitionsChanged = transitionMatrix_->matchParametersValues(pl);
101 bool emissionChanged = emissionProbabilities_->matchParametersValues(pl);
105 if (alphabetChanged && !transitionsChanged) transitionMatrix_->setParametersValues(transitionMatrix_->getParameters());
106 if (alphabetChanged && !emissionChanged) emissionProbabilities_->setParametersValues(emissionProbabilities_->getParameters());
109 backLikelihoodUpToDate_=
false;
117 vector<double> tmp(nbStates_);
118 vector<double> lScales(nbSites_);
119 vector<double> trans(nbStates_ * nbStates_);
122 for (
size_t i = 0; i < nbStates_; i++)
124 size_t ii = i * nbStates_;
125 for (
size_t j = 0; j < nbStates_; j++) {
126 trans[ii + j] = transitionMatrix_->Pij(j, i);
127 if (isnan(trans[ii + j]))
128 throw Exception(
"RescaledHmmLikelihood::computeForward_. NaN transition probability");
129 if (trans[ii + j] < 0)
136 const vector<double>* emissions = &(*emissionProbabilities_)(0);
137 for (
size_t j = 0; j < nbStates_; j++)
139 size_t jj = j * nbStates_;
141 for (
size_t k = 0; k < nbStates_; k++)
143 x += trans[k + jj] * transitionMatrix_->getEquilibriumFrequencies()[k];
146 tmp[j] = (*emissions)[j] * x;
148 scales_[0] += tmp[j];
150 for (
size_t j = 0; j < nbStates_; j++)
152 likelihood_[j] = tmp[j] / scales_[0];
154 lScales[0] = log(scales_[0]);
157 size_t nextBrkPt = nbSites_;
158 vector<size_t>::const_iterator bpIt = breakPoints_.begin();
159 if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
162 for (
size_t i = 1; i < nbSites_; i++)
164 size_t ii = i * nbStates_;
165 size_t iip = (i - 1) * nbStates_;
167 emissions = &(*emissionProbabilities_)(i);
170 for (
size_t j = 0; j < nbStates_; j++)
172 size_t jj = j * nbStates_;
174 for (
size_t k = 0; k < nbStates_; k++)
176 a = trans[jj + k] * likelihood_[iip + k];
184 tmp[j] = (*emissions)[j] * x;
187 (*
ApplicationTools::warning <<
"Negative probability at " << i <<
", state " << j <<
": " << (*emissions)[j] <<
"\t" << x).endLine();
190 scales_[i] += tmp[j];
195 for (
size_t j = 0; j < nbStates_; j++)
197 size_t jj = j * nbStates_;
199 for (
size_t k = 0; k < nbStates_; k++)
201 a = trans[jj + k] * transitionMatrix_->getEquilibriumFrequencies()[k];
209 tmp[j] = (*emissions)[j] * x;
215 scales_[i] += tmp[j];
218 if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
219 else nextBrkPt = nbSites_;
222 for (
size_t j = 0; j < nbStates_; j++)
224 if (scales_[i] > 0) likelihood_[ii + j] = tmp[j] / scales_[i];
225 else likelihood_[ii + j] = 0;
227 lScales[i] = log(scales_[i]);
230 sort(lScales.begin(), lScales.end(), cmp);
232 for (
size_t i = 0; i < nbSites_; ++i)
234 logLik_ += lScales[i];
242 if (backLikelihood_.size()==0)
244 backLikelihood_.resize(nbSites_);
245 for (
size_t i=0;i<nbSites_;i++)
246 backLikelihood_[i].resize(nbStates_);
252 vector<double> trans(nbStates_ * nbStates_);
253 for (
size_t i = 0; i < nbStates_; i++)
255 size_t ii = i * nbStates_;
256 for (
size_t j = 0; j < nbStates_; j++)
257 trans[ii + j] = transitionMatrix_->Pij(i, j);
262 const vector<double>* emissions = 0;
263 size_t nextBrkPt = 0;
264 vector<size_t>::const_reverse_iterator bpIt = breakPoints_.rbegin();
265 if (bpIt != breakPoints_.rend()) nextBrkPt = *bpIt;
267 for (
size_t j = 0; j < nbStates_; j++)
270 backLikelihood_[nbSites_ - 1][j] = 1.;
274 for (
size_t i = nbSites_ - 1; i > 0; i--)
276 emissions = &(*emissionProbabilities_)(i);
279 for (
size_t j = 0; j < nbStates_; j++)
282 size_t jj = j * nbStates_;
283 for (
size_t k = 0; k < nbStates_; k++)
285 x += (*emissions)[k] * trans[jj + k] * backLikelihood_[i][k];
287 backLikelihood_[i-1][j] = x / scales_[i];
292 for (
size_t j = 0; j < nbStates_; j++)
294 backLikelihood_[i-1][j] = 1.;
297 if (bpIt != breakPoints_.rend()) nextBrkPt = *bpIt;
302 backLikelihoodUpToDate_=
true;
309 Vdouble probs=getHiddenStatesPosteriorProbabilitiesForASite(site);
311 for (
size_t i=0;i<nbStates_;i++)
312 x+=probs[i]*(*emissionProbabilities_)(site,i);
319 std::vector< std::vector<double> > vv;
320 getHiddenStatesPosteriorProbabilities(vv);
323 for (
size_t i=0;i<nbSites_;i++)
326 for (
size_t j=0;j<nbStates_;j++)
327 ret[i]+=vv[i][j]*(*emissionProbabilities_)(i,j);
337 if (!backLikelihoodUpToDate_)
342 for (
size_t j = 0; j < nbStates_; j++)
344 probs[j] = likelihood_[site * nbStates_ + j] * backLikelihood_[site][j];
353 size_t offset = append ? probs.size() : 0;
354 probs.resize(offset + nbSites_);
355 for (
size_t i = 0; i < nbSites_; i++)
357 probs[offset + i].resize(nbStates_);
360 if (!backLikelihoodUpToDate_)
363 for (
size_t i = 0; i < nbSites_; i++)
365 size_t ii = i * nbStates_;
366 for (
size_t j = 0; j < nbStates_; j++)
368 probs[offset + i][j] = likelihood_[ii + j] * backLikelihood_[i][j];
378 if (dLikelihood_.size()==0){
379 dLikelihood_.resize(nbSites_);
380 for (
size_t i=0;i<nbSites_;i++)
381 dLikelihood_[i].resize(nbStates_);
383 if (dScales_.size()==0)
384 dScales_.resize(nbSites_);
387 vector<double> tmp(nbStates_), dTmp(nbStates_);
388 vector<double> dLScales(nbSites_);
395 const vector<double>* emissions = &(*emissionProbabilities_)(0);
396 const vector<double>* dEmissions = &emissionProbabilities_->getDEmissionProbabilities(0);
398 for (
size_t j = 0; j < nbStates_; j++)
400 dTmp[j] = (*dEmissions)[j] * transitionMatrix_->getEquilibriumFrequencies()[j];
401 tmp[j] = (*emissions)[j] * transitionMatrix_->getEquilibriumFrequencies()[j];
403 dScales_[0] += dTmp[j];
406 dLScales[0]=dScales_[0]/scales_[0];
409 for (
size_t j = 0; j < nbStates_; j++)
410 dLikelihood_[0][j] = (dTmp[j] * scales_[0] - tmp[j] * dScales_[0]) / pow(scales_[0],2);
414 size_t nextBrkPt = nbSites_;
415 vector<size_t>::const_iterator bpIt = breakPoints_.begin();
416 if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
418 for (
size_t i = 1; i < nbSites_; i++)
420 size_t iip = (i - 1) * nbStates_;
424 emissions = &(*emissionProbabilities_)(i);
425 dEmissions = &emissionProbabilities_->getDEmissionProbabilities(i);
429 for (
size_t j = 0; j < nbStates_; j++)
432 for (
size_t k = 0; k < nbStates_; k++)
433 x += trans(k,j) * likelihood_[iip + k];
435 tmp[j] = (*emissions)[j] * x;
436 dTmp[j] = (*dEmissions)[j] * x + (*emissions)[j] *
VectorTools::sum(trans.getCol(j) * dLikelihood_[i-1]);
438 dScales_[i] += dTmp[j];
443 for (
size_t j = 0; j < nbStates_; j++)
445 dTmp[j] = (*dEmissions)[j] * transitionMatrix_->getEquilibriumFrequencies()[j];
446 tmp[j] = (*emissions)[j] * transitionMatrix_->getEquilibriumFrequencies()[j];
448 dScales_[i] += dTmp[j];
452 if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
453 else nextBrkPt = nbSites_;
456 dLScales[i]=dScales_[i]/scales_[i];
458 for (
size_t j = 0; j < nbStates_; j++)
459 dLikelihood_[i][j] = (dTmp[j] * scales_[i] - tmp[j] * dScales_[i]) / pow(scales_[i],2);
463 sort(dLScales.begin(), dLScales.end(), cmp);
465 for (
size_t i = 0; i < nbSites_; ++i)
467 dLogLik_ += dLScales[i];
477 if (d2Likelihood_.size()==0){
478 d2Likelihood_.resize(nbSites_);
479 for (
size_t i=0;i<nbSites_;i++)
480 d2Likelihood_[i].resize(nbStates_);
482 if (d2Scales_.size()==0)
483 d2Scales_.resize(nbSites_);
486 vector<double> tmp(nbStates_), dTmp(nbStates_), d2Tmp(nbStates_);
487 vector<double> d2LScales(nbSites_);
494 const vector<double>* emissions = &(*emissionProbabilities_)(0);
495 const vector<double>* dEmissions = &emissionProbabilities_->getDEmissionProbabilities(0);
496 const vector<double>* d2Emissions = &emissionProbabilities_->getD2EmissionProbabilities(0);
498 for (
size_t j = 0; j < nbStates_; j++)
500 tmp[j] = (*emissions)[j] * transitionMatrix_->getEquilibriumFrequencies()[j];
501 dTmp[j] = (*dEmissions)[j] * transitionMatrix_->getEquilibriumFrequencies()[j];
502 d2Tmp[j] = (*d2Emissions)[j] * transitionMatrix_->getEquilibriumFrequencies()[j];
504 d2Scales_[0] += d2Tmp[j];
507 d2LScales[0]=d2Scales_[0]/scales_[0]-pow(dScales_[0]/scales_[0],2);
509 for (
size_t j = 0; j < nbStates_; j++)
510 d2Likelihood_[0][j] = d2Tmp[j] / scales_[0] - (d2Scales_[0] * tmp[j] + 2 * dScales_[0] * dTmp[j]) / pow(scales_[0],2)
511 + 2 * pow(dScales_[0],2) * tmp[j] / pow(scales_[0],3);
515 size_t nextBrkPt = nbSites_;
516 vector<size_t>::const_iterator bpIt = breakPoints_.begin();
517 if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
519 for (
size_t i = 1; i < nbSites_; i++)
523 emissions = &(*emissionProbabilities_)(i);
524 dEmissions = &emissionProbabilities_->getDEmissionProbabilities(i);
525 d2Emissions = &emissionProbabilities_->getD2EmissionProbabilities(i);
529 size_t iip = (i - 1) * nbStates_;
531 for (
size_t j = 0; j < nbStates_; j++)
534 for (
size_t k = 0; k < nbStates_; k++)
535 x += trans(k,j) * likelihood_[iip + k];
537 tmp[j] = (*emissions)[j] * x;
538 dTmp[j] = (*dEmissions)[j] * x + (*emissions)[j] *
VectorTools::sum(trans.getCol(j) * dLikelihood_[i-1]);
539 d2Tmp[j] = (*d2Emissions)[j] * x + 2 * (*dEmissions)[j] *
VectorTools::sum(trans.getCol(j) * dLikelihood_[i-1])
542 d2Scales_[i] += d2Tmp[j];
547 for (
size_t j = 0; j < nbStates_; j++)
549 tmp[j] = (*emissions)[j] * transitionMatrix_->getEquilibriumFrequencies()[j];
550 dTmp[j] = (*dEmissions)[j] * transitionMatrix_->getEquilibriumFrequencies()[j];
551 d2Tmp[j] = (*d2Emissions)[j] * transitionMatrix_->getEquilibriumFrequencies()[j];
553 d2Scales_[i] += d2Tmp[j];
557 if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
558 else nextBrkPt = nbSites_;
561 d2LScales[i]=d2Scales_[i]/scales_[i]-pow(dScales_[i]/scales_[i],2);
563 for (
size_t j = 0; j < nbStates_; j++)
564 d2Likelihood_[i][j] = d2Tmp[j] / scales_[i] - (d2Scales_[i] * tmp[j] + 2 * dScales_[i] * dTmp[j]) / pow(scales_[i],2)
565 + 2 * pow(dScales_[i],2) * tmp[j] / pow(scales_[i],3);
569 sort(d2LScales.begin(), d2LScales.end(), cmp);
571 for (
size_t i = 0; i < nbSites_; ++i)
573 d2LogLik_ += d2LScales[i];
void computeDForward_() const
This class allows to perform a correspondence analysis.
void fireParameterChanged(const ParameterList &pl)
Notify the class when one or several parameters have changed.
A partial implementation of the Parametrizable interface.
void computeD2Forward_() const
Vdouble getHiddenStatesPosteriorProbabilitiesForASite(size_t site) const
Interface for computing emission probabilities in a Hidden Markov Model.
static std::string toString(T t)
General template method to convert to a string.
The parameter list object.
RescaledHmmLikelihood(HmmStateAlphabet *hiddenAlphabet, HmmTransitionMatrix *transitionMatrix, HmmEmissionProbabilities *emissionProbabilities, const std::string &prefix)
Build a new RescaledHmmLikelihood object.
void getHiddenStatesPosteriorProbabilities(std::vector< std::vector< double > > &probs, bool append=false) const
Matrix storage by column.
std::vector< double > Vdouble
Vdouble getLikelihoodForEachSite() const
Get the likelihood for each site.
void computeBackward_() const
double getLikelihoodForASite(size_t site) const
Get the likelihood for a site.
Describe the transition probabilities between hidden states of a Hidden Markov Model.