52 const std::string& prefix)
throw (
Exception):
55 hiddenAlphabet_(hiddenAlphabet),
56 transitionMatrix_(transitionMatrix),
57 emissionProbabilities_(emissionProbabilities),
59 partialLogLikelihoods_(),
62 partialDLogLikelihoods_(),
64 partialD2LogLikelihoods_(),
66 backLogLikelihoodUpToDate_(
false),
71 if (!hiddenAlphabet)
throw Exception(
"LogsumHmmLikelihood: null pointer passed for HmmStateAlphabet.");
72 if (!transitionMatrix)
throw Exception(
"LogsumHmmLikelihood: null pointer passed for HmmTransitionMatrix.");
73 if (!emissionProbabilities)
throw Exception(
"LogsumHmmLikelihood: null pointer passed for HmmEmissionProbabilities.");
74 if (!hiddenAlphabet_->worksWith(transitionMatrix->getHmmStateAlphabet()))
75 throw Exception(
"LogsumHmmLikelihood: HmmTransitionMatrix and HmmEmissionProbabilities should point toward the same HmmStateAlphabet object.");
76 if (!hiddenAlphabet_->worksWith(emissionProbabilities->getHmmStateAlphabet()))
77 throw Exception(
"LogsumHmmLikelihood: HmmTransitionMatrix and HmmEmissionProbabilities should point toward the same HmmStateAlphabet object.");
78 nbStates_ = hiddenAlphabet_->getNumberOfStates();
79 nbSites_ = emissionProbabilities_->getNumberOfPositions();
82 addParameters_(hiddenAlphabet_->getParameters());
83 addParameters_(transitionMatrix_->getParameters());
84 addParameters_(emissionProbabilities_->getParameters());
87 logLikelihood_.resize(nbSites_ * nbStates_);
98 bool alphabetChanged = hiddenAlphabet_->matchParametersValues(pl);
99 bool transitionsChanged = transitionMatrix_->matchParametersValues(pl);
100 bool emissionChanged = emissionProbabilities_->matchParametersValues(pl);
104 if (alphabetChanged && !transitionsChanged) transitionMatrix_->setParametersValues(transitionMatrix_->getParameters());
105 if (alphabetChanged && !emissionChanged) emissionProbabilities_->setParametersValues(emissionProbabilities_->getParameters());
108 backLogLikelihoodUpToDate_=
false;
116 vector<double> logTrans(nbStates_ * nbStates_);
119 for (
size_t i = 0; i < nbStates_; i++)
121 size_t ii = i * nbStates_;
122 for (
size_t j = 0; j < nbStates_; j++)
123 logTrans[ii + j] = log(transitionMatrix_->Pij(j, i));
127 const vector<double>* emissions = &(*emissionProbabilities_)(0);
129 for (
size_t j = 0; j < nbStates_; j++)
131 size_t jj = j * nbStates_;
132 x = logTrans[jj] + log(transitionMatrix_->getEquilibriumFrequencies()[0]);
134 for (
size_t k = 1; k < nbStates_; k++)
136 a = logTrans[k + jj] + log(transitionMatrix_->getEquilibriumFrequencies()[k]);
139 logLikelihood_[j] = log((*emissions)[j]) + x;
143 size_t nextBrkPt = nbSites_;
144 vector<size_t>::const_iterator bpIt = breakPoints_.begin();
145 if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
146 partialLogLikelihoods_.clear();
148 for (
size_t i = 1; i < nbSites_; i++)
150 size_t ii = i * nbStates_;
151 size_t iip = (i - 1) * nbStates_;
152 emissions = &(*emissionProbabilities_)(i);
155 for (
size_t j = 0; j < nbStates_; j++)
157 size_t jj = j * nbStates_;
158 x = logTrans[jj] + logLikelihood_[iip];
159 for (
size_t k = 1; k < nbStates_; k++)
161 a = logTrans[jj + k] + logLikelihood_[iip + k];
164 logLikelihood_[ii + j] = log((*emissions)[j]) + x;
170 double tmpLog = logLikelihood_[(i - 1) * nbStates_];
171 for (
size_t k = 1; k < nbStates_; k++)
173 partialLogLikelihoods_.push_back(tmpLog);
175 for (
size_t j = 0; j < nbStates_; j++)
177 size_t jj = j * nbStates_;
178 x = logTrans[jj] + log(transitionMatrix_->getEquilibriumFrequencies()[0]);
179 for (
size_t k = 1; k < nbStates_; k++)
181 a = logTrans[jj + k] + log(transitionMatrix_->getEquilibriumFrequencies()[k]);
184 logLikelihood_[ii + j] = log((*emissions)[j]) + x;
187 if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
188 else nextBrkPt = nbSites_;
193 double tmpLog = logLikelihood_[(nbSites_ - 1) * nbStates_];
194 for (
size_t k = 1; k < nbStates_; k++)
195 tmpLog =
NumTools::logsum(tmpLog, logLikelihood_[(nbSites_ - 1) * nbStates_ + k]);
196 partialLogLikelihoods_.push_back(tmpLog);
200 vector<double> copy = partialLogLikelihoods_;
201 sort(copy.begin(), copy.end());
202 for (
size_t i = copy.size(); i > 0; --i)
203 logLik_ += copy[i - 1];
210 if (backLogLikelihood_.size()==0)
212 backLogLikelihood_.resize(nbSites_);
213 for (
size_t i=0;i<nbSites_;i++)
214 backLogLikelihood_[i].resize(nbStates_);
220 vector<double> logTrans(nbStates_ * nbStates_);
221 for (
size_t i = 0; i < nbStates_; i++)
223 size_t ii = i * nbStates_;
224 for (
size_t j = 0; j < nbStates_; j++)
225 logTrans[ii + j] = log(transitionMatrix_->Pij(i, j));
230 const vector<double>* emissions = 0;
231 size_t nextBrkPt = 0;
232 vector<size_t>::const_reverse_iterator bpIt = breakPoints_.rbegin();
233 if (bpIt != breakPoints_.rend()) nextBrkPt = *bpIt;
235 for (
size_t k = 0; k < nbStates_; k++)
237 backLogLikelihood_[nbSites_ - 1][k] = 0.;
241 for (
size_t i = nbSites_ - 1; i > 0; i--)
243 emissions = &(*emissionProbabilities_)(i);
246 for (
size_t j = 0; j < nbStates_; j++)
248 size_t jj = j * nbStates_;
249 x = log((*emissions)[0]) + logTrans[jj] + backLogLikelihood_[i][0];
250 for (
size_t k = 1; k < nbStates_; k++)
252 x =
NumTools::logsum(x, log((*emissions)[k]) + logTrans[jj + k] + backLogLikelihood_[i][k]);
254 backLogLikelihood_[i - 1][j] = x;
259 for (
unsigned int j = 0; j < nbStates_; j++)
261 backLogLikelihood_[i - 1][j] = 0.;
264 if (bpIt != breakPoints_.rend()) nextBrkPt = *bpIt;
269 backLogLikelihoodUpToDate_=
true;
277 Vdouble probs=getHiddenStatesPosteriorProbabilitiesForASite(site);
279 for (
size_t i=0;i<nbStates_;i++)
280 x+=probs[i]*(*emissionProbabilities_)(site,i);
287 std::vector< std::vector<double> > vv;
288 getHiddenStatesPosteriorProbabilities(vv);
291 for (
size_t i=0;i<nbSites_;i++)
294 for (
size_t j=0;j<nbStates_;j++)
295 ret[i]+=vv[i][j]*(*emissionProbabilities_)(i,j);
306 if (!backLogLikelihoodUpToDate_)
311 vector<size_t>::const_iterator bpIt = breakPoints_.begin();
312 vector<double>::const_iterator logLikIt = partialLogLikelihoods_.begin();
313 while (bpIt != breakPoints_.end())
322 for (
size_t j = 0; j < nbStates_; j++)
324 probs[j] = exp(logLikelihood_[site * nbStates_ + j] + backLogLikelihood_[site][j] - *logLikIt);
332 size_t offset = append ? probs.size() : 0;
333 probs.resize(offset + nbSites_);
334 for (
size_t i = 0; i < nbSites_; i++)
336 probs[offset + i].resize(nbStates_);
339 if (!backLogLikelihoodUpToDate_)
342 size_t nextBrkPt = nbSites_;
343 vector<size_t>::const_iterator bpIt = breakPoints_.begin();
344 if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
346 vector<double>::const_iterator logLikIt = partialLogLikelihoods_.begin();
347 for (
size_t i = 0; i < nbSites_; i++)
349 if (i == nextBrkPt) {
352 if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
353 else nextBrkPt = nbSites_;
356 size_t ii = i * nbStates_;
357 for (
size_t j = 0; j < nbStates_; j++)
359 probs[offset + i][j] = exp(logLikelihood_[ii + j] + backLogLikelihood_[i][j] - *logLikIt);
369 if (dLogLikelihood_.size()==0){
370 dLogLikelihood_.resize(nbSites_);
371 for (
size_t i=0;i<nbSites_;i++)
372 dLogLikelihood_[i].resize(nbStates_);
375 partialDLogLikelihoods_.clear();
379 vector<double> num(nbStates_);
385 const vector<double>* emissions = &(*emissionProbabilities_)(0);
386 const vector<double>* dEmissions = &emissionProbabilities_->getDEmissionProbabilities(0);
388 for (
size_t j = 0; j < nbStates_; j++)
389 dLogLikelihood_[0][j] = (*dEmissions)[j] / (*emissions)[j];
392 size_t nextBrkPt = nbSites_;
393 vector<size_t>::const_iterator bpIt = breakPoints_.begin();
394 if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
395 partialDLogLikelihoods_.clear();
397 for (
size_t i = 1; i < nbSites_; i++)
399 size_t iip = (i - 1) * nbStates_;
401 emissions = &(*emissionProbabilities_)(i);
402 dEmissions = &emissionProbabilities_->getDEmissionProbabilities(i);
406 for (
size_t j = 0; j < nbStates_; j++)
408 x=(*dEmissions)[j]/(*emissions)[j];
410 for (
size_t k = 0; k < nbStates_; k++)
412 for (
size_t kp = 0; kp < nbStates_; kp++)
413 num[kp]=logLikelihood_[iip+kp]-logLikelihood_[iip+k];
418 dLogLikelihood_[i][j] = x;
425 for (
size_t k = 0; k < nbStates_; k++)
427 for (
size_t kp = 0; kp < nbStates_; kp++)
428 num[kp]=logLikelihood_[iip+kp]-logLikelihood_[iip+k];
433 partialDLogLikelihoods_.push_back(x);
435 for (
size_t j = 0; j < nbStates_; j++)
436 dLogLikelihood_[i][j] = (*dEmissions)[j] / (*emissions)[j];
439 if (bpIt != breakPoints_.end())
442 nextBrkPt = nbSites_;
448 for (
size_t k = 0; k < nbStates_; k++)
450 for (
size_t kp = 0; kp < nbStates_; kp++)
451 num[kp]=logLikelihood_[nbStates_*(nbSites_-1)+kp]-logLikelihood_[nbStates_*(nbSites_-1)+k];
456 partialDLogLikelihoods_.push_back(x);
461 vector<double> copy = partialDLogLikelihoods_;
462 sort(copy.begin(), copy.end());
463 for (
size_t i = copy.size(); i > 0; --i)
464 dLogLik_ += copy[i - 1];
472 getFirstOrderDerivative(d2Variable_);
475 if (d2LogLikelihood_.size()==0){
476 d2LogLikelihood_.resize(nbSites_);
477 for (
size_t i=0;i<nbSites_;i++)
478 d2LogLikelihood_[i].resize(nbStates_);
481 partialD2LogLikelihoods_.clear();
485 vector<double> num(nbStates_);
491 const vector<double>* emissions = &(*emissionProbabilities_)(0);
492 const vector<double>* dEmissions = &emissionProbabilities_->getDEmissionProbabilities(0);
493 const vector<double>* d2Emissions = &emissionProbabilities_->getD2EmissionProbabilities(0);
495 for (
size_t j = 0; j < nbStates_; j++)
496 d2LogLikelihood_[0][j] = (*d2Emissions)[j] / (*emissions)[j] - pow((*dEmissions)[j] / (*emissions)[j],2);
499 size_t nextBrkPt = nbSites_;
500 vector<size_t>::const_iterator bpIt = breakPoints_.begin();
501 if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
502 partialDLogLikelihoods_.clear();
504 for (
size_t i = 1; i < nbSites_; i++)
506 size_t iip = (i - 1) * nbStates_;
508 emissions = &(*emissionProbabilities_)(i);
509 dEmissions = &emissionProbabilities_->getDEmissionProbabilities(i);
510 d2Emissions = &emissionProbabilities_->getD2EmissionProbabilities(i);
514 for (
size_t j = 0; j < nbStates_; j++)
516 x=(*d2Emissions)[j] / (*emissions)[j] - pow((*dEmissions)[j] / (*emissions)[j],2);
518 for (
size_t k = 0; k < nbStates_; k++)
520 for (
size_t kp = 0; kp < nbStates_; kp++)
521 num[kp]=logLikelihood_[iip+kp]-logLikelihood_[iip+k];
525 z=d2LogLikelihood_[i-1][k]+pow(dLogLikelihood_[i-1][k],2)
526 - dLogLikelihood_[i-1][k] *
VectorTools::sumExp(num, trans.getCol(j) * dLogLikelihood_[i-1])/snum;
528 x += z * trans(k,j) / snum;
531 d2LogLikelihood_[i][j] = x;
539 for (
size_t k = 1; k < nbStates_; k++)
541 for (
size_t kp = 0; kp < nbStates_; kp++)
542 num[kp]=logLikelihood_[iip+kp]-logLikelihood_[iip+k];
546 x += (d2LogLikelihood_[i-1][k]+pow(dLogLikelihood_[i-1][k],2)
550 partialD2LogLikelihoods_.push_back(x);
552 for (
size_t j = 0; j < nbStates_; j++)
553 d2LogLikelihood_[i][j] = (*d2Emissions)[j] / (*emissions)[j] - pow((*dEmissions)[j] / (*emissions)[j],2);
557 if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
558 else nextBrkPt = nbSites_;
564 for (
size_t k = 0; k < nbStates_; k++)
566 for (
size_t kp = 0; kp < nbStates_; kp++)
567 num[kp]=logLikelihood_[nbStates_*(nbSites_-1)+kp]-logLikelihood_[nbStates_*(nbSites_-1)+k];
571 x += (d2LogLikelihood_[nbSites_-1][k]+pow(dLogLikelihood_[nbSites_-1][k],2)
572 - dLogLikelihood_[nbSites_-1][k] *
VectorTools::sumExp(num, dLogLikelihood_[nbSites_-1])/snum)/snum;
575 partialD2LogLikelihoods_.push_back(x);
580 vector<double> copy = partialD2LogLikelihoods_;
581 sort(copy.begin(), copy.end());
582 for (
size_t i = copy.size(); i > 0; --i)
583 d2LogLik_ += copy[i - 1];
void computeBackward_() const
This class allows to perform a correspondence analysis.
double getLikelihoodForASite(size_t site) const
Get the likelihood for a site.
A partial implementation of the Parametrizable interface.
Interface for computing emission probabilities in a Hidden Markov Model.
Vdouble getLikelihoodForEachSite() const
Get the likelihood for each site.
virtual void fireParameterChanged(const ParameterList &pl)
Notify the class when one or several parameters have changed.
The parameter list object.
LogsumHmmLikelihood(HmmStateAlphabet *hiddenAlphabet, HmmTransitionMatrix *transitionMatrix, HmmEmissionProbabilities *emissionProbabilities, const std::string &prefix)
Build a new LogsumHmmLikelihood object.
Matrix storage by column.
void computeD2Forward_() const
std::vector< double > Vdouble
void getHiddenStatesPosteriorProbabilities(std::vector< std::vector< double > > &probs, bool append=false) const
Vdouble getHiddenStatesPosteriorProbabilitiesForASite(size_t site) const
void computeDForward_() const
Describe the transition probabilities between hidden states of a Hidden Markov Model.