bpp-core  2.2.0
LowMemoryRescaledHmmLikelihood.cpp
Go to the documentation of this file.
1 //
2 // File: LowMemoryRescaledHmmLikelihood.h
3 // Created by: Julien Dutheil
4 // Created on: Wed Dec 16 10:47 2009
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
41 
42 // from the STL:
43 #include <iostream>
44 #include <algorithm>
45 using namespace bpp;
46 using namespace std;
47 
49  HmmStateAlphabet* hiddenAlphabet,
50  HmmTransitionMatrix* transitionMatrix,
51  HmmEmissionProbabilities* emissionProbabilities,
52  const std::string& prefix,
53  size_t maxSize) throw (Exception) :
55  AbstractParametrizable(prefix),
56  hiddenAlphabet_(hiddenAlphabet),
57  transitionMatrix_(transitionMatrix),
58  emissionProbabilities_(emissionProbabilities),
59  likelihood1_(),
60  likelihood2_(),
61  logLik_(),
62  maxSize_(maxSize),
63  breakPoints_(),
64  nbStates_(),
65  nbSites_()
66 {
67  if (!hiddenAlphabet) throw Exception("LowMemoryRescaledHmmLikelihood: null pointer passed for HmmStateAlphabet.");
68  if (!transitionMatrix) throw Exception("LowMemoryRescaledHmmLikelihood: null pointer passed for HmmTransitionMatrix.");
69  if (!emissionProbabilities) throw Exception("LowMemoryRescaledHmmLikelihood: null pointer passed for HmmEmissionProbabilities.");
70  if (!hiddenAlphabet_->worksWith(transitionMatrix->getHmmStateAlphabet()))
71  throw Exception("LowMemoryRescaledHmmLikelihood: HmmTransitionMatrix and HmmEmissionProbabilities should point toward the same HmmStateAlphabet object.");
72  if (!hiddenAlphabet_->worksWith(emissionProbabilities->getHmmStateAlphabet()))
73  throw Exception("LowMemoryRescaledHmmLikelihood: HmmTransitionMatrix and HmmEmissionProbabilities should point toward the same HmmStateAlphabet object.");
74  nbStates_ = hiddenAlphabet_->getNumberOfStates();
75  nbSites_ = emissionProbabilities_->getNumberOfPositions();
76 
77  // Manage parameters:
78  addParameters_(hiddenAlphabet_->getParameters());
79  addParameters_(transitionMatrix_->getParameters());
80  addParameters_(emissionProbabilities_->getParameters());
81 
82  // Init arrays:
83  likelihood1_.resize(nbStates_);
84  likelihood2_.resize(nbStates_);
85 
86  // Compute:
87  computeForward_();
88 }
89 
91 {
92  bool alphabetChanged = hiddenAlphabet_->matchParametersValues(pl);
93  bool transitionsChanged = transitionMatrix_->matchParametersValues(pl);
94  bool emissionChanged = emissionProbabilities_->matchParametersValues(pl);
95  // these lines are necessary because the transitions and emissions can depend on the alphabet.
96  // we could use a StateChangeEvent, but this would result in computing some calculations twice in some cases
97  // (when both the alphabet and other parameter changed).
98  if (alphabetChanged && !transitionsChanged) transitionMatrix_->setParametersValues(transitionMatrix_->getParameters());
99  if (alphabetChanged && !emissionChanged) emissionProbabilities_->setParametersValues(emissionProbabilities_->getParameters());
100 
101  computeForward_();
102 }
103 
104 /***************************************************************************************************************************/
105 
107 {
108  double x;
109  vector<double> tmp(nbStates_);
110  vector<double> lScales(min(maxSize_, nbSites_));
111  vector<double> trans(nbStates_ * nbStates_);
112 
113  // Transition probabilities:
114  for (size_t i = 0; i < nbStates_; i++)
115  {
116  size_t ii = i * nbStates_;
117  for (size_t j = 0; j < nbStates_; j++)
118  {
119  trans[ii + j] = transitionMatrix_->Pij(j, i);
120  }
121  }
122 
123  // Initialisation:
124  double scale = 0;
125  const vector<double>* emissions = &(*emissionProbabilities_)(0);
126  for (size_t j = 0; j < nbStates_; j++)
127  {
128  size_t jj = j * nbStates_;
129  x = 0;
130  for (size_t k = 0; k < nbStates_; k++)
131  {
132  x += trans[k + jj] * transitionMatrix_->getEquilibriumFrequencies()[k];
133  }
134  tmp[j] = (*emissions)[j] * x;
135  scale += tmp[j];
136  }
137  for (size_t j = 0; j < nbStates_; j++)
138  {
139  likelihood1_[j] = tmp[j] / scale;
140  }
141  lScales[0] = log(scale);
142 
143  vector<double>* previousLikelihood = &likelihood2_, * currentLikelihood = &likelihood1_, * tmpLikelihood;
144 
145  // Recursion:
146  size_t nextBrkPt = nbSites_; // next break point
147  vector<size_t>::const_iterator bpIt = breakPoints_.begin();
148  if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
149 
150  double a;
151  logLik_ = 0;
152  size_t offset = 0;
153  greater<double> cmp;
154  for (size_t i = 1; i < nbSites_; i++)
155  {
156  //Swap pointers:
157  tmpLikelihood = previousLikelihood;
158  previousLikelihood = currentLikelihood;
159  currentLikelihood = tmpLikelihood;
160 
161  scale = 0;
162  emissions = &(*emissionProbabilities_)(i);
163  if (i < nextBrkPt)
164  {
165  for (size_t j = 0; j < nbStates_; j++)
166  {
167  size_t jj = j * nbStates_;
168  x = 0;
169  for (size_t k = 0; k < nbStates_; k++)
170  {
171  a = trans[jj + k] * (*previousLikelihood)[k];
172  if (a < 0)
173  {
174  // *ApplicationTools::warning << "Negative value for likelihood at " << i << ", state " << j << ": " << _likelihood[i-1][k] << ", Pij = " << _hiddenModel->Pij(k, j) << endl;
175  a = 0;
176  }
177  x += a;
178  }
179  tmp[j] = (*emissions)[j] * x;
180  if (tmp[j] < 0)
181  {
182  // *ApplicationTools::warning << "Negative emission probability at " << i << ", state " << j << ": " << _emissions[i][j] << endl;
183  tmp[j] = 0;
184  }
185  scale += tmp[j];
186  }
187  }
188  else // Reset markov chain:
189  {
190  for (size_t j = 0; j < nbStates_; j++)
191  {
192  size_t jj = j * nbStates_;
193  x = 0;
194  for (size_t k = 0; k < nbStates_; k++)
195  {
196  a = trans[jj + k] * transitionMatrix_->getEquilibriumFrequencies()[k];
197  if (a < 0)
198  {
199  // *ApplicationTools::warning << "Negative value for likelihood at " << i << ", state " << j << ": " << _likelihood[i-1][k] << ", Pij = " << _hiddenModel->Pij(k, j) << endl;
200  a = 0;
201  }
202  x += a;
203  }
204  tmp[j] = (*emissions)[j] * x;
205  if (tmp[j] < 0)
206  {
207  // *ApplicationTools::warning << "Negative emission probability at " << i << ", state " << j << ": " << _emissions[i][j] << endl;
208  tmp[j] = 0;
209  }
210  scale += tmp[j];
211  }
212  bpIt++;
213  if (bpIt != breakPoints_.end()) nextBrkPt = *bpIt;
214  else nextBrkPt = nbSites_;
215  }
216 
217  for (size_t j = 0; j < nbStates_; j++)
218  {
219  if (scale > 0) (*currentLikelihood)[j] = tmp[j] / scale;
220  else (*currentLikelihood)[j] = 0;
221  }
222  lScales[i - offset] = log(scale);
223 
224  if (i - offset == maxSize_ - 1)
225  {
226  //We make partial calculations and reset the arrays:
227  double partialLogLik = 0;
228  sort(lScales.begin(), lScales.end(), cmp);
229  for (size_t j = 0; j < maxSize_; ++j)
230  {
231  partialLogLik += lScales[j];
232  }
233  logLik_ += partialLogLik;
234  offset += maxSize_;
235  }
236  }
237  sort(lScales.begin(), lScales.begin() + static_cast<ptrdiff_t>(nbSites_ - offset), cmp);
238  double partialLogLik = 0;
239  for (size_t i = 0; i < nbSites_ - offset; ++i)
240  {
241  partialLogLik += lScales[i];
242  }
243  logLik_ += partialLogLik;
244 }
245 
246 /***************************************************************************************************************************/
247 
Hidden states alphabet.
This class allows to perform a correspondence analysis.
A partial implementation of the Parametrizable interface.
Interface for computing emission probabilities in a Hidden Markov Model.
STL namespace.
The parameter list object.
Definition: ParameterList.h:61
Exception base class.
Definition: Exceptions.h:57
LowMemoryRescaledHmmLikelihood(HmmStateAlphabet *hiddenAlphabet, HmmTransitionMatrix *transitionMatrix, HmmEmissionProbabilities *emissionProbabilities, const std::string &prefix, size_t maxSize=1000000)
Build a new LowMemoryRescaledHmmLikelihood object.
void fireParameterChanged(const ParameterList &pl)
Notify the class when one or several parameters have changed.
Describe the transition probabilities between hidden states of a Hidden Markov Model.