bpp-phyl  2.2.0
WordFrequenciesSet.cpp
Go to the documentation of this file.
1 //
2 // File: WordFrequenciesSet.cpp
3 // Created by: Laurent Gueguen
4 // Created on: lundi 2 avril 2012, à 14h 02
5 //
6 
7 /*
8  Copyright or (c) or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #include "WordFrequenciesSet.h"
41 
42 
43 using namespace bpp;
44 
45 #include <cmath>
46 using namespace std;
47 
48 size_t AbstractWordFrequenciesSet::getSizeFromVector(const std::vector<FrequenciesSet*>& freqVector)
49 {
50  size_t s = 1;
51  size_t l = freqVector.size();
52 
53  for (size_t i = 0; i < l; i++)
54  {
55  s *= freqVector[i]->getAlphabet()->getSize();
56  }
57 
58  return s;
59 }
60 
61 AbstractWordFrequenciesSet::AbstractWordFrequenciesSet(StateMap* stateMap, const string& prefix, const string& name) :
62  AbstractFrequenciesSet(stateMap, prefix, name)
63 {}
64 
66 {
67  return dynamic_cast<const WordAlphabet*>(getAlphabet())->getLength();
68 }
69 
71 {}
72 
73 // ///////////////////////////////////////////////////////////////////
74 // // WordFromIndependentFrequenciesSet
75 
76 
78  const WordAlphabet* pWA,
79  const std::vector<FrequenciesSet*>& freqVector,
80  const string& prefix, const string& name) :
81  AbstractWordFrequenciesSet(new CanonicalStateMap(pWA, false), prefix, name),
82  vFreq_(),
83  vNestedPrefix_()
84 {
85  size_t sf = getSizeFromVector(freqVector);
86  if (pWA->getSize() != sf)
87  throw Exception("WordFromIndependentFrequenciesSet: Size of the frequencies does not match size of the alphabet : " + TextTools::toString(sf) + " vs " + TextTools::toString(pWA->getSize()));
88 
89  size_t l = freqVector.size();
90 
91  for (size_t i = 0; i < l; i++)
92  {
93  vFreq_.push_back(freqVector[i]);
94  vNestedPrefix_.push_back(freqVector[i]->getNamespace());
95  vFreq_[i]->setNamespace(prefix + TextTools::toString(i + 1) + "_" + vNestedPrefix_[i]);
96  addParameters_(vFreq_[i]->getParameters());
97  }
98 
100 }
101 
104  vFreq_(iwfs.vFreq_.size()),
105  vNestedPrefix_(iwfs.vNestedPrefix_)
106 {
107  for (unsigned i = 0; i < iwfs.vFreq_.size(); i++)
108  {
109  vFreq_[i] = iwfs.vFreq_[i]->clone();
110  }
112 }
113 
115 {
116  for (unsigned i = 0; i < vFreq_.size(); i++)
117  {
118  delete vFreq_[i];
119  }
120 }
121 
123 {
126 
127  //Clean current frequencies first:
128  for (unsigned i = 0; i < vFreq_.size(); i++)
129  {
130  delete vFreq_[i];
131  }
132 
133  vFreq_.resize(iwfs.vFreq_.size());
134  for (unsigned i = 0; i < vFreq_.size(); i++)
135  {
136  vFreq_[i] = iwfs.vFreq_[i]->clone();
137  }
139 
140  return *this;
141 }
142 
144 {
145  size_t l = vFreq_.size();
146 
147  bool f = 0;
148  for (size_t i = 0; i < l; i++)
149  {
150  f |= vFreq_[i]->matchParametersValues(pl);
151  }
152 
153  if (f)
155 }
156 
158 {
159  size_t l = vFreq_.size();
160  size_t s = getAlphabet()->getSize();
161  vector< vector<double> >f(l);
162 
163  size_t i, p, t, i2;
164 
165  for (i = 0; i < l; i++)
166  {
167  f[i] = vFreq_[i]->getFrequencies();
168  }
169 
170  for (i = 0; i < s; i++)
171  {
172  i2 = i;
173  getFreq_(i) = 1;
174  for (p = l; p > 0; p--)
175  {
176  t = vFreq_[p - 1]->getAlphabet()->getSize();
177  getFreq_(i) *= f[p - 1][i2 % t];
178  i2 /= t;
179  }
180  }
181 }
182 
183 void WordFromIndependentFrequenciesSet::setFrequencies(const vector<double>& frequencies)
184 {
185  if (frequencies.size() != getAlphabet()->getSize())
186  throw DimensionException("WordFromIndependentFrequenciesSet::setFrequencies", frequencies.size(), getAlphabet()->getSize());
187  double sum = 0.0;
188  size_t size = frequencies.size();
189  for (size_t i = 0; i < size; i++)
190  {
191  sum += frequencies[i];
192  }
193  if (fabs(1. - sum) > 0.000001)
194  throw Exception("WordFromIndependentFrequenciesSet::setFrequencies. Frequencies must equal 1 (sum = " + TextTools::toString(sum) + ").");
195 
196  size_t d, i, j, k, s, l = vFreq_.size();
197  vector<double> freq;
198 
199  d = size;
200  for (i = 0; i < l; i++)
201  {
202  s = vFreq_[i]->getAlphabet()->getSize();
203  freq.resize(s);
204  d /= s;
205  for (j = 0; j < s; j++)
206  {
207  freq[j] = 0;
208  }
209  for (k = 0; k < size; k++)
210  {
211  freq[(k / d) % s] += frequencies[k];
212  }
213  vFreq_[i]->setFrequencies(freq);
214  }
215 
216  for (i = 0; i < l; i++)
217  {
218  matchParametersValues(vFreq_[i]->getParameters());
219  }
220 
222 }
223 
224 
226 {
227  return vFreq_.size();
228 }
229 
230 void WordFromIndependentFrequenciesSet::setNamespace(const std::string& prefix)
231 {
232  AbstractFrequenciesSet::setNamespace(prefix);
233  for (size_t i = 0; i < vFreq_.size(); i++)
234  {
235  vFreq_[i]->setNamespace(prefix + TextTools::toString(i + 1) + "_" + vNestedPrefix_[i]);
236  }
237 }
238 
240 {
241  string s = getName() +" : " + vFreq_[0]->getName();
242  for (size_t i = 1; i < vFreq_.size(); i++)
243  {
244  s += " * " + vFreq_[i]->getName();
245  }
246  return s;
247 }
248 
249 // ///////////////////////////////////////////////////////////////////
250 // // WordFromUniqueFrequenciesSet
251 
252 
254  const WordAlphabet* pWA,
255  FrequenciesSet* pabsfreq,
256  const string& prefix,
257  const string& name) :
258  AbstractWordFrequenciesSet(new CanonicalStateMap(pWA, false), prefix, name),
259  pFreq_(pabsfreq),
260  NestedPrefix_(pabsfreq->getNamespace()),
261  length_(pWA->getLength())
262 {
263  size_t i;
264 
265  string st = "";
266  for (i = 0; i < length_; i++)
267  {
268  st += TextTools::toString(i + 1);
269  }
270 
271  pFreq_->setNamespace(prefix+ st + "_" + NestedPrefix_);
272  addParameters_(pFreq_->getParameters());
273 
275 }
276 
279  pFreq_(iwfs.pFreq_->clone()),
280  NestedPrefix_(iwfs.NestedPrefix_),
281  length_(iwfs.length_)
282 {
284 }
285 
286 
288 {
290  delete pFreq_;
291  pFreq_ = iwfs.pFreq_->clone();
293  length_ = iwfs.length_;
294 
296  return *this;
297 }
298 
300 {
301  if (pFreq_)
302  delete pFreq_;
303  pFreq_ = 0;
304 }
305 
307 {
308  if (pFreq_->matchParametersValues(pl))
310 }
311 
313 {
314  size_t s = getAlphabet()->getSize();
315  vector<double> f;
316  size_t letsi = pFreq_->getAlphabet()->getSize();
317 
318  size_t i, p, i2;
319 
320  f = pFreq_->getFrequencies();
321 
322  for (i = 0; i < s; i++)
323  {
324  i2 = i;
325  getFreq_(i2) = 1;
326  for (p = length_; p > 0; p--)
327  {
328  getFreq_(i) *= f[i2 % letsi];
329  i2 /= letsi;
330  }
331  }
332 }
333 
334 void WordFromUniqueFrequenciesSet::setFrequencies(const vector<double>& frequencies)
335 {
336  if (frequencies.size() != getAlphabet()->getSize())
337  throw DimensionException("WordFromUniqueFrequenciesSet::setFrequencies", frequencies.size(), getAlphabet()->getSize());
338  double sum = 0.0;
339  size_t size = frequencies.size();
340  for (size_t i = 0; i < size; i++)
341  {
342  sum += frequencies[i];
343  }
344  if (fabs(1. - sum) > 0.000001)
345  throw Exception("WordFromUniqueFrequenciesSet::setFrequencies. Frequencies must equal 1 (sum = " + TextTools::toString(sum) + ").");
346 
347  size_t d, i, j, k;
348  vector<double> freq;
349 
350  size_t letsi = pFreq_->getAlphabet()->getSize();
351  freq.resize(letsi);
352 
353  for (j = 0; j < letsi; j++)
354  {
355  freq[j] = 0;
356  }
357 
358  d = size;
359  for (i = 0; i < length_; i++)
360  {
361  d /= letsi;
362  for (k = 0; k < size; k++)
363  {
364  freq[(k / d) % letsi] += frequencies[k];
365  }
366  }
367  for (j = 0; j < letsi; j++)
368  {
369  freq[j] /= static_cast<double>(length_);
370  }
371 
372  pFreq_->setFrequencies(freq);
373  matchParametersValues(pFreq_->getParameters());
375 }
376 
377 
379 {
380  AbstractFrequenciesSet::setNamespace(prefix);
381  string st = "";
382  for (unsigned i = 0; i < length_; i++)
383  {
384  st += TextTools::toString(i + 1);
385  }
386  pFreq_->setNamespace(prefix + st + "_" + NestedPrefix_);
387 }
388 
389 
391 {
392  return getName() + " : " + pFreq_->getName() + " * " + TextTools::toString(length_);
393 }
394 
395 
WordFromUniqueFrequenciesSet(const WordAlphabet *pWA, FrequenciesSet *pabsfreq, const std::string &prefix="", const std::string &name="WordFromUnique")
Constructor from a WordAlphabet* and a FrequenciesSet* repeated as many times as the length of the words.
AbstractWordFrequenciesSet & operator=(const AbstractWordFrequenciesSet &af)
This class implements a state map where all resolved states are modeled.
Definition: StateMap.h:161
virtual const Alphabet * getAlphabet() const =0
size_t getSizeFromVector(const std::vector< FrequenciesSet *> &freqVector)
const WordAlphabet * getAlphabet() const
virtual void setFrequencies(const std::vector< double > &frequencies)=0
Set the parameters in order to match a given set of frequencies.
STL namespace.
virtual const std::vector< double > getFrequencies() const =0
FrequenciesSet * clone() const =0
Parametrize a set of state frequencies.
void setNamespace(const std::string &prefix)
WordFromIndependentFrequenciesSet & operator=(const WordFromIndependentFrequenciesSet &iwfs)
std::vector< FrequenciesSet * > vFreq_
virtual void setFrequencies(const std::vector< double > &frequencies)
virtual void fireParameterChanged(const ParameterList &pl)
WordFromIndependentFrequenciesSet(const WordAlphabet *pWA, const std::vector< FrequenciesSet *> &freqVector, const std::string &prefix="", const std::string &name="WordFromIndependent")
Constructor from a WordAlphabet* and a vector of different FrequenciesSet*. Throws an Exception if the combined size of the frequencies sets does not match the size of the word alphabet.
virtual std::string getName() const =0
virtual void setFrequencies(const std::vector< double > &frequencies)
double & getFreq_(size_t i)
the Frequencies in words are the product of Independent Frequencies in letters
std::vector< std::string > vNestedPrefix_
AbstractWordFrequenciesSet(StateMap *stateMap, const std::string &prefix="", const std::string &name="")
Map the states of a given alphabet which have a model state.
Definition: StateMap.h:58
void setNamespace(const std::string &prefix)
std::string getName() const
void fireParameterChanged(const ParameterList &pl)
WordFromUniqueFrequenciesSet & operator=(const WordFromUniqueFrequenciesSet &iwfs)
Basic implementation of the FrequenciesSet interface.