44 #include <Bpp/Text/TextTools.h> 45 #include <Bpp/Text/StringTokenizer.h> 46 #include <Bpp/App/ApplicationTools.h> 47 #include <Bpp/Numeric/VectorTools.h> 48 #include <Bpp/Numeric/Matrix/MatrixTools.h> 49 #include <Bpp/Numeric/Stat/Mva/CorrespondenceAnalysis.h> 51 #include <Bpp/Seq/SequenceTools.h> 82 vector<string> names = data.getSequencesNames();
83 vector< map<int, double> > freqs(names.size());
85 for (
size_t i = 0; i < names.size(); ++i)
87 Sequence* seq =
new BasicSequence(data.getSequence(names[i]));
88 SymbolListTools::changeGapsToUnknownCharacters(*seq);
89 SequenceTools::getFrequencies(*seq, freqs.at(i));
92 for (
int k = 0; k < 20; ++k)
96 for (
int k = 0; k < 20; k++)
104 RowMatrix<double> freqMatrix(names.size(), 20);
105 for (
size_t i = 0; i < freqs.size(); i++)
107 bool normalize =
false;
108 for (
size_t j = 0; j < 20; j++)
110 map<int, double>::iterator it = freqs[i].find(static_cast<int>(j));
111 if (it != freqs[i].end())
113 freqMatrix(i, j) = (*it).second;
117 freqMatrix(i, j) = 0.000001;
124 for (
size_t k = 0; k < 20; k++)
126 sum += freqMatrix(i, k);
128 for (
size_t l = 0; l < 20; l++)
130 freqMatrix(i, l) = freqMatrix(i, l) / sum;
136 CorrespondenceAnalysis* coa =
new CorrespondenceAnalysis(freqMatrix, 19);
138 RowMatrix<double> ppalAxes = coa->getPrincipalAxes();
140 MatrixTools::transpose(ppalAxes,
P_);
142 R_ = coa->getRowCoordinates();
149 size_t nbAxesConserved = coa->getNbOfKeptAxes();
152 ApplicationTools::displayWarning(
"The specified number of parameters per branch (" + TextTools::toString(
nbrOfAxes_) +
153 ") is higher than the number of axes (" + TextTools::toString(nbAxesConserved) +
154 ")... The number of parameters per branch is now equal to the number of axes kept by the COA analysis (" + TextTools::toString(nbAxesConserved) +
")");
159 const vector<double> rCoords =
R_.col(i);
160 double maxCoord = VectorTools::max(rCoords);
161 double minCoord = VectorTools::min(rCoords);
162 double sd = VectorTools::sd<double, double>(rCoords);
163 IntervalConstraint* constraint =
new IntervalConstraint(minCoord - sd, maxCoord + sd,
true,
true);
165 pList.addParameter(Parameter(
"Coala.AxPos" + TextTools::toString(i), TextTools::toDouble(
paramValues_[
"AxPos" + TextTools::toString(i)].substr(0, 8)), constraint));
167 pList.addParameter(Parameter(
"Coala.AxPos" + TextTools::toString(i), 0., constraint));
178 vector<double> E(20, 0.0);
180 for (
unsigned int i = 0; i < 20; i++)
182 for (
unsigned int j = 0; j < V.size(); j++)
184 E[i] = E[i] + P(j, i) * V[j];
std::vector< double > colWeights_
std::vector< double > prodMatrixVector(RowMatrix< double > &P, std::vector< double > &V)
CoalaCore(size_t nbAxes=0, const std::string &exch="LG08")
ParameterList computeCOA(const SequenceContainer &data, bool param=true)
std::map< std::string, std::string > paramValues_