49 #include <Bpp/Text/TextTools.h> 50 #include <Bpp/Text/StringTokenizer.h> 51 #include <Bpp/Numeric/Number.h> 52 #include <Bpp/BppString.h> 53 #include <Bpp/App/ApplicationTools.h> 54 #include <Bpp/Numeric/VectorTools.h> 55 #include <Bpp/Numeric/Prob/ConstantDistribution.h> 56 #include <Bpp/Numeric/Matrix/MatrixTools.h> 59 #include <Bpp/Seq/Alphabet/DNA.h> 60 #include <Bpp/Seq/Container/VectorSiteContainer.h> 79 getLeavesId(tree, nodeId, leaves);
85 if (!tree.hasNode(nodeId))
87 if (tree.isLeaf(nodeId))
89 leaves.push_back(nodeId);
91 vector<int> sons = tree.getSonsId(nodeId);
92 for (
size_t i = 0; i < sons.size(); i++)
94 getLeavesId(tree, sons[i], leaves);
100 if (!tree.hasNode(nodeId))
104 if (tree.isLeaf(nodeId))
108 vector<int> sons = tree.getSonsId(nodeId);
109 for (
size_t i = 0; i < sons.size(); ++i)
111 n += getNumberOfLeaves(tree, sons[i]);
122 searchLeaf(tree, nodeId, name,
id);
136 if (tree.isLeaf(nodeId))
138 if (tree.getNodeName(nodeId) == name)
140 id =
new int(nodeId);
145 for (
size_t i = 0; i < sons.size(); i++)
147 searchLeaf(tree, nodeId, name,
id);
156 getNodesId(tree, nodeId, nodes);
162 if (!tree.hasNode(nodeId))
164 vector<int> sons = tree.getSonsId(nodeId);
165 for (
size_t i = 0; i < sons.size(); i++)
167 getNodesId(tree, sons[i], nodes);
169 nodes.push_back(nodeId);
176 if (!tree.hasNode(nodeId))
179 vector<int> sons = tree.getSonsId(nodeId);
180 for (
size_t i = 0; i < sons.size(); i++)
182 size_t c = getDepth(tree, sons[i]) + 1;
193 if (!tree.hasNode(nodeId))
196 vector<int> sons = tree.getSonsId(nodeId);
197 for (
size_t i = 0; i < sons.size(); i++)
199 size_t c = getDepths(tree, sons[i], depths) + 1;
211 if (!tree.hasNode(nodeId))
214 vector<int> sons = tree.getSonsId(nodeId);
215 for (
size_t i = 0; i < sons.size(); i++)
218 if (tree.hasDistanceToFather(sons[i]))
219 dist = tree.getDistanceToFather(sons[i]);
222 double c = getHeight(tree, sons[i]) + dist;
233 if (!tree.hasNode(nodeId))
236 vector<int> sons = tree.getSonsId(nodeId);
237 for (
size_t i = 0; i < sons.size(); i++)
240 if (tree.hasDistanceToFather(sons[i]))
241 dist = tree.getDistanceToFather(sons[i]);
244 double c = getHeights(tree, sons[i], heights) + dist;
256 if (!tree.hasNode(nodeId))
259 if (tree.isLeaf(nodeId))
261 s << tree.getNodeName(nodeId);
266 vector<int> sonsId = tree.getSonsId(nodeId);
267 s << nodeToParenthesis(tree, sonsId[0], writeId);
268 for (
size_t i = 1; i < sonsId.size(); i++)
270 s <<
"," << nodeToParenthesis(tree, sonsId[i], writeId);
276 if (tree.isLeaf(nodeId))
282 if (tree.hasBranchProperty(nodeId, BOOTSTRAP))
283 s << (
dynamic_cast<const Number<double>*
>(tree.getBranchProperty(nodeId, BOOTSTRAP))->getValue());
285 if (tree.hasDistanceToFather(nodeId))
286 s <<
":" << tree.getDistanceToFather(nodeId);
294 if (!tree.hasNode(nodeId))
297 if (tree.isLeaf(nodeId))
299 s << tree.getNodeName(nodeId);
304 vector<int> sonsId = tree.getSonsId(nodeId);
305 s << nodeToParenthesis(tree, sonsId[0], bootstrap, propertyName);
306 for (
size_t i = 1; i < sonsId.size(); i++)
308 s <<
"," << nodeToParenthesis(tree, sonsId[i], bootstrap, propertyName);
314 if (tree.hasBranchProperty(nodeId, BOOTSTRAP))
315 s << (
dynamic_cast<const Number<double>*
>(tree.getBranchProperty(nodeId, BOOTSTRAP))->getValue());
319 if (tree.hasBranchProperty(nodeId, propertyName))
320 s << *(dynamic_cast<const BppString*>(tree.getBranchProperty(nodeId, propertyName)));
323 if (tree.hasDistanceToFather(nodeId))
324 s <<
":" << tree.getDistanceToFather(nodeId);
335 vector<int> sonsId = tree.
getSonsId(rootId);
339 for (
size_t i = 0; i < sonsId.size(); i++)
341 s <<
"," << nodeToParenthesis(tree, sonsId[i], writeId);
346 if (sonsId.size() > 0)
348 s << nodeToParenthesis(tree, sonsId[0], writeId);
349 for (
size_t i = 1; i < sonsId.size(); i++)
351 s <<
"," << nodeToParenthesis(tree, sonsId[i], writeId);
367 vector<int> sonsId = tree.
getSonsId(rootId);
371 for (
size_t i = 0; i < sonsId.size(); i++)
373 s <<
"," << nodeToParenthesis(tree, sonsId[i], bootstrap, propertyName);
378 s << nodeToParenthesis(tree, sonsId[0], bootstrap, propertyName);
379 for (
size_t i = 1; i < sonsId.size(); i++)
381 s <<
"," << nodeToParenthesis(tree, sonsId[i], bootstrap, propertyName);
388 s << (
dynamic_cast<const Number<double>*
>(tree.
getBranchProperty(rootId, BOOTSTRAP))->getValue());
393 s << *(dynamic_cast<const BppString*>(tree.
getBranchProperty(rootId, propertyName)));
404 if (!tree.hasNode(nodeId1))
406 if (!tree.hasNode(nodeId2))
409 vector<int> pathMatrix1;
410 vector<int> pathMatrix2;
412 int nodeUp = nodeId1;
413 while (tree.hasFather(nodeUp))
415 pathMatrix1.push_back(nodeUp);
416 nodeUp = tree.getFatherId(nodeUp);
418 pathMatrix1.push_back(nodeUp);
421 while (tree.hasFather(nodeUp))
423 pathMatrix2.push_back(nodeUp);
424 nodeUp = tree.getFatherId(nodeUp);
426 pathMatrix2.push_back(nodeUp);
429 size_t tmp1 = pathMatrix1.size();
430 size_t tmp2 = pathMatrix2.size();
432 while ((tmp1 > 0) && (tmp2 > 0))
434 if (pathMatrix1[tmp1 - 1] != pathMatrix2[tmp2 - 1])
440 for (
size_t y = 0; y < tmp1; ++y)
442 path.push_back(pathMatrix1[y]);
445 path.push_back(pathMatrix1[tmp1]);
446 for (
size_t j = tmp2; j > 0; --j)
448 path.push_back(pathMatrix2[j - 1]);
461 vector<int> path = getPathBetweenAnyTwoNodes(tree, nodeId1, nodeId2,
false);
463 for (
size_t i = 0; i < path.size(); i++)
474 if (!tree.hasNode(nodeId))
477 if (tree.hasDistanceToFather(nodeId))
478 brLen[0] = tree.getDistanceToFather(nodeId);
480 throw NodeException(
"TreeTools::getbranchLengths(). No branch length.", nodeId);
481 vector<int> sons = tree.getSonsId(nodeId);
482 for (
size_t i = 0; i < sons.size(); i++)
484 Vdouble sonBrLen = getBranchLengths(tree, sons[i]);
485 for (
size_t j = 0; j < sonBrLen.size(); j++)
487 brLen.push_back(sonBrLen[j]);
497 if (!tree.hasNode(nodeId))
499 if (includeAncestor && !tree.hasDistanceToFather(nodeId))
500 throw NodeException(
"TreeTools::getTotalLength(). No branch length.", nodeId);
501 double length = includeAncestor ? tree.getDistanceToFather(nodeId) : 0;
502 vector<int> sons = tree.getSonsId(nodeId);
503 for (
size_t i = 0; i < sons.size(); i++)
505 length += getTotalLength(tree, sons[i],
true);
514 if (!tree.hasNode(nodeId))
516 vector<int> nodes = getNodesId(tree, nodeId);
517 for (
size_t i = 0; i < nodes.size(); i++)
519 tree.setDistanceToFather(nodes[i], brLen);
527 if (!tree.hasNode(nodeId))
529 vector<int> nodes = getNodesId(tree, nodeId);
530 for (
size_t i = 0; i < nodes.size(); i++)
532 if (!tree.hasDistanceToFather(nodes[i]))
533 tree.setDistanceToFather(nodes[i], brLen);
541 if (!tree.hasNode(nodeId))
543 vector<int> nodes = getNodesId(tree, nodeId);
544 for (
size_t i = 0; i < nodes.size(); i++)
546 if (tree.hasFather(nodes[i]))
548 if (!tree.hasDistanceToFather(nodes[i]))
549 throw NodeException(
"TreeTools::scaleTree(). Branch with no length", nodes[i]);
550 double brLen = tree.getDistanceToFather(nodes[i]) * factor;
551 tree.setDistanceToFather(nodes[i], brLen);
560 if (!tree.hasNode(nodeId))
562 vector<int> sons = tree.getSonsId(nodeId);
563 vector<size_t> h(sons.size());
564 for (
size_t i = 0; i < sons.size(); i++)
566 h[i] = initBranchLengthsGrafen(tree, sons[i]);
568 size_t thish = sons.size() == 0 ? 0 : VectorTools::sum<size_t>(h) + sons.size() - 1;
569 for (
size_t i = 0; i < sons.size(); i++)
571 tree.setDistanceToFather(sons[i], (
double)(thish - h[i]));
578 initBranchLengthsGrafen(tree, tree.
getRootId());
589 double& heightRaised)
592 if (!tree.hasNode(nodeId))
594 vector<int> sons = tree.getSonsId(nodeId);
595 vector<double> hr(sons.size());
597 for (
size_t i = 0; i < sons.size(); i++)
600 if (tree.hasDistanceToFather(son))
603 computeBranchLengthsGrafen(tree, sons[i], power, total, h, hr[i]);
604 double d = h + tree.getDistanceToFather(son);
609 throw NodeException (
"TreeTools::computeBranchLengthsGrafen. Branch length lacking.", son);
611 heightRaised = pow(height / total, power) * total;
612 for (
size_t i = 0; i < sons.size(); i++)
614 tree.setDistanceToFather(sons[i], heightRaised - hr[i]);
621 int rootId = tree.getRootId();
624 initBranchLengthsGrafen(tree);
627 double totalHeight = getHeight(tree, rootId);
629 computeBranchLengthsGrafen(tree, rootId, power, totalHeight, h, hr);
638 vector<int> sons = tree.
getSonsId(nodeId);
639 vector<double> h(sons.size());
643 for (
size_t i = 0; i < sons.size(); i++)
648 h[i] = convertToClockTree(tree, son);
654 throw NodeException (
"TreeTools::convertToClockTree. Branch length lacking.", son);
657 l /= (
double)sons.size();
660 for (
size_t i = 0; i < sons.size(); i++)
673 vector<int> sons = tree.
getSonsId(nodeId);
674 vector<double> h(sons.size());
678 for (
size_t i = 0; i < sons.size(); i++)
683 h[i] = convertToClockTree2(tree, son);
689 throw NodeException (
"TreeTools::convertToClockTree2. Branch length lacking.", son);
692 l /= (
double)sons.size();
693 for (
size_t i = 0; i < sons.size(); i++)
695 scaleTree(tree, sons[i], h[i] > 0 ? l / h[i] : 0);
705 DistanceMatrix* mat =
new DistanceMatrix(names);
706 for (
size_t i = 0; i < names.size(); i++)
709 for (
size_t j = 0; j < i; j++)
711 (*mat)(i, j) = (*mat)(j, i) = getDistanceBetweenAnyTwoNodes(tree, tree.
getLeafId(names[i]), tree.
getLeafId(names[j]));
721 throw Exception(
"TreeTools::midpointRooting(Tree). This function is deprecated, use TreeTemplateTools::midRoot instead!");
724 DistanceMatrix* dist = getDistanceMatrix(tree);
725 vector<size_t> pos = MatrixTools::whichMax(dist->asMatrix());
726 double dmid = (*dist)(pos[0], pos[1]) / 2;
727 int id1 = tree.
getLeafId(dist->getName(pos[0]));
728 int id2 = tree.
getLeafId(dist->getName(pos[1]));
730 double d1 = getDistanceBetweenAnyTwoNodes(tree, id1, rootId);
731 double d2 = getDistanceBetweenAnyTwoNodes(tree, id2, rootId);
732 int current = d2 > d1 ? id2 : id1;
744 if (brother == current)
756 for (
size_t i = 0; i < sonsId.size(); i++)
758 int subMax = getMaxId(tree, sonsId[i]);
769 vector<int> ids = getNodesId(tree,
id);
770 sort(ids.begin(), ids.end());
772 for (
size_t i = 0; i < ids.size(); i++)
774 if (ids[i] != (
int)i)
778 return (
int)ids.size();
785 vector<int> ids = tree.getNodesId();
786 sort(ids.begin(), ids.end());
787 for (
size_t i = 1; i < ids.size(); i++)
789 if (ids[i] == ids[i - 1])
792 throw Exception(
"TreeTools::checkIds. This id is used at least twice: " + TextTools::toString(ids[i]));
803 vector<BipartitionList*> vecBipL;
804 for (
size_t i = 0; i < vecTr.size(); i++)
811 for (
size_t i = 0; i < vecTr.size(); i++)
823 vector<BipartitionList*> vecBipL;
824 for (
size_t i = 0; i < vecTr.size(); i++)
831 for (
size_t i = 0; i < vecTr.size(); i++)
845 vector<size_t> size1, size2;
867 for (
size_t i = 0; i < nbbip; i++)
871 if (size1[i] != size2[i])
876 for (
size_t i = 0; i < nbbip; i++)
878 for (jj = 0; jj < nbbip; jj++)
896 vector<size_t> size1, size2;
900 if (checkNames && !VectorTools::haveSameElements(tr1.getLeavesNames(), tr2.getLeavesNames()))
901 throw Exception(
"Distinct leaf sets between trees ");
926 bipOK2.push_back(
false);
950 *missing_in_tr1 = missing1;
952 *missing_in_tr2 = missing2;
953 return missing1 + missing2;
960 vector<BipartitionList*> vecBipL;
962 vector<size_t> bipSize;
966 for (
size_t i = 0; i < vecTr.size(); i++)
971 for (
size_t i = 0; i < vecTr.size(); i++)
979 for (
size_t i = 0; i < nbBip; i++)
982 bipScore.push_back(1);
986 for (
size_t i = nbBip; i > 0; i--)
988 if (bipScore[i - 1] == 0)
990 for (
size_t j = i - 1; j > 0; j--)
992 if (bipScore[j - 1] && bipSize[i - 1] == bipSize[j - 1] && mergedBipL->
areIdentical(i - 1, j - 1))
1001 for (
size_t i = nbBip; i > 0; i--)
1003 if (bipScore[i - 1] == 0)
1005 bipScore.erase(bipScore.begin() +
static_cast<ptrdiff_t
>(i - 1));
1014 bipScore.push_back(vecTr.size());
1024 vector<size_t> bipScore;
1025 vector<string> tr0leaves;
1029 if (vecTr.size() == 0)
1030 throw Exception(
"TreeTools::thresholdConsensus. Empty vector passed");
1035 tr0leaves = vecTr[0]->getLeavesNames();
1036 for (
size_t i = 1; i < vecTr.size(); i++)
1038 if (!VectorTools::haveSameElements(vecTr[i]->getLeavesNames(), tr0leaves))
1039 throw Exception(
"TreeTools::thresholdConsensus. Distinct leaf sets between trees");
1043 bipL = bipartitionOccurrences(vecTr, bipScore);
1049 score =
static_cast<int>(bipScore[i - 1]) / static_cast<double>(vecTr.size());
1050 if (score <= threshold && score != 1.)
1076 return thresholdConsensus(vecTr, 0., checkNames);
1083 return thresholdConsensus(vecTr, 0.5, checkNames);
1090 return thresholdConsensus(vecTr, 1., checkNames);
1101 const DNA* alphabet =
dynamic_cast<const DNA*
>(sites->getAlphabet());
1103 ConstantDistribution* constRate =
new ConstantDistribution(1.);
1105 BioNJ bionjTreeBuilder(
false,
false);
1108 if (ApplicationTools::message)
1109 ApplicationTools::message->endLine();
1128 vector<size_t> occurences;
1141 bootstrapValues[i] = format >= 0 ? round(static_cast<double>(occurences[j]) * pow(10., 2 + format) / static_cast<double>(vecTr.size())) / pow(10., format) :
static_cast<double>(occurences[j]);
1147 for (
size_t i = 0; i < index.size(); i++)
1149 if (!tree.
isLeaf(index[i]))
1161 int currentId = nodeId;
1162 while (tree.hasFather(currentId))
1164 currentId = tree.getFatherId(currentId);
1165 ids.push_back(currentId);
1174 if (nodeIds.size() == 0)
1175 throw Exception(
"TreeTools::getLastCommonAncestor(). You must provide at least one node id.");
1176 vector< vector<int> > ancestors(nodeIds.size());
1177 for (
size_t i = 0; i < nodeIds.size(); i++)
1179 ancestors[i] = getAncestors(tree, nodeIds[i]);
1180 ancestors[i].insert(ancestors[i].begin(), nodeIds[i]);
1182 int lca = tree.getRootId();
1186 if (ancestors[0].size() <= count)
1188 int current = ancestors[0][ancestors[0].size() - count - 1];
1189 for (
size_t i = 1; i < nodeIds.size(); i++)
1191 if (ancestors[i].size() <= count)
1193 if (ancestors[i][ancestors[i].size() - count - 1] != current)
1209 throw Exception(
"The tree has to be rooted on the branch of interest to determine the midpoint position of the root");
1212 throw Exception(
"The tree is multifurcated, which is not allowed.");
1219 double x = bestRootPosition_(tree, sonsIds.at(0), sonsIds.at(1), length);
1234 m1 = statFromNode_(tree, nodeId1);
1235 m2 = statFromNode_(tree, nodeId2);
1236 A = 4 * m1.
N * (m2.
N * length) * length;
1237 B = 4 * length * (m2.
N * m1.
sum - m1.
N * m2.
sum - length * m1.
N * m2.
N);
1271 vector<int> sonsId = tree.
getSonsId(rootId);
1272 for (
size_t i = 0; i < sonsId.size(); i++)
1274 mtmp = statFromNode_(tree, sonsId.at(i));
1277 m.
sum += mtmp.
sum + bLength * mtmp.
N;
1293 const DNA* alphabet =
dynamic_cast<const DNA*
>(sites->getAlphabet());
1295 ConstantDistribution* constRate =
new ConstantDistribution(1.);
1297 BioNJ bionjTreeBuilder(
false,
false);
1300 if (ApplicationTools::message)
1301 ApplicationTools::message->endLine();
size_t getNumberOfBipartitions() const
void computeTree()
Compute the tree corresponding to the distance matrix.
bool areIdentical(size_t k1, size_t k2) const
DistanceMatrix * getMatrix() const
Get the distance matrix.
void sortByPartitionSize()
Sort bipartitions by partition size.
virtual int getRootId() const =0
bool areCompatible(size_t k1, size_t k2) const
Tells whether 2 bipartitions from the list are compatible.
virtual TreeTemplate< Node > * getTree() const
Get the computed tree, if there is one.
Double recursive implementation of interface TreeParsimonyScore.
virtual void newOutGroup(int nodeId)=0
Root a tree by specifying an outgroup.
The phylogenetic tree class.
Interface for phylogenetic tree objects.
TreeTemplate< Node > * toTree() const
Translate into a tree.
void removeRedundantBipartitions()
virtual void setBranchProperty(int nodeId, const std::string &name, const Clonable &property)=0
virtual int getFatherId(int parentId) const =0
void addTrivialBipartitions(bool checkExisting)
Adds bipartitions corresponding to external branches if missing.
virtual bool hasNode(int nodeId) const =0
virtual bool isLeaf(int nodeId) const =0
virtual bool hasBranchProperty(int nodeId, const std::string &name) const =0
void removeTrivialBipartitions()
Removes bipartitions corresponding to external branches (1 vs n-1)
The BioNJ distance method.
Exception thrown when something is wrong with a particular node.
size_t getNumberOfElements() const
virtual std::vector< std::string > getLeavesNames() const =0
virtual void setDistanceToFather(int nodeId, double length)=0
This class deals with the bipartitions defined by trees.
void setDistanceMatrix(const DistanceMatrix &matrix)
Set the distance matrix to use.
virtual double getDistanceToFather(int nodeId) const =0
void deleteBipartition(size_t i)
virtual bool unroot()=0
Unroot a rooted tree.
virtual bool isMultifurcating() const =0
Tell if the tree is multifurcating.
virtual int getLeafId(const std::string &name) const =0
virtual bool hasDistanceToFather(int nodeId) const =0
The Jukes-Cantor substitution model for nucleotides.
Estimate a distance matrix from sequence data, according to a given model.
virtual Clonable * getBranchProperty(int nodeId, const std::string &name)=0
virtual std::vector< int > getSonsId(int parentId) const =0
size_t getPartitionSize(size_t i) const
Returns the size of the smallest of the two partitions (e.g. 1 for external branches) ...
General exception thrown when something is wrong with a particular node.
virtual bool isRooted() const =0
Tell if the tree is rooted.
virtual std::string getNodeName(int nodeId) const =0