45 #include <Bpp/Numeric/Number.h> 46 #include <Bpp/BppString.h> 47 #include <Bpp/Text/StringTokenizer.h> 48 #include <Bpp/Text/NestedStringTokenizer.h> 49 #include <Bpp/Text/TextTools.h> 50 #include <Bpp/Numeric/Random/RandomTools.h> 72 b = b || isMultifurcating(*node.
getSon(i));
82 unsigned int nbLeaves = 0;
89 nbLeaves += getNumberOfLeaves(*node[i]);
98 unsigned int nbNodes = 1;
101 nbNodes += getNumberOfNodes(*node[i]);
110 vector<string> names;
113 names.push_back(node.
getName());
117 vector<string> subNames = getLeavesNames(*node.
getSon(i));
118 for (
size_t j = 0; j < subNames.size(); j++)
120 names.push_back(subNames[j]);
133 unsigned int c = getDepth(*node[i]) + 1;
147 unsigned int c = getDepths(*node[i], depths) + 1;
162 const Node* son = node[i];
164 double c = getHeight(*son) + dist;
178 const Node* son = node[i];
180 double c = getHeights(*son, heights) + dist;
198 bool hasColon =
false;
199 for (colonIndex = elt.size(); colonIndex > 0 && elt[colonIndex] !=
')'; colonIndex--)
201 if (elt[colonIndex] ==
':')
213 elt2 = elt.substr(0, colonIndex);
214 element.
length = TextTools::removeSurroundingWhiteSpaces(elt.substr(colonIndex + 1));
222 string::size_type lastP = elt2.rfind(
')');
223 string::size_type firstP = elt2.find(
'(');
224 if (firstP == string::npos)
234 throw IOException(
"TreeTemplateTools::getElement(). Invalid format: bad closing parenthesis in " + elt2);
235 element.
content = TextTools::removeSurroundingWhiteSpaces(elt2.substr(firstP + 1, lastP - firstP - 1));
236 string bootstrap = TextTools::removeSurroundingWhiteSpaces(elt2.substr(lastP + 1));
238 if (!TextTools::isEmpty(bootstrap))
246 throw IOException(
"Bad tree description: " + elt);
257 Element elt = getElement(description);
261 if (!TextTools::isEmpty(elt.
length))
286 NestedStringTokenizer nt(elt.
content,
"(",
")",
",");
287 vector<string> elements;
288 while (nt.hasMoreToken())
290 elements.push_back(nt.nextToken());
296 string name = TextTools::removeSurroundingWhiteSpaces(elements[0]);
299 StringTokenizer st(name,
"_",
true,
true);
300 ostringstream realName;
301 for (
size_t i = 0; i < st.numberOfRemainingTokens() - 1; ++i)
307 realName << st.getToken(i);
310 node->
setId(TextTools::toInt(st.getToken(st.numberOfRemainingTokens() - 1)));
320 for (
size_t i = 0; i < elements.size(); i++)
323 Node* son = parenthesisToNode(elements[i], bootstrap, propertyName, withId);
334 string::size_type semi = description.rfind(
';');
335 if (semi == string::npos)
336 throw Exception(
"TreeTemplateTools::parenthesisToTree(). Bad format: no semi-colon found.");
337 string content = description.substr(0, semi);
338 Node* node = parenthesisToNode(content, bootstrap, propertyName, withId);
360 s << nodeToParenthesis(*node[0], writeId);
363 s <<
"," << nodeToParenthesis(*node[i], writeId);
395 s << nodeToParenthesis(*node[0], bootstrap, propertyName);
398 s <<
"," << nodeToParenthesis(*node[i], bootstrap, propertyName);
411 const BppString* ppt =
dynamic_cast<const BppString*
>(node.
getBranchProperty(propertyName));
415 throw Exception(
"TreeTemplateTools::nodeToParenthesis. Property should be a BppString.");
436 s <<
"," << nodeToParenthesis(*node->
getSon(i), writeId);
441 s << nodeToParenthesis(*node->
getSon(0), writeId);
444 s <<
"," << nodeToParenthesis(*node->
getSon(i), writeId);
466 s <<
"," << nodeToParenthesis(*node->
getSon(i), bootstrap, propertyName);
471 s << nodeToParenthesis(*node->
getSon(0), bootstrap, propertyName);
474 s <<
"," << nodeToParenthesis(*node->
getSon(i), bootstrap, propertyName);
487 const BppString* ppt =
dynamic_cast<const BppString*
>(node->
getBranchProperty(propertyName));
491 throw Exception(
"TreeTemplateTools::nodeToParenthesis. Property should be a BppString.");
503 brLen[0] = node.getDistanceToFather();
504 for (
size_t i = 0; i < node.getNumberOfSons(); i++)
506 Vdouble sonBrLen = getBranchLengths(*node.getSon(i));
507 for (
size_t j = 0; j < sonBrLen.size(); j++)
509 brLen.push_back(sonBrLen[j]);
519 if (includeAncestor && !node.hasDistanceToFather())
520 throw NodePException(
"TreeTools::getTotalLength(). No branch length.", &node);
521 double length = includeAncestor ? node.getDistanceToFather() : 0;
522 for (
size_t i = 0; i < node.getNumberOfSons(); i++)
524 length += getTotalLength(*node.getSon(i),
true);
536 setBranchLengths(*node.
getSon(i), brLen);
547 deleteBranchLengths(*node.
getSon(i));
559 setVoidBranchLengths(*node.
getSon(i), brLen);
567 if (node.hasFather())
569 node.setDistanceToFather(node.getDistanceToFather() * factor);
571 for (
size_t i = 0; i < node.getNumberOfSons(); i++)
573 scaleTree(*node.getSon(i), factor);
581 if (leavesNames.size() == 0)
586 vector<Node*> nodes(leavesNames.size());
588 for (
size_t i = 0; i < leavesNames.size(); ++i)
590 nodes[i] =
new Node(leavesNames[i]);
593 while (nodes.size() > (rooted ? 2 : 3))
596 size_t pos1 = RandomTools::giveIntRandomNumberBetweenZeroAndEntry<size_t>(nodes.size());
597 Node* node1 = nodes[pos1];
598 nodes.erase(nodes.begin() +
static_cast<ptrdiff_t
>(pos1));
599 size_t pos2 = RandomTools::giveIntRandomNumberBetweenZeroAndEntry<size_t>(nodes.size());
600 Node* node2 = nodes[pos2];
601 nodes.erase(nodes.begin() +
static_cast<ptrdiff_t
>(pos2));
606 nodes.push_back(parent);
610 for (
size_t i = 0; i < nodes.size(); ++i)
624 vector<Node*> pathMatrix1;
625 vector<Node*> pathMatrix2;
627 Node* nodeUp = &node1;
630 pathMatrix1.push_back(nodeUp);
633 pathMatrix1.push_back(nodeUp);
638 pathMatrix2.push_back(nodeUp);
641 pathMatrix2.push_back(nodeUp);
643 size_t pos1 = pathMatrix1.size() - 1;
644 size_t pos2 = pathMatrix2.size() - 1;
646 if (pathMatrix1[pos1] != pathMatrix2[pos2])
647 throw Exception(
"TreeTemplateTools::getPathBetweenAnyTwoNodes(). The two nodes do not have any ancestor in common / do not belong to the same tree.");
649 if (pos1 == 0 && pos2 == 0) {
651 path.push_back(pathMatrix1[0]);
652 }
else if (pos1 == 0) {
655 for (
size_t i = pathMatrix2.size(); i > 0; --i)
656 path.push_back(pathMatrix2[i-1]);
657 }
else if (pos2 == 0) {
659 path.insert(path.end(), pathMatrix1.begin(), pathMatrix1.end());
662 while (pathMatrix1[pos1] == pathMatrix2[pos2] && pos1 > 0 && pos2 > 0)
664 commonAnc = pathMatrix1[pos1];
668 path.insert(path.end(), pathMatrix1.begin(), pathMatrix1.begin() +
static_cast<ptrdiff_t
>(pos1 + 1));
669 if (includeAncestor && commonAnc)
670 path.push_back(commonAnc);
674 for (
size_t i = pos2 + 1; i > 0; --i)
675 path.push_back(pathMatrix2[i-1]);
684 vector<const Node*> path;
685 vector<const Node*> pathMatrix1;
686 vector<const Node*> pathMatrix2;
688 const Node* nodeUp = &node1;
691 pathMatrix1.push_back(nodeUp);
694 pathMatrix1.push_back(nodeUp);
699 pathMatrix2.push_back(nodeUp);
702 pathMatrix2.push_back(nodeUp);
704 size_t pos1 = pathMatrix1.size() - 1;
705 size_t pos2 = pathMatrix2.size() - 1;
707 if (pathMatrix1[pos1] != pathMatrix2[pos2])
708 throw Exception(
"TreeTemplateTools::getPathBetweenAnyTwoNodes(). The two nodes do not have any ancestor in common / do not belong to the same tree.");
710 if (pos1 == 0 && pos2 == 0) {
712 path.push_back(pathMatrix1[0]);
713 }
else if (pos1 == 0) {
716 for (
size_t i = pathMatrix2.size(); i > 0; --i)
717 path.push_back(pathMatrix2[i-1]);
718 }
else if (pos2 == 0) {
720 path.insert(path.end(), pathMatrix1.begin(), pathMatrix1.end());
722 const Node* commonAnc = 0;
723 while (pathMatrix1[pos1] == pathMatrix2[pos2] && pos1 > 0 && pos2 > 0)
725 commonAnc = pathMatrix1[pos1];
729 path.insert(path.end(), pathMatrix1.begin(), pathMatrix1.begin() +
static_cast<ptrdiff_t
>(pos1 + 1));
730 if (includeAncestor && commonAnc)
731 path.push_back(commonAnc);
735 for (
size_t i = pos2 + 1; i > 0; --i)
736 path.push_back(pathMatrix2[i-1]);
745 vector<const Node*> path = getPathBetweenAnyTwoNodes(node1, node2,
false);
747 for (
size_t i = 0; i < path.size(); ++i)
749 d += path[i]->getDistanceToFather();
758 distsToNodeFather.clear();
769 map<const Node*, vector< pair<string, double> > > leavesDists;
773 processDistsInSubtree_(son, matrix, leavesDists[son]);
778 for (
size_t son1_loc = 0; son1_loc < node->
getNumberOfSons(); ++son1_loc)
780 for (
size_t son2_loc = 0; son2_loc < son1_loc; ++son2_loc)
785 for (vector< pair<string, double> >::iterator son1_leaf = leavesDists[son1].begin();
786 son1_leaf != leavesDists[son1].end();
789 for (vector< pair<string, double> >::iterator son2_leaf = leavesDists[son2].begin();
790 son2_leaf != leavesDists[son2].end();
793 matrix(son1_leaf->first, son2_leaf->first) =
794 matrix(son2_leaf->first, son1_leaf->first) =
795 ( son1_leaf->second + son2_leaf->second );
807 string root_name = node->
getName();
808 for (vector< pair<string, double> >::iterator other_leaf = leavesDists[node->
getSon(0)].begin();
809 other_leaf != leavesDists[node->
getSon(0)].end();
812 matrix(root_name, other_leaf->first) = matrix( other_leaf->first, root_name) = other_leaf->second;
820 distsToNodeFather.clear();
822 for (map<
const Node*, vector<pair<string, double> > >::iterator son = leavesDists.begin(); son != leavesDists.end(); ++son)
824 for (vector< pair<string, double> >::iterator leaf = (son->second).begin(); leaf != (son->second).end(); ++leaf)
826 distsToNodeFather.push_back(make_pair(leaf->first, (leaf->second + nodeToFather)));
833 DistanceMatrix* matrix =
new DistanceMatrix(tree.
getLeavesNames());
834 vector< pair<string, double> > distsToRoot;
835 processDistsInSubtree_(tree.
getRootNode(), *matrix, distsToRoot);
844 vector<const Node*> neighbors2;
845 for (
size_t k = 0; k < neighbors.size(); k++)
847 const Node* n = neighbors[k];
848 if (n != node2 && n != node3)
849 neighbors2.push_back(n);
861 incrementAllIds(node->
getSon(i), increment);
872 getNodePropertyNames(*node.
getSon(i), propertyNames);
882 getNodeProperties(*node.
getSon(i), propertyName, properties);
892 getNodeProperties(*node.
getSon(i), propertyName, properties);
903 getBranchPropertyNames(*node.
getSon(i), propertyNames);
913 getBranchProperties(*node.
getSon(i), propertyName, properties);
923 getBranchProperties(*node.
getSon(i), propertyName, properties);
941 test &= haveSameOrderedTopology(*n1.
getSon(i), *n2.
getSon(i));
959 vector<size_t> nbSons;
960 vector<string> firstLeaves;
968 nbSons.push_back(otdsub.
size);
971 otd.
size = VectorTools::sum(nbSons);
976 for (
size_t i = 0; i < nbSons.size() - 1; ++i)
979 vector<size_t> index = VectorTools::whichMaxAll(nbSons);
980 if (index.size() == 1 || !orderLeaves)
988 for (
size_t j = 0; j < index.size(); ++j)
990 v.push_back(firstLeaves[index[j]]);
992 size_t mx = VectorTools::whichMax(v);
998 nbSons[pos] = nbSons[i];
1005 for (
size_t i = 0; i < nbSons.size() - 1; ++i)
1008 vector<size_t> index = VectorTools::whichMinAll(nbSons);
1009 if (index.size() == 1 || !orderLeaves)
1017 for (
size_t j = 0; j < index.size(); ++j)
1019 v.push_back(firstLeaves[index[j]]);
1021 size_t mx = VectorTools::whichMin(v);
1027 nbSons[pos] = nbSons[i];
1029 nbSons[i] = otd.
size + 1;
1042 if (criterion != MIDROOT_VARIANCE && criterion != MIDROOT_SUM_OF_SQUARES)
1043 throw Exception(
"TreeTemplateTools::midRoot(). Illegal criterion value '" + TextTools::toString(criterion) +
"'");
1055 pair<Node*, map<string, double> > best_root_branch;
1056 best_root_branch.first = ref_root;
1057 best_root_branch.second [
"position"] = -1;
1058 best_root_branch.second [
"score"] = numeric_limits<double>::max();
1061 getBestRootInSubtree_(tree, criterion, ref_root, best_root_branch);
1065 const double pos = best_root_branch.second[
"position"];
1066 if (pos < 1e-6 or pos > 1 - 1e-6)
1068 tree.
rootAt(pos < 1e-6 ? best_root_branch.first->getFather() : best_root_branch.first);
1075 double root_branch_length = best_root_branch.first->getDistanceToFather();
1076 Node* best_root_father = best_root_branch.first->
getFather();
1078 best_root_father->
removeSon(best_root_branch.first);
1079 best_root_father->
addSon(new_root);
1080 new_root->
addSon(best_root_branch.first);
1083 best_root_branch.first->setDistanceToFather(max((1 - pos) * root_branch_length, 1e-6));
1086 const vector<string> branch_properties = best_root_branch.first->getBranchPropertyNames();
1087 for (vector<string>::const_iterator p = branch_properties.begin(); p != branch_properties.end(); ++p)
1089 new_root->
setBranchProperty(*p, *best_root_branch.first->getBranchProperty(*p));
1095 if (forceBranchRoot)
1098 vector<Node*> root_sons = orig_root->
getSons();
1099 if (root_sons.size() > 2)
1101 Node* nearest = root_sons.at(0);
1102 for (vector<Node*>::iterator n = root_sons.begin(); n !=
1103 root_sons.end(); ++n)
1112 orig_root->
addSon(new_root);
1113 new_root->
addSon(nearest);
1117 for (vector<string>::const_iterator p = branch_properties.begin(); p != branch_properties.end(); ++p)
1146 unresolveUncertainNodes(*son, threshold, property);
1150 double value =
dynamic_cast<Number<double>*
>(son->
getBranchProperty(property))->getValue();
1151 if (value < threshold)
1159 subtree.
addSon(i, grandSon);
1174 const vector<Node*> sons = node->
getSons();
1178 for (vector<Node*>::const_iterator son = sons.begin(); son != sons.end(); ++son)
1181 Moments_ son_moment = getSubtreeMoments_(*son);
1185 Moments_ node_moment = getSubtreeMoments_(node);
1194 double min_criterion_value;
1195 double best_position;
1199 const double d = (**son).getDistanceToFather();
1203 double A = 0, B = 0, C = 0;
1204 if (criterion == MIDROOT_SUM_OF_SQUARES)
1206 A = (n1 + n2) * d * d;
1207 B = 2 * d * (m1.
sum - m2.
sum) - 2 * n2 * d * d;
1212 else if (criterion == MIDROOT_VARIANCE)
1214 A = 4 * n1 * n2 * d * d;
1215 B = 4 * d * ( n2 * m1.
sum - n1 * m2.
sum - d * n1 * n2);
1217 + 2 * n1 * d * m2.
sum - 2 * n2 * d * m1.
sum 1223 min_criterion_value = numeric_limits<double>::max();
1224 best_position = 0.5;
1228 min_criterion_value = C - B * B / (4 * A);
1229 best_position = -B / (2 * A);
1230 if (best_position < 0)
1233 min_criterion_value = C;
1235 else if (best_position > 1)
1238 min_criterion_value = A + B + C;
1243 if (min_criterion_value < bestRoot.second[
"score"])
1245 bestRoot.first = *son;
1246 bestRoot.second[
"position"] = best_position;
1247 bestRoot.second[
"score"] = min_criterion_value;
1268 for (
size_t i = 0; i < nsons; ++i)
virtual N * getRootNode()
virtual void addSon(size_t pos, Node *node)
virtual std::vector< std::string > getBranchPropertyNames() const
virtual bool hasNodeProperty(const std::string &name) const
virtual std::string getName() const
Get the name associated to this node, if there is one, otherwise throw a NodeException.
virtual bool hasName() const
Tell is this node has a name.
virtual Clonable * getBranchProperty(const std::string &name)
virtual const Node * getSon(size_t pos) const
virtual bool hasDistanceToFather() const
Tell is this node has a distance to the father.
The phylogenetic tree class.
virtual const Node * getFather() const
Get the father of this node is there is one.
virtual void setName(const std::string &name)
Give a name or update the name associated to the node.
bool isRooted() const
Tell if the tree is rooted.
virtual bool hasFather() const
Tell if this node has a father node.
virtual bool isLeaf() const
virtual double getDistanceToFather() const
Get the distance to the father node is there is one, otherwise throw a NodeException.
virtual void setRootNode(N *root)
virtual int getId() const
Get the node's id.
std::vector< const Node * > getNeighbors() const
virtual Node * removeSon(size_t pos)
std::vector< std::string > getLeavesNames() const
bool unroot()
Unroot a rooted tree.
The phylogenetic node class.
virtual void setId(int id)
Set this node's id.
virtual Clonable * getNodeProperty(const std::string &name)
void rootAt(int nodeId)
Change the root node.
virtual void swap(size_t branch1, size_t branch2)
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
virtual size_t getNumberOfSons() const
void resetNodesId()
Number nodes.
General exception thrown when something is wrong with a particular node.
virtual std::vector< Node * > & getSons()
virtual void deleteDistanceToFather()
Delete the distance to the father node.
virtual std::vector< std::string > getNodePropertyNames() const
virtual bool hasBranchProperty(const std::string &name) const
virtual void setBranchProperty(const std::string &name, const Clonable &property)
Set/add a branch property.