bpp-phyl  2.2.0
Nhx.cpp
Go to the documentation of this file.
1 //
2 // File: Nhx.cpp
3 // Created by: Bastien Boussau
4 // Created on: Thu Oct 19 11:06:03 2010
5 //
6 
7 /*
8 Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10 This software is a computer program whose purpose is to provide classes
11 for phylogenetic data analysis.
12 
13 This software is governed by the CeCILL license under French law and
14 abiding by the rules of distribution of free software. You can use,
15 modify and/ or redistribute the software under the terms of the CeCILL
16 license as circulated by CEA, CNRS and INRIA at the following URL
17 "http://www.cecill.info".
18 
19 As a counterpart to the access to the source code and rights to copy,
20 modify and redistribute granted by the license, users are provided only
21 with a limited warranty and the software's author, the holder of the
22 economic rights, and the successive licensors have only limited
23 liability.
24 
25 In this respect, the user's attention is drawn to the risks associated
26 with loading, using, modifying and/or developing or reproducing the
27 software by the user in light of its specific status of free software,
28 that may mean that it is complicated to manipulate, and that also
29 therefore means that it is reserved for developers and experienced
30 professionals having in-depth computer knowledge. Users are therefore
31 encouraged to load and test the software's suitability as regards their
32 requirements in conditions enabling the security of their systems and/or
33 data to be ensured and, more generally, to use and operate it in the
34 same conditions as regards security.
35 
36 The fact that you are presently reading this means that you have had
37 knowledge of the CeCILL license and that you accept its terms.
38 */
39 
40 #include "Nhx.h"
41 #include "../Tree.h"
42 #include "../TreeTemplate.h"
43 
44 //From bpp-core:
45 #include <Bpp/Text/TextTools.h>
46 #include <Bpp/BppString.h>
47 #include <Bpp/BppBoolean.h>
48 #include <Bpp/Numeric/Number.h>
49 
50 using namespace bpp;
51 
52 // From the STL:
53 #include <iostream>
54 #include <fstream>
55 
56 using namespace std;
57 
58 /******************************************************************************/
59 
60 Nhx::Nhx(bool useTagsAsPptNames):
61  supportedProperties_(),
62  useTagsAsPropertyNames_(useTagsAsPptNames),
63  hasIds_(false)
64 {
65  registerProperty(Property("Gene name", "GN", false, 0));
66  registerProperty(Property("Sequence accession", "AC", false, 0));
67  registerProperty(Property("Node ID", "ND", false, 0));
69  registerProperty(Property("Event", "Ev", true, 0));
70  registerProperty(Property("EC number", "E", false, 0));
71  registerProperty(Property("Function", "Fu", false, 0));
72  registerProperty(Property("Domain structure", "DS", false, 0));
73  registerProperty(Property("Species name", "S", false, 0));
74  registerProperty(Property("Taxonomy ID", "T", false, 1));
75  registerProperty(Property("Width of parent branch", "W", true, 1));
76  registerProperty(Property("Color of parent branch", "C", true, 0));
77  registerProperty(Property("Collapse", "C", false, 3));
78  registerProperty(Property("Custom", "XB", true, 0));
79  registerProperty(Property("Custom", "XN", false, 0));
80  registerProperty(Property("Orthologous", "O", false, 1));
81  registerProperty(Property("Subtree neighbors", "SN", false, 1));
82  registerProperty(Property("Super orthologous", "SO", false, 1));
83 }
84 
85 /******************************************************************************/
86 
87 const string Nhx::getFormatName() const { return "Nhx"; }
88 
89 /******************************************************************************/
90 
91 const string Nhx::getFormatDescription() const
92 {
93  return string("New Hampshire eXtended parenthesis format. ") +
94  "See http://www.phylosoft.org/NHX/ for more info.";
95 }
96 
97 /******************************************************************************/
98 
99 #if defined(NO_VIRTUAL_COV)
100  Tree*
101 #else
103 #endif
104 Nhx::read(istream& in) const throw (Exception)
105 {
106  // Checking the existence of specified file
107  if (! in) { throw IOException ("Nhx ::read: failed to read from stream"); }
108 
109  //We concatenate all line in file till we reach the ending semi colon:
110  string temp, description;// Initialization
111  // Main loop : for all file lines
112  while (! in.eof())
113  {
114  getline(in, temp, '\n'); // Copy current line in temporary string
115  string::size_type index = temp.find(";");
116  if (index != string::npos)
117  {
118  description += temp.substr(0, index + 1);
119  break;
120  }
121  else description += temp;
122  }
123  vector<string> beginnings, endings;
124  beginnings.push_back("[&&NHX:");
125  description = TextTools::removeSubstrings(description, '[', ']', beginnings, endings);
126  return parenthesisToTree(description);
127 }
128 
129 /******************************************************************************/
130 
131 void Nhx::write_(const Tree& tree, ostream& out) const throw (Exception)
132 {
133  // Checking the existence of specified file, and possibility to open it in write mode
134  if (! out) { throw IOException ("Nhx::writeTree: failed to write to stream"); }
135  out << treeToParenthesis(tree);
136 }
137 
138 /******************************************************************************/
139 
140 template<class N>
141 void Nhx::write_(const TreeTemplate<N>& tree, ostream& out) const throw (Exception)
142 {
143  // Checking the existence of specified file, and possibility to open it in write mode
144  if (! out) { throw IOException ("Nhx::writeTree: failed to write to stream"); }
145  out << treeToParenthesis(tree);
146 }
147 
148 /******************************************************************************/
149 
150 void Nhx::read(istream& in, vector<Tree*>& trees) const throw (Exception)
151 {
152  // Checking the existence of specified file
153  if (! in) { throw IOException ("Nhx::read: failed to read from stream"); }
154 
155  // Main loop : for all file lines
156  string temp, description;// Initialization
157  string::size_type index;
158  vector <string > beginnings, endings;
159  beginnings.push_back("[&&NHX:");
160  while (!in.eof())
161  {
162  //We concatenate all line in file till we reach the ending semi colon:
163  while (!in.eof())
164  {
165  getline(in, temp, '\n'); // Copy current line in temporary string
166  index = temp.find(";");
167  if (index != string::npos)
168  {
169  description += temp.substr(0, index + 1);
170  description = TextTools::removeSubstrings(description, '[', ']', beginnings, endings);
171  trees.push_back(parenthesisToTree(description));
172  description = temp.substr(index + 1);
173  }
174  else description += temp;
175  }
176  }
177 }
178 
179 /******************************************************************************/
180 
181 void Nhx::write_(const vector<Tree*>& trees, ostream& out) const throw (Exception)
182 {
183  // Checking the existence of specified file, and possibility to open it in write mode
184  if (! out) { throw IOException ("Nhx::write: failed to write to stream"); }
185  for(unsigned int i = 0; i < trees.size(); i++)
186  {
187  out << treeToParenthesis(*trees[i]);
188  }
189 }
190 
191 /******************************************************************************/
192 
193 template<class N>
194 void Nhx::write_(const vector<TreeTemplate<N>*>& trees, ostream& out) const throw (Exception)
195 {
196  // Checking the existence of specified file, and possibility to open it in write mode
197  if (! out) { throw IOException ("Nhx::write: failed to write to stream"); }
198  for(unsigned int i = 0; i < trees.size(); i++)
199  {
200  out << treeToParenthesis(*trees[i]);
201  }
202 }
203 
204 /******************************************************************************/
205 
206 Nhx::Element Nhx::getElement(const string& elt) const throw (IOException)
207 {
208  Element element;
209  element.length = ""; //default
210  element.annotation = ""; //default
211  element.isLeaf = false; // default
212 
213  //cout << "ELT=" << elt << endl;
214  size_t lastP = elt.rfind(")"), firstP;
215  size_t beginAnno = string::npos;
216  if (lastP == string::npos)
217  beginAnno = elt.rfind("[&&NHX:");
218  else
219  beginAnno = elt.find("[&&NHX:", lastP + 1);
220  string elementWithoutAnnotation;
221  if (beginAnno != string::npos) {
222  size_t endAnno = elt.find("]", beginAnno + 7);
223  element.annotation = elt.substr(beginAnno + 7, endAnno - beginAnno - 7);
224  elementWithoutAnnotation = elt.substr(0, beginAnno);
225  } else {
226  element.annotation = "";
227  elementWithoutAnnotation = elt;
228  }
229  //cout << "ANNO=" << element.annotation << endl;
230  //cout << "ELT =" << elementWithoutAnnotation << endl;
231 
232  size_t colonIndex;
233  bool hasColon = false;
234  for (colonIndex = elementWithoutAnnotation.size() - 1; colonIndex > 0 && elementWithoutAnnotation[colonIndex] != ')' && !hasColon; --colonIndex)
235  {
236  if (elementWithoutAnnotation[colonIndex] == ':')
237  {
238  hasColon = true;
239  }
240  }
241  try
242  {
243  string elt2;
244  if (hasColon)
245  {
246  //this is an element with length:
247  elt2 = elementWithoutAnnotation.substr(0, colonIndex + 1);
248  element.length = elementWithoutAnnotation.substr(colonIndex + 2);
249  }
250  else
251  {
252  //this is an element without length;
253  elt2 = elementWithoutAnnotation;
254  }
255 
256  lastP = elt2.rfind(')');
257  firstP = elt2.find('(');
258  if (firstP == string::npos)
259  {
260  //This is a leaf:
261  element.content = elt2;
262  element.isLeaf = true;
263  }
264  else
265  {
266  //This is a node:
267  if (lastP < firstP) throw IOException("Nhx::getElement(). Invalid format: bad closing parenthesis in " + elt2);
268  element.content = elt2.substr(firstP + 1, lastP - firstP - 1);
269  }
270  }
271  catch (exception& e)
272  {
273  throw IOException("Bad tree description: " + elt);
274  }
275  //cout << endl;
276  //cout << "CONTENT:" << endl << element.content << endl;
277  //cout << endl;
278  //cout << "ANNOTATION:" << endl << element.annotation << endl;
279  //cout << endl;
280 
281  return element;
282 }
283 
284 /******************************************************************************/
285 
286 
287 Node* Nhx::parenthesisToNode(const string& description) const
288 {
289  //cout << "NODE: " << description << endl;
290  Element elt = getElement(description);
291 
292  //New node:
293  Node* node = new Node();
294  if (!TextTools::isEmpty(elt.length))
295  {
296  node->setDistanceToFather(TextTools::toDouble(elt.length));
297  }
298  if (!TextTools::isEmpty(elt.annotation))
299  {
300  bool hasId = setNodeProperties(*node, elt.annotation);
301  if (!hasIds_ && hasId)
302  hasIds_ = true;
303  if (hasIds_ && !hasId)
304  throw Exception("Nhx::parenthesisToNode. At least one one is missing an id (ND tag).");
305  }
306 
307  NestedStringTokenizer nt(elt.content, "(", ")", ",");
308  vector<string> elements;
309  while (nt.hasMoreToken())
310  {
311  elements.push_back(nt.nextToken());
312  }
313  if (elt.isLeaf)
314  {
315  //This is a leaf:
316  string name = TextTools::removeSurroundingWhiteSpaces(elements[0]);
317  node->setName(name);
318  }
319  else
320  {
321  //This is a node:
322  for (size_t i = 0; i < elements.size(); ++i)
323  {
324  //cout << "NODE: SUBNODE: " << i << ", " << elements[i] << endl;
325  Node* son = parenthesisToNode(elements[i]);
326  node->addSon(son);
327  }
328  }
329  return node;
330 }
331 
332 /******************************************************************************/
333 
334 TreeTemplate<Node>* Nhx::parenthesisToTree(const string& description) const throw (Exception)
335 {
336  hasIds_ = false;
337  string::size_type semi = description.rfind(';');
338  if (semi == string::npos)
339  throw Exception("Nhx::parenthesisToTree(). Bad format: no semi-colon found.");
340  string content = description.substr(0, semi);
341  Node* node = parenthesisToNode(content);
343  tree->setRootNode(node);
344  if (!hasIds_)
345  {
346  tree->resetNodesId();
347  }
348  return tree;
349 }
350 
351 /******************************************************************************/
352 
353 string Nhx::propertyToString_(const Clonable* pptObject, short type) throw (Exception)
354 {
355  if (type == 0) {
356  const BppString* castedPptObject = dynamic_cast<const BppString*>(pptObject);
357  if (castedPptObject)
358  return castedPptObject->toSTL();
359  else
360  throw Exception("Nhx::propertyToString_. Unvalid property type, should be of class BppString.");
361  } else if (type == 1) {
362  const Number<int>* castedPptObject = dynamic_cast<const Number<int>*>(pptObject);
363  if (castedPptObject)
364  return TextTools::toString(castedPptObject->getValue());
365  else
366  throw Exception("Nhx::propertyToString_. Unvalid property type, should be of class Number<int>.");
367  } else if (type == 2) {
368  const Number<double>* castedPptObject = dynamic_cast<const Number<double>*>(pptObject);
369  if (castedPptObject)
370  return TextTools::toString(castedPptObject->getValue());
371  else
372  throw Exception("Nhx::propertyToString_. Unvalid property type, should be of class Number<double>.");
373  } else if (type == 3) {
374  const BppBoolean* castedPptObject = dynamic_cast<const BppBoolean*>(pptObject);
375  if (castedPptObject)
376  return TextTools::toString(castedPptObject->getValue());
377  else
378  throw Exception("Nhx::propertyToString_. Unvalid property type, should be of class BppBoolean.");
379  } else {
380  throw Exception("Nhx::propertyToString_. Unsupported type: " + TextTools::toString(type));
381  }
382 }
383 
384 /******************************************************************************/
385 
386 Clonable* Nhx::stringToProperty_(const string& pptDesc, short type) throw (Exception)
387 {
388  if (type == 0) {
389  return new BppString(pptDesc);
390  } else if (type == 1) {
391  return new Number<int>(TextTools::toInt(pptDesc));
392  } else if (type == 2) {
393  return new Number<double>(TextTools::toDouble(pptDesc));
394  } else if (type == 3) {
395  return new BppBoolean(TextTools::to<bool>(pptDesc));
396  } else {
397  throw Exception("Nhx::stringToProperty_. Unsupported type: " + TextTools::toString(type));
398  }
399 }
400 
401 
402 /******************************************************************************/
403 
404 string Nhx::propertiesToParenthesis(const Node& node) const
405 {
406  ostringstream s;
407  s << "[&&NHX";
408  for (set<Property>::iterator it = supportedProperties_.begin(); it != supportedProperties_.end(); ++it) {
409  string ppt = (useTagsAsPropertyNames_ ? it->tag : it->name);
410  if (it->onBranch) {
411  if (node.hasBranchProperty(ppt)) {
412  const Clonable* pptObject = node.getBranchProperty(ppt);
413  s << ":" << it->tag << "=" << propertyToString_(pptObject, it->type);
414  }
415  } else {
416  if (node.hasNodeProperty(ppt)) {
417  const Clonable* pptObject = node.getNodeProperty(ppt);
418  s << ":" << it->tag << "=" << propertyToString_(pptObject, it->type);
419  }
420  }
421  }
422  //If no special node id is provided, we output the one from the tree:
423  if (!node.hasNodeProperty(useTagsAsPropertyNames_ ? "ND" : "Node ID"))
424  {
425  s << ":ND="<<TextTools::toString(node.getId());
426  }
427  s << "]";
428  return s.str();
429 }
430 
431 /******************************************************************************/
432 
433 string Nhx::nodeToParenthesis(const Node& node) const
434 {
435  ostringstream s;
436  if (node.isLeaf())
437  {
438  s << node.getName();
439  }
440  else
441  {
442  s << "(";
443  s << nodeToParenthesis(* node[0]);
444  for (int i = 1; i < static_cast<int>(node.getNumberOfSons()); i++)
445  {
446  s << "," << nodeToParenthesis(*node[i]);
447  }
448  s << ")";
449  }
450  if (node.hasDistanceToFather()) s << ":" << node.getDistanceToFather();
451  s << propertiesToParenthesis(node);
452  return s.str();
453 }
454 
455 /******************************************************************************/
456 
458 {
459  ostringstream s;
460  s << "(";
461 
462  const Node* node = tree.getRootNode();
463 
464  if (node->isLeaf())
465  {
466  s << node->getName();
467  for (size_t i = 0; i < node->getNumberOfSons(); ++i)
468  {
469  s << "," << nodeToParenthesis(*node->getSon(i));
470  }
471  }
472  else
473  {
474  s << nodeToParenthesis(* node->getSon(0));
475  for (size_t i = 1; i < node->getNumberOfSons(); ++i)
476  {
477  s << "," << nodeToParenthesis(*node->getSon(i));
478  }
479  }
480 
481  s << ")" ;
482  if (node->hasDistanceToFather()) s << ":" << node->getDistanceToFather();
483  s << propertiesToParenthesis(*node);
484  s << ";" << endl;
485  return s.str();
486 }
487 
488 /******************************************************************************/
489 
490 bool Nhx::setNodeProperties(Node& node, const string properties) const
491 {
492  string propsDesc = TextTools::removeChar(properties, ']');
493  StringTokenizer st(propsDesc, ":", true, true);
494  map<string, string> props;
495  while (st.hasMoreToken())
496  {
497  string token = st.nextToken();
498  if (TextTools::hasSubstring(token, "=")) {
499  StringTokenizer pt(token, "=", true, true);
500  string tag = pt.nextToken();
501  string value = pt.nextToken();
502  props[tag] = value;
503  }
504  }
505 
506  for (set<Property>::iterator it = supportedProperties_.begin(); it != supportedProperties_.end(); ++it) {
507  if (props.find(it->tag) != props.end()) {
508  //Property found
509  string ppt = (useTagsAsPropertyNames_ ? it->tag : it->name);
510  if (it->onBranch) {
511  node.setBranchProperty(ppt, *auto_ptr<Clonable>(stringToProperty_(props[it->tag], it->type)));
512  } else {
513  node.setNodeProperty(ppt, *auto_ptr<Clonable>(stringToProperty_(props[it->tag], it->type)));
514  }
515  }
516  }
517 
518  //If the ND tag is present and is decimal, we use it has the node id:
519  bool hasId = false;
520  if (props.find("ND") != props.end()) {
521  string prop = props["ND"];
522  if (TextTools::isDecimalNumber(prop))
523  {
524  node.setId(TextTools::toInt(prop));
525  hasId = true;
526  }
527  }
528  return hasId;
529 }
530 
531 /******************************************************************************/
532 
533 void Nhx::changeTagsToNames(Node& node) const {
534  for (set<Property>::iterator it = supportedProperties_.begin(); it != supportedProperties_.end(); ++it) {
535  if (it->onBranch) {
536  if (node.hasBranchProperty(it->tag)) {
537  node.setBranchProperty(it->name, *node.getBranchProperty(it->tag));
538  node.deleteBranchProperty(it->tag);
539  }
540  } else {
541  if (node.hasNodeProperty(it->tag)) {
542  node.setNodeProperty(it->name, *node.getNodeProperty(it->tag));
543  node.deleteNodeProperty(it->tag);
544  }
545  }
546  }
547  for (unsigned int i = 0; i < node.getNumberOfSons(); ++i)
548  changeTagsToNames(*node.getSon(i));
549 }
550 
551 /******************************************************************************/
552 
553 void Nhx::changeNamesToTags(Node& node) const {
554  for (set<Property>::iterator it = supportedProperties_.begin(); it != supportedProperties_.end(); ++it) {
555  if (it->onBranch) {
556  if (node.hasBranchProperty(it->name)) {
557  node.setBranchProperty(it->tag, *node.getBranchProperty(it->name));
558  node.deleteBranchProperty(it->name);
559  }
560  } else {
561  if (node.hasNodeProperty(it->name)) {
562  node.setNodeProperty(it->tag, *node.getNodeProperty(it->name));
563  node.deleteNodeProperty(it->name);
564  }
565  }
566  }
567  for (unsigned int i = 0; i < node.getNumberOfSons(); ++i)
568  changeNamesToTags(*node.getSon(i));
569 }
570 
571 /******************************************************************************/
572 
virtual N * getRootNode()
Definition: TreeTemplate.h:403
virtual void addSon(size_t pos, Node *node)
Definition: Node.h:407
void changeNamesToTags(Node &node) const
Convert property names from names to tags.
Definition: Nhx.cpp:553
Element getElement(const std::string &elt) const
Definition: Nhx.cpp:206
virtual bool hasNodeProperty(const std::string &name) const
Definition: Node.h:588
virtual std::string getName() const
Get the name associated to this node, if there is one, otherwise throw a NodeException.
Definition: Node.h:236
bool hasIds_
Definition: Nhx.h:140
virtual void deleteNodeProperty(const std::string &name)
Definition: Node.h:555
virtual Clonable * getBranchProperty(const std::string &name)
Definition: Node.h:617
virtual const Node * getSon(size_t pos) const
Definition: Node.h:395
STL namespace.
TreeTemplate< Node > * parenthesisToTree(const std::string &description) const
Definition: Nhx.cpp:334
virtual bool hasDistanceToFather() const
Tell if this node has a distance to the father.
Definition: Node.h:321
The phylogenetic tree class.
Interface for phylogenetic tree objects.
Definition: Tree.h:148
void registerProperty(const Property &property)
Definition: Nhx.h:225
std::string nodeToParenthesis(const Node &node) const
Definition: Nhx.cpp:433
virtual void deleteBranchProperty(const std::string &name)
Definition: Node.h:645
virtual void setName(const std::string &name)
Give a name or update the name associated to the node.
Definition: Node.h:247
bool useTagsAsPropertyNames_
Definition: Nhx.h:139
virtual bool isLeaf() const
Definition: Node.h:692
virtual double getDistanceToFather() const
Get the distance to the father node if there is one, otherwise throw a NodeException.
Definition: Node.h:283
const std::string getFormatDescription() const
Definition: Nhx.cpp:91
std::set< Property > supportedProperties_
Definition: Nhx.h:138
std::string content
Definition: Nhx.h:97
virtual void setRootNode(N *root)
Definition: TreeTemplate.h:401
virtual int getId() const
Get the node's id.
Definition: Node.h:203
const std::string getFormatName() const
Definition: Nhx.cpp:87
std::string length
Definition: Nhx.h:98
Node * parenthesisToNode(const std::string &description) const
Definition: Nhx.cpp:287
std::string propertiesToParenthesis(const Node &node) const
Definition: Nhx.cpp:404
bool isLeaf
Definition: Nhx.h:100
void changeTagsToNames(Node &node) const
Convert property names from tag to names.
Definition: Nhx.cpp:533
std::string treeToParenthesis(const TreeTemplate< Node > &tree) const
Definition: Nhx.cpp:457
The phylogenetic node class.
Definition: Node.h:90
virtual void setId(int id)
Set this node's id.
Definition: Node.h:210
virtual Clonable * getNodeProperty(const std::string &name)
Definition: Node.h:527
TreeTemplate< Node > * read(const std::string &path) const
Read a tree from a file.
Definition: Nhx.h:170
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
Definition: Node.h:299
Nhx(bool useTagsAsPptNames=true)
Build a new Nhx reader/writer.
Definition: Nhx.cpp:60
void write_(const Tree &tree, std::ostream &out) const
Definition: Nhx.cpp:131
bool setNodeProperties(Node &node, const std::string properties) const
Definition: Nhx.cpp:490
static const std::string BOOTSTRAP
Bootstrap tag.
Definition: TreeTools.h:723
virtual size_t getNumberOfSons() const
Definition: Node.h:388
void resetNodesId()
Number nodes.
Definition: TreeTemplate.h:284
std::string annotation
Definition: Nhx.h:99
virtual bool hasBranchProperty(const std::string &name) const
Definition: Node.h:678
virtual void setBranchProperty(const std::string &name, const Clonable &property)
Set/add a branch property.
Definition: Node.h:610
static Clonable * stringToProperty_(const std::string &pptDesc, short type)
Definition: Nhx.cpp:386
virtual void setNodeProperty(const std::string &name, const Clonable &property)
Set/add a node property.
Definition: Node.h:520
static std::string propertyToString_(const Clonable *pptObject, short type)
Definition: Nhx.cpp:353