bpp-seq  2.2.0
NexusIoSequence.cpp
Go to the documentation of this file.
1 //
2 // File: NexusIOSequence.cpp
3 // Created by: Julien Dutheil
4 // Created on: Wed May 27 16:15 2009
5 //
6 
7 /*
8 Copyright or © or Copr. CNRS, (November 17, 2004)
9 
10 This software is a computer program whose purpose is to provide classes
11 for sequences analysis.
12 
13 This software is governed by the CeCILL license under French law and
14 abiding by the rules of distribution of free software. You can use,
15 modify and/ or redistribute the software under the terms of the CeCILL
16 license as circulated by CEA, CNRS and INRIA at the following URL
17 "http://www.cecill.info".
18 
19 As a counterpart to the access to the source code and rights to copy,
20 modify and redistribute granted by the license, users are provided only
21 with a limited warranty and the software's author, the holder of the
22 economic rights, and the successive licensors have only limited
23 liability.
24 
25 In this respect, the user's attention is drawn to the risks associated
26 with loading, using, modifying and/or developing or reproducing the
27 software by the user in light of its specific status of free software,
28 that may mean that it is complicated to manipulate, and that also
29 therefore means that it is reserved for developers and experienced
30 professionals having in-depth computer knowledge. Users are therefore
31 encouraged to load and test the software's suitability as regards their
32 requirements in conditions enabling the security of their systems and/or
33 data to be ensured and, more generally, to use and operate it in the
34 same conditions as regards security.
35 
36 The fact that you are presently reading this means that you have had
37 knowledge of the CeCILL license and that you accept its terms.
38 */
39 
40 #include "NexusIoSequence.h"
41 #include "NexusTools.h"
42 #include "../Container/SiteContainerTools.h"
43 #include "../Alphabet/AlphabetTools.h"
44 #include <Bpp/Text/TextTools.h>
45 #include <Bpp/Text/KeyvalTools.h>
46 #include <Bpp/Io/FileTools.h>
47 
48 using namespace bpp;
49 
50 // From the STL:
51 #include <sstream>
52 
53 using namespace std;
54 
55 /******************************************************************************/
56 
57 const std::vector<std::string> NexusIOSequence::splitNameAndSequence_(const std::string& s) const throw (Exception)
58 {
59  vector<string> v(2);
60  string::size_type index = s.find(" ");
61  if(index == string::npos) throw Exception("NexusIOSequence::splitNameAndSequence_(). No sequence name found.");
62  v[0] = TextTools::removeSurroundingWhiteSpaces(s.substr(0, index));
63  v[1] = TextTools::removeFirstWhiteSpaces(s.substr(index + 1));
64  return v;
65 }
66 
67 
68 /******************************************************************************/
69 
70 void NexusIOSequence::appendAlignmentFromStream(std::istream& input, SiteContainer& vsc) const throw (Exception)
71 {
72  // Checking the existence of specified file
73  if (!input) { throw IOException ("NexusIOSequence::read(). Fail to open file"); }
74 
75  //Look for the DATA block:
76  string line = "";
77  while (TextTools::toUpper(line) != "BEGIN DATA;")
78  {
79  if (input.eof())
80  throw Exception("NexusIOSequence::appendFromStream(). No data block was found.");
81  line = TextTools::removeSurroundingWhiteSpaces(FileTools::getNextLine(input));
82  }
83 
84  //Look for the DIMENSIONS command:
85  string cmdName = "", cmdArgs = "";
86  while (cmdName != "DIMENSIONS")
87  {
88  if (input.eof())
89  throw Exception("NexusIOSequence::appendFromStream(). No DIMENSIONS command was found.");
90  NexusTools::getNextCommand(input, cmdName, cmdArgs);
91  cmdName = TextTools::toUpper(cmdName);
92  }
93  map<string, string> args;
94  KeyvalTools::multipleKeyvals(cmdArgs, args, " ");
95  map<string, string> argsUp;
96  for (map<string, string>::iterator it = args.begin(); it != args.end(); it++)
97  argsUp[TextTools::toUpper(it->first)] = it->second;
98  if (argsUp["NTAX"] == "")
99  throw Exception("NexusIOSequence::appendFromStream(). DIMENSIONS command does not have a NTAX argument.");
100  unsigned int ntax = TextTools::to<unsigned int>(argsUp["NTAX"]);
101 
102  //Look for the FORMAT command:
103  while (cmdName != "FORMAT")
104  {
105  if (input.eof())
106  throw Exception("NexusIOSequence::appendFromStream(). No FORMAT command was found.");
107  NexusTools::getNextCommand(input, cmdName, cmdArgs);
108  cmdName = TextTools::toUpper(cmdName);
109  }
110  if (TextTools::hasSubstring(cmdArgs, "TRANSPOSE"))
111  throw Exception("NexusIOSequence::appendFromStream(). TRANSPOSE option is not supported.");
112 
113  //Check if the alignment is dotted or not:
114  bool matchChar = TextTools::hasSubstring(TextTools::toUpper(cmdArgs), "MATCHCHAR");
115 
116  SiteContainer* alignment = 0;
117  if (matchChar)
119  else
120  alignment = &vsc;
121 
122  //Look for the MATRIX command:
123  line = "";
124  while (!TextTools::startsWith(TextTools::toUpper(line), "MATRIX"))
125  {
126  if (input.eof())
127  throw Exception("NexusIOSequence::appendFromStream(). No MATRIX command was found.");
128  line = TextTools::removeSurroundingWhiteSpaces(FileTools::getNextLine(input));
129  }
130  line = FileTools::getNextLine(input);
131 
132  vector<string> names, seqs;
133  // Read first block:
134  bool commandFinished = false;
135  for (unsigned int i = 0; i < ntax && !input.eof(); i++)
136  {
137  if (TextTools::endsWith(line, ";"))
138  {
139  if (i < ntax - 1)
140  throw IOException("NexusIOSequence::appendFromStream. Early end of MATRIX command, some sequences are missing.");
141  else
142  {
143  commandFinished = true;
144  line = line.substr(0, line.size() - 1); //Remove trailing semi-colon.
145  }
146  }
147  vector<string> v = splitNameAndSequence_(line);
148  names.push_back(v[0]);
149  seqs.push_back(v[1]);
150  line = FileTools::getNextLine(input);
151  }
152 
153  //Then read all other blocks:
154  commandFinished = TextTools::removeSurroundingWhiteSpaces(line) == ";"; //In case the end of command is on a separate line.
155  while (!commandFinished)
156  {
157  for (unsigned int i = 0; i < ntax && !input.eof(); i++)
158  {
159  if (TextTools::endsWith(line, ";"))
160  {
161  if (i < ntax - 1)
162  throw IOException("NexusIOSequence::appendFromStream. Early end of MATRIX command, some sequences are missing.");
163  else
164  {
165  commandFinished = true;
166  line = line.substr(0, line.size() - 1); //Remove trailing semi-colon.
167  }
168  }
169 
170  vector<string> v = splitNameAndSequence_(line);
171  if (v[0] != names[i])
172  throw IOException("NexusIOSequence::appendFromStream. Bad file, the sequences are not in the same order in interleaved blocks, or one taxon is missing.");
173  seqs[i] += v[1];
174  line = FileTools::getNextLine(input);
175  commandFinished = TextTools::removeSurroundingWhiteSpaces(line) == ";"; //In case the end of command is on a separate line.
176  }
177  }
178  for (unsigned int i = 0; i < names.size(); i++)
179  {
180  alignment->addSequence(BasicSequence(names[i], seqs[i], vsc.getAlphabet()), checkNames_);
181  }
182 
183  if (matchChar)
184  {
185  //Now we resolve the alignment:
186  SiteContainer* resolvedAlignment =
188  delete alignment;
189  for (unsigned int i = 0; i < resolvedAlignment->getNumberOfSequences(); i++)
190  {
191  vsc.addSequence(resolvedAlignment->getSequence(i), false);
192  }
193  delete resolvedAlignment;
194  }
195 }
196 
197 /******************************************************************************/
198 
199 const std::string NexusIOSequence::getFormatName() const { return "Nexus"; }
200 
201 /******************************************************************************/
202 
203 const std::string NexusIOSequence::getFormatDescription() const
204 {
205  return "Nexus file format.";
206 }
207 
208 /******************************************************************************/
209 
The SiteContainer interface.
Definition: SiteContainer.h:63
const std::string getFormatName() const
Aligned sequences container.
This alphabet is used to deal with NumericAlphabet.
STL namespace.
const std::string getFormatDescription() const
static bool getNextCommand(std::istream &input, std::string &name, std::string &arguments, bool lineBrk=true)
parse the next command name within a block.
Definition: NexusTools.cpp:72
virtual const Sequence & getSequence(size_t sequenceIndex) const =0
Retrieve a sequence object from the container.
A basic implementation of the Sequence interface.
Definition: Sequence.h:207
void appendAlignmentFromStream(std::istream &input, SiteContainer &sc) const
Append sequences to a container from a stream.
virtual void addSequence(const Sequence &sequence, bool checkName)=0
Add a sequence to the container.
virtual const Alphabet * getAlphabet() const =0
Get sequence container's alphabet.
static const DefaultAlphabet DEFAULT_ALPHABET
Definition: AlphabetTools.h:65
const std::vector< std::string > splitNameAndSequence_(const std::string &s) const
static SiteContainer * resolveDottedAlignment(const SiteContainer &dottedAln, const Alphabet *resolvedAlphabet)
Resolve a container with "." notations.
virtual size_t getNumberOfSequences() const =0
Get the number of sequences in the container.