bpp-phyl  2.2.0
IoPairedSiteLikelihoods.cpp
Go to the documentation of this file.
1 //
2 // File: PairedSiteLikelihoods.cpp
3 // Created by: Nicolas Rochette
4 // Created on: January 10, 2011
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 // From the STL
41 #include <vector>
42 #include <numeric>
43 
44 // From Bio++
45 #include <Bpp/Text/TextTools.h>
46 #include "../Likelihood/PairedSiteLikelihoods.h"
47 
49 
50 using namespace std;
51 using namespace bpp;
52 
53 /*
54  * Read from a stream in Tree-puzzle, phylip-like format
55  */
56 PairedSiteLikelihoods IOTreepuzzlePairedSiteLikelihoods::read(std::istream& is) throw (Exception)
57 {
58  // The first line contains the number of models and the number of sites
59  string line;
60  getline(is, line);
61  istringstream iss (line);
62  size_t nmodels, nsites;
63  iss >> nmodels;
64  iss >> nsites;
65 
66  // Then each line contains a model name and site loglikelihoods under this model
67  vector<string> names;
68  vector<vector<double> > loglikelihoods;
69  loglikelihoods.reserve(nmodels);
70 
71  // The exact format is determined upon the first line
72  // Name and likelihoods fields can be delimited by either a tab or two spaces
73  string delim;
74  streampos pos (is.tellg());
75  getline(is, line);
76  if (line.find("\t") != string::npos)
77  delim = "\t";
78  else if (line.find(" ") != string::npos)
79  delim = " ";
80  else
81  throw;
82  is.seekg(pos);
83 
84  // Read the data
85  while (getline(is, line))
86  {
87  size_t delim_pos (line.find(delim));
88  if (delim_pos == string::npos)
89  {
90  ostringstream msg;
91  msg << "IOTreepuzzlePairedSiteLikelihoods::read: Couldn't find delimiter. The beggining of the line was : "
92  << endl << line.substr(0, 100);
93  throw Exception(msg.str());
94  }
95 
96  // The name
97  names.push_back( TextTools::removeSurroundingWhiteSpaces(line.substr(0, delim_pos)) );
98 
99  // The sites
100  loglikelihoods.push_back(vector<double>());
101  loglikelihoods.back().reserve(nsites);
102 
103  istringstream liks_stream ( line.substr(delim_pos) );
104  double currllik;
105  while (liks_stream >> currllik)
106  loglikelihoods.back().push_back(currllik);
107 
108  // Check the number of sites
109  if (loglikelihoods.back().size() != nsites)
110  {
111  ostringstream oss;
112  oss << "IOTreepuzzlePairedSiteLikelihoods::read: Model '" << names.back()
113  << "' does not have the correct number of sites. ("
114  << loglikelihoods.back().size() << ", expected: " << nsites << ")";
115  throw Exception(oss.str());
116  }
117  }
118 
119  // Check the number of models
120  if (loglikelihoods.size() != nmodels)
121  throw Exception("IOTreepuzzlePairedSiteLikelihoods::read: Wrong number of models.");
122 
123  PairedSiteLikelihoods psl (loglikelihoods, names);
124 
125  return psl;
126 }
127 
128 /*
129  * Read from a file in Tree-puzzle, phylip-like format
130  */
131 PairedSiteLikelihoods IOTreepuzzlePairedSiteLikelihoods::read(const std::string& path) throw (Exception)
132 {
133  ifstream iF (path.c_str());
134  PairedSiteLikelihoods psl (IOTreepuzzlePairedSiteLikelihoods::read(iF));
135  return psl;
136 }
137 
138 /*
139  * Write to stream in Tree-puzzle, phylip-like format
140  */
141 void IOTreepuzzlePairedSiteLikelihoods::write(const bpp::PairedSiteLikelihoods& psl, ostream& os, const string& delim)
142 {
143  if (psl.getLikelihoods().size() == 0)
144  throw Exception("Writing an empty PairedSiteLikelihoods object to file.");
145 
146  // Header line
147  os << psl.getNumberOfModels() << " " << psl.getNumberOfSites() << endl;
148 
149  // Data lines
150 
151  if (delim == "\t")
152  {
153  // The delimiter is a tab
154  for (size_t i = 0; i < psl.getNumberOfModels(); ++i)
155  {
156  os << psl.getModelNames().at(i) << "\t";
157  for (vector<double>::const_iterator sitelik = psl.getLikelihoods().at(i).begin();
158  sitelik != psl.getLikelihoods().at(i).end();
159  ++sitelik)
160  {
161  if (sitelik == psl.getLikelihoods().at(i).end() - 1)
162  os << *sitelik;
163  else
164  os << *sitelik << " ";
165  }
166  os << endl;
167  }
168  }
169 
170  else if (delim == " ")
171  // The delimiter is two spaces
172  {
173  // First we must get the length of the names field
174  vector<size_t> name_sizes;
175  for (vector<string>::const_iterator name = psl.getModelNames().begin();
176  name != psl.getModelNames().end();
177  ++name)
178  {
179  name_sizes.push_back(name->size());
180  }
181  size_t names_field_size = *max_element(name_sizes.begin(), name_sizes.end()) + 2;
182 
183  // Data lines
184  for (size_t i = 0; i < psl.getNumberOfModels(); ++i)
185  {
186  // name field
187  string name (psl.getModelNames().at(i));
188  while (name.size() != names_field_size)
189  name.append(" ");
190  os << name;
191 
192  // site-likelihoods field
193  for (vector<double>::const_iterator sitelik = psl.getLikelihoods().at(i).begin();
194  sitelik != psl.getLikelihoods().at(i).end();
195  ++sitelik)
196  {
197  if (sitelik == psl.getLikelihoods().at(i).end() - 1)
198  os << *sitelik;
199  else
200  os << *sitelik << " ";
201  }
202  os << endl;
203  }
204  }
205 
206  else
207  {
208  ostringstream msg;
209  msg << "IOTreepuzzlePairedSiteLikelihoods::write: Unknown field delimiter "
210  << "\"" << delim << "\".";
211  os << msg.str() << endl;
212  throw Exception(msg.str());
213  }
214 }
215 
216 
217 /*
218  * Write to file in Tree-puzzle, phylip-like format
219  */
220 void IOTreepuzzlePairedSiteLikelihoods::write(const bpp::PairedSiteLikelihoods& psl, const std::string& path, const string& delim)
221 {
222  ofstream oF (path.c_str());
223  IOTreepuzzlePairedSiteLikelihoods::write(psl, oF, delim);
224 }
225 
226 
227 /*
228  * Read from a stream in Phyml format
229  */
230 vector<double> IOPhymlPairedSiteLikelihoods::read(std::istream& is) throw (Exception)
231 {
232  vector<double> loglikelihoods;
233  string str;
234 
235  // check the format, with first line
236  getline(is, str);
237  string expected ("Note : P(D|M) is the probability of site D given the model M (i.e., the site likelihood)");
238  if (str.find(expected) == string::npos) // \r\n handling
239  {
240  ostringstream msg;
241  msg << "IOPhymlPairedSiteLikelihoods::read: The first line was expected to be :"
242  << expected << endl
243  << "and was :" << endl
244  << str << endl;
245  throw Exception(msg.str());
246  }
247 
248  // skip header lines
249  for (int i = 0; i < 6; ++i)
250  {
251  getline(is, str);
252  }
253 
254  while (getline(is, str))
255  {
256  // the site likelihood is the second field on the line
257  istringstream ss (str);
258  ss >> str;
259  double lik;
260  ss >> lik;
261  loglikelihoods.push_back(log(lik));
262  }
263 
264  return loglikelihoods;
265 }
266 
267 /*
268  * Read from a file in Phyml format
269  */
270 vector<double> IOPhymlPairedSiteLikelihoods::read(const std::string& path) throw (Exception)
271 {
272  ifstream iF (path.c_str());
273  vector<double> loglikelihoods(IOPhymlPairedSiteLikelihoods::read(iF));
274  return loglikelihoods;
275 }
276 
const std::vector< std::string > & getModelNames() const
A container for paired-site likelihoods (likelihoods over the same sites for different models...
STL namespace.
std::size_t getNumberOfSites() const
const std::vector< std::vector< double > > & getLikelihoods() const
size_t getNumberOfModels() const
Get the number of models in the container.