bpp-core  2.2.0
TextTools.cpp
Go to the documentation of this file.
1 //
2 // File: TextTools.cpp
3 // Created by: Julien Dutheil
4 // Created on: Fri Aug 8 12:57:50 2003
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 
10  This software is a computer program whose purpose is to provide utilitary
11  classes. This file belongs to the Bio++ Project.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #include "TextTools.h"
41 
42 using namespace bpp;
43 
#include <ctype.h>
#include <iomanip>
#include <limits>
#include <sstream>
47 
48 using namespace std;
49 
50 /******************************************************************************/
51 
52 bool TextTools::isEmpty(const std::string& s)
53 {
54  for (unsigned int i = 0; i < s.size(); i++)
55  {
56  char c = s[i];
57  if (c != ' ' && c != '\n' && c != '\t')
58  return false;
59  }
60  return true;
61 }
62 
63 /******************************************************************************/
64 
65 std::string TextTools::toUpper(const std::string& s)
66 {
67  string result = "";
68  for (size_t i = 0; i < s.size(); i++)
69  {
70  result += static_cast<char>(toupper(static_cast<int>(s[i])));
71  }
72  return result;
73 }
74 
75 /******************************************************************************/
76 
77 std::string TextTools::toLower(const std::string& s)
78 {
79  string result = "";
80  for (size_t i = 0; i < s.size(); i++)
81  {
82  result += static_cast<char>(tolower(static_cast<int>(s[i])));
83  }
84  return result;
85 }
86 
87 /******************************************************************************/
88 
90 {
91  return (c == ' ')
92  || (c == '\t')
93  || (c == '\n')
94  || (c == '\r')
95  || (c == '\f');
96 }
97 
98 /******************************************************************************/
99 
100 std::string TextTools::removeWhiteSpaces(const std::string& s)
101 {
102  // Copy sequence
103  string st (s);
104 
105  // For all sequence's characters
106  for (unsigned int i = 0; i < st.size(); i++)
107  {
108  if (isWhiteSpaceCharacter(st[i]))
109  {
110  st.erase(st.begin() + i); // Remove character
111  i--;
112  }
113  }
114 
115  // Send result
116  return st;
117 }
118 
119 /******************************************************************************/
120 
121 std::string TextTools::removeFirstWhiteSpaces(const std::string& s)
122 {
123  // Copy sequence
124  string st (s);
125 
126  while (st.size() > 0 && isWhiteSpaceCharacter(st[0]))
127  {
128  st.erase(st.begin());
129  }
130 
131  // Send result
132  return st;
133 }
134 
135 /******************************************************************************/
136 
137 std::string TextTools::removeLastWhiteSpaces(const std::string& s)
138 {
139  // Copy sequence
140  string st (s);
141 
142  while (st.size() > 0 && isWhiteSpaceCharacter(st[st.size() - 1]))
143  {
144  st.erase(st.end() - 1);
145  }
146 
147  // Send result
148  return st;
149 }
150 
151 /******************************************************************************/
152 
153 std::string TextTools::removeSurroundingWhiteSpaces(const std::string& s)
154 {
155  return removeFirstWhiteSpaces(removeLastWhiteSpaces(s));
156 }
157 
158 /******************************************************************************/
159 
161 {
162  return (c == '\n')
163  || (c == '\r');
164 }
165 
166 /******************************************************************************/
167 
168 std::string TextTools::removeNewLines(const std::string& s)
169 {
170  // Copy string
171  string st (s);
172 
173  // For all string's characters
174  for (unsigned int i = 0; i < st.size(); i++)
175  {
176  if (isNewLineCharacter(st[i]))
177  {
178  st.erase(st.begin() + i); // Remove character
179  i--;
180  }
181  }
182 
183  // Send result
184  return st;
185 }
186 
187 /******************************************************************************/
188 
189 std::string TextTools::removeLastNewLines(const std::string& s)
190 {
191  // Copy string
192  string st (s);
193 
194  while (st.size() > 0 && isNewLineCharacter(st[st.size() - 1]))
195  {
196  st.erase(st.end() - 1);
197  }
198 
199  // Send result
200  return st;
201 }
202 
203 /******************************************************************************/
204 
206 {
207  if (c == '0' || c == '1' || c == '2' || c == '3' || c == '4'
208  || c == '5' || c == '6' || c == '7' || c == '8' || c == '9')
209  return true;
210  else
211  return false;
212 }
213 
214 /******************************************************************************/
215 
216 bool TextTools::isDecimalNumber(const std::string& s, char dec, char scientificNotation)
217 {
218  if (isEmpty(s))
219  return false;
220 
221  size_t sepCount = 0;
222  size_t sciCount = 0;
223  size_t i = 0;
224  if (s[0] == '-') i = 1;
225  for (; i < s.size(); ++i)
226  {
227  char c = s[i];
228  if (c == dec)
229  sepCount++;
230  else if (c == scientificNotation) {
231  sciCount++;
232  if (i == s.size() - 1) return false; //Must be sthg after scientific notation.
233  c = s[i + 1];
234  if (c == '-' || c == '+') i++;
235  if (i == s.size() - 1) return false; //Must be sthg after scientific notation.
236  if (sepCount == 0) sepCount = 1; //We do not want any dec in the exponent.
237  } else if (!isDecimalNumber(c))
238  return false;
239  if (sepCount > 1 || sciCount > 1)
240  return false;
241  }
242  return true;
243 }
244 
245 /******************************************************************************/
246 
247 bool TextTools::isDecimalInteger(const std::string& s, char scientificNotation)
248 {
249  if (isEmpty(s))
250  return false;
251 
252  size_t sciCount = 0;
253  size_t i = 0;
254  if (s[0] == '-') i = 1;
255  for (; i < s.size(); ++i)
256  {
257  char c = s[i];
258  if (c == scientificNotation) {
259  sciCount++;
260  if (i == s.size() - 1) return false; //Must be sthg after scientific notation.
261  c = s[i + 1];
262  if (c == '-') return false; //Not an integer then!
263  if (c == '+') i++;
264  if (i == s.size() - 1) return false; //Must be sthg after scientific notation.
265  } else if (!isDecimalNumber(c))
266  return false;
267  if (sciCount > 1)
268  return false;
269  }
270  return true;
271 }
272 
273 /******************************************************************************/
274 
275 std::string TextTools::toString(int i)
276 {
277  ostringstream oss;
278  oss << i;
279  return oss.str();
280 }
281 
282 /******************************************************************************/
283 
284 std::string TextTools::toString(char c)
285 {
286  ostringstream oss;
287  oss << c;
288  return oss.str();
289 }
290 
291 /******************************************************************************/
292 
293 std::string TextTools::toString(double d, int precision)
294 {
295  ostringstream oss;
296  oss << setprecision(precision) << d;
297  return oss.str();
298 }
299 
300 /******************************************************************************/
301 
302 int TextTools::toInt(const std::string& s, char scientificNotation) throw (Exception)
303 {
304  if (!isDecimalInteger(s, scientificNotation)) throw Exception("TextTools::toInt(). Invalid number specification: " + s);
305  istringstream iss(s);
306  int i;
307  iss >> i;
308  return i;
309 }
310 
311 /******************************************************************************/
312 
313 double TextTools::toDouble(const std::string& s, char dec, char scientificNotation) throw (Exception)
314 {
315  if (!isDecimalNumber(s, dec, scientificNotation)) throw Exception("TextTools::toDouble(). Invalid number specification: " + s);
316  istringstream iss(s);
317  double d;
318  iss >> d;
319  return d;
320 }
321 
322 /******************************************************************************/
323 
324 std::string TextTools::resizeRight(const std::string& s, size_t newSize, char fill)
325 {
326  if (s.size() > newSize)
327  return s.substr(0, newSize);
328  else
329  return s + string(newSize - s.size(), fill);
330 }
331 
332 /******************************************************************************/
333 
334 std::string TextTools::resizeLeft(const std::string& s, size_t newSize, char fill)
335 {
336  if (s.size() > newSize)
337  return s.substr(s.size() - newSize);
338  else
339  return string(newSize - s.size(), fill) + s;
340 }
341 
342 /******************************************************************************/
343 
344 std::vector<std::string> TextTools::split(const std::string& s, size_t n)
345 {
346  vector<string> v;
347  string tmp = s;
348  while (tmp.size() > n)
349  {
350  v.push_back(tmp.substr(0, n));
351  tmp = tmp.substr(n);
352  }
353  v.push_back(tmp);
354  return v;
355 }
356 
357 /******************************************************************************/
358 
359 std::string TextTools::removeSubstrings(const std::string& s, char blockBeginning, char blockEnding)
360 throw (Exception)
361 {
362  string t = "";
363  int blockCount = 0;
364  size_t begPos = 0;
365  for (size_t i = 0; i < s.size(); i++)
366  {
367  char current = s[i];
368  if (current == blockBeginning)
369  {
370  blockCount++;
371  t += s.substr(begPos, i - begPos);
372  }
373  else if (current == blockEnding)
374  {
375  blockCount--;
376  if (blockCount == 0)
377  {
378  begPos = i + 1;
379  }
380  else if (blockCount < 0)
381  throw Exception("TextTools::removeSubstrings(). " +
382  string("Ending block character without corresponding beginning one at position ") + toString((int)i) + ".");
383  }
384  }
385  t += s.substr(begPos);
386  return t;
387 }
388 
389 /******************************************************************************/
390 
391 std::string TextTools::removeSubstrings(const std::string& s, char blockBeginning, char blockEnding, std::vector<string>& exceptionsBeginning, std::vector<string>& exceptionsEnding)
392 throw (Exception)
393 {
394  string t = "";
395  int blockCount = 0;
396  size_t begPos = 0;
397  for (size_t i = 0; i < s.size(); i++)
398  {
399  char current = s[i];
400  if (current == blockBeginning)
401  {
402  bool except = false;
403  for (size_t j = 0; j < exceptionsBeginning.size(); j++)
404  {
405  size_t pos = exceptionsBeginning[j].find(blockBeginning);
406  if (pos != string::npos) {
407  size_t left = i - pos;
408  size_t right = i + exceptionsBeginning[j].length() - pos;
409  if ((right < s.length() - 1) && (hasSubstring (s.substr(left, right), exceptionsBeginning[j])))
410  {
411  except = true;
412  break;
413  }
414  }
415  }
416  if (!except)
417  {
418  blockCount++;
419  t += s.substr(begPos, i - begPos);
420  }
421  }
422  else if ( (current == blockEnding) && (blockCount > 0) )
423  {
424  for (size_t j = 0; j < exceptionsEnding.size(); j++)
425  {
426  size_t pos = exceptionsEnding[j].find(blockEnding);
427  if (pos != string::npos) {
428  size_t left = i - pos;
429  size_t right = i + exceptionsEnding[j].length() - pos;
430  if ((right < s.length() - 1 ) && (hasSubstring (s.substr(left, right), exceptionsEnding[j])))
431  {
432  break;
433  }
434  }
435  }
436  blockCount--;
437  if (blockCount == 0)
438  {
439  begPos = i + 1;
440  }
441  else if (blockCount < 0)
442  throw Exception("TextTools::removeSubstrings(). " +
443  string("Ending block character without corresponding beginning one at position ") + toString((int)i) + ".");
444  }
445  }
446  t += s.substr(begPos);
447  return t;
448 }
449 
450 /******************************************************************************/
451 
452 std::string TextTools::removeChar(const std::string& s, char c)
453 {
454  // Copy sequence
455  string st(s);
456 
457  // For all sequence's characters
458  for (unsigned int i = 0; i < st.size(); i++)
459  {
460  if (st[i] == c)
461  {
462  st.erase(st.begin() + i); // Remove character
463  i--;
464  }
465  }
466 
467  // Send result
468  return st;
469 }
470 
471 /******************************************************************************/
472 
473 unsigned int TextTools::count(const std::string& s, const std::string& pattern)
474 {
475  unsigned int count = 0;
476  string::size_type index = s.find(pattern);
477  while (index != string::npos)
478  {
479  count++;
480  index = s.find(pattern, index + 1);
481  }
482  return count;
483 }
484 
485 /******************************************************************************/
486 
487 bool TextTools::startsWith(const std::string& s, const std::string& pattern)
488 {
489  if (s.size() < pattern.size())
490  return false;
491  return s.substr(0, pattern.size()) == pattern;
492 }
493 
494 /******************************************************************************/
495 
496 bool TextTools::endsWith(const std::string& s, const std::string& pattern)
497 {
498  if (s.size() < pattern.size())
499  return false;
500  return s.substr(s.size() - pattern.size()) == pattern;
501 }
502 
503 /******************************************************************************/
504 
505 bool TextTools::hasSubstring(const std::string& s, const std::string& pattern)
506 {
507  if (s.size() < pattern.size())
508  return false;
509  for (size_t i = 0; i < s.size() - pattern.size() + 1; ++i)
510  {
511  if (s.substr(i, pattern.size()) == pattern)
512  return true;
513  }
514  return false;
515 }
516 
517 /******************************************************************************/
518 
519 void TextTools::replaceAll(std::string& target, const std::string& query, const std::string& replacement)
520 {
521  if (query.empty())
522  return;
523  size_t pos = target.find(query);
524  while (pos != string::npos) {
525  target.replace(pos, query.length(), replacement);
526  pos += replacement.length(); //We prevent recursivity!
527  pos = target.find(query, pos);
528  }
529 }
530 
531 /******************************************************************************/
532 
static bool isNewLineCharacter(char c)
Tell if a character is a new line character or not.
Definition: TextTools.cpp:160
static std::string resizeLeft(const std::string &s, size_t newSize, char fill=' ')
Send a string of size 'newSize', which is a copy of 's' truncated or filled with character 'fill' at ...
Definition: TextTools.cpp:334
This class allows to perform a correspondence analysis.
static std::string resizeRight(const std::string &s, size_t newSize, char fill=' ')
Send a string of size 'newSize', which is a copy of 's' truncated or filled with character 'fill' at ...
Definition: TextTools.cpp:324
STL namespace.
static bool isDecimalNumber(char c)
Tell if a given character describes a decimal number.
Definition: TextTools.cpp:205
static std::string toString(T t)
General template method to convert to a string.
Definition: TextTools.h:189
static int toInt(const std::string &s, char scientificNotation='e')
Convert from string to int.
Definition: TextTools.cpp:302
static unsigned int count(const std::string &s, const std::string &pattern)
Count the occurrences of a given pattern in a string.
Definition: TextTools.cpp:473
static bool startsWith(const std::string &s, const std::string &pattern)
Tell if a string begins with a certain motif.
Definition: TextTools.cpp:487
static bool hasSubstring(const std::string &s, const std::string &pattern)
Tell if a string contains a certain motif.
Definition: TextTools.cpp:505
static bool isEmpty(const std::string &s)
Tell if a string is empty.
Definition: TextTools.cpp:52
static std::string removeSurroundingWhiteSpaces(const std::string &s)
Remove all white spaces characters at the beginning and the end of a string.
Definition: TextTools.cpp:153
static bool endsWith(const std::string &s, const std::string &pattern)
Tell if a string ends with a certain motif.
Definition: TextTools.cpp:496
static std::string removeNewLines(const std::string &s)
Remove all new line characters in a string.
Definition: TextTools.cpp:168
static std::string removeWhiteSpaces(const std::string &s)
Remove all white spaces characters in a string.
Definition: TextTools.cpp:100
static bool isDecimalInteger(const std::string &s, char scientificNotation='e')
Tell if a given character string describes a decimal integer.
Definition: TextTools.cpp:247
static std::string removeFirstWhiteSpaces(const std::string &s)
Remove all white spaces characters at the beginning of a string.
Definition: TextTools.cpp:121
Exception base class.
Definition: Exceptions.h:57
static std::string removeLastWhiteSpaces(const std::string &s)
Remove all white spaces characters at the end of a string.
Definition: TextTools.cpp:137
static std::string removeLastNewLines(const std::string &s)
Remove all new line characters at the end of a string.
Definition: TextTools.cpp:189
static std::vector< std::string > split(const std::string &s, size_t n)
Split a string into parts of size 'n'.
Definition: TextTools.cpp:344
static std::string removeSubstrings(const std::string &s, char blockBeginning, char blockEnding)
Remove substrings from a string.
Definition: TextTools.cpp:359
static std::string removeChar(const std::string &s, char c)
Remove all occurrences of a character in a string.
Definition: TextTools.cpp:452
static std::string toUpper(const std::string &s)
Make the string uppercase.
Definition: TextTools.cpp:65
static std::string toLower(const std::string &s)
Make the string lowercase.
Definition: TextTools.cpp:77
static void replaceAll(std::string &target, const std::string &query, const std::string &replacement)
Replacement of all non-overlapping occurrences of a certain motif in a string.
Definition: TextTools.cpp:519
static double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
Convert from string to double.
Definition: TextTools.cpp:313
static bool isWhiteSpaceCharacter(char c)
Tell if a character is a white space or not.
Definition: TextTools.cpp:89