bpp-core  2.2.0
NestedStringTokenizer.cpp
Go to the documentation of this file.
1 //
2 // File: NestedStringTokenizer.cpp
3 // Author : Julien Dutheil
4 // Last modification : Monday May 22 10:57 2006
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 
10  This software is a computer program whose purpose is to map data onto
11  a sequence or a phylogenetic tree.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38 */
39 
40 #include "NestedStringTokenizer.h"
41 #include "TextTools.h"
42 
43 using namespace bpp;
44 
45 //From the STL:
46 #include <iostream>
47 
48 using namespace std;
49 
50 NestedStringTokenizer::NestedStringTokenizer(const std::string& s, const std::string& open, const std::string& end, const std::string& delimiters, bool solid)
51  throw (Exception):
53 {
54  int blocks = 0;
55  string cache = "";
56  if (!solid)
57  {
58  string::size_type index = s.find_first_not_of(delimiters, 0);
59  while (index != s.npos)
60  {
61  string::size_type newIndex = s.find_first_of(delimiters, index);
62  bool endBlockFound = false;
63  while (!endBlockFound)
64  {
65  if (newIndex != s.npos)
66  {
67  string token = s.substr(index, newIndex - index);
68  blocks += static_cast<int>(TextTools::count(token, open)) - static_cast<int>(TextTools::count(token, end));
69 
70  if (blocks == 0)
71  {
72  tokens_.push_back(cache + token);
73  cache = ""; //reset cache.
74  index = s.find_first_not_of(delimiters, newIndex);
75  endBlockFound = true;
76  }
77  else
78  {
79  // Ignore this token untill closing block is found
80  cache += s.substr(index, newIndex - index + 1);
81  index = newIndex + 1;
82  newIndex = s.find_first_of(delimiters, index);
83  }
84  }
85  else
86  {
87  string token = s.substr(index);
88  blocks += static_cast<int>(TextTools::count(token, open)) - static_cast<int>(TextTools::count(token, end));
89  if (blocks == 0)
90  {
91  tokens_.push_back(cache + token);
92  cache = ""; //reset cache.
93  index = newIndex;
94  endBlockFound = true;
95  }
96  else throw Exception("NestedStringTokenizer (constructor). Unclosed block.");
97  }
98  }
99  }
100  }
101  else
102  {
103  string::size_type index = 0;
104  while (index != s.npos)
105  {
106  string::size_type newIndex = s.find(delimiters, index);
107  bool endBlockFound = false;
108  while (!endBlockFound)
109  {
110  if (newIndex != s.npos)
111  {
112  string token = s.substr(index, newIndex - index);
113  blocks += static_cast<int>(TextTools::count(token, open)) - static_cast<int>(TextTools::count(token, end));
114 
115  if (blocks == 0)
116  {
117  tokens_.push_back(cache + token);
118  cache = ""; //reset cache.
119  index = newIndex + delimiters.size();
120  endBlockFound = true;
121  }
122  else
123  {
124  // Ignore this token untill closing block is found
125  cache += s.substr(index, newIndex - index + 1);
126  index = newIndex + 1;
127  newIndex = s.find(delimiters, index);
128  }
129  }
130  else
131  {
132  string token = s.substr(index);
133  blocks += static_cast<int>(TextTools::count(token, open)) - static_cast<int>(TextTools::count(token, end));
134  if (blocks == 0)
135  {
136  tokens_.push_back(cache + token);
137  cache = ""; //reset cache.
138  index = newIndex;
139  endBlockFound = true;
140  }
141  else throw Exception("Unclosed block.");
142  }
143  }
144  }
145  }
146 }
147 
148 const std::string& NestedStringTokenizer::nextToken() throw (Exception)
149 {
150  if (!hasMoreToken()) throw Exception("No more token in nested tokenizer.");
151  return tokens_[currentPosition_++];
152 }
153 
A tokenizer for strings.
This class allows to perform a correspondence analysis.
STL namespace.
static unsigned int count(const std::string &s, const std::string &pattern)
Count the occurrences of a given pattern in a string.
Definition: TextTools.cpp:473
const std::string & nextToken()
Get the next available token. If no token is available, throw an Exception.
Exception base class.
Definition: Exceptions.h:57
NestedStringTokenizer(const std::string &s, const std::string &open, const std::string &end, const std::string &delimiters=" \\", bool solid=false)
Build a new StringTokenizer from a string.