46 #include <Bpp/Text/TextTools.h> 47 #include <Bpp/Numeric/Random/RandomTools.h> 65 throw Exception (
"StringSequenceTools::subseq: Invalid interval");
// Take a private copy so the trimming below does not modify `sequence`.
68 string temp(sequence);
// Drop the tail first: everything after index `end` is erased, so `end` itself is kept.
// Trimming the tail before the head keeps the `end` offset valid.
71 temp.erase(temp.begin() +
static_cast<ptrdiff_t
>(end + 1), temp.end());
// Then drop the first `begin` characters, leaving positions begin..end inclusive.
72 temp.erase(temp.begin(), temp.begin() +
static_cast<ptrdiff_t
>(begin));
82 return TextTools::resizeRight(sequence, size,
'-');
87 return TextTools::resizeLeft(sequence, size,
'-');
// Work on a copy; the erasures below only touch `result`.
95 string result(sequence);
// Walk the copy and erase each position whose character equals `chars`.
// NOTE(review): erase() shifts the remaining characters left by one while the
// loop still performs i++, so the character that slides into slot i is never
// examined. With only the statements shown here, removing 'a' from "aab" would
// leave "ab". Confirm no elided line compensates; otherwise consider
// result.erase(std::remove(result.begin(), result.end(), chars), result.end()).
98 for (
unsigned int i = 0; i < result.size(); i++)
100 if (result[i] == chars)
101 result.erase(result.begin() + i);
// Work on a copy; the erasures below only touch `result`.
112 string result(sequence);
// Outer loop: one pass over `result` per character listed in `chars`.
115 for (
unsigned int i = 0; i < chars.size(); i++)
// Inner loop: erase each position of `result` that matches chars[i].
// NOTE(review): after erase() the tail shifts left but j is still incremented,
// so two adjacent occurrences of chars[i] leave the second one in place.
// Confirm no elided line compensates; an erase/remove pass would avoid the skip.
118 for (
unsigned int j = 0; j < result.size(); j++)
120 if (result[j] == chars[i])
121 result.erase(result.begin() + j);
// The output string is allocated on the heap with a raw `new`.
// NOTE(review): nothing visible here deletes it; presumably the pointer is
// returned and the caller must delete it — confirm ownership.
133 string* result =
new string;
// Cache the length once; it is used both as loop bound and for index mirroring.
136 size_t size = sequence.size();
// Append characters from the last position to the first: iteration i reads
// index size - i - 1, so the result is `sequence` in reverse order.
137 for (
size_t i = 0; i < size; i++)
139 *result += sequence[size - i - 1];
// The output string is allocated on the heap with a raw `new`.
// NOTE(review): nothing visible here deletes it; presumably the pointer is
// returned and the caller must delete it — confirm ownership.
151 string* result =
new string;
154 size_t size = sequence.size();
// One output character is appended per input position.
// NOTE(review): the index is `unsigned int` while `size` is size_t (mixed widths).
155 for (
unsigned int i = 0; i < size; i++)
// The switch header is not visible in this listing; the arms below append the
// complementary nucleotide code for each upper-case letter they handle.
// Plain bases: A<->T and C<->G.
159 case 'A': *result +=
'T';
161 case 'C': *result +=
'G';
163 case 'G': *result +=
'C';
165 case 'T': *result +=
'A';
// Two-base ambiguity codes: M<->K and R<->Y.
167 case 'M': *result +=
'K';
169 case 'R': *result +=
'Y';
171 case 'Y': *result +=
'R';
173 case 'K': *result +=
'M';
// Three-base ambiguity codes: V<->B and H<->D.
175 case 'V': *result +=
'B';
177 case 'H': *result +=
'D';
179 case 'D': *result +=
'H';
181 case 'B': *result +=
'V';
// Any character without its own arm is appended unchanged.
183 default: *result += sequence[i];
// Per-nucleotide tallies; values are doubles so fractional weights can be added.
197 map<char, double> counts;
// Window size validation.
// NOTE(review): this guard throws when the window is SMALLER than the sequence,
// yet the message reports "specified window too high". As written, every window
// shorter than the sequence is rejected and an oversized window is accepted.
// The comparison looks inverted; it presumably should be
// `window > sequence.size()` — confirm and fix.
200 if (window < sequence.size())
201 throw BadIntegerException(
"StringSequenceTools::getGCContent : specified window too high", static_cast<int>(window));
// If the window would run past the end of the sequence, slide its start back so
// that it ends exactly at the last character.
// NOTE(review): when window exceeds sequence.size() (which the guard above
// currently lets through) this subtraction wraps around in unsigned arithmetic.
204 if (pos + window > sequence.size())
206 pos = sequence.size() - window;
210 for (
size_t i = pos; i < pos + window; i++)
212 switch (toupper(sequence[i]))
214 case 'A': counts[
'A'] += 1;
216 case 'C': counts[
'C'] += 1;
218 case 'G': counts[
'G'] += 1;
220 case 'T': counts[
'T'] += 1;
222 case 'M': counts[
'A'] += 0.5;
225 case 'R': counts[
'A'] += 0.5;
228 case 'W': counts[
'A'] += 0.5;
231 case 'S': counts[
'C'] += 0.5;
234 case 'Y': counts[
'C'] += 0.5;
237 case 'K': counts[
'G'] += 0.5;
240 case 'V': counts[
'A'] += 0.34;
244 case 'H': counts[
'A'] += 0.34;
248 case 'D': counts[
'A'] += 0.34;
252 case 'B': counts[
'C'] += 0.34;
256 case '-':
throw Exception(
"StringSequenceTools::getGCContent : Gap found in sequence");
259 default: counts[
'A'] += 0.25;
267 return (counts[
'G'] + counts[
'C']) /
static_cast<double>(window);
276 vector<int> code(static_cast<size_t>(floor(static_cast<double>(sequence.size()) / static_cast<double>(size))));
279 while (pos + size <= sequence.size())
281 code[count] = alphabet->charToInt(sequence.substr(pos, size));
293 for (
unsigned int i = 0; i < sequence.size(); i++)
295 result += alphabet->intToChar(sequence[i]);
306 if (sequence.size() == 0)
318 for (
unsigned int i = 0; i < sequence.size(); i++)
323 case 0: u =
true;
break;
324 case 3: p =
true;
break;
325 case 2: r =
true;
break;
326 case 5: pd =
true;
break;
331 throw AlphabetException (
"Sequence::getAlphabetFromSequence : Unknow character detected in specified sequence");
333 throw SequenceException (
"Sequence::getAlphabetFromSequence : Both 'T' and 'U' in the same sequence!");
335 throw SequenceException (
"Sequence::getAlphabetFromSequence : Protein character and 'U' in the same sequence!");
An alphabet exception thrown when trying to specify a bad char to the alphabet.
This alphabet is used to deal with NumericAlphabet.
This alphabet is used to deal with proteins.
The alphabet exception base class.
The sequence exception base class.
This alphabet is used to deal with DNA sequences.
An alphabet exception thrown when trying to specify a bad int to the alphabet.
This alphabet is used to deal with RNA sequences.
Exception thrown when a sequence is found to be empty when it should not be.