// SymbolListTools::getCounts (single-list overload): accumulates per-state weights
// for `list` into the caller-supplied `counts` map (existing entries are added to,
// not reset). This listing is lossy: the loop conditions, the first loop's body and
// the statement choosing between the two loops are not visible.
// NOTE(review): presumably `resolveUnknowns` selects the second (alias-based) loop — confirm.
42 #include <Bpp/Numeric/Random/RandomTools.h> 51 void SymbolListTools::getCounts(
const SymbolList& list, map<int, double>& counts,
bool resolveUnknowns)
// First pass over the raw content of the list (body not visible here).
55 for (vector<int>::const_iterator seqit = list.
getContent().begin();
// Second pass over the raw content of the list.
62 for (vector<int>::const_iterator seqit = list.
getContent().begin();
// NOTE(review): `alias` is defined on a line not shown; presumably it is the result
// of Alphabet::getAlias() for the current symbol — confirm.
67 double n = (double)alias.size();
// One symbol contributes a total weight of 1, split evenly over its n alias states.
68 for (
size_t j = 0; j < alias.size(); j++) counts[alias[j]] += 1./n ;
// SymbolListTools::getCounts (two-list overload): accumulates joint weights of the
// state pairs found at the same index in `list1` and `list2` into
// counts[state1][state2]. Existing entries in `counts` are added to, not reset.
// Throws DimensionException when the two lists differ in size.
// This listing is lossy: the statement choosing between the two loops is not visible.
// NOTE(review): presumably `resolveUnknowns` selects the alias-based loop — confirm.
73 void SymbolListTools::getCounts(
const SymbolList& list1,
const SymbolList& list2, map<
int, map<int, double> >& counts,
bool resolveUnknowns)
throw (DimensionException)
// Positions are paired by index, so both lists must have the same length.
75 if (list1.size() != list2.size())
throw DimensionException(
"SymbolListTools::getCounts: the two sites must have the same size.", list1.size(), list2.size());
// Plain counting: every position adds 1 to the cell of its (state1, state2) pair.
78 for (
size_t i = 0; i < list1.size(); i++)
79 counts[list1[i]][list2[i]]++;
// Alias-aware counting: each state is expanded with Alphabet::getAlias() and the
// position's weight of 1 is shared evenly between all n1 * n2 alias combinations.
83 for (
size_t i = 0; i < list1.size(); i++)
85 vector<int> alias1 = list1.getAlphabet()->getAlias(list1[i]);
86 vector<int> alias2 = list2.getAlphabet()->getAlias(list2[i]);
87 double n1 = (double)alias1.size();
88 double n2 = (double)alias2.size();
89 for (
size_t j = 0; j < alias1.size(); j++)
90 for (
size_t k = 0; k < alias2.size(); k++)
91 counts[alias1[j]][alias2[k]] += 1./(n1*n2) ;
96 void SymbolListTools::getFrequencies(
const SymbolList& list, map<int, double>& frequencies,
bool resolveUnknowns)
98 double n = (double)list.
size();
99 map<int, double> counts;
100 getCounts(list, counts, resolveUnknowns);
101 for (map<int, double>::iterator i = counts.begin(); i != counts.end(); i++)
103 frequencies[i->first] = i->second / n;
107 void SymbolListTools::getFrequencies(
const SymbolList& list1,
const SymbolList& list2, map<
int, map<int, double> >& frequencies,
bool resolveUnknowns)
throw (DimensionException)
109 double n2 = (double)list1.size() * (double)list1.size();
110 map<int, map<int, double> > counts;
111 getCounts(list1, list2, counts, resolveUnknowns);
112 for (map<
int, map<int, double> >::iterator i = counts.begin(); i != counts.end(); i++)
113 for (map<int, double>::iterator j = i->second.begin(); j != i->second.end(); j++)
115 frequencies[i->first][j->first] = j->second / n2;
// Fragment of the GC-content computation. The function signature, the
// declarations of `gc` and `total`, the `switch` header and several branch
// bodies are not visible in this listing.
// Only nucleic alphabets are accepted; anything else raises AlphabetException.
121 const Alphabet * alphabet = list.getAlphabet();
122 if (!AlphabetTools::isNucleicAlphabet(alphabet))
123 throw AlphabetException(
"SymbolListTools::getGCContent. Method only works on nucleotides.", alphabet);
// Each position is classified by its integer state code.
126 for (
size_t i = 0; i < list.size(); i++) {
127 int state = list.getValue(i);
// NOTE(review): codes 1 and 2 are presumably C and G, and 0 and 3 presumably
// A and T/U — confirm against the nucleic alphabet's state table.
129 if (state == 1 || state == 2) {
132 }
else if (state == 0 || state == 3) {
// Remaining codes are weighted only when unresolved states are not ignored.
135 if (!ignoreUnresolved) {
// Each listed code adds a fixed GC weight: 1, 1/2, 1/3 or 2/3.
// NOTE(review): presumably these are IUPAC ambiguity codes weighted by the share
// of C/G among the bases they stand for. Code 8 has no case while code 6 adds 0.5;
// verify both against the alphabet's state table.
138 case(7): gc++;
break;
139 case(4): gc+=0.5;
break;
140 case(5): gc+=0.5;
break;
141 case(6): gc+=0.5;
break;
142 case(9): gc+=0.5;
break;
143 case(10): gc+=2./3.;
break;
144 case(11): gc+=1./3.;
break;
145 case(12): gc+=1./3.;
break;
146 case(13): gc+=2./3.;
break;
147 case(14): gc+=0.5;
break;
// This position is added to the denominator only when gaps are not ignored.
152 if (!ignoreGap) total++;
// Yields 0 rather than dividing by zero when nothing was counted.
155 return total != 0 ? gc/total : 0;
// Fragment: counts the positions at which two lists hold different states.
// The function signature, the declaration of `count` and the return statement
// are not visible in this listing.
// Lists built on different alphabet types cannot be compared.
160 if (l1.getAlphabet()->getAlphabetType() != l2.getAlphabet()->getAlphabetType())
throw AlphabetMismatchException(
"SymbolListTools::getNumberOfDistinctPositions.", l1.getAlphabet(), l2.getAlphabet());
// Only the common prefix is compared; extra elements of the longer list are not examined.
161 size_t n = min(l1.size(), l2.size());
163 for (
size_t i = 0; i < n; i++) {
164 if (l1[i] != l2[i]) count++;
// Fragment: counts the positions at which neither list holds the state -1.
// The function signature, the declaration of `count` and the return statement
// are not visible in this listing.
// NOTE(review): -1 is presumably the gap code (compare Alphabet::getGapCharacterCode()) — confirm.
// NOTE(review): the exception text names getNumberOfDistinctPositions although this
// loop counts something else; possibly a copy/paste leftover — confirm the enclosing function's name.
171 if (l1.getAlphabet() -> getAlphabetType() != l2.getAlphabet() -> getAlphabetType())
throw AlphabetMismatchException(
"SymbolListTools::getNumberOfDistinctPositions.", l1.getAlphabet(), l2.getAlphabet());
// Only the common prefix is examined; extra elements of the longer list are not examined.
172 size_t n = min(l1.size(), l2.size());
174 for (
size_t i = 0; i < n; i++) {
175 if (l1[i] != -1 && l2[i] != -1) count++;
// Walks every position of `l`, which is taken by non-const reference; the
// per-position statement is not visible in this listing.
// NOTE(review): judging by the name, gap states are presumably replaced by the
// alphabet's unknown-character code — confirm against the full source.
180 void SymbolListTools::changeGapsToUnknownCharacters(
SymbolList& l)
183 for (
size_t i = 0; i < l.
size(); i++)
// Walks every position of `l`, which is taken by non-const reference; the
// per-position statement is not visible in this listing.
// NOTE(review): judging by the name, unresolved states are presumably replaced by
// the alphabet's gap code — confirm against the full source.
189 void SymbolListTools::changeUnresolvedCharactersToGaps(
SymbolList& l)
192 for (
size_t i = 0; i < l.
size(); i++)
The SymbolList interface.
virtual bool isGap(int state) const =0
This alphabet is used to deal with NumericAlphabet.
virtual bool isUnresolved(int state) const =0
virtual std::vector< int > getAlias(int state) const =0
Get all resolved states that match a generic state.
virtual int getGapCharacterCode() const =0
The alphabet exception base class.
virtual const Alphabet * getAlphabet() const =0
Get the alphabet associated to the list.
virtual size_t size() const =0
Get the number of elements in the list.
virtual const std::vector< int > & getContent() const =0
Get the whole content of the list as a vector of int.
Exception thrown when two alphabets do not match.
virtual int getUnknownCharacterCode() const =0