42 #include <Bpp/Io/FileTools.h> 43 #include <Bpp/App/ApplicationTools.h> 44 #include <Bpp/Text/TextTools.h> 45 #include <Bpp/Text/StringTokenizer.h> 46 #include <Bpp/Numeric/VectorTools.h> 49 #include <Bpp/Seq/Container/SequenceContainerTools.h> 63 const ProteicAlphabet* alpha,
const std::string& path,
const std::string& prefix) :
64 AbstractParameterAliasable(prefix),
75 const ProteicAlphabet* alpha,
const std::string& path,
78 AbstractParameterAliasable(prefix),
87 addParameters_(
freqSet_->getParameters());
95 if (TextTools::hasSubstring(
freqSet_->getNamespace(),
"+F.") )
105 ifstream in(
path_.c_str(), ios::in);
107 for (
unsigned int i = 1; i < 20; i++)
109 string line = FileTools::getNextLine(in);
110 StringTokenizer st(line);
111 for(
unsigned int j = 0; j < i; j++) {
112 double s = TextTools::toDouble(st.nextToken());
117 unsigned int fCount = 0;
118 while (in && fCount < 20)
120 string line = FileTools::getNextLine(in);
121 StringTokenizer st(line);
122 while(st.hasMoreToken() && fCount < 20)
124 freq_[fCount] = TextTools::toDouble(st.nextToken());
128 double sf = VectorTools::sum(
freq_);
129 if (sf - 1 > 0.000001)
131 ApplicationTools::displayMessage(
"WARNING!!! Frequencies sum to " + TextTools::toString(sf) +
", frequencies have been scaled.");
136 for (
unsigned int i = 0; i < 20; i++)
139 for(
unsigned int j = 0; j < 20; j++)
154 map<int, int> counts;
155 SequenceContainerTools::getCounts(data, counts);
157 for (
int i = 0; i < static_cast<int>(
size_); i++)
159 t += (counts[i] + pseudoCount);
161 for (
size_t i = 0; i < size_; ++i) freq_[i] = (static_cast<double>(counts[
static_cast<int>(i)]) + pseudoCount) / t;
164 matchParametersValues(
freqSet_->getParameters());
std::string getName() const
Get the name of the model.
ProteinFrequenciesSet * freqSet_
RowMatrix< double > exchangeability_
The exchangeability matrix $S$ of the model, defined as $S_{ij} = Q_{ij} / \pi_j$. When the model is reversible, this matrix is symmetric.
This class implements a state map where all resolved states are modeled.
virtual void setFrequencies(const std::vector< double > &frequencies)=0
Set the parameters in order to match a given set of frequencies.
FrequenciesSet useful for homogeneous and stationary models, protein implementation.
virtual const std::vector< double > getFrequencies() const =0
Vdouble freq_
The vector of equilibrium frequencies.
virtual void updateMatrices()
Compute and diagonalize the matrix, and fill the eigenValues_, leftEigenVectors_ and rightEigenVectors_ members.
UserProteinSubstitutionModel(const ProteicAlphabet *alpha, const std::string &path, const std::string &prefix)
Build a protein model from a PAML file, with original equilibrium frequencies.
Partial implementation of the ReversibleSubstitutionModel interface.
Parametrize a set of state frequencies for proteins.
size_t size_
The size of the generator, i.e. the number of states.
void setFreqFromData(const SequenceContainer &data, double pseudoCount=0)
Set equilibrium frequencies equal to the frequencies estimated from the data.