bpp-core  2.2.0
PrincipalComponentAnalysis.cpp
Go to the documentation of this file.
1 //
2 // File: PrincipalComponentAnalysis.cpp
3 // Created by: Mathieu Groussin
4 //
5 
6 /*
7  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
8 
9  This software is a computer program whose purpose is to provide classes
10  for phylogenetic data analysis.
11 
12  This software is governed by the CeCILL license under French law and
13  abiding by the rules of distribution of free software. You can use,
14  modify and/ or redistribute the software under the terms of the CeCILL
15  license as circulated by CEA, CNRS and INRIA at the following URL
16  "http://www.cecill.info".
17 
18  As a counterpart to the access to the source code and rights to copy,
19  modify and redistribute granted by the license, users are provided only
20  with a limited warranty and the software's author, the holder of the
21  economic rights, and the successive licensors have only limited
22  liability.
23 
24  In this respect, the user's attention is drawn to the risks associated
25  with loading, using, modifying and/or developing or reproducing the
26  software by the user in light of its specific status of free software,
27  that may mean that it is complicated to manipulate, and that also
28  therefore means that it is reserved for developers and experienced
29  professionals having in-depth computer knowledge. Users are therefore
30  encouraged to load and test the software's suitability as regards their
31  requirements in conditions enabling the security of their systems and/or
32  data to be ensured and, more generally, to use and operate it in the
33  same conditions as regards security.
34 
35  The fact that you are presently reading this means that you have had
36  knowledge of the CeCILL license and that you accept its terms.
37  */
38 
40 #include "../../Matrix/Matrix.h"
41 #include "../../Matrix/MatrixTools.h"
42 #include "../../VectorTools.h"
43 #include "DualityDiagram.h"
44 
45 #include <cmath>
46 
47 using namespace bpp;
48 using namespace std;
49 
51  const Matrix<double>& data,
52  unsigned int nbAxes,
53  const vector<double>& rowW,
54  const vector<double>& colW,
55  bool centered,
56  bool scaled,
57  double tol,
58  bool verbose) throw (Exception) :
60  columnMeans_(),
61  columnSd_()
62 {
  // Constructor taking caller-supplied row weights (rowW) and column
  // weights (colW).
  //
  // columnMeans_ and columnSd_ are only default-constructed above; nothing in
  // this body assigns them.
  // NOTE(review): confirm whether they are meant to be filled from the
  // centering/scaling steps below.

  // Work on a private copy so that the caller's `data` (a const reference)
  // is never altered by the centering/scaling steps.
63  RowMatrix<double> tmpData = data;
64 
65  // Centering of data?
  // When requested, subtract from each column its rowW-weighted mean.
66  if (centered)
67  {
68  center(tmpData, rowW);
69  }
70 
71  // Scaling of data?
  // When requested, divide each column by the root of its rowW-weighted mean
  // square. This runs after centering, so with centered == true the divisor
  // is the weighted standard deviation of the column.
72  if (scaled)
73  {
74  scale(tmpData, rowW);
75  }
76 
  // Hand the (possibly centered and/or scaled) copy over together with the
  // weights, the number of axes, the tolerance and the verbosity flag.
  // NOTE(review): setData is not defined in this file; presumably it is
  // inherited from DualityDiagram and performs the actual analysis - confirm.
77  setData(tmpData, rowW, colW, nbAxes, tol, verbose);
78 }
79 
80 /******************************************************************************/
81 
83  const Matrix<double>& data,
84  unsigned int nbAxes,
85  bool centered,
86  bool scaled,
87  double tol,
88  bool verbose) throw (Exception) :
90  columnMeans_(),
91  columnSd_()
92 {
  // Constructor without explicit weights: the weights are built here and the
  // rest of the body mirrors the constructor that takes rowW/colW.
  //
  // columnMeans_ and columnSd_ are only default-constructed above; nothing in
  // this body assigns them.
  // NOTE(review): confirm whether they are meant to be filled from the
  // centering/scaling steps below.
93  size_t nRow = data.getNumberOfRows();
94  size_t nCol = data.getNumberOfColumns();
95 
  // Default weights: every row gets 1/nRow, every column gets 1.
  // For nRow == 0 the expression 1/nRow is a division by zero, but rowW is
  // empty in that case.
  // NOTE(review): VectorTools::fill presumably assigns the value to every
  // element of the vector - confirm.
96  vector<double> rowW(nRow);
97  vector<double> colW(nCol);
98  VectorTools::fill(rowW, 1. / static_cast<double>(nRow));
99  VectorTools::fill(colW, 1.);
100 
  // Work on a private copy so that the caller's `data` (a const reference)
  // is never altered by the centering/scaling steps.
101  RowMatrix<double> tmpData = data;
102 
103  // Centering of data?
  // When requested, subtract from each column its rowW-weighted mean.
104  if (centered)
105  {
106  center(tmpData, rowW);
107  }
108 
109  // Scaling of data?
  // When requested, divide each column by the root of its rowW-weighted mean
  // square. This runs after centering, so with centered == true the divisor
  // is the weighted standard deviation of the column.
110  if (scaled)
111  {
112  scale(tmpData, rowW);
113  }
114 
  // Hand the (possibly centered and/or scaled) copy over together with the
  // weights, the number of axes, the tolerance and the verbosity flag.
  // NOTE(review): setData is not defined in this file; presumably it is
  // inherited from DualityDiagram and performs the actual analysis - confirm.
115  setData(tmpData, rowW, colW, nbAxes, tol, verbose);
116 }
117 
118 /******************************************************************************/
119 
120 void PrincipalComponentAnalysis::center(Matrix<double>& matrix, const vector<double>& rowW) throw (Exception)
121 {
122  size_t nRow = matrix.getNumberOfRows();
123  size_t nCol = matrix.getNumberOfColumns();
124  if (nRow != rowW.size())
125  throw Exception("PrincipalComponentAnalysis::center. The number of row weigths have to be equal to the number of rows!");
126 
127  double sumRowWeights = VectorTools::sum(rowW);
128 
129  vector<double> columnMeans(nCol);
130  for (unsigned int i = 0; i < nCol; i++)
131  {
132  double tmp = 0.;
133  for (unsigned int j = 0; j < nRow; j++)
134  {
135  tmp += matrix(j, i) * rowW[j];
136  }
137  columnMeans[i] = tmp / sumRowWeights;
138  }
139 
140  for (unsigned int i = 0; i < nCol; i++)
141  {
142  for (unsigned int j = 0; j < nRow; j++)
143  {
144  matrix(j, i) -= columnMeans[i];
145  }
146  }
147 }
148 
149 /******************************************************************************/
150 
151 void PrincipalComponentAnalysis::scale(Matrix<double>& matrix, const vector<double>& rowW) throw (Exception)
152 {
153  size_t nRow = matrix.getNumberOfRows();
154  size_t nCol = matrix.getNumberOfColumns();
155  if (nRow != rowW.size())
156  throw Exception("PrincipalComponentAnalysis::scale. The number of row weigths have to be equal to the number of rows!");
157 
158  double sumRowWeights = VectorTools::sum(rowW);
159 
160  vector<double> columnSd(nCol);
161  for (size_t i = 0; i < nCol; i++)
162  {
163  double tmp = 0.;
164  for (unsigned int j = 0; j < nRow; j++)
165  {
166  tmp += pow(matrix(j, i), 2) * rowW[j];
167  }
168  columnSd[i] = sqrt(tmp / sumRowWeights);
169  }
170 
171  for (size_t i = 0; i < nCol; i++)
172  {
173  for (unsigned int j = 0; j < nRow; j++)
174  {
175  if (columnSd[i] == 0.)
176  matrix(j, i) = 0.;
177  else
178  matrix(j, i) /= columnSd[i];
179  }
180  }
181 }
182 
The matrix template interface.
Definition: Matrix.h:58
PrincipalComponentAnalysis(const Matrix< double > &data, unsigned int nbAxes, const std::vector< double > &rowW, const std::vector< double > &colW, bool centered=true, bool scaled=true, double tol=0.0000001, bool verbose=true)
Build a new PrincipalComponentAnalysis object.
This class allows to perform a principal component analysis.
static T sum(const std::vector< T > &v1)
Definition: VectorTools.h:614
STL namespace.
static void scale(Matrix< double > &matrix, const std::vector< double > &rowW)
This function allows to scale an input matrix by the weighted root mean square of its columns (their standard deviation once the matrix is centered).
static void center(Matrix< double > &matrix, const std::vector< double > &rowW)
This function allows to center an input matrix from its column means.
static void fill(std::vector< T > &v, T value)
Definition: VectorTools.h:381
Exception base class.
Definition: Exceptions.h:57
The core class of a multivariate analysis.