64 throw IOException(
"GeneMapperCsvExport::read: fail to open stream.");
69 DataTable* dtp = DataTable::read(is,
"\t",
true, -1);
75 vector<string> ind_names;
76 vector<string> markers;
79 ind_names = dt.getColumn(SAMPLE_NAME_H);
80 markers = dt.getColumn(MARKER_H);
86 map<string, int> indname_marker;
87 for (
size_t i = 0; i < dt.getNumberOfRows(); i++)
89 string test_lab = dt(i, SAMPLE_NAME_H) + dt(i, MARKER_H);
90 if (indname_marker.find(test_lab) != indname_marker.end())
92 string new_lab = dt(i, SAMPLE_NAME_H) +
"_" + TextTools::toString(indname_marker[test_lab] + 1);
93 dt (i, SAMPLE_NAME_H) = new_lab;
95 indname_marker[test_lab]++;
97 ind_names = dt.getColumn(SAMPLE_NAME_H);
99 map<string, size_t> ind_count = VectorTools::countValues(ind_names);
100 ind_names = VectorTools::unique(ind_names);
101 markers = VectorTools::unique(markers);
102 size_t loc_nbr = markers.size();
107 data_set.initAnalyzedLoci(loc_nbr);
112 data_set.addEmptyGroup(0);
113 for (
unsigned int i = 0; i < ind_names.size(); i++)
116 data_set.addIndividualToGroup(data_set.getGroupPosition(0), ind);
123 vector<string> col_names = dt.getColumnNames();
126 vector<size_t> alleles_cols;
127 for (
size_t i = 0; i < col_names.size(); i++)
129 if (TextTools::startsWith(col_names[i], ALLELE_H))
130 alleles_cols.push_back(i);
133 vector<vector<size_t> > alleles_pos;
134 for (
size_t i = 0; i < markers.size(); i++)
138 std::map< std::string, std::set< std::string > > markerAlleles;
139 for (
size_t i = 0; i < dt.getNumberOfRows(); ++i)
141 for (
size_t j = 0; j < alleles_cols.size(); ++j)
143 if (dt(i, alleles_cols[j]) !=
"")
145 markerAlleles[dt(i, MARKER_H)].insert(dt(i, alleles_cols[j]));
149 for (std::map< std::string, std::set< std::string > >::iterator itm = markerAlleles.begin(); itm != markerAlleles.end(); itm++)
151 std::set< std::string >& s = itm->second;
152 for (std::set< std::string >::iterator its = s.begin(); its != s.end(); its++)
157 data_set.setAnalyzedLoci(al);
162 size_t ind_col_index = VectorTools::which(dt.getColumnNames(), SAMPLE_NAME_H);
163 size_t mark_col_index = VectorTools::which(dt.getColumnNames(), MARKER_H);
164 for (
size_t i = 0; i < dt.getNumberOfRows(); i++)
166 vector<size_t> alleles;
167 for (
size_t j = 0; j < alleles_cols.size(); j++)
169 if (!TextTools::isEmpty(dt(i, alleles_cols[j])))
171 unsigned int num = (data_set.getLocusInfoByName(dt(i, mark_col_index))).getAlleleInfoKey(dt(i, alleles_cols[j]));
172 alleles.push_back(num);
175 alleles = VectorTools::unique(alleles);
177 if (!data_set.getIndividualByIdFromGroup(0, dt(i, ind_col_index))->hasGenotype())
178 data_set.initIndividualGenotypeInGroup(0, data_set.getIndividualPositionInGroup(0, dt(i, ind_col_index)));
180 data_set.setIndividualMonolocusGenotypeInGroup(0, data_set.getIndividualPositionInGroup(0, dt(i, ind_col_index)), data_set.getAnalyzedLoci()->getLocusInfoPosition(dt(i, mark_col_index)), ma);
210 StringTokenizer st(row,
"\t",
true,
false);
216 size_t itemNum = st.numberOfRemainingTokens();
217 size_t alleleNum = (itemNum - 7) / 4;
222 dye_ = st.getToken(4);
223 dac_ = st.getToken(itemNum - 2);
224 an_ = TextTools::toDouble(st.getToken(itemNum - 1));
225 for (
unsigned int i = 0; i < alleleNum; ++i)
229 TextTools::toDouble(st.getToken(5 + alleleNum + i)),
230 TextTools::to<unsigned int>(st.getToken(5 + (2 * alleleNum) + i)),
231 TextTools::toDouble(st.getToken(5 + (3 * alleleNum) + i))
Record(const std::string &row)
Constructor.
The MultiAlleleMonolocusGenotype class.
virtual void read(std::istream &is, DataSet &data_set)=0
Read a DataSet from an istream.
static const std::string AN_H
static const std::string SIZE_H
The BasicAlleleInfo class.
static const std::string SAMPLE_NAME_H
static const std::string PANEL_H
void read(std::istream &is, DataSet &data_set)
Read a DataSet from an istream.
static const std::string HEIGHT_H
static const std::string DAC_H
std::vector< GeneMapperCsvExport::Allele > alleles_
static const std::string MARKER_H
static unsigned int UNKNOWN
static const std::string DYE_H
static const std::string SAMPLE_FILE_H
Store data for one allele.
static const std::string PEAK_AREA_H
static const std::string ALLELE_H