41 #include <Bpp/Seq/SequenceWithQuality.h> 42 #include <Bpp/Seq/SequenceWithAnnotationTools.h> 43 #include <Bpp/Text/TextTools.h> 44 #include <Bpp/Text/KeyvalTools.h> 49 MafBlock* MafParser::analyseCurrentBlock_() throw (Exception)
58 if (stream_->eof())
return 0;
59 getline(*stream_, line,
'\n');
60 if (TextTools::isEmpty(line))
64 if (currentSequence) {
67 delete currentSequence;
73 else if (line[0] ==
'a')
75 if (currentSequence) {
78 delete currentSequence;
85 map<string, string> args;
88 KeyvalTools::multipleKeyvals(line.substr(2), args,
" ");
90 if (args.find(
"score") != args.end())
91 if (args[
"score"] !=
"NA")
92 block->
setScore(TextTools::toDouble(args[
"score"]));
94 if (args.find(
"pass") != args.end())
95 block->
setPass(TextTools::to<unsigned int>(args[
"pass"]));
98 else if (line[0] ==
's')
100 StringTokenizer st(line);
102 string src = st.nextToken();
103 unsigned int start = TextTools::to<unsigned int>(st.nextToken());
104 unsigned int size = TextTools::to<unsigned int>(st.nextToken());
105 string tmp = st.nextToken();
107 throw Exception(
"MafAlignmentParser::nextBlock. Strand specification is incorrect, should be only one character long, found " + TextTools::toString(tmp.size()) +
".");
108 char strand = tmp[0];
110 unsigned int srcSize = TextTools::to<unsigned int>(st.nextToken());
111 if (currentSequence) {
114 delete currentSequence;
116 const string seq = st.nextToken();
117 currentSequence =
new MafSequence(src, seq, start, strand, srcSize);
119 throw Exception(
"MafAlignmentParser::nextBlock. Sequence found (" + src +
") does not match specified size: " + TextTools::toString(currentSequence->
getGenomicSize()) +
", should be " + TextTools::toString(size) +
".");
123 vector<bool> mask(currentSequence->size());
124 for (
unsigned int i = 0; i < mask.size(); ++i) {
125 mask[i] = cmAlphabet_.isMasked(seq[i]);
127 currentSequence->addAnnotation(
new SequenceMask(mask));
130 else if (line[0] ==
'q')
132 if (!currentSequence)
133 throw Exception(
"MafAlignmentParser::nextBlock(). Quality scores found, but there is currently no sequence!");
134 StringTokenizer st(line);
136 string name = st.nextToken();
137 if (name != currentSequence->getName())
138 throw Exception(
"MafAlignmentParser::nextBlock(). Quality scores found, but with a different name from the previous sequence: " + name +
", should be " + currentSequence->getName() +
".");
139 string qstr = st.nextToken();
141 SequenceQuality* seqQual =
new SequenceQuality(qstr.size());
142 for (
unsigned int i = 0; i < qstr.size(); ++i) {
145 seqQual->setScore(i, -1);
146 }
else if (c ==
'0' || c ==
'1' || c ==
'2' || c==
'3' || c ==
'4' || c ==
'5' || c ==
'6' || c ==
'7' || c ==
'8' || c ==
'9') {
147 seqQual->setScore(i, c -
'0');
148 }
else if (c ==
'F' || c ==
'f') {
149 seqQual->setScore(i, 10);
150 }
else if (c ==
'?' || c ==
'.') {
151 seqQual->setScore(i, -2);
153 throw Exception(
"MafAlignmentParser::nextBlock(). Unvalid quality score: " + TextTools::toString(c) +
". Should be 0-9, F or '-'.");
156 currentSequence->addAnnotation(seqQual);
void setScore(double score)
size_t getGenomicSize() const
A synteny block data structure, the basic unit of a MAF alignment file.
void addSequence(const MafSequence &sequence)
void setPass(unsigned int pass)
A sequence class which is used to store data from MAF files.