00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include <climits>
00020 #include "ncl/nxssetreader.h"
00021 #include "ncl/nxstoken.h"
00022 #include <algorithm>
00023 using namespace std;
00024
00025 void NxsSetReader::AddRangeToSet(unsigned first, unsigned last, unsigned stride, NxsUnsignedSet * destination, const NxsUnsignedSet * taboo, NxsToken &token)
00026 {
00027 NCL_ASSERT (last >= first);
00028 NCL_ASSERT (last !=UINT_MAX);
00029 NCL_ASSERT (stride !=UINT_MAX);
00030 if (destination == NULL)
00031 return;
00032 NxsUnsignedSet::iterator dIt = destination->insert(first).first;
00033 for (unsigned curr = first + stride; curr <= last; curr += stride)
00034 {
00035 if (taboo != NULL && taboo->count(curr) > 0)
00036 {
00037 NxsString errormsg;
00038 errormsg << "Illegal repitition of an index (" << curr + 1 << ") in multiple subsets.";
00039 throw NxsException(errormsg, token);
00040 }
00041 dIt = destination->insert(dIt, curr);
00042 }
00043 }
00044
00048 unsigned NxsSetReader::InterpretTokenAsIndices(NxsToken &token,
00049 const NxsLabelToIndicesMapper & mapper,
00050 const char * setType,
00051 const char * cmdName,
00052 NxsUnsignedSet * destination)
00053 {
00054 try {
00055 const std::string t = token.GetToken();
00056 if (NxsString::case_insensitive_equals(t.c_str(), "ALL"))
00057 {
00058 unsigned m = mapper.GetMaxIndex();
00059 NxsUnsignedSet s;
00060 for (unsigned i = 0; i <= m; ++i)
00061 s.insert(i);
00062 destination->insert(s.begin(), s.end());
00063 return (unsigned)s.size();
00064 }
00065 return mapper.GetIndicesForLabel(t, destination);
00066 }
00067 catch (const NxsException & x)
00068 {
00069 NxsString errormsg = "Error in the ";
00070 errormsg << setType << " descriptor of a " << cmdName << " command.\n";
00071 errormsg += x.msg;
00072 throw NxsException(errormsg, token);
00073 }
00074 catch (...)
00075 {
00076 NxsString errormsg = "Expecting a ";
00077 errormsg << setType << " descriptor (number or label) in the " << cmdName << ". Encountered ";
00078 errormsg << token.GetToken();
00079 throw NxsException(errormsg, token);
00080 }
00081 }
00082
00083 void NxsSetReader::ReadSetDefinition(
00084 NxsToken &token,
00085 const NxsLabelToIndicesMapper & mapper,
00086 const char * setType,
00087 const char * cmdName,
00088 NxsUnsignedSet * destination,
00089 const NxsUnsignedSet * taboo)
00090 {
00091 NxsString errormsg;
00092 NxsUnsignedSet tmpset;
00093 NxsUnsignedSet dummy;
00094 if (destination == NULL)
00095 destination = & dummy;
00096 unsigned previousInd = UINT_MAX;
00097 std::vector<unsigned> intersectVec;
00098 while (!token.Equals(";"))
00099 {
00100 if (taboo && token.Equals(","))
00101 return;
00102 if (token.Equals("-"))
00103 {
00104 if (previousInd == UINT_MAX)
00105 {
00106 errormsg = "The '-' must be preceded by number or a ";
00107 errormsg << setType << " label in the " << cmdName << " command.";
00108 throw NxsException(errormsg, token);
00109 }
00110 token.GetNextToken();
00111 if (token.Equals(";") || token.Equals("\\"))
00112 {
00113 errormsg = "Range in the ";
00114 errormsg << setType << " set definition in the " << cmdName << " command must be closed with a number or label.";
00115 throw NxsException(errormsg, token);
00116 }
00117 unsigned endpoint;
00118 if (token.Equals("."))
00119 endpoint = mapper.GetMaxIndex();
00120 else
00121 {
00122 tmpset.clear();
00123 unsigned nAdded = NxsSetReader::InterpretTokenAsIndices(token, mapper, setType, cmdName, &tmpset);
00124 if (nAdded != 1)
00125 {
00126 errormsg = "End of a range in a ";
00127 errormsg << setType << " set definition in the " << cmdName << " command must be closed with a single number or label (not a set).";
00128 throw NxsException(errormsg, token);
00129 }
00130 endpoint = *(tmpset.begin());
00131 if (endpoint < previousInd)
00132 {
00133 errormsg = "End of a range in a ";
00134 errormsg << setType << " set definition in the " << cmdName << " command must be a larger index than the start of the range (found ";
00135 errormsg << previousInd + 1 << " - " << token.GetToken();
00136 throw NxsException(errormsg, token);
00137 }
00138 }
00139 token.GetNextToken();
00140 if (token.Equals("\\"))
00141 {
00142 token.GetNextToken();
00143 NxsString t = token.GetToken();
00144 unsigned stride = 0;
00145 try
00146 {
00147 stride = t.ConvertToUnsigned();
00148 }
00149 catch (const NxsString::NxsX_NotANumber &)
00150 {}
00151 if (stride == 0)
00152 {
00153 errormsg = "Expecting a positive number indicating the 'stride' after the \\ in the ";
00154 errormsg << setType << " set definition in the " << cmdName << " command. Encountered ";
00155 errormsg << t;
00156 throw NxsException(errormsg, token);
00157 }
00158 AddRangeToSet(previousInd, endpoint, stride, destination, taboo, token);
00159 token.GetNextToken();
00160 }
00161 else
00162 AddRangeToSet(previousInd, endpoint, 1, destination, taboo, token);
00163 previousInd = UINT_MAX;
00164 }
00165 else
00166 {
00167 tmpset.clear();
00168 const unsigned nAdded = NxsSetReader::InterpretTokenAsIndices(token, mapper, setType, cmdName, &tmpset);
00169 if (taboo != NULL)
00170 {
00171 set_intersection(taboo->begin(), taboo->end(), tmpset.begin(), tmpset.end(), back_inserter(intersectVec));
00172 if (!intersectVec.empty())
00173 {
00174 errormsg << "Illegal repitition of an index (" << 1 + *(intersectVec.begin()) << ") in multiple subsets.";
00175 throw NxsException(errormsg, token);
00176 }
00177 }
00178 if (nAdded == 1 )
00179 {
00180 previousInd = *(tmpset.begin());
00181 destination->insert(previousInd);
00182 }
00183 else
00184 {
00185 previousInd = UINT_MAX;
00186 destination->insert(tmpset.begin(), tmpset.end());
00187 }
00188 token.GetNextToken();
00189 }
00190 }
00191 }
00192
00197 NxsSetReader::NxsSetReader(
00198 NxsToken &t,
00199 unsigned maxValue,
00200 NxsUnsignedSet &iset,
00201 NxsBlock &nxsblk,
00202 unsigned type)
00203 : block(nxsblk), token(t), nxsset(iset)
00204 {
00205 max = maxValue;
00206 settype = type;
00207 nxsset.clear();
00208 }
00209
00218 bool NxsSetReader::AddRange(
00219 unsigned first,
00220 unsigned last,
00221 unsigned modulus)
00222 {
00223 if (last > max || first < 1 || first > last)
00224 return false;
00225
00226 for (unsigned i = first - 1; i < last; i++)
00227 {
00228 unsigned diff = i - first + 1;
00229 if (modulus > 0 && diff % modulus != 0)
00230 continue;
00231 nxsset.insert(i);
00232 }
00233
00234 return true;
00235 }
00236
00241 unsigned NxsSetReader::GetTokenValue()
00242 {
00243 unsigned v = atoi(token.GetToken().c_str());
00244
00245 if (v == 0 && settype != NxsSetReader::generic)
00246 {
00247 if (settype == NxsSetReader::charset)
00248 v = block.CharLabelToNumber(token.GetToken());
00249 else if (settype == NxsSetReader::taxset)
00250 v = block.TaxonLabelToNumber(token.GetToken());
00251 }
00252
00253 if (v == 0)
00254 {
00255 block.errormsg = "Set element (";
00256 block.errormsg += token.GetToken();
00257 block.errormsg += ") not a number ";
00258 if (settype == NxsSetReader::charset)
00259 block.errormsg += "and not a valid character label";
00260 else if (settype == NxsSetReader::taxset)
00261 block.errormsg += "and not a valid taxon label";
00262
00263 throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00264 }
00265
00266 return v;
00267 }
00268
00269 std::vector<unsigned> NxsSetReader::GetSetAsVector(const NxsUnsignedSet &s)
00270 {
00271 std::vector<unsigned> u;
00272 u.reserve(s.size());
00273 for (NxsUnsignedSet::const_iterator sIt = s.begin(); sIt != s.end(); ++sIt)
00274 u.push_back(*sIt);
00275 return u;
00276 }
00277
00278 void NxsSetReader::WriteSetAsNexusValue(const NxsUnsignedSet & nxsset, std::ostream & out)
00279 {
00280 NxsUnsignedSet::const_iterator currIt = nxsset.begin();
00281 const NxsUnsignedSet::const_iterator endIt = nxsset.end();
00282 if (currIt == endIt)
00283 return;
00284 unsigned rangeBegin = 1 + *currIt++;
00285 if (currIt == endIt)
00286 {
00287 out << ' ' << rangeBegin;
00288 return;
00289 }
00290 unsigned prev = 1 + *currIt++;
00291 if (currIt == endIt)
00292 {
00293 out << ' ' << rangeBegin << ' ' << prev;
00294 return;
00295 }
00296 unsigned stride = prev - rangeBegin;
00297 unsigned curr = 1 + *currIt++;
00298 bool inRange = true;
00299 while (currIt != endIt)
00300 {
00301 if (inRange)
00302 {
00303 if (curr - prev != stride)
00304 {
00305 if (prev - rangeBegin == stride)
00306 {
00307 out << ' ' << rangeBegin;
00308 rangeBegin = prev;
00309 stride = curr - prev;
00310 }
00311 else
00312 {
00313 if (stride > 1)
00314 out << ' ' << rangeBegin << '-' << prev << " \\ " << stride;
00315 else
00316 out << ' ' << rangeBegin << '-' << prev ;
00317 inRange = false;
00318 }
00319 }
00320 }
00321 else
00322 {
00323 inRange = true;
00324 rangeBegin = prev;
00325 stride = curr - prev;
00326 }
00327 prev = curr;
00328 curr = 1 + *currIt;
00329 currIt++;
00330 }
00331 if (inRange)
00332 {
00333 if (curr - prev != stride)
00334 {
00335 if (prev - rangeBegin == stride)
00336 out << ' ' << rangeBegin << ' ' << prev;
00337 else
00338 {
00339 if (stride > 1)
00340 out << ' ' << rangeBegin << '-' << prev << " \\ " << stride;
00341 else
00342 out << ' ' << rangeBegin << '-' << prev ;
00343 }
00344 out << ' ' << curr;
00345 }
00346 else
00347 {
00348 if (stride > 1)
00349 out << ' ' << rangeBegin << '-' << curr << " \\ " << stride;
00350 else
00351 out << ' ' << rangeBegin << '-' << curr ;
00352 }
00353 }
00354 else
00355 out << ' ' << prev << ' ' << curr;
00356 }
00360 bool NxsSetReader::Run()
00361 {
00362 bool ok;
00363 bool retval = false;
00364
00365 unsigned rangeBegin = UINT_MAX;
00366 unsigned rangeEnd = rangeBegin;
00367 bool insideRange = false;
00368 unsigned modValue = 1;
00369
00370 for (;;)
00371 {
00372
00373
00374
00375
00376
00377
00378
00379 token.GetNextToken();
00380
00381 if (token.Equals("-"))
00382 {
00383
00384
00385
00386 if (insideRange)
00387 {
00388 block.errormsg = "The symbol '-' is out of place here";
00389 throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00390 }
00391 insideRange = true;
00392 }
00393
00394 else if (token.Equals("."))
00395 {
00396
00397
00398
00399 if (!insideRange)
00400 {
00401 block.errormsg = "The symbol '.' can only be used to specify the end of a range";
00402 throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00403 }
00404 rangeEnd = max;
00405 }
00406
00407 else if (token.Equals("\\"))
00408 {
00409
00410
00411
00412 if (!insideRange)
00413 {
00414 block.errormsg = "The symbol '\\' can only be used after the end of a range has been specified";
00415 throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00416 }
00417
00418
00419
00420 modValue = NxsToken::DemandPositiveInt(token, block.errormsg, "The modulus value");
00421 }
00422
00423 else if (insideRange && rangeEnd == UINT_MAX)
00424 {
00425
00426
00427
00428 rangeEnd = GetTokenValue();
00429 }
00430
00431 else if (insideRange)
00432 {
00433
00434
00435
00436
00437
00438 ok = AddRange(rangeBegin, rangeEnd, modValue);
00439 modValue = 1;
00440
00441 if (!ok)
00442 {
00443 block.errormsg = "Character number out of range (or range incorrectly specified) in set specification";
00444 throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00445 }
00446
00447
00448
00449
00450 if (token.Equals(";"))
00451 {
00452 retval = true;
00453 break;
00454 }
00455 else if (token.Equals(","))
00456 {
00457 break;
00458 }
00459
00460 rangeBegin = GetTokenValue();
00461 rangeEnd = UINT_MAX;
00462 insideRange = false;
00463 }
00464
00465 else if (rangeBegin != UINT_MAX)
00466 {
00467
00468
00469
00470
00471
00472 ok = AddRange(rangeBegin, rangeBegin, modValue);
00473 modValue = 1;
00474
00475 if (!ok)
00476 {
00477 block.errormsg = "Number out of range (or range incorrectly specified) in set specification";
00478 throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00479 }
00480
00481 if (token.Equals(";"))
00482 {
00483 retval = true;
00484 break;
00485 }
00486 else if (token.Equals(","))
00487 {
00488 break;
00489 }
00490
00491 rangeBegin = GetTokenValue();
00492 rangeEnd = UINT_MAX;
00493 }
00494
00495 else if (token.Equals(";"))
00496 {
00497 retval = true;
00498 break;
00499 }
00500
00501 else if (token.Equals(","))
00502 {
00503 break;
00504 }
00505
00506 else if (token.Equals("ALL"))
00507 {
00508 rangeBegin = 1;
00509 rangeEnd = max;
00510 ok = AddRange(rangeBegin, rangeEnd);
00511 }
00512
00513 else
00514 {
00515
00516
00517
00518
00519 rangeBegin = GetTokenValue();
00520 rangeEnd = UINT_MAX;
00521 }
00522 }
00523
00524 return retval;
00525 }