00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
#include <algorithm>
#include <climits>
#include <cstdlib>
#include "ncl/nxssetreader.h"
#include "ncl/nxstoken.h"
00023 using namespace std;
00024 
00025 void NxsSetReader::AddRangeToSet(unsigned first, unsigned last, unsigned stride, NxsUnsignedSet * destination, const NxsUnsignedSet * taboo, NxsToken &token)
00026     {
00027     NCL_ASSERT (last >= first);
00028     NCL_ASSERT (last !=UINT_MAX);
00029     NCL_ASSERT (stride !=UINT_MAX);
00030     if (destination == NULL)
00031         return;
00032     NxsUnsignedSet::iterator dIt = destination->insert(first).first;
00033     for (unsigned curr = first + stride; curr <= last; curr += stride)
00034         {
00035         if (taboo != NULL && taboo->count(curr) > 0)
00036             {
00037             NxsString errormsg;
00038             errormsg << "Illegal repitition of an index (" << curr + 1 << ") in multiple subsets.";
00039             throw NxsException(errormsg, token);
00040             }
00041         dIt = destination->insert(dIt, curr);
00042         }
00043     }
00044 
00048 unsigned NxsSetReader::InterpretTokenAsIndices(NxsToken &token,
00049   const NxsLabelToIndicesMapper & mapper,
00050   const char * setType,
00051   const char * cmdName,
00052   NxsUnsignedSet * destination)
00053     {
00054     try {
00055         const std::string t = token.GetToken();
00056         if (NxsString::case_insensitive_equals(t.c_str(), "ALL"))
00057             {
00058             unsigned m = mapper.GetMaxIndex();
00059             NxsUnsignedSet s;
00060             for (unsigned i = 0; i <= m; ++i)
00061                 s.insert(i);
00062             destination->insert(s.begin(), s.end());
00063             return (unsigned)s.size();
00064             }
00065         return mapper.GetIndicesForLabel(t, destination);
00066         }
00067     catch (const NxsException & x)
00068         {
00069         NxsString errormsg = "Error in the ";
00070         errormsg << setType << " descriptor of a " << cmdName << " command.\n";
00071         errormsg += x.msg;
00072         throw NxsException(errormsg, token);
00073         }
00074     catch (...)
00075         {
00076         NxsString errormsg = "Expecting a ";
00077         errormsg << setType << " descriptor (number or label) in the " << cmdName << ".  Encountered ";
00078         errormsg <<  token.GetToken();
00079         throw NxsException(errormsg, token);
00080         }
00081     }
00082 
00083 void NxsSetReader::ReadSetDefinition(
00084   NxsToken &token,
00085   const NxsLabelToIndicesMapper & mapper,
00086   const char * setType, 
00087   const char * cmdName, 
00088   NxsUnsignedSet * destination, 
00089   const NxsUnsignedSet * taboo)
00090     {
00091     NxsString errormsg;
00092     NxsUnsignedSet tmpset;
00093     NxsUnsignedSet dummy;
00094     if (destination == NULL)
00095         destination = & dummy;
00096     unsigned previousInd = UINT_MAX;
00097     std::vector<unsigned> intersectVec;
00098     while (!token.Equals(";"))
00099         {
00100         if (taboo && token.Equals(","))
00101             return;
00102         if (token.Equals("-"))
00103             {
00104             if (previousInd == UINT_MAX)
00105                 {
00106                 errormsg = "The '-' must be preceded by number or a ";
00107                 errormsg << setType << " label in the " << cmdName << " command.";
00108                 throw NxsException(errormsg, token);
00109                 }
00110             token.GetNextToken();
00111             if (token.Equals(";") || token.Equals("\\"))
00112                 {
00113                 errormsg = "Range in the ";
00114                 errormsg << setType << " set definition in the " << cmdName << " command must be closed with a number or label.";
00115                 throw NxsException(errormsg, token);
00116                 }
00117             unsigned endpoint;
00118             if (token.Equals("."))
00119                 endpoint = mapper.GetMaxIndex();
00120             else
00121                 {
00122                 tmpset.clear();
00123                 unsigned nAdded = NxsSetReader::InterpretTokenAsIndices(token, mapper, setType, cmdName, &tmpset);
00124                 if (nAdded != 1)
00125                     {
00126                     errormsg = "End of a range in a ";
00127                     errormsg << setType << " set definition in the " << cmdName << " command must be closed with a single number or label (not a set).";
00128                     throw NxsException(errormsg, token);
00129                     }
00130                 endpoint = *(tmpset.begin());
00131                 if (endpoint < previousInd)
00132                     {
00133                     errormsg = "End of a range in a ";
00134                     errormsg << setType << " set definition in the " << cmdName << " command must be a larger index than the start of the range (found ";
00135                     errormsg << previousInd + 1 << " - " << token.GetToken();
00136                     throw NxsException(errormsg, token);
00137                     }
00138                 }
00139             token.GetNextToken();
00140             if (token.Equals("\\"))
00141                 {
00142                 token.GetNextToken();
00143                 NxsString t = token.GetToken();
00144                 unsigned stride = 0;
00145                 try
00146                     {
00147                     stride = t.ConvertToUnsigned();
00148                     }
00149                 catch (const NxsString::NxsX_NotANumber &)
00150                     {}
00151                 if (stride == 0)
00152                     {
00153                     errormsg = "Expecting a positive number indicating the 'stride' after the \\ in the ";
00154                     errormsg << setType << " set definition in the " << cmdName << " command. Encountered ";
00155                     errormsg << t;
00156                     throw NxsException(errormsg, token);
00157                     }
00158                 AddRangeToSet(previousInd, endpoint, stride, destination, taboo, token);
00159                 token.GetNextToken();
00160                 }
00161             else
00162                 AddRangeToSet(previousInd, endpoint, 1, destination, taboo, token);
00163             previousInd = UINT_MAX;
00164             }
00165         else
00166             {
00167             tmpset.clear();
00168             const unsigned nAdded = NxsSetReader::InterpretTokenAsIndices(token, mapper, setType, cmdName, &tmpset);
00169             if (taboo != NULL)
00170                 {
00171                 set_intersection(taboo->begin(), taboo->end(), tmpset.begin(), tmpset.end(), back_inserter(intersectVec));
00172                 if (!intersectVec.empty())
00173                     {
00174                     errormsg << "Illegal repitition of an index (" << 1 + *(intersectVec.begin()) << ") in multiple subsets.";
00175                     throw NxsException(errormsg, token);
00176                     }
00177                 }
00178             if (nAdded == 1 )
00179                 {
00180                 previousInd = *(tmpset.begin());
00181                 destination->insert(previousInd);
00182                 }
00183             else
00184                 {
00185                 previousInd = UINT_MAX;
00186                 destination->insert(tmpset.begin(), tmpset.end());
00187                 }
00188             token.GetNextToken();
00189             }
00190         }
00191     }
00192 
00197 NxsSetReader::NxsSetReader(
00198   NxsToken          &t,         
00199   unsigned          maxValue,   
00200   NxsUnsignedSet    &iset,      
00201   NxsBlock          &nxsblk,    
00202   unsigned          type)       
00203   : block(nxsblk), token(t), nxsset(iset)
00204     {
00205     max     = maxValue;
00206     settype = type;
00207     nxsset.clear();
00208     }
00209 
00218 bool NxsSetReader::AddRange(
00219   unsigned first,       
00220   unsigned last,        
00221   unsigned modulus)     
00222     {
00223     if (last > max || first < 1 || first > last)
00224         return false;
00225 
00226     for (unsigned i = first - 1; i < last; i++)
00227         {
00228         unsigned diff = i - first + 1;
00229         if (modulus > 0 && diff % modulus != 0)
00230             continue;
00231         nxsset.insert(i);
00232         }
00233 
00234     return true;
00235     }
00236 
00241 unsigned NxsSetReader::GetTokenValue()
00242     {
00243     unsigned v = atoi(token.GetToken().c_str());
00244 
00245     if (v == 0 && settype != NxsSetReader::generic)
00246         {
00247         if (settype == NxsSetReader::charset)
00248             v = block.CharLabelToNumber(token.GetToken());
00249         else if (settype == NxsSetReader::taxset)
00250             v = block.TaxonLabelToNumber(token.GetToken());
00251         }
00252 
00253     if (v == 0)
00254         {
00255         block.errormsg = "Set element (";
00256         block.errormsg += token.GetToken();
00257         block.errormsg += ") not a number ";
00258         if (settype == NxsSetReader::charset)
00259             block.errormsg += "and not a valid character label";
00260         else if (settype == NxsSetReader::taxset)
00261             block.errormsg += "and not a valid taxon label";
00262 
00263         throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00264         }
00265 
00266     return v;
00267     }
00268 
00269 std::vector<unsigned> NxsSetReader::GetSetAsVector(const NxsUnsignedSet &s)
00270     {
00271     std::vector<unsigned> u;
00272     u.reserve(s.size());
00273     for (NxsUnsignedSet::const_iterator sIt = s.begin(); sIt != s.end(); ++sIt)
00274         u.push_back(*sIt);
00275     return u;
00276     }
00277 
00278 void NxsSetReader::WriteSetAsNexusValue(const NxsUnsignedSet & nxsset, std::ostream & out)
00279     {
00280     NxsUnsignedSet::const_iterator currIt = nxsset.begin();
00281     const NxsUnsignedSet::const_iterator endIt = nxsset.end();
00282     if (currIt == endIt)
00283         return;
00284     unsigned rangeBegin = 1 + *currIt++;
00285     if (currIt == endIt)
00286         {
00287         out << ' ' << rangeBegin;
00288         return;
00289         }
00290     unsigned prev = 1 + *currIt++;
00291     if (currIt == endIt)
00292         {
00293         out << ' ' << rangeBegin << ' ' << prev;
00294         return;
00295         }
00296     unsigned stride = prev - rangeBegin;
00297     unsigned curr = 1 + *currIt++;
00298     bool inRange = true;
00299     while (currIt != endIt)
00300         {
00301         if (inRange)
00302             {
00303             if (curr - prev != stride)
00304                 {
00305                 if (prev - rangeBegin == stride)
00306                     {
00307                     out << ' ' << rangeBegin;
00308                     rangeBegin = prev;
00309                     stride = curr - prev;
00310                     }
00311                 else
00312                     {
00313                     if (stride > 1)
00314                         out << ' ' << rangeBegin << '-' << prev << " \\ " << stride;
00315                     else
00316                         out << ' ' << rangeBegin << '-' << prev ;
00317                     inRange = false;
00318                     }
00319                 }
00320             }
00321         else
00322             {
00323             inRange = true;
00324             rangeBegin = prev;
00325             stride = curr - prev;
00326             }
00327         prev = curr;
00328         curr = 1 + *currIt;
00329         currIt++;
00330         }
00331     if (inRange)
00332         {
00333         if (curr - prev != stride)
00334             {
00335             if (prev - rangeBegin == stride)
00336                 out << ' ' << rangeBegin << ' ' << prev;
00337             else
00338                 {
00339                 if (stride > 1)
00340                     out << ' ' << rangeBegin << '-' << prev << " \\ " << stride;
00341                 else
00342                     out << ' ' << rangeBegin << '-' << prev ;
00343                 }
00344             out << ' ' << curr;
00345             }
00346         else
00347             {
00348             if (stride > 1)
00349                 out << ' ' << rangeBegin << '-' << curr << " \\ " << stride;
00350             else
00351                 out << ' ' << rangeBegin << '-' << curr ;
00352             }
00353         }
00354     else
00355         out << ' ' << prev << ' ' << curr;
00356     }
00360 bool NxsSetReader::Run()
00361     {
00362     bool ok;
00363     bool retval = false;
00364 
00365     unsigned rangeBegin = UINT_MAX;
00366     unsigned rangeEnd = rangeBegin;
00367     bool insideRange = false;
00368     unsigned modValue = 1;
00369 
00370     for (;;)
00371         {
00372         
00373         
00374         
00375         
00376         
00377         
00378         
00379         token.GetNextToken();
00380 
00381         if (token.Equals("-"))
00382             {
00383             
00384             
00385             
00386             if (insideRange)
00387                 {
00388                 block.errormsg = "The symbol '-' is out of place here";
00389                 throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00390                 }
00391             insideRange = true;
00392             }
00393 
00394         else if (token.Equals("."))
00395             {
00396             
00397             
00398             
00399             if (!insideRange)
00400                 {
00401                 block.errormsg = "The symbol '.' can only be used to specify the end of a range";
00402                 throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00403                 }
00404             rangeEnd = max;
00405             }
00406 
00407         else if (token.Equals("\\"))
00408             {
00409             
00410             
00411             
00412             if (!insideRange)
00413                 {
00414                 block.errormsg = "The symbol '\\' can only be used after the end of a range has been specified";
00415                 throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00416                 }
00417 
00418             
00419             
00420             modValue = NxsToken::DemandPositiveInt(token, block.errormsg, "The modulus value");
00421             }
00422 
00423         else if (insideRange && rangeEnd == UINT_MAX)
00424             {
00425             
00426             
00427             
00428             rangeEnd = GetTokenValue();
00429             }
00430 
00431         else if (insideRange)
00432             {
00433             
00434             
00435             
00436             
00437             
00438             ok = AddRange(rangeBegin, rangeEnd, modValue);
00439             modValue = 1;
00440 
00441             if (!ok)
00442                 {
00443                 block.errormsg = "Character number out of range (or range incorrectly specified) in set specification";
00444                 throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00445                 }
00446 
00447             
00448             
00449             
00450             if (token.Equals(";"))
00451                 {
00452                 retval = true;
00453                 break;
00454                 }
00455             else if (token.Equals(","))
00456                 {
00457                 break;
00458                 }
00459 
00460             rangeBegin = GetTokenValue();
00461             rangeEnd = UINT_MAX;
00462             insideRange = false;
00463             }
00464 
00465         else if (rangeBegin != UINT_MAX)
00466             {
00467             
00468             
00469             
00470             
00471             
00472             ok = AddRange(rangeBegin, rangeBegin, modValue);
00473             modValue = 1;
00474 
00475             if (!ok)
00476                 {
00477                 block.errormsg = "Number out of range (or range incorrectly specified) in set specification";
00478                 throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
00479                 }
00480 
00481             if (token.Equals(";"))
00482                 {
00483                 retval = true;
00484                 break;
00485                 }
00486             else if (token.Equals(","))
00487                 {
00488                 break;
00489                 }
00490 
00491             rangeBegin = GetTokenValue();
00492             rangeEnd = UINT_MAX;
00493             }
00494 
00495         else if (token.Equals(";"))
00496             {
00497             retval = true;
00498             break;
00499             }
00500 
00501         else if (token.Equals(","))
00502             {
00503             break;
00504             }
00505 
00506         else if (token.Equals("ALL"))
00507             {
00508             rangeBegin = 1;
00509             rangeEnd = max;
00510             ok = AddRange(rangeBegin, rangeEnd);
00511             }
00512 
00513         else
00514             {
00515             
00516             
00517             
00518             
00519             rangeBegin = GetTokenValue();
00520             rangeEnd = UINT_MAX;
00521             }
00522         }
00523 
00524     return retval;
00525     }