Ray: /ray/src/lib/lex/basicparse.cc Source File

00001 /*
00002  * lib/lex/basicparse.cc
00003  * 
00004  * Basic integer / floating point parsing routines. 
00005  * 
00006  * Copyright (c) 2004 by Wolfgang Wieser ] wwieser (a) gmx <*> de [ 
00007  * 
00008  * This file may be distributed and/or modified under the terms of the 
00009  * GNU General Public License version 2 as published by the Free Software 
00010  * Foundation. (See COPYING.GPL for details.)
00011  * 
00012  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
00013  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
00014  * 
00015  */
00016 
00017 #include "basicparse.h"
00018 #include <ctype.h>
00019 
00020 #if HAVE_ERRNO_H
00021 #include <errno.h>
00022 #endif
00023 
00024 
00025 // Internally used string to integer conversion function which can handle 
00026 // 8,16,32 and 64 bit signed integers. To parse a value of type T 
00027 // (with T being int8,int16,int32,int64), supply the corresponding unsigned 
00028 // type as UT. 
00029 // (Large but inline as only used once in the file.)
00030 // By Wolfgang Wieser, part of algorithm from glibc. 
00031 template<typename T,typename UT>
00032     static inline char *_ParseInt(T *rv,const char *str,
00033         SError &error,bool may_continue)
00034 {
00035     error.clear();
00036     
00037     const char *s=str;
00038     if(!s)  return((char*)s);
00039     
00040     // First, skip any whitespace. 
00041     while(isspace(*s))  ++s;
00042     
00043     // Skip sign, if any. 
00044     bool neg=0;
00045     if(*s=='-')
00046     {  neg=1;  ++s;  }
00047     else if(*s=='+')
00048     {  ++s;  }
00049     
00050     // Then, determine base. 
00051     char base=10;
00052     if(*s=='0')
00053     {
00054         ++s;
00055         if(*s=='x' || *s=='X')
00056         {  base=16;  ++s;  }
00057         else if(*s=='b' || *s=='B')
00058         {  base=2;  ++s;  }
00059         else
00060         {  base=8;  --s;  }
00061     }
00062     
00063     // Read in the value. 
00064     static const UT min_val=
00065         sizeof(T)==1 ? (UT)0x80U : 
00066         sizeof(T)==2 ? (UT)0x8000U : 
00067         sizeof(T)==4 ? (UT)0x80000000LU : 
00068                        (UT)0x8000000000000000LLU ;
00069     static const UT max_val=
00070         sizeof(T)==1 ? (UT)0x7fU : 
00071         sizeof(T)==2 ? (UT)0x7fffU : 
00072         sizeof(T)==4 ? (UT)0x7fffffffLU : 
00073                        (UT)0x7fffffffffffffffLLU ;
00074     
00075     // cutoff and limit are used to detect overflow. This code is based 
00076     // upon strtol.c found in glibc-2.1.2 (GNU C Library), Copyright (C) 
00077     // 1991,92,94,95, 96,97,98,99 Free Software Foundation, Inc.
00078     // (No authors were mentioned in the file.)
00079     UT cutoff = neg ? min_val : max_val;
00080     char limit = (char)(cutoff % UT(base));
00081     cutoff/=UT(base);
00082     bool overflow=0;
00083     unsigned char consumed=0;  // max 63 for binary parsing of int64
00084     register char c=*s;
00085     UT val=0;
00086     
00087     // There are now optimized versions for base <=10 and 16. 
00088     // Expect runtime savings of <=7% with GCC-3.4.2 (pre) on AthlonXP for 
00089     // int32 (decimal/hex); depending on optimization. 
00090     if(base==16) for(;;c=*++s)
00091     {
00092         if(c>='0' && c<='9')  c-='0';
00093         else if(c>='a' && c<='f')  c-='a'-10;
00094         else if(c>='A' && c<='F')  c-='A'-10;
00095         else break;
00096         if(val>cutoff || (val==cutoff && c>limit))
00097         {  overflow=1;  }
00098         else if(!overflow)
00099         {  val=(val<<4)|((unsigned char)c);  ++consumed;  }
00100     }
00101     else if(base<=10)
00102     {
00103         char lastchar='0'+base;
00104         for(;;c=*++s)
00105         {
00106             if(c>='0' && c<lastchar)  c-='0';
00107             else break;
00108             if(val>cutoff || (val==cutoff && c>limit))
00109             {  overflow=1;  }
00110             else if(!overflow)
00111             {  val=val*base+c;  ++consumed;  }
00112         }
00113     }
00114     else for(;;c=*++s)
00115     {
00116         if(c>='0' && c<='9')  c-='0';
00117         else if(c>='a' && c<='z')  c-='a'-10;
00118         else if(c>='A' && c<='Z')  c-='A'-10;
00119         else break;
00120         if(c>=base)  break;
00121         if(val>cutoff || (val==cutoff && c>limit))
00122         {  overflow=1;  }
00123         else if(!overflow)
00124         {  val=val*base+c;  ++consumed;  }
00125     }
00126     
00127     if(overflow)
00128     {
00129         *rv = neg ? (T)-min_val : (T)max_val;
00130         TLString tmp;
00131         tmp.sprintf("%dbit signed integer %serflow",
00132             sizeof(T)*8,neg ? "und" : "ov");
00133         error=SError(tmp,1);
00134     }
00135     else
00136     {  *rv = neg ? -val : val;  }
00137     
00138     // Parsing ended at c, see if it is end of string. 
00139     if(!c && !consumed)
00140     {
00141         Assert(!error);
00142         error=SError("expected integer missing",1);
00143     }
00144     else if(!may_continue)
00145     {
00146         TLString tmp;
00147         tmp.sprintf("illegal char '%c' in integer (base %d)",c,(int)base);
00148         if(error)  tmp.prepend(error.msg()+"; ");
00149         error=SError(tmp,1);
00150     }
00151     
00152     return((char*)(consumed ? s : str));
00153 }
00154 
00155 
00156 char *ParseInt(int8 *val,const char *str,SError &error,bool may_continue)
00157     {  return(_ParseInt<int8,uint8>(val,str,error,may_continue));  }
00158 
00159 char *ParseInt(int16 *val,const char *str,SError &error,bool may_continue)
00160     {  return(_ParseInt<int16,uint16>(val,str,error,may_continue));  }
00161 
00162 char *ParseInt(int32 *val,const char *str,SError &error,bool may_continue)
00163     {  return(_ParseInt<int32,uint32>(val,str,error,may_continue));  }
00164 
00165 char *ParseInt(int64 *val,const char *str,SError &error,bool may_continue)
00166     {  return(_ParseInt<int64,uint64>(val,str,error,may_continue));  }
00167 
00168 
00169 // This is pretty much the same as the _ParseInt() template but for 
00170 // unsigned types. Only the unsigned type needs to be specified as template 
00171 // parameter. 
00172 // By Wolfgang Wieser, part of algorithm from glibc.
00173 template<typename UT>
00174     static inline char *_ParseUInt(UT *rv,const char *str,
00175         SError &error,bool may_continue)
00176 {
00177     error.clear();
00178     
00179     const char *s=str;
00180     if(!s)  return((char*)s);
00181     
00182     // First, skip any whitespace. 
00183     while(isspace(*s))  ++s;
00184     
00185     // Skip sign, if any. 
00186     if(*s=='+')  ++s;
00187     
00188     // Then, determine base. 
00189     char base=10;
00190     if(*s=='0')
00191     {
00192         ++s;
00193         if(*s=='x' || *s=='X')
00194         {  base=16;  ++s;  }
00195         else if(*s=='b' || *s=='B')
00196         {  base=2;  ++s;  }
00197         else
00198         {  base=8;  --s;  }
00199     }
00200     
00201     // Read in the value. 
00202     static const UT max_val=~((UT)0);
00203     // See the signed integer version for info on the cutoff thingy. 
00204     UT cutoff = max_val/UT(base);
00205     char limit = (char)(max_val % UT(base));
00206     bool overflow=0;
00207     unsigned char consumed=0;  // max 64 for binary parsing of uint64
00208     register char c=*s;
00209     UT val=0;
00210     
00211     // There are now optimized versions for base <=10 and 16. 
00212     // Expect runtime savings of nearly 10% with GCC-3.4.2 (pre) on AthlonXP 
00213     // for uint64 (decimal/hex average) with -O2. 
00214     if(base==16) for(;;c=*++s)
00215     {
00216         // Using a conversion array cv[256] to map chars to values resulted 
00217         // in a slight performance decrease with above mentioned setup. (WW)
00218         if(c>='0' && c<='9')  c-='0';
00219         else if(c>='a' && c<='f')  c-='a'-10;
00220         else if(c>='A' && c<='F')  c-='A'-10;
00221         else break;
00222         if(val>cutoff || (val==cutoff && c>limit))
00223         {  overflow=1;  }
00224         else if(!overflow)
00225         {  val=(val<<4)|((unsigned char)c);  ++consumed;  }
00226     }
00227     else if(base<=10)
00228     {
00229         char lastchar='0'+base;
00230         for(;;c=*++s)
00231         {
00232             if(c>='0' && c<lastchar)  c-='0';
00233             else break;
00234             if(val>cutoff || (val==cutoff && c>limit))
00235             {  overflow=1;  }
00236             else if(!overflow)
00237             {  val=val*base+c;  ++consumed;  }
00238         }
00239     }
00240     else for(;;c=*++s)
00241     {
00242         if(c>='0' && c<='9')  c-='0';
00243         else if(c>='a' && c<='z')  c-='a'-10;
00244         else if(c>='A' && c<='Z')  c-='A'-10;
00245         else break;
00246         if(c>=base)  break;
00247         if(val>cutoff || (val==cutoff && c>limit))
00248         {  overflow=1;  }
00249         else if(!overflow)
00250         {  val=val*base+c;  ++consumed;  }
00251     }
00252     
00253     if(overflow)
00254     {
00255         *rv = max_val;
00256         TLString tmp;
00257         tmp.sprintf("%dbit unsigned integer overflow",sizeof(UT)*8);
00258         error=SError(tmp,1);
00259     }
00260     else
00261     {  *rv = val;  }
00262     
00263     // Parsing ended at c, see if it is end of string. 
00264     if(!c && !consumed)
00265     {
00266         Assert(!error);
00267         error=SError("expected integer missing",1);
00268     }
00269     else if(!may_continue)
00270     {
00271         TLString tmp;
00272         tmp.sprintf("illegal char '%c' in integer (base %d)",c,(int)base);
00273         if(error)  tmp.prepend(error.msg()+"; ");
00274         error=SError(tmp,1);
00275     }
00276     
00277     return((char*)(consumed ? s : str));
00278 }
00279 
00280 
00281 char *ParseInt(uint8 *val,const char *str,SError &error,bool may_continue)
00282     {  return(_ParseUInt<uint8>(val,str,error,may_continue));  }
00283 
00284 char *ParseInt(uint16 *val,const char *str,SError &error,bool may_continue)
00285     {  return(_ParseUInt<uint16>(val,str,error,may_continue));  }
00286 
00287 char *ParseInt(uint32 *val,const char *str,SError &error,bool may_continue)
00288     {  return(_ParseUInt<uint32>(val,str,error,may_continue));  }
00289 
00290 char *ParseInt(uint64 *val,const char *str,SError &error,bool may_continue)
00291     {  return(_ParseUInt<uint64>(val,str,error,may_continue));  }
00292 
00293 
00294 
00295 // Like that the compiler will automatically select the correct 
00296 // function from libc. 
00297 int _Str2FP(dbl *rv,const char *str,char **end)
00298     {  errno=0;  *rv=strtod(str,end);  return(errno);  }
00299 int _Str2FP(flt *rv,const char *str,char **end)
00300     {  errno=0;  *rv=strtof(str,end);  return(errno);  }
00301 
00302 // Parse flt/dbl values. 
00303 // Basically a wrapper around the libc functions strtod() and strtof(). 
00304 // By Wolfgang Wieser. 
00305 template<typename F>
00306     char *_ParseFloat(F *val,const char *str,SError &error,bool may_continue)
00307 {
00308     char *end;
00309     int e=_Str2FP(val,str,&end);
00310     if(e==ERANGE || (*end && !may_continue))
00311     {
00312         TLString tmp;
00313         if(*end && !may_continue)
00314         {  tmp.sprintf("parse error in float at '%c'",*end);  }
00315         if(e==ERANGE)
00316         {
00317             if(tmp)  tmp+="; ";
00318             tmp+="floating point value out of range";
00319         }
00320         error=SError(tmp,1);
00321     }
00322     return(end);
00323 }
00324 
00325 char *ParseFloat(flt *val,const char *str,SError &error,bool may_continue)
00326     {  return(_ParseFloat<flt>(val,str,error,may_continue));  }
00327 char *ParseFloat(dbl *val,const char *str,SError &error,bool may_continue)
00328     {  return(_ParseFloat<dbl>(val,str,error,may_continue));  }
00329 
00330 
// Record combining an integer value with two small descriptor bit-fields.
// It is the output parameter type of ParseIntegerSpec().
// NOTE(review): no code visible in this file reads or writes the members;
// the member descriptions below are inferred from the names only and need
// to be confirmed against the callers.
00332 struct ParsedIntegerValue
00333 {
00334     int64 val;          // 64 bit signed storage; presumably the parsed value -- TODO confirm
00335     uint16 size : 8;    // 8 bit wide bit-field; presumably the width of the integer type -- TODO confirm (unit unknown)
00336     uint16 unsig : 3;   // 3 bit wide bit-field; presumably marks unsigned values -- TODO confirm encoding
00337 };
00338 
00339 
00393 int ParseIntegerSpec(const char *str,ParsedIntegerValue *si,SError &error)
00394 {
00396 }
00397 
00398 
00412 char ParseCharacterSpec(const char *str,SError &error)
00413 {
00415 }
00416 
// Result record filled in by ParseFloatSpec(): a floating point value in
// either double or single precision plus a tag telling which one is valid.
00417 struct ParsedFloatValue
00418 {
       // Both members share storage; size tells which one was written.
00419     union {
00420         dbl dval;   // written by ParseFloatSpec() when the spec has no 'f'/'F' suffix
00421         flt fval;   // written by ParseFloatSpec() when the spec ends in 'f'/'F'
00422     };
00423     int16 size;   // ParseFloatSpec() stores sizeof(flt) or sizeof(dbl) here, matching the member it wrote
00424 };
00425 
00441 int ParseFloatSpec(const char *str,ParsedFloatValue *sf,SError &error)
00442 {
00443     if(!str) return(0);
00444     
00445     // This is tricky as the float value may end before the end of the 
00446     // string. So, I parse it as dbl and then convert to flt if the 
00447     // f suffix is present. 
00448     dbl tmp;
00449     const char *end=ParseFloat(&tmp,str,error,/*may_continue=*/1);
00450     if(*end=='f' || *end=='F')
00451     {
00452         if(tmp>FLT_MAX || tmp<FLT_MIN)
00453         {
00454             error.AppendMsg("floating point value out of (flt) range");
00456             sf->fval = tmp>FLT_MAX ? FLT_MAX : FLT_MIN;
00457         }
00458         else
00459         {  sf->fval=(flt)tmp;  }
00460         sf->size=sizeof(flt);
00461         ++end;
00462     }
00463     else
00464     {
00465         sf->dval=tmp;
00466         sf->size=sizeof(dbl);
00467     }
00468     
00469     if(*end)
00470     {  error.AppendMsg("garbage at end of float spec");  }
00471     
00472     return(error ? 1 : 0);
00473 }
00474 
00475 
00476 #if 0  /* Test program */
00477 
00478 // gcc lib_lex.a -o test -lm ../lib_misc.a ../tl/lib_tl.a ../threads/lib_threads.a -lpthread
00479 #include <stdio.h>  /* test program */
00480 
00481 
00482 template<typename T,typename UT>
00483     void TestParseInt(const char *str)
00484 {
00485     T val;
00486     SError error;
00487     fprintf(stderr,"%s: ",str);
00488     const char *end=ParseInt(&val,str,error,0);
00489     fprintf(stderr," %lld; %s  (error=%s)\n",
00490         (long long)val,end,error ? error.msg().str() : "success");
00491 }
00492 
00493 template<typename UT>
00494     void TestParseUInt(const char *str)
00495 {
00496     UT val;
00497     SError error;
00498     fprintf(stderr,"%s: ",str);
00499     const char *end=ParseInt(&val,str,error,0);
00500     fprintf(stderr," %llu; %s  (error=%s)\n",
00501         (unsigned long long)val,end,
00502         error ? error.msg().str() : "success");
00503 }
00504 
00505 int main(int argc,char **arg)
00506 {
00507     for(int i=1; i<argc; i++)
00508     {
00509         TestParseInt<int16,uint16>(arg[i]);
00510         TestParseInt<int32,uint32>(arg[i]);
00511         TestParseInt<int64,uint64>(arg[i]);
00512         TestParseUInt<int16>(arg[i]);
00513         TestParseUInt<int32>(arg[i]);
00514         TestParseUInt<int64>(arg[i]);
00515     }
00516     
00517     /*char tmp[32];
00518     SError error;
00519     for(int i=0; i<1000; i++)
00520     {
00521         int32 val=rand();
00522         uint64 rval;
00523         
00524         sprintf(tmp,"0x%x",val);
00525         const char *end;
00526         for(int j=0; j<2000; j++)
00527         {  end=ParseInt(&rval,tmp,error,0);  }
00528         CritAssert(!error); CritAssert(!*end); CritAssert(val==rval);  // test app
00529         
00530         //if(val & (1<<3))  val=-val;
00531         
00532         sprintf(tmp,"%d",val);
00533         for(int j=0; j<2000; j++)
00534         {  end=ParseInt(&rval,tmp,error,0);  }
00535         CritAssert(!error); CritAssert(!*end); CritAssert(val==rval);  // test app
00536     }*/
00537     
00538     return(0);
00539 }
00540 
00541 #endif