Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

/ray/src/lib/lex/iscannerbase.cc

Go to the documentation of this file.
00001 /*
00002  * flex/iscannerbase.cc
00003  * 
00004  * Internal base class for flex-based scanner base class. 
00005  * 
00006  * Copyright (c) 2003--2004 by Wolfgang Wieser ] wwieser (a) gmx <*> de [ 
00007  * 
00008  * This file may be distributed and/or modified under the terms of the 
00009  * GNU General Public License version 2 as published by the Free Software 
00010  * Foundation. (See COPYING.GPL for details.)
00011  * 
00012  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
00013  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
00014  * 
00015  */
00016 
00017 #include <lib/sconfig.h>    /* must be first */
00018 
00019 #include <lib/serror.h>
00020 #include <lib/tl/linkedlist.h>
00021 #include <lib/lex/location.h>
00022 #include <lib/sourcepos/sparchive.h>
00023 #include <lib/tl/tlstring.h>
00024 #include <lib/message/message.h>
00025 
00026 #include <unistd.h>  /* USE STREAMS INSTEAD!! */
00027 #include <stdio.h>   /* USE STREAMS INSTEAD!! */
00028 #include <errno.h>   /* USE STREAMS INSTEAD!! */
00029 
00030 // For the implementation of the internal base class, we use no 
00031 // _InternalFlexScannerBase, so that this does not get included. 
00032 #undef _InternalFlexScannerBase
00033 #include "scannerbase.h"
00034 
00035 
00036 //---------------------------------<Config>-------------------------------------
00037 
00038 _InternalFlexScannerBase::Config::Config()
00039 {
00040     // Defaults for values which are not changed by _reset(): 
00041     loc_use_lpos=1;
00042     tab_width=8;
00043     max_file_depth=32;
00044     ahead_toks_wanted=1;   // minimum 1
00045     old_toks_wanted=16;
00046 }
00047 
00048 //-------------------------------<TokenEntry>-----------------------------------
00049 
00050 bool _InternalFlexScannerBase::TokenEntry::MayBeCleared() const
00051 {
00052     // This is always true for pure simple tokens (i.e. no allocated 
00053     // pointers inside). 
00054     return(1);
00055 }
00056 
00057 void _InternalFlexScannerBase::TokenEntry::clear(bool /*force*/)
00058 {
00059     // Nothing special to do here; however clearing the state makes 
00060     // debugging easier...
00061     token=-1;
00062     lloc=SCLocationRange();
00063 }
00064 
00065 
00066 _InternalFlexScannerBase::TokenEntry *_InternalFlexScannerBase::
00067 	AllocTokenEntry()
00068 {
00069     return new TokenEntry();
00070 }
00071 
00072 
00073 //-------------------------------<LexerInput>-----------------------------------
00074 
00075 ssize_t _InternalFlexScannerBase::LexerInput::read(char *buf,size_t len)
00076 {
00077     // FIXME: To be replaced with stream implementation and no stderr,.. error. 
00078     
00079     if(fp)
00080     {
00081         errno=0;
00082         size_t result;
00083         if(feof(fp))
00084         {  result=0;  }
00085         else while( (result = fread(buf,1,len,fp))==0 && ferror(fp) )
00086         {
00087             if(errno!=EINTR)
00088             {
00089                 fprintf(stderr,"reading %s failed: %s\n",
00090                     fp_path.str(),strerror(errno));
00091                 return(-1);
00092                 break;
00093             }
00094             errno=0;
00095             clearerr(fp);
00096         }
00097         
00098         //fprintf(stderr,"read()=%d\n",result);
00099         return(result);
00100     }
00101     
00102     return(0);
00103 }
00104 
00105 
00106 int _InternalFlexScannerBase::LexerInput::OpenFile(const TLString &path,
00107     SError &error)
00108 {
00109     // FIXME: To be replaced with stream implementation. 
00110     
00111     if(fp)  return(1);
00112     
00113     if(path)
00114     {
00115         fp=fopen(path.str(),"r");
00116         if(!fp)
00117         {  /* FIXME: set error*/ error=SError("Open failed blah blah",1); return(-2);  }
00118         
00119         fp_path=path;
00120     }
00121     else
00122     {
00123         fp=stdin;
00124         fp_path="[stdin]";
00125     }
00126     
00127     return(0);
00128 }
00129 
00130 
00131 _InternalFlexScannerBase::LexerInput::LexerInput(LexerInput *_down) : 
00132     down(_down),
00133     fp(NULL),
00134     fp_path(),
00135     saved(-1,-1),
00136     must_pop_state(0),
00137     must_pop_pos_arch(0),
00138     read_eof(0),
00139     special_next_tok(-1),
00140     next_toks(),
00141     prev_toks()
00142 {
00143     // Nothing more to do. 
00144 }
00145 
00146 _InternalFlexScannerBase::LexerInput::~LexerInput()
00147 {
00148     if(fp)
00149     {
00150         if(fp!=stdin) fclose(fp);
00151         fp=NULL;
00152     }
00153     
00154     while(!next_toks.IsEmpty())
00155     {
00156         Error("OOPS: ~ALSInput: Token %d left in next_toks.",
00157             next_toks.first()->token);
00158         next_toks.first()->clear(/*force=*/1);
00159         delete next_toks.PopFirst();
00160     }
00161     
00162     // Tokens with MayBeCleared()==0 from this list must have been 
00163     // requeued in zombie_list before. 
00164     while(!prev_toks.IsEmpty())
00165     {  delete prev_toks.PopFirst();  }
00166 }
00167 
00168 //-------------------------------<SCLocation>-----------------------------------
00169 
00170 SCLocation _InternalFlexScannerBase::_MakeCurrLoc() const
00171 {
00172     return(SCLocation(pos_arch->GetPos(p0.line,
00173         cfg.loc_use_lpos ? p0.lpos : -1)));
00174 }
00175 
00176 SCLocation _InternalFlexScannerBase::_MakeCurrEndLoc() const
00177 {
00178     return(SCLocation(pos_arch->GetPos(p1.line,
00179         cfg.loc_use_lpos ? p1.lpos : -1)));
00180 }
00181 
00183 SCLocationRange _InternalFlexScannerBase::_MakeCurrLocRange() const
00184 {
00185     return(SCLocationRange(_MakeCurrLoc(),_MakeCurrEndLoc()));
00186 }
00187 
00188 //----------------------------<Lexer Interface>---------------------------------
00189 
00190 void _InternalFlexScannerBase::_pi_char(char c)
00191 {
00192     switch(c)
00193     {
00194         case '\n':  _pi_line();  break;
00195         case '\r':  break;   // should not happen due to filtering
00196         case '\t':  _pi_tab();  break;
00197         default:  ++p1.lpos;  break;
00198     }
00199 }
00200 
00201 void _InternalFlexScannerBase::_pi_str(const char *str,size_t len)
00202 {
00203     for(const char *c=str,*cend=str+len; c<cend; c++)
00204     {  _pi_char(*c);  }
00205 }
00206 
00207 void _InternalFlexScannerBase::_pi_eof()
00208 {
00209     Assert(top_inp);
00210     top_inp->read_eof=1;
00211     //fprintf(stderr,"_pi_eof()\n");
00212 }
00213 
00214 
00215 //---------------------------<_InternalFlexScannerBase>-------------------------
00216 
00217 
00218 int _InternalFlexScannerBase::SetInput(const TLString &path,SError &error,
00219     bool included,
00220     int first_token,XYPos alt_pos,SourcePositionArchive *alt_pos_arch)
00221 {
00222     Assert(magic==MyMagic);
00223     
00224     // Input validation: 
00225     if(included && alt_pos_arch)  return(-3);
00226     if(scanner && !included)  return(1);
00227     
00228     if(included && file_depth>=cfg.max_file_depth)  return(-4);
00229     
00230     // Set up LexerInput; "down" pointer set further down. 
00231     LexerInput *inp=new LexerInput(/*down=*/NULL);
00232     if(inp->OpenFile(path,error))
00233     {  DELETE(inp);  return(-2);  }
00234     
00235     // Set special next token if specified: 
00236     if(first_token>=0)
00237     {  inp->special_next_tok=first_token;  }
00238     
00239     // Do scanner setup (if needed): 
00240     if(!scanner)
00241     {  _lex_init(&scanner);  }  // will set extra pointer
00242     
00243     int rv=_SetInputBH(inp,included,alt_pos,alt_pos_arch);
00244     
00245     // Make sure we know which input is on top: 
00246     inp->down=top_inp;
00247     top_inp=inp;
00248     
00249     return(rv);
00250 }
00251 
00252 
// Return the next token, or NULL if there is no scanner or the real 
// end of input was reached (i.e. _wrap() returned non-zero). 
// 
// Each LexerInput has a next_toks queue (lookahead, filled up to 
// cfg.ahead_toks_wanted entries via _RawYYLex()) and a prev_toks queue 
// (already returned tokens, trimmed to cfg.old_toks_wanted entries). 
// The returned entry is moved to prev_toks of the input it came from 
// before it is returned, so it stays queued in this object. 
// 
// read_inp is the input whose queued tokens are currently handed out. 
// It can lag behind top_inp (the input the lexer is working on) when 
// lexing a token changed the top input. 
_InternalFlexScannerBase::TokenEntry *_InternalFlexScannerBase::LexNextToken()
{
    if(!scanner)  return(NULL);
    
    for(;;)
    {
        // Jump target used when _RawYYLex() changed top_inp (see below). 
        cont2:;
        
        /* WARNING: DO NOT TOUCH THAT CODE UNLESS YOU **REALLY** KNOW */
        /*          WHAT YOU ARE DOING.  SOME CALLS HAVE UNEXPECTED   */
        /*          SIDE EFFECTS (esp. when processing #includes).    */
        /*                                 -- Wolfgang Jul/03; Dez/04 */
        
        Assert(magic==MyMagic);
        Assert(top_inp);
        
        // read_inp is NULL initially and after an EOF token (see bottom). 
        if(!read_inp)
        {  read_inp=top_inp;  }
        
        // Okay, I maintain a token buffer for some additional 
        // lookahead and look-back if needed. 
        // from_inp: the input whose next_toks queue delivers the token. 
        LexerInput *from_inp=NULL;
        
        // In case read_inp!=top_inp. 
        // The lexer is in a different input than we are. 
        // This is due to some #include. 
        // Must first return all tokens from the current read_inp. 
        if(read_inp!=top_inp)
        {
            /* See if we have tokens: */
            if(read_inp->next_toks.IsEmpty())
            {
                // No toks left; go on: 
                read_inp=top_inp;
            }
            else
            {  from_inp=read_inp;  }
        } /* NO ELSE. */
        if(read_inp==top_inp)
        {
            // Make sure token buffer is full... 
            LexerInput *inp=top_inp;
            // I call count() here because for 2 or 3 tokens this 
            // costs "no" time. 
            int n_next=inp->next_toks.count();
            int n_prev=inp->prev_toks.count();
            // Want to have ahead_toks_wanted in the next_toks list. 
            // Get new ones unless we read EOF. 
            while(n_next<cfg.ahead_toks_wanted && !inp->read_eof)
            {
                // Get TokenEntry to use: 
                // Prefer recycling the oldest entry of prev_toks if that 
                // queue is longer than wanted; allocate otherwise. 
                TokenEntry *te=NULL;
                if(n_prev>cfg.old_toks_wanted)
                {
                    te=inp->prev_toks.PopFirst();
                    --n_prev;
                    // Read comment about zombie_list...
                    if(te->MayBeCleared())
                    {  te->clear();  } // Get rid of old content. 
                    else
                    {
                        // May not be cleared yet: park it in zombie_list 
                        // and allocate a new entry instead. 
                        zombie_list.append(te);
                        ++zombie_list_nents;
                        te=NULL;
                    }
                }
                if(!te)
                {  te=AllocTokenEntry();  }
                
                // Do the lexical analysis & queue token: 
                bool state_change=0;
                _RawYYLex(te);
                // The token is queued in whatever input is on top after 
                // the _RawYYLex() call. 
                if(inp!=top_inp)   // <-- May have changed...
                {  inp=top_inp;  state_change=1;  }
                inp->next_toks.append(te);
                ++n_next;
                
                // If scanner state changed, go back to beginning: 
                if(state_change)
                {  goto cont2;  }
            }
            // Trim the look-back queue to old_toks_wanted entries. 
            while(n_prev>cfg.old_toks_wanted)
            {
                TokenEntry *tmp=inp->prev_toks.PopFirst();
                --n_prev;
                // Read comment about zombie_list...
                if(tmp->MayBeCleared())
                {  delete tmp;  }
                else
                {  zombie_list.append(tmp);  ++zombie_list_nents;  }
                // Should not happen unless we just #included something. 
                if(!inp->read_eof)
                {  Warning("HMMM: Del Old Tok.");  }
            }
            from_inp=inp;
        }
        
        _TidyUpZombieList();  // inline check
        
        // Get token to be returned: 
        TokenEntry *te=from_inp->next_toks.PopFirst();
        Assert(te);
        // If we have ahead_toks_wanted=1 (minimum value) it is important 
        // that we really do not read more tokens (we may need to switch 
        // context, thus different tokens may be recognized). 
        Assert(cfg.ahead_toks_wanted>1 || from_inp->next_toks.IsEmpty());
        // "Eat" token: 
        from_inp->prev_toks.append(te);
        
        // *te is the token to be returned. 
        //(FIXME) cloc=te->lloc;  /* another copy */
        
        if(te->token)   // Not EOF token. 
        {
            // You can plug in smart lookahead / context-sensitive 
            // de-ambiguation here. (FIXME)
            
            /*if(te->token<256)
            {  fprintf(stderr,"\tTOK=\'%c\'\t\t[%s]\n",te->token,
                from_inp->fp_path.str());  }
            else if(te->token==TS_IDENTIFIER)
            {  fprintf(stderr,"\tTOK=\"%s\"\t\t[%s]\n",te->lval.str_val,
                from_inp->fp_path.str());  }
            else
            {  fprintf(stderr,"\tTOK=%d\t\t[%s]\n",te->token,
                from_inp->fp_path.str());  }*/
            
            return(te);
        }
        
        // EOF token here. 
        // The EOF token is only returned if we cannot _wrap any more. 
        // First, copy the data, because _wrap() will delete 
        // the current LexerInput and thus the token buffer and *te. 
        // --> Already done above.  
        
        // Forget read_inp before _wrap() so that the next iteration 
        // re-reads it from top_inp. 
        read_inp=NULL;
        if(_wrap())
        {
            // Real EOF reached. 
            break;
        }
    }
    
    return(NULL);   // EOF
}
00399 
00400 
00401 void _InternalFlexScannerBase::_RawYYLex(TokenEntry *dest)
00402 {
00403     Assert(magic==MyMagic);
00404     
00405     // "Shift" up position...
00406     p0=p1;  // C++ assignment
00407     
00408     // Do the actual lex'ing: 
00409     int tok_rv;
00410     if(top_inp->special_next_tok>=0)
00411     {
00412         tok_rv=top_inp->special_next_tok;
00413         top_inp->special_next_tok=-1;
00414     }
00415     else
00416     {
00417         lex_token=dest;
00418         tok_rv=_lex(scanner);
00419         lex_token=NULL;
00420     }
00421     
00422     // Store the token: 
00423     dest->token=tok_rv;
00424     if(!tok_rv)  Assert(top_inp->read_eof || n_errors);
00425     
00426     // Store the position: 
00427     dest->lloc=_MakeCurrLocRange();
00428     
00429     /*if(rv<256)
00430     {  fprintf(stderr,"   LEX=\'%c\'\n",rv);  }
00431     else
00432     {  fprintf(stderr,"   LEX=%d\n",tok_rv);  }*/
00433 }
00434 
00435 
00436 /* NOTE: Always use _TidyUpZombieList() and not the "__" version. */
00437 void _InternalFlexScannerBase::__TidyUpZombieList(bool force)
00438 {
00439     // In order to avoid too many tokens in the zombie list and OTOH 
00440     // not checking all tokens all the time, the zombie_list_threash is 
00441     // used. If there are at least zombie_list_threash new entries in 
00442     // the zombie list, the last 2*zombie_list_threash tokens (max.) 
00443     // will be checked for tid up. This means, that tokens are asked 
00444     // twice or a bit more often but not "all the time". 
00445     // 
00446     // This approach seems natural for use together with a shift/reduce 
00447     // parser where some tokens may spend a long time on the stack until 
00448     // finally getting used. 
00449     
00450     int _cnt=0,_nforce=0,_ntry=0;
00451     int ncheck = force ? -1 : 2*zombie_list_threash;
00452     for(TokenEntry *_i=zombie_list.last(); _i && ncheck; ncheck--)
00453     {
00454         TokenEntry *i=_i;
00455         _i=_i->prev;
00456         
00457         bool we_may=i->MayBeCleared();
00458         if(force && !we_may)
00459         {  ++_nforce;  we_may=1;  }
00460         
00461         if(we_may)
00462         {
00463             i->clear(/*force=*/force);
00464             
00465             delete zombie_list.dequeue(i);
00466             --zombie_list_nents;
00467             ++_cnt;
00468         }
00469         
00470         ++_ntry;
00471     }
00472     
00473     // Getting this message is perfectly normal. 
00474     if(_cnt<_ntry || (force && (_cnt || _nforce)))
00475     {  Warning(
00476         "Hmmm: deleted %d (%d forced, %d tried) zombie tokens, %d left "
00477         "(force=%d).",
00478         _cnt,_nforce,_ntry,zombie_list_nents,int(force));  }
00479     if(force)
00480     {  Assert(zombie_list_nents==0);  }
00481     
00482     Assert(zombie_list_nents==zombie_list.count());
00483     last_zombie_list_nents=zombie_list_nents;
00484 }
00485 
00486 
00487 void _InternalFlexScannerBase::_reset(bool no_virtuals)
00488 {
00489     // NOTE: May not call virtual functions which go "below" 
00490     // PREFIX_FlexScannerBase (as called from destructor). 
00491     Assert(magic==MyMagic);
00492     
00493     // Must be NULL if outside yylex(). 
00494     Assert(!lex_token);
00495     
00496     if(scanner)
00497     {
00498         // Tidy up the stack: 
00499         read_inp=NULL;
00500         while(!_wrap());
00501         
00502         Assert(!file_depth);
00503         Assert(!top_inp);
00504         
00505         _lex_destroy(scanner);
00506         scanner=NULL;
00507     }
00508     
00509     p0.line=1;  p1.line=1;
00510     p0.lpos=0;  p1.lpos=0;
00511     
00512     if(pos_arch)
00513     {
00514         if(pos_arch_allocated)
00515         {  delete pos_arch;  }
00516         pos_arch=NULL;
00517         pos_arch_allocated=0;
00518     }
00519     
00520     _TidyUpZombieList(/*force=*/1);
00521     
00522     n_errors=0;
00523     
00524     top_inp=NULL;
00525     read_inp=NULL;
00526     file_depth=0;
00527     
00528     // Call virtual function for derived class unless forbidden. 
00529     if(!no_virtuals)
00530     {  reset();  }
00531 }
00532 
00533 
00534 void _InternalFlexScannerBase::reset()
00535 {
00536     // Virtual function which only has to be overridden if the derived 
00537     // parser needs that. Nothing to do for default implementation. 
00538 }
00539 
00540 
00541 _InternalFlexScannerBase::_InternalFlexScannerBase() : 
00542     zombie_list(),
00543     p0(1,0),
00544     p1(1,0),
00545     cfg()
00546 {
00547     magic=MyMagic;
00548     scanner=NULL;
00549     lex_token=NULL;
00550     
00551     top_inp=NULL;
00552     read_inp=NULL;
00553     
00554     zombie_list_nents=0;
00555     last_zombie_list_nents=0;
00556     
00557     pos_arch=NULL;
00558     pos_arch_allocated=0;
00559     
00560     // All the other values are initialized in _reset(). 
00561     // This will be called from within PREFIX_FlexScannerBase. 
00562     // _reset();
00563 }
00564 
00565 _InternalFlexScannerBase::~_InternalFlexScannerBase()
00566 {
00567     // The following is called from within PREFIX_FlexScannerBase 
00568     // due to virtual function calls. 
00569     //_InternalFlexScannerBase::_reset();
00570     
00571     Assert(!scanner);
00572     
00573     Assert(!top_inp);
00574     Assert(!read_inp);
00575     
00576     Assert(magic==MyMagic);
00577     magic=0;
00578 }
00579 
00580 //------------------------------------------------------------------------------
00581 
00582 int _InternalFlexScannerBase::_lex(void * /*scanner*/)
00583 {
00584     // Must be overridden if used. 
00585     CritAssert(0);  // override
00586 }
00587 
00588 int _InternalFlexScannerBase::_lex_init(void ** /*scanner_p*/)
00589 {
00590     // Must be overridden if used. 
00591     CritAssert(0);  // override
00592 }
00593 
00594 void _InternalFlexScannerBase::_lex_destroy(void * /*scanner*/)
00595 {
00596     // Must be overridden if used. 
00597     CritAssert(0);  // override
00598 }
00599 
00600 int _InternalFlexScannerBase::_wrap()
00601 {
00602     // Must be overridden if used. 
00603     CritAssert(0);  // override
00604     return(0);
00605 }
00606 
00607 int _InternalFlexScannerBase::_SetInputBH(LexerInput * /*inp*/,
00608     bool /*included*/,
00609     XYPos /*alt_pos*/,SourcePositionArchive * /*alt_pos_arch*/)
00610 {
00611     // Must be overridden if used. 
00612     CritAssert(0);  // override
00613     return(-5);
00614 }

Generated on Sat Feb 19 22:33:45 2005 for Ray by doxygen 1.3.5