11/***********************************************************
2- Copyright 1991, 1992, 1993 by Stichting Mathematisch Centrum,
2+ Copyright 1991, 1992, 1993, 1994 by Stichting Mathematisch Centrum,
33Amsterdam, The Netherlands.
44
55 All Rights Reserved
@@ -24,19 +24,18 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
2424
2525/* Tokenizer implementation */
2626
27- /* XXX This is rather old, should be restructured perhaps */
28- /* XXX Need a better interface to report errors than writing to stderr */
29- /* XXX Should use editor resource to fetch true tab size on Macintosh */
30-
3127#include "pgenheaders.h"
3228
3329#include <ctype.h>
34- #include "string.h"
3530
36- #include "fgetsintr.h"
3731#include "tokenizer.h"
3832#include "errcode.h"
3933
34+ extern char * my_readline PROTO ((char * ) );
35+ /* Return malloc'ed string including trailing \n;
36+ empty malloc'ed string for EOF;
37+ NULL if interrupted */
38+
4039/* Don't ever change this -- it would break the portability of Python code */
4140#define TABSIZE 8
4241
@@ -99,7 +98,7 @@ tok_new()
9998 struct tok_state * tok = NEW (struct tok_state , 1 );
10099 if (tok == NULL )
101100 return NULL ;
102- tok -> buf = tok -> cur = tok -> end = tok -> inp = NULL ;
101+ tok -> buf = tok -> cur = tok -> end = tok -> inp = tok -> start = NULL ;
103102 tok -> done = E_OK ;
104103 tok -> fp = NULL ;
105104 tok -> tabsize = TABSIZE ;
158157tok_free (tok )
159158 struct tok_state * tok ;
160159{
161- /* XXX really need a separate flag to say 'my buffer' */
162160 if (tok -> fp != NULL && tok -> buf != NULL )
163161 DEL (tok -> buf );
164162 DEL (tok );
@@ -180,58 +178,78 @@ tok_nextc(tok)
180178 tok -> done = E_EOF ;
181179 return EOF ;
182180 }
183- #ifdef USE_READLINE
184181 if (tok -> prompt != NULL ) {
185- extern char * readline PROTO ((char * prompt ));
186- static int been_here ;
187- if (!been_here ) {
188- /* Force rebind of TAB to insert-tab */
189- extern int rl_insert ();
190- rl_bind_key ('\t' , rl_insert );
191- been_here ++ ;
192- }
193- if (tok -> buf != NULL )
194- free (tok -> buf );
195- tok -> buf = readline (tok -> prompt );
196- (void ) intrcheck (); /* Clear pending interrupt */
182+ char * new = my_readline (tok -> prompt );
197183 if (tok -> nextprompt != NULL )
198184 tok -> prompt = tok -> nextprompt ;
199- if (tok -> buf == NULL ) {
185+ if (new == NULL )
186+ tok -> done = E_INTR ;
187+ else if (* new == '\0' ) {
188+ free (new );
200189 tok -> done = E_EOF ;
201190 }
191+ else if (tok -> start != NULL ) {
192+ int start = tok -> start - tok -> buf ;
193+ int oldlen = tok -> cur - tok -> buf ;
194+ int newlen = oldlen + strlen (new );
195+ char * buf = realloc (tok -> buf , newlen + 1 );
196+ tok -> lineno ++ ;
197+ if (buf == NULL ) {
198+ free (tok -> buf );
199+ free (new );
200+ tok -> done = E_NOMEM ;
201+ return EOF ;
202+ }
203+ tok -> buf = buf ;
204+ tok -> cur = tok -> buf + oldlen ;
205+ strcpy (tok -> buf + oldlen , new );
206+ free (new );
207+ tok -> inp = tok -> buf + newlen ;
208+ tok -> end = tok -> inp + 1 ;
209+ tok -> start = tok -> buf + start ;
210+ }
202211 else {
203- tok -> end = strchr (tok -> buf , '\0' );
204- if (tok -> end > tok -> buf )
205- add_history (tok -> buf );
206- /* Replace trailing '\n' by '\0'
207- (we don't need a '\0', but the
208- tokenizer wants a '\n'...) */
209- * tok -> end ++ = '\n' ;
210- tok -> inp = tok -> end ;
212+ tok -> lineno ++ ;
213+ if (tok -> buf != NULL )
214+ free (tok -> buf );
215+ tok -> buf = new ;
211216 tok -> cur = tok -> buf ;
217+ tok -> inp = strchr (tok -> buf , '\0' );
218+ tok -> end = tok -> inp + 1 ;
212219 }
213220 }
214- else
215- #endif
216- {
217- if (tok -> prompt != NULL ) {
218- fprintf (stderr , "%s" , tok -> prompt );
219- if (tok -> nextprompt != NULL )
220- tok -> prompt = tok -> nextprompt ;
221- }
222- if (tok -> buf == NULL ) {
223- tok -> buf = NEW (char , BUFSIZ );
221+ else {
222+ int done = 0 ;
223+ int cur = 0 ;
224+ if (tok -> start == NULL ) {
224225 if (tok -> buf == NULL ) {
225- tok -> done = E_NOMEM ;
226- return EOF ;
226+ tok -> buf = NEW (char , BUFSIZ );
227+ if (tok -> buf == NULL ) {
228+ tok -> done = E_NOMEM ;
229+ return EOF ;
230+ }
231+ tok -> end = tok -> buf + BUFSIZ ;
232+ }
233+ if (fgets (tok -> buf , (int )(tok -> end - tok -> buf ),
234+ tok -> fp ) == NULL ) {
235+ tok -> done = E_EOF ;
236+ done = 1 ;
237+ }
238+ else {
239+ tok -> done = E_OK ;
240+ tok -> inp = strchr (tok -> buf , '\0' );
241+ done = tok -> inp [-1 ] == '\n' ;
227242 }
228- tok -> end = tok -> buf + BUFSIZ ;
229243 }
230- tok -> done = fgets_intr (tok -> buf ,
231- (int )(tok -> end - tok -> buf ), tok -> fp );
232- tok -> inp = strchr (tok -> buf , '\0' );
244+ else {
245+ cur = tok -> cur - tok -> buf ;
246+ tok -> done = E_OK ;
247+ }
248+ tok -> lineno ++ ;
233249 /* Read until '\n' or EOF */
234- while (tok -> inp + 1 == tok -> end && tok -> inp [-1 ]!= '\n' ) {
250+ while (!done ) {
251+ int curstart = tok -> start == NULL ? -1 :
252+ tok -> start - tok -> buf ;
235253 int curvalid = tok -> inp - tok -> buf ;
236254 int cursize = tok -> end - tok -> buf ;
237255 int newsize = cursize + BUFSIZ ;
@@ -245,13 +263,19 @@ tok_nextc(tok)
245263 tok -> buf = newbuf ;
246264 tok -> inp = tok -> buf + curvalid ;
247265 tok -> end = tok -> buf + newsize ;
248- if (fgets_intr (tok -> inp ,
266+ tok -> start = curstart < 0 ? NULL :
267+ tok -> buf + curstart ;
268+ if (fgets (tok -> inp ,
249269 (int )(tok -> end - tok -> inp ),
250- tok -> fp ) != E_OK )
251- break ;
270+ tok -> fp ) == NULL ) {
271+ /* Last line does not end in \n,
272+ fake one */
273+ strcpy (tok -> inp , "\n" );
274+ }
252275 tok -> inp = strchr (tok -> inp , '\0' );
276+ done = tok -> inp [-1 ] == '\n' ;
253277 }
254- tok -> cur = tok -> buf ;
278+ tok -> cur = tok -> buf + cur ;
255279 }
256280 if (tok -> done != E_OK ) {
257281 if (tok -> prompt != NULL )
@@ -360,14 +384,15 @@ tok_get(tok, p_start, p_end)
360384 register int c ;
361385 int blankline ;
362386
387+ * p_start = * p_end = NULL ;
363388 nextline :
389+ tok -> start = NULL ;
364390 blankline = 0 ;
365391
366392 /* Get indentation level */
367393 if (tok -> atbol ) {
368394 register int col = 0 ;
369395 tok -> atbol = 0 ;
370- tok -> lineno ++ ;
371396 for (;;) {
372397 c = tok_nextc (tok );
373398 if (c == ' ' )
@@ -423,7 +448,7 @@ tok_get(tok, p_start, p_end)
423448 }
424449 }
425450
426- * p_start = * p_end = tok -> cur ;
451+ tok -> start = tok -> cur ;
427452
428453 /* Return pending indents/dedents */
429454 if (tok -> pendin != 0 ) {
@@ -438,13 +463,14 @@ tok_get(tok, p_start, p_end)
438463 }
439464
440465 again :
466+ tok -> start = NULL ;
441467 /* Skip spaces */
442468 do {
443469 c = tok_nextc (tok );
444470 } while (c == ' ' || c == '\t' );
445471
446472 /* Set start of current token */
447- * p_start = tok -> cur - 1 ;
473+ tok -> start = tok -> cur - 1 ;
448474
449475 /* Skip comment */
450476 if (c == '#' ) {
@@ -467,7 +493,6 @@ tok_get(tok, p_start, p_end)
467493
468494 /* Check for EOF and errors now */
469495 if (c == EOF ) {
470- * p_start = * p_end = tok -> cur ;
471496 return tok -> done == E_EOF ? ENDMARKER : ERRORTOKEN ;
472497 }
473498
@@ -477,6 +502,7 @@ tok_get(tok, p_start, p_end)
477502 c = tok_nextc (tok );
478503 } while (isalnum (c ) || c == '_' );
479504 tok_backup (tok , c );
505+ * p_start = tok -> start ;
480506 * p_end = tok -> cur ;
481507 return NAME ;
482508 }
@@ -486,6 +512,7 @@ tok_get(tok, p_start, p_end)
486512 tok -> atbol = 1 ;
487513 if (blankline || tok -> level > 0 )
488514 goto nextline ;
515+ * p_start = tok -> start ;
489516 * p_end = tok -> cur - 1 ; /* Leave '\n' out of the string */
490517 return NEWLINE ;
491518 }
@@ -498,6 +525,7 @@ tok_get(tok, p_start, p_end)
498525 }
499526 else {
500527 tok_backup (tok , c );
528+ * p_start = tok -> start ;
501529 * p_end = tok -> cur ;
502530 return DOT ;
503531 }
@@ -538,9 +566,7 @@ tok_get(tok, p_start, p_end)
538566 else {
539567 /* Accept floating point numbers.
540568 XXX This accepts incomplete things like
541- XXX 12e or 1e+; worry run-time.
542- XXX Doesn't accept numbers
543- XXX starting with a dot */
569+ XXX 12e or 1e+; worry about those at run-time */
544570 if (c == '.' ) {
545571 fraction :
546572 /* Fraction */
@@ -560,58 +586,58 @@ tok_get(tok, p_start, p_end)
560586 }
561587 }
562588 tok_backup (tok , c );
589+ * p_start = tok -> start ;
563590 * p_end = tok -> cur ;
564591 return NUMBER ;
565592 }
566593
567- /* String (single quotes) */
568- if (c == '\'' ) {
594+ /* String */
595+ if (c == '\'' || c == '"' ) {
596+ int quote = c ;
597+ int triple = 0 ;
598+ int tripcount = 0 ;
569599 for (;;) {
570600 c = tok_nextc (tok );
571- if (c == '\n' || c == EOF ) {
572- tok -> done = E_TOKEN ;
573- tok -> cur = tok -> inp ;
574- return ERRORTOKEN ;
575- }
576- if (c == '\\' ) {
577- c = tok_nextc (tok );
578- * p_end = tok -> cur ;
579- if (c == '\n' || c == EOF ) {
601+ if (c == '\n' ) {
602+ if (!triple ) {
580603 tok -> done = E_TOKEN ;
581- tok -> cur = tok -> inp ;
604+ tok_backup ( tok , c ) ;
582605 return ERRORTOKEN ;
583606 }
584- continue ;
607+ tripcount = 0 ;
585608 }
586- if (c == '\'' )
587- break ;
588- }
589- * p_end = tok -> cur ;
590- return STRING ;
591- }
592-
593- /* String (double quotes) */
594- if (c == '\"' ) {
595- for (;;) {
596- c = tok_nextc (tok );
597- if (c == '\n' || c == EOF ) {
609+ else if (c == EOF ) {
598610 tok -> done = E_TOKEN ;
599611 tok -> cur = tok -> inp ;
600612 return ERRORTOKEN ;
601613 }
602- if (c == '\\' ) {
614+ else if (c == quote ) {
615+ tripcount ++ ;
616+ if (tok -> cur == tok -> start + 2 ) {
617+ c = tok_nextc (tok );
618+ if (c == quote ) {
619+ triple = 1 ;
620+ tripcount = 0 ;
621+ continue ;
622+ }
623+ tok_backup (tok , c );
624+ }
625+ if (!triple || tripcount == 3 )
626+ break ;
627+ }
628+ else if (c == '\\' ) {
629+ tripcount = 0 ;
603630 c = tok_nextc (tok );
604- * p_end = tok -> cur ;
605- if (c == '\n' || c == EOF ) {
631+ if (c == EOF ) {
606632 tok -> done = E_TOKEN ;
607633 tok -> cur = tok -> inp ;
608634 return ERRORTOKEN ;
609635 }
610- continue ;
611636 }
612- if ( c == '\"' )
613- break ;
637+ else
638+ tripcount = 0 ;
614639 }
640+ * p_start = tok -> start ;
615641 * p_end = tok -> cur ;
616642 return STRING ;
617643 }
@@ -624,7 +650,6 @@ tok_get(tok, p_start, p_end)
624650 tok -> cur = tok -> inp ;
625651 return ERRORTOKEN ;
626652 }
627- tok -> lineno ++ ;
628653 goto again ; /* Read next line */
629654 }
630655
@@ -633,13 +658,14 @@ tok_get(tok, p_start, p_end)
633658 int c2 = tok_nextc (tok );
634659 int token = tok_2char (c , c2 );
635660 if (token != OP ) {
661+ * p_start = tok -> start ;
636662 * p_end = tok -> cur ;
637663 return token ;
638664 }
639665 tok_backup (tok , c2 );
640666 }
641667
642- /* Keep track of parenteses nesting level */
668+ /* Keep track of parentheses nesting level */
643669 switch (c ) {
644670 case '(' :
645671 case '[' :
@@ -654,6 +680,7 @@ tok_get(tok, p_start, p_end)
654680 }
655681
656682 /* Punctuation character */
683+ * p_start = tok -> start ;
657684 * p_end = tok -> cur ;
658685 return tok_1char (c );
659686}
0 commit comments