[ragel-users] can't pull parse strings - BUG?

Adrian Thurston thurs... at cs.queensu.ca
Mon Feb 5 18:25:54 UTC 2007


Ahhh, sorry, this example is broken. It's missing a "%% write init;" in the
init function. Move the "write data" up so that it precedes the init function,
add the "write init" there, and also add an fbreak after each
ret_tok(TK_String); then your troubles should go away.

Note that it worked before you added the strings only by chance: the start
state happened to have id 0, which is the value the memset in scan_init
leaves in cs.

-Adrian

Jason wrote:
> I've finally gotten a little time to start working with the new
> pullscan example.
> 
> Everything was going well until I tried to add string tokens. The rl
> (below) works as is but un-comment the String rules and all hell
> breaks loose.
> 
> Any pointers would be most welcome.
> 
> ---      TEST input file -------
> [target go: 124 with:(1..9) num:1,298,99 int:87 float:1,299.987]
> 
> (list "some string")
> (list 'some other string')
> 
> -------------- mod_pullscan.rl ------------
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> 
> #define BUFSIZE 4096
> 
> typedef struct _Scanner {
> 	/* Scanner state. */
>     int cs;
>     int act;
>     int have;
>     int curline;
>     char *tokstart;
>     char *tokend;
>     char *p;
>     char *pe;
> 	FILE *file;
> 	int done;
> 
> 	/* Token data */
> 	char *data;
> 	int len;
>     int value;
> 	char *token_name;
> 	char buf[BUFSIZE];
> } Scanner;
> 
> 
> void scan_init( Scanner *s, FILE *file )
> {
> 	memset (s, '\0', sizeof(Scanner));
> 	s->curline = 1;
> 	s->file = file;
> }
> 
> #define TK_NO_TOKEN (-1)
> #define TK_ERR 128
> #define TK_EOF 129
> #define TK_Identifier 130
> #define TK_Number 131
> #define TK_Keyword 132
> #define TK_String 133
> 
> 
> %%{
> 	machine Scanner;
> 	write data;
> }%%
> 
> #define ret_tok( _tok ) token = _tok; s->data = s->tokstart; s->token_name = #_tok
> #define ret_char( _tok ) token = _tok; s->data = s->tokstart; s->token_name = "TK_Char"
> 
> int scan( Scanner *s )
> {
> 	char *p = s->p;
> 	char *pe = s->pe;
> 	int token = TK_NO_TOKEN;
> 	int space, readlen;
> 
> 	while ( 1 ) {
> 		if ( p == pe ) {
> 			printf("scanner: need more data\n");
> 
> 			if ( s->tokstart == 0 )
> 				s->have = 0;
> 			else {
> 				/* There is data that needs to be shifted over. */
> 				printf("scanner: buffer broken mid token\n");
> 				s->have = pe - s->tokstart;
> 				memmove( s->buf, s->tokstart, s->have );
> 				s->tokend -= (s->tokstart-s->buf);
> 				s->tokstart = s->buf;
> 			}
> 
> 			p = s->buf + s->have;
> 			space = BUFSIZE - s->have;
> 
> 			if ( space == 0 ) {
> 				/* We filled up the buffer trying to scan a token. */
> 				printf("scanner: out of buffer space\n");
> 				return TK_ERR;
> 			}
> 
> 			if ( s->done ) {
> 				printf("scanner: end of file\n");
> 				p[0] = 0;
> 				readlen = 1;
> 			}
> 			else {
> 				readlen = fread( p, 1, space, s->file );
> 				if ( readlen < space )
> 					s->done = 1;
> 			}
> 
> 			pe = p + readlen;
> 		}
> 
> 		%%{
> 			machine Scanner;
> 			access s->;
> 
> 			newline = '\n' @{s->curline += 1;};
> 			any_count_line = any | newline;
> 
> 			main := |*
> 
> 			# Alphanumeric characters or underscore.
> 			alnum_u = alnum | '_';
> 
> 			# Alpha characters or underscore.
> 			alpha_u = alpha | '_';
> 
> 			ident = alpha_u alnum_u*;
> 
> 			# Identifiers
> 			ident =>
> 				{ ret_tok( TK_Identifier ); fbreak; };
> 
> 			# Keywords
> 			ident ':' => {
> 				ret_tok (TK_Keyword); fbreak;
> 			};
> 
> 	# Strings
> #			"'" ( [^'\\] | /\\./ )* "'" => { ret_tok (TK_String); };
> #			'"' ( [^"\\] | /\\./ )* '"' => { ret_tok (TK_String); };
> 
> 			# Whitespace
> 			[ \t\n];
> 
> 			# Number
> 			digit+ =>
> 				{ ret_tok( TK_Number ); fbreak; };
> 
> 			# EOF
> 			0 =>
> 				{ ret_tok( TK_EOF ); fbreak; };
> 
> 			# Anything else
> 			any =>
> 				{ ret_char( *p ); fbreak; };
> 
> 			*|;
> 
> 			write exec;
> 		}%%
> 
> 		if ( s->cs == Scanner_error )
> 			return TK_ERR;
> 
> 		if ( token != TK_NO_TOKEN ) {
> 			/* Save p and pe. fbreak does not advance p. */
> 			s->p = p + 1;
> 			s->pe = pe;
> 			s->len = s->p - s->data;
> 			return token;
> 		}
> 	}
> }
> 
> 
> int main (int argc, char** argv)
> {
> 	Scanner ss;
> 	int tok;
> 
> 	scan_init(&ss, stdin);
> 
> 	while ( 1 ) {
> 		tok = scan (&ss);
> 		if ( tok == TK_EOF ) {
> 			printf ("parser: EOF\n");
> 			break;
> 		}
> 		else if ( tok == TK_ERR ) {
> 			printf ("parser: ERR\n");
> 			break;
> 		}
> 		else {
> 			printf ("parser: %s(%d) \"", ss.token_name, tok);
> 			fwrite ( ss.data, 1, ss.len, stdout );
> 			printf ("\"\n" );
> 		}
> 	}
> 
> 	return 0;
> }
> 
> 
> 



More information about the ragel-users mailing list