(unexpected) problem with accented chars

jupp_g jup... at hotmail.de
Thu Jan 10 22:05:40 UTC 2008


If you feed the following console app chars like [ÄÖÜäöüß], they are
not accepted, which is somewhat unexpected (for me). The dot output
looks fine, with proper states and transitions. The problem is
definitely not related to the code page switching, but I didn't delete
the code that enables showing the chars on the console (cmd.exe
defaults to code page 850, which doesn't have some of the chars I
want). If I use the "all" machine, things work as expected.

Any hints?

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
#include <stdio.h>
#include <string.h>

/* Size in bytes of the per-argument line[] buffer, terminating NUL included. */
#define BUF_LEN		128

%%{
	# Recognizes one NUL-terminated word made of ASCII alphanumerics and
	# the bytes 0xC0..0xFF, copying the word into the host buffer line[].
	machine cp1252;

	# Keys are unsigned bytes. The host's data pointer p must be an
	# unsigned char pointer as well: where plain char is signed, bytes
	# >= 0x80 read through a char* are negative and never match 0xC0..0xFF.
	alphtype unsigned char;

	# Runs on the terminating 0 byte: counts the line, records whether cs
	# is at or beyond the first final state, and prints the collected text.
	action finish_line {
		++count;
		res = (cs >= cp1252_first_final);
		printf( "==> \"%s\"\n",  line );
	}

	# Runs when the word is entered: restart collecting at index 0.
	action start_line {
		line_idx = 0;
	}

	# Runs on every character of the word: append it to line[].
	action add_char {
		/* Keep one slot free for the terminator written by end_line;
		 * characters beyond the buffer capacity are dropped. */
		if ( line_idx < BUF_LEN - 1 ) {
			line[line_idx++] = fc;
		}
	}

	# Runs when the word is left: terminate the collected string.
	action end_line {
		line[line_idx] = '\0';
	}

	more = (0xC0..0xFF);
	what_i_want = ('0'..'9')|('A'..'Z')|('a'..'z')|(0xC0..0xFF);
	all = (0x30..0xFF);

	# Active definition (reported as failing on accented input):
	main := ( ( (alnum|more)+ > start_line $ add_char % end_line ) ) 0 @ finish_line;
	# Same language spelled with explicit ranges (also reported as failing):
	# main := ( ( (what_i_want)+ > start_line $ add_char % end_line ) ) 0 @ finish_line;
	# Only almost what is wanted (accepts every byte from 0x30 up), but works:
	# main := ( ( (all)+ > start_line $ add_char % end_line ) ) 0 @ finish_line;
}%%

%% write data;

/*
 * Runs the cp1252 machine once over every command-line argument and prints,
 * for each argument, the text collected by the machine's actions, whether
 * the machine ended in a final state (res) and how many lines it finished
 * (count).
 *
 * Always returns 0.
 */
int main( int argc, char* argv[] )
{
#ifdef _WIN32
	/* Switch console input and output to code page 1252 so the accented
	 * characters can be entered and displayed. */
	SetConsoleCP( 1252 );
	SetConsoleOutputCP( 1252 );
#endif

	for ( int i = 1; i < argc; ++i ) {

		int  cs;
		/* The machine declares "alphtype unsigned char", so the input has
		 * to be scanned through unsigned char pointers. With plain char
		 * (signed on many platforms) bytes >= 0x80 are seen as negative
		 * values and never match the 0xC0..0xFF range. */
		const unsigned char *p = (const unsigned char *)argv[i];
		/* +1: the terminating NUL is part of the input, the machine
		 * expects a 0 byte after the word. */
		const unsigned char *pe = p + strlen( argv[i] ) + 1;

		int res = 0;
		int count = 0;
		char line[BUF_LEN];
		int line_idx = -1;	/* set to 0 by the start_line action */
		line[0] = '\0';

		printf( "argv[%d] = \"%s\"\n", i, argv[i] );

		%% write init;
		%% write exec;

		printf( "\tresult = %d (count=%d)\n", res, count );
	}

	return 0;
}



More information about the ragel-users mailing list