ragel and memory usage

Damir Nedzibovic dam... at gmail.com
Sun Jan 21 13:58:32 UTC 2007


Hi Adrian,

thanks for your help - I managed to isolate the ambiguity in my grammar
and even to refine it a bit. It works much better now.

Once I finish the complete grammar I'll post it here; maybe somebody
else will find it useful.

thanks,
d

On Jan 21, 7:00 am, "Adrian Thurston" <thurs... at cs.queensu.ca> wrote:
> Hi, sorry I didn't notice the message text at the bottom. I'll have a look
> at it to find out why it fails when adding the actions mentioned.
>
> -Adrian
>
> > Hi all,
>
> > I'm trying to build a grammar for parsing SIP messages (as in rfc3261),
> > but I have encountered a problem that I cannot solve - it seems that
> > I've hit a state explosion, since ragel just keeps allocating memory
> > and finally dies.
>
> > Here's my grammar (sorry for pasting it here like this)
>
> > --cut--
> > %%{
> >   machine sip_parser;
>
> >   action mark
> >   {
> >   }
>
> >   action req_i
> >   {
> >   }
>
> >   action req_a
> >   {
> >   }
>
> >   action req_o
> >   {
> >   }
>
> >   action req_b
> >   {
> >   }
>
> >   action req_c
> >   {
> >   }
>
> >   action req_r
> >   {
> >   }
>
> >   action create_req
> >   {
> >   }
>
> >   action request_method
> >   {
> >   }
>
> >   action uri_host
> >   {
> >   }
>
> >   action done
> >   {
> >     fbreak;
> >   }
>
> >   action uri_port
> >   {
> >   }
>
> >   action uri_pass
> >   {
> >   }
>
> >   action uri_user
> >   {
> >   }
>
> >   action uri_scheme
> >   {
> >   }
>
> >   action set_req_uri
> >   {
> >   }
>
> >   action add_param
> >   {
> >   }
>
> >   action add_param_value
> >   {
> >   }
>
> >   action uri_query
> >   {
> >   }
>
> > #### SIP PROTOCOL GRAMMAR
>
> >   CRLF = "\r\n";
> >   SP = " ";
> >   HTAB = "\t";
> >   DQUOTE = "\"";
> >   LHEX = digit | [a-f];
>
> >   alphanum = [a-zA-Z0-9];
> >   reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" |
> > ",");
> >   mark = ("-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")");
> >   unreserved = (alphanum | mark);
> >   escaped = ("%" xdigit xdigit);
>
> >   LWS = ((" "* CRLF)? " "+);
> >   SWS = (LWS?);
>
> >   HCOLON = ((" " | "\t")* ":" SWS);
>
> >   token = (alphanum | "-" | "." | "!" | "%" | "*" | "_" | "+" | "`" |
> > "'" | "~" )+;
> >   separators = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" |
> > DQUOTE | "/" | "[" | "]" | "?" | "=" | "{" | "}" | SP | HTAB);
> >   word = (alphanum | "-" | "." | "!" | "%" | "*" | "_" | "+" | "`" |
> > "'" | "~" | "(" | ")" | "<" | ">" | ":" | "\\" | DQUOTE | "/" | "[" |
> > "]" | "?" | "{" | "}")+;
>
> >   STAR   = (SWS "*" SWS);
> >   SLASH  = (SWS "/" SWS);
> >   EQUAL  = (SWS "=" SWS);
> >   LPAREN = (SWS "(" SWS);
> >   RPAREN = (SWS ")" SWS);
> >   RAQUOT = (">" SWS);
> >   LAQUOT = (SWS "<");
> >   COMMA  = (SWS "," SWS);
> >   SEMI   = (SWS ";" SWS);
> >   COLON  = (SWS ":" SWS);
> >   LDQUOT = (SWS DQUOTE);
> >   RDQUOT = (DQUOTE SWS);
>
> >   qdtext = (LWS | 0x21 | 0x23..0x5B | 0x5D..0x7E);
> >   quoted_pair = ("\\" (0x00..0x09 | 0x0B..0x0C | 0x0E..0x7F));
> >   quoted_string = (SWS DQUOTE (qdtext | quoted_pair)* DQUOTE);
> >   ctext    = (0x21..27 | 0x2A..0x5B | 0x5D..0x7E | LWS);
> >   comment_r = (LPAREN? (ctext | quoted_pair) RPAREN?);
> >   comment  = (LPAREN (comment_r)* RPAREN);
>
> >   port = digit+;
> >   IPv4address =  (digit{1,3} "." digit{1,3} "." digit{1,3} "."
> > digit{1,3});
> >   toplabel = (alpha | (alpha (alphanum | "-")* alphanum));
> >   domainlabel = (alphanum | (alphanum (alphanum | "-")* alphanum));
> >   hostname = ((domainlabel ".")* toplabel "."?);
> >   host = (hostname | IPv4address);
> >   hostport = (host >mark %uri_host (":" port >mark %uri_port)?);
> >   password = ((unreserved | escaped | "&" | "=" | "+" | "$" | "," )*);
> >   user_unreserved = ("&" | "=" | "+" | "$" | "," | ";" | "?" | "/");
> >   user = ((unreserved | escaped | user_unreserved)+);
> >   userinfo = (user >mark %uri_user (":" password >mark %uri_pass)?
> > "@");
>
> >   extension_method = token;
> >   Method = (("INVITE" %req_i | "ACK" %req_a | "OPTIONS" %req_o | "BYE"
> > %req_b | "CANCEL" %req_c | "REGISTER" %req_r | extension_method)) >mark
> > %request_method;
>
> >   hnv_unreserved = ("[" | "]" | "|" | "?" | ":" | "+" | "$");
> >   hvalue = ((hnv_unreserved | unreserved | escaped)*);
> >   hname = ((hnv_unreserved | unreserved | escaped)+);
> >   header = (hname "=" hvalue);
> >   headers = ("?" header ("&" header)*);
> >   param_unreserved = ("[" | "]" | "/" | ":" | "&" | "+" | "$");
> >   paramchar = (param_unreserved | unreserved | escaped);
> >   pvalue = (paramchar+);
> >   pname = (paramchar+);
> >   other_param = (pname >mark %add_param ("=" pvalue >mark
> > %add_param_value)?);
> >   lr_param = "lr" >mark %add_param;
> >   maddr_param = "maddr" >mark %add_param "=" host >mark
> > %add_param_value;
> >   method_param = "method" >mark %add_param "=" Method >mark
> > %add_param_value;
> >   ttl = (digit{1,3});
> >   ttl_param = "ttl" >mark %add_param "=" ttl >mark %add_param_value;
> >   other_user = token;
> >   user_param = "user" >mark %add_param "=" ("phone" | "ip" |
> > other_user) >mark %add_param_value;
> >   other_transport = token;
> >   transport_param = ("transport" >mark %add_param "=" ("udp" | "tcp" |
> > "sctp" | "tls" | other_transport) >mark %add_param_value);
> >   uri_parameter = (transport_param | user_param | method_param |
> > ttl_param | maddr_param | lr_param | other_param);
> >   uri_parameters = ((";" uri_parameter)*);
> >   SIP_URI = ("sip:" %{ m_data->m_uri.scheme() = "sip"; } (userinfo)?
> > hostport uri_parameters (headers)?);
> >   SIPS_URI = ("sips:" %{ m_data->m_uri.scheme() = "sips"; } (userinfo)?
> > hostport uri_parameters (headers)?);
>
> >   x_token = ("x-" token);
> >   ietf_token = token;
> >   extension_token = (ietf_token | x_token);
> >   composite_type = ("message" | "multipart" | extension_token);
> >   discrete_type = ("text" | "image" | "audio" | "video" | "application"
> > | extension_token);
> >   m_type = (discrete_type | composite_type);
>
> >   m_value = (token | quoted_string);
> >   m_attribute = token;
> >   m_parameter = (m_attribute EQUAL m_value);
> >   iana_token = token;
> >   m_subtype = (extension_token | iana_token);
>
> >   uric           = (reserved | unreserved | escaped);
> >   query          = (uric*);
> >   reg_name       = (unreserved | escaped | "$" | "," | ";" | ":" | "@"
> > | "&" | "=" | "+" )+;
> >   srvr           = ((userinfo "@")? hostport)?;
> >   authority      = (srvr | reg_name);
> >   scheme         = (alpha (alpha | digit | "+" | "-" | "." )*);
> >   pchar          = (unreserved | escaped | ":" | "@" | "&" | "=" | "+"
> > | "$" | ",");
> >   param          = (pchar*);
> >   segment        = (pchar* (";" param)*);
> >   path_segments  = (segment ("/" segment)*);
> >   uric_no_slash  = (unreserved | escaped | ";" | "?" | ":" | "@" | "&"
> > | "=" | "+" | "$" | ",");
> >   opaque_part    = (uric_no_slash uric*);
> >   abs_path       = ("/" path_segments);
> >   net_path       = ("//" authority (abs_path)?);
> >   hier_part      = ((net_path | abs_path) ("?" query >mark
> > %uri_query)?);
> >   absoluteURI    = (scheme >mark %uri_scheme ":" (hier_part |
> > opaque_part));
>
> >   gen_value = (token | host | quoted_string);
> >   generic_param = (token (EQUAL gen_value)?);
> >   qvalue = (("0" ("." digit{,3})?) | ("1" ("." ("0"){,3})?));
> >   accept_param = (("q" EQUAL qvalue) | generic_param);
> >   media_range = (("*" "/" "*" | ( m_type SLASH "*" ) | ( m_type SLASH
> > m_subtype )) (SEMI m_parameter)*);
> >   accept_range = (media_range (SEMI accept_param)*);
> >   Accept = ("Accept" HCOLON (accept_range (COMMA accept_range)*))?;
>
> >   content_coding   = (token);
> >   codings          = (content_coding | "*");
> >   encoding         = (codings (SEMI accept_param)*);
> >   Accept_Encoding  = ("Accept-Encoding" HCOLON (encoding (COMMA
> > encoding)*)?);
>
> >   language_range   = (((alpha{1,8} ("-" alpha{1,8})*) | "*" ));
> >   language         = (language_range (SEMI accept_param)*);
> >   Accept_Language  = ("Accept-Language" HCOLON (language (COMMA
> > language)*)?);
>
> >   alert_param      = (LAQUOT absoluteURI RAQUOT (SEMI generic_param
> > )*);
> >   Alert_Info       = ("Alert-Info" HCOLON alert_param (COMMA
> > alert_param)*);
>
> >   Allow            = ("Allow" HCOLON (Method (COMMA Method)*)?);
>
> >   opaque           = ("opaque" EQUAL quoted_string);
> >   algorithm        = ("algorithm" EQUAL ("MD5" | "MD5-sess" | token));
> >   realm_value      = (quoted_string);
> >   realm            = ("realm" EQUAL realm_value);
> >   auth_scheme      = (token);
> >   auth_param_name  = (token);
> >   auth_param       = (auth_param_name EQUAL (token | quoted_string));
> >   other_response   = (auth_scheme LWS auth_param (COMMA auth_param)*);
> >   request_digest   = (LDQUOT LHEX{32} RDQUOT);
> >   dresponse        = ("response" EQUAL request_digest);
> >   nonce_value      = (quoted_string);
> >   nonce            = ("nonce" EQUAL nonce_value);
> >   nc_value         = (LHEX{8});
> >   nonce_count      = ("nc" EQUAL nc_value);
> >   cnonce_value     = (nonce_value);
> >   cnonce           = ("cnonce" EQUAL cnonce_value);
> >   qop_value        = ("auth" | "auth-int" | token);
> >   message_qop      = ("qop" EQUAL qop_value);
> >   digest_uri_value = (absoluteURI); # fixme
> >   digest_uri       = ("uri" EQUAL LDQUOT digest_uri_value RDQUOT);
> >   username_value   = (quoted_string);
> >   username         = ("username" EQUAL username_value);
> >   dig_resp         = (username | realm | nonce | digest_uri | dresponse
> > | algorithm | cnonce | opaque | message_qop | nonce_count |
> > auth_param);
> >   digest_response  = (dig_resp (COMMA dig_resp)*);
> >   credentials      = (("Digest" LWS digest_response) | other_response);
> >   Authorization    = ("Authorization" HCOLON credentials);
>
> >   response_digest      = (LDQUOT LHEX* RDQUOT);
> >   response_auth        = ("rspauth" EQUAL response_digest);
> >   nextnonce            = ("nextnonce" EQUAL nonce_value);
> >   ainfo                = (nextnonce | message_qop | response_auth |
> > cnonce | nonce_count);
> >   Authentication_Info  = ("Authentication-Info" HCOLON ainfo (COMMA
> > ainfo)*);
>
> >   callid   =  (word ("@" word)?);
> >   Call_ID  = (( "Call-ID" | "i" ) HCOLON callid);
>
> >   info_param  = (("purpose" EQUAL ("icon" | "info" | "card" | token)) |
> > generic_param);
> >   info        = (LAQUOT absoluteURI RAQUOT (SEMI info_param)*);
> >   Call_Info   = ("Call-Info" HCOLON info (COMMA info)*);
>
> >   delta_seconds      = (digit+);
> >   contact_extension  = (generic_param);
> >   c_p_expires    = ("expires" EQUAL delta_seconds);
> >   c_p_q          = ("q" EQUAL qvalue);
> >   contact_params = (c_p_q | c_p_expires);
> >   display_name   = ((token LWS)* | quoted_string);
> >   addr_spec    ...
> 
> read more »



More information about the ragel-users mailing list