Simple URL parser

hsanson hsan... at gmail.com
Fri May 23 06:10:47 UTC 2008


To learn how to use Ragel I am implementing a simple URL parser that
receives something like "http://www.ragel.com:8080/file.txt" and
returns each part (scheme, hostname, port, path) as strings. As I
understand doing this with Ragel should be a breeze.

Still there is something I am not getting right and would like some
advice, see code below:

The scheme part seems to work so I assume my understanding of Ragel is
not that bad. The problem is with the hostname and port parts. The
hostname action gets called for each character on the hostname, that
is not the intended behavior and the port action never gets called.

Any tips to take me back on track would be greatly appreciated.

Horacio

//###################################
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
    char *scheme;
    char *hostname;
    char *service;
    char *path;
    char *uri;
} suj_url;

%%{
    machine uri_parser;

# Actions
    action mark_start {
        start = fpc;
        printf("Mark start at %c\n", fc);
    }

    action scheme {
        size_t len = fpc - start + 1;
        url->scheme = calloc(len,sizeof(char));
        strncpy(url->scheme, start, len);
        url->scheme[len]='\0';
        printf("scheme: %s\n",url->scheme);
    }

    action host {
        size_t len = fpc - start + 1;
        url->hostname = calloc(len,sizeof(char));
        strncpy(url->hostname, start, len);
        url->hostname[len]='\0';
        printf("host: %s\n",url->hostname);
    }

    action port {
        size_t len = fpc - start + 1;
        url->service = calloc(len,sizeof(char));
        strncpy(url->service, start, len);
        url->service[len]='\0';
        printf("service: %s\n",url->service);
    }
# Grammar
    escaped = ("%" xdigit xdigit);
    scheme = ("http"i | "rtsp"i | "rtp"i) >mark_start @scheme;
    port   = (":" digit+) >mark_start %port;
    host   = (any* -- ("/" | ":")) >mark_start @host;

    uri = (scheme "://" host  port ) . '\0';

# Main
    main := uri;

}%%

%%write data;

suj_url * suj_url_new(char *uri)
{
    suj_url *url;
    char *start;
    char *end;

    int cs;
    %% write init;

    char *p = uri;
    char *pe = p + strlen(uri);

    url = calloc(1,sizeof(url));
    url->uri = calloc(strlen(uri),sizeof(char));
    strncpy(url->uri,uri, strlen(uri));

    %% write exec;

    return url;
}

int main(int argc, char **argv)
{
    suj_url *url;
    url = suj_url_new("rtp://www.ragel.org:8080");
}



More information about the ragel-users mailing list