Author: Armin Rigo <ar...@tunes.org>
Branch: 
Changeset: r103:1ad5d2bb1fa5
Date: 2014-11-28 23:11 +0100
http://bitbucket.org/cffi/creflect/changeset/1ad5d2bb1fa5/

Log:    Parsing simple C declarations, starting

diff --git a/creflect/src/c_decl_parser.c b/creflect/src/c_decl_parser.c
new file mode 100644
--- /dev/null
+++ b/creflect/src/c_decl_parser.c
@@ -0,0 +1,197 @@
+#include <stdlib.h>
+#include "creflect.h"
+
+
+enum crxp_token_e {  /* kinds stored in crxp_token_t.kind */
+    TOK_STAR='*',  /* single-character tokens use their character code */
+    TOK_OPEN_PAREN='(',
+    TOK_CLOSE_PAREN=')',
+    TOK_OPEN_BRACKET='[',
+    TOK_CLOSE_BRACKET=']',
+    TOK_COMMA=',',
+
+    TOK_START=256,  /* named kinds begin above the 8-bit character codes */
+    TOK_END,  /* set by next_token() at the terminating NUL */
+    TOK_ERROR,  /* once set, next_token() returns without advancing */
+    TOK_IDENTIFIER,  /* a word that is not one of the keywords below */
+    TOK_INTEGER,
+
+    /* keywords */
+    TOK__BOOL,
+    TOK_CHAR,
+    //TOK__COMPLEX,
+    TOK_CONST,
+    TOK_DOUBLE,
+    TOK_FLOAT,
+    //TOK__IMAGINARY,
+    TOK_INT,
+    TOK_LONG,
+    TOK_SHORT,
+    TOK_SIGNED,
+    TOK_STRUCT,
+    TOK_UNION,
+    TOK_UNSIGNED,
+    TOK_VOID,
+};
+
+typedef struct {  /* tokenizer state: the current token plus the builder */
+    enum crxp_token_e kind;  /* kind of the current token */
+    const char *p;  /* start of the current token inside the input string */
+    size_t size;  /* length of the current token, in characters */
+    crx_builder_t *cb;  /* builder whose get_*_type callbacks make the types */
+} crxp_token_t;
+
+static int is_space(char x)  /* nonzero for ' ', \f, \n, \r, \t and \v */
+{
+    return (x == ' ' || x == '\f' || x == '\n' || x == '\r' ||
+            x == '\t' || x == '\v');
+}
+
+static int is_ident_first(char x)  /* nonzero for A-Z, a-z or '_' */
+{
+    return ('A' <= x && x <= 'Z' || 'a' <= x && x <= 'z' || x == '_');
+}
+
+static int is_ident_next(char x)  /* nonzero for A-Z, a-z, '_' or 0-9 */
+{
+    return (is_ident_first(x) || '0' <= x && x <= '9');
+}
+
+static void next_token(crxp_token_t *tok)
+{
+    const char *p = tok->p + tok->size;
+    if (tok->kind == TOK_ERROR)
+        return;
+    while (!is_ident_first(*p)) {
+        if (is_space(*p)) {
+            p++;
+        }
+        else if (*p) {
+            tok->kind = *p;
+            tok->p = p;
+            tok->size = 1;
+            return;
+        }
+        else {
+            tok->kind = TOK_END;
+            tok->p = p;
+            tok->size = 0;
+            return;
+        }
+    }
+    tok->p = p;
+    tok->size = 1;
+    while (is_ident_next(p[tok->size]))
+        tok->size++;
+    tok->kind = TOK_IDENTIFIER;
+
+    switch (*p) {
+    case '_':
+        if (tok->size == 5 && !memcmp(p, "_Bool", 5))  tok->kind = TOK__BOOL;
+        break;
+    case 'c':
+        if (tok->size == 4 && !memcmp(p, "char", 4))   tok->kind = TOK_CHAR;
+        if (tok->size == 5 && !memcmp(p, "const", 5))  tok->kind = TOK_CONST;
+        break;
+    case 'd':
+        if (tok->size == 6 && !memcmp(p, "double", 6)) tok->kind = TOK_DOUBLE;
+        break;
+    case 'f':
+        if (tok->size == 5 && !memcmp(p, "float", 5))  tok->kind = TOK_FLOAT;
+        break;
+    case 'i':
+        if (tok->size == 3 && !memcmp(p, "int", 3))    tok->kind = TOK_INT;
+        break;
+    case 'l':
+        if (tok->size == 4 && !memcmp(p, "long", 4))   tok->kind = TOK_LONG;
+        break;
+    case 's':
+        if (tok->size == 5 && !memcmp(p, "short", 5))  tok->kind = TOK_SHORT;
+        if (tok->size == 6 && !memcmp(p, "signed", 6)) tok->kind = TOK_SIGNED;
+        if (tok->size == 6 && !memcmp(p, "struct", 6)) tok->kind = TOK_STRUCT;
+        break;
+    case 'u':
+        if (tok->size == 5 && !memcmp(p, "union", 5))  tok->kind = TOK_UNION;
+        if (tok->size == 8 && !memcmp(p,"unsigned",8)) tok->kind = TOK_UNSIGNED;
+        break;
+    case 'v':
+        if (tok->size == 4 && !memcmp(p, "void", 4))   tok->kind = TOK_VOID;
+        break;
+    }
+}
+
+static crx_type_t *parse_sequel_right(crxp_token_t *tok, crx_type_t *t1)
+{  /* called by parse_sequel() once no '*' or 'const' token remains */
+    switch (tok->kind) {
+
+    case TOK_OPEN_PAREN:
+        abort();  /* '(' is not handled yet: aborts the process */
+
+    case TOK_OPEN_BRACKET:
+        abort();  /* '[' is not handled yet: aborts the process */
+
+    default:
+        return t1;  /* any other token: t1 is returned unchanged */
+    }
+}
+
+static crx_type_t *parse_sequel(crxp_token_t *tok, crx_type_t *t1)
+{  /* wraps t1 once for each '*' or 'const' token that follows */
+    while (1) {
+        switch (tok->kind) {
+
+        case TOK_STAR:
+            t1 = tok->cb->get_pointer_type(tok->cb, t1);  /* "T *" */
+            break;
+
+        case TOK_CONST:
+            t1 = tok->cb->get_const_type(tok->cb, t1);  /* "T const" */
+            break;
+
+        default:
+            return parse_sequel_right(tok, t1);  /* no more '*' / 'const' */
+        }
+
+        next_token(tok);  /* reached only after a '*' or 'const' was used */
+    }
+}
+
+static crx_type_t *parse_complete(crxp_token_t *tok)
+{  /* parses "[const] int" followed by any '*' / 'const' suffixes */
+    crx_type_t *t1;
+    int is_const = (tok->kind == TOK_CONST);  /* leading "const", applied below */
+    if (is_const) {
+        next_token(tok);
+    }
+    switch (tok->kind) {
+    case TOK_INT:  /* the only base type accepted so far */
+        t1 = tok->cb->get_signed_type(tok->cb, sizeof(int), "int");
+        break;
+    default:
+        tok->kind = TOK_ERROR;  /* makes later next_token() calls no-ops */
+        return NULL;
+    }
+    next_token(tok);
+
+    if (is_const) {
+        t1 = tok->cb->get_const_type(tok->cb, t1);  /* const wraps the base type */
+    }
+    return parse_sequel(tok, t1);
+}
+
+crx_type_t *creflect_decl_parser(crx_builder_t *cb, const char **input)
+{  /* parses *input; on failure returns NULL and sets *input to the stop token */
+    crxp_token_t token;
+    crx_type_t *t1;
+    token.kind = TOK_START;
+    token.cb = cb;
+    token.p = *input;
+    token.size = 0;  /* empty token, so next_token() starts scanning at *input */
+    next_token(&token);
+    t1 = parse_complete(&token);
+
+    if (token.kind == TOK_END)  /* success only if the whole input was consumed */
+        return t1;
+    *input = token.p;  /* start of the token where parsing stopped */
+    return NULL;
+}
diff --git a/test/test_c_decl_parser.py b/test/test_c_decl_parser.py
new file mode 100644
--- /dev/null
+++ b/test/test_c_decl_parser.py
@@ -0,0 +1,72 @@
+import os, subprocess
+from .udir import udir
+
+
+TESTER = r"""
+#include "c_decl_parser.c"
+#include "creflect_print.h"
+
+int main(int argc, char *argv[])
+{
+    const char *p = argv[1];
+    crx_type_t *t1 = creflect_decl_parser(&maincb, &p);
+    if (t1 != NULL)
+        printf("%s\n", t1->text);
+    else {
+        printf("error: %s\n       ", argv[1]);
+        while (p > argv[1]) {
+            printf(" ");
+            p--;
+        }
+        printf("^\n");
+    }
+    return 0;
+}
+"""  # C driver: prints t1->text, or the input with a '^' under the error
+
+
+def setup_module(mod):  # writes TESTER to a .c file and compiles it with gcc
+    executable = str(udir.join('c_decl_parser_test'))
+    f = open(executable + '.c', 'w')
+    f.write(TESTER)
+    f.close()
+    err = os.system("gcc -g -Werror '%s.c' -o '%s' -I../creflect/src" % (
+        executable, executable))  # -I path is relative to the current directory
+    assert not err  # nonzero status from os.system: the gcc command failed
+    mod.executable = executable  # stored on the module; parse() reads this global
+
+
+def parse(input, expected_output):  # runs the tester on input, checks its stdout
+    global executable  # set by setup_module()
+    got = subprocess.check_output([executable, input])
+    assert got == expected_output + '\n'  # the tester ends its output with '\n'
+
+def parse_error(input, expected_location):  # caret expected under input[expected_location]
+    parse(input, 'error: %s\n       %s^' % (input, " " * expected_location))
+
+def test_c_decl_parser():  # expected strings list the outermost type constructor first
+    parse("int **", "PTR PTR int")
+    parse("const int **", "PTR PTR CONST int")
+    parse("int const **", "PTR PTR CONST int")
+    parse("int *const *", "PTR CONST PTR int")
+    parse("int ** const", "CONST PTR PTR int")
+    import py; py.test.skip("in-progress")  # calls below lack expected_output
+    parse("int[2]")
+    parse("int*[2][3]")
+    parse("int(*)[2][3]")
+    parse("int(*[2])[3]")
+    parse("int()")
+    parse("int(void)")
+    parse("int(int)")
+    parse("int(int *, int const *)")
+    parse("int(*)(int)")
+    parse("unsigned int")
+    parse("unsigned long long *")
+    parse("const unsigned long long *")
+    parse("unsigned long long const *")
+    parse("char(*(*)(long))(int)")
+    parse("foo_t[]")
+
+def test_c_decl_error():
+    parse_error("*", 0)  # no base type: error at the very first character
+    parse_error("int ]**", 4)  # error reported at the ']' (offset 4)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to