Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r103:1ad5d2bb1fa5 Date: 2014-11-28 23:11 +0100 http://bitbucket.org/cffi/creflect/changeset/1ad5d2bb1fa5/
Log: Parsing simple C declarations, starting diff --git a/creflect/src/c_decl_parser.c b/creflect/src/c_decl_parser.c new file mode 100644 --- /dev/null +++ b/creflect/src/c_decl_parser.c @@ -0,0 +1,197 @@ +#include <stdlib.h> +#include "creflect.h" + + +enum crxp_token_e { + TOK_STAR='*', + TOK_OPEN_PAREN='(', + TOK_CLOSE_PAREN=')', + TOK_OPEN_BRACKET='[', + TOK_CLOSE_BRACKET=']', + TOK_COMMA=',', + + TOK_START=256, + TOK_END, + TOK_ERROR, + TOK_IDENTIFIER, + TOK_INTEGER, + + /* keywords */ + TOK__BOOL, + TOK_CHAR, + //TOK__COMPLEX, + TOK_CONST, + TOK_DOUBLE, + TOK_FLOAT, + //TOK__IMAGINARY, + TOK_INT, + TOK_LONG, + TOK_SHORT, + TOK_SIGNED, + TOK_STRUCT, + TOK_UNION, + TOK_UNSIGNED, + TOK_VOID, +}; + +typedef struct { + enum crxp_token_e kind; + const char *p; + size_t size; + crx_builder_t *cb; +} crxp_token_t; + +static int is_space(char x) +{ + return (x == ' ' || x == '\f' || x == '\n' || x == '\r' || + x == '\t' || x == '\v'); +} + +static int is_ident_first(char x) +{ + return ('A' <= x && x <= 'Z' || 'a' <= x && x <= 'z' || x == '_'); +} + +static int is_ident_next(char x) +{ + return (is_ident_first(x) || '0' <= x && x <= '9'); +} + +static void next_token(crxp_token_t *tok) +{ + const char *p = tok->p + tok->size; + if (tok->kind == TOK_ERROR) + return; + while (!is_ident_first(*p)) { + if (is_space(*p)) { + p++; + } + else if (*p) { + tok->kind = *p; + tok->p = p; + tok->size = 1; + return; + } + else { + tok->kind = TOK_END; + tok->p = p; + tok->size = 0; + return; + } + } + tok->p = p; + tok->size = 1; + while (is_ident_next(p[tok->size])) + tok->size++; + tok->kind = TOK_IDENTIFIER; + + switch (*p) { + case '_': + if (tok->size == 5 && !memcmp(p, "_Bool", 5)) tok->kind = TOK__BOOL; + break; + case 'c': + if (tok->size == 4 && !memcmp(p, "char", 4)) tok->kind = TOK_CHAR; + if (tok->size == 5 && !memcmp(p, "const", 5)) tok->kind = TOK_CONST; + break; + case 'd': + if (tok->size == 6 && !memcmp(p, "double", 6)) tok->kind = TOK_DOUBLE; + break; + case 'f': + if (tok->size == 5 && !memcmp(p, "float", 5)) tok->kind = TOK_FLOAT; + break; + case 'i': + if (tok->size == 3 && !memcmp(p, "int", 3)) tok->kind = TOK_INT; + break; + case 'l': + if (tok->size == 4 && !memcmp(p, "long", 4)) tok->kind = TOK_LONG; + break; + case 's': + if (tok->size == 5 && !memcmp(p, "short", 5)) tok->kind = TOK_SHORT; + if (tok->size == 6 && !memcmp(p, "signed", 6)) tok->kind = TOK_SIGNED; + if (tok->size == 6 && !memcmp(p, "struct", 6)) tok->kind = TOK_STRUCT; + break; + case 'u': + if (tok->size == 5 && !memcmp(p, "union", 5)) tok->kind = TOK_UNION; + if (tok->size == 8 && !memcmp(p,"unsigned",8)) tok->kind = TOK_UNSIGNED; + break; + case 'v': + if (tok->size == 4 && !memcmp(p, "void", 4)) tok->kind = TOK_VOID; + break; + } +} + +static crx_type_t *parse_sequel_right(crxp_token_t *tok, crx_type_t *t1) +{ + switch (tok->kind) { + + case TOK_OPEN_PAREN: + abort(); + + case TOK_OPEN_BRACKET: + abort(); + + default: + return t1; + } +} + +static crx_type_t *parse_sequel(crxp_token_t *tok, crx_type_t *t1) +{ + while (1) { + switch (tok->kind) { + + case TOK_STAR: + t1 = tok->cb->get_pointer_type(tok->cb, t1); + break; + + case TOK_CONST: + t1 = tok->cb->get_const_type(tok->cb, t1); + break; + + default: + return parse_sequel_right(tok, t1); + } + + next_token(tok); + } +} + +static crx_type_t *parse_complete(crxp_token_t *tok) +{ + crx_type_t *t1; + int is_const = (tok->kind == TOK_CONST); + if (is_const) { + next_token(tok); + } + switch (tok->kind) { + case TOK_INT: + t1 = tok->cb->get_signed_type(tok->cb, sizeof(int), "int"); + break; + default: + tok->kind = TOK_ERROR; + return NULL; + } + next_token(tok); + + if (is_const) { + t1 = tok->cb->get_const_type(tok->cb, t1); + } + return parse_sequel(tok, t1); +} + +crx_type_t *creflect_decl_parser(crx_builder_t *cb, const char **input) +{ + crxp_token_t token; + crx_type_t *t1; + token.kind = TOK_START; + token.cb = cb; + token.p = *input; + token.size = 0; + next_token(&token); + t1 = parse_complete(&token); + + if (token.kind == TOK_END) + return t1; + *input = token.p; + return NULL; +} diff --git a/test/test_c_decl_parser.py b/test/test_c_decl_parser.py new file mode 100644 --- /dev/null +++ b/test/test_c_decl_parser.py @@ -0,0 +1,72 @@ +import os, subprocess +from .udir import udir + + +TESTER = r""" +#include "c_decl_parser.c" +#include "creflect_print.h" + +int main(int argc, char *argv[]) +{ + const char *p = argv[1]; + crx_type_t *t1 = creflect_decl_parser(&maincb, &p); + if (t1 != NULL) + printf("%s\n", t1->text); + else { + printf("error: %s\n ", argv[1]); + while (p > argv[1]) { + printf(" "); + p--; + } + printf("^\n"); + } + return 0; +} +""" + + +def setup_module(mod): + executable = str(udir.join('c_decl_parser_test')) + f = open(executable + '.c', 'w') + f.write(TESTER) + f.close() + err = os.system("gcc -g -Werror '%s.c' -o '%s' -I../creflect/src" % ( + executable, executable)) + assert not err + mod.executable = executable + + +def parse(input, expected_output): + global executable + got = subprocess.check_output([executable, input]) + assert got == expected_output + '\n' + +def parse_error(input, expected_location): + parse(input, 'error: %s\n %s^' % (input, " " * expected_location)) + +def test_c_decl_parser(): + parse("int **", "PTR PTR int") + parse("const int **", "PTR PTR CONST int") + parse("int const **", "PTR PTR CONST int") + parse("int *const *", "PTR CONST PTR int") + parse("int ** const", "CONST PTR PTR int") + import py; py.test.skip("in-progress") + parse("int[2]") + parse("int*[2][3]") + parse("int(*)[2][3]") + parse("int(*[2])[3]") + parse("int()") + parse("int(void)") + parse("int(int)") + parse("int(int *, int const *)") + parse("int(*)(int)") + parse("unsigned int") + parse("unsigned long long *") + parse("const unsigned long long *") + parse("unsigned long long const *") + parse("char(*(*)(long))(int)") + parse("foo_t[]") + +def test_c_decl_error(): + parse_error("*", 0) + parse_error("int ]**", 4) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit