q66 pushed a commit to branch master.

http://git.enlightenment.org/core/efl.git/commit/?id=6c268d98e5d721bf29953e892eec2fdd36e0a6c4

commit 6c268d98e5d721bf29953e892eec2fdd36e0a6c4
Author: Daniel Kolesa <d.kol...@samsung.com>
Date:   Thu Jul 17 14:17:19 2014 +0100

    eolian: lexing of string literals (including support for escape sequences etc.)
---
 src/lib/eolian/eo_lexer.c  | 133 ++++++++++++++++++++++++++++++++++++++++++---
 src/lib/eolian/eo_lexer.h  |   2 +-
 src/lib/eolian/eo_parser.c |  49 +++++++++--------
 3 files changed, 152 insertions(+), 32 deletions(-)

diff --git a/src/lib/eolian/eo_lexer.c b/src/lib/eolian/eo_lexer.c
index e6af35f..46a2078 100644
--- a/src/lib/eolian/eo_lexer.c
+++ b/src/lib/eolian/eo_lexer.c
@@ -44,7 +44,7 @@ next_char(Eo_Lexer *ls)
 
 static const char * const tokens[] =
 {
-   "<comment>", "<number>", "<value>",
+   "<comment>", "<string>", "<number>", "<value>",
 
    KEYWORDS
 };
@@ -104,9 +104,9 @@ init_hash(void)
    unsigned int i;
    if (keyword_map) return;
    keyword_map = eina_hash_string_superfast_new(NULL);
-   for (i = 3; i < (sizeof(tokens) / sizeof(const char*)); ++i)
+   for (i = 4; i < (sizeof(tokens) / sizeof(const char*)); ++i)
      {
-         eina_hash_add(keyword_map, tokens[i], (void*)(size_t)(i - 2));
+         eina_hash_add(keyword_map, tokens[i], (void*)(size_t)(i - 3));
      }
 }
 
@@ -193,7 +193,117 @@ read_long_comment(Eo_Lexer *ls, Eo_Token *tok)
           }
      }
    eina_strbuf_trim(ls->buff);
-   if (tok) tok->value = eina_strbuf_string_get(ls->buff);
+   if (tok) tok->value = eina_stringshare_add(eina_strbuf_string_get(ls->buff));
+}
+
+static void
+esc_error(Eo_Lexer *ls, int *c, int n, const char *msg)
+{
+   int i;
+   eina_strbuf_reset(ls->buff);
+   eina_strbuf_append_char(ls->buff, '\\');
+   for (i = 0; i < n && c[i]; ++i)
+     eina_strbuf_append_char(ls->buff, c[i]);
+   eo_lexer_lex_error(ls, msg, TOK_STRING);
+}
+
+static int
+hex_val(int c)
+{
+   if (c >= 'a') return c - 'a' + 10;
+   if (c >= 'A') return c - 'A' + 10;
+   return c - '0';
+}
+
+static int
+read_hex_esc(Eo_Lexer *ls)
+{
+   int c[3] = { 'x' };
+   int i, r = 0;
+   for (i = 1; i < 3; ++i)
+     {
+        next_char(ls);
+        c[i] = ls->current;
+        if (!isxdigit(c[i]))
+          esc_error(ls, c, i + 1, "hexadecimal digit expected");
+        r = (r << 4) + hex_val(c[i]);
+     }
+   return r;
+}
+
+static int
+read_dec_esc(Eo_Lexer *ls)
+{
+   int c[3];
+   int i, r = 0;
+   for (i = 0; i < 3 && isdigit(ls->current); ++i)
+     {
+        c[i] = ls->current;
+        r = r * 10 + (c[i] - '0');
+        next_char(ls);
+     }
+   if (r > UCHAR_MAX)
+     esc_error(ls, c, i, "decimal escape too large");
+   return r;
+}
+
+static void
+read_string(Eo_Lexer *ls, Eo_Token *tok)
+{
+   int del = ls->current;
+   eina_strbuf_reset(ls->buff);
+   eina_strbuf_append_char(ls->buff, del);
+   while (ls->current != del) switch (ls->current)
+     {
+      case '\0':
+        eo_lexer_lex_error(ls, "unfinished string", -1);
+        break;
+      case '\n': case '\r':
+        eo_lexer_lex_error(ls, "unfinished string", TOK_STRING);
+        break;
+      case '\\':
+        {
+           next_char(ls);
+           switch (ls->current)
+             {
+              case 'a': eina_strbuf_append_char(ls->buff, '\a'); goto next;
+              case 'b': eina_strbuf_append_char(ls->buff, '\b'); goto next;
+              case 'f': eina_strbuf_append_char(ls->buff, '\f'); goto next;
+              case 'n': eina_strbuf_append_char(ls->buff, '\n'); goto next;
+              case 'r': eina_strbuf_append_char(ls->buff, '\r'); goto next;
+              case 't': eina_strbuf_append_char(ls->buff, '\t'); goto next;
+              case 'v': eina_strbuf_append_char(ls->buff, '\v'); goto next;
+              case 'x':
+                eina_strbuf_append_char(ls->buff, read_hex_esc(ls));
+                goto next;
+              case '\n': case '\r':
+                next_line(ls);
+                eina_strbuf_append_char(ls->buff, '\n');
+                goto skip;
+              case '\\': case '"': case '\'':
+                eina_strbuf_append_char(ls->buff, ls->current);
+                goto skip;
+              case '\0':
+                goto skip;
+              default:
+                if (!isdigit(ls->current))
+                  esc_error(ls, &ls->current, 1, "invalid escape sequence");
+                eina_strbuf_append_char(ls->buff, read_dec_esc(ls));
+                goto skip;
+             }
+next:
+           next_char(ls);
+skip:
+           break;
+        }
+      default:
+        eina_strbuf_append_char(ls->buff, ls->current);
+        next_char(ls);
+     }
+   eina_strbuf_append_char(ls->buff, ls->current);
+   next_char(ls);
+   tok->value = eina_stringshare_add_length(eina_strbuf_string_get(ls->buff) + 1,
+                                            eina_strbuf_length_get(ls->buff) - 2);
 }
 
 static int
@@ -345,6 +455,7 @@ static int
 lex(Eo_Lexer *ls, Eo_Token *tok)
 {
    eina_strbuf_reset(ls->buff);
+   tok->value = NULL;
    for (;;) switch (ls->current)
      {
       case '\n':
@@ -371,6 +482,9 @@ lex(Eo_Lexer *ls, Eo_Token *tok)
         continue;
       case '\0':
         return -1;
+      case '"': case '\'':
+        read_string(ls, tok);
+        return TOK_STRING;
       case '.':
         next_char(ls);
         if (!isdigit(ls->current)) return '.';
@@ -412,7 +526,7 @@ lex(Eo_Lexer *ls, Eo_Token *tok)
                 ls->column = col + 1;
                 if (at_kw && tok->kw == 0)
                   eo_lexer_syntax_error(ls, "invalid keyword");
-                tok->value = str;
+                tok->value = eina_stringshare_add(str);
                 return TOK_VALUE;
              }
            else
@@ -450,7 +564,7 @@ lex_balanced(Eo_Lexer *ls, Eo_Token *tok, char beg, char end)
    eina_strbuf_trim(ls->buff);
    str     = eina_strbuf_string_get(ls->buff);
    tok->kw = (int)(uintptr_t)eina_hash_find(keyword_map, str);
-   tok->value = str;
+   tok->value = eina_stringshare_add(str);
    ls->column = col + 1;
    return TOK_VALUE;
 }
@@ -532,6 +646,11 @@ eo_lexer_get_balanced(Eo_Lexer *ls, char beg, char end)
 int
 eo_lexer_get(Eo_Lexer *ls)
 {
+   if (ls->t.token >= START_CUSTOM && ls->t.token != TOK_NUMBER)
+     {
+        eina_stringshare_del(ls->t.value);
+        ls->t.value = NULL;
+     }
    if (ls->lookahead.token >= 0)
      {
         ls->t               = ls->lookahead;
@@ -594,7 +713,7 @@ eo_lexer_token_to_str(int token, char *buf)
 const char *
 eo_lexer_keyword_str_get(int kw)
 {
-   return tokens[kw + 2];
+   return tokens[kw + 3];
 }
 
 Eina_Bool
diff --git a/src/lib/eolian/eo_lexer.h b/src/lib/eolian/eo_lexer.h
index 801d9bd..260d5a2 100644
--- a/src/lib/eolian/eo_lexer.h
+++ b/src/lib/eolian/eo_lexer.h
@@ -13,7 +13,7 @@
 
 enum Tokens
 {
-   TOK_COMMENT = START_CUSTOM, TOK_NUMBER, TOK_VALUE
+   TOK_COMMENT = START_CUSTOM, TOK_STRING, TOK_NUMBER, TOK_VALUE
 };
 
 /* all keywords in eolian, they can still be used as names (they're TOK_VALUE)
diff --git a/src/lib/eolian/eo_parser.c b/src/lib/eolian/eo_parser.c
index 24a45bb..9ddcf25 100644
--- a/src/lib/eolian/eo_parser.c
+++ b/src/lib/eolian/eo_parser.c
@@ -240,7 +240,7 @@ parse_struct(Eo_Lexer *ls, const char *name, Eina_Bool is_extern)
    check_next(ls, '{');
    if (ls->t.token == TOK_COMMENT)
      {
-        def->comment = eina_stringshare_add(ls->t.value);
+        def->comment = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
      }
    while (ls->t.token != '}')
@@ -251,7 +251,7 @@ parse_struct(Eo_Lexer *ls, const char *name, Eina_Bool is_extern)
         check(ls, TOK_VALUE);
         if (eina_hash_find(def->fields, ls->t.value))
           eo_lexer_syntax_error(ls, "double field definition");
-        fname = eina_stringshare_add(ls->t.value);
+        fname = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
         check_next(ls, ':');
         tp = parse_type_struct_nonvoid(ls, EINA_TRUE, EINA_FALSE);
@@ -263,7 +263,7 @@ parse_struct(Eo_Lexer *ls, const char *name, Eina_Bool is_extern)
         check_next(ls, ';');
         if (ls->t.token == TOK_COMMENT)
           {
-             fdef->comment = eina_stringshare_add(ls->t.value);
+             fdef->comment = eina_stringshare_ref(ls->t.value);
              eo_lexer_get(ls);
           }
      }
@@ -329,7 +329,7 @@ parse_type_struct(Eo_Lexer *ls, Eina_Bool allow_struct, Eina_Bool allow_anon)
              check(ls, TOK_VALUE);
              if (eo_lexer_get_c_type(ls->t.kw))
                eo_lexer_syntax_error(ls, "invalid struct name");
-             sname = eina_stringshare_add(ls->t.value);
+             sname = eina_stringshare_ref(ls->t.value);
              eo_lexer_get(ls);
              if (ls->t.token == '{')
                return parse_struct(ls, sname, is_extern);
@@ -339,7 +339,7 @@ parse_type_struct(Eo_Lexer *ls, Eina_Bool allow_struct, Eina_Bool allow_anon)
              check(ls, TOK_VALUE);
              if (eo_lexer_get_c_type(ls->t.kw))
                eo_lexer_syntax_error(ls, "invalid struct name");
-             sname = eina_stringshare_add(ls->t.value);
+             sname = eina_stringshare_ref(ls->t.value);
              eo_lexer_get(ls);
           }
         def = push_type(ls);
@@ -359,7 +359,8 @@ parse_type_struct(Eo_Lexer *ls, Eina_Bool allow_struct, Eina_Bool allow_anon)
         def->type = EOLIAN_TYPE_REGULAR;
         check(ls, TOK_VALUE);
         ctype = eo_lexer_get_c_type(ls->t.kw);
-        def->name = eina_stringshare_add(ctype ? ctype : ls->t.value);
+        def->name = ctype ? eina_stringshare_add(ctype)
+                          : eina_stringshare_ref(ls->t.value);
      }
    eo_lexer_get(ls);
 parse_ptr:
@@ -406,7 +407,7 @@ parse_typedef(Eo_Lexer *ls)
         eo_lexer_get(ls);
      }
    check(ls, TOK_VALUE);
-   ls->tmp.typedef_def->alias = eina_stringshare_add(ls->t.value);
+   ls->tmp.typedef_def->alias = eina_stringshare_ref(ls->t.value);
    eo_lexer_get(ls);
    (void)!!test_next(ls, ':');
    ls->tmp.typedef_def->type = parse_type_struct_nonvoid(ls, EINA_TRUE,
@@ -430,7 +431,7 @@ parse_return(Eo_Lexer *ls, Eina_Bool allow_void)
      {
         int line = ls->line_number, col = ls->column;
         eo_lexer_get_balanced(ls, '(', ')');
-        ret->default_ret_val = eina_stringshare_add(ls->t.value);
+        ret->default_ret_val = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
         check_match(ls, ')', '(', line, col);
      }
@@ -442,7 +443,7 @@ parse_return(Eo_Lexer *ls, Eina_Bool allow_void)
    check_next(ls, ';');
    if (ls->t.token == TOK_COMMENT)
      {
-        ret->comment = eina_stringshare_add(ls->t.value);
+        ret->comment = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
      }
 }
@@ -478,7 +479,7 @@ parse_param(Eo_Lexer *ls, Eina_Bool allow_inout)
      par->type = parse_type(ls);
    pop_type(ls);
    check(ls, TOK_VALUE);
-   par->name = eina_stringshare_add(ls->t.value);
+   par->name = eina_stringshare_ref(ls->t.value);
    eo_lexer_get(ls);
    if (ls->t.kw == KW_at_nonull)
      {
@@ -488,7 +489,7 @@ parse_param(Eo_Lexer *ls, Eina_Bool allow_inout)
    check_next(ls, ';');
    if (ls->t.token == TOK_COMMENT)
      {
-        par->comment = eina_stringshare_add(ls->t.value);
+        par->comment = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
      }
 }
@@ -498,7 +499,7 @@ parse_legacy(Eo_Lexer *ls)
 {
    eo_lexer_get(ls);
    check(ls, TOK_VALUE);
-   ls->tmp.legacy_def = eina_stringshare_add(ls->t.value);
+   ls->tmp.legacy_def = eina_stringshare_ref(ls->t.value);
    eo_lexer_get(ls);
    check_next(ls, ';');
 }
@@ -510,7 +511,7 @@ parse_attrs(Eo_Lexer *ls)
    Eina_Bool has_const = EINA_FALSE;
    acc = calloc(1, sizeof(Eo_Accessor_Param));
    ls->tmp.accessor_param = acc;
-   acc->name = eina_stringshare_add(ls->t.value);
+   acc->name = eina_stringshare_ref(ls->t.value);
    eo_lexer_get(ls);
    check_next(ls, ':');
    check(ls, TOK_VALUE);
@@ -545,7 +546,7 @@ parse_accessor(Eo_Lexer *ls)
    check_next(ls, '{');
    if (ls->t.token == TOK_COMMENT)
      {
-        acc->comment = eina_stringshare_add(ls->t.value);
+        acc->comment = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
      }
    for (;;) switch (ls->t.kw)
@@ -611,7 +612,7 @@ parse_property(Eo_Lexer *ls)
         prop->scope = EOLIAN_SCOPE_PROTECTED;
         eo_lexer_get(ls);
      }
-   prop->name = eina_stringshare_add(ls->t.value);
+   prop->name = eina_stringshare_ref(ls->t.value);
    eo_lexer_get(ls);
    line = ls->line_number;
    col = ls->column;
@@ -664,7 +665,7 @@ parse_method(Eo_Lexer *ls, Eina_Bool ctor)
      {
         if (ls->t.token != TOK_VALUE)
           eo_lexer_syntax_error(ls, "expected method name");
-        meth->name = eina_stringshare_add(ls->t.value);
+        meth->name = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
      }
    else
@@ -675,7 +676,7 @@ parse_method(Eo_Lexer *ls, Eina_Bool ctor)
              eo_lexer_get(ls);
           }
         check(ls, TOK_VALUE);
-        meth->name = eina_stringshare_add(ls->t.value);
+        meth->name = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
      }
    line = ls->line_number;
@@ -683,7 +684,7 @@ parse_method(Eo_Lexer *ls, Eina_Bool ctor)
    check_next(ls, '{');
    if (ls->t.token == TOK_COMMENT)
      {
-        meth->comment = eina_stringshare_add(ls->t.value);
+        meth->comment = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
      }
    for (;;) switch (ls->t.kw)
@@ -846,7 +847,7 @@ parse_event(Eo_Lexer *ls)
    eo_lexer_get(ls);
    if (ls->t.token == TOK_COMMENT)
      {
-        ev->comment = eina_stringshare_add(ls->t.value);
+        ev->comment = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
      }
 }
@@ -935,7 +936,7 @@ parse_class_body(Eo_Lexer *ls, Eina_Bool allow_ctors, Eolian_Class_Type type)
              has_events        = EINA_FALSE;
    if (ls->t.token == TOK_COMMENT)
      {
-        ls->tmp.kls->comment = eina_stringshare_add(ls->t.value);
+        ls->tmp.kls->comment = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
      }
    for (;;) switch (ls->t.kw)
@@ -945,7 +946,7 @@ parse_class_body(Eo_Lexer *ls, Eina_Bool allow_ctors, Eolian_Class_Type type)
         eo_lexer_get(ls);
         check_next(ls, ':');
         check(ls, TOK_VALUE);
-        ls->tmp.kls->legacy_prefix = eina_stringshare_add(ls->t.value);
+        ls->tmp.kls->legacy_prefix = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
         check_next(ls, ';');
         break;
@@ -954,7 +955,7 @@ parse_class_body(Eo_Lexer *ls, Eina_Bool allow_ctors, Eolian_Class_Type type)
         eo_lexer_get(ls);
         check_next(ls, ':');
         check(ls, TOK_VALUE);
-        ls->tmp.kls->eo_prefix = eina_stringshare_add(ls->t.value);
+        ls->tmp.kls->eo_prefix = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
         check_next(ls, ';');
         break;
@@ -963,7 +964,7 @@ parse_class_body(Eo_Lexer *ls, Eina_Bool allow_ctors, Eolian_Class_Type type)
         eo_lexer_get(ls);
         check_next(ls, ':');
         check(ls, TOK_VALUE);
-        ls->tmp.kls->data_type = eina_stringshare_add(ls->t.value);
+        ls->tmp.kls->data_type = eina_stringshare_ref(ls->t.value);
         eo_lexer_get(ls);
         check_next(ls, ';');
         break;
@@ -1065,7 +1066,7 @@ parse_unit(Eo_Lexer *ls, Eina_Bool eot)
            check(ls, TOK_VALUE);
            if (eo_lexer_get_c_type(ls->t.kw))
              eo_lexer_syntax_error(ls, "invalid struct name");
-           name = eina_stringshare_add(ls->t.value);
+           name = eina_stringshare_ref(ls->t.value);
            eo_lexer_get(ls);
            parse_struct(ls, name, is_extern);
            pop_type(ls);

-- 


Reply via email to