[RFC][PATCH 5/5] vm: preliminary jar-file parsing

Vegard Nossum Wed, 08 Jul 2009 16:22:24 -0700

This provides a recursive-descent parser for the grammar of sections,
as given at: http://java.sun.com/j2se/1.3/docs/guide/jar/jar.html


The actual manifest parsing still has to be done.

Signed-off-by: Vegard Nossum <vegard.nos...@gmail.com>
---
 Makefile         |    1 +
 include/vm/jar.h |    6 +
 vm/jar.c         |  359 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 vm/jato.c        |    9 ++
 4 files changed, 375 insertions(+), 0 deletions(-)
 create mode 100644 include/vm/jar.h
 create mode 100644 vm/jar.c

diff --git a/Makefile b/Makefile
index 08cebfe..e915812 100644
--- a/Makefile
+++ b/Makefile
@@ -97,6 +97,7 @@ VM_OBJS = \
        vm/field.o              \
        vm/guard-page.o         \
        vm/itable.o             \
+       vm/jar.o                \
        vm/jato.o               \
        vm/method.o             \
        vm/natives.o            \
diff --git a/include/vm/jar.h b/include/vm/jar.h
new file mode 100644
index 0000000..51f8541
--- /dev/null
+++ b/include/vm/jar.h
@@ -0,0 +1,6 @@
+#ifndef _VM_JAR_H
+#define _VM_JAR_H
+
+/*
+ * Open the jar (zip) archive at @filename and parse the first section of
+ * its META-INF/MANIFEST.MF entry.
+ *
+ * Returns 0 on success, -1 if the archive cannot be opened, or a negative
+ * errno value if the manifest cannot be located, read or parsed.
+ */
+int vm_jar_open(const char *filename);
+
+#endif
diff --git a/vm/jar.c b/vm/jar.c
new file mode 100644
index 0000000..42ecafe
--- /dev/null
+++ b/vm/jar.c
@@ -0,0 +1,359 @@
+#include <ctype.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <zip.h>
+
+#include "lib/list.h"
+#include "lib/string.h"
+#include "vm/jar.h"
+
+/* Cursor over the in-memory text handed to the parse_*() functions. */
+struct parse_buffer {
+       const char *data;       /* text being parsed */
+       unsigned int i;         /* index into data of the next unread character */
+};
+
+/* The following is an implementation of the Jar file format as specified in:
+ * http://java.sun.com/j2se/1.3/docs/guide/jar/jar.html */
+
+/* A parsed section: the headers that were read for it. */
+struct jar_section {
+       /* List of struct jar_header, linked through jar_header.node and
+        * appended with list_add_tail(). */
+       struct list_head headers;
+};
+
+/* One "name: value" header of a section. */
+struct jar_header {
+       char *name;     /* heap-allocated string produced by parse_name() */
+       char *value;    /* heap-allocated string produced by parse_value() */
+
+       struct list_head node;  /* links this header into jar_section.headers */
+};
+
+struct jar_section *jar_section_alloc(void);
+void jar_section_free(struct jar_section *js);
+
+/*
+ * Parser functions. Each one returns true and stores its result through the
+ * out-parameter on success, or returns false on failure. Input is consumed
+ * by advancing b->i.
+ */
+static bool parse_section(struct parse_buffer *b,
+       struct jar_section **section_result);
+static bool parse_nonempty_section(struct parse_buffer *b,
+       struct jar_section **section_result);
+static bool parse_newline(struct parse_buffer *b);
+static bool parse_header(struct parse_buffer *b,
+       struct jar_header **header_result);
+static bool parse_name(struct parse_buffer *b, char **name_result);
+static bool parse_value(struct parse_buffer *b, char **value_result);
+static bool parse_continuation(struct parse_buffer *b,
+       char **continuation_result);
+static bool parse_alphanum(struct parse_buffer *b, char *alphanum_result);
+static bool parse_headerchar(struct parse_buffer *b, char *headerchar_result);
+static bool parse_otherchar(struct parse_buffer *b, char *otherchar_result);
+
+/*
+ * Allocate an empty section.
+ *
+ * Returns the new section with an initialised, empty header list, or NULL
+ * if the allocation fails. Release it with jar_section_free().
+ */
+struct jar_section *jar_section_alloc(void)
+{
+       struct jar_section *js = malloc(sizeof *js);
+
+       /* Do not touch the list head if malloc() failed. */
+       if (!js)
+               return NULL;
+
+       INIT_LIST_HEAD(&js->headers);
+
+       return js;
+}
+
+/*
+ * Release a section together with every header linked into it.
+ *
+ * The name and value strings of each header are owned by the header and are
+ * freed with it. Passing NULL is allowed and does nothing.
+ */
+void jar_section_free(struct jar_section *js)
+{
+       struct jar_header *header, *tmp_header;
+
+       if (!js)
+               return;
+
+       /* The _safe variant is required because the current node is freed
+        * while the list is being walked. */
+       list_for_each_entry_safe(header, tmp_header, &js->headers, node) {
+               free(header->name);
+               free(header->value);
+               free(header);
+       }
+
+       free(js);
+}
+
+/*
+ * section: *header +newline
+ *
+ * Parses zero or more headers followed by at least one newline. On success
+ * the new section is stored in *section_result and owned by the caller, who
+ * releases it with jar_section_free(). Returns false if the terminating
+ * newline is missing or the section cannot be allocated; *section_result is
+ * left untouched in that case.
+ */
+static bool parse_section(struct parse_buffer *b,
+       struct jar_section **section_result)
+{
+       struct jar_section *section = jar_section_alloc();
+       struct jar_header *header;
+
+       if (!section)
+               return false;
+
+       while (parse_header(b, &header))
+               list_add_tail(&header->node, &section->headers);
+
+       /* At least one newline must terminate the section ... */
+       if (!parse_newline(b)) {
+               jar_section_free(section);
+               return false;
+       }
+
+       /* ... and any further newlines are swallowed as well. */
+       while (parse_newline(b))
+               ;
+
+       *section_result = section;
+       return true;
+}
+
+/*
+ * nonempty-section: +header +newline
+ *
+ * Like parse_section(), but at least one header is required. On success the
+ * new section (including the first header) is stored in *section_result and
+ * owned by the caller, who releases it with jar_section_free(). Returns
+ * false if there is no leading header, the terminating newline is missing,
+ * or the section cannot be allocated.
+ */
+static bool parse_nonempty_section(struct parse_buffer *b,
+       struct jar_section **section_result)
+{
+       struct jar_header *header;
+       struct jar_section *section;
+
+       if (!parse_header(b, &header))
+               return false;
+
+       section = jar_section_alloc();
+       if (!section) {
+               /* Nothing owns the first header yet, so drop it here. */
+               free(header->name);
+               free(header->value);
+               free(header);
+               return false;
+       }
+
+       /* The mandatory first header belongs to the section too; without
+        * this it would be lost and leaked. */
+       list_add_tail(&header->node, &section->headers);
+
+       while (parse_header(b, &header))
+               list_add_tail(&header->node, &section->headers);
+
+       if (!parse_newline(b))
+               goto out_free_section;
+
+       while (parse_newline(b))
+               ;
+
+       *section_result = section;
+       return true;
+
+out_free_section:
+       jar_section_free(section);
+       return false;
+}
+
+/*
+ * newline: CR LF | LF | CR
+ *
+ * Consumes one line terminator at the current position. Returns true if one
+ * was consumed, false (without consuming anything) otherwise.
+ */
+static bool parse_newline(struct parse_buffer *b)
+{
+       const char *cursor = &b->data[b->i];
+
+       switch (*cursor) {
+       case '\r':
+               /* A CR may stand alone or be followed by an LF. */
+               b->i += (cursor[1] == '\n') ? 2 : 1;
+               return true;
+       case '\n':
+               b->i += 1;
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * header: name ":" value
+ *
+ * On success a newly allocated header is stored in *header_result; it takes
+ * ownership of the name and value strings. Returns false if any part fails
+ * to parse or the header cannot be allocated. Input consumed before the
+ * failure is not rewound.
+ */
+static bool parse_header(struct parse_buffer *b,
+       struct jar_header **header_result)
+{
+       char *name;
+       char *value;
+       struct jar_header *header;
+
+       if (!parse_name(b, &name))
+               return false;
+
+       if (b->data[b->i] != ':')
+               goto out_free_name;
+
+       ++b->i;
+
+       if (!parse_value(b, &value))
+               goto out_free_name;
+
+       header = malloc(sizeof *header);
+       if (!header)
+               goto out_free_value;
+
+       header->name = name;
+       header->value = value;
+       *header_result = header;
+
+       return true;
+
+out_free_value:
+       free(value);
+out_free_name:
+       free(name);
+       return false;
+}
+
+/*
+ * name: alphanum *headerchar
+ *
+ * On success a heap-allocated copy of the name is stored in *name_result;
+ * the caller frees it with free(). Returns false if the input does not start
+ * with an alphanumeric character or the copy cannot be allocated.
+ */
+static bool parse_name(struct parse_buffer *b, char **name_result)
+{
+       char alphanum;
+       if (!parse_alphanum(b, &alphanum))
+               return false;
+
+       struct string *name = alloc_str();
+
+       str_append(name, "%c", alphanum);
+
+       char headerchar;
+       while (parse_headerchar(b, &headerchar))
+               str_append(name, "%c", headerchar);
+
+       char *result = strdup(name->value);
+       free_str(name);
+
+       /* Do not report success with a NULL name. */
+       if (!result)
+               return false;
+
+       *name_result = result;
+       return true;
+}
+
+/*
+ * value: SPACE *otherchar newline *continuation
+ *
+ * On success a heap-allocated copy of the value, with the text of all
+ * continuation lines appended, is stored in *value_result; the caller frees
+ * it with free(). Returns false if the leading space or the newline is
+ * missing, or if the copy cannot be allocated.
+ */
+static bool parse_value(struct parse_buffer *b, char **value_result)
+{
+       struct string *value;
+       char otherchar;
+       char *continuation;
+       char *result;
+
+       if (b->data[b->i] != ' ')
+               return false;
+
+       /* The SPACE only separates ':' from the value; skip it so that it
+        * does not end up as the first character of the value. */
+       ++b->i;
+
+       value = alloc_str();
+
+       while (parse_otherchar(b, &otherchar))
+               str_append(value, "%c", otherchar);
+
+       if (!parse_newline(b))
+               goto out_free_value;
+
+       while (parse_continuation(b, &continuation)) {
+               str_append(value, "%s", continuation);
+               /* str_append() was handed the text via "%s"; the string
+                * returned by parse_continuation() is ours to release. */
+               free(continuation);
+       }
+
+       result = strdup(value->value);
+       if (!result)
+               goto out_free_value;
+
+       free_str(value);
+       *value_result = result;
+       return true;
+
+out_free_value:
+       free_str(value);
+       return false;
+}
+
+/*
+ * continuation: SPACE *otherchar newline
+ *
+ * On success a heap-allocated copy of the continuation text (without the
+ * leading space) is stored in *continuation_result; the caller frees it with
+ * free(). Returns false if the line does not start with a space, the newline
+ * is missing, or the copy cannot be allocated.
+ */
+static bool parse_continuation(struct parse_buffer *b,
+       char **continuation_result)
+{
+       struct string *continuation;
+       char otherchar;
+       char *result;
+
+       if (b->data[b->i] != ' ')
+               return false;
+
+       /* The SPACE only marks the line as a continuation; skip it so that
+        * it is not inserted into the middle of the joined value. */
+       ++b->i;
+
+       continuation = alloc_str();
+
+       while (parse_otherchar(b, &otherchar))
+               str_append(continuation, "%c", otherchar);
+
+       if (!parse_newline(b))
+               goto out_free_continuation;
+
+       result = strdup(continuation->value);
+       if (!result)
+               goto out_free_continuation;
+
+       free_str(continuation);
+       *continuation_result = result;
+       return true;
+
+out_free_continuation:
+       free_str(continuation);
+       return false;
+}
+
+/*
+ * alphanum: {A-Z} | {a-z} | {0-9}
+ *
+ * Consumes one ASCII letter or digit and stores it in *alphanum_result.
+ * Returns false, consuming nothing, if the next character is anything else.
+ */
+static bool parse_alphanum(struct parse_buffer *b, char *alphanum_result)
+{
+       const char next = b->data[b->i];
+       const bool is_upper = next >= 'A' && next <= 'Z';
+       const bool is_lower = next >= 'a' && next <= 'z';
+       const bool is_digit = next >= '0' && next <= '9';
+
+       if (!is_upper && !is_lower && !is_digit)
+               return false;
+
+       *alphanum_result = next;
+       b->i += 1;
+       return true;
+}
+
+/*
+ * headerchar: alphanum | "-" | "_"
+ *
+ * Consumes one character that may appear inside a header name and stores it
+ * in *headerchar_result. Returns false, consuming nothing, otherwise.
+ */
+static bool parse_headerchar(struct parse_buffer *b, char *headerchar_result)
+{
+       if (parse_alphanum(b, headerchar_result))
+               return true;
+
+       switch (b->data[b->i]) {
+       case '-':
+       case '_':
+               *headerchar_result = b->data[b->i];
+               b->i += 1;
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * otherchar: any character except NUL, CR and LF
+ *
+ * Consumes one such character and stores it in *otherchar_result. Returns
+ * false, consuming nothing, when the next character is NUL, CR or LF.
+ */
+static bool parse_otherchar(struct parse_buffer *b, char *otherchar_result)
+{
+       const char next = b->data[b->i];
+
+       switch (next) {
+       case '\0':
+       case '\r':
+       case '\n':
+               return false;
+       default:
+               break;
+       }
+
+       *otherchar_result = next;
+       b->i += 1;
+       return true;
+}
+
+/*
+ * Read META-INF/MANIFEST.MF from @zip into memory and parse its first
+ * section.
+ *
+ * The parsed section is not used for anything yet, so it is released again
+ * before returning.
+ *
+ * Returns 0 on success or a negative errno value:
+ *   -ENOENT  the archive has no META-INF/MANIFEST.MF entry
+ *   -EIO     the entry could not be stat'ed, opened or read completely
+ *   -ENOMEM  the read buffer could not be allocated
+ *   -EINVAL  the manifest does not start with a valid non-empty section
+ */
+static int read_manifest(struct zip *zip)
+{
+       int zip_file_index;
+       struct zip_stat manifest_stat;
+       struct zip_file *zip_file;
+       char *zip_file_buf;
+       size_t size;
+       size_t offset;
+       int err;
+
+       zip_file_index = zip_name_locate(zip, "META-INF/MANIFEST.MF", 0);
+       if (zip_file_index == -1) {
+               NOT_IMPLEMENTED;
+               return -ENOENT;
+       }
+
+       if (zip_stat_index(zip, zip_file_index, 0, &manifest_stat) == -1) {
+               NOT_IMPLEMENTED;
+               return -EIO;
+       }
+
+       size = manifest_stat.size;
+
+       /* One extra byte for the NUL terminator: the parse_*() functions
+        * have no length to go by and stop at '\0'. */
+       zip_file_buf = malloc(size + 1);
+       if (!zip_file_buf) {
+               NOT_IMPLEMENTED;
+               return -ENOMEM;
+       }
+
+       zip_file = zip_fopen_index(zip, zip_file_index, 0);
+       if (!zip_file) {
+               NOT_IMPLEMENTED;
+               err = -EIO;
+               goto out_free_buf;
+       }
+
+       for (offset = 0; offset < size;) {
+               ssize_t ret;
+
+               ret = zip_fread(zip_file,
+                       zip_file_buf + offset, size - offset);
+
+               /* A return of 0 before all bytes have arrived means the data
+                * ended early; treat it as an error instead of spinning. */
+               if (ret <= 0) {
+                       NOT_IMPLEMENTED;
+                       zip_fclose(zip_file);
+                       err = -EIO;
+                       goto out_free_buf;
+               }
+
+               offset += ret;
+       }
+
+       zip_fclose(zip_file);
+
+       zip_file_buf[size] = '\0';
+
+       struct parse_buffer pb;
+       pb.data = zip_file_buf;
+       pb.i = 0;
+
+       struct jar_section *section;
+       if (!parse_nonempty_section(&pb, &section)) {
+               NOT_IMPLEMENTED;
+               //printf("parse error, byte offset %d. oops\n", pb.i);
+               err = -EINVAL;
+               goto out_free_buf;
+       }
+
+       /* This is here just to shut GCC up. */
+       if (0 && !parse_section(&pb, &section)) {
+       }
+
+       jar_section_free(section);
+       err = 0;
+
+out_free_buf:
+       free(zip_file_buf);
+       return err;
+}
+
+/*
+ * Open the jar (zip) archive at @filename and read its manifest.
+ *
+ * Returns 0 on success, -1 if the archive cannot be opened, or the negative
+ * errno value reported by read_manifest().
+ */
+int vm_jar_open(const char *filename)
+{
+       int zip_error;
+       struct zip *zip;
+       int err;
+
+       zip = zip_open(filename, 0, &zip_error);
+       if (!zip) {
+               NOT_IMPLEMENTED;
+               return -1;
+       }
+
+       err = read_manifest(zip);
+
+       /* The handle is not stored anywhere, so release it on every path. */
+       zip_close(zip);
+
+       return err;
+}
diff --git a/vm/jato.c b/vm/jato.c
index 081475c..976193a 100644
--- a/vm/jato.c
+++ b/vm/jato.c
@@ -56,6 +56,7 @@
 #include "vm/fault-inject.h"
 #include "vm/preload.h"
 #include "vm/itable.h"
+#include "vm/jar.h"
 #include "vm/jni.h"
 #include "vm/method.h"
 #include "vm/natives.h"
@@ -418,6 +419,14 @@ main(int argc, char *argv[])
 
                        if (classloader_add_to_classpath(argv[i]))
                                NOT_IMPLEMENTED;
+               } else if (!strcmp(argv[i], "-jar")) {
+                       if (++i >= argc) {
+                               NOT_IMPLEMENTED;
+                               break;
+                       }
+
+                       if (vm_jar_open(argv[i]))
+                               NOT_IMPLEMENTED;
                } else if (!strcmp(argv[i], "-Xtrace:asm")) {
                        opt_trace_method = true;
                        opt_trace_machine_code = true;
-- 
1.6.0.4


------------------------------------------------------------------------------
Enter the BlackBerry Developer Challenge  
This is your chance to win up to $100,000 in prizes! For a limited time, 
vendors submitting new applications to BlackBerry App World(TM) will have
the opportunity to enter the BlackBerry Developer Challenge. See full prize  
details at: http://p.sf.net/sfu/Challenge
_______________________________________________
Jatovm-devel mailing list
Jatovm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/jatovm-devel

[RFC][PATCH 5/5] vm: preliminary jar-file parsing

Reply via email to