This provides a recursive-descent parser for the grammar of sections and manifest files as given at:
http://java.sun.com/j2se/1.3/docs/guide/jar/jar.html Signed-off-by: Vegard Nossum <vegard.nos...@gmail.com> --- Makefile | 1 + include/vm/jar.h | 47 ++++ vm/jar.c | 620 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ vm/jato.c | 35 +++- 4 files changed, 702 insertions(+), 1 deletions(-) create mode 100644 include/vm/jar.h create mode 100644 vm/jar.c diff --git a/Makefile b/Makefile index e602e8d..222da4e 100644 --- a/Makefile +++ b/Makefile @@ -99,6 +99,7 @@ VM_OBJS = \ vm/field.o \ vm/guard-page.o \ vm/itable.o \ + vm/jar.o \ vm/jato.o \ vm/method.o \ vm/natives.o \ diff --git a/include/vm/jar.h b/include/vm/jar.h new file mode 100644 index 0000000..f9cf1c7 --- /dev/null +++ b/include/vm/jar.h @@ -0,0 +1,47 @@ +#ifndef _VM_JAR_H +#define _VM_JAR_H + +#include "lib/list.h" + +struct jar_section { + struct list_head headers; +}; + +struct jar_header { + char *name; + char *value; + + struct list_head node; +}; + +struct jar_main_section { + unsigned int major_version; + unsigned int minor_version; + + /* List of 'struct jar_header' */ + struct list_head main_attributes; +}; + +struct jar_individual_section { + struct jar_header *name_header; + + /* List of 'struct jar_header' */ + struct list_head perentry_attributes; + + struct list_head node; +}; + +struct jar_manifest { + struct jar_main_section *main_section; + struct list_head individual_sections; +}; + +struct vm_jar { + struct jar_manifest *manifest; +}; + +struct vm_jar *vm_jar_open(const char *filename); + +const char *vm_jar_get_main_class(const struct vm_jar *jar); + +#endif diff --git a/vm/jar.c b/vm/jar.c new file mode 100644 index 0000000..37798bd --- /dev/null +++ b/vm/jar.c @@ -0,0 +1,620 @@ +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <zip.h> + +#include "lib/list.h" +#include "lib/string.h" +#include "vm/jar.h" + +struct parse_buffer { + const char *data; + 
unsigned int i; +}; + +/* The following is an implementation of the Jar file format as specified in: + * http://java.sun.com/j2se/1.3/docs/guide/jar/jar.html */ + +struct jar_section *jar_section_alloc(void); +void jar_section_free(struct jar_section *js); + +struct jar_header *jar_header_alloc(void); +void jar_header_free(struct jar_header *jh); + +#if 0 +static bool parse_section(struct parse_buffer *b, + struct jar_section **section_result); +static bool parse_nonempty_section(struct parse_buffer *b, + struct jar_section **section_result); +#endif + +static bool parse_newline(struct parse_buffer *b); +static bool parse_header(struct parse_buffer *b, + struct jar_header **header_result); +static bool parse_name(struct parse_buffer *b, char **name_result); +static bool parse_value(struct parse_buffer *b, char **value_result); +static bool parse_continuation(struct parse_buffer *b, + char **continuation_result); +static bool parse_alphanum(struct parse_buffer *b, char *alphanum_result); +static bool parse_headerchar(struct parse_buffer *b, char *headerchar_result); +static bool parse_otherchar(struct parse_buffer *b, char *otherchar_result); + +struct jar_section *jar_section_alloc(void) +{ + struct jar_section *js = malloc(sizeof *js); + INIT_LIST_HEAD(&js->headers); + + return js; +} + +void jar_section_free(struct jar_section *js) +{ + struct jar_header *header, *tmp_header; + + list_for_each_entry_safe(header, tmp_header, &js->headers, node) + free(header); + + free(js); +} + +struct jar_header *jar_header_alloc(void) +{ + return malloc(sizeof(struct jar_header)); +} + +void jar_header_free(struct jar_header *jh) +{ + free(jh->name); + free(jh->value); + free(jh); +} + +#if 0 +static bool parse_section(struct parse_buffer *b, + struct jar_section **section_result) +{ + struct jar_section *section = jar_section_alloc(); + + struct jar_header *header; + while (parse_header(b, &header)) + list_add_tail(&header->node, §ion->headers); + + if (!parse_newline(b)) + 
goto out_free_section; + + while (parse_newline(b)) + ; + + *section_result = section; + return true; + +out_free_section: + jar_section_free(section); + return false; +} + +static bool parse_nonempty_section(struct parse_buffer *b, + struct jar_section **section_result) +{ + struct jar_header *header; + if (!parse_header(b, &header)) + return false; + + struct jar_section *section = jar_section_alloc(); + + while (parse_header(b, &header)) + list_add_tail(&header->node, §ion->headers); + + if (!parse_newline(b)) + goto out_free_section; + + while (parse_newline(b)) + ; + + *section_result = section; + return true; + +out_free_section: + jar_section_free(section); + return false; +} +#endif + +static bool parse_newline(struct parse_buffer *b) +{ + if (b->data[b->i] == '\r') { + ++b->i; + + if (b->data[b->i] == '\n') + ++b->i; + + return true; + } + + if (b->data[b->i] == '\n') { + ++b->i; + + return true; + } + + return false; +} + +static bool parse_header(struct parse_buffer *b, + struct jar_header **header_result) +{ + char *name; + if (!parse_name(b, &name)) + return false; + + if (b->data[b->i] != ':') + goto out_free_name; + + ++b->i; + + char *value; + if (!parse_value(b, &value)) + goto out_free_name; + + struct jar_header *header = jar_header_alloc(); + header->name = name; + header->value = value; + *header_result = header; + + return true; + +out_free_name: + free(name); + return false; +} + +static bool parse_name(struct parse_buffer *b, char **name_result) +{ + char alphanum; + if (!parse_alphanum(b, &alphanum)) + return false; + + struct string *name = alloc_str(); + + str_append(name, "%c", alphanum); + + char headerchar; + while (parse_headerchar(b, &headerchar)) + str_append(name, "%c", headerchar); + + *name_result = strdup(name->value); + free_str(name); + return true; +} + +static bool parse_value(struct parse_buffer *b, char **value_result) +{ + if (b->data[b->i] != ' ') + return false; + + ++b->i; + + struct string *value = alloc_str(); + + 
char otherchar;
+	while (parse_otherchar(b, &otherchar))
+		str_append(value, "%c", otherchar);
+
+	if (!parse_newline(b))
+		goto out_free_value;
+
+	char *continuation;
+	while (parse_continuation(b, &continuation)) {
+		str_append(value, "%s", continuation);
+		/* parse_continuation() returns a strdup()'d string that we
+		 * own; release it once its text has been appended. */
+		free(continuation);
+	}
+
+	*value_result = strdup(value->value);
+	free_str(value);
+	return true;
+
+out_free_value:
+	free_str(value);
+	return false;
+}
+
+/* Parse one continuation line: a single leading space, any number of
+ * otherchars, then a newline. On success the text between the space and
+ * the newline is stored in *continuation_result as a strdup()'d string
+ * which the caller must free(). Returns false (and stores nothing) if the
+ * line does not start with a space or is not terminated by a newline. */
+static bool parse_continuation(struct parse_buffer *b,
+	char **continuation_result)
+{
+	if (b->data[b->i] != ' ')
+		return false;
+
+	++b->i;
+
+	struct string *continuation = alloc_str();
+
+	char otherchar;
+	while (parse_otherchar(b, &otherchar))
+		str_append(continuation, "%c", otherchar);
+
+	if (!parse_newline(b))
+		goto out_free_continuation;
+
+	*continuation_result = strdup(continuation->value);
+	free_str(continuation);
+	return true;
+
+out_free_continuation:
+	free_str(continuation);
+	return false;
+}
+
+/* Consume one ASCII letter or digit. On a match the character is stored
+ * in *alphanum_result and the cursor advances; otherwise nothing changes. */
+static bool parse_alphanum(struct parse_buffer *b, char *alphanum_result)
+{
+	char c = b->data[b->i];
+
+	if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+		|| (c >= '0' && c <= '9'))
+	{
+		*alphanum_result = c;
+		++b->i;
+		return true;
+	}
+
+	return false;
+}
+
+/* Consume one header-name character: an alphanum, '-' or '_'. */
+static bool parse_headerchar(struct parse_buffer *b, char *headerchar_result)
+{
+	if (parse_alphanum(b, headerchar_result))
+		return true;
+
+	char ch = b->data[b->i];
+	if (ch == '-' || ch == '_') {
+		*headerchar_result = ch;
+		++b->i;
+		return true;
+	}
+
+	return false;
+}
+
+/* Consume one value character: anything except NUL, CR and LF. The NUL
+ * test is what stops the parser at the end of the input buffer. */
+static bool parse_otherchar(struct parse_buffer *b, char *otherchar_result)
+{
+	char c = b->data[b->i];
+
+	if (c == '\0' || c == '\r' || c == '\n')
+		return false;
+
+	*otherchar_result = c;
+	++b->i;
+	return true;
+}
+
+/* Manifest Specification */
+
+struct jar_manifest *jar_manifest_alloc(void);
+void jar_manifest_free(struct jar_manifest *jm);
+
+struct jar_main_section *jar_main_section_alloc(void);
+void jar_main_section_free(struct jar_main_section *jms);
+
+struct jar_individual_section
*jar_individual_section_alloc(void); +void jar_individual_section_free(struct jar_individual_section *jis); + +static bool parse_manifest_file(struct parse_buffer *b, + struct jar_manifest **manifest_result); +static bool parse_main_section(struct parse_buffer *b, + struct jar_main_section **main_section_result); +static bool parse_version_info(struct parse_buffer *b, + unsigned int *major_version_result, unsigned int *minor_version_result); +static bool parse_version_number(struct parse_buffer *b, + unsigned int *version_number_result); +static bool parse_individual_section(struct parse_buffer *b, + struct jar_individual_section **individual_section_result); +static bool parse_digit(struct parse_buffer *b, unsigned int *digit_result); + +struct jar_manifest *jar_manifest_alloc(void) +{ + struct jar_manifest *jm = malloc(sizeof *jm); + + jm->main_section = NULL; + INIT_LIST_HEAD(&jm->individual_sections); + + return jm; +} + +void jar_manifest_free(struct jar_manifest *jm) +{ + free(jm->main_section); + + struct jar_individual_section *section, *tmp_section; + list_for_each_entry_safe(section, tmp_section, + &jm->individual_sections, node) + { + free(section); + } + + free(jm); +} + +struct jar_main_section *jar_main_section_alloc(void) +{ + struct jar_main_section *jms = malloc(sizeof *jms); + + INIT_LIST_HEAD(&jms->main_attributes); + + return jms; +} + +void jar_main_section_free(struct jar_main_section *jms) +{ + free(jms); +} + +struct jar_individual_section *jar_individual_section_alloc(void) +{ + struct jar_individual_section *jis = malloc(sizeof *jis); + + jis->name_header = NULL; + INIT_LIST_HEAD(&jis->perentry_attributes); + + return jis; +} + +void jar_individual_section_free(struct jar_individual_section *jis) +{ + free(jis->name_header); + + struct jar_header *header, *tmp_header; + list_for_each_entry_safe(header, tmp_header, + &jis->perentry_attributes, node) + { + free(header); + } + + free(jis); +} + +static bool parse_manifest_file(struct 
parse_buffer *b, + struct jar_manifest **manifest_result) +{ + struct jar_manifest *manifest = jar_manifest_alloc(); + + if (!parse_main_section(b, &manifest->main_section)) + goto out_free_manifest; + + if (!parse_newline(b)) + goto out_free_manifest; + + struct jar_individual_section *section; + while (parse_individual_section(b, §ion)) + list_add_tail(§ion->node, &manifest->individual_sections); + + *manifest_result = manifest; + return true; + +out_free_manifest: + jar_manifest_free(manifest); + return false; +} + +static bool parse_main_section(struct parse_buffer *b, + struct jar_main_section **main_section_result) +{ + struct jar_main_section *main_section = jar_main_section_alloc(); + + if (!parse_version_info(b, + &main_section->major_version, &main_section->minor_version)) + { + goto out_free_main_section; + } + +#if 0 + /* Note: Although the grammar specifies a newline here, it is actually + * part of the version-info we parsed above (version-info is actually + * a header/attribute, which already includes the newline). 
*/ + if (!parse_newline(b)) + goto out_free_main_section; +#endif + + struct jar_header *attribute; + while (parse_header(b, &attribute)) + list_add_tail(&attribute->node, &main_section->main_attributes); + + *main_section_result = main_section; + return true; + +out_free_main_section: + jar_main_section_free(main_section); + return false; +} + +static bool parse_version_info(struct parse_buffer *b, + unsigned int *major_version_result, unsigned int *minor_version_result) +{ + struct jar_header *header; + if (!parse_header(b, &header)) + return false; + + if (strcmp(header->name, "Manifest-Version")) + goto out_free_header; + + struct parse_buffer pb; + pb.data = header->value; + pb.i = 0; + + if (!parse_version_number(&pb, major_version_result)) + goto out_free_header; + + if (pb.data[pb.i] != '.') + goto out_free_header; + + ++pb.i; + + if (!parse_version_number(&pb, minor_version_result)) + goto out_free_header; + + /* Make sure this was the end of the value */ + if (pb.data[pb.i] != '\0') + goto out_free_header; + + jar_header_free(header); + return true; + +out_free_header: + jar_header_free(header); + return false; +} + +static bool parse_version_number(struct parse_buffer *b, + unsigned int *version_number_result) +{ + unsigned int version_number = 0; + + if (!parse_digit(b, &version_number)) + return false; + + unsigned int digit; + while (parse_digit(b, &digit)) + version_number = 10 * version_number + digit; + + *version_number_result = version_number; + return true; +} + +static bool parse_individual_section(struct parse_buffer *b, + struct jar_individual_section **individual_section_result) +{ + struct jar_individual_section *individual_section + = jar_individual_section_alloc(); + + if (!parse_header(b, &individual_section->name_header)) + goto out_free_individual_section; + + if (strcmp(individual_section->name_header->name, "Name")) + goto out_free_individual_section; + + struct jar_header *perentry_attribute; + while (parse_header(b, 
&perentry_attribute)) { + list_add_tail(&perentry_attribute->node, + &individual_section->perentry_attributes); + } + + return true; + +out_free_individual_section: + jar_individual_section_free(individual_section); + return false; +} + +static bool parse_digit(struct parse_buffer *b, unsigned int *digit_result) +{ + char c = b->data[b->i]; + + if (c >= '0' && c <= '9') { + *digit_result = c - '0'; + ++b->i; + return true; + } + + return false; +} + +static struct jar_manifest *read_manifest(struct zip *zip) +{ + int zip_file_index; + struct zip_stat zip_stat; + struct zip_file *zip_file; + uint8_t *zip_file_buf; + + zip_file_index = zip_name_locate(zip, "META-INF/MANIFEST.MF", 0); + if (zip_file_index == -1) { + NOT_IMPLEMENTED; + return NULL; + } + + if (zip_stat_index(zip, zip_file_index, 0, &zip_stat) == -1) { + NOT_IMPLEMENTED; + return NULL; + } + + zip_file_buf = malloc(zip_stat.size); + if (!zip_file_buf) { + NOT_IMPLEMENTED; + return NULL; + } + + zip_file = zip_fopen_index(zip, zip_file_index, 0); + if (!zip_file) { + NOT_IMPLEMENTED; + return NULL; + } + + for (int offset = 0; offset != zip_stat.size;) { + int ret; + + ret = zip_fread(zip_file, + zip_file_buf + offset, zip_stat.size - offset); + if (ret == -1) { + NOT_IMPLEMENTED; + return NULL; + } + + offset += ret; + } + + zip_fclose(zip_file); + + struct parse_buffer pb; + pb.data = (char *) zip_file_buf; + pb.i = 0; + + struct jar_manifest *manifest; + if (!parse_manifest_file(&pb, &manifest)) { + NOT_IMPLEMENTED; + printf("parse error, byte offset %d. 
oops\n", pb.i); + return NULL; + } + + return manifest; +} + +struct vm_jar *vm_jar_open(const char *filename) +{ + int zip_error; + struct zip *zip; + + zip = zip_open(filename, 0, &zip_error); + if (!zip) { + NOT_IMPLEMENTED; + return NULL; + } + + struct jar_manifest *manifest = read_manifest(zip); + if (!manifest) { + NOT_IMPLEMENTED; + return NULL; + } + + struct vm_jar *jar = malloc(sizeof *jar); + jar->manifest = manifest; + + return jar; +} + +const char *vm_jar_get_main_class(const struct vm_jar *jar) +{ + assert(jar); + assert(jar->manifest); + assert(jar->manifest->main_section); + + struct jar_main_section *section = jar->manifest->main_section; + + struct jar_header *header; + list_for_each_entry(header, §ion->main_attributes, node) { + if (!strcmp(header->name, "Main-Class")) + return header->value; + } + + return NULL; +} diff --git a/vm/jato.c b/vm/jato.c index de063a5..7a0f631 100644 --- a/vm/jato.c +++ b/vm/jato.c @@ -56,6 +56,7 @@ #include "vm/fault-inject.h" #include "vm/preload.h" #include "vm/itable.h" +#include "vm/jar.h" #include "vm/jni.h" #include "vm/method.h" #include "vm/natives.h" @@ -393,6 +394,37 @@ static void handle_classpath(const char *arg) } static char *classname; +static struct vm_jar *jar_file; + +static void handle_jar(const char *arg) +{ + /* Can't specify more than one jar file */ + if (jar_file) + usage(stderr, EXIT_FAILURE); + + jar_file = vm_jar_open(arg); + if (!jar_file) { + NOT_IMPLEMENTED; + exit(EXIT_FAILURE); + } + + const char *main_class = vm_jar_get_main_class(jar_file); + if (!main_class) { + NOT_IMPLEMENTED; + exit(EXIT_FAILURE); + } + + classname = strdup(main_class); + if (!classname) { + NOT_IMPLEMENTED; + exit(EXIT_FAILURE); + } + + /* XXX: Cheap solution. This can give funny results depending on where + * you put the -jar relative to the -classpath(s). Besides, we should + * save some memory and only open the zip file once. 
*/ + classloader_add_to_classpath(arg); +} static void handle_perf(void) { @@ -483,6 +515,7 @@ const struct option options[] = { DEFINE_OPTION_ARG("classpath", handle_classpath), DEFINE_OPTION_ARG("cp", handle_classpath), + DEFINE_OPTION_ARG("jar", handle_jar), DEFINE_OPTION("Xperf", handle_perf), @@ -534,7 +567,7 @@ static void parse_options(int argc, char *argv[]) if (optind < argc) { /* Can't specify both a jar and a class file */ - if (classname) + if (jar_file) usage(stderr, EXIT_FAILURE); classname = argv[optind++]; -- 1.6.0.4 ------------------------------------------------------------------------------ Enter the BlackBerry Developer Challenge This is your chance to win up to $100,000 in prizes! For a limited time, vendors submitting new applications to BlackBerry App World(TM) will have the opportunity to enter the BlackBerry Developer Challenge. See full prize details at: http://p.sf.net/sfu/Challenge _______________________________________________ Jatovm-devel mailing list Jatovm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/jatovm-devel