Re: Review: Bo Yang -- libdom Core

John-Mark Bell Sun, 19 Jul 2009 17:31:00 -0700

On Wed, 2009-07-15 at 02:47 +0100, John-Mark Bell wrote:

In general, this is ok. There are quite a lot of typos in this code --
mostly in comments, though some in code. I've highlighted the code ones,
but it'd be good if the ones in comments are fixed, too. Also, there
seem to be a fair few functions that are lacking API documentation. This
needs fixing before merge.


There's a bunch of reference counting stuff that's not quite right in
this diff, but as you're working on this, I've not commented on it.

> Index: test/run-test.sh
> ===================================================================
> --- /dev/null 2009-04-16 19:17:07.000000000 +0100
> +++ test/run-test.sh  2009-07-15 02:08:31.000000000 +0100
> @@ -0,0 +1,130 @@
> +#!/bin/bash

Bash is not available on all supported build hosts. Thus, this script is
not portable.

Compilation of test binaries should be handled by the Makefile.

Running the test binaries should be performed by a testrunner written in
Perl. There's one in the core buildsystem that may be suitable.

> Index: test/README
> ===================================================================
> --- /dev/null 2009-04-16 19:17:07.000000000 +0100
> +++ test/README       2009-07-15 02:08:31.000000000 +0100
> @@ -0,0 +1,2 @@
> +Note the lib/ directory contain the old test related files, although they 
> +are not workable, I leave them here for later use if there is a chance.

Is there any need for this. Also, why preserve the lib directory --
version control does that for you.

> Index: include/dom/core/typeinfo.h
> ===================================================================
> --- /dev/null 2009-04-16 19:17:07.000000000 +0100
> +++ include/dom/core/typeinfo.h       2009-07-15 02:08:49.000000000 +0100
> @@ -0,0 +1,47 @@
> +/*
> + * This file is part of libdom.
> + * Licensed under the MIT License,
> + *                http://www.opensource.org/licenses/mit-license.php
> + * Copyright 2009 Bo Yang <[email protected]>
> + */
> +
> +#ifndef dom_core_typeinfo_h_
> +#define dom_core_typeinfo_h_
> +
> +#include <stdbool.h>
> +
> +#include <dom/core/exceptions.h>
> +
> +struct dom_string;
> +
> +typedef struct dom_type_info dom_type_info;
> +
> +typedef enum {
> +     DERIVATION_RESTRICTION  = 0x00000001,
> +     DERIVATION_EXTENSION    = 0x00000002,
> +     DERIVATION_UNION        = 0x00000004,
> +     DERIVATION_LIST         = 0x00000008
> +} dom_type_info_derivation_method;

Is this a bitfield?

> Index: src/utils/character_valid.h
> ===================================================================
> --- /dev/null 2009-04-16 19:17:07.000000000 +0100
> +++ src/utils/character_valid.h       2009-07-15 02:08:51.000000000 +0100
> @@ -0,0 +1,51 @@
> +/*
> + * This file is part of libdom.
> + * Licensed under the MIT License,
> + *                http://www.opensource.org/licenses/mit-license.php
> + * Copyright 2009 Bo Yang <[email protected]>
> + *
> + * This file contains the API used to validate whether certain character in
> + * name/value is legal according the XML 1.0 standard. See 
> + *
> + * http://www.w3.org/TR/2004/REC-xml-20040204/
> + * http://www.w3.org/TR/REC-xml/
> + * 
> + * for detail.
> + */
> +
> +#ifndef dom_utils_character_valid_h_
> +#define dom_utils_character_valid_h_
> +
> +#include <stdbool.h>
> +
> +struct xml_char_range {
> +    unsigned int start;
> +    unsigned int end;
> +};
> +
> +struct xml_char_group {
> +    int len;
> +    const struct xml_char_range *range;
> +};
> +
> +/* The groups */
> +extern const struct xml_char_group base_char_group;
> +extern const struct xml_char_group char_group;
> +extern const struct xml_char_group combining_char_group;
> +extern const struct xml_char_group digit_char_group;
> +extern const struct xml_char_group extender_group;
> +extern const struct xml_char_group ideographic_group;
> +
> +bool is_character_in_group(unsigned int ch, const struct xml_char_group 
> *group);
> +
> +#define is_base_char(ch) is_character_in_group((ch), &base_char_group)
> +#define is_char(ch) is_character_in_group((ch), &char_group)
> +#define is_combining_char(ch) is_character_in_group((ch), 
> &combining_char_group)
> +#define is_digit(ch) is_character_in_group((ch), &digit_char_group)
> +#define is_extender(ch) is_character_in_group((ch), &extender_group)
> +#define is_ideographic(ch) is_character_in_group((ch), &ideographic_group)
> +
> +#define is_letter(ch)  (is_base_char(ch) || is_ideographic(ch))
> +
> +#endif

General style point: libdom functions should begin with dom_ (or _dom_
if they're internal to the library).

> Index: src/utils/hashtable.h
> ===================================================================
> --- /dev/null 2009-04-16 19:17:07.000000000 +0100
> +++ src/utils/hashtable.h     2009-07-15 02:08:51.000000000 +0100
> @@ -0,0 +1,30 @@
> +/*
> + * This file is part of libdom.
> + * Licensed under the MIT License,
> + *                http://www.opensource.org/licenses/mit-license.php
> + * Copyright 2006 Rob Kendrick <[email protected]>
> + * Copyright 2009 Bo Yang <[email protected]>
> + */
> +
> +#ifndef _LIBDOM_UTILS_HASHTABLE_H_
> +#define _LIBDOM_UTILS_HASHTABLE_H_

Please make this guard macro match all the others.

> Index: src/utils/resource_mgr.h
> ===================================================================
> --- /dev/null 2009-04-16 19:17:07.000000000 +0100
> +++ src/utils/resource_mgr.h  2009-07-15 02:08:51.000000000 +0100
> @@ -0,0 +1,40 @@
> +/*
> + * This file is part of libdom.
> + * Licensed under the MIT License,
> + *                http://www.opensource.org/licenses/mit-license.php
> + * Copyright 2009 Bo Yang <[email protected]>
> + */
> +
> +#ifndef dom_utils_resource_mgr_h_
> +#define dom_utils_resource_mgr_h_
> +
> +#include <dom/functypes.h>
> +#include <dom/core/exceptions.h>
> +
> +#include "hashtable.h"
> +
> +struct lwc_context_s;
> +struct lwc_string_s;
> +struct dom_string;
> +
> +typedef struct resource_mgr {
> +    dom_alloc alloc;
> +    void *pw;
> +    struct lwc_context_s *ctx;
> +} resource_mgr;
> +
> +void *resource_mgr_alloc(struct resource_mgr *res, void *ptr, size_t size);
> +
> +dom_exception resource_mgr_create_string(struct resource_mgr *res,
> +             const uint8_t *data, size_t len, struct dom_string **result);
> +
> +dom_exception resource_mgr_create_lwcstring(struct resource_mgr *res,
> +             const uint8_t *data, size_t len, struct lwc_string_s **result);
> +
> +dom_exception resource_mgr_create_string_from_lwcstring(struct resource_mgr 
> *res,
> +             struct lwc_string_s *str, struct dom_string **result);
> +
> +dom_exception resource_mgr_create_hashtable(struct resource_mgr *res,
> +             size_t chains, hash_func f, struct hash_table **ht);
> +
> +#endif

I'm not sure I understand the need for a resource manager. Can you
explain?

> Index: src/utils/character_valid.c
> ===================================================================
> --- /dev/null 2009-04-16 19:17:07.000000000 +0100
> +++ src/utils/character_valid.c       2009-07-15 02:08:51.000000000 +0100
> @@ -0,0 +1,204 @@
> +/*
> + * This file is part of libdom.
> + * Licensed under the MIT License,
> + *                http://www.opensource.org/licenses/mit-license.php
> + * Copyright 2009 Bo Yang <[email protected]>
> + */
> +
> +#include "utils/character_valid.h"
> +
> +#include <assert.h>
> +
> +static const struct xml_char_range base_char_range[] = { {0x41, 0x5a}, 
> +    {0x61, 0x7a}, {0xc0, 0xd6}, {0xd8, 0xf6}, {0x00f8, 0x00ff}, {0x100, 
> 0x131},
> +    {0x134, 0x13e}, {0x141, 0x148}, {0x14a, 0x17e}, {0x180, 0x1c3},
> +    {0x1cd, 0x1f0}, {0x1f4, 0x1f5}, {0x1fa, 0x217}, {0x250, 0x2a8},
> +    {0x2bb, 0x2c1}, {0x386, 0x386}, {0x388, 0x38a}, {0x38c, 0x38c},
> +    {0x38e, 0x3a1}, {0x3a3, 0x3ce}, {0x3d0, 0x3d6}, {0x3da, 0x3da},
> +    {0x3dc, 0x3dc}, {0x3de, 0x3de}, {0x3e0, 0x3e0}, {0x3e2, 0x3f3},
> +    {0x401, 0x40c}, {0x40e, 0x44f}, {0x451, 0x45c}, {0x45e, 0x481},
> +    {0x490, 0x4c4}, {0x4c7, 0x4c8}, {0x4cb, 0x4cc}, {0x4d0, 0x4eb},
> +    {0x4ee, 0x4f5}, {0x4f8, 0x4f9}, {0x531, 0x556}, {0x559, 0x559},
> +    {0x561, 0x586}, {0x5d0, 0x5ea}, {0x5f0, 0x5f2}, {0x621, 0x63a},
> +    {0x641, 0x64a}, {0x671, 0x6b7}, {0x6ba, 0x6be}, {0x6c0, 0x6ce},
> +    {0x6d0, 0x6d3}, {0x6d5, 0x6d5}, {0x6e5, 0x6e6}, {0x905, 0x939},
> +    {0x93d, 0x93d}, {0x958, 0x961}, {0x985, 0x98c}, {0x98f, 0x990},
> +    {0x993, 0x9a8}, {0x9aa, 0x9b0}, {0x9b2, 0x9b2}, {0x9b6, 0x9b9},
> +    {0x9dc, 0x9dd}, {0x9df, 0x9e1}, {0x9f0, 0x9f1}, {0xa05, 0xa0a},
> +    {0xa0f, 0xa10}, {0xa13, 0xa28}, {0xa2a, 0xa30}, {0xa32, 0xa33},
> +    {0xa35, 0xa36}, {0xa38, 0xa39}, {0xa59, 0xa5c}, {0xa5e, 0xa5e},
> +    {0xa72, 0xa74}, {0xa85, 0xa8b}, {0xa8d, 0xa8d}, {0xa8f, 0xa91},
> +    {0xa93, 0xaa8}, {0xaaa, 0xab0}, {0xab2, 0xab3}, {0xab5, 0xab9},
> +    {0xabd, 0xabd}, {0xae0, 0xae0}, {0xb05, 0xb0c}, {0xb0f, 0xb10},
> +    {0xb13, 0xb28}, {0xb2a, 0xb30}, {0xb32, 0xb33}, {0xb36, 0xb39},
> +    {0xb3d, 0xb3d}, {0xb5c, 0xb5d}, {0xb5f, 0xb61}, {0xb85, 0xb8a},
> +    {0xb8e, 0xb90}, {0xb92, 0xb95}, {0xb99, 0xb9a}, {0xb9c, 0xb9c},
> +    {0xb9e, 0xb9f}, {0xba3, 0xba4}, {0xba8, 0xbaa}, {0xbae, 0xbb5},
> +    {0xbb7, 0xbb9}, {0xc05, 0xc0c}, {0xc0e, 0xc10}, {0xc12, 0xc28},
> +    {0xc2a, 0xc33}, {0xc35, 0xc39}, {0xc60, 0xc61}, {0xc85, 0xc8c},
> +    {0xc8e, 0xc90}, {0xc92, 0xca8}, {0xcaa, 0xcb3}, {0xcb5, 0xcb9},
> +    {0xcde, 0xcde}, {0xce0, 0xce1}, {0xd05, 0xd0c}, {0xd0e, 0xd10},
> +    {0xd12, 0xd28}, {0xd2a, 0xd39}, {0xd60, 0xd61}, {0xe01, 0xe2e},
> +    {0xe30, 0xe30}, {0xe32, 0xe33}, {0xe40, 0xe45}, {0xe81, 0xe82},
> +    {0xe84, 0xe84}, {0xe87, 0xe88}, {0xe8a, 0xe8a}, {0xe8d, 0xe8d},
> +    {0xe94, 0xe97}, {0xe99, 0xe9f}, {0xea1, 0xea3}, {0xea5, 0xea5},
> +    {0xea7, 0xea7}, {0xeaa, 0xeab}, {0xead, 0xeae}, {0xeb0, 0xeb0},
> +    {0xeb2, 0xeb3}, {0xebd, 0xebd}, {0xec0, 0xec4}, {0xf40, 0xf47},
> +    {0xf49, 0xf69}, {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x1100},
> +    {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110b, 0x110c},
> +    {0x110e, 0x1112}, {0x113c, 0x113c}, {0x113e, 0x113e}, {0x1140, 0x1140},
> +    {0x114c, 0x114c}, {0x114e, 0x114e}, {0x1150, 0x1150}, {0x1154, 0x1155},
> +    {0x1159, 0x1159}, {0x115f, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165},
> +    {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116d, 0x116e}, {0x1172, 0x1173},
> +    {0x1175, 0x1175}, {0x119e, 0x119e}, {0x11a8, 0x11a8}, {0x11ab, 0x11ab},
> +    {0x11ae, 0x11af}, {0x11b7, 0x11b8}, {0x11ba, 0x11ba}, {0x11bc, 0x11c2},
> +    {0x11eb, 0x11eb}, {0x11f0, 0x11f0}, {0x11f9, 0x11f9}, {0x1e00, 0x1e9b},
> +    {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45},
> +    {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f59, 0x1f59}, {0x1f5b, 0x1f5b},
> +    {0x1f5d, 0x1f5d}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fbc},
> +    {0x1fbe, 0x1fbe}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3},
> +    {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc},
> +    {0x2126, 0x2126}, {0x212a, 0x212b}, {0x212e, 0x212e}, {0x2180, 0x2182},
> +    {0x3041, 0x3094}, {0x30a1, 0x30fa}, {0x3105, 0x312c}, {0xac00, 0xd7a3}
> +};
> +
> +const struct xml_char_group base_char_group = { 202, base_char_range };

This seems particularly error prone. Is sizeof(base_char_range) /
sizeof(base_char_range[0]) insufficient? Similarly for the other
instances of this.

> Index: src/bootstrap/implementation.c
> ===================================================================
> --- /dev/null 2009-04-16 19:17:07.000000000 +0100
> +++ src/bootstrap/implementation.c    2009-07-15 02:08:58.000000000 +0100
> @@ -0,0 +1,407 @@
> +/*
> + * This file is part of libdom.
> + * Licensed under the MIT License,
> + *                http://www.opensource.org/licenses/mit-license.php
> + * Copyright 2007 John-Mark Bell <[email protected]>
> + */
> +
> +#include <dom/bootstrap/implpriv.h>
> +#include <dom/bootstrap/implregistry.h>
> +#include <dom/dom.h>
> +
> +#include <libwapcaplet/libwapcaplet.h>
> +
> +#include "core/node.h"
> +#include "core/document_type.h"
> +
> +#include "utils/utils.h"
> +#include "utils/validate.h"
> +#include "utils/namespace.h"
> +
> +#include "bootstrap/implementation.h"
> +
> +static dom_alloc _alloc;
> +static void * _pw;

Lose the space after '*'.

I'm not comfortable about these being static. Is there any guarantee
that they're shared by different implementation instances? Note that
DOMImplementations may need language-specific hooks (e.g. for
normalisation).

> +/**
> + * Create a document type node
> + *
> + * \param impl       The implementation to create the node
> + * \param qname      The qualified name of the document type
> + * \param public_id  The external subset public identifier
> + * \param system_id  The external subset system identifier
> + * \param doctype    Pointer to location to receive result
> + * \return DOM_NO_ERR on success,
> + *         DOM_INVALID_CHARACTER_ERR if ::qname is invalid,
> + *         DOM_NAMESPACE_ERR         if ::qname is malformed.
> + *
> + * Any memory allocated by this call should be allocated using
> + * the provided memory (de)allocation function.
> + *
> + * The doctype will be referenced, so the client need not do this
> + * explicitly. The client must unref the doctype once it has
> + * finished with it.
> + */
> +dom_exception impl_implementation_create_document_type(
> +             struct dom_implementation *impl,
> +             struct dom_string *qname,
> +             struct dom_string *public_id,
> +             struct dom_string *system_id,
> +             dom_alloc alloc, void *pw, struct lwc_context_s *ctx,
> +             struct dom_document_type **doctype)
> +{
> +     struct dom_document_type *d;
> +    struct dom_string *prefix, *lname;
> +     dom_exception err;

General style point: please ensure all indentations are with *tabs*, not
spaces.

> Index: include/dom/core/impllist.h
> ===================================================================
> --- include/dom/core/impllist.h       (revision 8546)
> +++ include/dom/core/impllist.h       (working copy)
> @@ -17,6 +17,8 @@
>  void dom_implementation_list_ref(struct dom_implementation_list *list);
>  void dom_implementation_list_unref(struct dom_implementation_list *list);
>  
> +void dom_implementation_list_destroy(struct dom_implementation_list *list);
> +

Why is this public? It should only be called when the list's reference
count falls to zero, right? This is determined when calling _unref().

> Index: include/dom/core/string.h
> ===================================================================
> --- include/dom/core/string.h (revision 8546)
> +++ include/dom/core/string.h (working copy)
> @@ -10,6 +10,7 @@
>  
>  #include <inttypes.h>
>  #include <stddef.h>
> +#include <libwapcaplet/libwapcaplet.h>
>  
>  #include <dom/functypes.h>
>  #include <dom/core/exceptions.h>
> @@ -25,6 +26,33 @@
>  dom_exception dom_string_create(dom_alloc alloc, void *pw,
>               const uint8_t *ptr, size_t len, struct dom_string **str);
>  
> +/* Clone a dom_string */
> +dom_exception dom_string_clone(dom_alloc alloc, void *pw, struct dom_string 
> *str,
> +             struct dom_string **ret);

Would this be better named as _copy()?

> +/* Create a DOM string from a buffer 
> + * We will not alloc any memory in this method because the client provide
> + * it. This function call mainly used for create a string from lwc_string */
> +dom_exception dom_string_create_from_lwcstring(dom_alloc alloc, void *pw,
> +             struct lwc_context_s *ctx, struct lwc_string_s *str, 
> +             struct dom_string **ret);

The documentation here is wrong. Also, does this not complicate things
hugely for the client? They'd have to know they're dealing with an
lwc_string.

> +/* lwc_string related functions */
> +/* Make the dom_string be interned in the lwc_context_s */
> +dom_exception dom_string_intern(struct dom_string *str, 
> +             struct lwc_context_s *ctx, struct lwc_string_s **lwcstr);

Is this an internal method? If so, it shouldn't be in the public API.

> +/* Get the internal lwc_string */
> +dom_exception dom_string_get_intern(struct dom_string *str, 
> +             struct lwc_context_s **ctx, struct lwc_string_s **lwcstr);
> +/* Test is the dom_string now be interned */
> +dom_exception dom_string_is_intern(struct dom_string *str, 
> +             struct lwc_context_s *ctx, bool *intern);

Why does this exist? Simply asking for the interned string and having it
interned on the fly if it isn't already should be fine.

> +/* Map the lwc_error to dom_exception */
> +dom_exception lwc_error_to_exception(lwc_error err);

This should be named dom_exception_from_lwc_error()

> +/* This function is used when we compare the raw data of two lwc_string */
> +int lwc_string_compare_raw(struct lwc_string_s *s1, struct lwc_string_s *s2);

What is this for? It should have a dom_ at the start of its name. Why is
lwc_context_string_isequal insufficient? Is this not internal
implementation detail?
 
> +/* Get the character at position index */
> +dom_exception dom_string_at(struct dom_string *str, uint32_t index, 
> +        uint32_t *ch);

What character set is the returned character in? It should be UCS-4.
What is the range of permissible indices -- [0, string_length)?

> Index: include/dom/core/attr.h
> ===================================================================
> --- include/dom/core/attr.h   (revision 8546)
> +++ include/dom/core/attr.h   (working copy)
> @@ -80,7 +80,7 @@
>       return ((dom_attr_vtable *) ((dom_node *) attr)->vtable)->
>                       dom_attr_get_owner(attr, result);
>  }
> -#define dom_attr_get_owner(a, r) dom_attr_get_owner((struct dom_attr *) (a), 
> \
> +#define dom_attr_get_owner_element(a, r) dom_attr_get_owner((struct dom_attr 
> *) (a), \
>               (struct dom_element **) (r))

Should the function itself not also be renamed?
 
> Index: include/dom/core/document.h
> ===================================================================
> --- include/dom/core/document.h       (revision 8546)
> +++ include/dom/core/document.h       (working copy)
> @@ -9,6 +9,8 @@
>  #define dom_core_document_h_
>  
>  #include <stdbool.h>
> +#include <inttypes.h>
> +#include <stddef.h>
>  #include <stdint.h>
>  
>  #include <dom/core/exceptions.h>
> @@ -29,6 +31,7 @@
>  struct dom_processing_instruction;
>  struct dom_string;
>  struct dom_text;
> +struct lwc_string_s;
>  
>  typedef struct dom_document dom_document;
>  
> @@ -117,6 +120,12 @@
>  dom_exception dom_document_create_string(struct dom_document *doc,
>               const uint8_t *data, size_t len, struct dom_string **result);
>  
> +dom_exception dom_document_create_lwcstring(struct dom_document *doc,
> +             const uint8_t *data, size_t len, struct lwc_string_s **result);

What is this for? Why is it in the public API?

> +dom_exception dom_document_create_string_from_lwcstring(struct dom_document 
> *doc,
> +             struct lwc_string_s *str, struct dom_string **result);
> +

Similarly.

> Index: include/dom/core/implementation.h
> ===================================================================
> --- include/dom/core/implementation.h (revision 8546)
> +++ include/dom/core/implementation.h (working copy)
> @@ -30,20 +30,19 @@
>  dom_exception dom_implementation_get_feature(
>               struct dom_implementation *impl,
>               struct dom_string *feature, struct dom_string *version,
> -             void **object,
> -             dom_alloc alloc, void *pw);
> +             void **object);

How is this going to create an object if it doesn't know what allocator
to use? An implementation may be used by more than one document.

> Index: include/dom/core/namednodemap.h
> ===================================================================
> --- include/dom/core/namednodemap.h   (revision 8546)
> +++ include/dom/core/namednodemap.h   (working copy)
> @@ -80,4 +80,11 @@
>               _dom_namednodemap_remove_named_item_ns((dom_namednodemap *) 
> (m), \
>               (dom_string *) (n), (dom_string *) (l), (dom_node **) (r))
>  
> +bool _dom_namednodemap_equal(struct dom_namednodemap *m1, 
> +             struct dom_namednodemap *m2);
> +
> +#define dom_namednodemap_equal(m1, m2) _dom_namednodemap_equal( \
> +             (struct dom_namednodemap *) (m1), \
> +             (struct dom_namednodemap *) (m2))

Is this not implementation detail? Why is it in the public API?

> Index: include/dom/bootstrap/implregistry.h
> ===================================================================
> --- include/dom/bootstrap/implregistry.h      (revision 8546)
> +++ include/dom/bootstrap/implregistry.h      (working copy)
> @@ -15,16 +15,18 @@
>  struct dom_implementation_list;
>  struct dom_string;
>  
> +/* Initialise the dom_implementation */
> +dom_exception dom_impleregistry_dom_implementation_initialise(
> +             dom_alloc allocator, void *ptr);

Why does the registry need initialising?

>  /* Retrieve a DOM implementation from the registry */
>  dom_exception dom_implregistry_get_dom_implementation(
>               struct dom_string *features,
> -             struct dom_implementation **impl,
> -             dom_alloc alloc, void *pw);
> +             struct dom_implementation **impl);
>  
>  /* Get a list of DOM implementations that support the requested features */
>  dom_exception dom_implregistry_get_dom_implementation_list(
>               struct dom_string *features,
> -             struct dom_implementation_list **list,
> -             dom_alloc alloc, void *pw);
> +             struct dom_implementation_list **list);

These API changes prevent implementation objects being created on the
fly.
 
> Index: src/utils/namespace.h
> ===================================================================
> --- src/utils/namespace.h     (revision 8546)
> +++ src/utils/namespace.h     (working copy)
> @@ -14,6 +14,11 @@
>  struct dom_document;
>  struct dom_string;
>  
> +/** XML prefix */
> +extern struct dom_string *xml;
> +/** XMLNS prefix */
> +extern struct dom_string *xmlns;

What needs access to these? They should be named _dom_foo.

> Index: src/utils/Makefile
> ===================================================================
> --- src/utils/Makefile        (revision 8546)
> +++ src/utils/Makefile        (working copy)
> @@ -1,4 +1,5 @@
>  # Sources
> -DIR_SOURCES := namespace.c
> +DIR_SOURCES :=       namespace.c hashtable.c resource_mgr.c 
> character_valid.c \

Lose the spurious tab.

> Index: src/utils/namespace.c
> ===================================================================
> --- src/utils/namespace.c     (revision 8546)
> +++ src/utils/namespace.c     (working copy)
> @@ -143,13 +153,18 @@
>               /* If namespace URI is for xmlns, ensure qname == "xmlns" */
>               if (namespace != NULL && 
>                               dom_string_cmp(namespace, 
> -                             dom_namespaces[DOM_NAMESPACE_XMLNS]) == 0 &&
> -                             dom_string_cmp(qname, xmlns) != 0) {
> +                             dom_namespaces[DOM_NAMESPACE_XMLNS]) != 0 &&
> +                             dom_string_cmp(qname, xmlns) == 0) {

This seems wrong -- dom_string_cmp returns 0 for a match.
The above now does:

if (namespace && namespace != DOM_NAMESPACE_XMLNS && qname == "xmlns")
    error;

when what appears to be needed from the commentary is:

if (namespace && namespace == DOM_NAMESPACE_XMLNS && qname != "xmlns")
    error;

>                       return DOM_NAMESPACE_ERR;
>               }
> -     } else {
> +     } else if (colon == 0) {
> +        /* Some name like ":name" */
> +        if (namespace != NULL)
> +            return DOM_NAMESPACE_ERR;

":name" means an element in a namespace with no prefix. It's perfectly
valid, assuming I'm remembering XML namespaces correctly.

> Index: src/core/string.c
> ===================================================================
> --- src/core/string.c (revision 8546)
> +++ src/core/string.c (working copy)
> @@ -5,6 +5,7 @@
>   * Copyright 2007 John-Mark Bell <[email protected]>
>   */
>  
> +#include <assert.h>
>  #include <ctype.h>
>  #include <inttypes.h>
>  #include <string.h>
> @@ -40,6 +41,11 @@
>       .refcnt = 1
>  };
>  
> +/* used for mapping from lwc_error to dom_exception */
> +static dom_exception exceptions[3] = { DOM_NO_ERR, DOM_NO_MEM_ERR,
> +        DOM_INDEX_SIZE_ERR};

Is there really a need for this? Can you not just switch on lwc_error?

> @@ -121,7 +137,163 @@
>       return DOM_NO_ERR;
>  }
>  
> +/* Clone a dom_string if neccessery 
> + 

What is necessary for a string to be cloned?

> + * \param alloc              The new allocator for this string

Lose spurious tab.
 
> +/**
> + * Get the UCS4 character at position index
> + *
> + * \param index     The position of the charater
> + * \param ch        The UCS4 character
> + */
> +dom_exception dom_string_at(struct dom_string *str, uint32_t index, 
> +        uint32_t *ch)
> +{
> +     const uint8_t *s;
> +     size_t clen, slen;
> +     uint32_t c, i;
> +     parserutils_error err;
> +
> +     if (str == NULL)
> +             str = &empty_string;
> +
> +     s = str->ptr;
> +     slen = str->len;
> +
> +     i = 0;
> +
> +     while (slen > 0) {
> +             err = parserutils_charset_utf8_to_ucs4(s, slen, &c, &clen);
> +             if (err != PARSERUTILS_OK) {
> +                     return (uint32_t) -1;
> +             }

If this function is called a lot, you'll find that
parserutils_charset_utf8_char_byte_length() is faster, as it doesn't
perform the conversion of utf8 to ucs4 for every character (it simply
considers the first byte of the UTF-8 character and returns the implied
byte length. Obviously, you'll need to convert the indexth character to
UCS-4 to return it.

> Index: src/core/text.c
> ===================================================================
> --- src/core/text.c   (revision 8546)
> +++ src/core/text.c   (working copy)
> @@ -24,16 +27,31 @@
>       DOM_TEXT_VTABLE
>  };
>  
> -/* The destroy virtual function */
> -void _dom_text_destroy(struct dom_node_internal *node);
> -void _dom_text_destroy(struct dom_node_internal *node)
> -{
> -     struct dom_document *doc;
> -     dom_node_get_owner_document(node, &doc);
> +static struct dom_node_protect_vtable text_protect_vtable = {
> +     DOM_TEXT_PROTECT_VTABLE
> +};
>  
> -     dom_text_destroy(doc, (struct dom_text *) node);
> -}
> +/* Following comes helper functions */
> +typedef enum walk_operation {
> +     COLLECT,
> +     DELETE
> +} walk_operation;
> +typedef enum walk_order {
> +     LEFT,
> +     RIGHT
> +} walk_order;
>  
> +/* Walk the logic-adjacent text in document order */
> +static dom_exception walk_lat_order(dom_node_internal *node, walk_operation 
> opt,
> +             walk_order order, dom_string **ret, bool *cont);
> +/* Walk the logic-adjacent text */
> +static dom_exception walk_lat(dom_text *text, walk_operation opt, 
> +             dom_string **ret);

Can these two please be renamed to be more obvious -- "lat" is entirely
opaque to someone reading the code.

> +dom_exception walk_lat_order(dom_node_internal *node, walk_operation opt,
> +             walk_order order, dom_string **ret, bool *cont)

Needs documenting. 

It should also iterate across the subtree, not recurse. Recursion over
deep trees can result in stack overflows. Algorithms that recurse over
the tree should not appear anywhere in libdom.

> +dom_exception walk_lat(dom_text *text, walk_operation opt, 
> +             dom_string **ret)

As above.

> Index: src/core/nodelist.h
> ===================================================================
> --- src/core/nodelist.h       (revision 8546)
> +++ src/core/nodelist.h       (working copy)
> @@ -16,16 +16,23 @@
>  struct dom_node;
>  struct dom_nodelist;
>  struct dom_string;
> +struct lwc_string_s;
>  
> +typedef enum { 
> +     DOM_NODELIST_CHILDREN,
> +             DOM_NODELIST_BY_NAME,
> +             DOM_NODELIST_BY_NAMESPACE
> +} nodelist_type;

Lose spurious tabs.

> Index: src/core/characterdata.c
> ===================================================================
> --- src/core/characterdata.c  (revision 8546)
> +++ src/core/characterdata.c  (working copy)
> @@ -373,3 +381,36 @@
>       return DOM_NO_ERR;
>  }
>  
> +
> +/*                                                                   */
> +/*----------------------------------------------------------------------*/
> +/*                                                                   */
> +/* The protected virtual functions of Node 
> + *
> + * @note: the three following API never be called directly from the virtual
> + *     functions dispatch mechanism, they are here for the code consistent.
> + */
> +void _dom_characterdata_destroy(struct dom_node_internal *node)
> +{
> +     assert("Should never be here" != NULL);

This will always evaluate to true, which isn't what you want.
> +     UNUSED(node);
> +}
> +
> +dom_exception _dom_characterdata_alloc(struct dom_document *doc,
> +             struct dom_node_internal *n, struct dom_node_internal **ret)
> +{
> +     assert("Should never be here" != NULL);

Similarly.

> Index: src/core/element.c
> ===================================================================
> --- src/core/element.c        (revision 8546)
> +++ src/core/element.c        (working copy)
> +/* Note: The DOM implentation usally use uppercase letter for element names
> + *    and lowercase for attribute names.
> + */

This is completely untrue. Case sensitivity entirely depends upon the
source document language. Libdom can make no assumptions about it.

> +#define ID_ATTRIBUTE "id"

Is there a need for this? The language binding is probably the place
that needs to be able to extract ID attributes.

> +unsigned int hash_lwcstring(void *key);

Why isn't this static? Why can't you just call lwc_string_hash_value()?

> +static dom_exception hashtable_clone(struct hash_table *sh, 
> +             struct dom_document *sd, struct hash_table **th, 
> +             struct dom_document *td, dom_node_internal *parent);

Should this not be in hashtable.c?

> +void destroy_attributes(void *key, void *value, void *pw);
> +void destroy_nsattributes(void *key, void *value, void *pw);

Why aren't these static?

> +/* TODO: How to deal with default attribue:
> + *
> + *   The document object should hold a mapping for default attributes
> + *   for each element.

It's probably far simpler to just ask the language binding.

> Index: src/core/Makefile
> ===================================================================
> --- src/core/Makefile (revision 8546)
> +++ src/core/Makefile (working copy)
> @@ -1,8 +1,11 @@
>  # Sources
> -DIR_SOURCES := attr.c cdatasection.c characterdata.c comment.c \
> -     document.c document_type.c doc_fragment.c \
> -     element.c entity_ref.c implementation.c impllist.c \
> -     namednodemap.c node.c nodelist.c \
> -     pi.c string.c text.c
> +DIR_SOURCES := \
> +     string.c node.c \
> +     attr.c characterdata.c element.c \
> +     implementation.c impllist.c \
> +     text.c typeinfo.c comment.c \
> +     namednodemap.c nodelist.c \
> +     cdatasection.c document_type.c entity_ref.c pi.c \
> +     doc_fragment.c document.c 

Is there a reason why these are no longer in alphabetical order?
 
> Index: src/core/document_type.c
> ===================================================================
> --- src/core/document_type.c  (revision 8546)
> +++ src/core/document_type.c  (working copy)
> @@ -102,17 +101,95 @@
>       struct dom_document_type *doctype = 
>                       (struct dom_document_type *)doctypenode;
>  
> -     /* Finish with public and system IDs */
> -     dom_string_unref(doctype->system_id);
> -     dom_string_unref(doctype->public_id);
> -
>       /* Finalise base class */
> -     dom_node_finalise(doctype->base.owner, &doctype->base);
> +     dom_document_type_finalise(doctype);
>  
>       /* Free doctype */
> -     doctype->alloc(doctype, 0, doctype->pw);
> +     doctype->res.alloc(doctype, 0, doctype->res.pw);
>  }
>  
> +dom_exception dom_document_type_intiliase(struct dom_document_type *doctype,

"initialise".

> Index: src/core/node.c
> ===================================================================
> --- src/core/node.c   (revision 8546)
> +++ src/core/node.c   (working copy)
>       if (!null_owner_permitted) {
> -             /* Release the reference we claimed on the document. If this 
> -              * is the last reference held on the document and the list 
> -              * of nodes pending deletion is empty, then the document will 
> -              * be destroyed. */
> +             /* Claim a reference upon the owning document during 
> +              * destruction to ensure that the document doesn't get 
> +              * destroyed before its contents. */
>               dom_node_unref(owner);

The code doesn't match the commentary here. Which is correct?

> Index: src/bootstrap/init_fini.c
> ===================================================================
> --- src/bootstrap/init_fini.c (revision 8546)
> +++ src/bootstrap/init_fini.c (working copy)
> @@ -8,9 +8,11 @@
>  #include <stdbool.h>
>  
>  #include <dom/bootstrap/init_fini.h>
> +#include <dom/bootstrap/implregistry.h>
>  
>  #include "core/document.h"
>  #include "utils/namespace.h"
> +#include "bootstrap/implementation.h"
>  
>  static bool __initialised;
>  
> @@ -32,7 +34,7 @@
>               return DOM_NO_ERR;
>       }
>  
> -     err = _dom_document_initialise(alloc, pw);
> +     err = dom_document_module_initialise(alloc, pw);
>       if (err != DOM_NO_ERR) {
>               return err;
>       }
> @@ -42,6 +44,17 @@
>               return err;
>       }
>  
> +     err = dom_impleregistry_dom_implementation_initialise(

Lose the spurious 'e'.

> +                     
> Index: src/bootstrap/implregistry.c
> ===================================================================
> --- src/bootstrap/implregistry.c      (revision 8546)
> +++ src/bootstrap/implregistry.c      (working copy)
> @@ -11,6 +11,7 @@
>  #include <dom/bootstrap/implregistry.h>
>  
>  #include <dom/core/impllist.h>
> +#include <dom/core/implementation.h>
>  
>  /**
>   * Item in list of registered DOM implementation sources
> @@ -23,14 +24,23 @@
>  };
>  
>  static struct dom_impl_src_item *sources; /**< List of registered sources */
> +static dom_alloc alloc;
> +static void *pw;
>  
> +dom_exception dom_impleregistry_dom_implementation_initialise(

Lose spurious 'e'.


J.

Re: Review: Bo Yang -- libdom Core

Reply via email to