Hi
I took Sven's hstore patch and added a new functionality.
It is now possible to request an arbitrary number of extra hstore
columns, that contains all tags that start with the column name.
eg: osm2pgsql --hstore-column "name:" --hstore-column "wikipedia:"
will add two new columns "name:" and "wikipedia:". When a node or a
way contains a name:de=Deutschland tag, the "name:"-column will contain
an hstore entry 'de'=>'Deutschland'.
If no name:-Tag is present, the column is set to NULL. That way one can
easily look for translated objects by toing a "name:" IS NOT NULL query.
What kind of Index would you suggest for such a IS NOT NULL query? A
conditional index on osm_id WHERE "name:" IS NOT NULL?
I wrote the relevant code into a single function and also relocated the
hstore code into a seperate function and documented both. Can someone
with more C experience please check the patch because I'm not sure I did
all pointer logic correct.
I did successfully test it on the berlin geofabrik extract and will try
to run a planet import during the next week (need to talk about that on
Maps-l ;).
The attached patch is against r21892 of the osm repo.
Special thanks to Sven who did the real hard work.
Peter
Index: output.h
===================================================================
--- output.h (revision 21892)
+++ output.h (working copy)
@@ -29,6 +29,8 @@
const char *expire_tiles_filename; /* File name to output expired tiles
list to */
int enable_hstore; /* add an additional hstore column with objects key/value
pairs */
int enable_multi; /* Output multi-geometries intead of several simple
geometries */
+ char** hstore_columns; /* list of columns that should be written into their
own hstore column */
+ int n_hstore_columns; /* number of hstore columns */
};
struct output_t {
Index: osm2pgsql.c
===================================================================
--- osm2pgsql.c (revision 21892)
+++ osm2pgsql.c (working copy)
@@ -552,6 +552,9 @@
fprintf(stderr, " \t\tThis includes the username, userid,
timestamp and version.\n");
fprintf(stderr, " \t\tNote: this option also requires
additional entries in your style file.\n");
fprintf(stderr, " -k|--hstore\t\tGenerate an additional hstore
(key/value) column to postgresql tables\n");
+ fprintf(stderr, " -z|--hstore-column\tGenerate an additional hstore
(key/value) column to containing all tags\n");
+ fprintf(stderr, " \tthat start with the specified
string, eg --hstore-column \"name:\" will\n");
+ fprintf(stderr, " \tproduce an extra hstore column
that contains all name:xx tags\n");
fprintf(stderr, " -G|--multi-geometry\t\tGenerate multi-geometry
features in postgresql tables.\n");
fprintf(stderr, " -h|--help\t\tHelp information.\n");
fprintf(stderr, " -v|--verbose\t\tVerbose output.\n");
@@ -660,6 +663,8 @@
const char *style = OSM2PGSQL_DATADIR "/default.style";
const char *temparg;
const char *output_backend = "pgsql";
+ const char **hstore_columns = NULL;
+ int n_hstore_columns = 0;
int cache = 800;
struct output_options options;
PGconn *sql_conn;
@@ -693,12 +698,13 @@
{"expire-output", 1, 0, 'o'},
{"output", 1, 0, 'O'},
{"extra-attributes", 0, 0, 'x'},
- {"hstore", 0, 0, 'k'},
+ {"hstore", 0, 0, 'k'},
+ {"hstore-column", 1, 0, 'z'},
{"multi-geometry", 0, 0, 'G'},
{0, 0, 0, 0}
};
- c = getopt_long (argc, argv,
"ab:cd:hlmMp:suvU:WH:P:i:E:C:S:e:o:O:xkG", long_options, &option_index);
+ c = getopt_long (argc, argv,
"ab:cd:hlmMp:suvU:WH:P:i:E:C:S:e:o:O:xkG:z:", long_options, &option_index);
if (c == -1)
break;
@@ -732,6 +738,11 @@
case 'O': output_backend = optarg; break;
case 'x': extra_attributes=1; break;
case 'k': enable_hstore=1; break;
+ case 'z':
+ n_hstore_columns++;
+ hstore_columns = (const char**)realloc(hstore_columns,
sizeof(&n_hstore_columns) * n_hstore_columns);
+ hstore_columns[n_hstore_columns-1] = optarg;
+ break;
case 'G': enable_multi=1; break;
case 'h': long_usage_bool=1; break;
case '?':
@@ -804,6 +815,8 @@
options.expire_tiles_filename = expire_tiles_filename;
options.enable_multi = enable_multi;
options.enable_hstore = enable_hstore;
+ options.hstore_columns = hstore_columns;
+ options.n_hstore_columns = n_hstore_columns;
if (strcmp("pgsql", output_backend) == 0) {
out = &out_pgsql;
@@ -848,6 +861,9 @@
free(nds);
free(members);
+
+ // free the column pointer buffer
+ free(hstore_columns);
project_exit();
text_exit();
Index: keyvals.c
===================================================================
--- keyvals.c (revision 21892)
+++ keyvals.c (working copy)
@@ -287,25 +287,30 @@
*/
void keyval2hstore(char *hstring, struct keyval *tags)
{
+ keyval2hstore_manual(hstring, tags->key, tags->value);
+}
+
+void keyval2hstore_manual(char *hstring, char *key, char *value)
+{
static char* str=NULL;
static size_t stlen=0;
size_t len;
- len=strlen(tags->value);
+ len=strlen(value);
if (len>stlen) {
stlen=len;
str=realloc(str,1+stlen*2);
}
- len=strlen(tags->key);
+ len=strlen(key);
if (len>stlen) {
stlen=len;
str=realloc(str,1+stlen*2);
}
- escape4hstore(str,tags->key);
+ escape4hstore(str,key);
hstring+=sprintf(hstring,"\"%s\"=>",str);
- escape4hstore(str,tags->value);
+ escape4hstore(str,value);
sprintf(hstring,"\"%s\"",str);
}
Index: keyvals.h
===================================================================
--- keyvals.h (revision 21892)
+++ keyvals.h (working copy)
@@ -30,4 +30,5 @@
void updateItem(struct keyval *head, const char *name, const char *value);
void cloneList( struct keyval *target, struct keyval *source );
void keyval2hstore(char *hstring, struct keyval *tags);
+void keyval2hstore_manual(char *hstring, char *key, char *value);
#endif
Index: output-pgsql.c
===================================================================
--- output-pgsql.c (revision 21892)
+++ output-pgsql.c (working copy)
@@ -436,7 +436,127 @@
}
}
+static void write_hstore(enum table_id table, struct keyval *tags)
+{
+ // sql buffer
+ static size_t sqllen = 2048;
+ static char *sql;
+ sql = malloc(sqllen);
+
+ // a clone of the tags pointer
+ struct keyval *xtags = tags;
+
+ // while this tags has a follow-up..
+ while (xtags->next->key != NULL)
+ {
+ /*
+ hstore ASCII representation looks like
+ "<key>"=>"<value>"
+
+ we need at least strlen(key)+strlen(value)+6+'\0' bytes
+ in theory any single character could also be escaped
+ thus we need an additional factor of 2.
+ The maximum lenght of a single hstore element is thus
+ calcuated as follows:
+ */
+ size_t hlen=2 * (strlen(xtags->next->key) +
strlen(xtags->next->value)) + 7;
+
+ // if the sql buffer is too small
+ if (hlen > sqllen) {
+ sqllen = hlen;
+ sql = realloc(sql, sqllen);
+ }
+
+ // pack the tag with its value into the hstore
+ keyval2hstore(sql, xtags->next);
+ copy_to_table(table, sql);
+
+ // update the tag-pointer to point to the next tag
+ xtags = xtags->next;
+
+ // if the tag has a follow up, add a comma to the end
+ if (xtags->next->key != NULL)
+ copy_to_table(table, ",");
+ }
+
+ // finish the hstore column by placing a TAB into the data stream
+ copy_to_table(table, "\t");
+}
+// write an hstore column to the database
+static void write_hstore_columns(enum table_id table, struct keyval *tags)
+{
+ // sql buffer
+ static size_t sqllen = 2048;
+ static char *sql;
+ sql = malloc(sqllen);
+
+ // the index of the current hstore column
+ int i_hstore_column;
+
+ // iterate over all configured hstore colums in the options
+ for(i_hstore_column = 0; i_hstore_column < Options->n_hstore_columns;
i_hstore_column++)
+ {
+ // did this node have a tag that matched the current hstore column
+ int found = 0;
+
+ // a clone of the tags pointer
+ struct keyval *xtags = tags;
+
+ // while this tags has a follow-up..
+ while (xtags->next->key != NULL) {
+
+ // check if the tags' key starts with the name of the hstore column
+ char *pos = strstr(xtags->next->key,
Options->hstore_columns[i_hstore_column]);
+
+ // and if it does..
+ if(pos == xtags->next->key)
+ {
+ // remember we found one
+ found=1;
+
+ // generate the short key name
+ char *shortkey = xtags->next->key +
strlen(Options->hstore_columns[i_hstore_column]);
+
+ // calculate the size needed for this hstore entry
+ size_t hlen=2*(strlen(shortkey)+strlen(xtags->next->value))+7;
+
+ // if the sql buffer is too small
+ if (hlen > sqllen) {
+ // resize it
+ sqllen=hlen;
+ sql=realloc(sql,sqllen);
+ }
+
+ // and pack the shortkey with its value into the hstore
+ keyval2hstore_manual(sql, shortkey, xtags->next->value);
+ copy_to_table(table, sql);
+
+ // update the tag-pointer to point to the next tag
+ xtags=xtags->next;
+
+ // if the tag has a follow up, add a comma to the end
+ if (xtags->next->key != NULL)
+ copy_to_table(table, ",");
+ }
+ else
+ {
+ // update the tag-pointer to point to the next tag
+ xtags=xtags->next;
+ }
+ }
+
+ // if no matching tag has been found, write a NULL
+ if(!found)
+ copy_to_table(table, "\\N");
+
+ // finish the hstore column by placing a TAB into the data stream
+ copy_to_table(table, "\t");
+ }
+ // all hstore-columns have now been written
+}
+
+
/* example from: pg_dump -F p -t planet_osm gis
COPY planet_osm (osm_id, name, place, landuse, leisure, "natural", man_made,
waterway, highway, railway, amenity, tourism, learning, building, bridge,
layer, way) FROM stdin;
17959841 \N \N \N \N \N \N \N
bus_stop \N \N \N \N \N \N -\N
0101000020E610000030CCA462B6C3D4BF92998C9B38E04940
@@ -484,36 +604,13 @@
copy_to_table(t_point, "\t");
}
- if (Options->enable_hstore) {
- while (tags->next->key != NULL) {
- size_t hlen;
- /*
- hstore ASCII representation looks like
- "<key>"=>"<value>"
-
- we need at least strlen(key)+strlen(value)+6+'\0' bytes
- in theory any single character could also be escaped
- thus we need an additional factor of 2.
- The maximum lenght of a single hstore element is thus
- calcuated as follows:
- */
- hlen=2*(strlen(tags->next->key)+strlen(tags->next->value))+7;
- if (hlen > sqllen) {
- sqllen=hlen;
- sql=realloc(sql,sqllen);
- }
- keyval2hstore(sql,tags->next);
- tags=tags->next;
- if (tags->next->key != NULL) {
- copy_to_table(t_point, sql);
- copy_to_table(t_point, ",");
- } else {
- copy_to_table(t_point, sql);
- }
- }
- copy_to_table(t_point, "\t");
- }
-
+ // hstore columns
+ write_hstore_columns(t_point, tags);
+
+ // check if a regular hstore is requested
+ if (Options->enable_hstore)
+ write_hstore(t_point, tags);
+
sprintf(sql, "SRID=%d;POINT(%.15g %.15g)", SRID, node_lon, node_lat);
copy_to_table(t_point, sql);
copy_to_table(t_point, "\n");
@@ -555,37 +652,14 @@
copy_to_table(table, sql);
copy_to_table(table, "\t");
}
-
- if (Options->enable_hstore) {
- while (tags->next->key != NULL) {
- size_t hlen;
- /*
- hstore ASCII representation looks like
- "<key>"=>"<value>"
-
- we need at least strlen(key)+strlen(value)+6+'\0' bytes
- in theory any single character could also be escaped
- thus we need an additional factor of 2.
- The maximum lenght of a single hstore element is thus
- calcuated as follows:
- */
- hlen=2*(strlen(tags->next->key)+strlen(tags->next->value))+7;
- if (hlen > sqllen) {
- sqllen=hlen;
- //sql=realloc(sql,sqllen);
- }
- keyval2hstore(sql,tags->next);
- tags=tags->next;
- if (tags->next->key != NULL) {
- copy_to_table(table, sql);
- copy_to_table(table, ",");
- } else {
- copy_to_table(table, sql);
- }
- }
- copy_to_table(table, "\t");
- }
-
+
+ // hstore columns
+ write_hstore_columns(table, tags);
+
+ // check if a regular hstore is requested
+ if (Options->enable_hstore)
+ write_hstore(table, tags);
+
sprintf(sql, "SRID=%d;", SRID);
copy_to_table(table, sql);
copy_to_table(table, wkt);
@@ -1104,6 +1178,13 @@
}
strcat(sql, tmp);
}
+ int i_hstore_column;
+ for(i_hstore_column = 0; i_hstore_column <
Options->n_hstore_columns; i_hstore_column++)
+ {
+ strcat(sql, ",\"");
+ strcat(sql, Options->hstore_columns[i_hstore_column]);
+ strcat(sql, "\" hstore ");
+ }
if (Options->enable_hstore) {
strcat(sql, ",tags hstore );\n");
} else {
@@ -1169,6 +1250,14 @@
strcat(sql, tmp);
}
+ int i_hstore_column;
+ for(i_hstore_column = 0; i_hstore_column < Options->n_hstore_columns;
i_hstore_column++)
+ {
+ strcat(sql, ",\"");
+ strcat(sql, Options->hstore_columns[i_hstore_column]);
+ strcat(sql, "\" ");
+ }
+
if (Options->enable_hstore) strcat(sql,",tags");
tables[i].columns = strdup(sql);
_______________________________________________
dev mailing list
[email protected]
http://lists.openstreetmap.org/listinfo/dev