Hi Alexey, More input:
== Circular dependencies are allowed == This query will cause an assert in the optimizer, for obvious reasons: select * from json_table(JS3.size, '$' columns (size INT PATH '$.size')) as JS1, json_table(JS1.size, '$' columns (size INT PATH '$.size')) as JS2, json_table(JS1.size, '$' columns (size INT PATH '$.size')) as JS3 where 1; == Character set inference is wrong == create table t20 (json varchar(100) character set utf8); insert into t20 values ('{"value":"АБВ"}'); create table tj20 as select T.value from t20, json_table(t20.json, '$' columns (value varchar(32) PATH '$.value')) T; mysql> show create table tj20\G *************************** 1. row *************************** Table: tj20 Create Table: CREATE TABLE `tj20` ( `value` varchar(32) DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1 The source document uses UTF-8 but the values from it are in latin1? == Comments and code cleanup == There are a lot of comments missing, some duplicate code, and code using out-of-date ways of doing things. Please find attached a patch with obvious fixes for most of it. == Explain == Let's follow MySQL here: let the tabular EXPLAIN show "Table function: json_table" in the Extra column, let EXPLAIN FORMAT=JSON show "table_function": "json_table" in the "table" element. BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog
diff -u ../10.5-json-cp/sql_class.h sql/sql_class.h --- ../10.5-json-cp/sql_class.h 2020-08-02 16:09:22.113625982 +0300 +++ sql/sql_class.h 2020-08-03 13:23:56.910397573 +0300 @@ -7436,5 +7436,9 @@ extern THD_list server_threads; +void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps); +void +setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps, uint field_count); + #endif /* MYSQL_SERVER */ #endif /* SQL_CLASS_INCLUDED */ diff -u ../10.5-json-cp/sql_select.h sql/sql_select.h --- ../10.5-json-cp/sql_select.h 2020-08-02 16:09:22.121626229 +0300 +++ sql/sql_select.h 2020-08-03 13:23:58.622294391 +0300 @@ -2454,7 +2454,6 @@ TMP_ENGINE_COLUMNDEF **recinfo, ulonglong options); bool open_tmp_table(TABLE *table); -void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps); double prev_record_reads(const POSITION *positions, uint idx, table_map found_ref); void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist); double get_tmp_table_lookup_cost(THD *thd, double row_count, uint row_size); diff -u ../10.5-json-cp/table_function.cc sql/table_function.cc --- ../10.5-json-cp/table_function.cc 2020-08-02 16:09:22.145626969 +0300 +++ sql/table_function.cc 2020-08-03 14:55:49.167668973 +0300 @@ -40,6 +40,10 @@ static table_function_handlerton table_function_hton; +/* + A table that produces output rows for JSON_TABLE(). +*/ + class ha_json_table: public handler { protected: @@ -133,6 +137,14 @@ }; +/* + @brief + Start scanning the JSON document in [str ... end] + + @detail + Note: non-root nested paths are set to scan one JSON node (that is, a + "subdocument") +*/ void Json_table_nested_path::scan_start(CHARSET_INFO *i_cs, const uchar *str, const uchar *end) { @@ -144,6 +156,11 @@ } +/* + @brief + Find the next JSON element that matches the search path. +*/ + int Json_table_nested_path::scan_next() { if (m_cur_nested) @@ -279,6 +296,10 @@ return HA_ERR_END_OF_FILE; m_jt->m_nested_path.get_current_position((uchar *) m_js->ptr(), m_cur_pos); + + /* + Move the primary nested path object to point to the data for the next row. + */ if (m_jt->m_nested_path.scan_next()) { if (m_jt->m_nested_path.m_engine.s.error) @@ -295,7 +316,10 @@ } return HA_ERR_END_OF_FILE; } - + + /* + Get the values for each field of the table + */ List_iterator_fast<Json_table_column> jc_i(m_jt->m_columns); my_ptrdiff_t ptrdiff= buf - table->record[0]; while ((jc= jc_i++)) @@ -305,72 +329,66 @@ if (ptrdiff) (*f)->move_field_offset(ptrdiff); - switch (jc->m_column_type) - { - case Json_table_column::FOR_ORDINALITY: - if (jc->m_nest->m_null) - (*f)->set_null(); - else - { - (*f)->set_notnull(); - (*f)->store(jc->m_nest->m_ordinality_counter, TRUE); - } - break; - case Json_table_column::PATH: - case Json_table_column::EXISTS_PATH: + + if (jc->m_nest->m_null) + (*f)->set_null(); + else { - json_engine_t je; - json_engine_t &nest_je= jc->m_nest->m_engine; - json_path_step_t *cur_step; - uint array_counters[JSON_DEPTH_LIMIT]; - int not_found; + (*f)->set_notnull(); - if (jc->m_nest->m_null) + switch (jc->m_column_type) { - (*f)->set_null(); + case Json_table_column::FOR_ORDINALITY: + (*f)->store(jc->m_nest->m_ordinality_counter, TRUE); break; - } - json_scan_start(&je, nest_je.s.cs, - nest_je.value_begin, nest_je.s.str_end); - - cur_step= jc->m_path.steps; - not_found= json_find_path(&je, &jc->m_path, &cur_step, array_counters) || - json_read_value(&je); - - if (jc->m_column_type == Json_table_column::EXISTS_PATH) + case Json_table_column::PATH: + case Json_table_column::EXISTS_PATH: { - (*f)->set_notnull(); - (*f)->store(!not_found); - } - else /*PATH*/ - { - if (not_found) - jc->m_on_empty.respond(jc, *f); - else + json_engine_t je; + json_engine_t &nest_je= jc->m_nest->m_engine; + json_path_step_t *cur_step; + uint array_counters[JSON_DEPTH_LIMIT]; + int not_found; + + json_scan_start(&je, nest_je.s.cs, + nest_je.value_begin, nest_je.s.str_end); + + cur_step= jc->m_path.steps; + not_found= json_find_path(&je, &jc->m_path, &cur_step, array_counters) || + json_read_value(&je); + + if (jc->m_column_type == Json_table_column::EXISTS_PATH) { - (*f)->set_notnull(); - if (!json_value_scalar(&je) || - (*f)->store((const char *) je.value, - (uint32) je.value_len, je.s.cs)) - jc->m_on_error.respond(jc, *f); + (*f)->store(!not_found); + } + else /*PATH*/ + { + if (not_found) + jc->m_on_empty.respond(jc, *f); else { - /* - If the path contains wildcards, check if there are - more matches for it in json and report an error if so. - */ - if (jc->m_path.types_used & - (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD) && - (json_scan_next(&je) || - !json_find_path(&je, &jc->m_path, &cur_step, array_counters))) + if (!json_value_scalar(&je) || + (*f)->store((const char *) je.value, + (uint32) je.value_len, je.s.cs)) jc->m_on_error.respond(jc, *f); + else + { + /* + If the path contains wildcards, check if there are + more matches for it in json and report an error if so. + */ + if (jc->m_path.types_used & + (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD) && + (json_scan_next(&je) || + !json_find_path(&je, &jc->m_path, &cur_step, array_counters))) + jc->m_on_error.respond(jc, *f); + } } - } + break; } - break; + }; } - }; if (ptrdiff) (*f)->move_field_offset(-ptrdiff); cont_loop: @@ -407,35 +425,6 @@ } -static void -setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps, uint field_count) -{ - uint bitmap_size= bitmap_buffer_size(field_count); - - DBUG_ASSERT(table->s->virtual_fields == 0); - - my_bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count, - FALSE); - bitmaps+= bitmap_size; - my_bitmap_init(&table->tmp_set, - (my_bitmap_map*) bitmaps, field_count, FALSE); - bitmaps+= bitmap_size; - my_bitmap_init(&table->eq_join_set, - (my_bitmap_map*) bitmaps, field_count, FALSE); - bitmaps+= bitmap_size; - my_bitmap_init(&table->cond_set, - (my_bitmap_map*) bitmaps, field_count, FALSE); - bitmaps+= bitmap_size; - my_bitmap_init(&table->has_value_set, - (my_bitmap_map*) bitmaps, field_count, FALSE); - /* write_set and all_set are copies of read_set */ - table->def_write_set= table->def_read_set; - table->s->all_set= table->def_read_set; - bitmap_set_all(&table->s->all_set); - table->default_column_bitmaps(); -} - - void Create_json_table::add_field(TABLE *table, Field *field, uint fieldnr, bool force_not_null_cols) { @@ -721,6 +710,12 @@ } +/* + @brief + Read the JSON_TABLE's field definitions from @jt and add the fields to + table @table. +*/ + bool Create_json_table::add_json_table_fields(THD *thd, TABLE *table, Table_function_json_table *jt) { @@ -794,6 +789,22 @@ } +/* + @brief + Given a TABLE_LIST representing JSON_TABLE(...) syntax, create a temporary + table for it. + + @detail + The temporary table will have: + - fields whose names/datatypes are specified in JSON_TABLE(...) syntax + - a ha_json_table as the storage engine. + + The uses of the temporary table are: + - name resolution: the query may have references to the columns of + JSON_TABLE(...). A TABLE object will allow to resolve them. + - query execution: ha_json_table will produce JSON_TABLE's rows. +*/ + TABLE *create_table_for_function(THD *thd, TABLE_LIST *sql_table) { TMP_TABLE_PARAM tp; @@ -878,10 +889,7 @@ */ int Json_table_column::print(THD *thd, Field **f, String *str) { - char column_type_buff[MAX_FIELD_WIDTH]; - String column_type(column_type_buff, sizeof(column_type_buff), - str->charset()); - + StringBuffer<MAX_FIELD_WIDTH> column_type(str->charset()); if (append_identifier(thd, str, &m_field->field_name) || str->append(' ')) return 1; @@ -923,14 +931,20 @@ /* This is done so the ::print function can just print the path string. - Can be removed if we redo that function to print the path using it's - anctual content. Not sure though if we should. + Can be removed if we redo that function to print the path using its + actual content. Not sure though if we should. */ m_path.s.c_str= (const uchar *) path.str; return 0; } +/* + @brief + Perform the action of this response on field @f (emit an error, or set @f + to NULL, or set it to default value). +*/ + void Json_table_column::On_response::respond(Json_table_column *jc, Field *f) { switch (m_response) @@ -1003,6 +1017,21 @@ } +/* + @brief + Perform name-resolution phase tasks + + @detail + - The only argument that needs resolution is the JSON text + - Then, we need to set dependencies: if JSON_TABLE refers to table's + column, e.g. + + JSON_TABLE (t1.col ... ) AS t2 + + then it can be computed only after table t1. + - The dependencies must not form a loop. +*/ + int Table_function_json_table::setup(THD *thd, TABLE_LIST *sql_table) { thd->where= "JSON_TABLE argument"; diff -u ../10.5-json-cp/table_function.h sql/table_function.h --- ../10.5-json-cp/table_function.h 2020-08-02 16:09:22.121626229 +0300 +++ sql/table_function.h 2020-08-03 14:59:42.643398344 +0300 @@ -19,11 +19,13 @@ #include <json_lib.h> +class Json_table_column; + /* The Json_table_nested_path represents the 'current nesting' level for a set of JSON_TABLE columns. Each column (Json_table_column instance) is linked with corresponding - 'nested path' object and gets it's piece of JSON to parse during the computation + 'nested path' object and gets its piece of JSON to parse during the computation phase. The root 'nested_path' is always present as a part of Table_function_json_table, then other 'nested_paths' can be created and linked into a tree structure when new @@ -46,13 +48,11 @@ m_nest &root &nested_b &nested_c &nested_n */ - -class Json_table_column; - class Json_table_nested_path : public Sql_alloc { public: - bool m_null; + bool m_null; // TRUE <=> produce SQL NULL. + json_path_t m_path; json_engine_t m_engine; json_path_t m_cur_path;
_______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp