Hi, Alexander! On Dec 28, Alexander Barkov wrote: > revision-id: 734aee3cd3c (mariadb-10.5.13-33-g734aee3cd3c) > parent(s): 2776635cb98 > author: Alexander Barkov > committer: Alexander Barkov > timestamp: 2021-12-27 17:54:31 +0400 > message: > > MDEV-27018 IF and COALESCE lose "json" property > > Hybrid functions (IF, COALESCE, etc) did not preserve the JSON property > from their arguments. The same problem was repeatable for single row > subselects. > > The problem happened because the method Item::is_json_type() was > inconsistently > implemented across the Item hierarchy. For example, Item_hybrid_func > and Item_singlerow_subselect did not override is_json_type().
Okay. The approach is fine, but see some questions below: > diff --git a/sql/field.cc b/sql/field.cc > index 2c768527ced..8fa3bbd538c 100644 > --- a/sql/field.cc > +++ b/sql/field.cc > @@ -7277,6 +7277,19 @@ bool Field_longstr::send(Protocol *protocol) > } > > > +const Type_handler *Field_string::type_handler() const > +{ > + if (is_var_string()) > + return &type_handler_var_string; > + /* > + This is a temporary solution and will be fixed soon (in 10.9?). > + Type_handler_string_json will provide its own Field_string_json. why? > + */ > + if (Type_handler_json_common::has_json_valid_constraint(this)) could you please use names that don't depend on how exactly we detect json? especially if the goal is to move to FORMAT JSON syntax > + return &type_handler_string_json; > + return &type_handler_string; > +} > + > /* Copy a string and fill with space */ > > int Field_string::store(const char *from, size_t length,CHARSET_INFO *cs) > diff --git a/sql/sql_type.cc b/sql/sql_type.cc > index c1801c1ae3e..3b2753d80a6 100644 > --- a/sql/sql_type.cc > +++ b/sql/sql_type.cc > @@ -1755,19 +1755,110 @@ const Type_handler > *Type_handler_typelib::cast_to_int_type_handler() const > > /***************************************************************************/ > > +class Recursive_type_pair_iterator > +{ > + const Type_handler *m_a; > + const Type_handler *m_b; > + uint m_switched_to_base_count; > +public: > + Recursive_type_pair_iterator(const Type_handler *a, > + const Type_handler *b, > + uint switched_to_base_count= 0) > + :m_a(a), m_b(b), m_switched_to_base_count(switched_to_base_count) > + { } > + const Type_handler *a() const { return m_a; } > + const Type_handler *b() const { return m_b; } > + Recursive_type_pair_iterator base() const > + { > + Recursive_type_pair_iterator res(m_a->type_handler_base(), > + m_b->type_handler_base()); > + res.m_switched_to_base_count= (res.m_a != NULL) + (res.m_b != NULL); > + if (res.m_a == NULL) > + res.m_a= m_a; > + if (res.m_b == NULL) > + res.m_b= m_b; > + return res; > + } > + bool done() const > + { > + switch (m_switched_to_base_count) > + { > + case 2: > + /* > + Expression: > + COALESCE(TYPE1, TYPE2) > + > + where both type handlers derive from some other handlers, e.g. > + VARCHAR -> TYPE1 > + TEXT -> TYPE2 > + > + Continue aggregation as: > + SELECT COALESCE(VARCHAR, TEXT) > + > + Note, we now have only one level of data type inheritance, > + e.g. VARCHAR -> VARCHAR/JSON > + > + This code branch will change when we have longer inheritance chains: > + A0 -> A1 -> A2 -> A3 > + B0 -> B1 -> B2 > + > + Functions like COALESE(A2, B2) will need to do full overload > resolution: > + - iterate through all pairs from the below matrix > + - find all existing candidates > + - resolve ambiguities in case multiple candidates, etc. and I don't see how you're going to do that. you need to try m_a+m_b.base(), m_a.base()+m_b, and m_a.base()+m_b.base(). And that's not what you're doing. I suggest to add an assert that m_switched_to_base_count < 2 and stop trying to solve complex cases until we have at least one. > + > + A0 A1 A2 A3 > + B0 . . . . > + B1 . . . . > + B2 . . . . > + */ > + return false; > + > + case 1: > + /* > + Expression: > + COALESCE(TYPE1, TEXT) > + > + If only one handler derives from some other handler: > + VARCHAR-> TYPE1 > + > + Continue aggregation as: > + SELECT COALESCE(VARCHAR, TEXT) > + */ > + return false; > + > + case 0: > + default: > + /* > + Non of the two handlers are derived from other handlers. > + Nothing left to try. > + */ > + return true; > + } > + } > +}; > diff --git a/sql/sql_type_json.h b/sql/sql_type_json.h > index 6c4ee8cb2eb..4a394809a06 100644 > --- a/sql/sql_type_json.h > +++ b/sql/sql_type_json.h > @@ -21,18 +21,145 @@ ... > + bool Item_append_extended_type_info(Send_field_extended_metadata *to, > + const Item *item) const > + { > + return set_format_name(to); // Send "format=json" in the protocol > + } > + > + bool Item_hybrid_func_fix_attributes(THD *thd, > + const char *name, > + Type_handler_hybrid_field_type > *hybrid, > + Type_all_attributes *attr, > + Item **items, uint nitems) > + const override > + { > + if (BASE::Item_hybrid_func_fix_attributes(thd, name, hybrid, attr, > + items, nitems)) > + return true; > + /* > + The above call can change the type handler on "hybrid", e.g. > + choose a proper BLOB type handler according to the calculated > max_length. > + Convert general purpose string type handler to its JSON counterpart. > + This makes hybrid functions preserve JSON data types, e.g.: > + COALESCE(json_expr1, json_expr2) -> JSON > + */ > + > hybrid->set_handler(json_type_handler_from_generic(hybrid->type_handler())); I don't get it. It's supposed to "fix attributes", like signed/unsigned, max length, etc. The correct type handler should've been set already, otherwise this type handler method wouldn't even be called. > + return false; > + } > }; > > + > +class Type_handler_string_json: > + public Type_handler_general_purpose_string_to_json<Type_handler_string, > + type_handler_string> > +{ }; > diff --git a/sql/sql_type_json.cc b/sql/sql_type_json.cc > index a804366ec03..a84f720bcc9 100644 > --- a/sql/sql_type_json.cc > +++ b/sql/sql_type_json.cc > @@ -20,17 +20,111 @@ > #include "sql_class.h" > > > -Type_handler_json_longtext type_handler_json_longtext; > +Named_type_handler<Type_handler_string_json> > + type_handler_string_json("char/json"); do you mean these names are not shown anywhere? > +Named_type_handler<Type_handler_varchar_json> > + type_handler_varchar_json("varchar/json"); > + > +Named_type_handler<Type_handler_tiny_blob_json> > + type_handler_tiny_blob_json("tinyblob/json"); > + > +Named_type_handler<Type_handler_blob_json> > + type_handler_blob_json("blob/json"); > + > +Named_type_handler<Type_handler_medium_blob_json> > + type_handler_medium_blob_json("mediumblob/json"); > + > +Named_type_handler<Type_handler_long_blob_json> > + type_handler_long_blob_json("longblob/json"); ... > +/* > + This method ressembles what Field_blob::type_handler() resembles (also below) > + does for general purpose BLOB type handlers. > +*/ > +const Type_handler * > +Type_handler_json_common::json_blob_type_handler_by_length_bytes(uint len) > +{ > + switch (len) { > + case 1: return &type_handler_tiny_blob_json; > + case 2: return &type_handler_blob_json; > + case 3: return &type_handler_medium_blob_json; > + } > + return &type_handler_long_blob_json; > +} Regards, Sergei VP of MariaDB Server Engineering and secur...@mariadb.org _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp