The branch, master has been updated via 71cef2fa1dd docs: document new Spotlight Elasticsearch options via 8e3372eceab mdssvc: add options to allow ignoring attribute and type mapping errors via c6743237891 mdssvc: prepare for ignore attribute and type mapping errors via 232146775bb selftest: add a test ignored spotlight/elasticsearch mapping failures from 8ab0238abd1 .gitlab-ci: Avoid duplicate CI on all merge requests
https://git.samba.org/?p=samba.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit 71cef2fa1ddbe05d29e7ce571a35888ef4663b22 Author: Ralph Boehme <s...@samba.org> Date: Wed Oct 13 19:16:10 2021 +0200 docs: document new Spotlight Elasticsearch options elasticsearch:ignore unknown attribute = yes | no (default: no) elasticsearch:ignore unknown type = yes | no (default: no) Signed-off-by: Ralph Boehme <s...@samba.org> Reviewed-by: Noel Power <npo...@samba.org> Autobuild-User(master): Noel Power <npo...@samba.org> Autobuild-Date(master): Thu Oct 14 10:20:27 UTC 2021 on sn-devel-184 commit 8e3372eceab1bc7ad8ac813b12d654c979e20769 Author: Ralph Boehme <s...@samba.org> Date: Sat Oct 9 18:51:14 2021 +0200 mdssvc: add options to allow ignoring attribute and type mapping errors This adds two options that are used by the Spotlight query parser to optionally ignore unknown attributes or types in a query. elasticsearch:ignore unknown attribute = yes | no (default: no) elasticsearch:ignore unknown type = yes | no (default: no) Example Spotlight query with unknown attributes and type: kMDItemContentType=="public.calendar-event"||kMDItemSubject=="Kalender*"cdw|| kMDItemTitle=="Kalender*"cdw||kMDItemTopic=="Kalender*"cdw|| kMDItemTextContent=="Kalender*"cd||*=="Kalender*"cdw|| kMDItemTextContent=="Kalender*"cdw The unknown attributes are "kMDItemTopic" and "kMDItemSubject". The unknown type is "public.calendar-event". Currently the parser will outright fail to parse the query and the search will enter an error state. To give users some control over the mapping the above options can be used to tell the parser to simply ignore such unknown attributes and types. 
(meta.title:Kalender* OR content:Kalender* OR Kalender* OR content:Kalender*) Signed-off-by: Ralph Boehme <s...@samba.org> Reviewed-by: Noel Power <npo...@samba.org> commit c67432378910691456f1deec3d5a8a73a6080887 Author: Ralph Boehme <s...@samba.org> Date: Sat Oct 9 18:50:02 2021 +0200 mdssvc: prepare for ignore attribute and type mapping errors Lower the debug levels to debug from error. No change in behaviour. Signed-off-by: Ralph Boehme <s...@samba.org> Reviewed-by: Noel Power <npo...@samba.org> commit 232146775bb00769a3c208441ad0fa28bfe7f42f Author: Ralph Boehme <s...@samba.org> Date: Sat Oct 9 16:44:25 2021 +0200 selftest: add a test ignored spotlight/elasticsearch mapping failures Signed-off-by: Ralph Boehme <s...@samba.org> Reviewed-by: Noel Power <npo...@samba.org> ----------------------------------------------------------------------- Summary of changes: .../misc/elasticsearchignoreunknownattribute.xml | 19 ++++ .../misc/elasticsearchignoreunknowntype.xml | 19 ++++ selftest/tests.py | 6 ++ source3/rpc_server/mdssvc/es_mapping.c | 2 +- source3/rpc_server/mdssvc/es_parser.y | 102 ++++++++++++++++----- source3/rpc_server/mdssvc/test_mdsparser_es.c | 54 +++++++++++ 6 files changed, 180 insertions(+), 22 deletions(-) create mode 100644 docs-xml/smbdotconf/misc/elasticsearchignoreunknownattribute.xml create mode 100644 docs-xml/smbdotconf/misc/elasticsearchignoreunknowntype.xml Changeset truncated at 500 lines: diff --git a/docs-xml/smbdotconf/misc/elasticsearchignoreunknownattribute.xml b/docs-xml/smbdotconf/misc/elasticsearchignoreunknownattribute.xml new file mode 100644 index 00000000000..86368d30e58 --- /dev/null +++ b/docs-xml/smbdotconf/misc/elasticsearchignoreunknownattribute.xml @@ -0,0 +1,19 @@ +<samba:parameter name="elasticsearch:ignore unknown attribute" + context="G" + type="boolean" + xmlns:samba="http://www.samba.org/samba/DTD/samba-doc"> + <description> + <para> + Ignore unknown Spotlight attributes in search queries. 
An example query + using the unsupported attribute + <literal>"kMDItemTopic"</literal> would be + <literal>kMDItemTopic=="hotstuff"</literal>. By + default any query using such a type would completely fail. By enabling + this option, if the type match is a subexpression of a larger expression, + then this subexpression is just ignored. + </para> + </description> + + <value type="default">no</value> + <value type="example">yes</value> +</samba:parameter> diff --git a/docs-xml/smbdotconf/misc/elasticsearchignoreunknowntype.xml b/docs-xml/smbdotconf/misc/elasticsearchignoreunknowntype.xml new file mode 100644 index 00000000000..ca1f873adac --- /dev/null +++ b/docs-xml/smbdotconf/misc/elasticsearchignoreunknowntype.xml @@ -0,0 +1,19 @@ +<samba:parameter name="elasticsearch:ignore unknown type" + context="G" + type="boolean" + xmlns:samba="http://www.samba.org/samba/DTD/samba-doc"> + <description> + <para> + Ignore unknown Spotlight types in search queries. An example query using + the unsupported type <literal>"public.calendar-event"</literal> + would be + <literal>kMDItemContentType=="public.calendar-event"</literal>. By + default any query using such a type would completely fail. By enabling + this option, if the type match is a subexpression of a larger expression, + then this subexpression is just ignored. 
+ </para> + </description> + + <value type="default">no</value> + <value type="example">yes</value> +</samba:parameter> diff --git a/selftest/tests.py b/selftest/tests.py index 128374210c4..8ff635f2777 100644 --- a/selftest/tests.py +++ b/selftest/tests.py @@ -435,6 +435,12 @@ plantestsuite("samba.unittests.test_oLschema2ldif", "none", if with_elasticsearch_backend: plantestsuite("samba.unittests.mdsparser_es", "none", [os.path.join(bindir(), "default/source3/test_mdsparser_es")] + [configuration]) + plantestsuite("samba.unittests.mdsparser_es_failures", "none", + [os.path.join(bindir(), "default/source3/test_mdsparser_es"), + " --option=elasticsearch:testmappingfailures=yes", + " --option=elasticsearch:ignoreunknownattribute=yes", + " --option=elasticsearch:ignoreunknowntype=yes"] + + [configuration]) plantestsuite("samba.unittests.credentials", "none", [os.path.join(bindir(), "default/auth/credentials/test_creds")]) plantestsuite("samba.unittests.tsocket_bsd_addr", "none", diff --git a/source3/rpc_server/mdssvc/es_mapping.c b/source3/rpc_server/mdssvc/es_mapping.c index 5c71e503bf5..577fc38e912 100644 --- a/source3/rpc_server/mdssvc/es_mapping.c +++ b/source3/rpc_server/mdssvc/es_mapping.c @@ -179,7 +179,7 @@ struct es_attr_map *es_map_sl_attr(TALLOC_CTX *mem_ctx, "type", &typestr); if (ret != 0) { - DBG_ERR("No JSON type mapping for [%s]\n", sl_attr); + DBG_DEBUG("No JSON type mapping for [%s]\n", sl_attr); return NULL; } diff --git a/source3/rpc_server/mdssvc/es_parser.y b/source3/rpc_server/mdssvc/es_parser.y index 764f4bc5c20..c154dd660fc 100644 --- a/source3/rpc_server/mdssvc/es_parser.y +++ b/source3/rpc_server/mdssvc/es_parser.y @@ -65,6 +65,9 @@ TALLOC_CTX *frame; json_t *kmd_map; json_t *mime_map; + bool ignore_unknown_attribute; + bool ignore_unknown_type; + bool type_error; YY_BUFFER_STATE s; const char *result; } *global_es_parser_state; @@ -115,23 +118,48 @@ input: line: expr { + if ($1 == NULL) { + YYABORT; + } + if 
(global_es_parser_state->type_error) { + YYABORT; + } global_es_parser_state->result = $1; } ; expr: OBRACE expr CBRACE { - if ($2 == NULL) YYABORT; - $$ = talloc_asprintf(talloc_tos(), "(%s)", $2); - if ($$ == NULL) YYABORT; + if ($2 == NULL) { + $$ = NULL; + } else { + $$ = talloc_asprintf(talloc_tos(), "(%s)", $2); + if ($$ == NULL) YYABORT; + } } | expr AND expr { - $$ = talloc_asprintf(talloc_tos(), "(%s) AND (%s)", $1, $3); - if ($$ == NULL) YYABORT; + if ($1 == NULL && $3 == NULL) { + $$ = NULL; + } else if ($1 == NULL) { + $$ = $3; + } else if ($3 == NULL) { + $$ = $1; + } else { + $$ = talloc_asprintf(talloc_tos(), "(%s) AND (%s)", $1, $3); + if ($$ == NULL) YYABORT; + } } | expr OR expr { - $$ = talloc_asprintf(talloc_tos(), "%s OR %s", $1, $3); - if ($$ == NULL) YYABORT; + if ($1 == NULL && $3 == NULL) { + $$ = NULL; + } else if ($1 == NULL) { + $$ = $3; + } else if ($3 == NULL) { + $$ = $1; + } else { + $$ = talloc_asprintf(talloc_tos(), "%s OR %s", $1, $3); + if ($$ == NULL) YYABORT; + } } | match { $$ = $1; @@ -150,20 +178,32 @@ OBRACE expr CBRACE { match: attribute EQUAL value { - $$ = map_expr($1, '=', $3, NULL); - if ($$ == NULL) YYABORT; + if ($1 == NULL) { + $$ = NULL; + } else { + $$ = map_expr($1, '=', $3, NULL); + } } | attribute UNEQUAL value { - $$ = map_expr($1, '!', $3, NULL); - if ($$ == NULL) YYABORT; + if ($1 == NULL) { + $$ = NULL; + } else { + $$ = map_expr($1, '!', $3, NULL); + } } | attribute LT value { - $$ = map_expr($1, '<', $3, NULL); - if ($$ == NULL) YYABORT; + if ($1 == NULL) { + $$ = NULL; + } else { + $$ = map_expr($1, '<', $3, NULL); + } } | attribute GT value { - $$ = map_expr($1, '>', $3, NULL); - if ($$ == NULL) YYABORT; + if ($1 == NULL) { + $$ = NULL; + } else { + $$ = map_expr($1, '>', $3, NULL); + } } | function { $$ = $1; @@ -174,8 +214,11 @@ attribute EQUAL value { function: FUNC_INRANGE OBRACE attribute COMMA WORD COMMA WORD CBRACE { - $$ = map_expr($3, '~', $5, $7); - if ($$ == NULL) YYABORT; + if ($3 == NULL) { 
+ $$ = NULL; + } else { + $$ = map_expr($3, '~', $5, $7); + } }; attribute: @@ -183,7 +226,11 @@ WORD { $$ = es_map_sl_attr(global_es_parser_state->frame, global_es_parser_state->kmd_map, $1); - if ($$ == NULL) YYABORT; + if ($$ == NULL && + !global_es_parser_state->ignore_unknown_attribute) + { + YYABORT; + } }; value: @@ -247,7 +294,10 @@ static char *map_type(const struct es_attr_map *attr, mime_type_list = es_map_sl_type(s->mime_map, val); if (mime_type_list == NULL) { - DBG_ERR("Mapping type [%s] failed\n", val); + DBG_DEBUG("Mapping type [%s] failed\n", val); + if (!s->ignore_unknown_type) { + s->type_error = true; + } return NULL; } @@ -553,8 +603,8 @@ static char *map_expr(const struct es_attr_map *attr, break; } if (es == NULL) { - DBG_ERR("Mapping [%s %c %s (%s)] failed\n", - attr->name, op, val1, val2 ? val2 : ""); + DBG_DEBUG("Mapping [%s %c %s (%s)] failed\n", + attr->name, op, val1, val2 ? val2 : ""); return NULL; } @@ -603,6 +653,16 @@ bool map_spotlight_to_es_query(TALLOC_CTX *mem_ctx, TALLOC_FREE(s.frame); return false; } + + s.ignore_unknown_attribute = lp_parm_bool(GLOBAL_SECTION_SNUM, + "elasticsearch", + "ignore unknown attribute", + false); + s.ignore_unknown_type = lp_parm_bool(GLOBAL_SECTION_SNUM, + "elasticsearch", + "ignore unknown type", + false); + global_es_parser_state = &s; result = mdsyylparse(); global_es_parser_state = NULL; diff --git a/source3/rpc_server/mdssvc/test_mdsparser_es.c b/source3/rpc_server/mdssvc/test_mdsparser_es.c index b5cb86db9a7..af2b8e64840 100644 --- a/source3/rpc_server/mdssvc/test_mdsparser_es.c +++ b/source3/rpc_server/mdssvc/test_mdsparser_es.c @@ -161,6 +161,40 @@ static struct { } }; +static struct { + const char *mds; + const char *es; +} map_ignore_failures[] = { + { + "*==\"Samba\"||foo==\"bar\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "*==\"Samba\"&&foo==\"bar\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "*==\"Samba\"||kMDItemContentType==\"666\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + 
"*==\"Samba\"&&kMDItemContentType==\"666\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "*==\"Samba\"||foo==\"bar\"||kMDItemContentType==\"666\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "*==\"Samba\"&&foo==\"bar\"&&kMDItemContentType==\"666\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "foo==\"bar\"||kMDItemContentType==\"666\"||*==\"Samba\"||x!=\"6\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "*==\"Samba\"||InRange(foo,1,2)", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "*==\"Samba\"||foo==$time.iso(2018-10-01T10:00:00Z)", + "(Samba)" PATH_QUERY_SUBEXPR + } +}; + static void test_mdsparser_es(void **state) { TALLOC_CTX *frame = talloc_stackframe(); @@ -192,6 +226,26 @@ static void test_mdsparser_es(void **state) assert_string_equal(es_query, map[i].es); } + if (!lp_parm_bool(GLOBAL_SECTION_SNUM, + "elasticsearch", + "test mapping failures", + false)) + { + goto done; + } + + for (i = 0; i < ARRAY_SIZE(map_ignore_failures); i++) { + DBG_DEBUG("Mapping: %s\n", map_ignore_failures[i].mds); + ok = map_spotlight_to_es_query(frame, + mappings, + path_scope, + map_ignore_failures[i].mds, + &es_query); + assert_true(ok); + assert_string_equal(es_query, map_ignore_failures[i].es); + } + +done: json_decref(mappings); TALLOC_FREE(frame); } -- Samba Shared Repository