HAWQ-1526. Added support for functions in WHERE clause for PXF tables.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/b8b8d07c Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/b8b8d07c Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/b8b8d07c Branch: refs/heads/hdb-2.3.0.1 Commit: b8b8d07cd9e4137d500ff02f9b7c9b9290a98597 Parents: dcdeb13 Author: Oleksandr Diachenko <odiache...@pivotal.io> Authored: Fri Sep 22 11:26:49 2017 -0700 Committer: rlei <r...@pivotal.io> Committed: Sat Sep 23 17:33:24 2017 +0800 ---------------------------------------------------------------------- src/backend/access/external/pxffilters.c | 110 ++++++++++++++---- src/backend/access/external/pxfheaders.c | 8 +- .../access/external/test/pxffilters_test.c | 114 +++++++++++++++++-- .../access/external/test/pxfheaders_test.c | 35 ++++++ src/include/access/pxffilters.h | 2 +- 5 files changed, 237 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b8b8d07c/src/backend/access/external/pxffilters.c ---------------------------------------------------------------------- diff --git a/src/backend/access/external/pxffilters.c b/src/backend/access/external/pxffilters.c index f502e4e..7c1db56 100644 --- a/src/backend/access/external/pxffilters.c +++ b/src/backend/access/external/pxffilters.c @@ -44,6 +44,7 @@ static void scalar_const_to_str(Const *constval, StringInfo buf); static void list_const_to_str(Const *constval, StringInfo buf); static List* append_attr_from_var(Var* var, List* attrs); static void enrich_trivial_expression(List *expressionItems); +static List* get_attrs_from_expr(Expr *expr, bool* expressionIsSupported); /* * All supported HAWQ operators, and their respective HDFS operator code. @@ -851,12 +852,51 @@ append_attr_from_var(Var* var, List* attrs) return attrs; } +/* + * append_attr_from_func_args + * + * extracts all columns from FuncExpr into attrs + * assigns false to expressionIsSupported if at least one of items is not supported + */ +static List* +append_attr_from_func_args(FuncExpr *expr, List* attrs, bool* expressionIsSupported) { + if (!expressionIsSupported) { + return NIL; + } + ListCell *lc = NULL; + foreach (lc, expr->args) + { + Node *node = (Node *) lfirst(lc); + if (IsA(node, FuncExpr)) { + attrs = append_attr_from_func_args((FuncExpr *) node, attrs, expressionIsSupported); + } else if (IsA(node, Var)) { + attrs = append_attr_from_var((Var *) node, attrs); + } else if (IsA(node, OpExpr)) { + attrs = get_attrs_from_expr((OpExpr *) node, expressionIsSupported); + } else { + *expressionIsSupported = false; + return NIL; + } + } + + return attrs; + +} + +/* + * get_attrs_from_expr + * + * extracts and returns list of all columns from Expr + * assigns false to expressionIsSupported if at least one of items is not supported + */ static List* -get_attrs_from_expr(Expr *expr) +get_attrs_from_expr(Expr *expr, bool* expressionIsSupported) { - Node *leftop = NULL; - Node *rightop = NULL; - List *attrs = NIL; + Node *leftop = NULL; + Node *rightop = NULL; + List *attrs = NIL; + + *expressionIsSupported = true; if ((!expr)) return attrs; @@ -870,30 +910,46 @@ get_attrs_from_expr(Expr *expr) ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) expr; leftop = (Node *) linitial(saop->args); rightop = (Node *) lsecond(saop->args); + } else { + // If expression type is not known, report that it's not supported + *expressionIsSupported = false; + return NIL; } - //Process left operand - //For most of datatypes column is represented by Var node - if (IsA(leftop, Var)) + // We support following combinations of operands: + // Var, Const + // Relabel, Const + // FuncExpr, Const + // Const, Var + // Const, Relabel + // Const, FuncExpr + // For most of datatypes column is represented by Var node + // For varchar column is represented by RelabelType node + if (IsA(leftop, Var) && IsA(rightop, Const)) { attrs = append_attr_from_var((Var *) leftop, attrs); - } - //For varchar column is represented by RelabelType node - if (IsA(leftop, RelabelType)) + } else if (IsA(leftop, RelabelType) && IsA(rightop, Const)) { attrs = append_attr_from_var((Var *) ((RelabelType *) leftop)->arg, attrs); + } else if (IsA(leftop, FuncExpr) && IsA(rightop, Const)) { + FuncExpr *expr = (FuncExpr *) leftop; + attrs = append_attr_from_func_args(expr, attrs, expressionIsSupported); } - - //Process right operand - //For most of datatypes column is represented by Var node - if (IsA(rightop, Var)) + else if (IsA(rightop, Var) && IsA(leftop, Const)) { attrs = append_attr_from_var((Var *) rightop, attrs); - } - //For varchar column is represented by RelabelType node - if (IsA(rightop, RelabelType)) + } else if (IsA(rightop, RelabelType) && IsA(leftop, Const)) { attrs = append_attr_from_var((Var *) ((RelabelType *) rightop)->arg, attrs); + } else if (IsA(rightop, FuncExpr) && IsA(leftop, Const)) { + FuncExpr *expr = (FuncExpr *) rightop; + attrs = append_attr_from_func_args(expr, attrs, expressionIsSupported); + } + else { + // If operand type or combination is not known, report that it's not supported + // to avoid partially extracted attributes from expression + *expressionIsSupported = false; + return NIL; } return attrs; @@ -1251,11 +1307,16 @@ void enrich_trivial_expression(List *expressionItems) { * List might contain duplicates. * Caller should release memory once result is not needed. */ -List* extractPxfAttributes(List* quals) +List* extractPxfAttributes(List* quals, bool* qualsAreSupported) { + if (!(*qualsAreSupported)) { + return NIL; + } ListCell *lc = NULL; List *attributes = NIL; + bool expressionIsSupported; + *qualsAreSupported = true; if (list_length(quals) == 0) return NIL; @@ -1271,14 +1332,23 @@ List* extractPxfAttributes(List* quals) case T_ScalarArrayOpExpr: { Expr* expr = (Expr *) node; - List *attrs = get_attrs_from_expr(expr); + List *attrs = get_attrs_from_expr(expr, &expressionIsSupported); + if (!expressionIsSupported) { + *qualsAreSupported = false; + return NIL; + } attributes = list_concat(attributes, attrs); break; } case T_BoolExpr: { BoolExpr* expr = (BoolExpr *) node; - List *inner_result = extractPxfAttributes(expr->args); + bool innerBoolQualsAreSupported = true; + List *inner_result = extractPxfAttributes(expr->args, &innerBoolQualsAreSupported); + if (!innerBoolQualsAreSupported) { + *qualsAreSupported = false; + return NIL; + } attributes = list_concat(attributes, inner_result); break; } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b8b8d07c/src/backend/access/external/pxfheaders.c ---------------------------------------------------------------------- diff --git a/src/backend/access/external/pxfheaders.c b/src/backend/access/external/pxfheaders.c index d8904b4..395fe25 100644 --- a/src/backend/access/external/pxfheaders.c +++ b/src/backend/access/external/pxfheaders.c @@ -65,14 +65,18 @@ void build_http_header(PxfInputData *input) if (proj_info != NULL && proj_info->pi_isVarList) { - List* qualsAttributes = extractPxfAttributes(input->quals); + bool qualsAreSupported = true; + List* qualsAttributes = extractPxfAttributes(input->quals, &qualsAreSupported); /* projection information is incomplete if columns from WHERE clause wasn't extracted */ - if (qualsAttributes != NIL || list_length(input->quals) == 0) + /* if any of expressions in WHERE clause is not supported - do not send any projection information at all*/ + if (qualsAreSupported && (qualsAttributes != NIL || list_length(input->quals) == 0)) { add_projection_desc_httpheader(headers, proj_info, qualsAttributes); } else + { elog(DEBUG2, "Query will not be optimized to use projection information"); + } } /* GP cluster configuration */ http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b8b8d07c/src/backend/access/external/test/pxffilters_test.c ---------------------------------------------------------------------- diff --git a/src/backend/access/external/test/pxffilters_test.c b/src/backend/access/external/test/pxffilters_test.c index 5d38e04..8e28890 100644 --- a/src/backend/access/external/test/pxffilters_test.c +++ b/src/backend/access/external/test/pxffilters_test.c @@ -30,6 +30,10 @@ void run__scalar_const_to_str__negative(Const* input, StringInfo result, char* v void run__list_const_to_str(Const* input, StringInfo result, char* expected); void run__list_const_to_str__negative(Const* input, StringInfo result, int len, Datum *dats); + +/* date_eq(date, date) => bool */ +#define DateEqualFuncId 1086 + void test__supported_filter_type(void **state) { @@ -541,13 +545,13 @@ Var* build_var(Oid oid, int attno) { return arg_var; } -Const* build_const(Oid oid, char* value) +Const* build_const(Oid oid, char* value, bool expectToBeRead) { Const* arg_const = (Const*) palloc0(sizeof(Const)); arg_const->xpr.type = T_Const; arg_const->constisnull = (value == NULL); arg_const->consttype = oid; - if (value != NULL) + if (value != NULL && expectToBeRead) { mock__scalar_const_to_str(oid, value); } @@ -594,7 +598,7 @@ ExpressionItem* build_expression_item(int lattnum, Oid lattrtype, char* rconstst { ExpressionItem *expressionItem = (ExpressionItem*) palloc0(sizeof(ExpressionItem)); Var *leftop = build_var(lattrtype, lattnum); - Const *rightop = build_const(rattrtype, strdup(rconststr)); + Const *rightop = build_const(rattrtype, strdup(rconststr), true); OpExpr *operationExpression = build_op_expr(leftop, rightop, op); expressionItem->node = operationExpression; @@ -604,12 +608,44 @@ ExpressionItem* build_expression_item(int lattnum, Oid lattrtype, char* rconstst return expressionItem; } +FuncExpr* build_func_expr_operand(List *args, CoercionForm funcformat) { + FuncExpr* operand = palloc0(sizeof(FuncExpr)); + ((Node*) operand)->type = T_FuncExpr; + operand->args = args; + operand->funcformat = funcformat; + return operand; +} + +/** + * builds FuncExpr for a following expression: function(column1, column2,...,columnk) + * columnOids - oids of column1, column2, ... types + * + * Typical representation of a functions we support: + * COERCE_EXPLICIT_CAST->COERCE_EXPLICIT_CALL->COERCE_IMPLICIT_CAST->T_Var, + * where Var holds actual arguments - column1, column2,...,columnk + */ +FuncExpr* build_nested_func_expr_operand(List *columnsOids, List *attrsIndices) { + assert_int_equal(columnsOids->length, attrsIndices->length); + ListCell *columnOidLc = NULL, *attrIndexLc = NULL; + Var *var = NULL; + List* args = NIL; + forboth(columnOidLc, columnsOids, attrIndexLc, attrsIndices) { + var = build_var(lfirst(columnOidLc), lfirst(attrIndexLc)); + args = lappend(args, var); + } + FuncExpr* operandImplicitCast = build_func_expr_operand(args, COERCE_IMPLICIT_CAST); + FuncExpr* operandExplicitCall = build_func_expr_operand(list_make1(operandImplicitCast), COERCE_EXPLICIT_CALL); + FuncExpr* operandExplicitCast = build_func_expr_operand(list_make1(operandExplicitCall), COERCE_EXPLICIT_CALL); + + return operandExplicitCast; +} + void run__opexpr_to_pxffilter__positive(Oid dbop, PxfOperatorCode expectedPxfOp) { PxfFilterDesc *filter = (PxfFilterDesc*) palloc0(sizeof(PxfFilterDesc)); Var *arg_var = build_var(INT2OID, 1); char* const_value = strdup("1984"); /* will be free'd by const_to_str */ - Const* arg_const = build_const(INT2OID, const_value); + Const* arg_const = build_const(INT2OID, const_value, true); OpExpr *expr = build_op_expr(arg_var, arg_const, dbop); PxfFilterDesc* expected = build_filter( @@ -657,7 +693,8 @@ test__opexpr_to_pxffilter__attributeEqualsNull(void **state) { PxfFilterDesc *filter = (PxfFilterDesc*) palloc0(sizeof(PxfFilterDesc)); Var *arg_var = build_var(INT2OID, 1); - Const* arg_const = build_const(INT2OID, NULL); + Const* arg_const = build_const(INT2OID, NULL, true + ); OpExpr *expr = build_op_expr(arg_var, arg_const, 94 /* int2eq */); PxfFilterDesc* expected = build_filter( @@ -698,7 +735,7 @@ test__opexpr_to_pxffilter__differentTypes(void **state) PxfFilterDesc *filter = (PxfFilterDesc*) palloc0(sizeof(PxfFilterDesc)); Var *arg_var = build_var(INT2OID, 3); char* const_value = strdup("13"); /* will be free'd by const_to_str */ - Const *arg_const = build_const(INT8OID, const_value); + Const *arg_const = build_const(INT8OID, const_value, true); OpExpr *expr = build_op_expr(arg_const, arg_var, 1864 /* int28lt */); @@ -720,7 +757,7 @@ test__opexpr_to_pxffilter__unsupportedTypeCircle(void **state) { PxfFilterDesc *filter = (PxfFilterDesc*) palloc0(sizeof(PxfFilterDesc)); Var *arg_var = build_var(CIRCLEOID, 8); - Const *arg_const = build_const(CIRCLEOID, NULL); + Const *arg_const = build_const(CIRCLEOID, NULL, true); OpExpr *expr = build_op_expr(arg_const, arg_var, 0 /* whatever */); /* run test */ @@ -754,7 +791,7 @@ test__opexpr_to_pxffilter__unsupportedOpNot(void **state) { PxfFilterDesc *filter = (PxfFilterDesc*) palloc0(sizeof(PxfFilterDesc)); Var *arg_var = build_var(INT2OID, 3); - Const *arg_const = build_const(INT2OID, NULL); + Const *arg_const = build_const(INT2OID, NULL, true); OpExpr *expr = build_op_expr(arg_const, arg_var, 1877 /* int2not */); /* run test */ @@ -837,6 +874,63 @@ test__pxf_serialize_filter_list__manyFilters(void **state) expressionItems = NIL; } +void +test__extractPxfAttributes_empty_quals(void **state) +{ + bool qualsAreSupported; + qualsAreSupported = true; + List* quals = NIL; + extractPxfAttributes(quals, &qualsAreSupported); + assert_true(qualsAreSupported); +} + +/** + * covers queries like: + * SELECT a,b,c + * FROM tab1 + * WHERE d = function(e) = const + * + * d is a column number 5 of int4 datatype + * e is a column number 7 of date datatype + * const is a int4 datatype + * + */ +void +test__extractPxfAttributes_supported_function_one_arg(void **state) { + + int argumentColumnIndex = 6; // index starts from 0 + bool qualsAreSupported; + qualsAreSupported = true; + List* columnsOids = list_make1(DATEOID); + List* attrsIndices = list_make1(argumentColumnIndex); + + // Create operands FuncExpr, Const + FuncExpr* leftop = build_nested_func_expr_operand(columnsOids, attrsIndices); + + // extractPxfAttributes just extracts columns, doesn't read values of constants + Node* rightop = build_const(INT4OID, strdup("42"), false); + OpExpr* opExpr = build_op_expr(leftop, rightop, Int4EqualOperator); + List* quals = list_make1(opExpr); + + // Extract columns(attributes) from WHERE clause + List* attrs = extractPxfAttributes(quals, &qualsAreSupported); + + // Make sure we extracted one attribute, column e + assert_int_equal(1, attrs->length); + + // Make sure it's supported + assert_true(qualsAreSupported); + + ListCell* lc = NULL; + int attnum; + foreach(lc, attrs) + { + attnum = lfirst_int(lc); + // Index of attnum starts from 0 + assert_int_equal(attnum + 1, argumentColumnIndex); + } +} + int main(int argc, char* argv[]) { @@ -865,7 +959,9 @@ main(int argc, char* argv[]) unit_test(test__opexpr_to_pxffilter__attributeIsNull), unit_test(test__pxf_serialize_filter_list__oneFilter), unit_test(test__pxf_serialize_fillter_list__nullFilter), - unit_test(test__pxf_serialize_filter_list__manyFilters) + unit_test(test__pxf_serialize_filter_list__manyFilters), + unit_test(test__extractPxfAttributes_empty_quals), + unit_test(test__extractPxfAttributes_supported_function_one_arg) }; return run_tests(tests); } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b8b8d07c/src/backend/access/external/test/pxfheaders_test.c ---------------------------------------------------------------------- diff --git a/src/backend/access/external/test/pxfheaders_test.c b/src/backend/access/external/test/pxfheaders_test.c index 851483d..35ac7e3 100644 --- a/src/backend/access/external/test/pxfheaders_test.c +++ b/src/backend/access/external/test/pxfheaders_test.c @@ -125,6 +125,39 @@ test__build_http_header__remote_credentials_are_not_null(void **state) build_http_header(input_data); } +/** + * Query has valid projection info, + * but some of expression in WHERE clause is not supported + * Make sure we are not sending any projection information at all, + * to avoid incorrect results. + */ +void test__build_http_header__where_is_not_supported(void **state) +{ + + expect_external_vars(); + + expect_any(extractPxfAttributes, quals); + expect_any(extractPxfAttributes, qualsAreSupported); + will_return(extractPxfAttributes, NIL); + will_assign_value(extractPxfAttributes, qualsAreSupported, false); + + expect_churl_headers("X-GP-SEGMENT-ID", mock_extvar->GP_SEGMENT_ID); + expect_churl_headers("X-GP-SEGMENT-COUNT", mock_extvar->GP_SEGMENT_COUNT); + expect_churl_headers("X-GP-XID", mock_extvar->GP_XID); + expect_churl_headers_alignment(); + expect_churl_headers("X-GP-URL-HOST", gphd_uri->host); + expect_churl_headers("X-GP-URL-PORT", gphd_uri->port); + expect_churl_headers("X-GP-DATA-DIR", gphd_uri->data); + expect_churl_headers("X-GP-URI", gphd_uri->uri); + expect_churl_headers("X-GP-HAS-FILTER", "0"); + + input_data->proj_info = palloc0(sizeof(ProjectionInfo)); + input_data->proj_info->pi_isVarList = true; + input_data->quals = list_make1("Some not supported quals"); + + build_http_header(input_data); +} + void test__get_format_name(void **state) { @@ -262,6 +295,8 @@ main(int argc, char* argv[]) unit_test_setup_teardown(test__build_http_header__remote_credentials_are_not_null, common_setup, common_teardown), unit_test_setup_teardown(test__get_format_name, + common_setup, common_teardown), + unit_test_setup_teardown(test__build_http_header__where_is_not_supported, common_setup, common_teardown) }; http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b8b8d07c/src/include/access/pxffilters.h ---------------------------------------------------------------------- diff --git a/src/include/access/pxffilters.h b/src/include/access/pxffilters.h index c7e8aa8..206adce 100644 --- a/src/include/access/pxffilters.h +++ b/src/include/access/pxffilters.h @@ -137,6 +137,6 @@ static inline bool pxfoperand_is_list_const(PxfOperand x) } char *serializePxfFilterQuals(List *quals); -List* extractPxfAttributes(List* quals); +List* extractPxfAttributes(List* quals, bool* qualsAreSupported); #endif // _PXF_FILTERS_H_