tjbanghart commented on code in PR #3184:
URL: https://github.com/apache/calcite/pull/3184#discussion_r1181838154
##########
core/src/main/java/org/apache/calcite/sql2rel/StandardConvertletTable.java:
##########
@@ -279,6 +279,11 @@ private StandardConvertletTable() {
SqlStdOperatorTable.POSITION.createCall(SqlParserPos.ZERO,
call.operand(1), call.operand(0))));
+ // "INSTR(string, substring, position, occurrence) is equivalent to
Review Comment:
We might want to add this comment to the `convertInstr` function since
`POSITION` is not referenced in line 284 - we're explicitly registering a
`convertInstr` method made for `INSTR`
##########
testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java:
##########
@@ -3686,7 +3686,7 @@ static void checkRlikeFails(SqlOperatorFixture f) {
f.checkScalarExact("position(x'bb' in x'aabbccaabbcc' FROM 3)", 5);
f.checkScalarExact("position(x'bb' in x'aabbccaabbcc' FROM 5)", 5);
f.checkScalarExact("position(x'bb' in x'aabbccaabbcc' FROM 6)", 0);
- f.checkScalarExact("position(x'bb' in x'aabbccaabbcc' FROM -5)", 0);
Review Comment:
Same here, that seems odd
##########
testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java:
##########
@@ -3677,7 +3677,7 @@ static void checkRlikeFails(SqlOperatorFixture f) {
f.checkScalarExact("position('b' in 'abcabc' FROM 3)", 5);
f.checkScalarExact("position('b' in 'abcabc' FROM 5)", 5);
f.checkScalarExact("position('b' in 'abcabc' FROM 6)", 0);
- f.checkScalarExact("position('b' in 'abcabc' FROM -5)", 0);
+ f.checkScalarExact("position('b' in 'abcabc' FROM -5)", 2);
Review Comment:
Why do we need to change this test?
##########
core/src/main/java/org/apache/calcite/sql2rel/StandardConvertletTable.java:
##########
@@ -402,6 +407,32 @@ private static RexNode convertNvl(SqlRexContext cx,
SqlCall call) {
operand1)));
}
+ /** Converts a call to the INSTR function. */
+ private static RexNode convertInstr(SqlRexContext cx, SqlCall call) {
+ final RexBuilder rexBuilder = cx.getRexBuilder();
+ final List<RexNode> operands =
+ convertOperands(cx, call, SqlOperandTypeChecker.Consistency.NONE);
+ final RelDataType type =
+ cx.getValidator().getValidatedNodeType(call);
+ final List<RexNode> exprs = new ArrayList<>();
+ if (call.operandCount() == 2) {
Review Comment:
Comments here would also be helpful. It is not clear what each operand is
and why their order matters.
##########
core/src/main/java/org/apache/calcite/sql/dialect/BigQuerySqlDialect.java:
##########
@@ -150,15 +150,36 @@ public BigQuerySqlDialect(SqlDialect.Context context) {
final int rightPrec) {
switch (call.getKind()) {
case POSITION:
- final SqlWriter.Frame frame = writer.startFunCall("STRPOS");
- writer.sep(",");
- call.operand(1).unparse(writer, leftPrec, rightPrec);
- writer.sep(",");
- call.operand(0).unparse(writer, leftPrec, rightPrec);
+ if (2 == call.operandCount()) {
Review Comment:
I think we could simplify the `if` expressions here and elsewhere when
checking the `operandCount()`. Consider moving this into a helper function or
starting the frame before the `if` blocks and ending the with the
`writer.endFunCall(frame)` outside of a block.
##########
core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java:
##########
@@ -3104,23 +3104,105 @@ public static int position(ByteString seek, ByteString
s) {
/** SQL {@code POSITION(seek IN string FROM integer)} function. */
public static int position(String seek, String s, int from) {
- final int from0 = from - 1; // 0-based
- if (from0 > s.length() || from0 < 0) {
+ if (from == 0) {
+ throw RESOURCE.fromNotZero().ex();
+ }
+ // Case when from is positive
+ if (from > 0) {
+ final int from0 = from - 1; // 0-based
+ if (from0 >= s.length() || from0 < 0) {
+ return 0;
+ }
+
+ return s.indexOf(seek, from0) + 1;
+ }
+ // Case when from is negative
+ final int rightIndex = from + s.length(); // negative position to positive
index
+ if (rightIndex <= 0) {
return 0;
}
-
- return s.indexOf(seek, from0) + 1;
+ return s.substring(0, rightIndex + 1).lastIndexOf(seek) + 1;
}
/** SQL {@code POSITION(seek IN string FROM integer)} function for byte
* strings. */
public static int position(ByteString seek, ByteString s, int from) {
- final int from0 = from - 1;
- if (from0 > s.length() || from0 < 0) {
+ if (from == 0) {
+ throw RESOURCE.fromNotZero().ex();
+ }
+ // Case when from is positive
+ if (from > 0) {
+ final int from0 = from - 1; // 0-based
+ if (from0 >= s.length() || from0 < 0) {
+ return 0;
+ }
+ return s.indexOf(seek, from0) + 1;
+ }
+ // Case when from is negative
+ final int rightIndex = from + s.length();
+ if (rightIndex <= 0) {
return 0;
}
+ int lastIndex = 0;
+ while (lastIndex < rightIndex) {
+ int indexOf = s.substring(lastIndex, rightIndex + 1).indexOf(seek) + 1;
+ if (indexOf == 0) {
+ break;
+ }
+ lastIndex += indexOf;
+ }
+ return lastIndex;
+ }
+
+ /** SQL {@code POSITION(seek, string, from, occurrence)} function. */
+ public static int position(String seek, String s, int from, int occurrence) {
+ if (occurrence == 0) {
+ throw RESOURCE.occurrenceNotZero().ex();
+ }
+ for (int i = 0; i < occurrence; i++) {
+ if (from > 0) {
+ from = position(seek, s, from + (i == 0 ? 0 : 1));
+ if (from == 0) {
+ return 0;
+ }
+ } else {
+ from = position(seek, s, from);
+ if (from == 0) {
+ return 0;
+ }
+ from -= s.length() + 2;
+ }
+ }
+ if (from < 0) {
+ from += s.length() + 2;
+ }
+ return from;
+ }
- return s.indexOf(seek, from0) + 1;
+ /** SQL {@code POSITION(seek, string, from, occurrence)} function for byte
+ * strings. */
+ public static int position(ByteString seek, ByteString s, int from, int
occurrence) {
+ if (occurrence == 0) {
+ throw RESOURCE.occurrenceNotZero().ex();
+ }
+ for (int i = 0; i < occurrence; i++) {
Review Comment:
I think we should rely more heavily on String builtins here. For example
rather than making a recursive call to `position` why not just take a substring
with the `from` argument and then use a Pattern with grouping sets to count
occurrences.
Also consider StringUtils:
https://commons.apache.org/proper/commons-lang/apidocs/org/apache/commons/lang3/StringUtils.html#countMatches(java.lang.CharSequence,%20java.lang.CharSequence)
##########
core/src/main/java/org/apache/calcite/sql/fun/SqlPositionFunction.java:
##########
@@ -53,12 +57,23 @@ public SqlPositionFunction(String name) {
int leftPrec,
int rightPrec) {
final SqlWriter.Frame frame = writer.startFunCall(getName());
- call.operand(0).unparse(writer, leftPrec, rightPrec);
- writer.sep("IN");
- call.operand(1).unparse(writer, leftPrec, rightPrec);
- if (3 == call.operandCount()) {
- writer.sep("FROM");
+ if (2 == call.operandCount() || 3 == call.operandCount()) {
+ call.operand(0).unparse(writer, leftPrec, rightPrec);
+ writer.sep("IN");
+ call.operand(1).unparse(writer, leftPrec, rightPrec);
+ if (3 == call.operandCount()) {
+ writer.sep("FROM");
+ call.operand(2).unparse(writer, leftPrec, rightPrec);
+ }
+ }
+ if (4 == call.operandCount()) {
Review Comment:
What dialects support this 4 argument position without `IN` and `FROM`?
##########
core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java:
##########
@@ -3104,23 +3104,105 @@ public static int position(ByteString seek, ByteString
s) {
/** SQL {@code POSITION(seek IN string FROM integer)} function. */
public static int position(String seek, String s, int from) {
- final int from0 = from - 1; // 0-based
- if (from0 > s.length() || from0 < 0) {
+ if (from == 0) {
+ throw RESOURCE.fromNotZero().ex();
+ }
+ // Case when from is positive
+ if (from > 0) {
+ final int from0 = from - 1; // 0-based
+ if (from0 >= s.length() || from0 < 0) {
+ return 0;
+ }
+
+ return s.indexOf(seek, from0) + 1;
+ }
+ // Case when from is negative
+ final int rightIndex = from + s.length(); // negative position to positive
index
+ if (rightIndex <= 0) {
return 0;
}
-
- return s.indexOf(seek, from0) + 1;
+ return s.substring(0, rightIndex + 1).lastIndexOf(seek) + 1;
}
/** SQL {@code POSITION(seek IN string FROM integer)} function for byte
* strings. */
public static int position(ByteString seek, ByteString s, int from) {
- final int from0 = from - 1;
- if (from0 > s.length() || from0 < 0) {
+ if (from == 0) {
+ throw RESOURCE.fromNotZero().ex();
+ }
+ // Case when from is positive
+ if (from > 0) {
+ final int from0 = from - 1; // 0-based
+ if (from0 >= s.length() || from0 < 0) {
+ return 0;
+ }
+ return s.indexOf(seek, from0) + 1;
+ }
+ // Case when from is negative
+ final int rightIndex = from + s.length();
+ if (rightIndex <= 0) {
return 0;
}
+ int lastIndex = 0;
+ while (lastIndex < rightIndex) {
+ int indexOf = s.substring(lastIndex, rightIndex + 1).indexOf(seek) + 1;
+ if (indexOf == 0) {
+ break;
+ }
+ lastIndex += indexOf;
+ }
+ return lastIndex;
+ }
+
+ /** SQL {@code POSITION(seek, string, from, occurrence)} function. */
+ public static int position(String seek, String s, int from, int occurrence) {
+ if (occurrence == 0) {
+ throw RESOURCE.occurrenceNotZero().ex();
+ }
+ for (int i = 0; i < occurrence; i++) {
+ if (from > 0) {
+ from = position(seek, s, from + (i == 0 ? 0 : 1));
+ if (from == 0) {
+ return 0;
+ }
+ } else {
+ from = position(seek, s, from);
+ if (from == 0) {
+ return 0;
+ }
+ from -= s.length() + 2;
+ }
+ }
+ if (from < 0) {
+ from += s.length() + 2;
+ }
+ return from;
+ }
- return s.indexOf(seek, from0) + 1;
+ /** SQL {@code POSITION(seek, string, from, occurrence)} function for byte
+ * strings. */
+ public static int position(ByteString seek, ByteString s, int from, int
occurrence) {
+ if (occurrence == 0) {
+ throw RESOURCE.occurrenceNotZero().ex();
+ }
+ for (int i = 0; i < occurrence; i++) {
Review Comment:
^ I didn't think about overlapping patterns. I am not sure if those
suggestions will work.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]