This is an automated email from the ASF dual-hosted git repository. dzamo pushed a commit to branch 1.21 in repository https://gitbox.apache.org/repos/asf/drill.git
commit 7478fd9074b283618fe7ebe6e4cefd5b24e13932 Author: Charles S. Givre <[email protected]> AuthorDate: Mon Mar 20 14:05:50 2023 -0400 DRILL-8414: Index Paginator Not Working When Provided URL (#2779) --- .../drill/exec/store/http/HttpBatchReader.java | 57 +++++++--- .../exec/store/http/paginator/IndexPaginator.java | 40 ++++++- .../drill/exec/store/http/TestPagination.java | 118 +++++++++++++++++++++ .../src/test/resources/data/index_response10.json | 14 +++ .../src/test/resources/data/index_response11.json | 14 +++ .../src/test/resources/data/index_response12.json | 14 +++ .../src/test/resources/data/index_response5.json | 15 +++ .../src/test/resources/data/index_response6.json | 15 +++ .../src/test/resources/data/index_response7.json | 15 +++ .../src/test/resources/data/index_response8.json | 14 +++ .../src/test/resources/data/index_response9.json | 14 +++ 11 files changed, 313 insertions(+), 17 deletions(-) diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java index 579643ea80..e6661ad043 100644 --- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java +++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java @@ -357,31 +357,58 @@ public class HttpBatchReader implements ManagedReader<SchemaNegotiator> { // At this point we know that there are more pages to come. Send the data to the paginator and // use that to generate the next page. - if (StringUtils.isNotEmpty(indexPaginator.getIndexParam())) { + if (StringUtils.isNotEmpty(indexPaginator.getIndexParam()) && paginationFields.containsKey(indexPaginator.getIndexParam())) { indexPaginator.setIndexValue(paginationFields.get(indexPaginator.getIndexParam()).toString()); } - if (StringUtils.isNotEmpty(indexPaginator.getNextPageParam())) { - indexPaginator.setNextPageValue(paginationFields.get(indexPaginator.getNextPageParam()).toString()); + if (StringUtils.isNotEmpty(indexPaginator.getNextPageParam()) && paginationFields.containsKey(indexPaginator.getNextPageParam())) { + Object nextPageValue = paginationFields.get(indexPaginator.getNextPageParam()); + if (nextPageValue != null) { + indexPaginator.setNextPageValue(nextPageValue.toString()); + } else { + // If the next URL is null, notify the paginator and stop paginating + paginator.notifyPartialPage(); + indexPaginator.setNextPageValue(null); + } } } else { // This covers the case of keyset/index pagination where there isn't a boolean parameter to indicate whether there are more results. // In this case, we will interpret the absence of the pagination field, receiving the same value, or a null value as the marker to stop pagination. - if ( (!paginationFields.containsKey(indexPaginator.getIndexParam())) || - paginationFields.get(indexPaginator.getIndexParam()) == null - ) { - // End pagination - paginator.notifyPartialPage(); + + // This represents the case when the index field is being used. + if (StringUtils.isNotEmpty(indexPaginator.getIndexParam())) { + if ((!paginationFields.containsKey(indexPaginator.getIndexParam())) || paginationFields.get(indexPaginator.getIndexParam()) == null) { + // End pagination + paginator.notifyPartialPage(); + } else { + // Otherwise, check to see if the field is present but empty, or contains the value from the last page. + // This will prevent runaway pagination calls. + String indexParameter = paginationFields.get(indexPaginator.getIndexParam()).toString(); + // Empty value or the last value is the same as the current one. + if (StringUtils.isEmpty(indexParameter) || (StringUtils.equals(indexParameter, indexPaginator.getLastIndexValue()))) { + paginator.notifyPartialPage(); + } else { + // Whew! We made it... get the next page. + indexPaginator.setIndexValue(indexParameter); + } + } } else { - // Otherwise, check to see if the field is present but empty, or contains the value from the last page. - // This will prevent runaway pagination calls. - String indexParameter = paginationFields.get(indexPaginator.getIndexParam()).toString(); - // Empty value or the last value is the same as the current one. - if (StringUtils.isEmpty(indexParameter) || (StringUtils.equals(indexParameter, indexPaginator.getLastIndexValue()))) { + // Case when the next page field is used. + if ((!paginationFields.containsKey(indexPaginator.getNextPageParam())) + || paginationFields.get(indexPaginator.getNextPageParam()) == null + || paginationFields.get(indexPaginator.getNextPageParam()) == "true") { + // End pagination paginator.notifyPartialPage(); } else { - // Whew! We made it... get the next page. - indexPaginator.setIndexValue(indexParameter); + // Get the next page + String nextURL = paginationFields.get(indexPaginator.getNextPageParam()).toString(); + // Empty value or the last value is the same as the current one. + if (StringUtils.isEmpty(nextURL) || (StringUtils.equals(nextURL, indexPaginator.getLastNextPageValue()))) { + paginator.notifyPartialPage(); + } else { + // Whew! We made it... get the next page. + indexPaginator.setNextPageValue(nextURL); + } } } } diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/IndexPaginator.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/IndexPaginator.java index 67bc89c4a6..75fcba336c 100644 --- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/IndexPaginator.java +++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/IndexPaginator.java @@ -39,6 +39,7 @@ public class IndexPaginator extends Paginator { private String indexValue; private String lastIndexValue; + private String lastNextPageValue; private Boolean hasMoreValue; private String nextPageValue; private int pageCount; @@ -48,13 +49,14 @@ public class IndexPaginator extends Paginator { this.hasMoreParam = hasMoreParam; this.indexParam = indexParam; this.nextPageParam = nextPageParam; + this.lastNextPageValue = ""; + this.lastIndexValue = ""; pageCount = 0; } @Override public boolean hasNext() { return !partialPageReceived; - } public String getIndexParam() { @@ -81,7 +83,7 @@ public class IndexPaginator extends Paginator { } public void setNextPageValue(String nextPageValue) { - this.lastIndexValue = this.nextPageValue; + this.lastNextPageValue = this.nextPageValue; this.nextPageValue = nextPageValue; } @@ -89,6 +91,10 @@ public class IndexPaginator extends Paginator { return lastIndexValue; } + public String getLastNextPageValue() { + return lastNextPageValue; + } + public boolean isFirstPage() { return pageCount < 1; } @@ -116,6 +122,36 @@ public class IndexPaginator extends Paginator { } builder.removeAllEncodedQueryParameters(indexParam); builder.addQueryParameter(indexParam, indexValue); + } else if (StringUtils.isNotEmpty(nextPageValue)) { + // Case when we're using the next page URL. We have two cases here: + // 1. The nextPage contains the full URL of the next page. + // 2. The next page contains the path but lacks the base URL. + if (StringUtils.startsWith(nextPageValue, "http://") || + StringUtils.startsWith(nextPageValue, "https://")) { + pageCount++; + return nextPageValue; + } else { + // If the next page just contains the path, we have to reconstruct a URL from the incoming path. + int segmentIndex = 0; + + // Remove leading slash in path to avoid double slashes in URL + if (nextPageValue.startsWith("/")) { + nextPageValue = nextPageValue.substring(1); + } + + // Now remove the path segments and replace with the updated ones from the URL. + for (String segment : builder.build().pathSegments()) { + if (nextPageValue.contains(segment)) { + for (int i = builder.build().pathSegments().size() - 1; i >= segmentIndex; i--) { + builder.removePathSegment(i); + } + break; + } + segmentIndex++; + } + builder.addPathSegments(nextPageValue); + } + } pageCount++; return builder.build().url().toString(); diff --git a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPagination.java b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPagination.java index ed30d8d8b1..afa5fe3581 100644 --- a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPagination.java +++ b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPagination.java @@ -65,6 +65,14 @@ public class TestPagination extends ClusterTest { private static String TEST_JSON_INDEX_PAGE2; private static String TEST_JSON_INDEX_PAGE3; private static String TEST_JSON_INDEX_PAGE4; + private static String TEST_JSON_INDEX_PAGE5; + private static String TEST_JSON_INDEX_PAGE6; + private static String TEST_JSON_INDEX_PAGE7; + private static String TEST_JSON_INDEX_PAGE8; + private static String TEST_JSON_INDEX_PAGE9; + private static String TEST_JSON_INDEX_PAGE10; + private static String TEST_JSON_INDEX_PAGE11; + private static String TEST_JSON_INDEX_PAGE12; private static String TEST_JSON_NESTED_INDEX; private static String TEST_JSON_NESTED_INDEX2; private static String TEST_XML_PAGE1; @@ -89,6 +97,14 @@ public class TestPagination extends ClusterTest { TEST_JSON_INDEX_PAGE3 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response3.json"), Charsets.UTF_8).read(); TEST_JSON_INDEX_PAGE4 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response4.json"), Charsets.UTF_8).read(); + TEST_JSON_INDEX_PAGE5 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response5.json"), Charsets.UTF_8).read(); + TEST_JSON_INDEX_PAGE6 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response6.json"), Charsets.UTF_8).read(); + TEST_JSON_INDEX_PAGE7 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response7.json"), Charsets.UTF_8).read(); + TEST_JSON_INDEX_PAGE8 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response8.json"), Charsets.UTF_8).read(); + TEST_JSON_INDEX_PAGE9 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response9.json"), Charsets.UTF_8).read(); + TEST_JSON_INDEX_PAGE10 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response10.json"), Charsets.UTF_8).read(); + TEST_JSON_INDEX_PAGE11 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response11.json"), Charsets.UTF_8).read(); + TEST_JSON_INDEX_PAGE12 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response12.json"), Charsets.UTF_8).read(); TEST_JSON_NESTED_INDEX = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/nested_pagination_fields.json"), Charsets.UTF_8).read(); TEST_JSON_NESTED_INDEX2 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/nested_pagination_fields2.json"), Charsets.UTF_8).read(); @@ -171,6 +187,37 @@ public class TestPagination extends ClusterTest { .inputType("json") .build(); + HttpPaginatorConfig nextPagePaginator = HttpPaginatorConfig.builder() + .nextPageParam("nextPage") + .hasMoreParam("has-more") + .method("index") + .build(); + + HttpApiConfig mockJsonConfigWithNextPagePaginator = HttpApiConfig.builder() + .url("http://localhost:8092/json") + .method("get") + .headers(headers) + .requireTail(false) + .dataPath("companies") + .paginator(nextPagePaginator) + .inputType("json") + .build(); + + HttpPaginatorConfig nextPagePaginatorNoHasMore = HttpPaginatorConfig.builder() + .nextPageParam("nextPage") + .method("index") + .build(); + + HttpApiConfig mockJsonConfigWithNextPagePaginatorNoHasMore = HttpApiConfig.builder() + .url("http://localhost:8092/json") + .method("get") + .headers(headers) + .requireTail(false) + .dataPath("companies") + .paginator(nextPagePaginatorNoHasMore) + .inputType("json") + .build(); + HttpPaginatorConfig nestedIndexPaginator = HttpPaginatorConfig.builder() .indexParam("after") .method("index") @@ -274,6 +321,8 @@ public class TestPagination extends ClusterTest { configs.put("csv_paginator", mockCsvConfigWithPaginator); configs.put("json_index", mockJsonConfigWithKeyset); configs.put("json_index_datapath", mockJsonConfigWithKeysetAndDataPath); + configs.put("next_page", mockJsonConfigWithNextPagePaginator); + configs.put("next_page2", mockJsonConfigWithNextPagePaginatorNoHasMore); configs.put("nested_keyset", mockJsonConfigWitNestedKeyset); configs.put("nested_keyset_and_datapath", mockJsonConfigWitNestedKeysetAndDataPath); configs.put("json_paginator", mockJsonConfigWithPaginator); @@ -447,6 +496,75 @@ public class TestPagination extends ClusterTest { assertEquals(4, count); } } + + @Test + public void nextPagePaginationWithURLAndHasMore() throws Exception { + String sql = "SELECT * FROM `local`.`next_page`"; + try (MockWebServer server = startServer()) { + + server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE5)); + server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE6)); + server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE7)); + + List<QueryDataBatch> results = client.queryBuilder() + .sql(sql) + .results(); + + int count = 0; + for(QueryDataBatch b : results){ + count += b.getHeader().getRowCount(); + b.release(); + } + assertEquals(3, results.size()); + assertEquals(6, count); + } + } + + @Test + public void nextPagePaginationWithURLAndNoHasMore() throws Exception { + String sql = "SELECT * FROM `local`.`next_page2`"; + try (MockWebServer server = startServer()) { + + server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE8)); + server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE9)); + + List<QueryDataBatch> results = client.queryBuilder() + .sql(sql) + .results(); + + int count = 0; + for(QueryDataBatch b : results){ + count += b.getHeader().getRowCount(); + b.release(); + } + assertEquals(2, results.size()); + assertEquals(4, count); + } + } + + @Test + public void nextPagePaginationWithPathAndNoHasMore() throws Exception { + String sql = "SELECT * FROM `local`.`next_page2`"; + try (MockWebServer server = startServer()) { + + server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE10)); + server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE11)); + server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE12)); + + List<QueryDataBatch> results = client.queryBuilder() + .sql(sql) + .results(); + + int count = 0; + for(QueryDataBatch b : results){ + count += b.getHeader().getRowCount(); + b.release(); + } + assertEquals(3, results.size()); + assertEquals(6, count); + } + } + @Test public void jsonQueryWithoutHasMore() throws Exception { String sql = "SELECT * FROM `local`.`nested_keyset` LIMIT 4"; diff --git a/contrib/storage-http/src/test/resources/data/index_response10.json b/contrib/storage-http/src/test/resources/data/index_response10.json new file mode 100644 index 0000000000..9f6d854db5 --- /dev/null +++ b/contrib/storage-http/src/test/resources/data/index_response10.json @@ -0,0 +1,14 @@ +{ + "offset": 115279791, + "nextPage": "/json/services/data/v37.0/query/01gp000000AJ51VAAT-800", + "companies": [ + { + "portalId": 62515, + "companyId": 115200636 + }, + { + "portalId": 62515, + "companyId": 115279791 + } + ] +} diff --git a/contrib/storage-http/src/test/resources/data/index_response11.json b/contrib/storage-http/src/test/resources/data/index_response11.json new file mode 100644 index 0000000000..80436f650a --- /dev/null +++ b/contrib/storage-http/src/test/resources/data/index_response11.json @@ -0,0 +1,14 @@ +{ + "offset": 115279791, + "nextPage": "/json/services/data/v37.0/query/01gp000000AJ51VAAT-1600", + "companies": [ + { + "portalId": 62515, + "companyId": 115200636 + }, + { + "portalId": 62515, + "companyId": 115279791 + } + ] +} diff --git a/contrib/storage-http/src/test/resources/data/index_response12.json b/contrib/storage-http/src/test/resources/data/index_response12.json new file mode 100644 index 0000000000..d13e412d37 --- /dev/null +++ b/contrib/storage-http/src/test/resources/data/index_response12.json @@ -0,0 +1,14 @@ +{ + "offset": 115279791, + "nextPage": null, + "companies": [ + { + "portalId": 62515, + "companyId": 115200636 + }, + { + "portalId": 62515, + "companyId": 115279791 + } + ] +} diff --git a/contrib/storage-http/src/test/resources/data/index_response5.json b/contrib/storage-http/src/test/resources/data/index_response5.json new file mode 100644 index 0000000000..f99eeef6b0 --- /dev/null +++ b/contrib/storage-http/src/test/resources/data/index_response5.json @@ -0,0 +1,15 @@ +{ + "has-more": true, + "offset": 115279791, + "nextPage": "http://localhost:8092/json?page=2", + "companies": [ + { + "portalId": 62515, + "companyId": 115200636 + }, + { + "portalId": 62515, + "companyId": 115279791 + } + ] +} diff --git a/contrib/storage-http/src/test/resources/data/index_response6.json b/contrib/storage-http/src/test/resources/data/index_response6.json new file mode 100644 index 0000000000..72e21df444 --- /dev/null +++ b/contrib/storage-http/src/test/resources/data/index_response6.json @@ -0,0 +1,15 @@ +{ + "has-more": true, + "offset": 115279791, + "nextPage": "http://localhost:8092/json?page=3", + "companies": [ + { + "portalId": 625154, + "companyId": 1152006364 + }, + { + "portalId": 625415, + "companyId": 1152379791 + } + ] +} \ No newline at end of file diff --git a/contrib/storage-http/src/test/resources/data/index_response7.json b/contrib/storage-http/src/test/resources/data/index_response7.json new file mode 100644 index 0000000000..419a854040 --- /dev/null +++ b/contrib/storage-http/src/test/resources/data/index_response7.json @@ -0,0 +1,15 @@ +{ + "has-more": false, + "offset": 115279791, + "nextPage": null, + "companies": [ + { + "portalId": 625155, + "companyId": 1152005636 + }, + { + "portalId": 625315, + "companyId": 1152479791 + } + ] +} diff --git a/contrib/storage-http/src/test/resources/data/index_response8.json b/contrib/storage-http/src/test/resources/data/index_response8.json new file mode 100644 index 0000000000..67227374eb --- /dev/null +++ b/contrib/storage-http/src/test/resources/data/index_response8.json @@ -0,0 +1,14 @@ +{ + "offset": 115279791, + "nextPage": "http://localhost:8092/json?page=2", + "companies": [ + { + "portalId": 62515, + "companyId": 115200636 + }, + { + "portalId": 62515, + "companyId": 115279791 + } + ] +} diff --git a/contrib/storage-http/src/test/resources/data/index_response9.json b/contrib/storage-http/src/test/resources/data/index_response9.json new file mode 100644 index 0000000000..d13e412d37 --- /dev/null +++ b/contrib/storage-http/src/test/resources/data/index_response9.json @@ -0,0 +1,14 @@ +{ + "offset": 115279791, + "nextPage": null, + "companies": [ + { + "portalId": 62515, + "companyId": 115200636 + }, + { + "portalId": 62515, + "companyId": 115279791 + } + ] +}
