This is an automated email from the ASF dual-hosted git repository.

lynwee pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git


The following commit(s) were added to refs/heads/main by this push:
     new 4305b78af fix: gitlab wont return total page if total number of 
records exceeded 10k (#8201)
4305b78af is described below

commit 4305b78afacf2a6cafe7a90a698b224c345e7451
Author: Klesh Wong <[email protected]>
AuthorDate: Mon Dec 2 15:58:59 2024 +0800

    fix: gitlab wont return total page if total number of records exceeded 10k 
(#8201)
---
 backend/helpers/pluginhelper/api/api_collector.go | 15 +++++++++++++--
 backend/plugins/gitlab/tasks/shared.go            | 22 +++++++++++-----------
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/backend/helpers/pluginhelper/api/api_collector.go 
b/backend/helpers/pluginhelper/api/api_collector.go
index 440113269..c34fb7d02 100644
--- a/backend/helpers/pluginhelper/api/api_collector.go
+++ b/backend/helpers/pluginhelper/api/api_collector.go
@@ -56,6 +56,8 @@ type RequestData struct {
 // AsyncResponseHandler FIXME ...
 type AsyncResponseHandler func(res *http.Response) error
 
+var ErrUndetermined = errors.BadInput.New("undetermined")
+
 // ApiCollectorArgs FIXME ...
 type ApiCollectorArgs struct {
        RawDataSubTaskArgs
@@ -259,7 +261,7 @@ func (collector *ApiCollector) exec(input interface{}) {
                collector.fetchPagesDetermined(reqData)
                // fetch pages in parallel without number of total pages
        } else {
-               collector.fetchPagesUndetermined(reqData)
+               collector.fetchPagesUndetermined(reqData, false)
        }
 }
 
@@ -296,6 +298,12 @@ func (collector *ApiCollector) 
fetchPagesDetermined(reqData *RequestData) {
        collector.fetchAsync(reqData, func(count int, body []byte, res 
*http.Response) errors.Error {
                totalPages, err := collector.args.GetTotalPages(res, 
collector.args)
                if err != nil {
+                       // Some APIs might or might not return total 
pages/records based on total number of records
+                       // check 
https://github.com/apache/incubator-devlake/issues/8187 for details
+                       if err == ErrUndetermined {
+                               collector.fetchPagesUndetermined(reqData, true)
+                               return nil
+                       }
                        return errors.Default.Wrap(err, "fetchPagesDetermined 
get totalPages failed")
                }
                // spawn a none blocking go routine to fetch other pages
@@ -319,7 +327,7 @@ func (collector *ApiCollector) fetchPagesDetermined(reqData 
*RequestData) {
 }
 
 // fetchPagesUndetermined fetches data of all pages for APIs that do NOT 
return paging information
-func (collector *ApiCollector) fetchPagesUndetermined(reqData *RequestData) {
+func (collector *ApiCollector) fetchPagesUndetermined(reqData *RequestData, 
skipFirstPage bool) {
        //logger := collector.args.Ctx.GetLogger()
        //logger.Debug("fetch all pages in parallel with specified concurrency: 
%d", collector.args.Concurrency)
        // if api doesn't return total number of pages, employ a step 
concurrent technique
@@ -351,6 +359,9 @@ func (collector *ApiCollector) 
fetchPagesUndetermined(reqData *RequestData) {
                        Input:     reqData.Input,
                        InputJSON: reqData.InputJSON,
                }
+               if skipFirstPage && reqDataCopy.Pager.Page == 1 {
+                       reqDataCopy.Pager.Page += concurrency
+               }
                var collect func() errors.Error
                collect = func() errors.Error {
                        collector.fetchAsync(&reqDataCopy, func(count int, body 
[]byte, res *http.Response) errors.Error {
diff --git a/backend/plugins/gitlab/tasks/shared.go 
b/backend/plugins/gitlab/tasks/shared.go
index 59ce8b921..e8f8c650b 100644
--- a/backend/plugins/gitlab/tasks/shared.go
+++ b/backend/plugins/gitlab/tasks/shared.go
@@ -33,7 +33,7 @@ import (
        "github.com/apache/incubator-devlake/plugins/gitlab/models"
 
        "github.com/apache/incubator-devlake/core/plugin"
-       helper "github.com/apache/incubator-devlake/helpers/pluginhelper/api"
+       "github.com/apache/incubator-devlake/helpers/pluginhelper/api"
 )
 
 const (
@@ -62,10 +62,10 @@ type GitlabInput struct {
        Iid      int
 }
 
-func GetTotalPagesFromResponse(res *http.Response, args 
*helper.ApiCollectorArgs) (int, errors.Error) {
+func GetTotalPagesFromResponse(res *http.Response, args *api.ApiCollectorArgs) 
(int, errors.Error) {
        total := res.Header.Get("X-Total-Pages")
        if total == "" {
-               return 0, nil
+               return 0, api.ErrUndetermined
        }
        totalInt, err := strconv.Atoi(total)
        if err != nil {
@@ -140,13 +140,13 @@ func GetRawMessageUpdatedAtAfter(timeAfter *time.Time) 
func(res *http.Response)
                        }
                }
                if isFinish {
-                       return filterRawMessages, helper.ErrFinishCollect
+                       return filterRawMessages, api.ErrFinishCollect
                }
                return filterRawMessages, nil
        }
 }
 
-func GetQuery(reqData *helper.RequestData) (url.Values, errors.Error) {
+func GetQuery(reqData *api.RequestData) (url.Values, errors.Error) {
        query := url.Values{}
        query.Set("with_stats", "true")
        query.Set("sort", "asc")
@@ -155,9 +155,9 @@ func GetQuery(reqData *helper.RequestData) (url.Values, 
errors.Error) {
        return query, nil
 }
 
-func CreateRawDataSubTaskArgs(subtaskCtx plugin.SubTaskContext, Table string) 
(*helper.RawDataSubTaskArgs, *GitlabTaskData) {
+func CreateRawDataSubTaskArgs(subtaskCtx plugin.SubTaskContext, Table string) 
(*api.RawDataSubTaskArgs, *GitlabTaskData) {
        data := subtaskCtx.GetData().(*GitlabTaskData)
-       rawDataSubTaskArgs := &helper.RawDataSubTaskArgs{
+       rawDataSubTaskArgs := &api.RawDataSubTaskArgs{
                Ctx: subtaskCtx,
                Params: models.GitlabApiParams{
                        ProjectId:    data.Options.ProjectId,
@@ -168,9 +168,9 @@ func CreateRawDataSubTaskArgs(subtaskCtx 
plugin.SubTaskContext, Table string) (*
        return rawDataSubTaskArgs, data
 }
 
-func CreateSubtaskCommonArgs(subtaskCtx plugin.SubTaskContext, table string) 
(*helper.SubtaskCommonArgs, *GitlabTaskData) {
+func CreateSubtaskCommonArgs(subtaskCtx plugin.SubTaskContext, table string) 
(*api.SubtaskCommonArgs, *GitlabTaskData) {
        data := subtaskCtx.GetData().(*GitlabTaskData)
-       args := &helper.SubtaskCommonArgs{
+       args := &api.SubtaskCommonArgs{
                SubTaskContext: subtaskCtx,
                Table:          table,
                Params: models.GitlabApiParams{
@@ -181,7 +181,7 @@ func CreateSubtaskCommonArgs(subtaskCtx 
plugin.SubTaskContext, table string) (*h
        return args, data
 }
 
-func GetMergeRequestsIterator(taskCtx plugin.SubTaskContext, apiCollector 
*helper.StatefulApiCollector) (*helper.DalCursorIterator, errors.Error) {
+func GetMergeRequestsIterator(taskCtx plugin.SubTaskContext, apiCollector 
*api.StatefulApiCollector) (*api.DalCursorIterator, errors.Error) {
        db := taskCtx.GetDal()
        data := taskCtx.GetData().(*GitlabTaskData)
        clauses := []dal.Clause{
@@ -204,5 +204,5 @@ func GetMergeRequestsIterator(taskCtx 
plugin.SubTaskContext, apiCollector *helpe
                return nil, err
        }
 
-       return helper.NewDalCursorIterator(db, cursor, 
reflect.TypeOf(GitlabInput{}))
+       return api.NewDalCursorIterator(db, cursor, 
reflect.TypeOf(GitlabInput{}))
 }

Reply via email to