diff --git a/api/internal/crawl/crawler/github/crawler.go b/api/internal/crawl/crawler/github/crawler.go index 23bcbcd72..a749c6784 100644 --- a/api/internal/crawl/crawler/github/crawler.go +++ b/api/internal/crawl/crawler/github/crawler.go @@ -379,7 +379,7 @@ func (gcl GhClient) ForwardPaginatedQuery(ctx context.Context, query string, output chan<- GhResponseInfo) error { logger.Println("querying: ", query) - response := gcl.parseGithubResponse(query) + response := gcl.parseGithubResponseWithRetry(query) if response.Error != nil { return response.Error @@ -392,7 +392,7 @@ func (gcl GhClient) ForwardPaginatedQuery(ctx context.Context, query string, case <-ctx.Done(): return nil default: - response = gcl.parseGithubResponse(response.NextURL) + response = gcl.parseGithubResponseWithRetry(response.NextURL) if response.Error != nil { return response.Error } @@ -587,6 +587,8 @@ type githubResponse struct { // This is the number of files that match the query. TotalCount uint64 `json:"total_count,omitempty"` + IncompleteResults bool `json:"incomplete_results,omitempty"` + // Github representation of a file. 
Items []GhFileSpec `json:"items,omitempty"` }
@@ -629,6 +631,40 @@ func parseGithubLinkFormat(links string) (string, string) {
 	return next, last
 }
 
+// parseGithubResponseWithRetry issues getRequest through parseGithubResponse
+// and re-issues it while the parsed response reports incomplete_results.
+//
+// Retrying stops at the first response that carries an error, at the first
+// response that is complete, or after maxRetries re-issues, whichever comes
+// first. The last response received is returned, so callers must still check
+// Error, and may see Parsed.IncompleteResults set when the retry budget is
+// exhausted.
+func (gcl GhClient) parseGithubResponseWithRetry(getRequest string) GhResponseInfo {
+	// Upper bound on re-issues: without it, a query whose responses keep
+	// reporting incomplete_results would never let this function return.
+	const maxRetries = 5
+
+	resp := gcl.parseGithubResponse(getRequest)
+	retries := 0
+	for resp.Error == nil && resp.Parsed.IncompleteResults && retries < maxRetries {
+		resp = gcl.parseGithubResponse(getRequest)
+		retries++
+	}
+
+	switch {
+	case resp.Error != nil:
+		// The error is handed back to the caller; logging the query as
+		// complete here would be misleading.
+	case resp.Parsed.IncompleteResults:
+		logger.Printf("The result of query(%s) is still incomplete after %d retries",
+			getRequest, retries)
+	default:
+		logger.Printf("The result of query(%s) is complete after %d retries",
+			getRequest, retries)
+	}
+	return resp
+}
+
 func (gcl GhClient) parseGithubResponse(getRequest string) GhResponseInfo {
 	resp, err := gcl.SearchGithubAPI(getRequest)
 	requestInfo := GhResponseInfo{
diff --git a/api/internal/crawl/crawler/github/split_search_ranges.go b/api/internal/crawl/crawler/github/split_search_ranges.go index 81482a1b9..a852330c8 100644 --- a/api/internal/crawl/crawler/github/split_search_ranges.go +++ b/api/internal/crawl/crawler/github/split_search_ranges.go @@ -172,7 +172,7 @@ func (c githubCachedSearch) CountResults(lowerBound, upperBound uint64) (uint64, sizeRange := RangeWithin{lowerBound, upperBound} rangeRequest := c.RequestString(sizeRange) - result := c.gcl.parseGithubResponse(rangeRequest) + result := c.gcl.parseGithubResponseWithRetry(rangeRequest) if result.Error != nil { return count, result.Error } @@ -206,7 +206,7 @@ func (c githubCachedSearch) CountResults(lowerBound, upperBound uint64) (uint64, "Retrying query... 
current lower bound: %d, got: %d\n", c.cache[prev], result.Parsed.TotalCount) - result = c.gcl.parseGithubResponse(rangeRequest) + result = c.gcl.parseGithubResponseWithRetry(rangeRequest) if result.Error != nil { return count, result.Error } @@ -221,8 +221,8 @@ func (c githubCachedSearch) CountResults(lowerBound, upperBound uint64) (uint64, } count = result.Parsed.TotalCount - logger.Printf("Caching new query %s, with count %d\n", - sizeRange.RangeString(), count) + logger.Printf("Caching new query %s, with count %d (incomplete_results: %v)\n", + sizeRange.RangeString(), count, result.Parsed.IncompleteResults) c.cache[upperBound] = count return count, nil }