Check the incomplete_results field of github query responses

Currently, we don't check the `incomplete_results` field of a GitHub
query response. This is problematic when incomplete query results are
used to split the query ranges: the resulting split ranges can be
wildly inaccurate.
This commit is contained in:
Haiyan Meng
2020-01-30 11:12:21 -08:00
parent 7a87c84403
commit f5419e9f72
2 changed files with 19 additions and 6 deletions

View File

@@ -379,7 +379,7 @@ func (gcl GhClient) ForwardPaginatedQuery(ctx context.Context, query string,
output chan<- GhResponseInfo) error {
logger.Println("querying: ", query)
response := gcl.parseGithubResponse(query)
response := gcl.parseGithubResponseWithRetry(query)
if response.Error != nil {
return response.Error
@@ -392,7 +392,7 @@ func (gcl GhClient) ForwardPaginatedQuery(ctx context.Context, query string,
case <-ctx.Done():
return nil
default:
response = gcl.parseGithubResponse(response.NextURL)
response = gcl.parseGithubResponseWithRetry(response.NextURL)
if response.Error != nil {
return response.Error
}
@@ -587,6 +587,8 @@ type githubResponse struct {
// This is the number of files that match the query.
TotalCount uint64 `json:"total_count,omitempty"`
IncompleteResults bool `json:"incomplete_results,omitempty"`
// Github representation of a file.
Items []GhFileSpec `json:"items,omitempty"`
}
@@ -629,6 +631,17 @@ func parseGithubLinkFormat(links string) (string, string) {
return next, last
}
// incompleteResultsError reports that a query's response was still flagged
// with incomplete_results once the retry budget was used up.
type incompleteResultsError struct {
	query string
}

// Error implements the error interface.
func (e incompleteResultsError) Error() string {
	return "github query results still incomplete after retries: " + e.query
}

// parseGithubResponseWithRetry issues getRequest through parseGithubResponse
// and re-issues the same request while the parsed response has
// IncompleteResults set.
//
// Retrying stops as soon as one of the following holds:
//   - the response carries an error (returned untouched so the caller, which
//     already checks Error, handles it in one place);
//   - the response is no longer flagged as incomplete;
//   - maxRetries re-issues have been made. In that case the last response is
//     returned with Error set to an incompleteResultsError, so callers never
//     mistake a partial result for a complete one.
//
// NOTE(review): retries are issued back-to-back with no delay; adding a
// backoff and context cancellation would be a worthwhile follow-up.
func (gcl GhClient) parseGithubResponseWithRetry(getRequest string) GhResponseInfo {
	// Upper bound on re-issues so a persistently incomplete query cannot
	// spin forever against the API.
	const maxRetries = 10

	resp := gcl.parseGithubResponse(getRequest)
	retries := 0
	// Check Error first: Parsed is only meaningful for a successful response.
	for resp.Error == nil && resp.Parsed.IncompleteResults && retries < maxRetries {
		resp = gcl.parseGithubResponse(getRequest)
		retries++
	}

	if resp.Error != nil {
		// Leave reporting to the caller; do not claim the result is complete.
		return resp
	}
	if resp.Parsed.IncompleteResults {
		log.Printf("The result of query(%s) is still incomplete after %d retries", getRequest, retries)
		resp.Error = incompleteResultsError{query: getRequest}
		return resp
	}
	log.Printf("The result of query(%s) is complete after %d retries", getRequest, retries)
	return resp
}
func (gcl GhClient) parseGithubResponse(getRequest string) GhResponseInfo {
resp, err := gcl.SearchGithubAPI(getRequest)
requestInfo := GhResponseInfo{

View File

@@ -172,7 +172,7 @@ func (c githubCachedSearch) CountResults(lowerBound, upperBound uint64) (uint64,
sizeRange := RangeWithin{lowerBound, upperBound}
rangeRequest := c.RequestString(sizeRange)
result := c.gcl.parseGithubResponse(rangeRequest)
result := c.gcl.parseGithubResponseWithRetry(rangeRequest)
if result.Error != nil {
return count, result.Error
}
@@ -206,7 +206,7 @@ func (c githubCachedSearch) CountResults(lowerBound, upperBound uint64) (uint64,
"Retrying query... current lower bound: %d, got: %d\n",
c.cache[prev], result.Parsed.TotalCount)
result = c.gcl.parseGithubResponse(rangeRequest)
result = c.gcl.parseGithubResponseWithRetry(rangeRequest)
if result.Error != nil {
return count, result.Error
}
@@ -221,8 +221,8 @@ func (c githubCachedSearch) CountResults(lowerBound, upperBound uint64) (uint64,
}
count = result.Parsed.TotalCount
logger.Printf("Caching new query %s, with count %d\n",
sizeRange.RangeString(), count)
logger.Printf("Caching new query %s, with count %d (incomplete_results: %v)\n",
sizeRange.RangeString(), count, result.Parsed.IncompleteResults)
c.cache[upperBound] = count
return count, nil
}