Check the incomplete_results field of github query responses

Currently, we don't check the `incomplete_results` field of a GitHub
query response. This is problematic when incomplete query results are
used to split the query ranges: the resulting split ranges can be
wildly inaccurate.
This commit is contained in:
Haiyan Meng
2020-01-30 11:12:21 -08:00
parent 7a87c84403
commit f5419e9f72
2 changed files with 19 additions and 6 deletions

View File

@@ -379,7 +379,7 @@ func (gcl GhClient) ForwardPaginatedQuery(ctx context.Context, query string,
output chan<- GhResponseInfo) error {
logger.Println("querying: ", query)
response := gcl.parseGithubResponse(query)
response := gcl.parseGithubResponseWithRetry(query)
if response.Error != nil {
return response.Error
@@ -392,7 +392,7 @@ func (gcl GhClient) ForwardPaginatedQuery(ctx context.Context, query string,
case <-ctx.Done():
return nil
default:
response = gcl.parseGithubResponse(response.NextURL)
response = gcl.parseGithubResponseWithRetry(response.NextURL)
if response.Error != nil {
return response.Error
}
@@ -587,6 +587,8 @@ type githubResponse struct {
// This is the number of files that match the query.
TotalCount uint64 `json:"total_count,omitempty"`
IncompleteResults bool `json:"incomplete_results,omitempty"`
// Github representation of a file.
Items []GhFileSpec `json:"items,omitempty"`
}
@@ -629,6 +631,17 @@ func parseGithubLinkFormat(links string) (string, string) {
return next, last
}
// incompleteResultsError reports that a GitHub query was still flagged as
// incomplete after the retry budget of parseGithubResponseWithRetry ran out.
type incompleteResultsError struct {
	query string
}

// Error implements the error interface.
func (e *incompleteResultsError) Error() string {
	return "github query still returned incomplete results after retries: " + e.query
}

// parseGithubResponseWithRetry issues getRequest through parseGithubResponse
// and re-issues it while the parsed response has IncompleteResults set.
//
// The number of retries is bounded. If the response is still incomplete once
// the budget is exhausted, the last response is returned with its Error field
// set to an *incompleteResultsError, so callers that check Error do not act
// on partial data.
func (gcl GhClient) parseGithubResponseWithRetry(getRequest string) GhResponseInfo {
	// Upper bound on re-issued requests; without it a query that is always
	// reported as incomplete would loop forever.
	const maxRetries = 10

	resp := gcl.parseGithubResponse(getRequest)
	retries := 0
	for resp.Parsed.IncompleteResults && retries < maxRetries {
		resp = gcl.parseGithubResponse(getRequest)
		retries++
	}

	switch {
	case resp.Error != nil:
		// The request itself failed; the caller inspects resp.Error, so do
		// not claim completeness in the log.
	case resp.Parsed.IncompleteResults:
		// Retry budget exhausted: surface the condition as an error instead
		// of handing back partial results silently.
		resp.Error = &incompleteResultsError{query: getRequest}
	default:
		log.Printf("The result of query(%s) is complete after %d retries", getRequest, retries)
	}
	return resp
}
func (gcl GhClient) parseGithubResponse(getRequest string) GhResponseInfo {
resp, err := gcl.SearchGithubAPI(getRequest)
requestInfo := GhResponseInfo{