From ee659a70e409f51ec2d26a7a3589e95b65f07823 Mon Sep 17 00:00:00 2001 From: Haiyan Meng Date: Thu, 19 Dec 2019 12:30:02 -0800 Subject: [PATCH 1/4] Fix how to construct URLs for finding all the commits related to a github file The existing logic sets the creation time of a github file to the time when the github repository was created. The fix sets the creation time of a github file to the time when the file was created. --- api/internal/crawl/crawler/github/queries.go | 24 +++++++++---------- .../crawl/crawler/github/queries_test.go | 4 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/api/internal/crawl/crawler/github/queries.go b/api/internal/crawl/crawler/github/queries.go index 98eb1b9f3..557e0f371 100644 --- a/api/internal/crawl/crawler/github/queries.go +++ b/api/internal/crawl/crawler/github/queries.go @@ -116,9 +116,11 @@ type RequestConfig struct { // the URL method to get the string value of the URL. See request.CopyWith, to // understand why the request object is useful. func (rc RequestConfig) CodeSearchRequestWith(query Query) request { - req := rc.makeRequest("search/code", query) - req.vals.Set("sort", "indexed") - req.vals.Set("order", "desc") + vals := url.Values{ + "sort": []string{"indexed"}, + "order": []string{"desc"}, + } + req := rc.makeRequest("search/code", query, vals) return req } @@ -126,27 +128,25 @@ func (rc RequestConfig) CodeSearchRequestWith(query Query) request { // query for the Github API to find the dowload information of this filepath. 
func (rc RequestConfig) ContentsRequest(fullRepoName, path string) string { uri := fmt.Sprintf("repos/%s/contents/%s", fullRepoName, path) - return rc.makeRequest(uri, Query{}).URL() + return rc.makeRequest(uri, Query{}, url.Values{}).URL() } func (rc RequestConfig) ReposRequest(fullRepoName string) string { uri := fmt.Sprintf("repos/%s", fullRepoName) - return rc.makeRequest(uri, Query{}).URL() -} - -func escapeSpace(s string) string { - return strings.Replace(s, " ", "%20", -1) + return rc.makeRequest(uri, Query{}, url.Values{}).URL() } // CommitsRequest given the repo name, and a filepath returns a formatted query // for the Github API to find the commits that affect this file. func (rc RequestConfig) CommitsRequest(fullRepoName, path string) string { uri := fmt.Sprintf("repos/%s/commits", fullRepoName) - return rc.makeRequest(uri, Query{Path(escapeSpace(path))}).URL() + vals := url.Values{ + "path": []string{path}, + } + return rc.makeRequest(uri, Query{}, vals).URL() } -func (rc RequestConfig) makeRequest(path string, query Query) request { - vals := url.Values{} +func (rc RequestConfig) makeRequest(path string, query Query, vals url.Values) request { vals.Set(perPageArg, fmt.Sprint(rc.perPage)) return request{ diff --git a/api/internal/crawl/crawler/github/queries_test.go b/api/internal/crawl/crawler/github/queries_test.go index 07c9036b6..fcab464f0 100644 --- a/api/internal/crawl/crawler/github/queries_test.go +++ b/api/internal/crawl/crawler/github/queries_test.go @@ -101,7 +101,7 @@ func TestGithubSearchQuery(t *testing.T) { "examples/helloWorld/kustomization.yaml?per_page=100", expectedCommitsQuery: "https://api.github.com/repos/kubernetes-sigs/kustomize/commits?" 
+ - "q=path:examples/helloWorld/kustomization.yaml&per_page=100", + "path=examples%2FhelloWorld%2Fkustomization.yaml&per_page=100", }, { rc: RequestConfig{ @@ -121,7 +121,7 @@ func TestGithubSearchQuery(t *testing.T) { "examples%201/helloWorld/kustomization.yaml?per_page=100", expectedCommitsQuery: "https://api.github.com/repos/kubernetes-sigs/kustomize/commits?" + - "q=path:examples%201/helloWorld/kustomization.yaml&per_page=100", + "path=examples+1%2FhelloWorld%2Fkustomization.yaml&per_page=100", }, } From 5f8a8b545bd93235c7fbe6fb497bd7170671441f Mon Sep 17 00:00:00 2001 From: Haiyan Meng Date: Fri, 3 Jan 2020 14:34:54 -0800 Subject: [PATCH 2/4] Add "kustomization" into the kustomization filenames used by the crawler --- api/internal/crawl/cmd/crawler/crawler.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/api/internal/crawl/cmd/crawler/crawler.go b/api/internal/crawl/cmd/crawler/crawler.go index fdbafeafd..6831fed87 100644 --- a/api/internal/crawl/cmd/crawler/crawler.go +++ b/api/internal/crawl/cmd/crawler/crawler.go @@ -136,6 +136,7 @@ func main() { github.QueryWith( github.Filename("kustomization.yaml"), github.Filename("kustomization.yml"), + github.Filename("kustomization"), github.User(user)), ) } else if repo != "" { @@ -143,13 +144,15 @@ func main() { github.QueryWith( github.Filename("kustomization.yaml"), github.Filename("kustomization.yml"), + github.Filename("kustomization"), github.Repo(repo)), ) } else { return github.NewCrawler(githubToken, retryCount, clientCache, github.QueryWith( github.Filename("kustomization.yaml"), - github.Filename("kustomization.yml")), + github.Filename("kustomization.yml"), + github.Filename("kustomization")), ) } } From 142c10550050cf854ce2af5a6e178222be8a706d Mon Sep 17 00:00:00 2001 From: Haiyan Meng Date: Mon, 6 Jan 2020 11:53:27 -0800 Subject: [PATCH 3/4] Skip the empty resource/base item in a kustomization file and set the defaultBranch if needed --- 
api/internal/crawl/crawler/github/crawler.go | 11 +++++++++++ api/internal/crawl/doc/doc.go | 3 +++ 2 files changed, 14 insertions(+) diff --git a/api/internal/crawl/crawler/github/crawler.go b/api/internal/crawl/crawler/github/crawler.go index fec1628b0..1696a296a 100644 --- a/api/internal/crawl/crawler/github/crawler.go +++ b/api/internal/crawl/crawler/github/crawler.go @@ -97,6 +97,17 @@ func (gc githubCrawler) Crawl( // it will try to add each string in konfig.RecognizedKustomizationFileNames() to // d.FilePath, and try to fetch the document again. func (gc githubCrawler) FetchDocument(_ context.Context, d *doc.Document) error { + // set the default branch if it is empty + if d.DefaultBranch == "" { + url := gc.client.ReposRequest(d.RepositoryFullName()) + defaultBranch, err := gc.client.GetDefaultBranch(url) + if err != nil { + logger.Printf( + "(error: %v) setting default_branch to master\n", err) + defaultBranch = "master" + } + d.DefaultBranch = defaultBranch + } repoURL := d.RepositoryURL + "/" + d.FilePath + "?ref=" + d.DefaultBranch repoSpec, err := git.NewRepoSpecFromUrl(repoURL) if err != nil { diff --git a/api/internal/crawl/doc/doc.go b/api/internal/crawl/doc/doc.go index 953f8d4b4..ec2e031ba 100644 --- a/api/internal/crawl/doc/doc.go +++ b/api/internal/crawl/doc/doc.go @@ -78,6 +78,9 @@ func (doc *KustomizationDocument) GetResources() ([]*Document, error) { res := make([]*Document, 0, len(k.Resources)) for _, r := range k.Resources { + if strings.TrimSpace(r) == "" { + continue + } next, err := doc.Document.FromRelativePath(r) if err != nil { fmt.Printf("GetResources error: %v\n", err) From 745b58b3d0c922fef1634b0d16de551fc6a0ec53 Mon Sep 17 00:00:00 2001 From: Haiyan Meng Date: Mon, 6 Jan 2020 11:55:22 -0800 Subject: [PATCH 4/4] Check whether a pointer is empty before accessing it to avoid SIGSEGV --- api/internal/crawl/index/elasticsearch.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/api/internal/crawl/index/elasticsearch.go b/api/internal/crawl/index/elasticsearch.go index b80d7f901..3226b4970 100644 --- a/api/internal/crawl/index/elasticsearch.go +++ b/api/internal/crawl/index/elasticsearch.go @@ -309,9 +309,9 @@ func (idx *index) Exists(id string) (bool, error) { op.WithPretty(), ) - if !res.IsError() { + if res != nil && !res.IsError() { return true, nil - } else if res.StatusCode == 404 { + } else if res != nil && res.StatusCode == 404 { return false, nil } else { return false, idx.responseErrorOrNil(