diff --git a/hack/crawl/crawler/crawler_test.go b/hack/crawl/crawler/crawler_test.go index e4fccd5a4..e1f06eb7a 100644 --- a/hack/crawl/crawler/crawler_test.go +++ b/hack/crawl/crawler/crawler_test.go @@ -11,7 +11,7 @@ import ( "testing" "time" - "sigs.k8s.io/kustomize/api/pgmconfig" + "sigs.k8s.io/kustomize/api/konfig" "sigs.k8s.io/kustomize/hack/crawl/doc" ) @@ -37,7 +37,7 @@ func (c testCrawler) FetchDocument(ctx context.Context, d *doc.Document) error { d.DocumentData = c.docs[i].DocumentData return nil } - for _, suffix := range pgmconfig.RecognizedKustomizationFileNames() { + for _, suffix := range konfig.RecognizedKustomizationFileNames() { fmt.Println(d.ID(), "/", suffix) i, ok := c.lukp[d.ID()+"/"+suffix] if !ok { @@ -76,7 +76,7 @@ func (c testCrawler) Crawl(ctx context.Context, for i, d := range c.docs { isResource := true - for _, suffix := range pgmconfig.RecognizedKustomizationFileNames() { + for _, suffix := range konfig.RecognizedKustomizationFileNames() { if strings.HasSuffix(d.FilePath, suffix) { isResource = false break diff --git a/hack/crawl/crawler/git/cloner.go b/hack/crawl/crawler/git/cloner.go new file mode 100644 index 000000000..4bffe251d --- /dev/null +++ b/hack/crawl/crawler/git/cloner.go @@ -0,0 +1,121 @@ +// Copyright 2019 The Kubernetes Authors. +// SPDX-License-Identifier: Apache-2.0 + +package git + +import ( + "bytes" + "log" + "os/exec" + + "github.com/pkg/errors" + "sigs.k8s.io/kustomize/api/filesys" +) + +// Cloner is a function that can clone a git repo. +type Cloner func(repoSpec *RepoSpec) error + +// ClonerUsingGitExec uses a local git install, as opposed +// to say, some remote API, to obtain a local clone of +// a remote repo. +func ClonerUsingGitExec(repoSpec *RepoSpec) error { + gitProgram, err := exec.LookPath("git") + if err != nil { + return errors.Wrap(err, "no 'git' program on path") + } + repoSpec.Dir, err = filesys.NewTmpConfirmedDir() + if err != nil { + return err + } + cmd := exec.Command( + gitProgram, + "init", + repoSpec.Dir.String()) + var out bytes.Buffer + cmd.Stdout = &out + cmd.Stderr = &out + err = cmd.Run() + if err != nil { + log.Printf("Error initializing empty git repo: %s", out.String()) + return errors.Wrapf( + err, + "trouble initializing empty git repo in %s", + repoSpec.Dir.String()) + } + + cmd = exec.Command( + gitProgram, + "remote", + "add", + "origin", + repoSpec.CloneSpec()) + cmd.Stdout = &out + cmd.Stderr = &out + cmd.Dir = repoSpec.Dir.String() + err = cmd.Run() + if err != nil { + log.Printf("Error setting git remote: %s", out.String()) + return errors.Wrapf( + err, + "trouble adding remote %s", + repoSpec.CloneSpec()) + } + if repoSpec.Ref == "" { + repoSpec.Ref = "master" + } + cmd = exec.Command( + gitProgram, + "fetch", + "--depth=1", + "origin", + repoSpec.Ref) + cmd.Stdout = &out + cmd.Stderr = &out + cmd.Dir = repoSpec.Dir.String() + err = cmd.Run() + if err != nil { + log.Printf("Error performing git fetch: %s", out.String()) + return errors.Wrapf(err, "trouble fetching %s", repoSpec.Ref) + } + + cmd = exec.Command( + gitProgram, + "reset", + "--hard", + "FETCH_HEAD") + cmd.Stdout = &out + cmd.Stderr = &out + cmd.Dir = repoSpec.Dir.String() + err = cmd.Run() + if err != nil { + log.Printf("Error performing git reset: %s", out.String()) + return errors.Wrapf( + err, "trouble hard resetting empty repository to %s", repoSpec.Ref) + } + + cmd = exec.Command( + gitProgram, + "submodule", + "update", + "--init", + "--recursive") + cmd.Stdout = &out + cmd.Dir = repoSpec.Dir.String() + err = cmd.Run() + if err != nil { + return errors.Wrapf(err, "trouble fetching submodules for %s", repoSpec.Ref) + } + + return nil +} + +// DoNothingCloner returns a cloner that only sets +// cloneDir field in the repoSpec. It's assumed that +// the cloneDir is associated with some fake filesystem +// used in a test. +func DoNothingCloner(dir filesys.ConfirmedDir) Cloner { + return func(rs *RepoSpec) error { + rs.Dir = dir + return nil + } +} diff --git a/hack/crawl/crawler/git/repospec.go b/hack/crawl/crawler/git/repospec.go new file mode 100644 index 000000000..ad8dfc24d --- /dev/null +++ b/hack/crawl/crawler/git/repospec.go @@ -0,0 +1,219 @@ +// Copyright 2019 The Kubernetes Authors. +// SPDX-License-Identifier: Apache-2.0 + +package git + +import ( + "fmt" + "path/filepath" + "regexp" + "strings" + + "sigs.k8s.io/kustomize/api/filesys" +) + +// Used as a temporary non-empty occupant of the cloneDir +// field, as something distinguishable from the empty string +// in various outputs (especially tests). Not using an +// actual directory name here, as that's a temporary directory +// with a unique name that isn't created until clone time. +const notCloned = filesys.ConfirmedDir("/notCloned") + +// RepoSpec specifies a git repository and a branch and path therein. +type RepoSpec struct { + // Raw, original spec, used to look for cycles. + // TODO(monopole): Drop raw, use processed fields instead. + raw string + + // Host, e.g. github.com + Host string + + // orgRepo name (organization/repoName), + // e.g. kubernetes-sigs/kustomize + OrgRepo string + + // Dir where the orgRepo is cloned to. + Dir filesys.ConfirmedDir + + // Relative path in the repository, and in the cloneDir, + // to a Kustomization. + Path string + + // Branch or tag reference. + Ref string + + // e.g. .git or empty in case of _git is present + GitSuffix string +} + +// CloneSpec returns a string suitable for "git clone {spec}". +func (x *RepoSpec) CloneSpec() string { + if isAzureHost(x.Host) || isAWSHost(x.Host) { + return x.Host + x.OrgRepo + } + return x.Host + x.OrgRepo + x.GitSuffix +} + +func (x *RepoSpec) CloneDir() filesys.ConfirmedDir { + return x.Dir +} + +func (x *RepoSpec) Raw() string { + return x.raw +} + +func (x *RepoSpec) AbsPath() string { + return x.Dir.Join(x.Path) +} + +func (x *RepoSpec) Cleaner(fSys filesys.FileSystem) func() error { + return func() error { return fSys.RemoveAll(x.Dir.String()) } +} + +// From strings like git@github.com:someOrg/someRepo.git or +// https://github.com/someOrg/someRepo?ref=someHash, extract +// the parts. +func NewRepoSpecFromUrl(n string) (*RepoSpec, error) { + if filepath.IsAbs(n) { + return nil, fmt.Errorf("uri looks like abs path: %s", n) + } + host, orgRepo, path, gitRef, gitSuffix := parseGitUrl(n) + if orgRepo == "" { + return nil, fmt.Errorf("url lacks orgRepo: %s", n) + } + if host == "" { + return nil, fmt.Errorf("url lacks host: %s", n) + } + return &RepoSpec{ + raw: n, Host: host, OrgRepo: orgRepo, + Dir: notCloned, Path: path, Ref: gitRef, GitSuffix: gitSuffix}, nil +} + +const ( + refQuery = "?ref=" + refQueryRegex = "\\?(version|ref)=" + gitSuffix = ".git" + gitDelimiter = "_git/" +) + +// From strings like git@github.com:someOrg/someRepo.git or +// https://github.com/someOrg/someRepo?ref=someHash, extract +// the parts. +func parseGitUrl(n string) ( + host string, orgRepo string, path string, gitRef string, gitSuff string) { + + if strings.Contains(n, gitDelimiter) { + index := strings.Index(n, gitDelimiter) + // Adding _git/ to host + host = normalizeGitHostSpec(n[:index+len(gitDelimiter)]) + orgRepo = strings.Split(strings.Split(n[index+len(gitDelimiter):], "/")[0], "?")[0] + path, gitRef = peelQuery(n[index+len(gitDelimiter)+len(orgRepo):]) + return + } + host, n = parseHostSpec(n) + gitSuff = gitSuffix + if strings.Contains(n, gitSuffix) { + index := strings.Index(n, gitSuffix) + orgRepo = n[0:index] + n = n[index+len(gitSuffix):] + path, gitRef = peelQuery(n) + return + } + + i := strings.Index(n, "/") + if i < 1 { + return "", "", "", "", "" + } + j := strings.Index(n[i+1:], "/") + if j >= 0 { + j += i + 1 + orgRepo = n[:j] + path, gitRef = peelQuery(n[j+1:]) + return + } + path = "" + orgRepo, gitRef = peelQuery(n) + return host, orgRepo, path, gitRef, gitSuff +} + +func peelQuery(arg string) (string, string) { + + r, _ := regexp.Compile(refQueryRegex) + j := r.FindStringIndex(arg) + + if len(j) > 0 { + return arg[:j[0]], arg[j[0]+len(r.FindString(arg)):] + } + return arg, "" +} + +func parseHostSpec(n string) (string, string) { + var host string + // Start accumulating the host part. + for _, p := range []string{ + // Order matters here. + "git::", "gh:", "ssh://", "https://", "http://", + "git@", "github.com:", "github.com/"} { + if len(p) < len(n) && strings.ToLower(n[:len(p)]) == p { + n = n[len(p):] + host += p + } + } + if host == "git@" { + i := strings.Index(n, "/") + if i > -1 { + host += n[:i+1] + n = n[i+1:] + } else { + i = strings.Index(n, ":") + if i > -1 { + host += n[:i+1] + n = n[i+1:] + } + } + return host, n + } + + // If host is a http(s) or ssh URL, grab the domain part. + for _, p := range []string{ + "ssh://", "https://", "http://"} { + if strings.HasSuffix(host, p) { + i := strings.Index(n, "/") + if i > -1 { + host = host + n[0:i+1] + n = n[i+1:] + } + break + } + } + + return normalizeGitHostSpec(host), n +} + +func normalizeGitHostSpec(host string) string { + s := strings.ToLower(host) + if strings.Contains(s, "github.com") { + if strings.Contains(s, "git@") || strings.Contains(s, "ssh:") { + host = "git@github.com:" + } else { + host = "https://github.com/" + } + } + if strings.HasPrefix(s, "git::") { + host = strings.TrimPrefix(s, "git::") + } + return host +} + +// The format of Azure repo URL is documented +// https://docs.microsoft.com/en-us/azure/devops/repos/git/clone?view=vsts&tabs=visual-studio#clone_url +func isAzureHost(host string) bool { + return strings.Contains(host, "dev.azure.com") || + strings.Contains(host, "visualstudio.com") +} + +// The format of AWS repo URL is documented +// https://docs.aws.amazon.com/codecommit/latest/userguide/regions.html +func isAWSHost(host string) bool { + return strings.Contains(host, "amazonaws.com") +} diff --git a/hack/crawl/crawler/git/repospec_test.go b/hack/crawl/crawler/git/repospec_test.go new file mode 100644 index 000000000..3a1a760f9 --- /dev/null +++ b/hack/crawl/crawler/git/repospec_test.go @@ -0,0 +1,288 @@ +// Copyright 2019 The Kubernetes Authors. +// SPDX-License-Identifier: Apache-2.0 + +package git + +import ( + "fmt" + "path/filepath" + "strings" + "testing" +) + +var orgRepos = []string{"someOrg/someRepo", "kubernetes/website"} + +var pathNames = []string{"README.md", "foo/krusty.txt", ""} + +var hrefArgs = []string{"someBranch", "master", "v0.1.0", ""} + +var hostNamesRawAndNormalized = [][]string{ + {"gh:", "gh:"}, + {"GH:", "gh:"}, + {"gitHub.com/", "https://github.com/"}, + {"github.com:", "https://github.com/"}, + {"http://github.com/", "https://github.com/"}, + {"https://github.com/", "https://github.com/"}, + {"hTTps://github.com/", "https://github.com/"}, + {"https://git-codecommit.us-east-2.amazonaws.com/", "https://git-codecommit.us-east-2.amazonaws.com/"}, + {"https://fabrikops2.visualstudio.com/", "https://fabrikops2.visualstudio.com/"}, + {"ssh://git.example.com:7999/", "ssh://git.example.com:7999/"}, + {"git::https://gitlab.com/", "https://gitlab.com/"}, + {"git::http://git.example.com/", "http://git.example.com/"}, + {"git::https://git.example.com/", "https://git.example.com/"}, + {"git@github.com:", "git@github.com:"}, + {"git@github.com/", "git@github.com:"}, + {"git@gitlab2.sqtools.ru:10022/", "git@gitlab2.sqtools.ru:10022/"}, +} + +func makeUrl(hostFmt, orgRepo, path, href string) string { + if len(path) > 0 { + orgRepo = filepath.Join(orgRepo, path) + } + url := hostFmt + orgRepo + if href != "" { + url += refQuery + href + } + return url +} + +func TestNewRepoSpecFromUrl(t *testing.T) { + var bad [][]string + for _, tuple := range hostNamesRawAndNormalized { + hostRaw := tuple[0] + hostSpec := tuple[1] + for _, orgRepo := range orgRepos { + for _, pathName := range pathNames { + for _, hrefArg := range hrefArgs { + uri := makeUrl(hostRaw, orgRepo, pathName, hrefArg) + rs, err := NewRepoSpecFromUrl(uri) + if err != nil { + t.Errorf("problem %v", err) + } + if rs.Host != hostSpec { + bad = append(bad, []string{"host", uri, rs.Host, hostSpec}) + } + if rs.OrgRepo != orgRepo { + bad = append(bad, []string{"orgRepo", uri, rs.OrgRepo, orgRepo}) + } + if rs.Path != pathName { + bad = append(bad, []string{"path", uri, rs.Path, pathName}) + } + if rs.Ref != hrefArg { + bad = append(bad, []string{"ref", uri, rs.Ref, hrefArg}) + } + } + } + } + } + if len(bad) > 0 { + for _, tuple := range bad { + fmt.Printf("\n"+ + " from uri: %s\n"+ + " actual %4s: %s\n"+ + "expected %4s: %s\n", + tuple[1], tuple[0], tuple[2], tuple[0], tuple[3]) + } + t.Fail() + } +} + +var badData = [][]string{ + {"/tmp", "uri looks like abs path"}, + {"iauhsdiuashduas", "url lacks orgRepo"}, + {"htxxxtp://github.com/", "url lacks host"}, + {"ssh://git.example.com", "url lacks orgRepo"}, + {"git::___", "url lacks orgRepo"}, +} + +func TestNewRepoSpecFromUrlErrors(t *testing.T) { + for _, tuple := range badData { + _, err := NewRepoSpecFromUrl(tuple[0]) + if err == nil { + t.Error("expected error") + } + if !strings.Contains(err.Error(), tuple[1]) { + t.Errorf("unexpected error: %s", err) + } + } +} + +func TestNewRepoSpecFromUrl_CloneSpecs(t *testing.T) { + testcases := []struct { + input string + cloneSpec string + absPath string + ref string + }{ + { + input: "https://git-codecommit.us-east-2.amazonaws.com/someorg/somerepo/somedir", + cloneSpec: "https://git-codecommit.us-east-2.amazonaws.com/someorg/somerepo", + absPath: notCloned.Join("somedir"), + ref: "", + }, + { + input: "https://git-codecommit.us-east-2.amazonaws.com/someorg/somerepo/somedir?ref=testbranch", + cloneSpec: "https://git-codecommit.us-east-2.amazonaws.com/someorg/somerepo", + absPath: notCloned.Join("somedir"), + ref: "testbranch", + }, + { + input: "https://fabrikops2.visualstudio.com/someorg/somerepo?ref=master", + cloneSpec: "https://fabrikops2.visualstudio.com/someorg/somerepo", + absPath: notCloned.String(), + ref: "master", + }, + { + input: "http://github.com/someorg/somerepo/somedir", + cloneSpec: "https://github.com/someorg/somerepo.git", + absPath: notCloned.Join("somedir"), + ref: "", + }, + { + input: "git@github.com:someorg/somerepo/somedir", + cloneSpec: "git@github.com:someorg/somerepo.git", + absPath: notCloned.Join("somedir"), + ref: "", + }, + { + input: "git@gitlab2.sqtools.ru:10022/infra/kubernetes/thanos-base.git?ref=v0.1.0", + cloneSpec: "git@gitlab2.sqtools.ru:10022/infra/kubernetes/thanos-base.git", + absPath: notCloned.String(), + ref: "v0.1.0", + }, + { + input: "git@bitbucket.org:company/project.git//path?ref=branch", + cloneSpec: "git@bitbucket.org:company/project.git", + absPath: notCloned.Join("path"), + ref: "branch", + }, + { + input: "https://itfs.mycompany.com/collection/project/_git/somerepos", + cloneSpec: "https://itfs.mycompany.com/collection/project/_git/somerepos", + absPath: notCloned.String(), + ref: "", + }, + { + input: "https://itfs.mycompany.com/collection/project/_git/somerepos?version=v1.0.0", + cloneSpec: "https://itfs.mycompany.com/collection/project/_git/somerepos", + absPath: notCloned.String(), + ref: "v1.0.0", + }, + { + input: "https://itfs.mycompany.com/collection/project/_git/somerepos/somedir?version=v1.0.0", + cloneSpec: "https://itfs.mycompany.com/collection/project/_git/somerepos", + absPath: notCloned.Join("somedir"), + ref: "v1.0.0", + }, + { + input: "git::https://itfs.mycompany.com/collection/project/_git/somerepos", + cloneSpec: "https://itfs.mycompany.com/collection/project/_git/somerepos", + absPath: notCloned.String(), + ref: "", + }, + } + for _, testcase := range testcases { + rs, err := NewRepoSpecFromUrl(testcase.input) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if rs.CloneSpec() != testcase.cloneSpec { + t.Errorf("CloneSpec expected to be %v, but got %v on %s", + testcase.cloneSpec, rs.CloneSpec(), testcase.input) + } + if rs.AbsPath() != testcase.absPath { + t.Errorf("AbsPath expected to be %v, but got %v on %s", + testcase.absPath, rs.AbsPath(), testcase.input) + } + if rs.Ref != testcase.ref { + t.Errorf("ref expected to be %v, but got %v on %s", + testcase.ref, rs.Ref, testcase.input) + } + } +} + +func TestIsAzureHost(t *testing.T) { + testcases := []struct { + input string + expect bool + }{ + { + input: "https://git-codecommit.us-east-2.amazonaws.com", + expect: false, + }, + { + input: "ssh://git-codecommit.us-east-2.amazonaws.com", + expect: false, + }, + { + input: "https://fabrikops2.visualstudio.com/", + expect: true, + }, + { + input: "https://dev.azure.com/myorg/myproject/", + expect: true, + }, + } + for _, testcase := range testcases { + actual := isAzureHost(testcase.input) + if actual != testcase.expect { + t.Errorf("IsAzureHost: expected %v, but got %v on %s", testcase.expect, actual, testcase.input) + } + } +} + +func TestPeelQuery(t *testing.T) { + testcases := []struct { + input string + expect [2]string + }{ + { + input: "somerepos?ref=v1.0.0", + expect: [2]string{"somerepos", "v1.0.0"}, + }, + { + input: "somerepos?version=master", + expect: [2]string{"somerepos", "master"}, + }, + { + input: "somerepos", + expect: [2]string{"somerepos", ""}, + }, + } + for _, testcase := range testcases { + path, ref := peelQuery(testcase.input) + if path != testcase.expect[0] || ref != testcase.expect[1] { + t.Errorf("peelQuery: expected (%s, %s) got (%s, %s) on %s", testcase.expect[0], testcase.expect[1], path, ref, testcase.input) + } + } +} + +func TestIsAWSHost(t *testing.T) { + testcases := []struct { + input string + expect bool + }{ + { + input: "https://git-codecommit.us-east-2.amazonaws.com", + expect: true, + }, + { + input: "ssh://git-codecommit.us-east-2.amazonaws.com", + expect: true, + }, + { + input: "git@github.com:", + expect: false, + }, + { + input: "http://github.com/", + expect: false, + }, + } + for _, testcase := range testcases { + actual := isAWSHost(testcase.input) + if actual != testcase.expect { + t.Errorf("IsAWSHost: expected %v, but got %v on %s", testcase.expect, actual, testcase.input) + } + } +} diff --git a/hack/crawl/crawler/github/crawler.go b/hack/crawl/crawler/github/crawler.go index a4afa316b..cd0b1c7a5 100644 --- a/hack/crawl/crawler/github/crawler.go +++ b/hack/crawl/crawler/github/crawler.go @@ -16,9 +16,9 @@ import ( "strings" "time" - "sigs.k8s.io/kustomize/api/git" - "sigs.k8s.io/kustomize/api/pgmconfig" + "sigs.k8s.io/kustomize/api/konfig" "sigs.k8s.io/kustomize/hack/crawl/crawler" + "sigs.k8s.io/kustomize/hack/crawl/crawler/git" "sigs.k8s.io/kustomize/hack/crawl/doc" "sigs.k8s.io/kustomize/hack/crawl/httpclient" ) @@ -122,7 +122,7 @@ func (gc githubCrawler) FetchDocument(ctx context.Context, d *doc.Document) erro return nil } - for _, file := range pgmconfig.RecognizedKustomizationFileNames() { + for _, file := range konfig.RecognizedKustomizationFileNames() { resp, err = gc.client.GetRawUserContent(url + "/" + file) err := handle(resp, err, "/"+file) if err != nil { diff --git a/hack/crawl/doc/doc.go b/hack/crawl/doc/doc.go index 5093d30b8..5e59dd543 100644 --- a/hack/crawl/doc/doc.go +++ b/hack/crawl/doc/doc.go @@ -5,7 +5,7 @@ import ( "strings" "sigs.k8s.io/kustomize/api/k8sdeps/kunstruct" - "sigs.k8s.io/kustomize/api/pgmconfig" + "sigs.k8s.io/kustomize/api/konfig" "sigs.k8s.io/kustomize/api/types" "sigs.k8s.io/yaml" ) @@ -46,7 +46,7 @@ type set map[string]struct{} // Implements the CrawlerDocument interface. func (doc *KustomizationDocument) GetResources() ([]*Document, error) { isResource := true - for _, suffix := range pgmconfig.RecognizedKustomizationFileNames() { + for _, suffix := range konfig.RecognizedKustomizationFileNames() { if strings.HasSuffix(doc.FilePath, "/"+suffix) { isResource = false } @@ -85,7 +85,7 @@ func (doc *KustomizationDocument) GetResources() ([]*Document, error) { func (doc *KustomizationDocument) readBytes() ([]map[string]interface{}, error) { data := []byte(doc.DocumentData) - for _, suffix := range pgmconfig.RecognizedKustomizationFileNames() { + for _, suffix := range konfig.RecognizedKustomizationFileNames() { if !strings.HasSuffix(doc.FilePath, "/"+suffix) { continue } diff --git a/hack/crawl/doc/docname.go b/hack/crawl/doc/docname.go index 58d93e4f2..36e7da7e0 100644 --- a/hack/crawl/doc/docname.go +++ b/hack/crawl/doc/docname.go @@ -4,7 +4,7 @@ import ( "path" "time" - "sigs.k8s.io/kustomize/api/git" + "sigs.k8s.io/kustomize/hack/crawl/crawler/git" ) type Document struct {