From d08140d3f7e503b3d9163e91a011f177efaf1ad0 Mon Sep 17 00:00:00 2001 From: Haiyan Meng Date: Thu, 14 Nov 2019 13:35:00 -0800 Subject: [PATCH] Remove `api/internal/hack/crawl/crawler/git` dir, use `api/internal/git` instead. --- api/internal/crawl/crawler/git/cloner.go | 121 -------- api/internal/crawl/crawler/git/repospec.go | 219 ------------- .../crawl/crawler/git/repospec_test.go | 288 ------------------ api/internal/crawl/crawler/github/crawler.go | 2 +- api/internal/crawl/doc/docname.go | 2 +- 5 files changed, 2 insertions(+), 630 deletions(-) delete mode 100644 api/internal/crawl/crawler/git/cloner.go delete mode 100644 api/internal/crawl/crawler/git/repospec.go delete mode 100644 api/internal/crawl/crawler/git/repospec_test.go diff --git a/api/internal/crawl/crawler/git/cloner.go b/api/internal/crawl/crawler/git/cloner.go deleted file mode 100644 index 4bffe251d..000000000 --- a/api/internal/crawl/crawler/git/cloner.go +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2019 The Kubernetes Authors. -// SPDX-License-Identifier: Apache-2.0 - -package git - -import ( - "bytes" - "log" - "os/exec" - - "github.com/pkg/errors" - "sigs.k8s.io/kustomize/api/filesys" -) - -// Cloner is a function that can clone a git repo. -type Cloner func(repoSpec *RepoSpec) error - -// ClonerUsingGitExec uses a local git install, as opposed -// to say, some remote API, to obtain a local clone of -// a remote repo. -func ClonerUsingGitExec(repoSpec *RepoSpec) error { - gitProgram, err := exec.LookPath("git") - if err != nil { - return errors.Wrap(err, "no 'git' program on path") - } - repoSpec.Dir, err = filesys.NewTmpConfirmedDir() - if err != nil { - return err - } - cmd := exec.Command( - gitProgram, - "init", - repoSpec.Dir.String()) - var out bytes.Buffer - cmd.Stdout = &out - cmd.Stderr = &out - err = cmd.Run() - if err != nil { - log.Printf("Error initializing empty git repo: %s", out.String()) - return errors.Wrapf( - err, - "trouble initializing empty git repo in %s", - repoSpec.Dir.String()) - } - - cmd = exec.Command( - gitProgram, - "remote", - "add", - "origin", - repoSpec.CloneSpec()) - cmd.Stdout = &out - cmd.Stderr = &out - cmd.Dir = repoSpec.Dir.String() - err = cmd.Run() - if err != nil { - log.Printf("Error setting git remote: %s", out.String()) - return errors.Wrapf( - err, - "trouble adding remote %s", - repoSpec.CloneSpec()) - } - if repoSpec.Ref == "" { - repoSpec.Ref = "master" - } - cmd = exec.Command( - gitProgram, - "fetch", - "--depth=1", - "origin", - repoSpec.Ref) - cmd.Stdout = &out - cmd.Stderr = &out - cmd.Dir = repoSpec.Dir.String() - err = cmd.Run() - if err != nil { - log.Printf("Error performing git fetch: %s", out.String()) - return errors.Wrapf(err, "trouble fetching %s", repoSpec.Ref) - } - - cmd = exec.Command( - gitProgram, - "reset", - "--hard", - "FETCH_HEAD") - cmd.Stdout = &out - cmd.Stderr = &out - cmd.Dir = repoSpec.Dir.String() - err = cmd.Run() - if err != nil { - log.Printf("Error performing git reset: %s", out.String()) - return errors.Wrapf( - err, "trouble hard resetting empty repository to %s", repoSpec.Ref) - } - - cmd = exec.Command( - gitProgram, - "submodule", - "update", - "--init", - "--recursive") - cmd.Stdout = &out - cmd.Dir = repoSpec.Dir.String() - err = cmd.Run() - if err != nil { - return errors.Wrapf(err, "trouble fetching submodules for %s", repoSpec.Ref) - } - - return nil -} - -// DoNothingCloner returns a cloner that only sets -// cloneDir field in the repoSpec. It's assumed that -// the cloneDir is associated with some fake filesystem -// used in a test. -func DoNothingCloner(dir filesys.ConfirmedDir) Cloner { - return func(rs *RepoSpec) error { - rs.Dir = dir - return nil - } -} diff --git a/api/internal/crawl/crawler/git/repospec.go b/api/internal/crawl/crawler/git/repospec.go deleted file mode 100644 index ad8dfc24d..000000000 --- a/api/internal/crawl/crawler/git/repospec.go +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright 2019 The Kubernetes Authors. -// SPDX-License-Identifier: Apache-2.0 - -package git - -import ( - "fmt" - "path/filepath" - "regexp" - "strings" - - "sigs.k8s.io/kustomize/api/filesys" -) - -// Used as a temporary non-empty occupant of the cloneDir -// field, as something distinguishable from the empty string -// in various outputs (especially tests). Not using an -// actual directory name here, as that's a temporary directory -// with a unique name that isn't created until clone time. -const notCloned = filesys.ConfirmedDir("/notCloned") - -// RepoSpec specifies a git repository and a branch and path therein. -type RepoSpec struct { - // Raw, original spec, used to look for cycles. - // TODO(monopole): Drop raw, use processed fields instead. - raw string - - // Host, e.g. github.com - Host string - - // orgRepo name (organization/repoName), - // e.g. kubernetes-sigs/kustomize - OrgRepo string - - // Dir where the orgRepo is cloned to. - Dir filesys.ConfirmedDir - - // Relative path in the repository, and in the cloneDir, - // to a Kustomization. - Path string - - // Branch or tag reference. - Ref string - - // e.g. .git or empty in case of _git is present - GitSuffix string -} - -// CloneSpec returns a string suitable for "git clone {spec}". -func (x *RepoSpec) CloneSpec() string { - if isAzureHost(x.Host) || isAWSHost(x.Host) { - return x.Host + x.OrgRepo - } - return x.Host + x.OrgRepo + x.GitSuffix -} - -func (x *RepoSpec) CloneDir() filesys.ConfirmedDir { - return x.Dir -} - -func (x *RepoSpec) Raw() string { - return x.raw -} - -func (x *RepoSpec) AbsPath() string { - return x.Dir.Join(x.Path) -} - -func (x *RepoSpec) Cleaner(fSys filesys.FileSystem) func() error { - return func() error { return fSys.RemoveAll(x.Dir.String()) } -} - -// From strings like git@github.com:someOrg/someRepo.git or -// https://github.com/someOrg/someRepo?ref=someHash, extract -// the parts. -func NewRepoSpecFromUrl(n string) (*RepoSpec, error) { - if filepath.IsAbs(n) { - return nil, fmt.Errorf("uri looks like abs path: %s", n) - } - host, orgRepo, path, gitRef, gitSuffix := parseGitUrl(n) - if orgRepo == "" { - return nil, fmt.Errorf("url lacks orgRepo: %s", n) - } - if host == "" { - return nil, fmt.Errorf("url lacks host: %s", n) - } - return &RepoSpec{ - raw: n, Host: host, OrgRepo: orgRepo, - Dir: notCloned, Path: path, Ref: gitRef, GitSuffix: gitSuffix}, nil -} - -const ( - refQuery = "?ref=" - refQueryRegex = "\\?(version|ref)=" - gitSuffix = ".git" - gitDelimiter = "_git/" -) - -// From strings like git@github.com:someOrg/someRepo.git or -// https://github.com/someOrg/someRepo?ref=someHash, extract -// the parts. -func parseGitUrl(n string) ( - host string, orgRepo string, path string, gitRef string, gitSuff string) { - - if strings.Contains(n, gitDelimiter) { - index := strings.Index(n, gitDelimiter) - // Adding _git/ to host - host = normalizeGitHostSpec(n[:index+len(gitDelimiter)]) - orgRepo = strings.Split(strings.Split(n[index+len(gitDelimiter):], "/")[0], "?")[0] - path, gitRef = peelQuery(n[index+len(gitDelimiter)+len(orgRepo):]) - return - } - host, n = parseHostSpec(n) - gitSuff = gitSuffix - if strings.Contains(n, gitSuffix) { - index := strings.Index(n, gitSuffix) - orgRepo = n[0:index] - n = n[index+len(gitSuffix):] - path, gitRef = peelQuery(n) - return - } - - i := strings.Index(n, "/") - if i < 1 { - return "", "", "", "", "" - } - j := strings.Index(n[i+1:], "/") - if j >= 0 { - j += i + 1 - orgRepo = n[:j] - path, gitRef = peelQuery(n[j+1:]) - return - } - path = "" - orgRepo, gitRef = peelQuery(n) - return host, orgRepo, path, gitRef, gitSuff -} - -func peelQuery(arg string) (string, string) { - - r, _ := regexp.Compile(refQueryRegex) - j := r.FindStringIndex(arg) - - if len(j) > 0 { - return arg[:j[0]], arg[j[0]+len(r.FindString(arg)):] - } - return arg, "" -} - -func parseHostSpec(n string) (string, string) { - var host string - // Start accumulating the host part. - for _, p := range []string{ - // Order matters here. - "git::", "gh:", "ssh://", "https://", "http://", - "git@", "github.com:", "github.com/"} { - if len(p) < len(n) && strings.ToLower(n[:len(p)]) == p { - n = n[len(p):] - host += p - } - } - if host == "git@" { - i := strings.Index(n, "/") - if i > -1 { - host += n[:i+1] - n = n[i+1:] - } else { - i = strings.Index(n, ":") - if i > -1 { - host += n[:i+1] - n = n[i+1:] - } - } - return host, n - } - - // If host is a http(s) or ssh URL, grab the domain part. - for _, p := range []string{ - "ssh://", "https://", "http://"} { - if strings.HasSuffix(host, p) { - i := strings.Index(n, "/") - if i > -1 { - host = host + n[0:i+1] - n = n[i+1:] - } - break - } - } - - return normalizeGitHostSpec(host), n -} - -func normalizeGitHostSpec(host string) string { - s := strings.ToLower(host) - if strings.Contains(s, "github.com") { - if strings.Contains(s, "git@") || strings.Contains(s, "ssh:") { - host = "git@github.com:" - } else { - host = "https://github.com/" - } - } - if strings.HasPrefix(s, "git::") { - host = strings.TrimPrefix(s, "git::") - } - return host -} - -// The format of Azure repo URL is documented -// https://docs.microsoft.com/en-us/azure/devops/repos/git/clone?view=vsts&tabs=visual-studio#clone_url -func isAzureHost(host string) bool { - return strings.Contains(host, "dev.azure.com") || - strings.Contains(host, "visualstudio.com") -} - -// The format of AWS repo URL is documented -// https://docs.aws.amazon.com/codecommit/latest/userguide/regions.html -func isAWSHost(host string) bool { - return strings.Contains(host, "amazonaws.com") -} diff --git a/api/internal/crawl/crawler/git/repospec_test.go b/api/internal/crawl/crawler/git/repospec_test.go deleted file mode 100644 index 3a1a760f9..000000000 --- a/api/internal/crawl/crawler/git/repospec_test.go +++ /dev/null @@ -1,288 +0,0 @@ -// Copyright 2019 The Kubernetes Authors. -// SPDX-License-Identifier: Apache-2.0 - -package git - -import ( - "fmt" - "path/filepath" - "strings" - "testing" -) - -var orgRepos = []string{"someOrg/someRepo", "kubernetes/website"} - -var pathNames = []string{"README.md", "foo/krusty.txt", ""} - -var hrefArgs = []string{"someBranch", "master", "v0.1.0", ""} - -var hostNamesRawAndNormalized = [][]string{ - {"gh:", "gh:"}, - {"GH:", "gh:"}, - {"gitHub.com/", "https://github.com/"}, - {"github.com:", "https://github.com/"}, - {"http://github.com/", "https://github.com/"}, - {"https://github.com/", "https://github.com/"}, - {"hTTps://github.com/", "https://github.com/"}, - {"https://git-codecommit.us-east-2.amazonaws.com/", "https://git-codecommit.us-east-2.amazonaws.com/"}, - {"https://fabrikops2.visualstudio.com/", "https://fabrikops2.visualstudio.com/"}, - {"ssh://git.example.com:7999/", "ssh://git.example.com:7999/"}, - {"git::https://gitlab.com/", "https://gitlab.com/"}, - {"git::http://git.example.com/", "http://git.example.com/"}, - {"git::https://git.example.com/", "https://git.example.com/"}, - {"git@github.com:", "git@github.com:"}, - {"git@github.com/", "git@github.com:"}, - {"git@gitlab2.sqtools.ru:10022/", "git@gitlab2.sqtools.ru:10022/"}, -} - -func makeUrl(hostFmt, orgRepo, path, href string) string { - if len(path) > 0 { - orgRepo = filepath.Join(orgRepo, path) - } - url := hostFmt + orgRepo - if href != "" { - url += refQuery + href - } - return url -} - -func TestNewRepoSpecFromUrl(t *testing.T) { - var bad [][]string - for _, tuple := range hostNamesRawAndNormalized { - hostRaw := tuple[0] - hostSpec := tuple[1] - for _, orgRepo := range orgRepos { - for _, pathName := range pathNames { - for _, hrefArg := range hrefArgs { - uri := makeUrl(hostRaw, orgRepo, pathName, hrefArg) - rs, err := NewRepoSpecFromUrl(uri) - if err != nil { - t.Errorf("problem %v", err) - } - if rs.Host != hostSpec { - bad = append(bad, []string{"host", uri, rs.Host, hostSpec}) - } - if rs.OrgRepo != orgRepo { - bad = append(bad, []string{"orgRepo", uri, rs.OrgRepo, orgRepo}) - } - if rs.Path != pathName { - bad = append(bad, []string{"path", uri, rs.Path, pathName}) - } - if rs.Ref != hrefArg { - bad = append(bad, []string{"ref", uri, rs.Ref, hrefArg}) - } - } - } - } - } - if len(bad) > 0 { - for _, tuple := range bad { - fmt.Printf("\n"+ - " from uri: %s\n"+ - " actual %4s: %s\n"+ - "expected %4s: %s\n", - tuple[1], tuple[0], tuple[2], tuple[0], tuple[3]) - } - t.Fail() - } -} - -var badData = [][]string{ - {"/tmp", "uri looks like abs path"}, - {"iauhsdiuashduas", "url lacks orgRepo"}, - {"htxxxtp://github.com/", "url lacks host"}, - {"ssh://git.example.com", "url lacks orgRepo"}, - {"git::___", "url lacks orgRepo"}, -} - -func TestNewRepoSpecFromUrlErrors(t *testing.T) { - for _, tuple := range badData { - _, err := NewRepoSpecFromUrl(tuple[0]) - if err == nil { - t.Error("expected error") - } - if !strings.Contains(err.Error(), tuple[1]) { - t.Errorf("unexpected error: %s", err) - } - } -} - -func TestNewRepoSpecFromUrl_CloneSpecs(t *testing.T) { - testcases := []struct { - input string - cloneSpec string - absPath string - ref string - }{ - { - input: "https://git-codecommit.us-east-2.amazonaws.com/someorg/somerepo/somedir", - cloneSpec: "https://git-codecommit.us-east-2.amazonaws.com/someorg/somerepo", - absPath: notCloned.Join("somedir"), - ref: "", - }, - { - input: "https://git-codecommit.us-east-2.amazonaws.com/someorg/somerepo/somedir?ref=testbranch", - cloneSpec: "https://git-codecommit.us-east-2.amazonaws.com/someorg/somerepo", - absPath: notCloned.Join("somedir"), - ref: "testbranch", - }, - { - input: "https://fabrikops2.visualstudio.com/someorg/somerepo?ref=master", - cloneSpec: "https://fabrikops2.visualstudio.com/someorg/somerepo", - absPath: notCloned.String(), - ref: "master", - }, - { - input: "http://github.com/someorg/somerepo/somedir", - cloneSpec: "https://github.com/someorg/somerepo.git", - absPath: notCloned.Join("somedir"), - ref: "", - }, - { - input: "git@github.com:someorg/somerepo/somedir", - cloneSpec: "git@github.com:someorg/somerepo.git", - absPath: notCloned.Join("somedir"), - ref: "", - }, - { - input: "git@gitlab2.sqtools.ru:10022/infra/kubernetes/thanos-base.git?ref=v0.1.0", - cloneSpec: "git@gitlab2.sqtools.ru:10022/infra/kubernetes/thanos-base.git", - absPath: notCloned.String(), - ref: "v0.1.0", - }, - { - input: "git@bitbucket.org:company/project.git//path?ref=branch", - cloneSpec: "git@bitbucket.org:company/project.git", - absPath: notCloned.Join("path"), - ref: "branch", - }, - { - input: "https://itfs.mycompany.com/collection/project/_git/somerepos", - cloneSpec: "https://itfs.mycompany.com/collection/project/_git/somerepos", - absPath: notCloned.String(), - ref: "", - }, - { - input: "https://itfs.mycompany.com/collection/project/_git/somerepos?version=v1.0.0", - cloneSpec: "https://itfs.mycompany.com/collection/project/_git/somerepos", - absPath: notCloned.String(), - ref: "v1.0.0", - }, - { - input: "https://itfs.mycompany.com/collection/project/_git/somerepos/somedir?version=v1.0.0", - cloneSpec: "https://itfs.mycompany.com/collection/project/_git/somerepos", - absPath: notCloned.Join("somedir"), - ref: "v1.0.0", - }, - { - input: "git::https://itfs.mycompany.com/collection/project/_git/somerepos", - cloneSpec: "https://itfs.mycompany.com/collection/project/_git/somerepos", - absPath: notCloned.String(), - ref: "", - }, - } - for _, testcase := range testcases { - rs, err := NewRepoSpecFromUrl(testcase.input) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if rs.CloneSpec() != testcase.cloneSpec { - t.Errorf("CloneSpec expected to be %v, but got %v on %s", - testcase.cloneSpec, rs.CloneSpec(), testcase.input) - } - if rs.AbsPath() != testcase.absPath { - t.Errorf("AbsPath expected to be %v, but got %v on %s", - testcase.absPath, rs.AbsPath(), testcase.input) - } - if rs.Ref != testcase.ref { - t.Errorf("ref expected to be %v, but got %v on %s", - testcase.ref, rs.Ref, testcase.input) - } - } -} - -func TestIsAzureHost(t *testing.T) { - testcases := []struct { - input string - expect bool - }{ - { - input: "https://git-codecommit.us-east-2.amazonaws.com", - expect: false, - }, - { - input: "ssh://git-codecommit.us-east-2.amazonaws.com", - expect: false, - }, - { - input: "https://fabrikops2.visualstudio.com/", - expect: true, - }, - { - input: "https://dev.azure.com/myorg/myproject/", - expect: true, - }, - } - for _, testcase := range testcases { - actual := isAzureHost(testcase.input) - if actual != testcase.expect { - t.Errorf("IsAzureHost: expected %v, but got %v on %s", testcase.expect, actual, testcase.input) - } - } -} - -func TestPeelQuery(t *testing.T) { - testcases := []struct { - input string - expect [2]string - }{ - { - input: "somerepos?ref=v1.0.0", - expect: [2]string{"somerepos", "v1.0.0"}, - }, - { - input: "somerepos?version=master", - expect: [2]string{"somerepos", "master"}, - }, - { - input: "somerepos", - expect: [2]string{"somerepos", ""}, - }, - } - for _, testcase := range testcases { - path, ref := peelQuery(testcase.input) - if path != testcase.expect[0] || ref != testcase.expect[1] { - t.Errorf("peelQuery: expected (%s, %s) got (%s, %s) on %s", testcase.expect[0], testcase.expect[1], path, ref, testcase.input) - } - } -} - -func TestIsAWSHost(t *testing.T) { - testcases := []struct { - input string - expect bool - }{ - { - input: "https://git-codecommit.us-east-2.amazonaws.com", - expect: true, - }, - { - input: "ssh://git-codecommit.us-east-2.amazonaws.com", - expect: true, - }, - { - input: "git@github.com:", - expect: false, - }, - { - input: "http://github.com/", - expect: false, - }, - } - for _, testcase := range testcases { - actual := isAWSHost(testcase.input) - if actual != testcase.expect { - t.Errorf("IsAWSHost: expected %v, but got %v on %s", testcase.expect, actual, testcase.input) - } - } -} diff --git a/api/internal/crawl/crawler/github/crawler.go b/api/internal/crawl/crawler/github/crawler.go index cd0b1c7a5..3973cdf84 100644 --- a/api/internal/crawl/crawler/github/crawler.go +++ b/api/internal/crawl/crawler/github/crawler.go @@ -16,9 +16,9 @@ import ( "strings" "time" + "sigs.k8s.io/kustomize/api/internal/git" "sigs.k8s.io/kustomize/api/konfig" "sigs.k8s.io/kustomize/hack/crawl/crawler" - "sigs.k8s.io/kustomize/hack/crawl/crawler/git" "sigs.k8s.io/kustomize/hack/crawl/doc" "sigs.k8s.io/kustomize/hack/crawl/httpclient" ) diff --git a/api/internal/crawl/doc/docname.go b/api/internal/crawl/doc/docname.go index 36e7da7e0..44fef0236 100644 --- a/api/internal/crawl/doc/docname.go +++ b/api/internal/crawl/doc/docname.go @@ -4,7 +4,7 @@ import ( "path" "time" - "sigs.k8s.io/kustomize/hack/crawl/crawler/git" + "sigs.k8s.io/kustomize/api/internal/git" ) type Document struct {