From ad400cd13dce88ad3b1e9ec0ba1cddecf662a948 Mon Sep 17 00:00:00 2001 From: jregan Date: Sun, 27 Jan 2019 07:26:37 -0800 Subject: [PATCH] Introduce RepoSpec. This PR closes a TODO in the fileLoader type to replace the string representing a git repo url with a struct called holding validated fields. New struct is called RepoSpec. It holds what we need in coming PRs for remote repo base containment checks, and will also allow the overlay cycle prevention code to take into account different host schemes or branches. The struct is in a new file called repospec.go. The new file is mostly just a code move of url parsing code that was in cloner.go. Git "blame" history preserved through the move. No change in execution (see final commit by itself). --- pkg/fs/confirmeddir.go | 12 ++ pkg/git/cloner.go | 184 +++-------------------- pkg/git/cloner_test.go | 276 ---------------------------------- pkg/git/repospec.go | 107 ++++++------- pkg/git/repospec_test.go | 19 ++- pkg/loader/fileloader.go | 23 +-- pkg/loader/fileloader_test.go | 5 +- 7 files changed, 104 insertions(+), 522 deletions(-) diff --git a/pkg/fs/confirmeddir.go b/pkg/fs/confirmeddir.go index a07335a89..6d2f5c30d 100644 --- a/pkg/fs/confirmeddir.go +++ b/pkg/fs/confirmeddir.go @@ -17,6 +17,7 @@ limitations under the License. package fs import ( + "io/ioutil" "path/filepath" "strings" ) @@ -25,6 +26,17 @@ import ( // that was confirmed to point to an existing directory. type ConfirmedDir string +// Return a temporary dir, else error. +// The directory is cleaned, no symlinks, etc. so its +// returned as a ConfirmedDir. +func NewTmpConfirmedDir() (ConfirmedDir, error) { + n, err := ioutil.TempDir("", "kustomize-") + if err != nil { + return "", err + } + return ConfirmedDir(n), nil +} + // HasPrefix returns true if the directory argument // is a prefix of self (d) from the point of view of // a file system. diff --git a/pkg/git/cloner.go b/pkg/git/cloner.go index d2f296974..27e6ef712 100644 --- a/pkg/git/cloner.go +++ b/pkg/git/cloner.go @@ -18,193 +18,51 @@ package git import ( "bytes" - "io/ioutil" "os/exec" - "path/filepath" - "regexp" - "strings" "github.com/pkg/errors" + "sigs.k8s.io/kustomize/pkg/fs" ) // Cloner is a function that can clone a git repo. -type Cloner func(url string) ( - // Directory where the repo is cloned to. - checkoutDir string, - // Relative path in the checkoutDir to location - // of kustomization file. - pathInCoDir string, - // Any error encountered when cloning. - err error) +type Cloner func(url string) (*RepoSpec, error) -// IsRepoUrl checks if a string is likely a github repo Url. -func IsRepoUrl(arg string) bool { - arg = strings.ToLower(arg) - return !filepath.IsAbs(arg) && - (strings.HasPrefix(arg, "git::") || - strings.HasPrefix(arg, "gh:") || - strings.HasPrefix(arg, "ssh:") || - strings.HasPrefix(arg, "github.com") || - strings.HasPrefix(arg, "git@") || - strings.Index(arg, "github.com/") > -1 || - isAzureHost(arg) || isAWSHost(arg)) -} - -func makeTmpDir() (string, error) { - return ioutil.TempDir("", "kustomize-") -} - -func ClonerUsingGitExec(spec string) ( - checkoutDir string, pathInCoDir string, err error) { +// ClonerUsingGitExec uses a local git install, as opposed +// to say, some remote API, to obtain a local clone of +// a remote repo. +func ClonerUsingGitExec(spec string) (*RepoSpec, error) { gitProgram, err := exec.LookPath("git") if err != nil { - return "", "", errors.Wrap(err, "no 'git' program on path") + return nil, errors.Wrap(err, "no 'git' program on path") } - checkoutDir, err = makeTmpDir() + repoSpec, err := NewRepoSpecFromUrl(spec) if err != nil { - return + return nil, err } - repo, pathInCoDir, gitRef, err := parseUrl(spec) + repoSpec.cloneDir, err = fs.NewTmpConfirmedDir() if err != nil { - return + return nil, err } cmd := exec.Command( gitProgram, "clone", - repo, - checkoutDir) + repoSpec.repo, + repoSpec.cloneDir.String()) var out bytes.Buffer cmd.Stdout = &out err = cmd.Run() if err != nil { - return "", "", - errors.Wrapf(err, "trouble cloning %s", spec) + return nil, errors.Wrapf(err, "trouble cloning %s", spec) } - if gitRef == "" { - return + if repoSpec.ref == "" { + return repoSpec, nil } - cmd = exec.Command(gitProgram, "checkout", gitRef) - cmd.Dir = checkoutDir + cmd = exec.Command(gitProgram, "checkout", repoSpec.ref) + cmd.Dir = repoSpec.cloneDir.String() err = cmd.Run() if err != nil { - return "", "", - errors.Wrapf(err, "trouble checking out href %s", gitRef) + return nil, errors.Wrapf( + err, "trouble checking out href %s", repoSpec.ref) } - return checkoutDir, pathInCoDir, nil -} - -func parseUrl(n string) ( - repo string, path string, gitRef string, err error) { - host, repo, path, gitRef, err := parseGithubUrl(n) - if err != nil { - return - } - if isAzureHost(host) || isAWSHost(host) { - repo = host + repo - return - } - repo = host + repo + ".git" - return -} - -const ( - refQuery = "?ref=" - gitSuffix = ".git" -) - -// From strings like git@github.com:someOrg/someRepo.git or -// https://github.com/someOrg/someRepo?ref=someHash, extract -// the parts. -func parseGithubUrl(n string) ( - host string, repo string, path string, gitRef string, err error) { - host, n = parseHostSpec(n) - host = normalizeGitHostSpec(host) - - if strings.HasSuffix(n, gitSuffix) { - repo = n[0 : len(n)-len(gitSuffix)] - return - } - if strings.Contains(n, gitSuffix) { - index := strings.Index(n, gitSuffix) - repo = n[0:index] - n = n[index+len(gitSuffix):] - path, gitRef = peelQuery(n) - return - } - i := strings.Index(n, "/") - if i < 1 { - return "", "", "", "", errors.New("no separator") - } - j := strings.Index(n[i+1:], "/") - if j >= 0 { - j += i + 1 - repo = n[:j] - path, gitRef = peelQuery(n[j+1:]) - } else { - path = "" - repo, gitRef = peelQuery(n) - } - return -} - -func peelQuery(arg string) (string, string) { - j := strings.Index(arg, refQuery) - if j >= 0 { - return arg[:j], arg[j+len(refQuery):] - } - return arg, "" -} - -func parseHostSpec(n string) (string, string) { - var host string - for _, p := range []string{ - // Order matters here. - "git::", "gh:", "ssh://", "https://", "http://", - "git@", "github.com:", "github.com/"} { - if strings.ToLower(n[:len(p)]) == p { - n = n[len(p):] - host = host + p - } - } - - // If host is a http(s) or ssh URL, grab the domain part. - for _, p := range []string{ - "ssh://", "https://", "http://"} { - if strings.HasSuffix(strings.ToLower(host), p) { - index := regexp.MustCompile("^(.*?)/").FindStringIndex(n) - if len(index) > 0 { - host = host + n[0:index[len(index)-1]] - n = n[index[len(index)-1]:] - } - } - } - return host, n -} - -func normalizeGitHostSpec(host string) string { - s := strings.ToLower(host) - if strings.Contains(s, "github.com") { - if strings.Contains(s, "git@") || strings.Contains(s, "ssh:") { - host = "git@github.com:" - } else { - host = "https://github.com/" - } - } - if strings.HasPrefix(s, "git::") { - host = strings.TrimLeft(s, "git::") - } - return host -} - -// The format of Azure repo URL is documented -// https://docs.microsoft.com/en-us/azure/devops/repos/git/clone?view=vsts&tabs=visual-studio#clone_url -func isAzureHost(host string) bool { - return strings.Contains(host, "dev.azure.com") || - strings.Contains(host, "visualstudio.com") -} - -// The format of AWS repo URL is documented -// https://docs.aws.amazon.com/codecommit/latest/userguide/regions.html -func isAWSHost(host string) bool { - return strings.Contains(host, "amazonaws.com") + return repoSpec, nil } diff --git a/pkg/git/cloner_test.go b/pkg/git/cloner_test.go index 67f5e9c04..d3816aacf 100644 --- a/pkg/git/cloner_test.go +++ b/pkg/git/cloner_test.go @@ -15,279 +15,3 @@ limitations under the License. */ package git - -import ( - "fmt" - "path/filepath" - "testing" -) - -func TestIsRepoURL(t *testing.T) { - - testcases := []struct { - input string - expected bool - }{ - { - input: "https://github.com/org/repo", - expected: true, - }, - { - input: "github.com/org/repo", - expected: true, - }, - { - input: "git@github.com:org/repo", - expected: true, - }, - { - input: "gh:org/repo", - expected: true, - }, - { - input: "git::https://gitlab.com/org/repo", - expected: true, - }, - { - input: "git@gitlab2.sqtools.ru:10022/infra/kubernetes/thanos-base.git?ref=v0.1.0", - expected: true, - }, - { - input: "git@bitbucket.org:org/repo.git", - expected: true, - }, - { - input: "git::http://git.example.com/org/repo.git", - expected: true, - }, - { - input: "git::https://git.example.com/org/repo.git", - expected: true, - }, - { - input: "ssh://git.example.com:7999/org/repo.git", - expected: true, - }, - { - input: "/github.com/org/repo", - expected: false, - }, - { - input: "/abs/path/to/file", - expected: false, - }, - { - input: "../relative", - expected: false, - }, - { - input: "foo", - expected: false, - }, - { - input: ".", - expected: false, - }, - { - input: "", - expected: false, - }, - } - for _, tc := range testcases { - actual := IsRepoUrl(tc.input) - if actual != tc.expected { - t.Errorf("unexpected error: unexpected result %t for input %s", actual, tc.input) - } - } -} - -var repoNames = []string{"someOrg/someRepo", "kubernetes/website"} - -var paths = []string{"README.md", "foo/krusty.txt", ""} - -var hrefArgs = []string{"someBranch", ""} - -var extractFmts = map[string]string{ - "gh:%s": "gh:", - "GH:%s": "gh:", - "gitHub.com/%s": "https://github.com/", - "https://github.com/%s": "https://github.com/", - "hTTps://github.com/%s": "https://github.com/", - "git::https://gitlab.com/%s": "https://gitlab.com/", - "github.com:%s": "https://github.com/", - "git::http://git.example.com/%s": "http://git.example.com/", - "git::https://git.example.com/%s": "https://git.example.com/", - "ssh://git.example.com:7999/%s": "ssh://git.example.com:7999/", -} - -func TestParseGithubUrl(t *testing.T) { - for _, repoName := range repoNames { - for _, pathName := range paths { - for extractFmt, hostSpec := range extractFmts { - for _, hrefArg := range hrefArgs { - spec := repoName - if len(pathName) > 0 { - spec = filepath.Join(spec, pathName) - } - input := fmt.Sprintf(extractFmt, spec) - if hrefArg != "" { - input = input + refQuery + hrefArg - } - if !IsRepoUrl(input) { - t.Errorf("Should smell like github arg: %s\n", input) - continue - } - host, repo, path, gitRef, err := parseGithubUrl(input) - if err != nil { - t.Errorf("problem %v", err) - } - if host != hostSpec { - t.Errorf("\n"+ - " from %s\n"+ - " actual host %s\n"+ - "expected host %s\n", input, host, hostSpec) - } - if repo != repoName { - t.Errorf("\n"+ - " from %s\n"+ - " actual Repo %s\n"+ - "expected Repo %s\n", input, repo, repoName) - } - if path != pathName { - t.Errorf("\n"+ - " from %s\n"+ - " actual Path %s\n"+ - "expected Path %s\n", input, path, pathName) - } - if gitRef != hrefArg { - t.Errorf("\n"+ - " from %s\n"+ - " actual Href %s\n"+ - "expected Href %s\n", input, gitRef, hrefArg) - } - } - } - } - } -} - -func TestParseUrl(t *testing.T) { - testcases := []struct { - input string - repo string - path string - ref string - }{ - { - input: "https://git-codecommit.us-east-2.amazonaws.com/someorg/somerepo/somedir", - repo: "https://git-codecommit.us-east-2.amazonaws.com/someorg/somerepo", - path: "somedir", - ref: "", - }, - { - input: "https://git-codecommit.us-east-2.amazonaws.com/someorg/somerepo/somedir?ref=testbranch", - repo: "https://git-codecommit.us-east-2.amazonaws.com/someorg/somerepo", - path: "somedir", - ref: "testbranch", - }, - { - input: "https://fabrikops2.visualstudio.com/someorg/somerepo?ref=master", - repo: "https://fabrikops2.visualstudio.com/someorg/somerepo", - path: "", - ref: "master", - }, - { - input: "http://github.com/someorg/somerepo/somedir", - repo: "https://github.com/someorg/somerepo.git", - path: "somedir", - ref: "", - }, - { - input: "git@github.com:someorg/somerepo/somedir", - repo: "git@github.com:someorg/somerepo.git", - path: "somedir", - ref: "", - }, - { - input: "git@gitlab2.sqtools.ru:10022/infra/kubernetes/thanos-base.git?ref=v0.1.0", - repo: "git@gitlab2.sqtools.ru:10022/infra/kubernetes/thanos-base.git", - path: "", - ref: "v0.1.0", - }, - } - for _, testcase := range testcases { - repo, path, ref, err := parseUrl(testcase.input) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if repo != testcase.repo { - t.Errorf("repo expected to be %v, but got %v on %s", testcase.repo, repo, testcase.input) - } - if path != testcase.path { - t.Errorf("path expected to be %v, but got %v on %s", testcase.path, path, testcase.input) - } - if ref != testcase.ref { - t.Errorf("ref expected to be %v, but got %v on %s", testcase.ref, ref, testcase.input) - } - } -} - -func TestIsAzureHost(t *testing.T) { - testcases := []struct { - input string - expect bool - }{ - { - input: "https://git-codecommit.us-east-2.amazonaws.com", - expect: false, - }, - { - input: "ssh://git-codecommit.us-east-2.amazonaws.com", - expect: false, - }, - { - input: "https://fabrikops2.visualstudio.com/", - expect: true, - }, - { - input: "https://dev.azure.com/myorg/myproject/", - expect: true, - }, - } - for _, testcase := range testcases { - actual := isAzureHost(testcase.input) - if actual != testcase.expect { - t.Errorf("IsAzureHost: expected %v, but got %v on %s", testcase.expect, actual, testcase.input) - } - } -} - -func TestIsAWSHost(t *testing.T) { - testcases := []struct { - input string - expect bool - }{ - { - input: "https://git-codecommit.us-east-2.amazonaws.com", - expect: true, - }, - { - input: "ssh://git-codecommit.us-east-2.amazonaws.com", - expect: true, - }, - { - input: "git@github.com:", - expect: false, - }, - { - input: "http://github.com/", - expect: false, - }, - } - for _, testcase := range testcases { - actual := isAWSHost(testcase.input) - if actual != testcase.expect { - t.Errorf("IsAWSHost: expected %v, but got %v on %s", testcase.expect, actual, testcase.input) - } - } -} diff --git a/pkg/git/repospec.go b/pkg/git/repospec.go index d2f296974..90b5f38cc 100644 --- a/pkg/git/repospec.go +++ b/pkg/git/repospec.go @@ -17,25 +17,49 @@ limitations under the License. package git import ( - "bytes" - "io/ioutil" - "os/exec" "path/filepath" "regexp" "strings" "github.com/pkg/errors" + "sigs.k8s.io/kustomize/pkg/fs" ) -// Cloner is a function that can clone a git repo. -type Cloner func(url string) ( - // Directory where the repo is cloned to. - checkoutDir string, - // Relative path in the checkoutDir to location - // of kustomization file. - pathInCoDir string, - // Any error encountered when cloning. - err error) +// RepoSpec specifies a git repo and a branch and path therein. +type RepoSpec struct { + // Raw, original spec, used to look for cycles. + // TODO(monopole): Drop raw, use processed fields instead. + raw string + + // Repo, e.g. github.com/kubernetes-sigs/kustomize + repo string + + // ConfirmedDir where the repo is cloned to. + cloneDir fs.ConfirmedDir + + // Relative path in the repo, and in the cloneDir, + // to a Kustomization. + path string + + // Branch or tag reference. + ref string +} + +func (x *RepoSpec) CloneDir() fs.ConfirmedDir { + return x.cloneDir +} + +func (x *RepoSpec) Raw() string { + return x.raw +} + +func (x *RepoSpec) AbsPath() string { + return x.cloneDir.Join(x.path) +} + +func (x *RepoSpec) Cleaner(fSys fs.FileSystem) func() error { + return func() error { return fSys.RemoveAll(x.cloneDir.String()) } +} // IsRepoUrl checks if a string is likely a github repo Url. func IsRepoUrl(arg string) bool { @@ -50,61 +74,22 @@ func IsRepoUrl(arg string) bool { isAzureHost(arg) || isAWSHost(arg)) } -func makeTmpDir() (string, error) { - return ioutil.TempDir("", "kustomize-") -} - -func ClonerUsingGitExec(spec string) ( - checkoutDir string, pathInCoDir string, err error) { - gitProgram, err := exec.LookPath("git") - if err != nil { - return "", "", errors.Wrap(err, "no 'git' program on path") - } - checkoutDir, err = makeTmpDir() - if err != nil { - return - } - repo, pathInCoDir, gitRef, err := parseUrl(spec) - if err != nil { - return - } - cmd := exec.Command( - gitProgram, - "clone", - repo, - checkoutDir) - var out bytes.Buffer - cmd.Stdout = &out - err = cmd.Run() - if err != nil { - return "", "", - errors.Wrapf(err, "trouble cloning %s", spec) - } - if gitRef == "" { - return - } - cmd = exec.Command(gitProgram, "checkout", gitRef) - cmd.Dir = checkoutDir - err = cmd.Run() - if err != nil { - return "", "", - errors.Wrapf(err, "trouble checking out href %s", gitRef) - } - return checkoutDir, pathInCoDir, nil -} - -func parseUrl(n string) ( - repo string, path string, gitRef string, err error) { +func NewRepoSpecFromUrl(n string) (*RepoSpec, error) { host, repo, path, gitRef, err := parseGithubUrl(n) if err != nil { - return + return nil, err } if isAzureHost(host) || isAWSHost(host) { repo = host + repo - return + } else { + repo = host + repo + ".git" } - repo = host + repo + ".git" - return + return &RepoSpec{raw: n, repo: repo, path: path, ref: gitRef}, nil +} + +func NewRepoSpec( + raw string, cloneDir fs.ConfirmedDir, path string) *RepoSpec { + return &RepoSpec{raw: raw, cloneDir: cloneDir, path: path} } const ( diff --git a/pkg/git/repospec_test.go b/pkg/git/repospec_test.go index 67f5e9c04..dd1c76fee 100644 --- a/pkg/git/repospec_test.go +++ b/pkg/git/repospec_test.go @@ -171,7 +171,7 @@ func TestParseGithubUrl(t *testing.T) { } } -func TestParseUrl(t *testing.T) { +func TestNewRepoSpecFromUrl(t *testing.T) { testcases := []struct { input string repo string @@ -216,18 +216,21 @@ func TestParseUrl(t *testing.T) { }, } for _, testcase := range testcases { - repo, path, ref, err := parseUrl(testcase.input) + rs, err := NewRepoSpecFromUrl(testcase.input) if err != nil { t.Errorf("Unexpected error: %v", err) } - if repo != testcase.repo { - t.Errorf("repo expected to be %v, but got %v on %s", testcase.repo, repo, testcase.input) + if rs.repo != testcase.repo { + t.Errorf("repo expected to be %v, but got %v on %s", + testcase.repo, rs.repo, testcase.input) } - if path != testcase.path { - t.Errorf("path expected to be %v, but got %v on %s", testcase.path, path, testcase.input) + if rs.path != testcase.path { + t.Errorf("path expected to be %v, but got %v on %s", + testcase.path, rs.path, testcase.input) } - if ref != testcase.ref { - t.Errorf("ref expected to be %v, but got %v on %s", testcase.ref, ref, testcase.input) + if rs.ref != testcase.ref { + t.Errorf("ref expected to be %v, but got %v on %s", + testcase.ref, rs.ref, testcase.input) } } } diff --git a/pkg/loader/fileloader.go b/pkg/loader/fileloader.go index ba59f39b8..11138eb17 100644 --- a/pkg/loader/fileloader.go +++ b/pkg/loader/fileloader.go @@ -86,9 +86,9 @@ type fileLoader struct { // The Load function reads from this directory, // or directories below it. root fs.ConfirmedDir - // URI, if any, used for a download into root. - // TODO(monopole): use non-string type. - uri string + // If this is non-nil, the files were + // obtained from the given repository. + repoSpec *git.RepoSpec // File system utilities. fSys fs.FileSystem // Used to clone repositories. @@ -179,26 +179,26 @@ func (l *fileLoader) New(path string) (ifc.Loader, error) { func newLoaderAtGitClone( uri string, fSys fs.FileSystem, referrer *fileLoader, cloner git.Cloner) (ifc.Loader, error) { - tmpDirForRepo, pathInRepo, err := cloner(uri) + repoSpec, err := cloner(uri) if err != nil { return nil, err } - root, f, err := fSys.CleanedAbs( - filepath.Join(tmpDirForRepo, pathInRepo)) + root, f, err := fSys.CleanedAbs(repoSpec.AbsPath()) if err != nil { return nil, err } if f != "" { return nil, fmt.Errorf( - "'%s' refers to file '%s'; expecting directory", pathInRepo, f) + "'%s' refers to file '%s'; expecting directory", + repoSpec.AbsPath(), f) } return &fileLoader{ root: root, referrer: referrer, - uri: uri, + repoSpec: repoSpec, fSys: fSys, cloner: cloner, - cleaner: func() error { return fSys.RemoveAll(tmpDirForRepo) }, + cleaner: repoSpec.Cleaner(fSys), }, nil } @@ -221,11 +221,12 @@ func (l *fileLoader) errIfArgEqualOrHigher( // I.e. Allow a distinction between git URI with // path foo and tag bar and a git URI with the same // path but a different tag? +// TODO(monopole): Use parsed data instead of looking at Raw(). func (l *fileLoader) errIfPreviouslySeenUri(uri string) error { - if strings.HasPrefix(l.uri, uri) { + if strings.HasPrefix(l.repoSpec.Raw(), uri) { return fmt.Errorf( "cycle detected: URI '%s' referenced by previous URI '%s'", - uri, l.uri) + uri, l.repoSpec.Raw()) } if l.referrer == nil { return nil diff --git a/pkg/loader/fileloader_test.go b/pkg/loader/fileloader_test.go index a92974b02..bf27c487b 100644 --- a/pkg/loader/fileloader_test.go +++ b/pkg/loader/fileloader_test.go @@ -358,14 +358,13 @@ func makeFakeGitCloner(t *testing.T, fSys fs.FileSystem, coRoot string) git.Clon if !fSys.IsDir(coRoot) { t.Fatalf("expecting a directory at '%s'", coRoot) } - return func(url string) ( - checkoutDir string, pathInCoDir string, err error) { + return func(url string) (*git.RepoSpec, error) { _, path := splitOnNthSlash(url, 3) if !fSys.IsDir(coRoot + "/" + path) { t.Fatalf("expecting a directory at '%s'/'%s'", coRoot, path) } - return coRoot, path, nil + return git.NewRepoSpec(url, fs.ConfirmedDir(coRoot), path), nil } }