diff --git a/api/internal/git/repospec.go b/api/internal/git/repospec.go index a1c29e6c7..20ca82ab5 100644 --- a/api/internal/git/repospec.go +++ b/api/internal/git/repospec.go @@ -110,12 +110,20 @@ const ( // the parts. func parseGitURL(n string) ( host string, orgRepo string, path string, gitRef string, gitSubmodules bool, gitSuff string, gitTimeout time.Duration) { + // parse query first + // safe because according to rfc3986: ? only allowed in query + // and not recognized %-encoded + beforeQuery, query, _ := strings.Cut(n, "?") + n = beforeQuery + // if no query, defaults returned + gitRef, gitTimeout, gitSubmodules = parseQuery(query) + if strings.Contains(n, gitDelimiter) { index := strings.Index(n, gitDelimiter) // Adding _git/ to host host = normalizeGitHostSpec(n[:index+len(gitDelimiter)]) - orgRepo = strings.Split(strings.Split(n[index+len(gitDelimiter):], "/")[0], "?")[0] - path, gitRef, gitTimeout, gitSubmodules = peelQuery(n[index+len(gitDelimiter)+len(orgRepo):]) + orgRepo = strings.Split(n[index+len(gitDelimiter):], "/")[0] + path = parsePath(n[index+len(gitDelimiter)+len(orgRepo):]) return } host, n = parseHostSpec(n) @@ -131,7 +139,7 @@ func parseGitURL(n string) ( if len(n) > 0 && n[0] == '/' { n = n[1:] } - path, gitRef, gitTimeout, gitSubmodules = peelQuery(n) + path = parsePath(n) return } @@ -139,29 +147,27 @@ func parseGitURL(n string) ( if idx := strings.Index(n, "//"); idx > 0 { orgRepo = n[:idx] n = n[idx+2:] - path, gitRef, gitTimeout, gitSubmodules = peelQuery(n) + path = parsePath(n) return } - path, gitRef, gitTimeout, gitSubmodules = peelQuery(n) - orgRepo = path - path = "" + orgRepo = parsePath(n) return } i := strings.Index(n, "/") if i < 1 { - path, gitRef, gitTimeout, gitSubmodules = peelQuery(n) + path = parsePath(n) return } j := strings.Index(n[i+1:], "/") if j >= 0 { j += i + 1 orgRepo = n[:j] - path, gitRef, gitTimeout, gitSubmodules = peelQuery(n[j+1:]) + path = parsePath(n[j+1:]) return } path = "" - orgRepo, gitRef, gitTimeout, gitSubmodules = peelQuery(n) + orgRepo = parsePath(n) return host, orgRepo, path, gitRef, gitSubmodules, gitSuff, gitTimeout } @@ -171,14 +177,12 @@ const defaultSubmodules = true // Arbitrary, but non-infinite, timeout for running commands. const defaultTimeout = 27 * time.Second -func peelQuery(arg string) (string, string, time.Duration, bool) { - // Parse the given arg into a URL. In the event of a parse failure, return - // our defaults. - parsed, err := url.Parse(arg) +func parseQuery(query string) (string, time.Duration, bool) { + values, err := url.ParseQuery(query) + // in event of parse failure, return defaults if err != nil { - return arg, "", defaultTimeout, defaultSubmodules + return "", defaultTimeout, defaultSubmodules } - values := parsed.Query() // ref is the desired git ref to target. Can be specified by in a git URL // with ?ref= or ?version=, although ref takes precedence. @@ -209,7 +213,16 @@ func peelQuery(arg string) (string, string, time.Duration, bool) { } } - return parsed.Path, ref, duration, submodules + return ref, duration, submodules +} + +func parsePath(n string) string { + parsed, err := url.Parse(n) + // TODO(annasong): decide how to handle error, i.e. return error, empty string, etc. + if err != nil { + return n + } + return parsed.Path } func parseHostSpec(n string) (string, string) { diff --git a/api/internal/git/repospec_test.go b/api/internal/git/repospec_test.go index ad63b4aa0..220b326f1 100644 --- a/api/internal/git/repospec_test.go +++ b/api/internal/git/repospec_test.go @@ -6,7 +6,6 @@ package git import ( "fmt" "path/filepath" - "strings" "testing" "time" @@ -38,7 +37,7 @@ func TestNewRepoSpecFromUrl_Permute(t *testing.T) { } var orgRepos = []string{"someOrg/someRepo", "kubernetes/website"} var pathNames = []string{"README.md", "foo/krusty.txt", ""} - var refArgs = []string{"someBranch", "master", "v0.1.0", ""} + var refArgs = []string{"group/version", "someBranch", "master", "v0.1.0", ""} makeURL := func(hostFmt, orgRepo, path, ref string) string { if len(path) > 0 { @@ -51,64 +50,65 @@ func TestNewRepoSpecFromUrl_Permute(t *testing.T) { return url } - var bad [][]string + var i int for _, v := range schemeAuthority { hostRaw := v.raw hostSpec := v.normalized for _, orgRepo := range orgRepos { for _, pathName := range pathNames { for _, hrefArg := range refArgs { - uri := makeURL(hostRaw, orgRepo, pathName, hrefArg) - rs, err := NewRepoSpecFromURL(uri) - if err != nil { - t.Errorf("problem %v", err) - continue - } - if rs.Host != hostSpec { - bad = append(bad, []string{"host", uri, rs.Host, hostSpec}) - } - if rs.OrgRepo != orgRepo { - bad = append(bad, []string{"orgRepo", uri, rs.OrgRepo, orgRepo}) - } - if rs.Path != pathName { - bad = append(bad, []string{"path", uri, rs.Path, pathName}) - } - if rs.Ref != hrefArg { - bad = append(bad, []string{"ref", uri, rs.Ref, hrefArg}) - } + t.Run(fmt.Sprintf("t%d", i), func(t *testing.T) { + uri := makeURL(hostRaw, orgRepo, pathName, hrefArg) + rs, err := NewRepoSpecFromURL(uri) + require.NoErrorf(t, err, "unexpected error creating RepoSpec for uri %s", uri) + assert.Equal(t, hostSpec, rs.Host, "unexpected host for uri %s", uri) + assert.Equal(t, orgRepo, rs.OrgRepo, "unexpected orgRepo for uri %s", uri) + assert.Equal(t, pathName, rs.Path, "unexpected path for uri %s", uri) + assert.Equal(t, hrefArg, rs.Ref, "unexpected ref for uri %s", uri) + }) + i++ } } } } - if len(bad) > 0 { - for _, tuple := range bad { - fmt.Printf("\n"+ - " from uri: %s\n"+ - " actual %4s: %s\n"+ - "expected %4s: %s\n", - tuple[1], tuple[0], tuple[2], tuple[0], tuple[3]) - } - t.Fail() - } } func TestNewRepoSpecFromUrlErrors(t *testing.T) { - var badData = []struct{ url, error string }{ - {"/tmp", "uri looks like abs path"}, - {"iauhsdiuashduas", "url lacks orgRepo"}, - {"htxxxtp://github.com/", "url lacks host"}, - {"ssh://git.example.com", "url lacks orgRepo"}, - {"git::___", "url lacks orgRepo"}, + badData := map[string]struct { + url, error string + }{ + "absolute_path": { + "/tmp", + "uri looks like abs path", + }, + "no_slashes": { + "iauhsdiuashduas", + "url lacks orgRepo", + }, + "bad_scheme": { + "htxxxtp://github.com/", + "url lacks host", + }, + "no_org_repo": { + "ssh://git.example.com", + "url lacks orgRepo", + }, + "hashicorp_git_only": { + "git::___", + "url lacks orgRepo", + }, + "query_after_host": { + "https://host?ref=group/version/minor_version", + "url lacks orgRepo", + }, } - for _, testCase := range badData { - _, err := NewRepoSpecFromURL(testCase.url) - if err == nil { - t.Error("expected error") - } - if !strings.Contains(err.Error(), testCase.error) { - t.Errorf("unexpected error: %s", err) - } + for name, testCase := range badData { + t.Run(name, func(t *testing.T) { + _, err := NewRepoSpecFromURL(testCase.url) + require.Error(t, err) + require.Contains(t, err.Error(), testCase.error) + }) } } @@ -407,6 +407,54 @@ func TestNewRepoSpecFromUrl_Smoke(t *testing.T) { GitSuffix: ".git", }, }, + { + name: "query_slash", + input: "https://authority/org/repo?ref=group/version", + cloneSpec: "https://authority/org/repo.git", + absPath: notCloned.String(), + repoSpec: RepoSpec{ + Host: "https://authority/", + OrgRepo: "org/repo", + Ref: "group/version", + GitSuffix: ".git", + }, + }, + { + name: "query_git_delimiter", + input: "https://authority/org/repo/?ref=includes_git/for_some_reason", + cloneSpec: "https://authority/org/repo.git", + absPath: notCloned.String(), + repoSpec: RepoSpec{ + Host: "https://authority/", + OrgRepo: "org/repo", + Ref: "includes_git/for_some_reason", + GitSuffix: ".git", + }, + }, + { + name: "query_git_suffix", + input: "https://authority/org/repo/?ref=includes.git/for_some_reason", + cloneSpec: "https://authority/org/repo.git", + absPath: notCloned.String(), + repoSpec: RepoSpec{ + Host: "https://authority/", + OrgRepo: "org/repo", + Ref: "includes.git/for_some_reason", + GitSuffix: ".git", + }, + }, + { + name: "non_parsable_path", + input: "https://authority/org/repo/%-invalid-uri-so-not-parsable-by-net/url.Parse", + cloneSpec: "https://authority/org/repo.git", + absPath: notCloned.Join("%-invalid-uri-so-not-parsable-by-net/url.Parse"), + repoSpec: RepoSpec{ + Host: "https://authority/", + OrgRepo: "org/repo", + Path: "%-invalid-uri-so-not-parsable-by-net/url.Parse", + GitSuffix: ".git", + }, + }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { @@ -420,7 +468,7 @@ func TestNewRepoSpecFromUrl_Smoke(t *testing.T) { assert.Equal(t, tc.absPath, rs.AbsPath(), "absPath mismatch") // some values have defaults. Clear them here so test cases remain compact. // This means submodules and timeout cannot be tested here. That's fine since - // they are tested in TestPeelQuery. + // they are tested in TestParseQuery. rs.raw = "" rs.Dir = "" rs.Submodules = false @@ -430,6 +478,13 @@ func TestNewRepoSpecFromUrl_Smoke(t *testing.T) { } } +func TestNewRepoSpecFromURL_DefaultQueryParams(t *testing.T) { + repoSpec, err := NewRepoSpecFromURL("https://github.com/org/repo") + require.NoError(t, err) + require.Equal(t, defaultSubmodules, repoSpec.Submodules) + require.Equal(t, defaultTimeout, repoSpec.Timeout) +} + func TestIsAzureHost(t *testing.T) { testcases := []struct { input string @@ -460,138 +515,128 @@ func TestIsAzureHost(t *testing.T) { } } -func TestPeelQuery(t *testing.T) { +func TestParseQuery(t *testing.T) { testcases := []struct { name string input string - path string ref string submodules bool timeout time.Duration }{ { - name: "t1", - // All empty. - input: "somerepos", - path: "somerepos", + name: "empty", + input: "", ref: "", submodules: defaultSubmodules, timeout: defaultTimeout, }, { - name: "t2", - input: "somerepos?ref=v1.0.0", - path: "somerepos", + name: "ref", + input: "ref=v1.0.0", ref: "v1.0.0", submodules: defaultSubmodules, timeout: defaultTimeout, }, { - name: "t3", - input: "somerepos?version=master", - path: "somerepos", + name: "ref_slash", + input: "ref=kustomize/v4.5.7", + ref: "kustomize/v4.5.7", + submodules: defaultSubmodules, + timeout: defaultTimeout, + }, + { + name: "version", + input: "version=master", ref: "master", submodules: defaultSubmodules, timeout: defaultTimeout, }, { - name: "t4", + name: "ref_and_version", // A ref value takes precedence over a version value. - input: "somerepos?version=master&ref=v1.0.0", - path: "somerepos", + input: "version=master&ref=v1.0.0", ref: "v1.0.0", submodules: defaultSubmodules, timeout: defaultTimeout, }, { - name: "t5", + name: "empty_submodules", // Empty submodules value uses default. - input: "somerepos?version=master&submodules=", - path: "somerepos", + input: "version=master&submodules=", ref: "master", submodules: defaultSubmodules, timeout: defaultTimeout, }, { - name: "t6", + name: "bad_submodules", // Malformed submodules value uses default. - input: "somerepos?version=master&submodules=maybe", - path: "somerepos", + input: "version=master&submodules=maybe", ref: "master", submodules: defaultSubmodules, timeout: defaultTimeout, }, { - name: "t7", - input: "somerepos?version=master&submodules=true", - path: "somerepos", + name: "submodules_true", + input: "version=master&submodules=true", ref: "master", submodules: true, timeout: defaultTimeout, }, { - name: "t8", - input: "somerepos?version=master&submodules=false", - path: "somerepos", + name: "submodules_false", + input: "version=master&submodules=false", ref: "master", submodules: false, timeout: defaultTimeout, }, { - name: "t9", + name: "empty_timeout", // Empty timeout value uses default. - input: "somerepos?version=master&timeout=", - path: "somerepos", + input: "version=master&timeout=", ref: "master", submodules: defaultSubmodules, timeout: defaultTimeout, }, { - name: "t10", + name: "bad_timeout", // Malformed timeout value uses default. - input: "somerepos?version=master&timeout=jiffy", - path: "somerepos", + input: "version=master&timeout=jiffy", ref: "master", submodules: defaultSubmodules, timeout: defaultTimeout, }, { - name: "t11", + name: "zero_timeout", // Zero timeout value uses default. - input: "somerepos?version=master&timeout=0", - path: "somerepos", + input: "version=master&timeout=0", ref: "master", submodules: defaultSubmodules, timeout: defaultTimeout, }, { - name: "t12", - input: "somerepos?version=master&timeout=0s", - path: "somerepos", + name: "zero_unit_timeout", + input: "version=master&timeout=0s", ref: "master", submodules: defaultSubmodules, timeout: defaultTimeout, }, { - name: "t13", - input: "somerepos?version=master&timeout=61", - path: "somerepos", + name: "timeout", + input: "version=master&timeout=61", ref: "master", submodules: defaultSubmodules, timeout: 61 * time.Second, }, { - name: "t14", - input: "somerepos?version=master&timeout=1m1s", - path: "somerepos", + name: "timeout_unit", + input: "version=master&timeout=1m1s", ref: "master", submodules: defaultSubmodules, timeout: 61 * time.Second, }, { - name: "t15", - input: "somerepos?version=master&submodules=false&timeout=1m1s", - path: "somerepos", + name: "all", + input: "version=master&submodules=false&timeout=1m1s", ref: "master", submodules: false, timeout: 61 * time.Second, @@ -599,8 +644,7 @@ func TestPeelQuery(t *testing.T) { } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { - path, ref, timeout, submodules := peelQuery(tc.input) - assert.Equal(t, tc.path, path, "path mismatch") + ref, timeout, submodules := parseQuery(tc.input) assert.Equal(t, tc.ref, ref, "ref mismatch") assert.Equal(t, tc.timeout, timeout, "timeout mismatch") assert.Equal(t, tc.submodules, submodules, "submodules mismatch")