URL based configuration for git exec timeouts and git submodule cloning

Adds a number of user-accessable options for configuring internal git resource
cloning behavior.
- Git commands are executed with a configurable timeout by including a parameter
  like "?timeout=2m30s" in the resource URL. This can improve cloning a large
  repository, or over a slow network.
- Git submodule cloning can be disabled by including a parameter like
  "?submodules=false" in the resource URL.
- Switch the overall query parsing to use url.Parse() and be more extensible.
This commit is contained in:
Josh Komoroske
2021-05-18 18:28:35 -07:00
parent 9557888b32
commit 24a64bdee3
4 changed files with 192 additions and 38 deletions

View File

@@ -14,7 +14,7 @@ type Cloner func(repoSpec *RepoSpec) error
// to say, some remote API, to obtain a local clone of
// a remote repo.
func ClonerUsingGitExec(repoSpec *RepoSpec) error {
r, err := newCmdRunner()
r, err := newCmdRunner(repoSpec.Timeout)
if err != nil {
return err
}
@@ -36,7 +36,10 @@ func ClonerUsingGitExec(repoSpec *RepoSpec) error {
if err = r.run("checkout", "FETCH_HEAD"); err != nil {
return err
}
return r.run("submodule", "update", "--init", "--recursive")
if repoSpec.Submodules {
return r.run("submodule", "update", "--init", "--recursive")
}
return nil
}
// DoNothingCloner returns a cloner that only sets

View File

@@ -12,9 +12,6 @@ import (
"sigs.k8s.io/kustomize/api/internal/utils"
)
// Arbitrary, but non-infinite, timeout for running commands.
const defaultDuration = 27 * time.Second
// gitRunner runs the external git binary.
type gitRunner struct {
gitProgram string
@@ -24,7 +21,7 @@ type gitRunner struct {
// newCmdRunner returns a gitRunner if it can find the binary.
// It also creats a temp directory for cloning repos.
func newCmdRunner() (*gitRunner, error) {
func newCmdRunner(timeout time.Duration) (*gitRunner, error) {
gitProgram, err := exec.LookPath("git")
if err != nil {
return nil, errors.Wrap(err, "no 'git' program on path")
@@ -35,7 +32,7 @@ func newCmdRunner() (*gitRunner, error) {
}
return &gitRunner{
gitProgram: gitProgram,
duration: defaultDuration,
duration: timeout,
dir: dir,
}, nil
}

View File

@@ -5,9 +5,11 @@ package git
import (
"fmt"
"net/url"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
"sigs.k8s.io/kustomize/api/filesys"
)
@@ -44,6 +46,12 @@ type RepoSpec struct {
// e.g. .git or empty in case of _git is present
GitSuffix string
// Submodules indicates whether or not to clone git submodules.
Submodules bool
// Timeout is the maximum duration allowed for execing git commands.
Timeout time.Duration
}
// CloneSpec returns a string suitable for "git clone {spec}".
@@ -77,7 +85,7 @@ func NewRepoSpecFromUrl(n string) (*RepoSpec, error) {
if filepath.IsAbs(n) {
return nil, fmt.Errorf("uri looks like abs path: %s", n)
}
host, orgRepo, path, gitRef, gitSuffix := parseGitUrl(n)
host, orgRepo, path, gitRef, gitSubmodules, gitSuffix, gitTimeout := parseGitUrl(n)
if orgRepo == "" {
return nil, fmt.Errorf("url lacks orgRepo: %s", n)
}
@@ -86,28 +94,28 @@ func NewRepoSpecFromUrl(n string) (*RepoSpec, error) {
}
return &RepoSpec{
raw: n, Host: host, OrgRepo: orgRepo,
Dir: notCloned, Path: path, Ref: gitRef, GitSuffix: gitSuffix}, nil
Dir: notCloned, Path: path, Ref: gitRef, GitSuffix: gitSuffix,
Submodules: gitSubmodules, Timeout: gitTimeout}, nil
}
const (
refQuery = "?ref="
refQueryRegex = "\\?(version|ref)="
gitSuffix = ".git"
gitDelimiter = "_git/"
refQuery = "?ref="
gitSuffix = ".git"
gitDelimiter = "_git/"
)
// From strings like git@github.com:someOrg/someRepo.git or
// https://github.com/someOrg/someRepo?ref=someHash, extract
// the parts.
func parseGitUrl(n string) (
host string, orgRepo string, path string, gitRef string, gitSuff string) {
host string, orgRepo string, path string, gitRef string, gitSubmodules bool, gitSuff string, gitTimeout time.Duration) {
if strings.Contains(n, gitDelimiter) {
index := strings.Index(n, gitDelimiter)
// Adding _git/ to host
host = normalizeGitHostSpec(n[:index+len(gitDelimiter)])
orgRepo = strings.Split(strings.Split(n[index+len(gitDelimiter):], "/")[0], "?")[0]
path, gitRef = peelQuery(n[index+len(gitDelimiter)+len(orgRepo):])
path, gitRef, gitTimeout, gitSubmodules = peelQuery(n[index+len(gitDelimiter)+len(orgRepo):])
return
}
host, n = parseHostSpec(n)
@@ -116,35 +124,72 @@ func parseGitUrl(n string) (
index := strings.Index(n, gitSuffix)
orgRepo = n[0:index]
n = n[index+len(gitSuffix):]
path, gitRef = peelQuery(n)
path, gitRef, gitTimeout, gitSubmodules = peelQuery(n)
return
}
i := strings.Index(n, "/")
if i < 1 {
return "", "", "", "", ""
path, gitRef, gitTimeout, gitSubmodules = peelQuery(n)
return
}
j := strings.Index(n[i+1:], "/")
if j >= 0 {
j += i + 1
orgRepo = n[:j]
path, gitRef = peelQuery(n[j+1:])
path, gitRef, gitTimeout, gitSubmodules = peelQuery(n[j+1:])
return
}
path = ""
orgRepo, gitRef = peelQuery(n)
return host, orgRepo, path, gitRef, gitSuff
orgRepo, gitRef, gitTimeout, gitSubmodules = peelQuery(n)
return host, orgRepo, path, gitRef, gitSubmodules, gitSuff, gitTimeout
}
func peelQuery(arg string) (string, string) {
// Clone git submodules by default.
const defaultSubmodules = true
r, _ := regexp.Compile(refQueryRegex)
j := r.FindStringIndex(arg)
// Arbitrary, but non-infinite, timeout for running commands.
const defaultTimeout = 27 * time.Second
if len(j) > 0 {
return arg[:j[0]], arg[j[0]+len(r.FindString(arg)):]
func peelQuery(arg string) (string, string, time.Duration, bool) {
// Parse the given arg into a URL. In the event of a parse failure, return
// our defaults.
parsed, err := url.Parse(arg)
if err != nil {
return arg, "", defaultTimeout, defaultSubmodules
}
return arg, ""
values := parsed.Query()
// ref is the desired git ref to target. Can be specified by in a git URL
// with ?ref=<string> or ?version=<string>, although ref takes precedence.
ref := values.Get("version")
if queryValue := values.Get("ref"); queryValue != "" {
ref = queryValue
}
// depth is the desired git exec timeout. Can be specified by in a git URL
// with ?timeout=<duration>.
duration := defaultTimeout
if queryValue := values.Get("timeout"); queryValue != "" {
// Attempt to first parse as a number of integer seconds (like "61"),
// and then attempt to parse as a suffixed duration (like "61s").
if intValue, err := strconv.Atoi(queryValue); err == nil && intValue > 0 {
duration = time.Duration(intValue) * time.Second
} else if durationValue, err := time.ParseDuration(queryValue); err == nil && durationValue > 0 {
duration = durationValue
}
}
// submodules indicates if git submodule cloning is desired. Can be
// specified by in a git URL with ?submodules=<bool>.
submodules := defaultSubmodules
if queryValue := values.Get("submodules"); queryValue != "" {
if boolValue, err := strconv.ParseBool(queryValue); err == nil {
submodules = boolValue
}
}
return parsed.Path, ref, duration, submodules
}
func parseHostSpec(n string) (string, string) {

View File

@@ -8,6 +8,7 @@ import (
"path/filepath"
"strings"
"testing"
"time"
)
var orgRepos = []string{"someOrg/someRepo", "kubernetes/website"}
@@ -233,26 +234,134 @@ func TestIsAzureHost(t *testing.T) {
func TestPeelQuery(t *testing.T) {
testcases := []struct {
input string
expect [2]string
input string
path string
ref string
submodules bool
timeout time.Duration
}{
{
input: "somerepos?ref=v1.0.0",
expect: [2]string{"somerepos", "v1.0.0"},
// All empty.
input: "somerepos",
path: "somerepos",
ref: "",
submodules: defaultSubmodules,
timeout: defaultTimeout,
},
{
input: "somerepos?version=master",
expect: [2]string{"somerepos", "master"},
input: "somerepos?ref=v1.0.0",
path: "somerepos",
ref: "v1.0.0",
submodules: defaultSubmodules,
timeout: defaultTimeout,
},
{
input: "somerepos",
expect: [2]string{"somerepos", ""},
input: "somerepos?version=master",
path: "somerepos",
ref: "master",
submodules: defaultSubmodules,
timeout: defaultTimeout,
},
{
// A ref value takes precedence over a version value.
input: "somerepos?version=master&ref=v1.0.0",
path: "somerepos",
ref: "v1.0.0",
submodules: defaultSubmodules,
timeout: defaultTimeout,
},
{
// Empty submodules value uses default.
input: "somerepos?version=master&submodules=",
path: "somerepos",
ref: "master",
submodules: defaultSubmodules,
timeout: defaultTimeout,
},
{
// Malformed submodules value uses default.
input: "somerepos?version=master&submodules=maybe",
path: "somerepos",
ref: "master",
submodules: defaultSubmodules,
timeout: defaultTimeout,
},
{
input: "somerepos?version=master&submodules=true",
path: "somerepos",
ref: "master",
submodules: true,
timeout: defaultTimeout,
},
{
input: "somerepos?version=master&submodules=false",
path: "somerepos",
ref: "master",
submodules: false,
timeout: defaultTimeout,
},
{
// Empty timeout value uses default.
input: "somerepos?version=master&timeout=",
path: "somerepos",
ref: "master",
submodules: defaultSubmodules,
timeout: defaultTimeout,
},
{
// Malformed timeout value uses default.
input: "somerepos?version=master&timeout=jiffy",
path: "somerepos",
ref: "master",
submodules: defaultSubmodules,
timeout: defaultTimeout,
},
{
// Zero timeout value uses default.
input: "somerepos?version=master&timeout=0",
path: "somerepos",
ref: "master",
submodules: defaultSubmodules,
timeout: defaultTimeout,
},
{
input: "somerepos?version=master&timeout=0s",
path: "somerepos",
ref: "master",
submodules: defaultSubmodules,
timeout: defaultTimeout,
},
{
input: "somerepos?version=master&timeout=61",
path: "somerepos",
ref: "master",
submodules: defaultSubmodules,
timeout: 61 * time.Second,
},
{
input: "somerepos?version=master&timeout=1m1s",
path: "somerepos",
ref: "master",
submodules: defaultSubmodules,
timeout: 61 * time.Second,
},
{
input: "somerepos?version=master&submodules=false&timeout=1m1s",
path: "somerepos",
ref: "master",
submodules: false,
timeout: 61 * time.Second,
},
}
for _, testcase := range testcases {
path, ref := peelQuery(testcase.input)
if path != testcase.expect[0] || ref != testcase.expect[1] {
t.Errorf("peelQuery: expected (%s, %s) got (%s, %s) on %s", testcase.expect[0], testcase.expect[1], path, ref, testcase.input)
path, ref, timeout, submodules := peelQuery(testcase.input)
if path != testcase.path || ref != testcase.ref || timeout != testcase.timeout || submodules != testcase.submodules {
t.Errorf("peelQuery: expected (%s, %s, %v, %v) got (%s, %s, %v, %v) on %s",
testcase.path, testcase.ref, testcase.timeout, testcase.submodules,
path, ref, timeout, submodules,
testcase.input)
}
}
}