diff --git a/api/internal/localizer/localizer.go b/api/internal/localizer/localizer.go index 181db13a2..61c2fb109 100644 --- a/api/internal/localizer/localizer.go +++ b/api/internal/localizer/localizer.go @@ -296,7 +296,10 @@ func (lc *localizer) localizeDir(path string) (string, error) { if repo := ldr.Repo(); repo != "" { // TODO(annasong): You need to check if you can add a localize directory here to store // the remote file content. There may be a directory that shares the localize directory name. - locPath = locRootPath(path, repo, root) + locPath, err = locRootPath(path, repo, root, lc.fSys) + if err != nil { + return "", err + } } else { locPath, err = filepath.Rel(lc.root.String(), root.String()) if err != nil { diff --git a/api/internal/localizer/util.go b/api/internal/localizer/util.go index e435eaf2e..c6380e86f 100644 --- a/api/internal/localizer/util.go +++ b/api/internal/localizer/util.go @@ -21,6 +21,9 @@ const ( // LocalizeDir is the name of the localize directories used to store remote content in the localize destination LocalizeDir = "localized-files" + + // FileSchemeDir is the name of the directory immediately inside LocalizeDir used to store file-schemed repos + FileSchemeDir = "file-schemed" ) // establishScope returns the effective scope given localize arguments and targetLdr at rawTarget. For remote rawTarget, @@ -132,14 +135,16 @@ func locFilePath(fileURL string) string { // File urls must have http or https scheme, so it is safe to use url.Parse. u, err := url.Parse(fileURL) if err != nil { - log.Fatalf("cannot parse validated file url %q: %s", fileURL, err.Error()) + log.Panicf("cannot parse validated file url %q: %s", fileURL, err) } - // Percent-encodings should be preserved in case sub-delims have special meaning. + // HTTP requests use the escaped path, so we use it here. Escaped paths also help us + // preserve percent-encoding in the original path, in the absence of illegal characters, + // in case they have special meaning to the host. // Extraneous '..' parent directory dot-segments should be removed. path := filepath.Join(string(filepath.Separator), filepath.FromSlash(u.EscapedPath())) - // The host should not include userinfo or port. + // We intentionally exclude userinfo and port. // Raw github urls are the only type of file urls kustomize officially accepts. // In this case, the path already consists of org, repo, version, and path in repo, in order, // so we can use it as is. @@ -147,10 +152,63 @@ func locFilePath(fileURL string) string { } // locRootPath returns the relative localized path of the validated root url rootURL, where the local copy of its repo -// is at repoDir and the copy of its root is at rootDir -// TODO(annasong): implement -func locRootPath(rootURL string, repoDir string, rootDir filesys.ConfirmedDir) string { - _ = rootURL - _, _ = repoDir, rootDir - return "" +// is at repoDir and the copy of its root is at root on fSys. +func locRootPath(rootURL, repoDir string, root filesys.ConfirmedDir, fSys filesys.FileSystem) (string, error) { + repoSpec, err := git.NewRepoSpecFromURL(rootURL) + if err != nil { + log.Panicf("cannot parse validated repo url %q: %s", rootURL, err) + } + host, err := parseHost(repoSpec) + if err != nil { + return "", errors.WrapPrefixf(err, "unable to parse host of remote root %q", rootURL) + } + repo, err := filesys.ConfirmDir(fSys, repoDir) + if err != nil { + log.Panicf("unable to establish validated repo download location %q: %s", repoDir, err) + } + // calculate from copy instead of url to straighten symlinks + inRepo, err := filepath.Rel(repo.String(), root.String()) + if err != nil { + log.Panicf("cannot find path from %q to child directory %q: %s", repo, root, err) + } + // We do not need to escape OrgRepo, a path on the git server. + // However, like git, we clean dot-segments from OrgRepo. + // Git does not allow ref value to contain dot-segments. + return filepath.Join(LocalizeDir, + host, + filepath.Join(string(filepath.Separator), filepath.FromSlash(repoSpec.OrgRepo)), + filepath.FromSlash(repoSpec.Ref), + inRepo), nil +} + +// parseHost returns the localize directory path corresponding to repoSpec.Host +func parseHost(repoSpec *git.RepoSpec) (string, error) { + var target string + switch scheme, _, _ := strings.Cut(repoSpec.Host, "://"); scheme { + case "gh:": + // 'gh' was meant to be a local github.com shorthand, in which case + // the .gitconfig file could map it to any host. See origin here: + // https://github.com/kubernetes-sigs/kustomize/blob/kustomize/v4.5.7/api/internal/git/repospec.go#L203 + // We give it a special host directory here under the assumption + // that we are unlikely to have another host simply named 'gh'. + return "gh", nil + case "file": + // We put file-scheme repos under a special directory to avoid + // colluding local absolute paths with hosts. + return FileSchemeDir, nil + case "https", "http", "ssh": + target = repoSpec.Host + default: + // We must have relative ssh url; in other words, the url has scp-like syntax. + // We attach a scheme to avoid url.Parse errors. + target = "ssh://" + repoSpec.Host + } + // url.Parse will not recognize ':' delimiter that both RepoSpec and git accept. + target = strings.TrimSuffix(target, ":") + u, err := url.Parse(target) + if err != nil { + return "", errors.Wrap(err) + } + // strip scheme, userinfo, port, and any trailing slashes. + return u.Hostname(), nil } diff --git a/api/internal/localizer/util_test.go b/api/internal/localizer/util_test.go index 956c46a46..ce70b4c01 100644 --- a/api/internal/localizer/util_test.go +++ b/api/internal/localizer/util_test.go @@ -4,13 +4,59 @@ package localizer //nolint:testpackage import ( + "fmt" + "os" "path/filepath" "strings" "testing" "github.com/stretchr/testify/require" + "sigs.k8s.io/kustomize/api/ifc" + "sigs.k8s.io/kustomize/api/internal/git" + "sigs.k8s.io/kustomize/kyaml/filesys" ) +func TestDefaultNewDirRepo(t *testing.T) { + for name, test := range map[string]struct { + url, dst string + }{ + "simple": { + url: "https://github.com/org/repo?ref=value", + dst: "localized-repo-value", + }, + "slashed_ref": { + url: "https://github.com/org/repo?ref=group/version", + dst: "localized-repo-group-version", + }, + } { + t.Run(name, func(t *testing.T) { + repoSpec, err := git.NewRepoSpecFromURL(test.url) + require.NoError(t, err) + require.Equal(t, test.dst, defaultNewDir(&fakeLoader{t.TempDir()}, repoSpec)) + }) + } +} + +type fakeLoader struct { + root string +} + +func (fl *fakeLoader) Root() string { + return fl.root +} +func (fl *fakeLoader) Repo() string { + return fl.root +} +func (fl *fakeLoader) Load(_ string) ([]byte, error) { + return []byte{}, nil +} +func (fl *fakeLoader) New(path string) (ifc.Loader, error) { + return &fakeLoader{path}, nil +} +func (fl *fakeLoader) Cleanup() error { + return nil +} + func TestUrlBase(t *testing.T) { require.Equal(t, "repo", urlBase("https://github.com/org/repo")) } @@ -34,6 +80,14 @@ func TestLocFilePath(t *testing.T) { url: "https://raw.githubusercontent.com/org/repo/ref/path/to/file.yaml", path: simpleJoin(t, "raw.githubusercontent.com", "org", "repo", "ref", "path", "to", "file.yaml"), }, + "http-scheme": { + url: "http://host/path", + path: simpleJoin(t, "host", "path"), + }, + "extraneous_components": { + url: "http://userinfo@host:1234/path/file?query", + path: simpleJoin(t, "host", "path", "file"), + }, "empty_path": { url: "https://host", path: "host", @@ -42,11 +96,7 @@ func TestLocFilePath(t *testing.T) { url: "https://host//", path: "host", }, - "extraneous_components": { - url: "http://userinfo@host:1234/path/file?query", - path: simpleJoin(t, "host", "path", "file"), - }, - "percent-encoding": { + "percent-encoded_path": { url: "https://host/file%2Eyaml", path: simpleJoin(t, "host", "file%2Eyaml"), }, @@ -64,3 +114,194 @@ func TestLocFilePath(t *testing.T) { }) } } + +func TestLocFilePathColon(t *testing.T) { + req := require.New(t) + + // The colon is special because it was once used as the unix file separator. + const url = "https://[2001:4860:4860::8888]/file.yaml" + const host = "2001:4860:4860::8888" + const file = "file.yaml" + req.Equal(simpleJoin(t, LocalizeDir, host, file), locFilePath(url)) + + fSys := filesys.MakeFsOnDisk() + targetDir := simpleJoin(t, t.TempDir(), host) + + // We check that we can create single directory, meaning ':' not used as file separator. + req.NoError(fSys.Mkdir(targetDir)) + _, err := fSys.Create(simpleJoin(t, targetDir, file)) + req.NoError(err) + + // We check that the directory with such name is readable. + files, err := fSys.ReadDir(targetDir) + req.NoError(err) + req.Equal([]string{file}, files) +} + +func TestLocFilePath_SpecialChar(t *testing.T) { + req := require.New(t) + + // The wild card character is one of the legal uri characters with more meaning + // to the system, so we test it here. + const wildcard = "*" + req.Equal(simpleJoin(t, LocalizeDir, "host", wildcard), locFilePath("https://host/*")) + + fSys := filesys.MakeFsOnDisk() + testDir := t.TempDir() + req.NoError(fSys.Mkdir(simpleJoin(t, testDir, "a"))) + req.NoError(fSys.WriteFile(simpleJoin(t, testDir, "b"), []byte{})) + + // We check that we can create and read from wild card-named file. + // We check that the file system is not matching it to existing file names. + req.NoError(fSys.WriteFile(simpleJoin(t, testDir, wildcard), []byte("test"))) + content, err := fSys.ReadFile(simpleJoin(t, testDir, wildcard)) + req.NoError(err) + req.Equal("test", string(content)) +} + +func TestLocFilePath_SpecialFiles(t *testing.T) { + for name, tFSys := range map[string]struct { + urlPath string + pathDir, pathFile string + }{ + "windows_reserved_name": { + urlPath: "/aux/file", + pathDir: "aux", + pathFile: "file", + }, + "hidden_files": { + urlPath: "/.../.file", + pathDir: "...", + pathFile: ".file", + }, + } { + t.Run(name, func(t *testing.T) { + req := require.New(t) + + expectedPath := simpleJoin(t, LocalizeDir, "host", tFSys.pathDir, tFSys.pathFile) + req.Equal(expectedPath, locFilePath("https://host"+tFSys.urlPath)) + + fSys := filesys.MakeFsOnDisk() + targetDir := simpleJoin(t, t.TempDir(), tFSys.pathDir) + req.NoError(fSys.Mkdir(targetDir)) + req.NoError(fSys.WriteFile(simpleJoin(t, targetDir, tFSys.pathFile), []byte("test"))) + + content, err := fSys.ReadFile(simpleJoin(t, targetDir, tFSys.pathFile)) + req.NoError(err) + req.Equal([]byte("test"), content) + }) + } +} + +func makeConfirmedDir(t *testing.T) (filesys.FileSystem, filesys.ConfirmedDir) { + t.Helper() + + fSys := filesys.MakeFsOnDisk() + testDir, err := filesys.NewTmpConfirmedDir() + require.NoError(t, err) + t.Cleanup(func() { + _ = fSys.RemoveAll(testDir.String()) + }) + + return fSys, testDir +} + +func TestLocRootPath_URLComponents(t *testing.T) { + for name, test := range map[string]struct { + urlf, path string + }{ + "ssh": { + urlf: "ssh://git@github.com/org/repo//%s?ref=value", + path: simpleJoin(t, "github.com", "org", "repo", "value"), + }, + "rel_ssh": { + urlf: "git@github.com:org/repo//%s?ref=value", + path: simpleJoin(t, "github.com", "org", "repo", "value"), + }, + "https": { + urlf: "https://gitlab.com/org/repo//%s?ref=value", + path: simpleJoin(t, "gitlab.com", "org", "repo", "value"), + }, + "file": { + urlf: "file:///var/run/repo//%s?ref=value", + path: simpleJoin(t, FileSchemeDir, "var", "run", "repo", "value"), + }, + "gh_shorthand": { + urlf: "gh:org/repo//%s?ref=value", + path: simpleJoin(t, "gh", "org", "repo", "value"), + }, + "IPv6": { + urlf: "https://[2001:4860:4860::8888]/org/repo//%s?ref=value", + path: simpleJoin(t, "2001:4860:4860::8888", "org", "repo", "value"), + }, + "port": { + urlf: "https://localhost.com:8080/org/repo//%s?ref=value", + path: simpleJoin(t, "localhost.com", "org", "repo", "value"), + }, + "no_org": { + urlf: "https://github.com/repo//%s?ref=value", + path: simpleJoin(t, "github.com", "repo", "value"), + }, + ".git_suffix": { + urlf: "https://github.com/org1/org2/repo.git//%s?ref=value", + path: simpleJoin(t, "github.com", "org1", "org2", "repo", "value"), + }, + "dot-segments": { + urlf: "https://github.com/./../org/../org/repo.git//%s?ref=value", + path: simpleJoin(t, "github.com", "org", "repo", "value"), + }, + "no_path_delimiter": { + urlf: "https://github.com/org/repo/%s?ref=value", + path: simpleJoin(t, "github.com", "org", "repo", "value"), + }, + "illegal_windows_dir": { + urlf: "https://gitlab.com/org./repo..git//%s?ref=value", + path: simpleJoin(t, "gitlab.com", "org.", "repo.", "value"), + }, + "ref_has_slash": { + urlf: "https://gitlab.com/org/repo//%s?ref=group/version/kind", + path: simpleJoin(t, "gitlab.com", "org", "repo", "group", "version", "kind"), + }, + } { + t.Run(name, func(t *testing.T) { + u := fmt.Sprintf(test.urlf, "path/to/root") + path := simpleJoin(t, LocalizeDir, test.path, "path", "to", "root") + + fSys, testDir := makeConfirmedDir(t) + repoDir := simpleJoin(t, testDir.String(), "repo_random-hash") + require.NoError(t, fSys.Mkdir(repoDir)) + rootDir := simpleJoin(t, repoDir, "path", "to", "root") + require.NoError(t, fSys.MkdirAll(rootDir)) + + actual, err := locRootPath(u, repoDir, filesys.ConfirmedDir(rootDir), fSys) + require.NoError(t, err) + require.Equal(t, path, actual) + + require.NoError(t, fSys.MkdirAll(simpleJoin(t, testDir.String(), path))) + }) + } +} + +func TestLocRootPath_Repo(t *testing.T) { + const url = "https://github.com/org/repo?ref=value" + expected := simpleJoin(t, LocalizeDir, "github.com", "org", "repo", "value") + + fSys, testDir := makeConfirmedDir(t) + actual, err := locRootPath(url, testDir.String(), testDir, fSys) + require.NoError(t, err) + require.Equal(t, expected, actual) +} + +func TestLocRootPath_SymlinkPath(t *testing.T) { + const url = "https://github.com/org/repo//symlink?ref=value" + + fSys, repoDir := makeConfirmedDir(t) + rootDir := simpleJoin(t, repoDir.String(), "actual-root") + require.NoError(t, fSys.Mkdir(rootDir)) + require.NoError(t, os.Symlink(rootDir, simpleJoin(t, repoDir.String(), "symlink"))) + + expected := simpleJoin(t, LocalizeDir, "github.com", "org", "repo", "value", "actual-root") + actual, err := locRootPath(url, repoDir.String(), filesys.ConfirmedDir(rootDir), fSys) + require.NoError(t, err) + require.Equal(t, expected, actual) +}