From 877da8da6df522f72b203cb71991bd52dfb42fad Mon Sep 17 00:00:00 2001 From: Sam Wronski Date: Fri, 30 Oct 2020 16:03:30 -0700 Subject: [PATCH] Update module span to check commits - Use regex to detect if check should run - Update scan to be per-commit --- cmd/prchecker/main.go | 230 ++++++++++++++++++++++------ cmd/prchecker/main_test.go | 303 +++++++++++++++++++++++++++++++++++++ 2 files changed, 488 insertions(+), 45 deletions(-) create mode 100644 cmd/prchecker/main_test.go diff --git a/cmd/prchecker/main.go b/cmd/prchecker/main.go index 369b764bd..0c13eeeda 100644 --- a/cmd/prchecker/main.go +++ b/cmd/prchecker/main.go @@ -19,11 +19,61 @@ import ( "flag" "fmt" "os" + "regexp" "strings" "github.com/google/go-github/github" ) +// splitCommitsHelp is a help message displayed when a PR has commits which +// span modules. +const splitCommitsHelp = "\nModifications to multiple restricted directories occurred.\n" + + "To resolve this please limit code changes in a commit to a single go module.\n\n" + + "You may learn more about how to split your existing commits at: https://git-scm.com/docs/git-rebase#_splitting_commits\n" + +// Ignore span pattern defines a regular expression pattern that, when matched, +// causes this check to be ignored. +// Pattern expects "ALLOW_MODULE_SPAN" in CAPS on a line by itself. +// Spaces may be included before or after but no other characters with one +// exception. ">" may be used to quote the exclusion. +// Pattern may be provided at any point in the pull request description. +// +// Ex: "ALLOW_MODULE_SPAN", "> ALLOW_MODULE_SPAN", " ALLOW_MODULE_SPAN " +const ignoreSpanPattern = "(?m)^>?\\s*ALLOW_MODULE_SPAN\\s*$" + +// Changeset represents a set of file modifications associated with a commit +type Changeset struct { + files []string + id string +} + +// GitHubRepository represents a pairing of the owner and repository to make +// passing around references to specific projects simpler +type GitHubRepository struct { + client *github.Client + owner *string + repo *string +} + +// GitHubService is the collection of GitHub API interactions this service consumes +type GitHubService interface { + GetPullRequest(prId int) (*github.PullRequest, *github.Response, error) + GetCommit(commitSha string) (*github.RepositoryCommit, *github.Response, error) + ListCommits(prId int, options *github.ListOptions) ([]*github.RepositoryCommit, *github.Response, error) +} + +func (repository GitHubRepository) GetPullRequest(prId int) (*github.PullRequest, *github.Response, error) { + return repository.client.PullRequests.Get(context.Background(), *repository.owner, *repository.repo, prId) +} + +func (repository GitHubRepository) GetCommit(commitSha string) (*github.RepositoryCommit, *github.Response, error) { + return repository.client.Repositories.GetCommit(context.Background(), *repository.owner, *repository.repo, commitSha) +} + +func (repository GitHubRepository) ListCommits(prId int, options *github.ListOptions) ([]*github.RepositoryCommit, *github.Response, error) { + return repository.client.PullRequests.ListCommits(context.Background(), *repository.owner, *repository.repo, prId, options) +} + func main() { owner := flag.String("owner", "", "the github repository owner name") repo := flag.String("repo", "", "the github repository name") @@ -41,79 +91,169 @@ func main() { client := github.NewClient(nil) - files, err := ListAllPullRequestFiles(client, owner, pullrequest, repo) + githubRepo := &GitHubRepository{ + client: client, + owner: owner, + repo: repo, + } + + // Check if module span is allowed before scanning on commits + isSpanAllowed, err := ModuleSpanAllowed(githubRepo, *pullrequest) if err != nil { - fmt.Println("Unable to retrieve pull request details:", err.Error()) + fmt.Printf("unable to retrieve pull request details: %v", err.Error()) os.Exit(2) } - contributedRestrictedPaths := CountRestrictedPathUses(files, restrictedPaths) - modifiedRestrictedDirectories := CountModifiedRestrictedDirectories(contributedRestrictedPaths) + if isSpanAllowed { + fmt.Println("Check not run. Module spanning was allowed in this Pull Request.") + os.Exit(0) + } + + isSpanningPull, _, err := PullRequestSpanningPathList(githubRepo, *pullrequest, restrictedPaths) + + if err != nil { + fmt.Printf("unable to retrieve pull request details: %v", err) + os.Exit(2) + } // Exit with error if two or more restricted directories where modified - if modifiedRestrictedDirectories > 1 { - fmt.Println("Modifications to multiple restricted directories occurred.") + if isSpanningPull { + // Provide a suggestion for potential solution if the check fails. + fmt.Println(splitCommitsHelp) os.Exit(1) } } -// ListAllPullRequestFiles retrieves as many files as possible for the -// target pull request. -// -// Note: GitHub API limits ListFiles to a maximum of 3000 files. Very large -// changes which exceed this limit may pass this check even if they -// do contain spanning changes. -// see: https://developer.github.com/v3/pulls/#list-pull-requests-files -func ListAllPullRequestFiles(client *github.Client, owner *string, pullrequest *int, repo *string) ([]*github.CommitFile, error) { +// ConstructChangeset creates a changeset from a GitHub Commit object +func ConstructChangeset(commit *github.RepositoryCommit) *Changeset { + id := commit.SHA + fileset := []string{} + + for _, file := range commit.Files { + fileset = append(fileset, *file.Filename) + } + + return &Changeset{ + files: fileset, + id: *id, + } +} + +// StringAllowsModuleSpan tests if a string matches the allow span regex +func StringAllowsModuleSpan(body string) (bool, error) { + return regexp.MatchString(ignoreSpanPattern, body) +} + +// IsModuleSpanAllowed tests a Pull Requests description for a regular +// expression. If the expression matches then spanning modules are allowed. +func ModuleSpanAllowed(repository GitHubService, pullId int) (bool, error) { + + // Note: There are multiple ways to pull a github commit object + // we want a RepositoryCommit. + pullRequest, _, err := repository.GetPullRequest(pullId) + + if err != nil { + return false, err + } + + return StringAllowsModuleSpan(*pullRequest.Body) +} + +// GetCommitChanges looks up a github commit by SHA and returns a Changeset +// containing the modified files in the specified commit. +func GetCommitChanges(repository GitHubService, commitSha string) (*Changeset, error) { + + // Note: There are multiple ways to pull a github commit object + // we want a RepositoryCommit. + commit, _, err := repository.GetCommit(commitSha) + + if err != nil { + return nil, err + } + + return ConstructChangeset(commit), nil +} + +// GetPullRequestCommits constructs a list of all commits and the associated +// files changes in the given pull request +func GetPullRequestCommits(repository GitHubService, pullrequest int) ([]*Changeset, error) { + // foundFiles across all pages from github api - var foundFiles []*github.CommitFile - // GitHub returns the first 30 files by default, increase this value - // Note: Page 1 is the first page of results. Page 0 is an end of list mark. - // Github only returns (max) 100 results per page and PR's may exceed this - // so loop until all pages have been enumerated. - options := &github.ListOptions{PerPage: 100, Page: 1} + var collectedCommits []*github.RepositoryCommit + // Github only returns a limited set of commits per request and PR's may + // exceed this so loop until all pages have been enumerated. + options := &github.ListOptions{Page: 1} for options.Page != 0 { - files, response, err := client.PullRequests.ListFiles(context.Background(), *owner, *repo, *pullrequest, options) + commits, response, err := repository.ListCommits(pullrequest, options) // If an error has occurred while querying api exit early, report error if err != nil { return nil, err } - foundFiles = append(foundFiles, files...) + collectedCommits = append(collectedCommits, commits...) // setup next page to continue loop - options = &github.ListOptions{PerPage: 100, Page: response.NextPage} + options = &github.ListOptions{Page: response.NextPage} } - return foundFiles, nil + + var changesetResults []*Changeset + for _, commit := range collectedCommits { + // The repository commits from list commits are not hydrated + // We will need to retrieve the complete object: + changeset, err := GetCommitChanges(repository, *commit.SHA) + if err != nil { + return nil, err + } + + changesetResults = append(changesetResults, changeset) + } + return changesetResults, nil } -// CountModifiedRestrictedDirectories Accepts a map of paths and the number of -// occurances and returns the count of the paths which had a non-zero value. -func CountModifiedRestrictedDirectories(contributedRestrictedPaths map[string]int) int { - modifiedRestrictedDirectories := 0 - for _, occurance := range contributedRestrictedPaths { - if occurance != 0 { - modifiedRestrictedDirectories++ +// IsPullRequestSpanningPathList tests if a pull request spans multiple +// directory paths +func PullRequestSpanningPathList(repository GitHubService, pullrequest int, paths []string) (bool, []*Changeset, error) { + // Create a buffer for commits + changesets, err := GetPullRequestCommits(repository, pullrequest) + + if err != nil { + return false, nil, err + } + + spanningChangesExist := false + for _, changeset := range changesets { + if ChangesetSpanningPathList(changeset, paths) { + // When detecting the first spanning changeset print a prefix message + if !spanningChangesExist { + fmt.Printf("Spanning changesets detected in the following commits:\n\n") + } + + fmt.Printf("\t* %s\n", changeset.id) + // In order provide a full list of outstanding commits, do not shortcircuit this check + spanningChangesExist = true } } - return modifiedRestrictedDirectories + + return spanningChangesExist, changesets, nil } -// CountRestrictedPathUses Constructs a map that contains the number of -// references keyed to each restricted path. This provides details about how -// many files in the list are associated with each restricted path. -func CountRestrictedPathUses(files []*github.CommitFile, restrictedPaths []string) map[string]int { - contributedRestrictedPaths := make(map[string]int) - for _, path := range restrictedPaths { - contributedRestrictedPaths[path] = 0 - } +// IsChangesetSpanningPathList tests if a changeset is spanning +// multiple directory paths. +func ChangesetSpanningPathList(changeset *Changeset, paths []string) bool { + matchedPath := "" - for _, file := range files { - for path := range contributedRestrictedPaths { - if strings.HasPrefix(*file.Filename, path) { - contributedRestrictedPaths[path]++ + for _, file := range changeset.files { + for _, path := range paths { + if strings.HasPrefix(file, path) { + // If a different path has already matched then the changeset spans multiple restricted paths + if matchedPath != "" && matchedPath != path { + return true + } + matchedPath = path } } } - return contributedRestrictedPaths + + // If more than 1 matched paths then the changeset spans paths + return false } diff --git a/cmd/prchecker/main_test.go b/cmd/prchecker/main_test.go new file mode 100644 index 000000000..edb6f9dbe --- /dev/null +++ b/cmd/prchecker/main_test.go @@ -0,0 +1,303 @@ +package main + +import ( + "testing" + + "github.com/google/go-github/github" +) + +func TestStringAllowsModuleSpan(t *testing.T) { + var tests = []struct { + name string + body string + matchExpected bool + }{ + { + "exception not included", + "foo", + false, + }, + { + "exception mentioned in sentence does not exempt span check", + "don't ALLOW_MODULE_SPAN", + false, + }, + { + "PR body is just exception", + "ALLOW_MODULE_SPAN", + true, + }, + { + "support markdown quoting exception", + "> ALLOW_MODULE_SPAN", + true, + }, + { + "support whitespace padding", + "\t ALLOW_MODULE_SPAN\t ", + true, + }, + { + "module span exemption allowed at start of string", + "ALLOW_MODULE_SPAN\nat start of file", + true, + }, + { + "module span exemption allowed at end of string", + "at end of file\nALLOW_MODULE_SPAN", + true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := StringAllowsModuleSpan(tt.body) + + if err != nil { + t.Errorf(err.Error()) + } + + if result != tt.matchExpected { + t.Errorf("got %t, want %t", result, tt.matchExpected) + } + }) + } +} + +func TestIsModuleSpanAllowed(t *testing.T) { + var tests = []struct { + name string + body string + matchExpected bool + }{ + { + "module spanning not allowed", + "don't ALLOW_MODULE_SPAN", + false, + }, + { + "module spanning allowed", + "ALLOW_MODULE_SPAN", + true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + client := FakeGitHubService{ + pullRequest: &github.PullRequest{ + Body: &tt.body, + }, + } + + result, _ := ModuleSpanAllowed(&client, 1) + + if result != tt.matchExpected { + t.Errorf("got %t, want %t", result, tt.matchExpected) + } + }) + } +} + +func TestGetCommitChanges(t *testing.T) { + client := FakeGitHubService{ + commit: &github.RepositoryCommit{ + SHA: github.String("abc123"), + Files: []github.CommitFile{{Filename: github.String("foo")}, {Filename: github.String("bar")}}, + }, + } + + result, _ := GetCommitChanges(&client, "abc123") + + if result.id != "abc123" { + t.Errorf("got %v, want %v", result.id, "abc123") + } + + if len(result.files) != 2 { + t.Errorf("got %v, want %v", len(result.files), "2") + } + + expectedFiles := []string{"foo", "bar"} + if !StringSliceAreEqual(result.files, expectedFiles) { + t.Errorf("got %v, want %v", result.files, expectedFiles) + } +} + +func TestGetPullRequestCommits(t *testing.T) { + client := FakeGitHubService{ + commit: &github.RepositoryCommit{ + SHA: github.String("abc123"), + Files: []github.CommitFile{{Filename: github.String("foo")}, {Filename: github.String("bar")}}, + }, + commitList: []*github.RepositoryCommit{ + { + SHA: github.String("abc123"), + }, + { + SHA: github.String("abc123"), + }, + }, + } + + result, _ := GetPullRequestCommits(&client, 42) + + if len(result) != 2 { + t.Errorf("got %v, want %v", len(result), 2) + } + + expectedFiles := []string{"foo", "bar"} + if !StringSliceAreEqual(result[0].files, expectedFiles) { + t.Errorf("[%d] got %v, want %v", 0, result[0].files, expectedFiles) + } + if !StringSliceAreEqual(result[1].files, expectedFiles) { + t.Errorf("[%d] got %v, want %v", 1, result[1].files, expectedFiles) + } +} + +func TestContstructingChangeset(t *testing.T) { + var tests = []struct { + name string + sha string + files []github.CommitFile + expected Changeset + }{ + { + "construct from single file", + "abc123", + []github.CommitFile{{Filename: github.String("foo")}}, + Changeset{id: "abc123", files: []string{"foo"}}, + }, + { + "construct from multiple files", + "1234", + []github.CommitFile{{Filename: github.String("foo")}, {Filename: github.String("bar")}}, + Changeset{id: "1234", files: []string{"foo", "bar"}}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + commit := &github.RepositoryCommit{SHA: github.String(tt.sha), Files: tt.files} + result := ConstructChangeset(commit) + + if tt.expected.id != result.id { + t.Errorf("got %v, want %v", result.id, tt.expected.id) + } + + if !StringSliceAreEqual(tt.expected.files, result.files) { + t.Errorf("got %v, want %v", result.files, tt.expected.files) + } + }) + } +} + +func TestIsChangesetSpanning(t *testing.T) { + var tests = []struct { + name string + changeset []string + files []string + expected bool + }{ + { + "matching sets of 1 element do not span", + []string{"1"}, + []string{"1"}, + false, + }, + { + "subdirectories do not match top level directories", + []string{"a/1"}, + []string{"1"}, + false, + }, + { + "distinct sets do not span", + []string{"1", "2"}, + []string{"a", "b"}, + false, + }, + { + "single matching path does not span", + []string{"1", "a"}, + []string{"1", "2"}, + false, + }, + { + "path and subdirectory of same restriction do not span", + []string{"1", "1/a"}, + []string{"1", "2", "3"}, + false, + }, + { + "matching sets span", + []string{"1", "2"}, + []string{"1", "2"}, + true, + }, + { + "superset of restricted paths spans", + []string{"1", "2", "3"}, + []string{"1", "2"}, + true, + }, + { + "subset of restricted paths spans", + []string{"1", "3"}, + []string{"1", "2", "3"}, + true, + }, + { + "subdirectories of restricted paths span", + []string{"1/a", "3/b"}, + []string{"1", "2", "3"}, + true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + changeset := &Changeset{files: tt.changeset} + + result := ChangesetSpanningPathList(changeset, tt.files) + + if result != tt.expected { + t.Errorf("got %t, want %t", result, tt.expected) + } + }) + } +} + +type FakeGitHubService struct { + pullRequest *github.PullRequest + commit *github.RepositoryCommit + commitList []*github.RepositoryCommit +} + +func (repository FakeGitHubService) GetPullRequest(prId int) (*github.PullRequest, *github.Response, error) { + return repository.pullRequest, nil, nil +} + +func (repository FakeGitHubService) GetCommit(commitSha string) (*github.RepositoryCommit, *github.Response, error) { + return repository.commit, nil, nil + +} + +func (repository FakeGitHubService) ListCommits(prId int, options *github.ListOptions) ([]*github.RepositoryCommit, *github.Response, error) { + return repository.commitList, &github.Response{NextPage: 0}, nil +} + +func StringSliceAreEqual(left []string, right []string) bool { + if len(left) != len(right) { + return false + } + + for i, elem := range left { + if elem != right[i] { + return false + } + } + + return true +}