Update module span to check commits

- Use regex to detect if check should run
- Update scan to be per-commit
This commit is contained in:
Sam Wronski
2020-10-30 16:03:30 -07:00
parent 76a8f034cb
commit 877da8da6d
2 changed files with 488 additions and 45 deletions

View File

@@ -19,11 +19,61 @@ import (
"flag"
"fmt"
"os"
"regexp"
"strings"
"github.com/google/go-github/github"
)
// splitCommitsHelp is a help message displayed when a PR has commits which
// span modules.
const splitCommitsHelp = "\nModifications to multiple restricted directories occurred.\n" +
"To resolve this please limit code changes in a commit to a single go module.\n\n" +
"You may learn more about how to split your existing commits at: https://git-scm.com/docs/git-rebase#_splitting_commits\n"
// Ignore span pattern defines a regular expression pattern that, when matched,
// causes this check to be ignored.
// Pattern expects "ALLOW_MODULE_SPAN" in CAPS on a line by itself.
// Spaces may be included before or after but no other characters with one
// exception. ">" may be used to quote the exclusion.
// Pattern may be provided at any point in the pull request description.
//
// Ex: "ALLOW_MODULE_SPAN", "> ALLOW_MODULE_SPAN", " ALLOW_MODULE_SPAN "
const ignoreSpanPattern = "(?m)^>?\\s*ALLOW_MODULE_SPAN\\s*$"
// Changeset represents a set of file modifications associated with a commit
type Changeset struct {
files []string
id string
}
// GitHubRepository represents a pairing of the owner and repository to make
// passing around references to specific projects simpler
type GitHubRepository struct {
client *github.Client
owner *string
repo *string
}
// GitHubService is the collection of GitHub API interactions this service consumes
type GitHubService interface {
GetPullRequest(prId int) (*github.PullRequest, *github.Response, error)
GetCommit(commitSha string) (*github.RepositoryCommit, *github.Response, error)
ListCommits(prId int, options *github.ListOptions) ([]*github.RepositoryCommit, *github.Response, error)
}
func (repository GitHubRepository) GetPullRequest(prId int) (*github.PullRequest, *github.Response, error) {
return repository.client.PullRequests.Get(context.Background(), *repository.owner, *repository.repo, prId)
}
func (repository GitHubRepository) GetCommit(commitSha string) (*github.RepositoryCommit, *github.Response, error) {
return repository.client.Repositories.GetCommit(context.Background(), *repository.owner, *repository.repo, commitSha)
}
func (repository GitHubRepository) ListCommits(prId int, options *github.ListOptions) ([]*github.RepositoryCommit, *github.Response, error) {
return repository.client.PullRequests.ListCommits(context.Background(), *repository.owner, *repository.repo, prId, options)
}
func main() {
owner := flag.String("owner", "", "the github repository owner name")
repo := flag.String("repo", "", "the github repository name")
@@ -41,79 +91,169 @@ func main() {
client := github.NewClient(nil)
files, err := ListAllPullRequestFiles(client, owner, pullrequest, repo)
githubRepo := &GitHubRepository{
client: client,
owner: owner,
repo: repo,
}
// Check if module span is allowed before scanning on commits
isSpanAllowed, err := ModuleSpanAllowed(githubRepo, *pullrequest)
if err != nil {
fmt.Println("Unable to retrieve pull request details:", err.Error())
fmt.Printf("unable to retrieve pull request details: %v", err.Error())
os.Exit(2)
}
contributedRestrictedPaths := CountRestrictedPathUses(files, restrictedPaths)
modifiedRestrictedDirectories := CountModifiedRestrictedDirectories(contributedRestrictedPaths)
if isSpanAllowed {
fmt.Println("Check not run. Module spanning was allowed in this Pull Request.")
os.Exit(0)
}
isSpanningPull, _, err := PullRequestSpanningPathList(githubRepo, *pullrequest, restrictedPaths)
if err != nil {
fmt.Printf("unable to retrieve pull request details: %v", err)
os.Exit(2)
}
// Exit with error if two or more restricted directories where modified
if modifiedRestrictedDirectories > 1 {
fmt.Println("Modifications to multiple restricted directories occurred.")
if isSpanningPull {
// Provide a suggestion for potential solution if the check fails.
fmt.Println(splitCommitsHelp)
os.Exit(1)
}
}
// ListAllPullRequestFiles retrieves as many files as possible for the
// target pull request.
//
// Note: GitHub API limits ListFiles to a maximum of 3000 files. Very large
// changes which exceed this limit may pass this check even if they
// do contain spanning changes.
// see: https://developer.github.com/v3/pulls/#list-pull-requests-files
func ListAllPullRequestFiles(client *github.Client, owner *string, pullrequest *int, repo *string) ([]*github.CommitFile, error) {
// ConstructChangeset creates a changeset from a GitHub Commit object
func ConstructChangeset(commit *github.RepositoryCommit) *Changeset {
id := commit.SHA
fileset := []string{}
for _, file := range commit.Files {
fileset = append(fileset, *file.Filename)
}
return &Changeset{
files: fileset,
id: *id,
}
}
// StringAllowsModuleSpan tests if a string matches the allow span regex
func StringAllowsModuleSpan(body string) (bool, error) {
return regexp.MatchString(ignoreSpanPattern, body)
}
// IsModuleSpanAllowed tests a Pull Requests description for a regular
// expression. If the expression matches then spanning modules are allowed.
func ModuleSpanAllowed(repository GitHubService, pullId int) (bool, error) {
// Note: There are multiple ways to pull a github commit object
// we want a RepositoryCommit.
pullRequest, _, err := repository.GetPullRequest(pullId)
if err != nil {
return false, err
}
return StringAllowsModuleSpan(*pullRequest.Body)
}
// GetCommitChanges looks up a github commit by SHA and returns a Changeset
// containing the modified files in the specified commit.
func GetCommitChanges(repository GitHubService, commitSha string) (*Changeset, error) {
// Note: There are multiple ways to pull a github commit object
// we want a RepositoryCommit.
commit, _, err := repository.GetCommit(commitSha)
if err != nil {
return nil, err
}
return ConstructChangeset(commit), nil
}
// GetPullRequestCommits constructs a list of all commits and the associated
// files changes in the given pull request
func GetPullRequestCommits(repository GitHubService, pullrequest int) ([]*Changeset, error) {
// foundFiles across all pages from github api
var foundFiles []*github.CommitFile
// GitHub returns the first 30 files by default, increase this value
// Note: Page 1 is the first page of results. Page 0 is an end of list mark.
// Github only returns (max) 100 results per page and PR's may exceed this
// so loop until all pages have been enumerated.
options := &github.ListOptions{PerPage: 100, Page: 1}
var collectedCommits []*github.RepositoryCommit
// Github only returns a limited set of commits per request and PR's may
// exceed this so loop until all pages have been enumerated.
options := &github.ListOptions{Page: 1}
for options.Page != 0 {
files, response, err := client.PullRequests.ListFiles(context.Background(), *owner, *repo, *pullrequest, options)
commits, response, err := repository.ListCommits(pullrequest, options)
// If an error has occurred while querying api exit early, report error
if err != nil {
return nil, err
}
foundFiles = append(foundFiles, files...)
collectedCommits = append(collectedCommits, commits...)
// setup next page to continue loop
options = &github.ListOptions{PerPage: 100, Page: response.NextPage}
options = &github.ListOptions{Page: response.NextPage}
}
return foundFiles, nil
var changesetResults []*Changeset
for _, commit := range collectedCommits {
// The repository commits from list commits are not hydrated
// We will need to retrieve the complete object:
changeset, err := GetCommitChanges(repository, *commit.SHA)
if err != nil {
return nil, err
}
changesetResults = append(changesetResults, changeset)
}
return changesetResults, nil
}
// CountModifiedRestrictedDirectories Accepts a map of paths and the number of
// occurances and returns the count of the paths which had a non-zero value.
func CountModifiedRestrictedDirectories(contributedRestrictedPaths map[string]int) int {
modifiedRestrictedDirectories := 0
for _, occurance := range contributedRestrictedPaths {
if occurance != 0 {
modifiedRestrictedDirectories++
// IsPullRequestSpanningPathList tests if a pull request spans multiple
// directory paths
func PullRequestSpanningPathList(repository GitHubService, pullrequest int, paths []string) (bool, []*Changeset, error) {
// Create a buffer for commits
changesets, err := GetPullRequestCommits(repository, pullrequest)
if err != nil {
return false, nil, err
}
spanningChangesExist := false
for _, changeset := range changesets {
if ChangesetSpanningPathList(changeset, paths) {
// When detecting the first spanning changeset print a prefix message
if !spanningChangesExist {
fmt.Printf("Spanning changesets detected in the following commits:\n\n")
}
fmt.Printf("\t* %s\n", changeset.id)
// In order provide a full list of outstanding commits, do not shortcircuit this check
spanningChangesExist = true
}
}
return modifiedRestrictedDirectories
return spanningChangesExist, changesets, nil
}
// CountRestrictedPathUses Constructs a map that contains the number of
// references keyed to each restricted path. This provides details about how
// many files in the list are associated with each restricted path.
func CountRestrictedPathUses(files []*github.CommitFile, restrictedPaths []string) map[string]int {
contributedRestrictedPaths := make(map[string]int)
for _, path := range restrictedPaths {
contributedRestrictedPaths[path] = 0
}
// IsChangesetSpanningPathList tests if a changeset is spanning
// multiple directory paths.
func ChangesetSpanningPathList(changeset *Changeset, paths []string) bool {
matchedPath := ""
for _, file := range files {
for path := range contributedRestrictedPaths {
if strings.HasPrefix(*file.Filename, path) {
contributedRestrictedPaths[path]++
for _, file := range changeset.files {
for _, path := range paths {
if strings.HasPrefix(file, path) {
// If a different path has already matched then the changeset spans multiple restricted paths
if matchedPath != "" && matchedPath != path {
return true
}
matchedPath = path
}
}
}
return contributedRestrictedPaths
// If more than 1 matched paths then the changeset spans paths
return false
}

303
cmd/prchecker/main_test.go Normal file
View File

@@ -0,0 +1,303 @@
package main
import (
"testing"
"github.com/google/go-github/github"
)
func TestStringAllowsModuleSpan(t *testing.T) {
var tests = []struct {
name string
body string
matchExpected bool
}{
{
"exception not included",
"foo",
false,
},
{
"exception mentioned in sentence does not exempt span check",
"don't ALLOW_MODULE_SPAN",
false,
},
{
"PR body is just exception",
"ALLOW_MODULE_SPAN",
true,
},
{
"support markdown quoting exception",
"> ALLOW_MODULE_SPAN",
true,
},
{
"support whitespace padding",
"\t ALLOW_MODULE_SPAN\t ",
true,
},
{
"module span exemption allowed at start of string",
"ALLOW_MODULE_SPAN\nat start of file",
true,
},
{
"module span exemption allowed at end of string",
"at end of file\nALLOW_MODULE_SPAN",
true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := StringAllowsModuleSpan(tt.body)
if err != nil {
t.Errorf(err.Error())
}
if result != tt.matchExpected {
t.Errorf("got %t, want %t", result, tt.matchExpected)
}
})
}
}
func TestIsModuleSpanAllowed(t *testing.T) {
var tests = []struct {
name string
body string
matchExpected bool
}{
{
"module spanning not allowed",
"don't ALLOW_MODULE_SPAN",
false,
},
{
"module spanning allowed",
"ALLOW_MODULE_SPAN",
true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
client := FakeGitHubService{
pullRequest: &github.PullRequest{
Body: &tt.body,
},
}
result, _ := ModuleSpanAllowed(&client, 1)
if result != tt.matchExpected {
t.Errorf("got %t, want %t", result, tt.matchExpected)
}
})
}
}
func TestGetCommitChanges(t *testing.T) {
client := FakeGitHubService{
commit: &github.RepositoryCommit{
SHA: github.String("abc123"),
Files: []github.CommitFile{{Filename: github.String("foo")}, {Filename: github.String("bar")}},
},
}
result, _ := GetCommitChanges(&client, "abc123")
if result.id != "abc123" {
t.Errorf("got %v, want %v", result.id, "abc123")
}
if len(result.files) != 2 {
t.Errorf("got %v, want %v", len(result.files), "2")
}
expectedFiles := []string{"foo", "bar"}
if !StringSliceAreEqual(result.files, expectedFiles) {
t.Errorf("got %v, want %v", result.files, expectedFiles)
}
}
func TestGetPullRequestCommits(t *testing.T) {
client := FakeGitHubService{
commit: &github.RepositoryCommit{
SHA: github.String("abc123"),
Files: []github.CommitFile{{Filename: github.String("foo")}, {Filename: github.String("bar")}},
},
commitList: []*github.RepositoryCommit{
{
SHA: github.String("abc123"),
},
{
SHA: github.String("abc123"),
},
},
}
result, _ := GetPullRequestCommits(&client, 42)
if len(result) != 2 {
t.Errorf("got %v, want %v", len(result), 2)
}
expectedFiles := []string{"foo", "bar"}
if !StringSliceAreEqual(result[0].files, expectedFiles) {
t.Errorf("[%d] got %v, want %v", 0, result[0].files, expectedFiles)
}
if !StringSliceAreEqual(result[1].files, expectedFiles) {
t.Errorf("[%d] got %v, want %v", 1, result[1].files, expectedFiles)
}
}
func TestContstructingChangeset(t *testing.T) {
var tests = []struct {
name string
sha string
files []github.CommitFile
expected Changeset
}{
{
"construct from single file",
"abc123",
[]github.CommitFile{{Filename: github.String("foo")}},
Changeset{id: "abc123", files: []string{"foo"}},
},
{
"construct from multiple files",
"1234",
[]github.CommitFile{{Filename: github.String("foo")}, {Filename: github.String("bar")}},
Changeset{id: "1234", files: []string{"foo", "bar"}},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
commit := &github.RepositoryCommit{SHA: github.String(tt.sha), Files: tt.files}
result := ConstructChangeset(commit)
if tt.expected.id != result.id {
t.Errorf("got %v, want %v", result.id, tt.expected.id)
}
if !StringSliceAreEqual(tt.expected.files, result.files) {
t.Errorf("got %v, want %v", result.files, tt.expected.files)
}
})
}
}
func TestIsChangesetSpanning(t *testing.T) {
var tests = []struct {
name string
changeset []string
files []string
expected bool
}{
{
"matching sets of 1 element do not span",
[]string{"1"},
[]string{"1"},
false,
},
{
"subdirectories do not match top level directories",
[]string{"a/1"},
[]string{"1"},
false,
},
{
"distinct sets do not span",
[]string{"1", "2"},
[]string{"a", "b"},
false,
},
{
"single matching path does not span",
[]string{"1", "a"},
[]string{"1", "2"},
false,
},
{
"path and subdirectory of same restriction do not span",
[]string{"1", "1/a"},
[]string{"1", "2", "3"},
false,
},
{
"matching sets span",
[]string{"1", "2"},
[]string{"1", "2"},
true,
},
{
"superset of restricted paths spans",
[]string{"1", "2", "3"},
[]string{"1", "2"},
true,
},
{
"subset of restricted paths spans",
[]string{"1", "3"},
[]string{"1", "2", "3"},
true,
},
{
"subdirectories of restricted paths span",
[]string{"1/a", "3/b"},
[]string{"1", "2", "3"},
true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
changeset := &Changeset{files: tt.changeset}
result := ChangesetSpanningPathList(changeset, tt.files)
if result != tt.expected {
t.Errorf("got %t, want %t", result, tt.expected)
}
})
}
}
type FakeGitHubService struct {
pullRequest *github.PullRequest
commit *github.RepositoryCommit
commitList []*github.RepositoryCommit
}
func (repository FakeGitHubService) GetPullRequest(prId int) (*github.PullRequest, *github.Response, error) {
return repository.pullRequest, nil, nil
}
func (repository FakeGitHubService) GetCommit(commitSha string) (*github.RepositoryCommit, *github.Response, error) {
return repository.commit, nil, nil
}
func (repository FakeGitHubService) ListCommits(prId int, options *github.ListOptions) ([]*github.RepositoryCommit, *github.Response, error) {
return repository.commitList, &github.Response{NextPage: 0}, nil
}
func StringSliceAreEqual(left []string, right []string) bool {
if len(left) != len(right) {
return false
}
for i, elem := range left {
if elem != right[i] {
return false
}
}
return true
}