Merge pull request #2109 from haiyanmeng/stats

Add support to get files referred in the generators and tranformers fields
This commit is contained in:
Jeff Regan
2020-01-16 09:54:14 -08:00
committed by GitHub
22 changed files with 422 additions and 126 deletions

View File

@@ -44,7 +44,7 @@ type kustomizeSearch struct {
// /register: not implemented, but meant as an endpoint for adding new
// kustomization files to the corpus.
func NewKustomizeSearch(ctx context.Context) (*kustomizeSearch, error) {
idx, err := index.NewKustomizeIndex(ctx)
idx, err := index.NewKustomizeIndex(ctx, "kustomize")
if err != nil {
return nil, err
}

View File

@@ -9,6 +9,8 @@ import (
"os"
"time"
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
"sigs.k8s.io/kustomize/api/internal/crawl/crawler"
"sigs.k8s.io/kustomize/api/internal/crawl/crawler/github"
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
@@ -26,6 +28,7 @@ const (
)
type CrawlMode int
const (
CrawlUnknown CrawlMode = iota
// Crawl all the kustomization files in all the repositories of a Github user
@@ -125,13 +128,13 @@ func main() {
// seen tracks the IDs of all the documents in the index.
// This helps avoid indexing a given document multiple times.
seen := crawler.NewSeenMap()
seen := utils.NewSeenMap()
mode := NewCrawlMode(*modePtr)
ghCrawlerConstructor := func(user, repo string) crawler.Crawler {
if user != "" {
return github.NewCrawler(githubToken, retryCount, clientCache,
return github.NewCrawler(githubToken, retryCount, clientCache,
github.QueryWith(
github.Filename("kustomization.yaml"),
github.Filename("kustomization.yml"),

View File

@@ -5,12 +5,21 @@ import (
"flag"
"fmt"
"log"
"path/filepath"
"net/http"
"os"
"sort"
"time"
"sigs.k8s.io/kustomize/api/internal/crawl/crawler/github"
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
"sigs.k8s.io/kustomize/api/internal/crawl/index"
"sigs.k8s.io/kustomize/api/konfig"
)
const (
githubAccessTokenVar = "GITHUB_ACCESS_TOKEN"
retryCount = 3
)
// iterateArr adds each item in arr into countMap.
@@ -25,17 +34,6 @@ func iterateArr(arr []string, countMap map[string]int) {
}
// isKustomizationFile determines whether a file path is a kustomization file
func isKustomizationFile(path string) bool {
basename := filepath.Base(path)
for _, name := range konfig.RecognizedKustomizationFileNames() {
if basename == name {
return true
}
}
return false
}
// SortMapKeyByValue takes a map as its input, sorts its keys according to their values
// in the map, and outputs the sorted keys as a slice.
func SortMapKeyByValue(m map[string]int) []string {
@@ -44,10 +42,129 @@ func SortMapKeyByValue(m map[string]int) []string {
keys = append(keys, key)
}
// sort keys according to their values in the map m
sort.Slice(keys, func(i, j int) bool {return m[keys[i]] > m[keys[j]]})
sort.Slice(keys, func(i, j int) bool { return m[keys[i]] > m[keys[j]] })
return keys
}
func GeneratorOrTransformerStats(ctx context.Context,
docs []*doc.Document, isGenerator bool, idx *index.KustomizeIndex) {
fieldName := "generators"
if !isGenerator {
fieldName = "transformers"
}
// allReferredDocs includes all the documents referred in the field
allReferredDocs := doc.NewUniqueDocuments()
// docUsingGeneratorCount counts the number of the kustomization files using generators or transformers
docCount := 0
// collect all the documents referred in the field
for _, d := range docs {
kdoc := doc.KustomizationDocument{
Document: *d,
}
referredDocs, err := kdoc.GetResources(false, !isGenerator, isGenerator)
if err != nil {
log.Printf("failed to parse the %s field of the Document (%s): %v",
fieldName, d.Path(), err)
}
if len(referredDocs) > 0 {
docCount++
allReferredDocs.AddDocuments(referredDocs)
}
}
fileCount, dirCount, fileTypeDocs, dirTypeDocs := DocumentTypeSummary(ctx, allReferredDocs.Documents())
// check whether any of the files are not in the index
nonExistFileCount := ExistInIndex(idx, fileTypeDocs, fieldName + " file ")
// check whether any of the dirs are not in the index
nonExistDirCount := ExistInIndex(idx, dirTypeDocs, fieldName + " dir ")
GitRepositorySummary(fileTypeDocs, fieldName + " files")
GitRepositorySummary(dirTypeDocs, fieldName + " dirs")
fmt.Printf("%d kustomization files use %s: %d %s are files and %d %s are dirs.\n",
docCount, fieldName, fileCount, fieldName, dirCount, fieldName)
fmt.Printf("%d %s files do not exist in the index\n", nonExistFileCount, fieldName)
fmt.Printf("%d %s dirs do not exist in the index\n", nonExistDirCount, fieldName)
}
// GitRepositorySummary counts the distribution of docs:
// 1) how many git repositories are these docs from?
// 2) how many docs are from each git repository?
func GitRepositorySummary(docs []*doc.Document, msgPrefix string) {
m := make(map[string]int)
for _, d := range docs {
if _, ok := m[d.RepositoryURL]; ok {
m[d.RepositoryURL]++
} else {
m[d.RepositoryURL] = 1
}
}
sortedKeys := SortMapKeyByValue(m)
for _, k := range sortedKeys {
fmt.Printf("%d %s are from %s\n", m[k], msgPrefix, k)
}
}
// ExistInIndex goes through each Document in docs, and check whether it is in the index or not.
// It returns the number of documents which does not exist in the index.
func ExistInIndex(idx *index.KustomizeIndex, docs []*doc.Document, msgPrefix string) int {
nonExistCount := 0
for _, d := range docs {
exists, err := idx.Exists(d.ID())
if err != nil {
log.Println(err)
}
if !exists {
log.Printf("%s (%s) does not exist in the index", msgPrefix, d.Path())
nonExistCount++
}
}
return nonExistCount
}
// DocumentTypeSummary goes through each doc in docs, and determines whether it is a file or dir.
func DocumentTypeSummary(ctx context.Context, docs []*doc.Document) (
fileCount, dirCount int, files, dirs []*doc.Document) {
githubToken := os.Getenv(githubAccessTokenVar)
if githubToken == "" {
log.Fatalf("Must set the variable '%s' to make github requests.\n",
githubAccessTokenVar)
}
ghCrawler := github.NewCrawler(githubToken, retryCount, &http.Client{}, github.QueryWith())
for _, d := range docs {
oldFilePath := d.FilePath
if err := ghCrawler.FetchDocument(ctx, d); err != nil {
log.Printf("FetchDocument failed on %s: %v", d.Path(), err)
continue
}
if d.FilePath == oldFilePath {
fileCount++
files = append(files, d)
} else {
dirCount++
dirs = append(dirs, d)
}
}
return fileCount, dirCount, files, dirs
}
// ExistInSlice checks where target exits in items.
func ExistInSlice(items []string, target string) bool {
for _, item := range items {
if item == target {
return true
}
}
return false
}
func main() {
topKindsPtr := flag.Int(
"kinds", -1,
@@ -64,10 +181,12 @@ If you only want to list the 10 most popular identifiers, set the flag to 10.`)
`the number of kustomize features to be listed according to their popularities.
By default, all the features will be listed.
If you only want to list the 10 most popular features, set the flag to 10.`)
indexNamePtr := flag.String(
"index", "kustomize", "The name of the ElasticSearch index.")
flag.Parse()
ctx := context.Background()
idx, err := index.NewKustomizeIndex(ctx)
idx, err := index.NewKustomizeIndex(ctx, *indexNamePtr)
if err != nil {
log.Fatalf("Could not create an index: %v\n", err)
}
@@ -85,6 +204,12 @@ If you only want to list the 10 most popular features, set the flag to 10.`)
// ids tracks the unique IDs of the documents in the index
ids := make(map[string]struct{})
// generatorDocs includes all the docs using generators
generatorDocs := make([]*doc.Document, 0)
// transformersDocs includes all the docs using transformers
transformersDocs := make([]*doc.Document, 0)
// get all the documents in the index
query := []byte(`{ "query":{ "match_all":{} } }`)
it := idx.IterateQuery(query, 10000, 60*time.Second)
@@ -94,21 +219,28 @@ If you only want to list the 10 most popular features, set the flag to 10.`)
if _, ok := ids[hit.ID]; !ok {
ids[hit.ID] = struct{}{}
} else {
fmt.Printf("Found duplicate ID (%s)\n", hit.ID)
log.Printf("Found duplicate ID (%s)\n", hit.ID)
}
count++
iterateArr(hit.Document.Kinds, kindsMap)
iterateArr(hit.Document.Identifiers, identifiersMap)
if isKustomizationFile(hit.Document.FilePath) {
if doc.IsKustomizationFile(hit.Document.FilePath) {
kustomizationFilecount++
iterateArr(hit.Document.Identifiers, kustomizeIdentifiersMap)
if ExistInSlice(hit.Document.Identifiers, "generators") {
generatorDocs = append(generatorDocs, hit.Document.Copy())
}
if ExistInSlice(hit.Document.Identifiers, "transformers") {
transformersDocs = append(transformersDocs, hit.Document.Copy())
}
}
}
}
if err := it.Err(); err != nil {
fmt.Printf("Error iterating: %v\n", err)
log.Fatalf("Error iterating: %v\n", err)
}
sortedKindsMapKeys := SortMapKeyByValue(kindsMap)
@@ -147,4 +279,7 @@ There are %d documents in the kustomize index.
kustomizeFeatureCount++
}
}
GeneratorOrTransformerStats(ctx, generatorDocs, true, idx)
GeneratorOrTransformerStats(ctx, transformersDocs, false, idx)
}

View File

@@ -11,8 +11,13 @@ spec:
image: gcr.io/haiyanmeng-gke-dev/kustomize_stats:v1
imagePullPolicy: Always
command: ["/kustomize_stats"]
args: ["--kinds=50", "--identifiers=50", "--kustomize-features=50"]
args: ["--index=kustomize", "--kinds=50", "--identifiers=50", "--kustomize-features=50"]
env:
- name: GITHUB_ACCESS_TOKEN
valueFrom:
secretKeyRef:
name: github-access-token
key: token
- name: ELASTICSEARCH_URL
valueFrom:
configMapKeyRef:

View File

@@ -10,6 +10,8 @@ import (
"os"
"sync"
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
"sigs.k8s.io/kustomize/api/internal/crawl/index"
_ "github.com/gomodule/redigo/redis"
@@ -29,7 +31,7 @@ type Crawler interface {
// Crawl returns when it is done processing. This method does not take
// ownership of the channel. The channel is write only, and it
// designates where the crawler should forward the documents.
Crawl(ctx context.Context, output chan<- CrawledDocument, seen SeenMap) error
Crawl(ctx context.Context, output chan<- CrawledDocument, seen utils.SeenMap) error
// Get the document data given the FilePath, Repo, and Ref/Tag/Branch.
FetchDocument(context.Context, *doc.Document) error
@@ -43,25 +45,15 @@ type CrawledDocument interface {
ID() string
GetDocument() *doc.Document
// Get all the Documents directly referred in a Document.
GetResources() ([]*doc.Document, error)
// For a Document representing a non-kustomization file, an empty slice will be returned.
// For a Document representing a kustomization file:
// the `includeResources` parameter determines whether the documents referred in the `resources` field are returned or not;
// the `includeTransformers` parameter determines whether the documents referred in the `transformers` field are returned or not;
// the `includeGenerators` parameter determines whether the documents referred in the `generators` field are returned or not.
GetResources(includeResources, includeTransformers, includeGenerators bool) ([]*doc.Document, error)
WasCached() bool
}
type SeenMap map[string]struct{}
func (seen SeenMap) Seen(item string) bool {
_, ok := seen[item]
return ok
}
func (seen SeenMap) Add(item string) {
seen[item] = struct{}{}
}
func NewSeenMap() SeenMap {
return make(map[string]struct{})
}
type CrawlSeed []*doc.Document
type IndexFunc func(CrawledDocument, index.Mode) error
@@ -84,18 +76,18 @@ func findMatch(d *doc.Document, crawlers []Crawler) Crawler {
}
func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
seen SeenMap, stack *CrawlSeed) {
seen utils.SeenMap, stack *CrawlSeed) {
seen.Add(cdoc.ID())
// Insert into index
if err := indx(cdoc, index.InsertOrUpdate); err != nil {
logger.Printf("Failed to insert or update %s %s: %v",
cdoc.GetDocument().RepositoryURL, cdoc.GetDocument().FilePath, err)
logger.Printf("Failed to insert or update doc(%s): %v",
cdoc.GetDocument().Path(), err)
return
}
deps, err := cdoc.GetResources()
deps, err := cdoc.GetResources(true, false, false)
if err != nil {
logger.Println(err)
return
@@ -110,7 +102,7 @@ func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
}
func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv Converter, indx IndexFunc,
seen SeenMap, stack *CrawlSeed) {
seen utils.SeenMap, stack *CrawlSeed) {
UpdatedDocCount := 0
seenDocCount := 0
@@ -131,7 +123,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
*docsPtr = (*docsPtr)[:(len(*docsPtr) - 1)]
crawledDocCount++
logger.Printf("Crawling doc %d: %s %s", crawledDocCount, tail.RepositoryURL, tail.FilePath)
logger.Printf("Crawling doc %d: %s", crawledDocCount, tail.Path())
if seen.Seen(tail.ID()) {
logger.Printf("this doc has been seen before")
@@ -140,7 +132,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
}
if tail.WasCached() {
logger.Printf("%s %s is cached already", tail.RepositoryURL, tail.FilePath)
logger.Printf("doc(%s) is cached already", tail.Path())
cachedDocCount++
continue
}
@@ -161,10 +153,8 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
// `bases` field.
seen.Add(tail.ID())
if err := match.FetchDocument(ctx, tail); err != nil {
logger.Printf("FetchDocument failed on %s %s: %v",
tail.RepositoryURL, tail.FilePath, err)
logger.Printf("FetchDocument failed on doc(%s): %v", tail.Path(), err)
FetchDocumentErrCount++
// delete the document from the index
cdoc := &doc.KustomizationDocument{
@@ -172,16 +162,14 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
}
seen.Add(cdoc.ID())
if err := indx(cdoc, index.Delete); err != nil {
logger.Printf("Failed to delete %s %s: %v",
cdoc.RepositoryURL, cdoc.FilePath, err)
logger.Printf("Failed to delete doc(%s): %v", cdoc.Path(), err)
}
deleteDocCount++
continue
}
if err := match.SetCreated(ctx, tail); err != nil {
logger.Printf("SetCreated failed on %s %s: %v",
tail.RepositoryURL, tail.FilePath, err)
logger.Printf("SetCreated failed on doc(%s): %v", tail.Path(), err)
SetCreatedErrCount++
}
@@ -189,8 +177,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
// If conv returns an error, cdoc can still be added into the index so that
// cdoc.Document can be searched.
if err != nil {
logger.Printf("conv failed on %s %s: %v",
tail.RepositoryURL, tail.FilePath, err)
logger.Printf("conv failed on doc(%s): %v", tail.Path(), err)
convErrCount++
}
@@ -211,7 +198,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
// CrawlFromSeed updates all the documents in seed, and crawls all the new
// documents referred in the seed.
func CrawlFromSeed(ctx context.Context, seed CrawlSeed, crawlers []Crawler,
conv Converter, indx IndexFunc, seen SeenMap) {
conv Converter, indx IndexFunc, seen utils.SeenMap) {
// stack tracks the documents directly referred in other documents.
stack := make(CrawlSeed, 0)
@@ -247,7 +234,7 @@ func CrawlFromSeed(ctx context.Context, seed CrawlSeed, crawlers []Crawler,
// from the seed will be processed before any other documents from the
// crawlers.
func CrawlGithubRunner(ctx context.Context, output chan<- CrawledDocument,
crawlers []Crawler, seen SeenMap) []error {
crawlers []Crawler, seen utils.SeenMap) []error {
errs := make([]error, len(crawlers))
wg := sync.WaitGroup{}
@@ -291,7 +278,7 @@ func CrawlGithubRunner(ctx context.Context, output chan<- CrawledDocument,
// CrawlGithub crawls all the kustomization files on Github.
func CrawlGithub(ctx context.Context, crawlers []Crawler, conv Converter,
indx IndexFunc, seen SeenMap) {
indx IndexFunc, seen utils.SeenMap) {
// stack tracks the documents directly referred in other documents.
stack := make(CrawlSeed, 0)

View File

@@ -12,6 +12,8 @@ import (
"testing"
"time"
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
"sigs.k8s.io/kustomize/api/internal/crawl/index"
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
@@ -76,7 +78,7 @@ func newCrawler(matchPrefix string, err error,
// Crawl implements the Crawler interface for testing.
func (c testCrawler) Crawl(_ context.Context,
output chan<- CrawledDocument, _ SeenMap) error {
output chan<- CrawledDocument, _ utils.SeenMap) error {
for i, d := range c.docs {
isResource := true
@@ -182,7 +184,7 @@ func TestCrawlGithubRunner(t *testing.T) {
defer close(output)
defer wg.Done()
seen := NewSeenMap()
seen := utils.NewSeenMap()
errs := CrawlGithubRunner(context.Background(),
output, test.tc, seen)
@@ -324,7 +326,7 @@ resources:
visited[d.ID()]++
return nil
},
NewSeenMap(),
utils.NewSeenMap(),
)
if lv, lc := len(visited), len(tc.corpus); lv != lc {
t.Errorf("error: %d of %d documents visited.", lv, lc)

View File

@@ -16,6 +16,8 @@ import (
"strings"
"time"
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
"sigs.k8s.io/kustomize/api/internal/crawl/crawler"
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
"sigs.k8s.io/kustomize/api/internal/crawl/httpclient"
@@ -68,7 +70,7 @@ func (gc githubCrawler) DefaultBranch(repo string) string {
// Implements crawler.Crawler.
func (gc githubCrawler) Crawl(ctx context.Context,
output chan<- crawler.CrawledDocument, seen crawler.SeenMap) error {
output chan<- crawler.CrawledDocument, seen utils.SeenMap) error {
noETagClient := GhClient{
RequestConfig: gc.client.RequestConfig,
@@ -195,9 +197,9 @@ func (gc githubCrawler) Match(d *doc.Document) bool {
type RangeQueryResult struct {
totalDocCnt uint64
seenDocCnt uint64
newDocCnt uint64
errorCnt uint64
seenDocCnt uint64
newDocCnt uint64
errorCnt uint64
}
func (r *RangeQueryResult) Add(other RangeQueryResult) {
@@ -209,7 +211,7 @@ func (r *RangeQueryResult) Add(other RangeQueryResult) {
func (r *RangeQueryResult) String() string {
return fmt.Sprintf("got %d files from API. "+
"%d have been seen before. %d are new and sent to the output channel." +
"%d have been seen before. %d are new and sent to the output channel."+
" %d have kustomizationResultAdapter errors.",
r.totalDocCnt, r.seenDocCnt, r.newDocCnt, r.errorCnt)
}
@@ -217,7 +219,7 @@ func (r *RangeQueryResult) String() string {
// processQuery follows all of the pages in a query, and updates/adds the
// documents from the crawl to the datastore/index.
func processQuery(ctx context.Context, gcl GhClient, query string,
output chan<- crawler.CrawledDocument, seen crawler.SeenMap,
output chan<- crawler.CrawledDocument, seen utils.SeenMap,
branchMap map[string]string) (RangeQueryResult, error) {
queryPages := make(chan GhResponseInfo)
@@ -271,7 +273,7 @@ func processQuery(ctx context.Context, gcl GhClient, query string,
return result, errs
}
func kustomizationResultAdapter(gcl GhClient, k GhFileSpec, seen crawler.SeenMap,
func kustomizationResultAdapter(gcl GhClient, k GhFileSpec, seen utils.SeenMap,
branchMap map[string]string) (crawler.CrawledDocument, error) {
url := gcl.ReposRequest(k.Repository.FullName)
defaultBranch, err := gcl.GetDefaultBranch(url, k.Repository.URL, branchMap)

View File

@@ -117,7 +117,7 @@ type RequestConfig struct {
// understand why the request object is useful.
func (rc RequestConfig) CodeSearchRequestWith(query Query) request {
vals := url.Values{
"sort": []string{"indexed"},
"sort": []string{"indexed"},
"order": []string{"desc"},
}
req := rc.makeRequest("search/code", query, vals)

View File

@@ -3,6 +3,7 @@ package doc
import (
"fmt"
"log"
"path/filepath"
"sort"
"strings"
@@ -51,15 +52,21 @@ func (doc *KustomizationDocument) String() string {
doc.IsSame, doc.Kinds, len(doc.Identifiers), len(doc.Values))
}
// Implements the CrawlerDocument interface.
func (doc *KustomizationDocument) GetResources() ([]*Document, error) {
isResource := true
for _, suffix := range konfig.RecognizedKustomizationFileNames() {
if strings.HasSuffix(doc.FilePath, "/"+suffix) {
isResource = false
// IsKustomizationFile determines whether a file path is a kustomization file
func IsKustomizationFile(path string) bool {
basename := filepath.Base(path)
for _, name := range konfig.RecognizedKustomizationFileNames() {
if basename == name {
return true
}
}
if isResource {
return false
}
// Implements the CrawlerDocument interface.
func (doc *KustomizationDocument) GetResources(
includeResources, includeTransformers, includeGenerators bool) ([]*Document, error) {
if !IsKustomizationFile(doc.FilePath) {
return []*Document{}, nil
}
@@ -77,20 +84,42 @@ func (doc *KustomizationDocument) GetResources() ([]*Document, error) {
}
k.FixKustomizationPostUnmarshalling()
res := make([]*Document, 0, len(k.Resources))
for _, r := range k.Resources {
res := make([]*Document, 0)
if includeResources {
resourceDocs := doc.CollectDocuments(k.Resources)
res = append(res, resourceDocs...)
}
if includeGenerators {
generatorDocs := doc.CollectDocuments(k.Generators)
res = append(res, generatorDocs...)
}
if includeTransformers {
transformerDocs := doc.CollectDocuments(k.Transformers)
res = append(res, transformerDocs...)
}
return res, nil
}
// CollectDocuments construct a Document for each path in paths, and return
// a slice of Document pointers.
func (doc *KustomizationDocument) CollectDocuments(paths []string) []*Document {
docs := make([]*Document, 0, len(paths))
for _, r := range paths {
if strings.TrimSpace(r) == "" {
continue
}
next, err := doc.Document.FromRelativePath(r)
if err != nil {
log.Printf("GetResources error: %v\n", err)
log.Printf("CollectDocuments error: %v\n", err)
continue
}
res = append(res, &next)
docs = append(docs, &next)
}
return res, nil
return docs
}
func (doc *KustomizationDocument) readBytes() ([]map[string]interface{}, error) {

View File

@@ -189,11 +189,13 @@ metadata:
}
}
type TestStructForGetResources struct {
doc KustomizationDocument
resources []*Document
}
func TestGetResources(t *testing.T) {
tests := []struct {
doc KustomizationDocument
resources []*Document
}{
tests := []TestStructForGetResources{
{
doc: KustomizationDocument{
Document: Document{
@@ -248,9 +250,12 @@ resources:
resources: []*Document{},
},
}
runTest(t, tests, true, false, false)
}
func runTest(t *testing.T, tests []TestStructForGetResources, includeResources, includeTransformers, includeGenerators bool) {
for _, test := range tests {
res, err := test.doc.GetResources()
res, err := test.doc.GetResources(includeResources, includeTransformers, includeGenerators)
if err != nil {
t.Errorf("Unexpected error: %v\n", err)
continue
@@ -284,3 +289,73 @@ resources:
}
}
}
func TestGetResourcesAndGenerators(t *testing.T) {
tests := []TestStructForGetResources{
{
doc: KustomizationDocument{
Document: Document{
RepositoryURL: "sigs.k8s.io/kustomize",
FilePath: "some/path/to/kdir/kustomization.yaml",
DocumentData: `
resources:
- file.yaml
generators:
- gen.yaml
transformers:
- tr.yaml
`},
},
resources: []*Document{
{
RepositoryURL: "sigs.k8s.io/kustomize",
FilePath: "some/path/to/kdir/gen.yaml",
},
{
RepositoryURL: "sigs.k8s.io/kustomize",
FilePath: "some/path/to/kdir/file.yaml",
},
},
},
}
runTest(t, tests, true, false, true)
}
func TestGetResourcesAndGeneratorsAndTransformers(t *testing.T) {
tests := []TestStructForGetResources{
{
doc: KustomizationDocument{
Document: Document{
RepositoryURL: "sigs.k8s.io/kustomize",
FilePath: "some/path/to/kdir/kustomization.yaml",
DocumentData: `
resources:
- file.yaml
generators:
- gen.yaml
transformers:
- tr.yaml
`},
},
resources: []*Document{
{
RepositoryURL: "sigs.k8s.io/kustomize",
FilePath: "some/path/to/kdir/tr.yaml",
},
{
RepositoryURL: "sigs.k8s.io/kustomize",
FilePath: "some/path/to/kdir/gen.yaml",
},
{
RepositoryURL: "sigs.k8s.io/kustomize",
FilePath: "some/path/to/kdir/file.yaml",
},
},
},
}
runTest(t, tests, true, true, true)
}

View File

@@ -35,6 +35,11 @@ func (doc *Document) Copy() *Document {
}
}
func (doc *Document) Path() string {
return fmt.Sprintf("repoURL: %s filePath: %s branch: %s",
doc.RepositoryURL, doc.FilePath, doc.DefaultBranch)
}
// Implements the CrawlerDocument interface.
func (doc *Document) WasCached() bool {
return doc.IsSame

View File

@@ -65,7 +65,7 @@ func TestFromRelativePath(t *testing.T) {
func TestDocument_RepositoryFullName(t *testing.T) {
testCases := []struct {
doc Document
doc Document
expectedRepositoryFullName string
}{
{
@@ -108,4 +108,4 @@ func TestDocument_RepositoryFullName(t *testing.T) {
returnedRepositoryFullName)
}
}
}
}

View File

@@ -0,0 +1,36 @@
package doc
import (
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
)
// UniqueDocuments make sure a Document with a given ID appears only once
type UniqueDocuments struct {
docs []*Document
docIDs utils.SeenMap
}
func NewUniqueDocuments() UniqueDocuments {
return UniqueDocuments{
docs: []*Document{},
docIDs: utils.NewSeenMap(),
}
}
func (uds *UniqueDocuments) Add(d *Document) {
if uds.docIDs.Seen(d.ID()) {
return
}
uds.docs = append(uds.docs, d)
uds.docIDs.Add(d.ID())
}
func (uds *UniqueDocuments) AddDocuments(docs []*Document) {
for _, d := range docs {
uds.Add(d)
}
}
func (uds *UniqueDocuments) Documents() []*Document {
return uds.docs
}

View File

@@ -18,6 +18,7 @@ const (
)
type Mode int
const (
InsertOrUpdate = iota
Delete

View File

@@ -1,6 +1,6 @@
Find out the largest value of the `creationTime` field:
```
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"aggs" : {
"max_creationTime" : { "max" : { "field" : "creationTime" } }
@@ -11,7 +11,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
Find out the smallest value of the `creationTime` field:
```
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"aggs" : {
"min_creationTime" : { "min" : { "field" : "creationTime" } }
@@ -22,7 +22,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
Find out the smallest value of the `creationTime` field of all the kustomization files:
```
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {
@@ -40,7 +40,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
Find out the smallest value of the `creationTime` field of all kustomize resource files:
```
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {
@@ -58,7 +58,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
Query all the documents whose `creationTime` <= `2016-07-29T17:38:26.000Z`:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"range": {
@@ -73,7 +73,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Query all the documents whose `creationTime` falls within the specific range:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"range": {
@@ -89,7 +89,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Aggregate how many new kustomization files were added into Github each month:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {
@@ -112,7 +112,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte
Aggregate how many new kustomize resource files were added into Github each month:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {
@@ -135,7 +135,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte
Aggregate how many new kustomization files were added into Github each year:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {
@@ -158,7 +158,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte
Aggregate how many new kustomize resource files were added into Github each year:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {

View File

@@ -1,6 +1,6 @@
Count distinct values of the `defaultBranch` field:
```
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"aggs" : {
"defaultBranch_count" : {
@@ -17,7 +17,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
List all the github branches where kustomization files and kustomize resource files live,
and how many kustomization files and kustomize resource files live in each branch:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"aggs" : {
"defaultBranch" : {

View File

@@ -1,7 +1,7 @@
Count the documents whose `document` field is empty (The reason why the `document` field
of a document is empty is because of empty documents):
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"size": 10000,
"query": {
@@ -19,7 +19,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Find all the documents having the `creationTime` field set:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"exists": {
@@ -32,7 +32,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Find all the documents whose `creationTime` field is not set:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"size": 10000,
"query": {

View File

@@ -1,7 +1,7 @@
Count the documents in the index whose `repositoryUrl` field starts with
`https://github.com/`:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {
@@ -17,7 +17,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Count the documents in the index whose `repositoryUrl` field does not start with
`https://github.com/`:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {
@@ -33,7 +33,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Search all the documents matching the given `repositoryUrl` and `filePath`, and return
a version for each search hit:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"size": 10000,
"version": true,
@@ -52,7 +52,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Search all the documents whose filePath ends with one of these following three filenames:
`kustomization.yaml`, `kustomization.yml`, `kustomization`:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {
@@ -68,7 +68,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Search all the documents whose filePath does not end with any of these following
three filenames: `kustomization.yaml`, `kustomization.yml`, `kustomization`:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {

View File

@@ -10,10 +10,10 @@ curl "${ElasticSearchURL}:9200/_cat/indices?v"
Get the mapping of the index:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_mapping?pretty"
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_mapping?pretty"
```
Delete the kustomize index from the ElasticSearch cluster (**Use this command with caution**):
```
curl -X DELETE "${ElasticSearchURL}:9200/kustomize?pretty"
curl -X DELETE "${ElasticSearchURL}:9200/${INDEXNAME}?pretty"
```

View File

@@ -1,6 +1,6 @@
Count distinct values of the `repositoryUrl` field:
```
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"aggs" : {
"repositoryUrl_count" : {
@@ -16,7 +16,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
Count how many Github repositories include kustomization files:
```
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {
@@ -39,7 +39,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
Count how many Github repositories include kustomize resource files:
```
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {
@@ -64,7 +64,7 @@ List all the github repositories including kustomization files and kustomize res
and how many kustomization files and kustomize resource files each github repository includes
(the github repository including the most kustomization files is listed first):
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"aggs" : {
"repositoryUrl" : {
@@ -80,7 +80,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte
List the top 20 Github repositories including the most amount of kustomization files:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {
@@ -103,7 +103,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte
List the top 20 Github repositories including the most amount of kustomize resource files:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {

View File

@@ -1,6 +1,6 @@
Search for all the kustomize resource files including a Deployment object:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"match" : {
@@ -16,7 +16,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Search for all the kustomize resource files including a Deployment object, but only
including the `kinds` field in the result:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"_source": {
"includes": ["kinds"]
@@ -35,7 +35,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Search for all the kustomize resource files including both a Deployment object and
a Service object:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"match" : {
@@ -52,7 +52,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Count the number of documents including Deployment and the number of documents
including Service:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"size": 0,
"aggs" : {
@@ -71,7 +71,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Search for all the kustomization files involving CRDs:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"size": 10000,
"query": {
@@ -87,7 +87,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Search for all the kustomization files defining configMapGenerator:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"size": 10000,
"query": {
@@ -103,7 +103,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Search for all the documents having a `kind` field:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {
@@ -118,7 +118,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Search for all the kuostmization files having a `kind` field:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"bool": {
@@ -134,7 +134,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
Search for all the kustomization files defining the `generatorOptions:disableNameSuffixHash` feature:
```
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"match" : {

View File

@@ -0,0 +1,16 @@
package utils
type SeenMap map[string]struct{}
func (seen SeenMap) Seen(item string) bool {
_, ok := seen[item]
return ok
}
func (seen SeenMap) Add(item string) {
seen[item] = struct{}{}
}
func NewSeenMap() SeenMap {
return make(map[string]struct{})
}