diff --git a/api/internal/crawl/backend/search_backend.go b/api/internal/crawl/backend/search_backend.go index e6062fbf6..cdc76e6ef 100644 --- a/api/internal/crawl/backend/search_backend.go +++ b/api/internal/crawl/backend/search_backend.go @@ -44,7 +44,7 @@ type kustomizeSearch struct { // /register: not implemented, but meant as an endpoint for adding new // kustomization files to the corpus. func NewKustomizeSearch(ctx context.Context) (*kustomizeSearch, error) { - idx, err := index.NewKustomizeIndex(ctx) + idx, err := index.NewKustomizeIndex(ctx, "kustomize") if err != nil { return nil, err } diff --git a/api/internal/crawl/cmd/crawler/crawler.go b/api/internal/crawl/cmd/crawler/crawler.go index 4a7883e54..2d301efe1 100644 --- a/api/internal/crawl/cmd/crawler/crawler.go +++ b/api/internal/crawl/cmd/crawler/crawler.go @@ -9,6 +9,8 @@ import ( "os" "time" + "sigs.k8s.io/kustomize/api/internal/crawl/utils" + "sigs.k8s.io/kustomize/api/internal/crawl/crawler" "sigs.k8s.io/kustomize/api/internal/crawl/crawler/github" "sigs.k8s.io/kustomize/api/internal/crawl/doc" @@ -26,6 +28,7 @@ const ( ) type CrawlMode int + const ( CrawlUnknown CrawlMode = iota // Crawl all the kustomization files in all the repositories of a Github user @@ -125,13 +128,13 @@ func main() { // seen tracks the IDs of all the documents in the index. // This helps avoid indexing a given document multiple times. 
- seen := crawler.NewSeenMap() + seen := utils.NewSeenMap() mode := NewCrawlMode(*modePtr) ghCrawlerConstructor := func(user, repo string) crawler.Crawler { if user != "" { - return github.NewCrawler(githubToken, retryCount, clientCache, + return github.NewCrawler(githubToken, retryCount, clientCache, github.QueryWith( github.Filename("kustomization.yaml"), github.Filename("kustomization.yml"), diff --git a/api/internal/crawl/cmd/kustomize_stats/main.go b/api/internal/crawl/cmd/kustomize_stats/main.go index 403989d86..103405079 100644 --- a/api/internal/crawl/cmd/kustomize_stats/main.go +++ b/api/internal/crawl/cmd/kustomize_stats/main.go @@ -5,12 +5,21 @@ import ( "flag" "fmt" "log" - "path/filepath" + "net/http" + "os" "sort" "time" + "sigs.k8s.io/kustomize/api/internal/crawl/crawler/github" + + "sigs.k8s.io/kustomize/api/internal/crawl/doc" + "sigs.k8s.io/kustomize/api/internal/crawl/index" - "sigs.k8s.io/kustomize/api/konfig" +) + +const ( + githubAccessTokenVar = "GITHUB_ACCESS_TOKEN" + retryCount = 3 ) // iterateArr adds each item in arr into countMap. @@ -25,17 +34,6 @@ func iterateArr(arr []string, countMap map[string]int) { } -// isKustomizationFile determines whether a file path is a kustomization file -func isKustomizationFile(path string) bool { - basename := filepath.Base(path) - for _, name := range konfig.RecognizedKustomizationFileNames() { - if basename == name { - return true - } - } - return false -} - // SortMapKeyByValue takes a map as its input, sorts its keys according to their values // in the map, and outputs the sorted keys as a slice. 
func SortMapKeyByValue(m map[string]int) []string { @@ -44,10 +42,129 @@ func SortMapKeyByValue(m map[string]int) []string { keys = append(keys, key) } // sort keys according to their values in the map m - sort.Slice(keys, func(i, j int) bool {return m[keys[i]] > m[keys[j]]}) + sort.Slice(keys, func(i, j int) bool { return m[keys[i]] > m[keys[j]] }) return keys } +func GeneratorOrTransformerStats(ctx context.Context, + docs []*doc.Document, isGenerator bool, idx *index.KustomizeIndex) { + + fieldName := "generators" + if !isGenerator { + fieldName = "transformers" + } + + // allReferredDocs includes all the documents referred in the field + allReferredDocs := doc.NewUniqueDocuments() + + // docCount counts the number of the kustomization files using generators or transformers + docCount := 0 + + // collect all the documents referred in the field + for _, d := range docs { + kdoc := doc.KustomizationDocument{ + Document: *d, + } + referredDocs, err := kdoc.GetResources(false, !isGenerator, isGenerator) + if err != nil { + log.Printf("failed to parse the %s field of the Document (%s): %v", + fieldName, d.Path(), err) + } + if len(referredDocs) > 0 { + docCount++ + allReferredDocs.AddDocuments(referredDocs) + } + } + + fileCount, dirCount, fileTypeDocs, dirTypeDocs := DocumentTypeSummary(ctx, allReferredDocs.Documents()) + + // check whether any of the files are not in the index + nonExistFileCount := ExistInIndex(idx, fileTypeDocs, fieldName + " file ") + // check whether any of the dirs are not in the index + nonExistDirCount := ExistInIndex(idx, dirTypeDocs, fieldName + " dir ") + + GitRepositorySummary(fileTypeDocs, fieldName + " files") + GitRepositorySummary(dirTypeDocs, fieldName + " dirs") + + fmt.Printf("%d kustomization files use %s: %d %s are files and %d %s are dirs.\n", + docCount, fieldName, fileCount, fieldName, dirCount, fieldName) + fmt.Printf("%d %s files do not exist in the index\n", nonExistFileCount, fieldName) + fmt.Printf("%d %s 
dirs do not exist in the index\n", nonExistDirCount, fieldName) +} + +// GitRepositorySummary counts the distribution of docs: +// 1) how many git repositories are these docs from? +// 2) how many docs are from each git repository? +func GitRepositorySummary(docs []*doc.Document, msgPrefix string) { + m := make(map[string]int) + for _, d := range docs { + if _, ok := m[d.RepositoryURL]; ok { + m[d.RepositoryURL]++ + } else { + m[d.RepositoryURL] = 1 + } + } + sortedKeys := SortMapKeyByValue(m) + for _, k := range sortedKeys { + fmt.Printf("%d %s are from %s\n", m[k], msgPrefix, k) + } +} + +// ExistInIndex goes through each Document in docs, and checks whether it is in the index or not. +// It returns the number of documents that do not exist in the index. +func ExistInIndex(idx *index.KustomizeIndex, docs []*doc.Document, msgPrefix string) int { + nonExistCount := 0 + for _, d := range docs { + exists, err := idx.Exists(d.ID()) + if err != nil { + log.Println(err) + } + if !exists { + log.Printf("%s (%s) does not exist in the index", msgPrefix, d.Path()) + nonExistCount++ + } + } + return nonExistCount +} + +// DocumentTypeSummary goes through each doc in docs, and determines whether it is a file or dir. 
+func DocumentTypeSummary(ctx context.Context, docs []*doc.Document) ( + fileCount, dirCount int, files, dirs []*doc.Document) { + githubToken := os.Getenv(githubAccessTokenVar) + if githubToken == "" { + log.Fatalf("Must set the variable '%s' to make github requests.\n", + githubAccessTokenVar) + } + ghCrawler := github.NewCrawler(githubToken, retryCount, &http.Client{}, github.QueryWith()) + + for _, d := range docs { + oldFilePath := d.FilePath + if err := ghCrawler.FetchDocument(ctx, d); err != nil { + log.Printf("FetchDocument failed on %s: %v", d.Path(), err) + continue + } + + if d.FilePath == oldFilePath { + fileCount++ + files = append(files, d) + } else { + dirCount++ + dirs = append(dirs, d) + } + } + return fileCount, dirCount, files, dirs +} + +// ExistInSlice checks whether target exists in items. +func ExistInSlice(items []string, target string) bool { + for _, item := range items { + if item == target { + return true + } + } + return false +} + func main() { topKindsPtr := flag.Int( "kinds", -1, @@ -64,10 +181,12 @@ If you only want to list the 10 most popular identifiers, set the flag to 10.`) `the number of kustomize features to be listed according to their popularities. By default, all the features will be listed. 
If you only want to list the 10 most popular features, set the flag to 10.`) + indexNamePtr := flag.String( + "index", "kustomize", "The name of the ElasticSearch index.") flag.Parse() ctx := context.Background() - idx, err := index.NewKustomizeIndex(ctx) + idx, err := index.NewKustomizeIndex(ctx, *indexNamePtr) if err != nil { log.Fatalf("Could not create an index: %v\n", err) } @@ -85,6 +204,12 @@ If you only want to list the 10 most popular features, set the flag to 10.`) // ids tracks the unique IDs of the documents in the index ids := make(map[string]struct{}) + // generatorDocs includes all the docs using generators + generatorDocs := make([]*doc.Document, 0) + + // transformersDocs includes all the docs using transformers + transformersDocs := make([]*doc.Document, 0) + // get all the documents in the index query := []byte(`{ "query":{ "match_all":{} } }`) it := idx.IterateQuery(query, 10000, 60*time.Second) @@ -94,21 +219,28 @@ If you only want to list the 10 most popular features, set the flag to 10.`) if _, ok := ids[hit.ID]; !ok { ids[hit.ID] = struct{}{} } else { - fmt.Printf("Found duplicate ID (%s)\n", hit.ID) + log.Printf("Found duplicate ID (%s)\n", hit.ID) } count++ iterateArr(hit.Document.Kinds, kindsMap) iterateArr(hit.Document.Identifiers, identifiersMap) - if isKustomizationFile(hit.Document.FilePath) { + if doc.IsKustomizationFile(hit.Document.FilePath) { kustomizationFilecount++ iterateArr(hit.Document.Identifiers, kustomizeIdentifiersMap) + if ExistInSlice(hit.Document.Identifiers, "generators") { + generatorDocs = append(generatorDocs, hit.Document.Copy()) + } + if ExistInSlice(hit.Document.Identifiers, "transformers") { + transformersDocs = append(transformersDocs, hit.Document.Copy()) + } } } } + if err := it.Err(); err != nil { - fmt.Printf("Error iterating: %v\n", err) + log.Fatalf("Error iterating: %v\n", err) } sortedKindsMapKeys := SortMapKeyByValue(kindsMap) @@ -147,4 +279,7 @@ There are %d documents in the kustomize index. 
kustomizeFeatureCount++ } } + + GeneratorOrTransformerStats(ctx, generatorDocs, true, idx) + GeneratorOrTransformerStats(ctx, transformersDocs, false, idx) } diff --git a/api/internal/crawl/config/crawler/kustomize_stats/job.yaml b/api/internal/crawl/config/crawler/kustomize_stats/job.yaml index e7d22cd51..9e2457983 100644 --- a/api/internal/crawl/config/crawler/kustomize_stats/job.yaml +++ b/api/internal/crawl/config/crawler/kustomize_stats/job.yaml @@ -11,8 +11,13 @@ spec: image: gcr.io/haiyanmeng-gke-dev/kustomize_stats:v1 imagePullPolicy: Always command: ["/kustomize_stats"] - args: ["--kinds=50", "--identifiers=50", "--kustomize-features=50"] + args: ["--index=kustomize", "--kinds=50", "--identifiers=50", "--kustomize-features=50"] env: + - name: GITHUB_ACCESS_TOKEN + valueFrom: + secretKeyRef: + name: github-access-token + key: token - name: ELASTICSEARCH_URL valueFrom: configMapKeyRef: diff --git a/api/internal/crawl/crawler/crawler.go b/api/internal/crawl/crawler/crawler.go index b8f9d3874..11715616d 100644 --- a/api/internal/crawl/crawler/crawler.go +++ b/api/internal/crawl/crawler/crawler.go @@ -10,6 +10,8 @@ import ( "os" "sync" + "sigs.k8s.io/kustomize/api/internal/crawl/utils" + "sigs.k8s.io/kustomize/api/internal/crawl/index" _ "github.com/gomodule/redigo/redis" @@ -29,7 +31,7 @@ type Crawler interface { // Crawl returns when it is done processing. This method does not take // ownership of the channel. The channel is write only, and it // designates where the crawler should forward the documents. - Crawl(ctx context.Context, output chan<- CrawledDocument, seen SeenMap) error + Crawl(ctx context.Context, output chan<- CrawledDocument, seen utils.SeenMap) error // Get the document data given the FilePath, Repo, and Ref/Tag/Branch. FetchDocument(context.Context, *doc.Document) error @@ -43,25 +45,15 @@ type CrawledDocument interface { ID() string GetDocument() *doc.Document // Get all the Documents directly referred in a Document. 
- GetResources() ([]*doc.Document, error) + // For a Document representing a non-kustomization file, an empty slice will be returned. + // For a Document representing a kustomization file: + // the `includeResources` parameter determines whether the documents referred in the `resources` field are returned or not; + // the `includeTransformers` parameter determines whether the documents referred in the `transformers` field are returned or not; + // the `includeGenerators` parameter determines whether the documents referred in the `generators` field are returned or not. + GetResources(includeResources, includeTransformers, includeGenerators bool) ([]*doc.Document, error) WasCached() bool } -type SeenMap map[string]struct{} - -func (seen SeenMap) Seen(item string) bool { - _, ok := seen[item] - return ok -} - -func (seen SeenMap) Add(item string) { - seen[item] = struct{}{} -} - -func NewSeenMap() SeenMap { - return make(map[string]struct{}) -} - type CrawlSeed []*doc.Document type IndexFunc func(CrawledDocument, index.Mode) error @@ -84,18 +76,18 @@ func findMatch(d *doc.Document, crawlers []Crawler) Crawler { } func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc, - seen SeenMap, stack *CrawlSeed) { + seen utils.SeenMap, stack *CrawlSeed) { seen.Add(cdoc.ID()) // Insert into index if err := indx(cdoc, index.InsertOrUpdate); err != nil { - logger.Printf("Failed to insert or update %s %s: %v", - cdoc.GetDocument().RepositoryURL, cdoc.GetDocument().FilePath, err) + logger.Printf("Failed to insert or update doc(%s): %v", + cdoc.GetDocument().Path(), err) return } - deps, err := cdoc.GetResources() + deps, err := cdoc.GetResources(true, false, false) if err != nil { logger.Println(err) return @@ -110,7 +102,7 @@ func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc, } func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv Converter, indx IndexFunc, - seen SeenMap, stack *CrawlSeed) { + seen utils.SeenMap, stack 
*CrawlSeed) { UpdatedDocCount := 0 seenDocCount := 0 @@ -131,7 +123,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C *docsPtr = (*docsPtr)[:(len(*docsPtr) - 1)] crawledDocCount++ - logger.Printf("Crawling doc %d: %s %s", crawledDocCount, tail.RepositoryURL, tail.FilePath) + logger.Printf("Crawling doc %d: %s", crawledDocCount, tail.Path()) if seen.Seen(tail.ID()) { logger.Printf("this doc has been seen before") @@ -140,7 +132,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C } if tail.WasCached() { - logger.Printf("%s %s is cached already", tail.RepositoryURL, tail.FilePath) + logger.Printf("doc(%s) is cached already", tail.Path()) cachedDocCount++ continue } @@ -161,10 +153,8 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C // `bases` field. seen.Add(tail.ID()) - if err := match.FetchDocument(ctx, tail); err != nil { - logger.Printf("FetchDocument failed on %s %s: %v", - tail.RepositoryURL, tail.FilePath, err) + logger.Printf("FetchDocument failed on doc(%s): %v", tail.Path(), err) FetchDocumentErrCount++ // delete the document from the index cdoc := &doc.KustomizationDocument{ @@ -172,16 +162,14 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C } seen.Add(cdoc.ID()) if err := indx(cdoc, index.Delete); err != nil { - logger.Printf("Failed to delete %s %s: %v", - cdoc.RepositoryURL, cdoc.FilePath, err) + logger.Printf("Failed to delete doc(%s): %v", cdoc.Path(), err) } deleteDocCount++ continue } if err := match.SetCreated(ctx, tail); err != nil { - logger.Printf("SetCreated failed on %s %s: %v", - tail.RepositoryURL, tail.FilePath, err) + logger.Printf("SetCreated failed on doc(%s): %v", tail.Path(), err) SetCreatedErrCount++ } @@ -189,8 +177,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C // If conv returns an error, cdoc can still be added into the index so that // cdoc.Document can be 
searched. if err != nil { - logger.Printf("conv failed on %s %s: %v", - tail.RepositoryURL, tail.FilePath, err) + logger.Printf("conv failed on doc(%s): %v", tail.Path(), err) convErrCount++ } @@ -211,7 +198,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C // CrawlFromSeed updates all the documents in seed, and crawls all the new // documents referred in the seed. func CrawlFromSeed(ctx context.Context, seed CrawlSeed, crawlers []Crawler, - conv Converter, indx IndexFunc, seen SeenMap) { + conv Converter, indx IndexFunc, seen utils.SeenMap) { // stack tracks the documents directly referred in other documents. stack := make(CrawlSeed, 0) @@ -247,7 +234,7 @@ func CrawlFromSeed(ctx context.Context, seed CrawlSeed, crawlers []Crawler, // from the seed will be processed before any other documents from the // crawlers. func CrawlGithubRunner(ctx context.Context, output chan<- CrawledDocument, - crawlers []Crawler, seen SeenMap) []error { + crawlers []Crawler, seen utils.SeenMap) []error { errs := make([]error, len(crawlers)) wg := sync.WaitGroup{} @@ -291,7 +278,7 @@ func CrawlGithubRunner(ctx context.Context, output chan<- CrawledDocument, // CrawlGithub crawls all the kustomization files on Github. func CrawlGithub(ctx context.Context, crawlers []Crawler, conv Converter, - indx IndexFunc, seen SeenMap) { + indx IndexFunc, seen utils.SeenMap) { // stack tracks the documents directly referred in other documents. 
stack := make(CrawlSeed, 0) diff --git a/api/internal/crawl/crawler/crawler_test.go b/api/internal/crawl/crawler/crawler_test.go index 7dace4da6..d18a4afce 100644 --- a/api/internal/crawl/crawler/crawler_test.go +++ b/api/internal/crawl/crawler/crawler_test.go @@ -12,6 +12,8 @@ import ( "testing" "time" + "sigs.k8s.io/kustomize/api/internal/crawl/utils" + "sigs.k8s.io/kustomize/api/internal/crawl/index" "sigs.k8s.io/kustomize/api/internal/crawl/doc" @@ -76,7 +78,7 @@ func newCrawler(matchPrefix string, err error, // Crawl implements the Crawler interface for testing. func (c testCrawler) Crawl(_ context.Context, - output chan<- CrawledDocument, _ SeenMap) error { + output chan<- CrawledDocument, _ utils.SeenMap) error { for i, d := range c.docs { isResource := true @@ -182,7 +184,7 @@ func TestCrawlGithubRunner(t *testing.T) { defer close(output) defer wg.Done() - seen := NewSeenMap() + seen := utils.NewSeenMap() errs := CrawlGithubRunner(context.Background(), output, test.tc, seen) @@ -324,7 +326,7 @@ resources: visited[d.ID()]++ return nil }, - NewSeenMap(), + utils.NewSeenMap(), ) if lv, lc := len(visited), len(tc.corpus); lv != lc { t.Errorf("error: %d of %d documents visited.", lv, lc) diff --git a/api/internal/crawl/crawler/github/crawler.go b/api/internal/crawl/crawler/github/crawler.go index 046ba4af0..e1aeff401 100644 --- a/api/internal/crawl/crawler/github/crawler.go +++ b/api/internal/crawl/crawler/github/crawler.go @@ -16,6 +16,8 @@ import ( "strings" "time" + "sigs.k8s.io/kustomize/api/internal/crawl/utils" + "sigs.k8s.io/kustomize/api/internal/crawl/crawler" "sigs.k8s.io/kustomize/api/internal/crawl/doc" "sigs.k8s.io/kustomize/api/internal/crawl/httpclient" @@ -68,7 +70,7 @@ func (gc githubCrawler) DefaultBranch(repo string) string { // Implements crawler.Crawler. 
func (gc githubCrawler) Crawl(ctx context.Context, - output chan<- crawler.CrawledDocument, seen crawler.SeenMap) error { + output chan<- crawler.CrawledDocument, seen utils.SeenMap) error { noETagClient := GhClient{ RequestConfig: gc.client.RequestConfig, @@ -195,9 +197,9 @@ func (gc githubCrawler) Match(d *doc.Document) bool { type RangeQueryResult struct { totalDocCnt uint64 - seenDocCnt uint64 - newDocCnt uint64 - errorCnt uint64 + seenDocCnt uint64 + newDocCnt uint64 + errorCnt uint64 } func (r *RangeQueryResult) Add(other RangeQueryResult) { @@ -209,7 +211,7 @@ func (r *RangeQueryResult) Add(other RangeQueryResult) { func (r *RangeQueryResult) String() string { return fmt.Sprintf("got %d files from API. "+ - "%d have been seen before. %d are new and sent to the output channel." + + "%d have been seen before. %d are new and sent to the output channel."+ " %d have kustomizationResultAdapter errors.", r.totalDocCnt, r.seenDocCnt, r.newDocCnt, r.errorCnt) } @@ -217,7 +219,7 @@ func (r *RangeQueryResult) String() string { // processQuery follows all of the pages in a query, and updates/adds the // documents from the crawl to the datastore/index. 
func processQuery(ctx context.Context, gcl GhClient, query string, - output chan<- crawler.CrawledDocument, seen crawler.SeenMap, + output chan<- crawler.CrawledDocument, seen utils.SeenMap, branchMap map[string]string) (RangeQueryResult, error) { queryPages := make(chan GhResponseInfo) @@ -271,7 +273,7 @@ func processQuery(ctx context.Context, gcl GhClient, query string, return result, errs } -func kustomizationResultAdapter(gcl GhClient, k GhFileSpec, seen crawler.SeenMap, +func kustomizationResultAdapter(gcl GhClient, k GhFileSpec, seen utils.SeenMap, branchMap map[string]string) (crawler.CrawledDocument, error) { url := gcl.ReposRequest(k.Repository.FullName) defaultBranch, err := gcl.GetDefaultBranch(url, k.Repository.URL, branchMap) diff --git a/api/internal/crawl/crawler/github/queries.go b/api/internal/crawl/crawler/github/queries.go index 557e0f371..ba05af38f 100644 --- a/api/internal/crawl/crawler/github/queries.go +++ b/api/internal/crawl/crawler/github/queries.go @@ -117,7 +117,7 @@ type RequestConfig struct { // understand why the request object is useful. func (rc RequestConfig) CodeSearchRequestWith(query Query) request { vals := url.Values{ - "sort": []string{"indexed"}, + "sort": []string{"indexed"}, "order": []string{"desc"}, } req := rc.makeRequest("search/code", query, vals) diff --git a/api/internal/crawl/doc/doc.go b/api/internal/crawl/doc/doc.go index 4a709f693..87cd5a3e2 100644 --- a/api/internal/crawl/doc/doc.go +++ b/api/internal/crawl/doc/doc.go @@ -3,6 +3,7 @@ package doc import ( "fmt" "log" + "path/filepath" "sort" "strings" @@ -51,15 +52,21 @@ func (doc *KustomizationDocument) String() string { doc.IsSame, doc.Kinds, len(doc.Identifiers), len(doc.Values)) } -// Implements the CrawlerDocument interface. 
-func (doc *KustomizationDocument) GetResources() ([]*Document, error) { - isResource := true - for _, suffix := range konfig.RecognizedKustomizationFileNames() { - if strings.HasSuffix(doc.FilePath, "/"+suffix) { - isResource = false +// IsKustomizationFile determines whether a file path is a kustomization file +func IsKustomizationFile(path string) bool { + basename := filepath.Base(path) + for _, name := range konfig.RecognizedKustomizationFileNames() { + if basename == name { + return true } } - if isResource { + return false +} + +// Implements the CrawlerDocument interface. +func (doc *KustomizationDocument) GetResources( + includeResources, includeTransformers, includeGenerators bool) ([]*Document, error) { + if !IsKustomizationFile(doc.FilePath) { return []*Document{}, nil } @@ -77,20 +84,42 @@ func (doc *KustomizationDocument) GetResources() ([]*Document, error) { } k.FixKustomizationPostUnmarshalling() - res := make([]*Document, 0, len(k.Resources)) - for _, r := range k.Resources { + res := make([]*Document, 0) + + if includeResources { + resourceDocs := doc.CollectDocuments(k.Resources) + res = append(res, resourceDocs...) + } + + if includeGenerators { + generatorDocs := doc.CollectDocuments(k.Generators) + res = append(res, generatorDocs...) + } + + if includeTransformers { + transformerDocs := doc.CollectDocuments(k.Transformers) + res = append(res, transformerDocs...) + } + + return res, nil +} + +// CollectDocuments constructs a Document for each path in paths, and returns +// a slice of Document pointers. 
+func (doc *KustomizationDocument) CollectDocuments(paths []string) []*Document { + docs := make([]*Document, 0, len(paths)) + for _, r := range paths { if strings.TrimSpace(r) == "" { continue } next, err := doc.Document.FromRelativePath(r) if err != nil { - log.Printf("GetResources error: %v\n", err) + log.Printf("CollectDocuments error: %v\n", err) continue } - res = append(res, &next) + docs = append(docs, &next) } - - return res, nil + return docs } func (doc *KustomizationDocument) readBytes() ([]map[string]interface{}, error) { diff --git a/api/internal/crawl/doc/doc_test.go b/api/internal/crawl/doc/doc_test.go index 6fef92d97..c193809a6 100644 --- a/api/internal/crawl/doc/doc_test.go +++ b/api/internal/crawl/doc/doc_test.go @@ -189,11 +189,13 @@ metadata: } } +type TestStructForGetResources struct { + doc KustomizationDocument + resources []*Document +} + func TestGetResources(t *testing.T) { - tests := []struct { - doc KustomizationDocument - resources []*Document - }{ + tests := []TestStructForGetResources{ { doc: KustomizationDocument{ Document: Document{ @@ -248,9 +250,12 @@ resources: resources: []*Document{}, }, } + runTest(t, tests, true, false, false) +} +func runTest(t *testing.T, tests []TestStructForGetResources, includeResources, includeTransformers, includeGenerators bool) { for _, test := range tests { - res, err := test.doc.GetResources() + res, err := test.doc.GetResources(includeResources, includeTransformers, includeGenerators) if err != nil { t.Errorf("Unexpected error: %v\n", err) continue @@ -284,3 +289,73 @@ resources: } } } + +func TestGetResourcesAndGenerators(t *testing.T) { + tests := []TestStructForGetResources{ + { + doc: KustomizationDocument{ + Document: Document{ + RepositoryURL: "sigs.k8s.io/kustomize", + FilePath: "some/path/to/kdir/kustomization.yaml", + DocumentData: ` +resources: +- file.yaml + +generators: +- gen.yaml + +transformers: +- tr.yaml +`}, + }, + resources: []*Document{ + { + RepositoryURL: 
"sigs.k8s.io/kustomize", + FilePath: "some/path/to/kdir/gen.yaml", + }, + { + RepositoryURL: "sigs.k8s.io/kustomize", + FilePath: "some/path/to/kdir/file.yaml", + }, + }, + }, + } + runTest(t, tests, true, false, true) +} + +func TestGetResourcesAndGeneratorsAndTransformers(t *testing.T) { + tests := []TestStructForGetResources{ + { + doc: KustomizationDocument{ + Document: Document{ + RepositoryURL: "sigs.k8s.io/kustomize", + FilePath: "some/path/to/kdir/kustomization.yaml", + DocumentData: ` +resources: +- file.yaml + +generators: +- gen.yaml + +transformers: +- tr.yaml +`}, + }, + resources: []*Document{ + { + RepositoryURL: "sigs.k8s.io/kustomize", + FilePath: "some/path/to/kdir/tr.yaml", + }, + { + RepositoryURL: "sigs.k8s.io/kustomize", + FilePath: "some/path/to/kdir/gen.yaml", + }, + { + RepositoryURL: "sigs.k8s.io/kustomize", + FilePath: "some/path/to/kdir/file.yaml", + }, + }, + }, + } + runTest(t, tests, true, true, true) +} diff --git a/api/internal/crawl/doc/docname.go b/api/internal/crawl/doc/docname.go index 5afca1290..e295e4620 100644 --- a/api/internal/crawl/doc/docname.go +++ b/api/internal/crawl/doc/docname.go @@ -35,6 +35,11 @@ func (doc *Document) Copy() *Document { } } +func (doc *Document) Path() string { + return fmt.Sprintf("repoURL: %s filePath: %s branch: %s", + doc.RepositoryURL, doc.FilePath, doc.DefaultBranch) +} + // Implements the CrawlerDocument interface. 
func (doc *Document) WasCached() bool { return doc.IsSame diff --git a/api/internal/crawl/doc/docname_test.go b/api/internal/crawl/doc/docname_test.go index f1b65dc8f..a03beaf06 100644 --- a/api/internal/crawl/doc/docname_test.go +++ b/api/internal/crawl/doc/docname_test.go @@ -65,7 +65,7 @@ func TestFromRelativePath(t *testing.T) { func TestDocument_RepositoryFullName(t *testing.T) { testCases := []struct { - doc Document + doc Document expectedRepositoryFullName string }{ { @@ -108,4 +108,4 @@ func TestDocument_RepositoryFullName(t *testing.T) { returnedRepositoryFullName) } } -} \ No newline at end of file +} diff --git a/api/internal/crawl/doc/unique_doc.go b/api/internal/crawl/doc/unique_doc.go new file mode 100644 index 000000000..026b345a5 --- /dev/null +++ b/api/internal/crawl/doc/unique_doc.go @@ -0,0 +1,36 @@ +package doc + +import ( + "sigs.k8s.io/kustomize/api/internal/crawl/utils" +) + +// UniqueDocuments make sure a Document with a given ID appears only once +type UniqueDocuments struct { + docs []*Document + docIDs utils.SeenMap +} + +func NewUniqueDocuments() UniqueDocuments { + return UniqueDocuments{ + docs: []*Document{}, + docIDs: utils.NewSeenMap(), + } +} + +func (uds *UniqueDocuments) Add(d *Document) { + if uds.docIDs.Seen(d.ID()) { + return + } + uds.docs = append(uds.docs, d) + uds.docIDs.Add(d.ID()) +} + +func (uds *UniqueDocuments) AddDocuments(docs []*Document) { + for _, d := range docs { + uds.Add(d) + } +} + +func (uds *UniqueDocuments) Documents() []*Document { + return uds.docs +} diff --git a/api/internal/crawl/index/kustomize.go b/api/internal/crawl/index/kustomize.go index e55c5547e..7e2e7b104 100644 --- a/api/internal/crawl/index/kustomize.go +++ b/api/internal/crawl/index/kustomize.go @@ -18,6 +18,7 @@ const ( ) type Mode int + const ( InsertOrUpdate = iota Delete diff --git a/api/internal/crawl/search_cmds/creationTime.md b/api/internal/crawl/search_cmds/creationTime.md index 4029e4b0b..3ebfaf157 100644 --- 
a/api/internal/crawl/search_cmds/creationTime.md +++ b/api/internal/crawl/search_cmds/creationTime.md @@ -1,6 +1,6 @@ Find out the largest value of the `creationTime` field: ``` -curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "aggs" : { "max_creationTime" : { "max" : { "field" : "creationTime" } } @@ -11,7 +11,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont Find out the smallest value of the `creationTime` field: ``` -curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "aggs" : { "min_creationTime" : { "min" : { "field" : "creationTime" } } @@ -22,7 +22,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont Find out the smallest value of the `creationTime` field of all the kustomization files: ``` -curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { @@ -40,7 +40,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont Find out the smallest value of the `creationTime` field of all kustomize resource files: ``` -curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { @@ -58,7 +58,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont Query all the documents whose `creationTime` <= 
`2016-07-29T17:38:26.000Z`: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "query": { "range": { @@ -73,7 +73,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Query all the documents whose `creationTime` falls within the specific range: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "query": { "range": { @@ -89,7 +89,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Aggregate how many new kustomization files were added into Github each month: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { @@ -112,7 +112,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte Aggregate how many new kustomize resource files were added into Github each month: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { @@ -135,7 +135,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte Aggregate how many new kustomization files were added into Github each year: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { @@ -158,7 
+158,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte Aggregate how many new kustomize resource files were added into Github each year: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { diff --git a/api/internal/crawl/search_cmds/defaultBranch.md b/api/internal/crawl/search_cmds/defaultBranch.md index 89822d4c6..d4cab8e6b 100644 --- a/api/internal/crawl/search_cmds/defaultBranch.md +++ b/api/internal/crawl/search_cmds/defaultBranch.md @@ -1,6 +1,6 @@ Count distinct values of the `defaultBranch` field: ``` -curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "aggs" : { "defaultBranch_count" : { @@ -17,7 +17,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont List all the github branches where kustomization files and kustomize resource files live, and how many kustomization files and kustomize resource files live in each branch: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "aggs" : { "defaultBranch" : { diff --git a/api/internal/crawl/search_cmds/fieldExistence.md b/api/internal/crawl/search_cmds/fieldExistence.md index 591abdbff..f804c784a 100644 --- a/api/internal/crawl/search_cmds/fieldExistence.md +++ b/api/internal/crawl/search_cmds/fieldExistence.md @@ -1,7 +1,7 @@ Count the documents whose `document` field is empty (The reason why the `document` field of a document is empty is because of empty documents): ``` -curl -X GET 
"${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "size": 10000, "query": { @@ -19,7 +19,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Find all the documents having the `creationTime` field set: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "query": { "exists": { @@ -32,7 +32,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Find all the documents whose `creationTime` field is not set: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "size": 10000, "query": { diff --git a/api/internal/crawl/search_cmds/keyword_search.md b/api/internal/crawl/search_cmds/keyword_search.md index e3c152d00..588f938fb 100644 --- a/api/internal/crawl/search_cmds/keyword_search.md +++ b/api/internal/crawl/search_cmds/keyword_search.md @@ -1,7 +1,7 @@ Count the documents in the index whose `repositoryUrl` field starts with `https://github.com/`: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { @@ -17,7 +17,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Count the documents in the index whose `repositoryUrl` field does not start with `https://github.com/`: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET 
"${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { @@ -33,7 +33,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Search all the documents matching the given `repositoryUrl` and `filePath`, and return a version for each search hit: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "size": 10000, "version": true, @@ -52,7 +52,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Search all the documents whose filePath ends with one of these following three filenames: `kustomization.yaml`, `kustomization.yml`, `kustomization`: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { @@ -68,7 +68,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Search all the documents whose filePath does not end with any of these following three filenames: `kustomization.yaml`, `kustomization.yml`, `kustomization`: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { diff --git a/api/internal/crawl/search_cmds/misc.md b/api/internal/crawl/search_cmds/misc.md index ac2659d62..303ae5d3b 100644 --- a/api/internal/crawl/search_cmds/misc.md +++ b/api/internal/crawl/search_cmds/misc.md @@ -10,10 +10,10 @@ curl "${ElasticSearchURL}:9200/_cat/indices?v" Get the mapping of the index: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_mapping?pretty" +curl -X GET 
"${ElasticSearchURL}:9200/${INDEXNAME}/_mapping?pretty" ``` Delete the kustomize index from the ElasticSearch cluster (**Use this command with caution**): ``` -curl -X DELETE "${ElasticSearchURL}:9200/kustomize?pretty" +curl -X DELETE "${ElasticSearchURL}:9200/${INDEXNAME}?pretty" ``` \ No newline at end of file diff --git a/api/internal/crawl/search_cmds/repositoryUrl.md b/api/internal/crawl/search_cmds/repositoryUrl.md index ef7802e04..291aa1c69 100644 --- a/api/internal/crawl/search_cmds/repositoryUrl.md +++ b/api/internal/crawl/search_cmds/repositoryUrl.md @@ -1,6 +1,6 @@ Count distinct values of the `repositoryUrl` field: ``` -curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "aggs" : { "repositoryUrl_count" : { @@ -16,7 +16,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont Count how many Github repositories include kustomization files: ``` -curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { @@ -39,7 +39,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont Count how many Github repositories include kustomize resource files: ``` -curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { @@ -64,7 +64,7 @@ List all the github repositories including kustomization files and kustomize res and how many kustomization files and kustomize resource files each github repository includes (the github repository including the most kustomization 
files is listed first): ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "aggs" : { "repositoryUrl" : { @@ -80,7 +80,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte List the top 20 Github repositories including the most amount of kustomization files: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { @@ -103,7 +103,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte List the top 20 Github repositories including the most amount of kustomize resource files: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { diff --git a/api/internal/crawl/search_cmds/text_search.md b/api/internal/crawl/search_cmds/text_search.md index 37a7701b5..dcc033fb8 100644 --- a/api/internal/crawl/search_cmds/text_search.md +++ b/api/internal/crawl/search_cmds/text_search.md @@ -1,6 +1,6 @@ Search for all the kustomize resource files including a Deployment object: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "query": { "match" : { @@ -16,7 +16,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Search for all the kustomize resource files including a Deployment object, but only including the `kinds` field in the result: ``` -curl -X GET 
"${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "_source": { "includes": ["kinds"] @@ -35,7 +35,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Search for all the kustomize resource files including both a Deployment object and a Service object: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "query": { "match" : { @@ -52,7 +52,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Count the number of documents including Deployment and the number of documents including Service: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "size": 0, "aggs" : { @@ -71,7 +71,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Search for all the kustomization files involving CRDs: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "size": 10000, "query": { @@ -87,7 +87,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Search for all the kustomization files defining configMapGenerator: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "size": 10000, "query": { @@ -103,7 +103,7 @@ curl -X GET 
"${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Search for all the documents having a `kind` field: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { @@ -118,7 +118,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Search for all the kuostmization files having a `kind` field: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "query": { "bool": { @@ -134,7 +134,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type Search for all the kustomization files defining the `generatorOptions:disableNameSuffixHash` feature: ``` -curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d' { "query": { "match" : { diff --git a/api/internal/crawl/utils/utils.go b/api/internal/crawl/utils/utils.go new file mode 100644 index 000000000..a397b2d52 --- /dev/null +++ b/api/internal/crawl/utils/utils.go @@ -0,0 +1,16 @@ +package utils + +type SeenMap map[string]struct{} + +func (seen SeenMap) Seen(item string) bool { + _, ok := seen[item] + return ok +} + +func (seen SeenMap) Add(item string) { + seen[item] = struct{}{} +} + +func NewSeenMap() SeenMap { + return make(map[string]struct{}) +}