mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-11 17:12:51 +00:00
Merge pull request #2150 from haiyanmeng/stats
Add `fileType` and `User` into the index
This commit is contained in:
@@ -126,7 +126,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// seen tracks the IDs of all the documents in the index.
|
// seen tracks the IDs of all the documents in the index and their corresponding file types.
|
||||||
// This helps avoid indexing a given document multiple times.
|
// This helps avoid indexing a given document multiple times.
|
||||||
seen := utils.NewSeenMap()
|
seen := utils.NewSeenMap()
|
||||||
|
|
||||||
@@ -187,6 +187,12 @@ func main() {
|
|||||||
crawler.CrawlFromSeed(ctx, seedDocs, crawlers, docConverter, indexFunc, seen)
|
crawler.CrawlFromSeed(ctx, seedDocs, crawlers, docConverter, indexFunc, seen)
|
||||||
case CrawlGithub:
|
case CrawlGithub:
|
||||||
crawlers := []crawler.Crawler{ghCrawlerConstructor("", "")}
|
crawlers := []crawler.Crawler{ghCrawlerConstructor("", "")}
|
||||||
|
// add all the documents in the index into seen.
|
||||||
|
// this greatly reduces the time overhead of CrawlGithub.
|
||||||
|
getSeedDocsFunc()
|
||||||
|
for _, d := range seedDocs {
|
||||||
|
seen[d.ID()] = d.FileType
|
||||||
|
}
|
||||||
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
||||||
case CrawlUser:
|
case CrawlUser:
|
||||||
if *githubUserPtr == "" {
|
if *githubUserPtr == "" {
|
||||||
|
|||||||
@@ -38,6 +38,8 @@ type Crawler interface {
|
|||||||
// Write to the document what the created time is.
|
// Write to the document what the created time is.
|
||||||
SetCreated(context.Context, *doc.Document) error
|
SetCreated(context.Context, *doc.Document) error
|
||||||
|
|
||||||
|
SetDefaultBranch(*doc.Document)
|
||||||
|
|
||||||
Match(*doc.Document) bool
|
Match(*doc.Document) bool
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,7 +80,9 @@ func findMatch(d *doc.Document, crawlers []Crawler) Crawler {
|
|||||||
func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
||||||
seen utils.SeenMap, stack *CrawlSeed) {
|
seen utils.SeenMap, stack *CrawlSeed) {
|
||||||
|
|
||||||
seen.Add(cdoc.ID())
|
seen.Set(cdoc.ID(), cdoc.GetDocument().FileType)
|
||||||
|
|
||||||
|
match.SetDefaultBranch(cdoc.GetDocument())
|
||||||
|
|
||||||
// Insert into index
|
// Insert into index
|
||||||
if err := indx(cdoc, index.InsertOrUpdate); err != nil {
|
if err := indx(cdoc, index.InsertOrUpdate); err != nil {
|
||||||
@@ -87,14 +91,14 @@ func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
deps, err := cdoc.GetResources(true, false, false)
|
deps, err := cdoc.GetResources(true, true, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Println(err)
|
logger.Println(err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, dep := range deps {
|
for _, dep := range deps {
|
||||||
if seen.Seen(dep.ID()) {
|
if seen.Seen(dep.ID()) && seen.Value(dep.ID()) == dep.FileType {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
*stack = append(*stack, dep)
|
*stack = append(*stack, dep)
|
||||||
@@ -102,7 +106,7 @@ func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv Converter, indx IndexFunc,
|
func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv Converter, indx IndexFunc,
|
||||||
seen utils.SeenMap, stack *CrawlSeed) {
|
seen utils.SeenMap, stack *CrawlSeed, refreshDoc bool, updateFileType bool) {
|
||||||
|
|
||||||
UpdatedDocCount := 0
|
UpdatedDocCount := 0
|
||||||
seenDocCount := 0
|
seenDocCount := 0
|
||||||
@@ -126,9 +130,11 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
logger.Printf("Crawling doc %d: %s", crawledDocCount, tail.Path())
|
logger.Printf("Crawling doc %d: %s", crawledDocCount, tail.Path())
|
||||||
|
|
||||||
if seen.Seen(tail.ID()) {
|
if seen.Seen(tail.ID()) {
|
||||||
logger.Printf("this doc has been seen before")
|
if !updateFileType || seen.Value(tail.ID()) == tail.FileType {
|
||||||
seenDocCount++
|
logger.Printf("this doc has been seen before")
|
||||||
continue
|
seenDocCount++
|
||||||
|
continue
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if tail.WasCached() {
|
if tail.WasCached() {
|
||||||
@@ -144,6 +150,10 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if tail.User == "" {
|
||||||
|
tail.User = doc.UserName(tail.RepositoryURL)
|
||||||
|
}
|
||||||
|
|
||||||
// If the Document represents a kustomization root, FetchDocument will change
|
// If the Document represents a kustomization root, FetchDocument will change
|
||||||
// the `filePath` field of the Document by adding `kustomization.yaml` or
|
// the `filePath` field of the Document by adding `kustomization.yaml` or
|
||||||
// `kustomization.yml` or `kustomization` into the field.
|
// `kustomization.yml` or `kustomization` into the field.
|
||||||
@@ -151,26 +161,34 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
// calling FetchDocument. Otherwise, the binary may enter into an infinite loop
|
// calling FetchDocument. Otherwise, the binary may enter into an infinite loop
|
||||||
// if a kustomization file points to its kustomization root in its `resources` or
|
// if a kustomization file points to its kustomization root in its `resources` or
|
||||||
// `bases` field.
|
// `bases` field.
|
||||||
seen.Add(tail.ID())
|
seen.Set(tail.ID(), tail.FileType)
|
||||||
|
|
||||||
if err := match.FetchDocument(ctx, tail); err != nil {
|
if refreshDoc || tail.DefaultBranch == "" {
|
||||||
logger.Printf("FetchDocument failed on doc(%s): %v", tail.Path(), err)
|
match.SetDefaultBranch(tail)
|
||||||
FetchDocumentErrCount++
|
|
||||||
// delete the document from the index
|
|
||||||
cdoc := &doc.KustomizationDocument{
|
|
||||||
Document: *tail,
|
|
||||||
}
|
|
||||||
seen.Add(cdoc.ID())
|
|
||||||
if err := indx(cdoc, index.Delete); err != nil {
|
|
||||||
logger.Printf("Failed to delete doc(%s): %v", cdoc.Path(), err)
|
|
||||||
}
|
|
||||||
deleteDocCount++
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := match.SetCreated(ctx, tail); err != nil {
|
if refreshDoc || tail.DocumentData == "" {
|
||||||
logger.Printf("SetCreated failed on doc(%s): %v", tail.Path(), err)
|
if err := match.FetchDocument(ctx, tail); err != nil {
|
||||||
SetCreatedErrCount++
|
logger.Printf("FetchDocument failed on doc(%s): %v", tail.Path(), err)
|
||||||
|
FetchDocumentErrCount++
|
||||||
|
// delete the document from the index
|
||||||
|
cdoc := &doc.KustomizationDocument{
|
||||||
|
Document: *tail,
|
||||||
|
}
|
||||||
|
seen.Set(cdoc.ID(), tail.FileType)
|
||||||
|
if err := indx(cdoc, index.Delete); err != nil {
|
||||||
|
logger.Printf("Failed to delete doc(%s): %v", cdoc.Path(), err)
|
||||||
|
}
|
||||||
|
deleteDocCount++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if refreshDoc || tail.CreationTime == nil {
|
||||||
|
if err := match.SetCreated(ctx, tail); err != nil {
|
||||||
|
logger.Printf("SetCreated failed on doc(%s): %v", tail.Path(), err)
|
||||||
|
SetCreatedErrCount++
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cdoc, err := conv(tail)
|
cdoc, err := conv(tail)
|
||||||
@@ -206,14 +224,14 @@ func CrawlFromSeed(ctx context.Context, seed CrawlSeed, crawlers []Crawler,
|
|||||||
// Exploit seed to update bulk of corpus.
|
// Exploit seed to update bulk of corpus.
|
||||||
logger.Printf("updating %d documents from seed\n", len(seed))
|
logger.Printf("updating %d documents from seed\n", len(seed))
|
||||||
// each unique document in seed will be crawled once.
|
// each unique document in seed will be crawled once.
|
||||||
doCrawl(ctx, &seed, crawlers, conv, indx, seen, &stack)
|
doCrawl(ctx, &seed, crawlers, conv, indx, seen, &stack, true, false)
|
||||||
|
|
||||||
// Traverse any new documents added while updating corpus.
|
// Traverse any new documents added while updating corpus.
|
||||||
logger.Printf("crawling %d new documents found in the seed\n", len(stack))
|
logger.Printf("crawling %d new documents found in the seed\n", len(stack))
|
||||||
// While crawling each document in stack, the documents directly referred in the document
|
// While crawling each document in stack, the documents directly referred in the document
|
||||||
// will be added into stack.
|
// will be added into stack.
|
||||||
// After this statement is done, stack will become empty.
|
// After this statement is done, stack will become empty.
|
||||||
doCrawl(ctx, &stack, crawlers, conv, indx, seen, &stack)
|
doCrawl(ctx, &stack, crawlers, conv, indx, seen, &stack, false, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
// CrawlGithubRunner is a blocking function and only returns once all of the
|
// CrawlGithubRunner is a blocking function and only returns once all of the
|
||||||
@@ -294,6 +312,8 @@ func CrawlGithub(ctx context.Context, crawlers []Crawler, conv Converter,
|
|||||||
for cdoc := range ch {
|
for cdoc := range ch {
|
||||||
docCount++
|
docCount++
|
||||||
logger.Printf("Processing doc %d found on Github", docCount)
|
logger.Printf("Processing doc %d found on Github", docCount)
|
||||||
|
// all the docs here are kustomization files found by querying Github, and
|
||||||
|
// their `FileType` fields all should be empty.
|
||||||
if seen.Seen(cdoc.ID()) {
|
if seen.Seen(cdoc.ID()) {
|
||||||
logger.Printf("the doc has been seen before")
|
logger.Printf("the doc has been seen before")
|
||||||
continue
|
continue
|
||||||
@@ -320,5 +340,5 @@ func CrawlGithub(ctx context.Context, crawlers []Crawler, conv Converter,
|
|||||||
// Handle deps of newly discovered documents.
|
// Handle deps of newly discovered documents.
|
||||||
logger.Printf("crawling the %d new documents referred by other documents",
|
logger.Printf("crawling the %d new documents referred by other documents",
|
||||||
len(stack))
|
len(stack))
|
||||||
doCrawl(ctx, &stack, crawlers, conv, indx, seen, &stack)
|
doCrawl(ctx, &stack, crawlers, conv, indx, seen, &stack, false, true)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,6 +37,8 @@ func (c testCrawler) Match(d *doc.Document) bool {
|
|||||||
return d != nil
|
return d != nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetDefaultBranch is a no-op for the test crawler: it leaves d unchanged.
func (c testCrawler) SetDefaultBranch(d *doc.Document) {}
|
||||||
|
|
||||||
func (c testCrawler) FetchDocument(_ context.Context, d *doc.Document) error {
|
func (c testCrawler) FetchDocument(_ context.Context, d *doc.Document) error {
|
||||||
if i, ok := c.lukp[d.ID()]; ok {
|
if i, ok := c.lukp[d.ID()]; ok {
|
||||||
d.DocumentData = c.docs[i].DocumentData
|
d.DocumentData = c.docs[i].DocumentData
|
||||||
|
|||||||
@@ -60,8 +60,16 @@ func NewCrawler(accessToken string, retryCount uint64, client *http.Client,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (gc githubCrawler) SetDefaultBranch(repo, branch string) {
|
func (gc githubCrawler) SetDefaultBranch(d *doc.Document) {
|
||||||
gc.branchMap[repo] = branch
|
url := gc.client.ReposRequest(d.RepositoryFullName())
|
||||||
|
defaultBranch, err := gc.client.GetDefaultBranch(url, d.RepositoryURL, gc.branchMap)
|
||||||
|
if err != nil {
|
||||||
|
logger.Printf(
|
||||||
|
"(error: %v) setting default_branch to master\n", err)
|
||||||
|
defaultBranch = "master"
|
||||||
|
}
|
||||||
|
d.DefaultBranch = defaultBranch
|
||||||
|
gc.branchMap[d.RepositoryURL] = d.DefaultBranch
|
||||||
}
|
}
|
||||||
|
|
||||||
func (gc githubCrawler) DefaultBranch(repo string) string {
|
func (gc githubCrawler) DefaultBranch(repo string) string {
|
||||||
@@ -79,10 +87,20 @@ func (gc githubCrawler) Crawl(ctx context.Context,
|
|||||||
accessToken: gc.client.accessToken,
|
accessToken: gc.client.accessToken,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var ranges []string
|
||||||
|
var err error
|
||||||
// Since Github returns a max of 1000 results per query, we can use
|
// Since Github returns a max of 1000 results per query, we can use
|
||||||
// multiple queries that split the search space into chunks of at most
|
// multiple queries that split the search space into chunks of at most
|
||||||
// 1000 files to get all of the data.
|
// 1000 files to get all of the data.
|
||||||
ranges, err := FindRangesForRepoSearch(newCache(noETagClient, gc.query))
|
for i := 0; i < 5; i++ {
|
||||||
|
ranges, err = FindRangesForRepoSearch(newCache(noETagClient, gc.query))
|
||||||
|
if err == nil {
|
||||||
|
logger.Printf("FindRangesForRepoSearch succeeded after %d retries", i)
|
||||||
|
break
|
||||||
|
} else {
|
||||||
|
time.Sleep(time.Minute)
|
||||||
|
}
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("could not split %v into ranges, %v\n",
|
return fmt.Errorf("could not split %v into ranges, %v\n",
|
||||||
gc.query, err)
|
gc.query, err)
|
||||||
@@ -114,19 +132,6 @@ func (gc githubCrawler) Crawl(ctx context.Context,
|
|||||||
// it will try to add each string in konfig.RecognizedKustomizationFileNames() to
|
// it will try to add each string in konfig.RecognizedKustomizationFileNames() to
|
||||||
// d.FilePath, and try to fetch the document again.
|
// d.FilePath, and try to fetch the document again.
|
||||||
func (gc githubCrawler) FetchDocument(_ context.Context, d *doc.Document) error {
|
func (gc githubCrawler) FetchDocument(_ context.Context, d *doc.Document) error {
|
||||||
// set the default branch if it is empty
|
|
||||||
if d.DefaultBranch == "" {
|
|
||||||
url := gc.client.ReposRequest(d.RepositoryFullName())
|
|
||||||
defaultBranch, err := gc.client.GetDefaultBranch(url, d.RepositoryURL, gc.branchMap)
|
|
||||||
if err != nil {
|
|
||||||
logger.Printf(
|
|
||||||
"(error: %v) setting default_branch to master\n", err)
|
|
||||||
defaultBranch = "master"
|
|
||||||
}
|
|
||||||
d.DefaultBranch = defaultBranch
|
|
||||||
}
|
|
||||||
gc.SetDefaultBranch(d.RepositoryURL, d.DefaultBranch)
|
|
||||||
|
|
||||||
repoURL := d.RepositoryURL + "/" + d.FilePath + "?ref=" + d.DefaultBranch
|
repoURL := d.RepositoryURL + "/" + d.FilePath + "?ref=" + d.DefaultBranch
|
||||||
repoSpec, err := git.NewRepoSpecFromUrl(repoURL)
|
repoSpec, err := git.NewRepoSpecFromUrl(repoURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -283,10 +288,13 @@ func kustomizationResultAdapter(gcl GhClient, k GhFileSpec, seen utils.SeenMap,
|
|||||||
defaultBranch = "master"
|
defaultBranch = "master"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// document here is a kustomization file found by querying Github, whose
|
||||||
|
// `FileType` field should be empty.
|
||||||
document := doc.Document{
|
document := doc.Document{
|
||||||
FilePath: k.Path,
|
FilePath: k.Path,
|
||||||
DefaultBranch: defaultBranch,
|
DefaultBranch: defaultBranch,
|
||||||
RepositoryURL: k.Repository.URL,
|
RepositoryURL: k.Repository.URL,
|
||||||
|
User: doc.UserName(k.Repository.URL),
|
||||||
}
|
}
|
||||||
|
|
||||||
if seen.Seen(document.ID()) {
|
if seen.Seen(document.ID()) {
|
||||||
@@ -304,6 +312,7 @@ func kustomizationResultAdapter(gcl GhClient, k GhFileSpec, seen utils.SeenMap,
|
|||||||
FilePath: k.Path,
|
FilePath: k.Path,
|
||||||
DefaultBranch: defaultBranch,
|
DefaultBranch: defaultBranch,
|
||||||
RepositoryURL: k.Repository.URL,
|
RepositoryURL: k.Repository.URL,
|
||||||
|
User: doc.UserName(k.Repository.URL),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
creationTime, err := gcl.GetFileCreationTime(k)
|
creationTime, err := gcl.GetFileCreationTime(k)
|
||||||
|
|||||||
@@ -87,17 +87,17 @@ func (doc *KustomizationDocument) GetResources(
|
|||||||
res := make([]*Document, 0)
|
res := make([]*Document, 0)
|
||||||
|
|
||||||
if includeResources {
|
if includeResources {
|
||||||
resourceDocs := doc.CollectDocuments(k.Resources)
|
resourceDocs := doc.CollectDocuments(k.Resources, "resource")
|
||||||
res = append(res, resourceDocs...)
|
res = append(res, resourceDocs...)
|
||||||
}
|
}
|
||||||
|
|
||||||
if includeGenerators {
|
if includeGenerators {
|
||||||
generatorDocs := doc.CollectDocuments(k.Generators)
|
generatorDocs := doc.CollectDocuments(k.Generators, "generator")
|
||||||
res = append(res, generatorDocs...)
|
res = append(res, generatorDocs...)
|
||||||
}
|
}
|
||||||
|
|
||||||
if includeTransformers {
|
if includeTransformers {
|
||||||
transformerDocs := doc.CollectDocuments(k.Transformers)
|
transformerDocs := doc.CollectDocuments(k.Transformers, "transformer")
|
||||||
res = append(res, transformerDocs...)
|
res = append(res, transformerDocs...)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -106,7 +106,8 @@ func (doc *KustomizationDocument) GetResources(
|
|||||||
|
|
||||||
// CollectDocuments construct a Document for each path in paths, and return
|
// CollectDocuments construct a Document for each path in paths, and return
|
||||||
// a slice of Document pointers.
|
// a slice of Document pointers.
|
||||||
func (doc *KustomizationDocument) CollectDocuments(paths []string) []*Document {
|
func (doc *KustomizationDocument) CollectDocuments(
|
||||||
|
paths []string, fileType string) []*Document {
|
||||||
docs := make([]*Document, 0, len(paths))
|
docs := make([]*Document, 0, len(paths))
|
||||||
for _, r := range paths {
|
for _, r := range paths {
|
||||||
if strings.TrimSpace(r) == "" {
|
if strings.TrimSpace(r) == "" {
|
||||||
@@ -117,6 +118,7 @@ func (doc *KustomizationDocument) CollectDocuments(paths []string) []*Document {
|
|||||||
log.Printf("CollectDocuments error: %v\n", err)
|
log.Printf("CollectDocuments error: %v\n", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
next.FileType = fileType
|
||||||
docs = append(docs, &next)
|
docs = append(docs, &next)
|
||||||
}
|
}
|
||||||
return docs
|
return docs
|
||||||
|
|||||||
@@ -215,19 +215,27 @@ resources:
|
|||||||
{
|
{
|
||||||
RepositoryURL: "sigs.k8s.io/kustomize",
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
FilePath: "some/path/to/base",
|
FilePath: "some/path/to/base",
|
||||||
|
FileType: "resource",
|
||||||
|
User: "sigs.k8s.io",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
RepositoryURL: "sigs.k8s.io/kustomize",
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
FilePath: "some/path/to/otherbase",
|
FilePath: "some/path/to/otherbase",
|
||||||
|
FileType: "resource",
|
||||||
|
User: "sigs.k8s.io",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
RepositoryURL: "sigs.k8s.io/kustomize",
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
FilePath: "some/path/to/kdir/file.yaml",
|
FilePath: "some/path/to/kdir/file.yaml",
|
||||||
|
FileType: "resource",
|
||||||
|
User: "sigs.k8s.io",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
RepositoryURL: "https://github.com/kubernetes-sigs/kustomize",
|
RepositoryURL: "https://github.com/kubernetes-sigs/kustomize",
|
||||||
FilePath: "examples/helloWorld",
|
FilePath: "examples/helloWorld",
|
||||||
DefaultBranch: "v3.1.0",
|
DefaultBranch: "v3.1.0",
|
||||||
|
FileType: "resource",
|
||||||
|
User: "kubernetes-sigs",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -312,10 +320,14 @@ transformers:
|
|||||||
{
|
{
|
||||||
RepositoryURL: "sigs.k8s.io/kustomize",
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
FilePath: "some/path/to/kdir/gen.yaml",
|
FilePath: "some/path/to/kdir/gen.yaml",
|
||||||
|
FileType: "generator",
|
||||||
|
User: "sigs.k8s.io",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
RepositoryURL: "sigs.k8s.io/kustomize",
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
FilePath: "some/path/to/kdir/file.yaml",
|
FilePath: "some/path/to/kdir/file.yaml",
|
||||||
|
FileType: "resource",
|
||||||
|
User: "sigs.k8s.io",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -345,14 +357,20 @@ transformers:
|
|||||||
{
|
{
|
||||||
RepositoryURL: "sigs.k8s.io/kustomize",
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
FilePath: "some/path/to/kdir/tr.yaml",
|
FilePath: "some/path/to/kdir/tr.yaml",
|
||||||
|
FileType: "transformer",
|
||||||
|
User: "sigs.k8s.io",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
RepositoryURL: "sigs.k8s.io/kustomize",
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
FilePath: "some/path/to/kdir/gen.yaml",
|
FilePath: "some/path/to/kdir/gen.yaml",
|
||||||
|
FileType: "generator",
|
||||||
|
User: "sigs.k8s.io",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
RepositoryURL: "sigs.k8s.io/kustomize",
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
FilePath: "some/path/to/kdir/file.yaml",
|
FilePath: "some/path/to/kdir/file.yaml",
|
||||||
|
FileType: "resource",
|
||||||
|
User: "sigs.k8s.io",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -11,12 +11,18 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type Document struct {
|
type Document struct {
|
||||||
RepositoryURL string `json:"repositoryUrl,omitempty"`
|
RepositoryURL string `json:"repositoryUrl,omitempty"`
|
||||||
|
// User makes it easy to aggregate data in the user level instead
|
||||||
|
// of the repository level
|
||||||
|
User string `json:"user,omitempty"`
|
||||||
FilePath string `json:"filePath,omitempty"`
|
FilePath string `json:"filePath,omitempty"`
|
||||||
DefaultBranch string `json:"defaultBranch,omitempty"`
|
DefaultBranch string `json:"defaultBranch,omitempty"`
|
||||||
DocumentData string `json:"document,omitempty"`
|
DocumentData string `json:"document,omitempty"`
|
||||||
CreationTime *time.Time `json:"creationTime,omitempty"`
|
CreationTime *time.Time `json:"creationTime,omitempty"`
|
||||||
IsSame bool `json:"-"`
|
IsSame bool `json:"-"`
|
||||||
|
// FileType can be one of the following:
|
||||||
|
// "generator", "transformer", "resource", "".
|
||||||
|
FileType string `json:"fileType,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Implements the CrawlerDocument interface.
|
// Implements the CrawlerDocument interface.
|
||||||
@@ -27,11 +33,13 @@ func (doc *Document) GetDocument() *Document {
|
|||||||
func (doc *Document) Copy() *Document {
|
func (doc *Document) Copy() *Document {
|
||||||
return &Document{
|
return &Document{
|
||||||
RepositoryURL: doc.RepositoryURL,
|
RepositoryURL: doc.RepositoryURL,
|
||||||
|
User: doc.User,
|
||||||
FilePath: doc.FilePath,
|
FilePath: doc.FilePath,
|
||||||
DefaultBranch: doc.DefaultBranch,
|
DefaultBranch: doc.DefaultBranch,
|
||||||
DocumentData: doc.DocumentData,
|
DocumentData: doc.DocumentData,
|
||||||
CreationTime: doc.CreationTime,
|
CreationTime: doc.CreationTime,
|
||||||
IsSame: doc.IsSame,
|
IsSame: doc.IsSame,
|
||||||
|
FileType: doc.FileType,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -52,6 +60,7 @@ func (doc *Document) FromRelativePath(newFile string) (Document, error) {
|
|||||||
RepositoryURL: repoSpec.Host + path.Clean(repoSpec.OrgRepo),
|
RepositoryURL: repoSpec.Host + path.Clean(repoSpec.OrgRepo),
|
||||||
FilePath: path.Clean(repoSpec.Path),
|
FilePath: path.Clean(repoSpec.Path),
|
||||||
DefaultBranch: repoSpec.Ref,
|
DefaultBranch: repoSpec.Ref,
|
||||||
|
User: UserName(repoSpec.Host + path.Clean(repoSpec.OrgRepo)),
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
// else document is probably relative path.
|
// else document is probably relative path.
|
||||||
@@ -59,6 +68,7 @@ func (doc *Document) FromRelativePath(newFile string) (Document, error) {
|
|||||||
ret := Document{
|
ret := Document{
|
||||||
RepositoryURL: doc.RepositoryURL,
|
RepositoryURL: doc.RepositoryURL,
|
||||||
DefaultBranch: doc.DefaultBranch,
|
DefaultBranch: doc.DefaultBranch,
|
||||||
|
User: UserName(doc.RepositoryURL),
|
||||||
}
|
}
|
||||||
ogDir, _ := path.Split(doc.FilePath)
|
ogDir, _ := path.Split(doc.FilePath)
|
||||||
|
|
||||||
@@ -83,13 +93,7 @@ func (doc *Document) ID() string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (doc *Document) RepositoryFullName() string {
|
func (doc *Document) RepositoryFullName() string {
|
||||||
url := strings.TrimRight(doc.RepositoryURL, "/")
|
url := TrimUrl(doc.RepositoryURL)
|
||||||
|
|
||||||
gitPrefix := "git@github.com:"
|
|
||||||
if strings.HasPrefix(url, gitPrefix) {
|
|
||||||
url = url[len(gitPrefix):]
|
|
||||||
}
|
|
||||||
|
|
||||||
sections := strings.Split(url, "/")
|
sections := strings.Split(url, "/")
|
||||||
l := len(sections)
|
l := len(sections)
|
||||||
if l < 2 {
|
if l < 2 {
|
||||||
@@ -97,3 +101,24 @@ func (doc *Document) RepositoryFullName() string {
|
|||||||
}
|
}
|
||||||
return path.Join(sections[l-2], sections[l-1])
|
return path.Join(sections[l-2], sections[l-1])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TrimUrl returns s with every trailing slash removed and, after that, the
// "git@github.com:" prefix removed if the string starts with it. Any other
// text, including an "https://" scheme or doubled inner slashes, is returned
// unchanged.
func TrimUrl(s string) string {
	const gitPrefix = "git@github.com:"
	// TrimPrefix is a no-op when the prefix is absent, so no explicit
	// HasPrefix check is needed.
	return strings.TrimPrefix(strings.TrimRight(s, "/"), gitPrefix)
}
|
||||||
|
|
||||||
|
func UserName(repositoryURL string) string {
|
||||||
|
url := TrimUrl(repositoryURL)
|
||||||
|
sections := strings.Split(url, "/")
|
||||||
|
l := len(sections)
|
||||||
|
if l < 2 {
|
||||||
|
return url
|
||||||
|
}
|
||||||
|
return sections[l-2]
|
||||||
|
}
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ func TestFromRelativePath(t *testing.T) {
|
|||||||
RepositoryURL: "example.com/repo",
|
RepositoryURL: "example.com/repo",
|
||||||
FilePath: "path/to/other/file/resource.yaml",
|
FilePath: "path/to/other/file/resource.yaml",
|
||||||
DefaultBranch: "master",
|
DefaultBranch: "master",
|
||||||
|
User: "example.com",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -36,6 +37,7 @@ func TestFromRelativePath(t *testing.T) {
|
|||||||
RepositoryURL: "example.com/repo",
|
RepositoryURL: "example.com/repo",
|
||||||
FilePath: "path/to/other/file/patch.yaml",
|
FilePath: "path/to/other/file/patch.yaml",
|
||||||
DefaultBranch: "master",
|
DefaultBranch: "master",
|
||||||
|
User: "example.com",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -44,6 +46,7 @@ func TestFromRelativePath(t *testing.T) {
|
|||||||
RepositoryURL: "example.com/repo",
|
RepositoryURL: "example.com/repo",
|
||||||
FilePath: "path/to/file/service.yaml",
|
FilePath: "path/to/file/service.yaml",
|
||||||
DefaultBranch: "master",
|
DefaultBranch: "master",
|
||||||
|
User: "example.com",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -109,3 +112,39 @@ func TestDocument_RepositoryFullName(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDocument_UserName(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
repositoryURL string
|
||||||
|
expectedUserName string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
repositoryURL: "https://github.com/user/repo",
|
||||||
|
expectedUserName: "user",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
repositoryURL: "https://github.com//user/repo////",
|
||||||
|
expectedUserName: "user",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
repositoryURL: "repo/",
|
||||||
|
expectedUserName: "repo",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
repositoryURL: "",
|
||||||
|
expectedUserName: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
repositoryURL: "git@github.com:user/repo",
|
||||||
|
expectedUserName: "user",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
returnedUserName := UserName(tc.repositoryURL)
|
||||||
|
if returnedUserName != tc.expectedUserName {
|
||||||
|
t.Errorf("UserName expected %s, got %s",
|
||||||
|
tc.expectedUserName, returnedUserName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ func (uds *UniqueDocuments) Add(d *Document) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
uds.docs = append(uds.docs, d)
|
uds.docs = append(uds.docs, d)
|
||||||
uds.docIDs.Add(d.ID())
|
uds.docIDs.Set(d.ID(), "")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (uds *UniqueDocuments) AddDocuments(docs []*Document) {
|
func (uds *UniqueDocuments) AddDocuments(docs []*Document) {
|
||||||
|
|||||||
@@ -20,12 +20,18 @@ const IndexConfig = `
|
|||||||
"repositoryUrl": {
|
"repositoryUrl": {
|
||||||
"type": "keyword"
|
"type": "keyword"
|
||||||
},
|
},
|
||||||
|
"user": {
|
||||||
|
"type": "keyword"
|
||||||
|
},
|
||||||
"filePath": {
|
"filePath": {
|
||||||
"type": "keyword"
|
"type": "keyword"
|
||||||
},
|
},
|
||||||
"defaultBranch": {
|
"defaultBranch": {
|
||||||
"type": "keyword"
|
"type": "keyword"
|
||||||
},
|
},
|
||||||
|
"fileType": {
|
||||||
|
"type": "keyword"
|
||||||
|
},
|
||||||
"document": {
|
"document": {
|
||||||
"type": "text"
|
"type": "text"
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'C
|
|||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
"filter": [
|
"filter": [
|
||||||
{ "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }}
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -45,8 +45,53 @@ curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'C
|
|||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
"must_not": {
|
"must_not": {
|
||||||
"regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
}
|
},
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "resource" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"min_creationTime" : { "min" : { "field" : "creationTime" } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Find out the smallest value of the `creationTime` field of all kustomize generator files:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "generator" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"min_creationTime" : { "min" : { "field" : "creationTime" } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Find out the smallest value of the `creationTime` field of all kustomize transformer files:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "transformer" }}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"aggs" : {
|
"aggs" : {
|
||||||
@@ -87,6 +132,30 @@ curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-T
|
|||||||
'
|
'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Query all the kustomization files whose `creationTime` falls within a specific range:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"size": 20,
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"must": {
|
||||||
|
"range": {
|
||||||
|
"creationTime": {
|
||||||
|
"gte": "2017-09-24T15:49:57.000Z",
|
||||||
|
"lte": "2017-09-24T15:49:57.000Z"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
Aggregate how many new kustomization files were added into Github each month:
|
Aggregate how many new kustomization files were added into Github each month:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
@@ -94,7 +163,7 @@ curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Co
|
|||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
"filter": [
|
"filter": [
|
||||||
{ "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }}
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -117,7 +186,62 @@ curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Co
|
|||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
"must_not": [
|
"must_not": [
|
||||||
{ "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }}
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
],
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "resource" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"newFiles_over_time" : {
|
||||||
|
"date_histogram" : {
|
||||||
|
"field" : "creationTime",
|
||||||
|
"interval" : "month"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Aggregate how many new kustomize generator files were added to GitHub each month:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": [
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
],
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "generator" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"newFiles_over_time" : {
|
||||||
|
"date_histogram" : {
|
||||||
|
"field" : "creationTime",
|
||||||
|
"interval" : "month"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Aggregate how many new kustomize transformer files were added to GitHub each month:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": [
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
],
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "transformer" }}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -140,7 +264,7 @@ curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Co
|
|||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
"filter": [
|
"filter": [
|
||||||
{ "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }}
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -163,8 +287,11 @@ curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Co
|
|||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
"must_not": [
|
"must_not": [
|
||||||
{ "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }}
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
]
|
],
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "resource" }}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"aggs" : {
|
"aggs" : {
|
||||||
@@ -177,4 +304,108 @@ curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Co
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
'
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Aggregate how many new kustomize generator files were added to GitHub each year:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": [
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
],
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "generator" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"newFiles_over_time" : {
|
||||||
|
"date_histogram" : {
|
||||||
|
"field" : "creationTime",
|
||||||
|
"interval" : "year"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Aggregate how many new kustomize transformer files were added to GitHub each year:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": [
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
],
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "transformer" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"newFiles_over_time" : {
|
||||||
|
"date_histogram" : {
|
||||||
|
"field" : "creationTime",
|
||||||
|
"interval" : "year"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Find the generator files created within the given time range:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "generator" }}
|
||||||
|
],
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"must": {
|
||||||
|
"range": {
|
||||||
|
"creationTime": {
|
||||||
|
"gte": "2019-04-26T16:40:02.000Z",
|
||||||
|
"lte": "2019-04-26T16:40:02.000Z"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Find the transformer files created within the given time range:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "transformer" }}
|
||||||
|
],
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"must": {
|
||||||
|
"range": {
|
||||||
|
"creationTime": {
|
||||||
|
"gte": "2019-04-26T16:40:02.000Z",
|
||||||
|
"lte": "2019-04-26T16:40:02.000Z"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
```
|
```
|
||||||
301
api/internal/crawl/search_cmds/fileType.md
Normal file
301
api/internal/crawl/search_cmds/fileType.md
Normal file
@@ -0,0 +1,301 @@
|
|||||||
|
Find all the documents having the `fileType` field set:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"exists": {
|
||||||
|
"field": "fileType"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Find all the documents whose `fileType` field is not set:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"size": 10000,
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": {
|
||||||
|
"exists": {
|
||||||
|
"field": "fileType"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Search for all the documents whose `fileType` field is `resource`:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "resource" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Search for all the kustomization files whose `fileType` field is `resource`:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }},
|
||||||
|
{ "regexp": { "fileType": "resource" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Search for all the kustomize resource files:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "resource" }}
|
||||||
|
],
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Search for all the kustomization files that include a `generators` field:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"size": 10000,
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must": {
|
||||||
|
"match" : {
|
||||||
|
"identifiers" : {
|
||||||
|
"query" : "generators"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filter": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Search for all the documents whose `fileType` field is `generator`:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "generator" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Search for all the kustomization files whose `fileType` field is `generator`:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }},
|
||||||
|
{ "regexp": { "fileType": "generator" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Search for all the kustomize generator files:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "generator" }}
|
||||||
|
],
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Search for all the kustomization files that include a `transformers` field:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"size": 10000,
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must": {
|
||||||
|
"match" : {
|
||||||
|
"identifiers" : {
|
||||||
|
"query" : "transformers"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filter": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Search for all the documents whose `fileType` field is `transformer`:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "transformer" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Search for all the kustomization files whose `fileType` field is `transformer`:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }},
|
||||||
|
{ "regexp": { "fileType": "transformer" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Search for all the kustomize transformer files:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "transformer" }}
|
||||||
|
],
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Count distinct values of the `fileType` field:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"aggs" : {
|
||||||
|
"fileType_count" : {
|
||||||
|
"cardinality" : {
|
||||||
|
"field" : "fileType",
|
||||||
|
"precision_threshold": 40000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
List all the values of the `fileType` field and the frequency of each value:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"aggs" : {
|
||||||
|
"fileType" : {
|
||||||
|
"terms" : {
|
||||||
|
"field" : "fileType"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
For all the kustomization files in the index, list all the values of the
|
||||||
|
`fileType` field and the frequency of each value:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"fileType" : {
|
||||||
|
"terms" : {
|
||||||
|
"field" : "fileType"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
For all the non-kustomization files in the index, list all the values of the
|
||||||
|
`fileType` field and the frequency of each value:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"fileType" : {
|
||||||
|
"terms" : {
|
||||||
|
"field" : "fileType"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
12
api/internal/crawl/search_cmds/id.md
Normal file
12
api/internal/crawl/search_cmds/id.md
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
Find the document with the given `_id`:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"terms": {
|
||||||
|
"_id": [ "b3a03f3327841617db696e2d6abc30e1a1bd653f1a2bbce05637f7dcae1a43f7" ]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
@@ -57,7 +57,7 @@ curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-T
|
|||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
"filter": [
|
"filter": [
|
||||||
{ "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }}
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -73,7 +73,7 @@ curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-T
|
|||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
"must_not": [
|
"must_not": [
|
||||||
{ "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }}
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,4 +16,17 @@ curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_mapping?pretty"
|
|||||||
Delete the kustomize index from the ElasticSearch cluster (**Use this command with caution**):
|
Delete the kustomize index from the ElasticSearch cluster (**Use this command with caution**):
|
||||||
```
|
```
|
||||||
curl -X DELETE "${ElasticSearchURL}:9200/${INDEXNAME}?pretty"
|
curl -X DELETE "${ElasticSearchURL}:9200/${INDEXNAME}?pretty"
|
||||||
|
```
|
||||||
|
|
||||||
|
Add a new field (here `fileType`, mapped as `keyword`) to an existing index:
|
||||||
|
```
|
||||||
|
curl -X PUT "${ElasticSearchURL}:9200/${INDEXNAME}/_mapping/_doc?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"properties": {
|
||||||
|
"fileType": {
|
||||||
|
"type": "keyword"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
```
|
```
|
||||||
@@ -21,7 +21,7 @@ curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'C
|
|||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
"filter": [
|
"filter": [
|
||||||
{ "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }}
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -37,16 +37,143 @@ curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'C
|
|||||||
'
|
'
|
||||||
```
|
```
|
||||||
|
|
||||||
Count how many Github repositories include kustomize resource files:
|
Count distinct values of the `repositoryUrl` field for all the kustomize resource files in the index:
|
||||||
```
|
```
|
||||||
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
"must_not": {
|
"must_not": {
|
||||||
"regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "resource" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"repositoryUrl_count" : {
|
||||||
|
"cardinality" : {
|
||||||
|
"field" : "repositoryUrl",
|
||||||
|
"precision_threshold": 40000
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Count distinct values of the `repositoryUrl` field for all the kustomize generator files in the index:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "generator" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"repositoryUrl_count" : {
|
||||||
|
"cardinality" : {
|
||||||
|
"field" : "repositoryUrl",
|
||||||
|
"precision_threshold": 40000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Count distinct values of the `repositoryUrl` field for all the kustomize transformer files in the index:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "transformer" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"repositoryUrl_count" : {
|
||||||
|
"cardinality" : {
|
||||||
|
"field" : "repositoryUrl",
|
||||||
|
"precision_threshold": 40000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Count distinct values of the `repositoryUrl` field for all the kustomize resource dirs in the index:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "resource" }},
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"repositoryUrl_count" : {
|
||||||
|
"cardinality" : {
|
||||||
|
"field" : "repositoryUrl",
|
||||||
|
"precision_threshold": 40000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Count distinct values of the `repositoryUrl` field for all the kustomize generator dirs in the index:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "generator" }},
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"repositoryUrl_count" : {
|
||||||
|
"cardinality" : {
|
||||||
|
"field" : "repositoryUrl",
|
||||||
|
"precision_threshold": 40000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Count distinct values of the `repositoryUrl` field for all the kustomize transformer dirs in the index:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "transformer" }},
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"aggs" : {
|
"aggs" : {
|
||||||
"repositoryUrl_count" : {
|
"repositoryUrl_count" : {
|
||||||
@@ -85,7 +212,7 @@ curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Co
|
|||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
"filter": [
|
"filter": [
|
||||||
{ "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }}
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -108,8 +235,11 @@ curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Co
|
|||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
"must_not": {
|
"must_not": {
|
||||||
"regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
}
|
},
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "resource" }}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"aggs" : {
|
"aggs" : {
|
||||||
|
|||||||
380
api/internal/crawl/search_cmds/user.md
Normal file
380
api/internal/crawl/search_cmds/user.md
Normal file
@@ -0,0 +1,380 @@
|
|||||||
|
Find all the documents having the `user` field set:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"exists": {
|
||||||
|
"field": "user"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Find all the documents whose `user` field is not set:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"size": 10000,
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": {
|
||||||
|
"exists": {
|
||||||
|
"field": "user"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Search for all the documents whose `user` field is `kubernetes-sigs`:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "user": "kubernetes-sigs" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Count distinct values of the `user` field:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"aggs" : {
|
||||||
|
"user_count" : {
|
||||||
|
"cardinality" : {
|
||||||
|
"field" : "user",
|
||||||
|
"precision_threshold": 40000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
List all the values of the `user` field and the frequency of each value:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"aggs" : {
|
||||||
|
"user" : {
|
||||||
|
"terms" : {
|
||||||
|
"field" : "user",
|
||||||
|
"size" : 20
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Count distinct values of the `user` field for all the kustomization files in the index:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"user_count" : {
|
||||||
|
"cardinality" : {
|
||||||
|
"field" : "user",
|
||||||
|
"precision_threshold": 40000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
For all the kustomization files in the index, list all the values of the
|
||||||
|
`user` field and the frequency of each value:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"user" : {
|
||||||
|
"terms" : {
|
||||||
|
"field" : "user",
|
||||||
|
"size": 20
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Count distinct values of the `user` field for all the kustomize resource files in the index:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "resource" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"user_count" : {
|
||||||
|
"cardinality" : {
|
||||||
|
"field" : "user",
|
||||||
|
"precision_threshold": 40000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
For all the kustomize resource files in the index, list all the values of the
|
||||||
|
`user` field and the frequency of each value:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "resource" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"user" : {
|
||||||
|
"terms" : {
|
||||||
|
"field" : "user",
|
||||||
|
"size": 20
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Count distinct values of the `user` field for all the kustomize generator files in the index:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "generator" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"user_count" : {
|
||||||
|
"cardinality" : {
|
||||||
|
"field" : "user",
|
||||||
|
"precision_threshold": 40000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
For all the kustomize generator files in the index, list all the values of the
|
||||||
|
`user` field and the frequency of each value:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "generator" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"user" : {
|
||||||
|
"terms" : {
|
||||||
|
"field" : "user",
|
||||||
|
"size": 20
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Count distinct values of the `user` field for all the kustomize transformer files in the index:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "transformer" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"user_count" : {
|
||||||
|
"cardinality" : {
|
||||||
|
"field" : "user",
|
||||||
|
"precision_threshold": 40000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
For all the kustomize transformer files in the index, list all the values of the
|
||||||
|
`user` field and the frequency of each value:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"must_not": {
|
||||||
|
"regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }
|
||||||
|
},
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "transformer" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"user" : {
|
||||||
|
"terms" : {
|
||||||
|
"field" : "user",
|
||||||
|
"size": 20
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Count distinct values of the `user` field for all the kustomize generator dirs in the index:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "generator" }},
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"user_count" : {
|
||||||
|
"cardinality" : {
|
||||||
|
"field" : "user",
|
||||||
|
"precision_threshold": 40000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
For all the kustomize generator dirs in the index, list all the values of the
|
||||||
|
`user` field and the frequency of each value:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "generator" }},
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"user" : {
|
||||||
|
"terms" : {
|
||||||
|
"field" : "user",
|
||||||
|
"size": 20
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
Count distinct values of the `user` field for all the kustomize transformer dirs in the index:
|
||||||
|
```
|
||||||
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "transformer" }},
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"user_count" : {
|
||||||
|
"cardinality" : {
|
||||||
|
"field" : "user",
|
||||||
|
"precision_threshold": 40000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
For all the kustomize transformer dirs in the index, list all the values of the
|
||||||
|
`user` field and the frequency of each value:
|
||||||
|
```
|
||||||
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{ "regexp": { "fileType": "transformer" }},
|
||||||
|
{ "regexp": { "filePath": "(.*/)?kustomization((.yaml)?|(.yml)?)(/)*" }}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggs" : {
|
||||||
|
"user" : {
|
||||||
|
"terms" : {
|
||||||
|
"field" : "user"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
@@ -1,16 +1,21 @@
|
|||||||
package utils
|
package utils
|
||||||
|
|
||||||
type SeenMap map[string]struct{}
|
type SeenMap map[string]string
|
||||||
|
|
||||||
func (seen SeenMap) Seen(item string) bool {
|
func (seen SeenMap) Seen(item string) bool {
|
||||||
_, ok := seen[item]
|
_, ok := seen[item]
|
||||||
return ok
|
return ok
|
||||||
}
|
}
|
||||||
|
|
||||||
func (seen SeenMap) Add(item string) {
|
func (seen SeenMap) Set(k, v string) {
|
||||||
seen[item] = struct{}{}
|
seen[k] = v
|
||||||
|
}
|
||||||
|
|
||||||
|
// Value returns the value stored for k. The caller should make sure that k is in the map; otherwise the empty string is returned.
|
||||||
|
func (seen SeenMap) Value(k string) string {
|
||||||
|
return seen[k]
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewSeenMap() SeenMap {
|
func NewSeenMap() SeenMap {
|
||||||
return make(map[string]struct{})
|
return make(map[string]string)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user