mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-11 17:12:51 +00:00
Merge pull request #2109 from haiyanmeng/stats
Add support to get files referred in the generators and transformers fields
This commit is contained in:
@@ -44,7 +44,7 @@ type kustomizeSearch struct {
|
|||||||
// /register: not implemented, but meant as an endpoint for adding new
|
// /register: not implemented, but meant as an endpoint for adding new
|
||||||
// kustomization files to the corpus.
|
// kustomization files to the corpus.
|
||||||
func NewKustomizeSearch(ctx context.Context) (*kustomizeSearch, error) {
|
func NewKustomizeSearch(ctx context.Context) (*kustomizeSearch, error) {
|
||||||
idx, err := index.NewKustomizeIndex(ctx)
|
idx, err := index.NewKustomizeIndex(ctx, "kustomize")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
|
||||||
|
|
||||||
"sigs.k8s.io/kustomize/api/internal/crawl/crawler"
|
"sigs.k8s.io/kustomize/api/internal/crawl/crawler"
|
||||||
"sigs.k8s.io/kustomize/api/internal/crawl/crawler/github"
|
"sigs.k8s.io/kustomize/api/internal/crawl/crawler/github"
|
||||||
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
||||||
@@ -26,6 +28,7 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type CrawlMode int
|
type CrawlMode int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
CrawlUnknown CrawlMode = iota
|
CrawlUnknown CrawlMode = iota
|
||||||
// Crawl all the kustomization files in all the repositories of a Github user
|
// Crawl all the kustomization files in all the repositories of a Github user
|
||||||
@@ -125,13 +128,13 @@ func main() {
|
|||||||
|
|
||||||
// seen tracks the IDs of all the documents in the index.
|
// seen tracks the IDs of all the documents in the index.
|
||||||
// This helps avoid indexing a given document multiple times.
|
// This helps avoid indexing a given document multiple times.
|
||||||
seen := crawler.NewSeenMap()
|
seen := utils.NewSeenMap()
|
||||||
|
|
||||||
mode := NewCrawlMode(*modePtr)
|
mode := NewCrawlMode(*modePtr)
|
||||||
|
|
||||||
ghCrawlerConstructor := func(user, repo string) crawler.Crawler {
|
ghCrawlerConstructor := func(user, repo string) crawler.Crawler {
|
||||||
if user != "" {
|
if user != "" {
|
||||||
return github.NewCrawler(githubToken, retryCount, clientCache,
|
return github.NewCrawler(githubToken, retryCount, clientCache,
|
||||||
github.QueryWith(
|
github.QueryWith(
|
||||||
github.Filename("kustomization.yaml"),
|
github.Filename("kustomization.yaml"),
|
||||||
github.Filename("kustomization.yml"),
|
github.Filename("kustomization.yml"),
|
||||||
|
|||||||
@@ -5,12 +5,21 @@ import (
|
|||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"path/filepath"
|
"net/http"
|
||||||
|
"os"
|
||||||
"sort"
|
"sort"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"sigs.k8s.io/kustomize/api/internal/crawl/crawler/github"
|
||||||
|
|
||||||
|
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
||||||
|
|
||||||
"sigs.k8s.io/kustomize/api/internal/crawl/index"
|
"sigs.k8s.io/kustomize/api/internal/crawl/index"
|
||||||
"sigs.k8s.io/kustomize/api/konfig"
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
githubAccessTokenVar = "GITHUB_ACCESS_TOKEN"
|
||||||
|
retryCount = 3
|
||||||
)
|
)
|
||||||
|
|
||||||
// iterateArr adds each item in arr into countMap.
|
// iterateArr adds each item in arr into countMap.
|
||||||
@@ -25,17 +34,6 @@ func iterateArr(arr []string, countMap map[string]int) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// isKustomizationFile determines whether a file path is a kustomization file
|
|
||||||
func isKustomizationFile(path string) bool {
|
|
||||||
basename := filepath.Base(path)
|
|
||||||
for _, name := range konfig.RecognizedKustomizationFileNames() {
|
|
||||||
if basename == name {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// SortMapKeyByValue takes a map as its input, sorts its keys according to their values
|
// SortMapKeyByValue takes a map as its input, sorts its keys according to their values
|
||||||
// in the map, and outputs the sorted keys as a slice.
|
// in the map, and outputs the sorted keys as a slice.
|
||||||
func SortMapKeyByValue(m map[string]int) []string {
|
func SortMapKeyByValue(m map[string]int) []string {
|
||||||
@@ -44,10 +42,129 @@ func SortMapKeyByValue(m map[string]int) []string {
|
|||||||
keys = append(keys, key)
|
keys = append(keys, key)
|
||||||
}
|
}
|
||||||
// sort keys according to their values in the map m
|
// sort keys according to their values in the map m
|
||||||
sort.Slice(keys, func(i, j int) bool {return m[keys[i]] > m[keys[j]]})
|
sort.Slice(keys, func(i, j int) bool { return m[keys[i]] > m[keys[j]] })
|
||||||
return keys
|
return keys
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GeneratorOrTransformerStats(ctx context.Context,
|
||||||
|
docs []*doc.Document, isGenerator bool, idx *index.KustomizeIndex) {
|
||||||
|
|
||||||
|
fieldName := "generators"
|
||||||
|
if !isGenerator {
|
||||||
|
fieldName = "transformers"
|
||||||
|
}
|
||||||
|
|
||||||
|
// allReferredDocs includes all the documents referred in the field
|
||||||
|
allReferredDocs := doc.NewUniqueDocuments()
|
||||||
|
|
||||||
|
// docUsingGeneratorCount counts the number of the kustomization files using generators or transformers
|
||||||
|
docCount := 0
|
||||||
|
|
||||||
|
// collect all the documents referred in the field
|
||||||
|
for _, d := range docs {
|
||||||
|
kdoc := doc.KustomizationDocument{
|
||||||
|
Document: *d,
|
||||||
|
}
|
||||||
|
referredDocs, err := kdoc.GetResources(false, !isGenerator, isGenerator)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("failed to parse the %s field of the Document (%s): %v",
|
||||||
|
fieldName, d.Path(), err)
|
||||||
|
}
|
||||||
|
if len(referredDocs) > 0 {
|
||||||
|
docCount++
|
||||||
|
allReferredDocs.AddDocuments(referredDocs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fileCount, dirCount, fileTypeDocs, dirTypeDocs := DocumentTypeSummary(ctx, allReferredDocs.Documents())
|
||||||
|
|
||||||
|
// check whether any of the files are not in the index
|
||||||
|
nonExistFileCount := ExistInIndex(idx, fileTypeDocs, fieldName + " file ")
|
||||||
|
// check whether any of the dirs are not in the index
|
||||||
|
nonExistDirCount := ExistInIndex(idx, dirTypeDocs, fieldName + " dir ")
|
||||||
|
|
||||||
|
GitRepositorySummary(fileTypeDocs, fieldName + " files")
|
||||||
|
GitRepositorySummary(dirTypeDocs, fieldName + " dirs")
|
||||||
|
|
||||||
|
fmt.Printf("%d kustomization files use %s: %d %s are files and %d %s are dirs.\n",
|
||||||
|
docCount, fieldName, fileCount, fieldName, dirCount, fieldName)
|
||||||
|
fmt.Printf("%d %s files do not exist in the index\n", nonExistFileCount, fieldName)
|
||||||
|
fmt.Printf("%d %s dirs do not exist in the index\n", nonExistDirCount, fieldName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GitRepositorySummary counts the distribution of docs:
|
||||||
|
// 1) how many git repositories are these docs from?
|
||||||
|
// 2) how many docs are from each git repository?
|
||||||
|
func GitRepositorySummary(docs []*doc.Document, msgPrefix string) {
|
||||||
|
m := make(map[string]int)
|
||||||
|
for _, d := range docs {
|
||||||
|
if _, ok := m[d.RepositoryURL]; ok {
|
||||||
|
m[d.RepositoryURL]++
|
||||||
|
} else {
|
||||||
|
m[d.RepositoryURL] = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sortedKeys := SortMapKeyByValue(m)
|
||||||
|
for _, k := range sortedKeys {
|
||||||
|
fmt.Printf("%d %s are from %s\n", m[k], msgPrefix, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExistInIndex goes through each Document in docs, and check whether it is in the index or not.
|
||||||
|
// It returns the number of documents which does not exist in the index.
|
||||||
|
func ExistInIndex(idx *index.KustomizeIndex, docs []*doc.Document, msgPrefix string) int {
|
||||||
|
nonExistCount := 0
|
||||||
|
for _, d := range docs {
|
||||||
|
exists, err := idx.Exists(d.ID())
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
}
|
||||||
|
if !exists {
|
||||||
|
log.Printf("%s (%s) does not exist in the index", msgPrefix, d.Path())
|
||||||
|
nonExistCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nonExistCount
|
||||||
|
}
|
||||||
|
|
||||||
|
// DocumentTypeSummary goes through each doc in docs, and determines whether it is a file or dir.
|
||||||
|
func DocumentTypeSummary(ctx context.Context, docs []*doc.Document) (
|
||||||
|
fileCount, dirCount int, files, dirs []*doc.Document) {
|
||||||
|
githubToken := os.Getenv(githubAccessTokenVar)
|
||||||
|
if githubToken == "" {
|
||||||
|
log.Fatalf("Must set the variable '%s' to make github requests.\n",
|
||||||
|
githubAccessTokenVar)
|
||||||
|
}
|
||||||
|
ghCrawler := github.NewCrawler(githubToken, retryCount, &http.Client{}, github.QueryWith())
|
||||||
|
|
||||||
|
for _, d := range docs {
|
||||||
|
oldFilePath := d.FilePath
|
||||||
|
if err := ghCrawler.FetchDocument(ctx, d); err != nil {
|
||||||
|
log.Printf("FetchDocument failed on %s: %v", d.Path(), err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if d.FilePath == oldFilePath {
|
||||||
|
fileCount++
|
||||||
|
files = append(files, d)
|
||||||
|
} else {
|
||||||
|
dirCount++
|
||||||
|
dirs = append(dirs, d)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fileCount, dirCount, files, dirs
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExistInSlice reports whether target is equal to at least one element of
// items. It returns false when items is nil or empty.
func ExistInSlice(items []string, target string) bool {
	for i := 0; i < len(items); i++ {
		if items[i] != target {
			continue
		}
		return true
	}
	return false
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
topKindsPtr := flag.Int(
|
topKindsPtr := flag.Int(
|
||||||
"kinds", -1,
|
"kinds", -1,
|
||||||
@@ -64,10 +181,12 @@ If you only want to list the 10 most popular identifiers, set the flag to 10.`)
|
|||||||
`the number of kustomize features to be listed according to their popularities.
|
`the number of kustomize features to be listed according to their popularities.
|
||||||
By default, all the features will be listed.
|
By default, all the features will be listed.
|
||||||
If you only want to list the 10 most popular features, set the flag to 10.`)
|
If you only want to list the 10 most popular features, set the flag to 10.`)
|
||||||
|
indexNamePtr := flag.String(
|
||||||
|
"index", "kustomize", "The name of the ElasticSearch index.")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
idx, err := index.NewKustomizeIndex(ctx)
|
idx, err := index.NewKustomizeIndex(ctx, *indexNamePtr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Could not create an index: %v\n", err)
|
log.Fatalf("Could not create an index: %v\n", err)
|
||||||
}
|
}
|
||||||
@@ -85,6 +204,12 @@ If you only want to list the 10 most popular features, set the flag to 10.`)
|
|||||||
// ids tracks the unique IDs of the documents in the index
|
// ids tracks the unique IDs of the documents in the index
|
||||||
ids := make(map[string]struct{})
|
ids := make(map[string]struct{})
|
||||||
|
|
||||||
|
// generatorDocs includes all the docs using generators
|
||||||
|
generatorDocs := make([]*doc.Document, 0)
|
||||||
|
|
||||||
|
// transformersDocs includes all the docs using transformers
|
||||||
|
transformersDocs := make([]*doc.Document, 0)
|
||||||
|
|
||||||
// get all the documents in the index
|
// get all the documents in the index
|
||||||
query := []byte(`{ "query":{ "match_all":{} } }`)
|
query := []byte(`{ "query":{ "match_all":{} } }`)
|
||||||
it := idx.IterateQuery(query, 10000, 60*time.Second)
|
it := idx.IterateQuery(query, 10000, 60*time.Second)
|
||||||
@@ -94,21 +219,28 @@ If you only want to list the 10 most popular features, set the flag to 10.`)
|
|||||||
if _, ok := ids[hit.ID]; !ok {
|
if _, ok := ids[hit.ID]; !ok {
|
||||||
ids[hit.ID] = struct{}{}
|
ids[hit.ID] = struct{}{}
|
||||||
} else {
|
} else {
|
||||||
fmt.Printf("Found duplicate ID (%s)\n", hit.ID)
|
log.Printf("Found duplicate ID (%s)\n", hit.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
count++
|
count++
|
||||||
iterateArr(hit.Document.Kinds, kindsMap)
|
iterateArr(hit.Document.Kinds, kindsMap)
|
||||||
iterateArr(hit.Document.Identifiers, identifiersMap)
|
iterateArr(hit.Document.Identifiers, identifiersMap)
|
||||||
|
|
||||||
if isKustomizationFile(hit.Document.FilePath) {
|
if doc.IsKustomizationFile(hit.Document.FilePath) {
|
||||||
kustomizationFilecount++
|
kustomizationFilecount++
|
||||||
iterateArr(hit.Document.Identifiers, kustomizeIdentifiersMap)
|
iterateArr(hit.Document.Identifiers, kustomizeIdentifiersMap)
|
||||||
|
if ExistInSlice(hit.Document.Identifiers, "generators") {
|
||||||
|
generatorDocs = append(generatorDocs, hit.Document.Copy())
|
||||||
|
}
|
||||||
|
if ExistInSlice(hit.Document.Identifiers, "transformers") {
|
||||||
|
transformersDocs = append(transformersDocs, hit.Document.Copy())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := it.Err(); err != nil {
|
if err := it.Err(); err != nil {
|
||||||
fmt.Printf("Error iterating: %v\n", err)
|
log.Fatalf("Error iterating: %v\n", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
sortedKindsMapKeys := SortMapKeyByValue(kindsMap)
|
sortedKindsMapKeys := SortMapKeyByValue(kindsMap)
|
||||||
@@ -147,4 +279,7 @@ There are %d documents in the kustomize index.
|
|||||||
kustomizeFeatureCount++
|
kustomizeFeatureCount++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GeneratorOrTransformerStats(ctx, generatorDocs, true, idx)
|
||||||
|
GeneratorOrTransformerStats(ctx, transformersDocs, false, idx)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,8 +11,13 @@ spec:
|
|||||||
image: gcr.io/haiyanmeng-gke-dev/kustomize_stats:v1
|
image: gcr.io/haiyanmeng-gke-dev/kustomize_stats:v1
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
command: ["/kustomize_stats"]
|
command: ["/kustomize_stats"]
|
||||||
args: ["--kinds=50", "--identifiers=50", "--kustomize-features=50"]
|
args: ["--index=kustomize", "--kinds=50", "--identifiers=50", "--kustomize-features=50"]
|
||||||
env:
|
env:
|
||||||
|
- name: GITHUB_ACCESS_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: github-access-token
|
||||||
|
key: token
|
||||||
- name: ELASTICSEARCH_URL
|
- name: ELASTICSEARCH_URL
|
||||||
valueFrom:
|
valueFrom:
|
||||||
configMapKeyRef:
|
configMapKeyRef:
|
||||||
|
|||||||
@@ -10,6 +10,8 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
|
||||||
|
|
||||||
"sigs.k8s.io/kustomize/api/internal/crawl/index"
|
"sigs.k8s.io/kustomize/api/internal/crawl/index"
|
||||||
|
|
||||||
_ "github.com/gomodule/redigo/redis"
|
_ "github.com/gomodule/redigo/redis"
|
||||||
@@ -29,7 +31,7 @@ type Crawler interface {
|
|||||||
// Crawl returns when it is done processing. This method does not take
|
// Crawl returns when it is done processing. This method does not take
|
||||||
// ownership of the channel. The channel is write only, and it
|
// ownership of the channel. The channel is write only, and it
|
||||||
// designates where the crawler should forward the documents.
|
// designates where the crawler should forward the documents.
|
||||||
Crawl(ctx context.Context, output chan<- CrawledDocument, seen SeenMap) error
|
Crawl(ctx context.Context, output chan<- CrawledDocument, seen utils.SeenMap) error
|
||||||
|
|
||||||
// Get the document data given the FilePath, Repo, and Ref/Tag/Branch.
|
// Get the document data given the FilePath, Repo, and Ref/Tag/Branch.
|
||||||
FetchDocument(context.Context, *doc.Document) error
|
FetchDocument(context.Context, *doc.Document) error
|
||||||
@@ -43,25 +45,15 @@ type CrawledDocument interface {
|
|||||||
ID() string
|
ID() string
|
||||||
GetDocument() *doc.Document
|
GetDocument() *doc.Document
|
||||||
// Get all the Documents directly referred in a Document.
|
// Get all the Documents directly referred in a Document.
|
||||||
GetResources() ([]*doc.Document, error)
|
// For a Document representing a non-kustomization file, an empty slice will be returned.
|
||||||
|
// For a Document representing a kustomization file:
|
||||||
|
// the `includeResources` parameter determines whether the documents referred in the `resources` field are returned or not;
|
||||||
|
// the `includeTransformers` parameter determines whether the documents referred in the `transformers` field are returned or not;
|
||||||
|
// the `includeGenerators` parameter determines whether the documents referred in the `generators` field are returned or not.
|
||||||
|
GetResources(includeResources, includeTransformers, includeGenerators bool) ([]*doc.Document, error)
|
||||||
WasCached() bool
|
WasCached() bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// SeenMap is a set of string items, stored as the keys of a map.
type SeenMap map[string]struct{}

// Seen reports whether item has already been added to the set.
func (seen SeenMap) Seen(item string) bool {
	if _, found := seen[item]; found {
		return true
	}
	return false
}

// Add records item in the set. Adding an item that is already present leaves
// the set unchanged.
func (seen SeenMap) Add(item string) {
	var marker struct{}
	seen[item] = marker
}

// NewSeenMap returns an empty, non-nil SeenMap that is ready for Add.
func NewSeenMap() SeenMap {
	return SeenMap{}
}
|
|
||||||
|
|
||||||
type CrawlSeed []*doc.Document
|
type CrawlSeed []*doc.Document
|
||||||
|
|
||||||
type IndexFunc func(CrawledDocument, index.Mode) error
|
type IndexFunc func(CrawledDocument, index.Mode) error
|
||||||
@@ -84,18 +76,18 @@ func findMatch(d *doc.Document, crawlers []Crawler) Crawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
||||||
seen SeenMap, stack *CrawlSeed) {
|
seen utils.SeenMap, stack *CrawlSeed) {
|
||||||
|
|
||||||
seen.Add(cdoc.ID())
|
seen.Add(cdoc.ID())
|
||||||
|
|
||||||
// Insert into index
|
// Insert into index
|
||||||
if err := indx(cdoc, index.InsertOrUpdate); err != nil {
|
if err := indx(cdoc, index.InsertOrUpdate); err != nil {
|
||||||
logger.Printf("Failed to insert or update %s %s: %v",
|
logger.Printf("Failed to insert or update doc(%s): %v",
|
||||||
cdoc.GetDocument().RepositoryURL, cdoc.GetDocument().FilePath, err)
|
cdoc.GetDocument().Path(), err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
deps, err := cdoc.GetResources()
|
deps, err := cdoc.GetResources(true, false, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Println(err)
|
logger.Println(err)
|
||||||
return
|
return
|
||||||
@@ -110,7 +102,7 @@ func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv Converter, indx IndexFunc,
|
func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv Converter, indx IndexFunc,
|
||||||
seen SeenMap, stack *CrawlSeed) {
|
seen utils.SeenMap, stack *CrawlSeed) {
|
||||||
|
|
||||||
UpdatedDocCount := 0
|
UpdatedDocCount := 0
|
||||||
seenDocCount := 0
|
seenDocCount := 0
|
||||||
@@ -131,7 +123,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
*docsPtr = (*docsPtr)[:(len(*docsPtr) - 1)]
|
*docsPtr = (*docsPtr)[:(len(*docsPtr) - 1)]
|
||||||
|
|
||||||
crawledDocCount++
|
crawledDocCount++
|
||||||
logger.Printf("Crawling doc %d: %s %s", crawledDocCount, tail.RepositoryURL, tail.FilePath)
|
logger.Printf("Crawling doc %d: %s", crawledDocCount, tail.Path())
|
||||||
|
|
||||||
if seen.Seen(tail.ID()) {
|
if seen.Seen(tail.ID()) {
|
||||||
logger.Printf("this doc has been seen before")
|
logger.Printf("this doc has been seen before")
|
||||||
@@ -140,7 +132,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
}
|
}
|
||||||
|
|
||||||
if tail.WasCached() {
|
if tail.WasCached() {
|
||||||
logger.Printf("%s %s is cached already", tail.RepositoryURL, tail.FilePath)
|
logger.Printf("doc(%s) is cached already", tail.Path())
|
||||||
cachedDocCount++
|
cachedDocCount++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -161,10 +153,8 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
// `bases` field.
|
// `bases` field.
|
||||||
seen.Add(tail.ID())
|
seen.Add(tail.ID())
|
||||||
|
|
||||||
|
|
||||||
if err := match.FetchDocument(ctx, tail); err != nil {
|
if err := match.FetchDocument(ctx, tail); err != nil {
|
||||||
logger.Printf("FetchDocument failed on %s %s: %v",
|
logger.Printf("FetchDocument failed on doc(%s): %v", tail.Path(), err)
|
||||||
tail.RepositoryURL, tail.FilePath, err)
|
|
||||||
FetchDocumentErrCount++
|
FetchDocumentErrCount++
|
||||||
// delete the document from the index
|
// delete the document from the index
|
||||||
cdoc := &doc.KustomizationDocument{
|
cdoc := &doc.KustomizationDocument{
|
||||||
@@ -172,16 +162,14 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
}
|
}
|
||||||
seen.Add(cdoc.ID())
|
seen.Add(cdoc.ID())
|
||||||
if err := indx(cdoc, index.Delete); err != nil {
|
if err := indx(cdoc, index.Delete); err != nil {
|
||||||
logger.Printf("Failed to delete %s %s: %v",
|
logger.Printf("Failed to delete doc(%s): %v", cdoc.Path(), err)
|
||||||
cdoc.RepositoryURL, cdoc.FilePath, err)
|
|
||||||
}
|
}
|
||||||
deleteDocCount++
|
deleteDocCount++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := match.SetCreated(ctx, tail); err != nil {
|
if err := match.SetCreated(ctx, tail); err != nil {
|
||||||
logger.Printf("SetCreated failed on %s %s: %v",
|
logger.Printf("SetCreated failed on doc(%s): %v", tail.Path(), err)
|
||||||
tail.RepositoryURL, tail.FilePath, err)
|
|
||||||
SetCreatedErrCount++
|
SetCreatedErrCount++
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -189,8 +177,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
// If conv returns an error, cdoc can still be added into the index so that
|
// If conv returns an error, cdoc can still be added into the index so that
|
||||||
// cdoc.Document can be searched.
|
// cdoc.Document can be searched.
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Printf("conv failed on %s %s: %v",
|
logger.Printf("conv failed on doc(%s): %v", tail.Path(), err)
|
||||||
tail.RepositoryURL, tail.FilePath, err)
|
|
||||||
convErrCount++
|
convErrCount++
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -211,7 +198,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
// CrawlFromSeed updates all the documents in seed, and crawls all the new
|
// CrawlFromSeed updates all the documents in seed, and crawls all the new
|
||||||
// documents referred in the seed.
|
// documents referred in the seed.
|
||||||
func CrawlFromSeed(ctx context.Context, seed CrawlSeed, crawlers []Crawler,
|
func CrawlFromSeed(ctx context.Context, seed CrawlSeed, crawlers []Crawler,
|
||||||
conv Converter, indx IndexFunc, seen SeenMap) {
|
conv Converter, indx IndexFunc, seen utils.SeenMap) {
|
||||||
|
|
||||||
// stack tracks the documents directly referred in other documents.
|
// stack tracks the documents directly referred in other documents.
|
||||||
stack := make(CrawlSeed, 0)
|
stack := make(CrawlSeed, 0)
|
||||||
@@ -247,7 +234,7 @@ func CrawlFromSeed(ctx context.Context, seed CrawlSeed, crawlers []Crawler,
|
|||||||
// from the seed will be processed before any other documents from the
|
// from the seed will be processed before any other documents from the
|
||||||
// crawlers.
|
// crawlers.
|
||||||
func CrawlGithubRunner(ctx context.Context, output chan<- CrawledDocument,
|
func CrawlGithubRunner(ctx context.Context, output chan<- CrawledDocument,
|
||||||
crawlers []Crawler, seen SeenMap) []error {
|
crawlers []Crawler, seen utils.SeenMap) []error {
|
||||||
|
|
||||||
errs := make([]error, len(crawlers))
|
errs := make([]error, len(crawlers))
|
||||||
wg := sync.WaitGroup{}
|
wg := sync.WaitGroup{}
|
||||||
@@ -291,7 +278,7 @@ func CrawlGithubRunner(ctx context.Context, output chan<- CrawledDocument,
|
|||||||
|
|
||||||
// CrawlGithub crawls all the kustomization files on Github.
|
// CrawlGithub crawls all the kustomization files on Github.
|
||||||
func CrawlGithub(ctx context.Context, crawlers []Crawler, conv Converter,
|
func CrawlGithub(ctx context.Context, crawlers []Crawler, conv Converter,
|
||||||
indx IndexFunc, seen SeenMap) {
|
indx IndexFunc, seen utils.SeenMap) {
|
||||||
// stack tracks the documents directly referred in other documents.
|
// stack tracks the documents directly referred in other documents.
|
||||||
stack := make(CrawlSeed, 0)
|
stack := make(CrawlSeed, 0)
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
|
||||||
|
|
||||||
"sigs.k8s.io/kustomize/api/internal/crawl/index"
|
"sigs.k8s.io/kustomize/api/internal/crawl/index"
|
||||||
|
|
||||||
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
||||||
@@ -76,7 +78,7 @@ func newCrawler(matchPrefix string, err error,
|
|||||||
|
|
||||||
// Crawl implements the Crawler interface for testing.
|
// Crawl implements the Crawler interface for testing.
|
||||||
func (c testCrawler) Crawl(_ context.Context,
|
func (c testCrawler) Crawl(_ context.Context,
|
||||||
output chan<- CrawledDocument, _ SeenMap) error {
|
output chan<- CrawledDocument, _ utils.SeenMap) error {
|
||||||
|
|
||||||
for i, d := range c.docs {
|
for i, d := range c.docs {
|
||||||
isResource := true
|
isResource := true
|
||||||
@@ -182,7 +184,7 @@ func TestCrawlGithubRunner(t *testing.T) {
|
|||||||
defer close(output)
|
defer close(output)
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
seen := NewSeenMap()
|
seen := utils.NewSeenMap()
|
||||||
errs := CrawlGithubRunner(context.Background(),
|
errs := CrawlGithubRunner(context.Background(),
|
||||||
output, test.tc, seen)
|
output, test.tc, seen)
|
||||||
|
|
||||||
@@ -324,7 +326,7 @@ resources:
|
|||||||
visited[d.ID()]++
|
visited[d.ID()]++
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
NewSeenMap(),
|
utils.NewSeenMap(),
|
||||||
)
|
)
|
||||||
if lv, lc := len(visited), len(tc.corpus); lv != lc {
|
if lv, lc := len(visited), len(tc.corpus); lv != lc {
|
||||||
t.Errorf("error: %d of %d documents visited.", lv, lc)
|
t.Errorf("error: %d of %d documents visited.", lv, lc)
|
||||||
|
|||||||
@@ -16,6 +16,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
|
||||||
|
|
||||||
"sigs.k8s.io/kustomize/api/internal/crawl/crawler"
|
"sigs.k8s.io/kustomize/api/internal/crawl/crawler"
|
||||||
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
||||||
"sigs.k8s.io/kustomize/api/internal/crawl/httpclient"
|
"sigs.k8s.io/kustomize/api/internal/crawl/httpclient"
|
||||||
@@ -68,7 +70,7 @@ func (gc githubCrawler) DefaultBranch(repo string) string {
|
|||||||
|
|
||||||
// Implements crawler.Crawler.
|
// Implements crawler.Crawler.
|
||||||
func (gc githubCrawler) Crawl(ctx context.Context,
|
func (gc githubCrawler) Crawl(ctx context.Context,
|
||||||
output chan<- crawler.CrawledDocument, seen crawler.SeenMap) error {
|
output chan<- crawler.CrawledDocument, seen utils.SeenMap) error {
|
||||||
|
|
||||||
noETagClient := GhClient{
|
noETagClient := GhClient{
|
||||||
RequestConfig: gc.client.RequestConfig,
|
RequestConfig: gc.client.RequestConfig,
|
||||||
@@ -195,9 +197,9 @@ func (gc githubCrawler) Match(d *doc.Document) bool {
|
|||||||
|
|
||||||
type RangeQueryResult struct {
|
type RangeQueryResult struct {
|
||||||
totalDocCnt uint64
|
totalDocCnt uint64
|
||||||
seenDocCnt uint64
|
seenDocCnt uint64
|
||||||
newDocCnt uint64
|
newDocCnt uint64
|
||||||
errorCnt uint64
|
errorCnt uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RangeQueryResult) Add(other RangeQueryResult) {
|
func (r *RangeQueryResult) Add(other RangeQueryResult) {
|
||||||
@@ -209,7 +211,7 @@ func (r *RangeQueryResult) Add(other RangeQueryResult) {
|
|||||||
|
|
||||||
func (r *RangeQueryResult) String() string {
|
func (r *RangeQueryResult) String() string {
|
||||||
return fmt.Sprintf("got %d files from API. "+
|
return fmt.Sprintf("got %d files from API. "+
|
||||||
"%d have been seen before. %d are new and sent to the output channel." +
|
"%d have been seen before. %d are new and sent to the output channel."+
|
||||||
" %d have kustomizationResultAdapter errors.",
|
" %d have kustomizationResultAdapter errors.",
|
||||||
r.totalDocCnt, r.seenDocCnt, r.newDocCnt, r.errorCnt)
|
r.totalDocCnt, r.seenDocCnt, r.newDocCnt, r.errorCnt)
|
||||||
}
|
}
|
||||||
@@ -217,7 +219,7 @@ func (r *RangeQueryResult) String() string {
|
|||||||
// processQuery follows all of the pages in a query, and updates/adds the
|
// processQuery follows all of the pages in a query, and updates/adds the
|
||||||
// documents from the crawl to the datastore/index.
|
// documents from the crawl to the datastore/index.
|
||||||
func processQuery(ctx context.Context, gcl GhClient, query string,
|
func processQuery(ctx context.Context, gcl GhClient, query string,
|
||||||
output chan<- crawler.CrawledDocument, seen crawler.SeenMap,
|
output chan<- crawler.CrawledDocument, seen utils.SeenMap,
|
||||||
branchMap map[string]string) (RangeQueryResult, error) {
|
branchMap map[string]string) (RangeQueryResult, error) {
|
||||||
|
|
||||||
queryPages := make(chan GhResponseInfo)
|
queryPages := make(chan GhResponseInfo)
|
||||||
@@ -271,7 +273,7 @@ func processQuery(ctx context.Context, gcl GhClient, query string,
|
|||||||
return result, errs
|
return result, errs
|
||||||
}
|
}
|
||||||
|
|
||||||
func kustomizationResultAdapter(gcl GhClient, k GhFileSpec, seen crawler.SeenMap,
|
func kustomizationResultAdapter(gcl GhClient, k GhFileSpec, seen utils.SeenMap,
|
||||||
branchMap map[string]string) (crawler.CrawledDocument, error) {
|
branchMap map[string]string) (crawler.CrawledDocument, error) {
|
||||||
url := gcl.ReposRequest(k.Repository.FullName)
|
url := gcl.ReposRequest(k.Repository.FullName)
|
||||||
defaultBranch, err := gcl.GetDefaultBranch(url, k.Repository.URL, branchMap)
|
defaultBranch, err := gcl.GetDefaultBranch(url, k.Repository.URL, branchMap)
|
||||||
|
|||||||
@@ -117,7 +117,7 @@ type RequestConfig struct {
|
|||||||
// understand why the request object is useful.
|
// understand why the request object is useful.
|
||||||
func (rc RequestConfig) CodeSearchRequestWith(query Query) request {
|
func (rc RequestConfig) CodeSearchRequestWith(query Query) request {
|
||||||
vals := url.Values{
|
vals := url.Values{
|
||||||
"sort": []string{"indexed"},
|
"sort": []string{"indexed"},
|
||||||
"order": []string{"desc"},
|
"order": []string{"desc"},
|
||||||
}
|
}
|
||||||
req := rc.makeRequest("search/code", query, vals)
|
req := rc.makeRequest("search/code", query, vals)
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package doc
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@@ -51,15 +52,21 @@ func (doc *KustomizationDocument) String() string {
|
|||||||
doc.IsSame, doc.Kinds, len(doc.Identifiers), len(doc.Values))
|
doc.IsSame, doc.Kinds, len(doc.Identifiers), len(doc.Values))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Implements the CrawlerDocument interface.
|
// IsKustomizationFile determines whether a file path is a kustomization file
|
||||||
func (doc *KustomizationDocument) GetResources() ([]*Document, error) {
|
func IsKustomizationFile(path string) bool {
|
||||||
isResource := true
|
basename := filepath.Base(path)
|
||||||
for _, suffix := range konfig.RecognizedKustomizationFileNames() {
|
for _, name := range konfig.RecognizedKustomizationFileNames() {
|
||||||
if strings.HasSuffix(doc.FilePath, "/"+suffix) {
|
if basename == name {
|
||||||
isResource = false
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if isResource {
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Implements the CrawlerDocument interface.
|
||||||
|
func (doc *KustomizationDocument) GetResources(
|
||||||
|
includeResources, includeTransformers, includeGenerators bool) ([]*Document, error) {
|
||||||
|
if !IsKustomizationFile(doc.FilePath) {
|
||||||
return []*Document{}, nil
|
return []*Document{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -77,20 +84,42 @@ func (doc *KustomizationDocument) GetResources() ([]*Document, error) {
|
|||||||
}
|
}
|
||||||
k.FixKustomizationPostUnmarshalling()
|
k.FixKustomizationPostUnmarshalling()
|
||||||
|
|
||||||
res := make([]*Document, 0, len(k.Resources))
|
res := make([]*Document, 0)
|
||||||
for _, r := range k.Resources {
|
|
||||||
|
if includeResources {
|
||||||
|
resourceDocs := doc.CollectDocuments(k.Resources)
|
||||||
|
res = append(res, resourceDocs...)
|
||||||
|
}
|
||||||
|
|
||||||
|
if includeGenerators {
|
||||||
|
generatorDocs := doc.CollectDocuments(k.Generators)
|
||||||
|
res = append(res, generatorDocs...)
|
||||||
|
}
|
||||||
|
|
||||||
|
if includeTransformers {
|
||||||
|
transformerDocs := doc.CollectDocuments(k.Transformers)
|
||||||
|
res = append(res, transformerDocs...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// CollectDocuments constructs a Document for each path in paths, and returns
|
||||||
|
// a slice of Document pointers.
|
||||||
|
func (doc *KustomizationDocument) CollectDocuments(paths []string) []*Document {
|
||||||
|
docs := make([]*Document, 0, len(paths))
|
||||||
|
for _, r := range paths {
|
||||||
if strings.TrimSpace(r) == "" {
|
if strings.TrimSpace(r) == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
next, err := doc.Document.FromRelativePath(r)
|
next, err := doc.Document.FromRelativePath(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("GetResources error: %v\n", err)
|
log.Printf("CollectDocuments error: %v\n", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
res = append(res, &next)
|
docs = append(docs, &next)
|
||||||
}
|
}
|
||||||
|
return docs
|
||||||
return res, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (doc *KustomizationDocument) readBytes() ([]map[string]interface{}, error) {
|
func (doc *KustomizationDocument) readBytes() ([]map[string]interface{}, error) {
|
||||||
|
|||||||
@@ -189,11 +189,13 @@ metadata:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type TestStructForGetResources struct {
|
||||||
|
doc KustomizationDocument
|
||||||
|
resources []*Document
|
||||||
|
}
|
||||||
|
|
||||||
func TestGetResources(t *testing.T) {
|
func TestGetResources(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []TestStructForGetResources{
|
||||||
doc KustomizationDocument
|
|
||||||
resources []*Document
|
|
||||||
}{
|
|
||||||
{
|
{
|
||||||
doc: KustomizationDocument{
|
doc: KustomizationDocument{
|
||||||
Document: Document{
|
Document: Document{
|
||||||
@@ -248,9 +250,12 @@ resources:
|
|||||||
resources: []*Document{},
|
resources: []*Document{},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
runTest(t, tests, true, false, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
func runTest(t *testing.T, tests []TestStructForGetResources, includeResources, includeTransformers, includeGenerators bool) {
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
res, err := test.doc.GetResources()
|
res, err := test.doc.GetResources(includeResources, includeTransformers, includeGenerators)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Unexpected error: %v\n", err)
|
t.Errorf("Unexpected error: %v\n", err)
|
||||||
continue
|
continue
|
||||||
@@ -284,3 +289,73 @@ resources:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetResourcesAndGenerators(t *testing.T) {
|
||||||
|
tests := []TestStructForGetResources{
|
||||||
|
{
|
||||||
|
doc: KustomizationDocument{
|
||||||
|
Document: Document{
|
||||||
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
|
FilePath: "some/path/to/kdir/kustomization.yaml",
|
||||||
|
DocumentData: `
|
||||||
|
resources:
|
||||||
|
- file.yaml
|
||||||
|
|
||||||
|
generators:
|
||||||
|
- gen.yaml
|
||||||
|
|
||||||
|
transformers:
|
||||||
|
- tr.yaml
|
||||||
|
`},
|
||||||
|
},
|
||||||
|
resources: []*Document{
|
||||||
|
{
|
||||||
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
|
FilePath: "some/path/to/kdir/gen.yaml",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
|
FilePath: "some/path/to/kdir/file.yaml",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
runTest(t, tests, true, false, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetResourcesAndGeneratorsAndTransformers(t *testing.T) {
|
||||||
|
tests := []TestStructForGetResources{
|
||||||
|
{
|
||||||
|
doc: KustomizationDocument{
|
||||||
|
Document: Document{
|
||||||
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
|
FilePath: "some/path/to/kdir/kustomization.yaml",
|
||||||
|
DocumentData: `
|
||||||
|
resources:
|
||||||
|
- file.yaml
|
||||||
|
|
||||||
|
generators:
|
||||||
|
- gen.yaml
|
||||||
|
|
||||||
|
transformers:
|
||||||
|
- tr.yaml
|
||||||
|
`},
|
||||||
|
},
|
||||||
|
resources: []*Document{
|
||||||
|
{
|
||||||
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
|
FilePath: "some/path/to/kdir/tr.yaml",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
|
FilePath: "some/path/to/kdir/gen.yaml",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||||
|
FilePath: "some/path/to/kdir/file.yaml",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
runTest(t, tests, true, true, true)
|
||||||
|
}
|
||||||
|
|||||||
@@ -35,6 +35,11 @@ func (doc *Document) Copy() *Document {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (doc *Document) Path() string {
|
||||||
|
return fmt.Sprintf("repoURL: %s filePath: %s branch: %s",
|
||||||
|
doc.RepositoryURL, doc.FilePath, doc.DefaultBranch)
|
||||||
|
}
|
||||||
|
|
||||||
// Implements the CrawlerDocument interface.
|
// Implements the CrawlerDocument interface.
|
||||||
func (doc *Document) WasCached() bool {
|
func (doc *Document) WasCached() bool {
|
||||||
return doc.IsSame
|
return doc.IsSame
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ func TestFromRelativePath(t *testing.T) {
|
|||||||
|
|
||||||
func TestDocument_RepositoryFullName(t *testing.T) {
|
func TestDocument_RepositoryFullName(t *testing.T) {
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
doc Document
|
doc Document
|
||||||
expectedRepositoryFullName string
|
expectedRepositoryFullName string
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
@@ -108,4 +108,4 @@ func TestDocument_RepositoryFullName(t *testing.T) {
|
|||||||
returnedRepositoryFullName)
|
returnedRepositoryFullName)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
36
api/internal/crawl/doc/unique_doc.go
Normal file
36
api/internal/crawl/doc/unique_doc.go
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
package doc
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
|
||||||
|
)
|
||||||
|
|
||||||
|
// UniqueDocuments makes sure a Document with a given ID appears only once.
|
||||||
|
type UniqueDocuments struct {
|
||||||
|
docs []*Document
|
||||||
|
docIDs utils.SeenMap
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewUniqueDocuments() UniqueDocuments {
|
||||||
|
return UniqueDocuments{
|
||||||
|
docs: []*Document{},
|
||||||
|
docIDs: utils.NewSeenMap(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (uds *UniqueDocuments) Add(d *Document) {
|
||||||
|
if uds.docIDs.Seen(d.ID()) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
uds.docs = append(uds.docs, d)
|
||||||
|
uds.docIDs.Add(d.ID())
|
||||||
|
}
|
||||||
|
|
||||||
|
func (uds *UniqueDocuments) AddDocuments(docs []*Document) {
|
||||||
|
for _, d := range docs {
|
||||||
|
uds.Add(d)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (uds *UniqueDocuments) Documents() []*Document {
|
||||||
|
return uds.docs
|
||||||
|
}
|
||||||
@@ -18,6 +18,7 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type Mode int
|
type Mode int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
InsertOrUpdate = iota
|
InsertOrUpdate = iota
|
||||||
Delete
|
Delete
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
Find out the largest value of the `creationTime` field:
|
Find out the largest value of the `creationTime` field:
|
||||||
```
|
```
|
||||||
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"aggs" : {
|
"aggs" : {
|
||||||
"max_creationTime" : { "max" : { "field" : "creationTime" } }
|
"max_creationTime" : { "max" : { "field" : "creationTime" } }
|
||||||
@@ -11,7 +11,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
|
|||||||
|
|
||||||
Find out the smallest value of the `creationTime` field:
|
Find out the smallest value of the `creationTime` field:
|
||||||
```
|
```
|
||||||
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"aggs" : {
|
"aggs" : {
|
||||||
"min_creationTime" : { "min" : { "field" : "creationTime" } }
|
"min_creationTime" : { "min" : { "field" : "creationTime" } }
|
||||||
@@ -22,7 +22,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
|
|||||||
|
|
||||||
Find out the smallest value of the `creationTime` field of all the kustomization files:
|
Find out the smallest value of the `creationTime` field of all the kustomization files:
|
||||||
```
|
```
|
||||||
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@@ -40,7 +40,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
|
|||||||
|
|
||||||
Find out the smallest value of the `creationTime` field of all kustomize resource files:
|
Find out the smallest value of the `creationTime` field of all kustomize resource files:
|
||||||
```
|
```
|
||||||
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@@ -58,7 +58,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
|
|||||||
|
|
||||||
Query all the documents whose `creationTime` <= `2016-07-29T17:38:26.000Z`:
|
Query all the documents whose `creationTime` <= `2016-07-29T17:38:26.000Z`:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"range": {
|
"range": {
|
||||||
@@ -73,7 +73,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
|
|
||||||
Query all the documents whose `creationTime` falls within the specific range:
|
Query all the documents whose `creationTime` falls within the specific range:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"range": {
|
"range": {
|
||||||
@@ -89,7 +89,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
|
|
||||||
Aggregate how many new kustomization files were added into Github each month:
|
Aggregate how many new kustomization files were added into Github each month:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@@ -112,7 +112,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte
|
|||||||
|
|
||||||
Aggregate how many new kustomize resource files were added into Github each month:
|
Aggregate how many new kustomize resource files were added into Github each month:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@@ -135,7 +135,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte
|
|||||||
|
|
||||||
Aggregate how many new kustomization files were added into Github each year:
|
Aggregate how many new kustomization files were added into Github each year:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@@ -158,7 +158,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte
|
|||||||
|
|
||||||
Aggregate how many new kustomize resource files were added into Github each year:
|
Aggregate how many new kustomize resource files were added into Github each year:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
Count distinct values of the `defaultBranch` field:
|
Count distinct values of the `defaultBranch` field:
|
||||||
```
|
```
|
||||||
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"aggs" : {
|
"aggs" : {
|
||||||
"defaultBranch_count" : {
|
"defaultBranch_count" : {
|
||||||
@@ -17,7 +17,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
|
|||||||
List all the github branches where kustomization files and kustomize resource files live,
|
List all the github branches where kustomization files and kustomize resource files live,
|
||||||
and how many kustomization files and kustomize resource files live in each branch:
|
and how many kustomization files and kustomize resource files live in each branch:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"aggs" : {
|
"aggs" : {
|
||||||
"defaultBranch" : {
|
"defaultBranch" : {
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
Count the documents whose `document` field is empty (The reason why the `document` field
|
Count the documents whose `document` field is empty (The reason why the `document` field
|
||||||
of a document is empty is because of empty documents):
|
of a document is empty is because of empty documents):
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"size": 10000,
|
"size": 10000,
|
||||||
"query": {
|
"query": {
|
||||||
@@ -19,7 +19,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
|
|
||||||
Find all the documents having the `creationTime` field set:
|
Find all the documents having the `creationTime` field set:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"exists": {
|
"exists": {
|
||||||
@@ -32,7 +32,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
|
|
||||||
Find all the documents whose `creationTime` field is not set:
|
Find all the documents whose `creationTime` field is not set:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"size": 10000,
|
"size": 10000,
|
||||||
"query": {
|
"query": {
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
Count the documents in the index whose `repositoryUrl` field starts with
|
Count the documents in the index whose `repositoryUrl` field starts with
|
||||||
`https://github.com/`:
|
`https://github.com/`:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@@ -17,7 +17,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
Count the documents in the index whose `repositoryUrl` field does not start with
|
Count the documents in the index whose `repositoryUrl` field does not start with
|
||||||
`https://github.com/`:
|
`https://github.com/`:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@@ -33,7 +33,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
Search all the documents matching the given `repositoryUrl` and `filePath`, and return
|
Search all the documents matching the given `repositoryUrl` and `filePath`, and return
|
||||||
a version for each search hit:
|
a version for each search hit:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"size": 10000,
|
"size": 10000,
|
||||||
"version": true,
|
"version": true,
|
||||||
@@ -52,7 +52,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
Search all the documents whose filePath ends with one of these following three filenames:
|
Search all the documents whose filePath ends with one of these following three filenames:
|
||||||
`kustomization.yaml`, `kustomization.yml`, `kustomization`:
|
`kustomization.yaml`, `kustomization.yml`, `kustomization`:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@@ -68,7 +68,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
Search all the documents whose filePath does not end with any of these following
|
Search all the documents whose filePath does not end with any of these following
|
||||||
three filenames: `kustomization.yaml`, `kustomization.yml`, `kustomization`:
|
three filenames: `kustomization.yaml`, `kustomization.yml`, `kustomization`:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
|
|||||||
@@ -10,10 +10,10 @@ curl "${ElasticSearchURL}:9200/_cat/indices?v"
|
|||||||
|
|
||||||
Get the mapping of the index:
|
Get the mapping of the index:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_mapping?pretty"
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_mapping?pretty"
|
||||||
```
|
```
|
||||||
|
|
||||||
Delete the kustomize index from the ElasticSearch cluster (**Use this command with caution**):
|
Delete the kustomize index from the ElasticSearch cluster (**Use this command with caution**):
|
||||||
```
|
```
|
||||||
curl -X DELETE "${ElasticSearchURL}:9200/kustomize?pretty"
|
curl -X DELETE "${ElasticSearchURL}:9200/${INDEXNAME}?pretty"
|
||||||
```
|
```
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
Count distinct values of the `repositoryUrl` field:
|
Count distinct values of the `repositoryUrl` field:
|
||||||
```
|
```
|
||||||
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"aggs" : {
|
"aggs" : {
|
||||||
"repositoryUrl_count" : {
|
"repositoryUrl_count" : {
|
||||||
@@ -16,7 +16,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
|
|||||||
|
|
||||||
Count how many Github repositories include kustomization files:
|
Count how many Github repositories include kustomization files:
|
||||||
```
|
```
|
||||||
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@@ -39,7 +39,7 @@ curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Cont
|
|||||||
|
|
||||||
Count how many Github repositories include kustomize resource files:
|
Count how many Github repositories include kustomize resource files:
|
||||||
```
|
```
|
||||||
curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X POST "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@@ -64,7 +64,7 @@ List all the github repositories including kustomization files and kustomize res
|
|||||||
and how many kustomization files and kustomize resource files each github repository includes
|
and how many kustomization files and kustomize resource files each github repository includes
|
||||||
(the github repository including the most kustomization files is listed first):
|
(the github repository including the most kustomization files is listed first):
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"aggs" : {
|
"aggs" : {
|
||||||
"repositoryUrl" : {
|
"repositoryUrl" : {
|
||||||
@@ -80,7 +80,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte
|
|||||||
|
|
||||||
List the top 20 Github repositories including the most amount of kustomization files:
|
List the top 20 Github repositories including the most amount of kustomization files:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@@ -103,7 +103,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Conte
|
|||||||
|
|
||||||
List the top 20 Github repositories including the most amount of kustomize resource files:
|
List the top 20 Github repositories including the most amount of kustomize resource files:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?size=0&pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
Search for all the kustomize resource files including a Deployment object:
|
Search for all the kustomize resource files including a Deployment object:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"match" : {
|
"match" : {
|
||||||
@@ -16,7 +16,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
Search for all the kustomize resource files including a Deployment object, but only
|
Search for all the kustomize resource files including a Deployment object, but only
|
||||||
including the `kinds` field in the result:
|
including the `kinds` field in the result:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"_source": {
|
"_source": {
|
||||||
"includes": ["kinds"]
|
"includes": ["kinds"]
|
||||||
@@ -35,7 +35,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
Search for all the kustomize resource files including both a Deployment object and
|
Search for all the kustomize resource files including both a Deployment object and
|
||||||
a Service object:
|
a Service object:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"match" : {
|
"match" : {
|
||||||
@@ -52,7 +52,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
Count the number of documents including Deployment and the number of documents
|
Count the number of documents including Deployment and the number of documents
|
||||||
including Service:
|
including Service:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"size": 0,
|
"size": 0,
|
||||||
"aggs" : {
|
"aggs" : {
|
||||||
@@ -71,7 +71,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
|
|
||||||
Search for all the kustomization files involving CRDs:
|
Search for all the kustomization files involving CRDs:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"size": 10000,
|
"size": 10000,
|
||||||
"query": {
|
"query": {
|
||||||
@@ -87,7 +87,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
|
|
||||||
Search for all the kustomization files defining configMapGenerator:
|
Search for all the kustomization files defining configMapGenerator:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"size": 10000,
|
"size": 10000,
|
||||||
"query": {
|
"query": {
|
||||||
@@ -103,7 +103,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
|
|
||||||
Search for all the documents having a `kind` field:
|
Search for all the documents having a `kind` field:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@@ -118,7 +118,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
|
|
||||||
Search for all the kustomization files having a `kind` field:
|
Search for all the kustomization files having a `kind` field:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@@ -134,7 +134,7 @@ curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type
|
|||||||
|
|
||||||
Search for all the kustomization files defining the `generatorOptions:disableNameSuffixHash` feature:
|
Search for all the kustomization files defining the `generatorOptions:disableNameSuffixHash` feature:
|
||||||
```
|
```
|
||||||
curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d'
|
curl -X GET "${ElasticSearchURL}:9200/${INDEXNAME}/_search?pretty" -H 'Content-Type: application/json' -d'
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"match" : {
|
"match" : {
|
||||||
|
|||||||
16
api/internal/crawl/utils/utils.go
Normal file
16
api/internal/crawl/utils/utils.go
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
package utils
|
||||||
|
|
||||||
|
type SeenMap map[string]struct{}
|
||||||
|
|
||||||
|
func (seen SeenMap) Seen(item string) bool {
|
||||||
|
_, ok := seen[item]
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
|
||||||
|
func (seen SeenMap) Add(item string) {
|
||||||
|
seen[item] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewSeenMap() SeenMap {
|
||||||
|
return make(map[string]struct{})
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user