mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-12 01:14:22 +00:00
make seen a non-primitive type
This commit is contained in:
@@ -121,7 +121,7 @@ func main() {
|
|||||||
|
|
||||||
// seen tracks the IDs of all the documents in the index.
|
// seen tracks the IDs of all the documents in the index.
|
||||||
// This helps avoid indexing a given document multiple times.
|
// This helps avoid indexing a given document multiple times.
|
||||||
seen := make(map[string]struct{})
|
seen := crawler.NewSeenMap()
|
||||||
|
|
||||||
var mode CrawlMode
|
var mode CrawlMode
|
||||||
if len(os.Args) == 1 {
|
if len(os.Args) == 1 {
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ type Crawler interface {
|
|||||||
// Crawl returns when it is done processing. This method does not take
|
// Crawl returns when it is done processing. This method does not take
|
||||||
// ownership of the channel. The channel is write only, and it
|
// ownership of the channel. The channel is write only, and it
|
||||||
// designates where the crawler should forward the documents.
|
// designates where the crawler should forward the documents.
|
||||||
Crawl(ctx context.Context, output chan<- CrawledDocument, seen map[string]struct{}) error
|
Crawl(ctx context.Context, output chan<- CrawledDocument, seen SeenMap) error
|
||||||
|
|
||||||
// Get the document data given the FilePath, Repo, and Ref/Tag/Branch.
|
// Get the document data given the FilePath, Repo, and Ref/Tag/Branch.
|
||||||
FetchDocument(context.Context, *doc.Document) error
|
FetchDocument(context.Context, *doc.Document) error
|
||||||
@@ -47,6 +47,21 @@ type CrawledDocument interface {
|
|||||||
WasCached() bool
|
WasCached() bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SeenMap is a set of document IDs that have already been processed.
// It helps avoid handling a given document multiple times.
//
// A SeenMap is a plain Go map underneath, so it is not safe for concurrent
// use without external synchronization, and Add panics when called on a nil
// SeenMap. Use NewSeenMap to obtain a ready-to-use value.
type SeenMap map[string]struct{}

// Seen reports whether item has been added to the set. It is safe to call
// on a nil SeenMap, in which case it always returns false.
func (s SeenMap) Seen(item string) bool {
	_, ok := s[item]
	return ok
}

// Add records item in the set. Adding an item that is already present
// leaves the set unchanged.
func (s SeenMap) Add(item string) {
	s[item] = struct{}{}
}

// NewSeenMap returns an empty, initialized SeenMap.
func NewSeenMap() SeenMap {
	return make(SeenMap)
}
|
||||||
|
|
||||||
type CrawlSeed []*doc.Document
|
type CrawlSeed []*doc.Document
|
||||||
|
|
||||||
type IndexFunc func(CrawledDocument, index.Mode) error
|
type IndexFunc func(CrawledDocument, index.Mode) error
|
||||||
@@ -69,9 +84,9 @@ func findMatch(d *doc.Document, crawlers []Crawler) Crawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
||||||
seen map[string]struct{}, stack *CrawlSeed) {
|
seen SeenMap, stack *CrawlSeed) {
|
||||||
|
|
||||||
seen[cdoc.ID()] = struct{}{}
|
seen.Add(cdoc.ID())
|
||||||
|
|
||||||
// Insert into index
|
// Insert into index
|
||||||
if err := indx(cdoc, index.InsertOrUpdate); err != nil {
|
if err := indx(cdoc, index.InsertOrUpdate); err != nil {
|
||||||
@@ -87,7 +102,7 @@ func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, dep := range deps {
|
for _, dep := range deps {
|
||||||
if _, ok := seen[dep.ID()]; ok {
|
if seen.Seen(dep.ID()) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
*stack = append(*stack, dep)
|
*stack = append(*stack, dep)
|
||||||
@@ -95,7 +110,7 @@ func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv Converter, indx IndexFunc,
|
func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv Converter, indx IndexFunc,
|
||||||
seen map[string]struct{}, stack *CrawlSeed) {
|
seen SeenMap, stack *CrawlSeed) {
|
||||||
|
|
||||||
UpdatedDocCount := 0
|
UpdatedDocCount := 0
|
||||||
seenDocCount := 0
|
seenDocCount := 0
|
||||||
@@ -118,7 +133,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
crawledDocCount++
|
crawledDocCount++
|
||||||
logger.Printf("Crawling doc %d: %s %s", crawledDocCount, tail.RepositoryURL, tail.FilePath)
|
logger.Printf("Crawling doc %d: %s %s", crawledDocCount, tail.RepositoryURL, tail.FilePath)
|
||||||
|
|
||||||
if _, ok := seen[tail.ID()]; ok {
|
if seen.Seen(tail.ID()) {
|
||||||
logger.Printf("this doc has been seen before")
|
logger.Printf("this doc has been seen before")
|
||||||
seenDocCount++
|
seenDocCount++
|
||||||
continue
|
continue
|
||||||
@@ -144,7 +159,8 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
// calling FetchDocument. Otherwise, the binary may enter into an infinite loop
|
// calling FetchDocument. Otherwise, the binary may enter into an infinite loop
|
||||||
// if a kustomization file points to its kustomization root in its `resources` or
|
// if a kustomization file points to its kustomization root in its `resources` or
|
||||||
// `bases` field.
|
// `bases` field.
|
||||||
seen[tail.ID()] = struct{}{}
|
seen.Add(tail.ID())
|
||||||
|
|
||||||
|
|
||||||
if err := match.FetchDocument(ctx, tail); err != nil {
|
if err := match.FetchDocument(ctx, tail); err != nil {
|
||||||
logger.Printf("FetchDocument failed on %s %s: %v",
|
logger.Printf("FetchDocument failed on %s %s: %v",
|
||||||
@@ -154,7 +170,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
cdoc := &doc.KustomizationDocument{
|
cdoc := &doc.KustomizationDocument{
|
||||||
Document: *tail,
|
Document: *tail,
|
||||||
}
|
}
|
||||||
seen[cdoc.ID()] = struct{}{}
|
seen.Add(cdoc.ID())
|
||||||
if err := indx(cdoc, index.Delete); err != nil {
|
if err := indx(cdoc, index.Delete); err != nil {
|
||||||
logger.Printf("Failed to delete %s %s: %v",
|
logger.Printf("Failed to delete %s %s: %v",
|
||||||
cdoc.RepositoryURL, cdoc.FilePath, err)
|
cdoc.RepositoryURL, cdoc.FilePath, err)
|
||||||
@@ -195,7 +211,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
// CrawlFromSeed updates all the documents in seed, and crawls all the new
|
// CrawlFromSeed updates all the documents in seed, and crawls all the new
|
||||||
// documents referred in the seed.
|
// documents referred in the seed.
|
||||||
func CrawlFromSeed(ctx context.Context, seed CrawlSeed, crawlers []Crawler,
|
func CrawlFromSeed(ctx context.Context, seed CrawlSeed, crawlers []Crawler,
|
||||||
conv Converter, indx IndexFunc, seen map[string]struct{}) {
|
conv Converter, indx IndexFunc, seen SeenMap) {
|
||||||
|
|
||||||
// stack tracks the documents directly referred in other documents.
|
// stack tracks the documents directly referred in other documents.
|
||||||
stack := make(CrawlSeed, 0)
|
stack := make(CrawlSeed, 0)
|
||||||
@@ -231,7 +247,7 @@ func CrawlFromSeed(ctx context.Context, seed CrawlSeed, crawlers []Crawler,
|
|||||||
// from the seed will be processed before any other documents from the
|
// from the seed will be processed before any other documents from the
|
||||||
// crawlers.
|
// crawlers.
|
||||||
func CrawlGithubRunner(ctx context.Context, output chan<- CrawledDocument,
|
func CrawlGithubRunner(ctx context.Context, output chan<- CrawledDocument,
|
||||||
crawlers []Crawler, seen map[string]struct{}) []error {
|
crawlers []Crawler, seen SeenMap) []error {
|
||||||
|
|
||||||
errs := make([]error, len(crawlers))
|
errs := make([]error, len(crawlers))
|
||||||
wg := sync.WaitGroup{}
|
wg := sync.WaitGroup{}
|
||||||
@@ -275,7 +291,7 @@ func CrawlGithubRunner(ctx context.Context, output chan<- CrawledDocument,
|
|||||||
|
|
||||||
// CrawlGithub crawls all the kustomization files on Github.
|
// CrawlGithub crawls all the kustomization files on Github.
|
||||||
func CrawlGithub(ctx context.Context, crawlers []Crawler, conv Converter,
|
func CrawlGithub(ctx context.Context, crawlers []Crawler, conv Converter,
|
||||||
indx IndexFunc, seen map[string]struct{}) {
|
indx IndexFunc, seen SeenMap) {
|
||||||
// stack tracks the documents directly referred in other documents.
|
// stack tracks the documents directly referred in other documents.
|
||||||
stack := make(CrawlSeed, 0)
|
stack := make(CrawlSeed, 0)
|
||||||
|
|
||||||
@@ -291,7 +307,7 @@ func CrawlGithub(ctx context.Context, crawlers []Crawler, conv Converter,
|
|||||||
for cdoc := range ch {
|
for cdoc := range ch {
|
||||||
docCount++
|
docCount++
|
||||||
logger.Printf("Processing doc %d found on Github", docCount)
|
logger.Printf("Processing doc %d found on Github", docCount)
|
||||||
if _, ok := seen[cdoc.ID()]; ok {
|
if seen.Seen(cdoc.ID()) {
|
||||||
logger.Printf("the doc has been seen before")
|
logger.Printf("the doc has been seen before")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ func newCrawler(matchPrefix string, err error,
|
|||||||
|
|
||||||
// Crawl implements the Crawler interface for testing.
|
// Crawl implements the Crawler interface for testing.
|
||||||
func (c testCrawler) Crawl(_ context.Context,
|
func (c testCrawler) Crawl(_ context.Context,
|
||||||
output chan<- CrawledDocument, _ map[string]struct{}) error {
|
output chan<- CrawledDocument, _ SeenMap) error {
|
||||||
|
|
||||||
for i, d := range c.docs {
|
for i, d := range c.docs {
|
||||||
isResource := true
|
isResource := true
|
||||||
@@ -181,7 +181,7 @@ func TestCrawlGithubRunner(t *testing.T) {
|
|||||||
defer close(output)
|
defer close(output)
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
seen := map[string]struct{}{}
|
seen := NewSeenMap()
|
||||||
errs := CrawlGithubRunner(context.Background(),
|
errs := CrawlGithubRunner(context.Background(),
|
||||||
output, test.tc, seen)
|
output, test.tc, seen)
|
||||||
|
|
||||||
@@ -323,7 +323,7 @@ resources:
|
|||||||
visited[d.ID()]++
|
visited[d.ID()]++
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
make(map[string]struct{}),
|
NewSeenMap(),
|
||||||
)
|
)
|
||||||
if lv, lc := len(visited), len(tc.corpus); lv != lc {
|
if lv, lc := len(visited), len(tc.corpus); lv != lc {
|
||||||
t.Errorf("error: %d of %d documents visited.", lv, lc)
|
t.Errorf("error: %d of %d documents visited.", lv, lc)
|
||||||
|
|||||||
@@ -68,7 +68,7 @@ func (gc githubCrawler) DefaultBranch(repo string) string {
|
|||||||
|
|
||||||
// Implements crawler.Crawler.
|
// Implements crawler.Crawler.
|
||||||
func (gc githubCrawler) Crawl(ctx context.Context,
|
func (gc githubCrawler) Crawl(ctx context.Context,
|
||||||
output chan<- crawler.CrawledDocument, seen map[string]struct{}) error {
|
output chan<- crawler.CrawledDocument, seen crawler.SeenMap) error {
|
||||||
|
|
||||||
noETagClient := GhClient{
|
noETagClient := GhClient{
|
||||||
RequestConfig: gc.client.RequestConfig,
|
RequestConfig: gc.client.RequestConfig,
|
||||||
@@ -217,7 +217,7 @@ func (r *RangeQueryResult) String() string {
|
|||||||
// processQuery follows all of the pages in a query, and updates/adds the
|
// processQuery follows all of the pages in a query, and updates/adds the
|
||||||
// documents from the crawl to the datastore/index.
|
// documents from the crawl to the datastore/index.
|
||||||
func processQuery(ctx context.Context, gcl GhClient, query string,
|
func processQuery(ctx context.Context, gcl GhClient, query string,
|
||||||
output chan<- crawler.CrawledDocument, seen map[string]struct{},
|
output chan<- crawler.CrawledDocument, seen crawler.SeenMap,
|
||||||
branchMap map[string]string) (RangeQueryResult, error) {
|
branchMap map[string]string) (RangeQueryResult, error) {
|
||||||
|
|
||||||
queryPages := make(chan GhResponseInfo)
|
queryPages := make(chan GhResponseInfo)
|
||||||
@@ -271,7 +271,7 @@ func processQuery(ctx context.Context, gcl GhClient, query string,
|
|||||||
return result, errs
|
return result, errs
|
||||||
}
|
}
|
||||||
|
|
||||||
func kustomizationResultAdapter(gcl GhClient, k GhFileSpec, seen map[string]struct{},
|
func kustomizationResultAdapter(gcl GhClient, k GhFileSpec, seen crawler.SeenMap,
|
||||||
branchMap map[string]string) (crawler.CrawledDocument, error) {
|
branchMap map[string]string) (crawler.CrawledDocument, error) {
|
||||||
url := gcl.ReposRequest(k.Repository.FullName)
|
url := gcl.ReposRequest(k.Repository.FullName)
|
||||||
defaultBranch, err := gcl.GetDefaultBranch(url, k.Repository.URL, branchMap)
|
defaultBranch, err := gcl.GetDefaultBranch(url, k.Repository.URL, branchMap)
|
||||||
@@ -287,7 +287,7 @@ func kustomizationResultAdapter(gcl GhClient, k GhFileSpec, seen map[string]stru
|
|||||||
RepositoryURL: k.Repository.URL,
|
RepositoryURL: k.Repository.URL,
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, ok := seen[document.ID()]; ok {
|
if seen.Seen(document.ID()) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user