mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-13 18:10:59 +00:00
Merge pull request #2090 from haiyanmeng/retry
Add the Document ID pointing to a kustomization root into cache to avoid crawling it repeatedly
This commit is contained in:
@@ -105,6 +105,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
SetCreatedErrCount := 0
|
SetCreatedErrCount := 0
|
||||||
convErrCount := 0
|
convErrCount := 0
|
||||||
deleteDocCount := 0
|
deleteDocCount := 0
|
||||||
|
crawledDocCount := 0
|
||||||
|
|
||||||
// During the execution of the for loop, more Documents may be added into (*docsPtr).
|
// During the execution of the for loop, more Documents may be added into (*docsPtr).
|
||||||
for len(*docsPtr) > 0 {
|
for len(*docsPtr) > 0 {
|
||||||
@@ -114,7 +115,11 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
// remove the last Document in (*docsPtr)
|
// remove the last Document in (*docsPtr)
|
||||||
*docsPtr = (*docsPtr)[:(len(*docsPtr) - 1)]
|
*docsPtr = (*docsPtr)[:(len(*docsPtr) - 1)]
|
||||||
|
|
||||||
|
crawledDocCount++
|
||||||
|
logger.Printf("Crawling doc %d: %s %s", crawledDocCount, tail.RepositoryURL, tail.FilePath)
|
||||||
|
|
||||||
if _, ok := seen[tail.ID()]; ok {
|
if _, ok := seen[tail.ID()]; ok {
|
||||||
|
logger.Printf("this doc has been seen before")
|
||||||
seenDocCount++
|
seenDocCount++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -132,7 +137,15 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.Println("Crawling ", tail.RepositoryURL, tail.FilePath)
|
// If the Document represents a kustomization root, FetchDocument will change
|
||||||
|
// the `filePath` field of the Document by adding `kustomization.yaml` or
|
||||||
|
// `kustomization.yml` or `kustomization` into the field.
|
||||||
|
// Therefore, it is necessary to add the ID of the Document into seen before
|
||||||
|
// calling FetchDocument. Otherwise, the binary may enter into an infinite loop
|
||||||
|
// if a kustomization file points to its kustomization root in its `resources` or
|
||||||
|
// `bases` field.
|
||||||
|
seen[tail.ID()] = struct{}{}
|
||||||
|
|
||||||
if err := match.FetchDocument(ctx, tail); err != nil {
|
if err := match.FetchDocument(ctx, tail); err != nil {
|
||||||
logger.Printf("FetchDocument failed on %s %s: %v",
|
logger.Printf("FetchDocument failed on %s %s: %v",
|
||||||
tail.RepositoryURL, tail.FilePath, err)
|
tail.RepositoryURL, tail.FilePath, err)
|
||||||
@@ -274,8 +287,12 @@ func CrawlGithub(ctx context.Context, crawlers []Crawler, conv Converter,
|
|||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
docCount := 0
|
||||||
for cdoc := range ch {
|
for cdoc := range ch {
|
||||||
|
docCount++
|
||||||
|
logger.Printf("Processing doc %d found on Github", docCount)
|
||||||
if _, ok := seen[cdoc.ID()]; ok {
|
if _, ok := seen[cdoc.ID()]; ok {
|
||||||
|
logger.Printf("the doc has been seen before")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
match := findMatch(cdoc.GetDocument(), crawlers)
|
match := findMatch(cdoc.GetDocument(), crawlers)
|
||||||
|
|||||||
@@ -87,7 +87,7 @@ func (idx *index) responseErrorOrNil(info string, res *esapi.Response,
|
|||||||
|
|
||||||
defer res.Body.Close()
|
defer res.Body.Close()
|
||||||
if res.IsError() {
|
if res.IsError() {
|
||||||
return fmt.Errorf("%s: %s", messageStart, res.String())
|
return fmt.Errorf("%s: %s [%d]", messageStart, res.String(), res.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
if reader != nil {
|
if reader != nil {
|
||||||
|
|||||||
Reference in New Issue
Block a user