Avoid indexing a document if FetchDocument or SetCreated fails

This commit is contained in:
Haiyan Meng
2019-12-16 11:39:54 -08:00
parent 12fc8f41c7
commit 8c89f0946c
3 changed files with 25 additions and 16 deletions

View File

@@ -72,17 +72,17 @@ func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
seen[cdoc.ID()] = struct{}{}
// Insert into index
err := indx(cdoc, match)
logIfErr(err)
if err != nil {
if err := indx(cdoc, match); err != nil {
logger.Println("Failed to index: ", err)
return
}
deps, err := cdoc.GetResources()
logIfErr(err)
if err != nil {
logger.Println(err)
return
}
for _, dep := range deps {
if _, ok := seen[dep.ID()]; ok {
continue
@@ -107,29 +107,33 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
}
docCount++
if tail.WasCached() {
logger.Printf("%s %s is cached already", tail.RepositoryURL, tail.FilePath)
continue
}
match := findMatch(tail, crawlers)
if match == nil {
logIfErr(fmt.Errorf(
"%v could not match any crawler", tail))
logIfErr(fmt.Errorf("%v could not match any crawler", tail))
continue
}
logger.Println("Crawling ", tail.RepositoryURL, tail.FilePath)
err := match.FetchDocument(ctx, tail)
logIfErr(err)
// If there was no change or there is an error, we don't have
// to branch out, since the dependencies are already in the
// index, or we cannot find the document.
if err != nil || tail.WasCached() {
if tail.WasCached() {
logger.Println(tail.RepositoryURL, tail.FilePath, "is cached already")
}
if err := match.FetchDocument(ctx, tail); err != nil {
logger.Printf("FetchDocument failed on %s %s: %v",
tail.RepositoryURL, tail.FilePath, err)
continue
}
logIfErr(match.SetCreated(ctx, tail))
if err := match.SetCreated(ctx, tail); err != nil {
logger.Printf("SetCreated failed on %s %s: %v",
tail.RepositoryURL, tail.FilePath, err)
continue
}
cdoc, err := conv(tail)
// If conv returns an error, cdoc can still be added into the index so that
// cdoc.Document can be searched.
logIfErr(err)
addBranches(cdoc, match, indx, seen, stack)

View File

@@ -93,6 +93,9 @@ func (gc githubCrawler) Crawl(
return nil
}
// FetchDocument first tries to fetch the document with d.FilePath. If it fails,
// it will try to add each string in konfig.RecognizedKustomizationFileNames() to
// d.FilePath, and try to fetch the document again.
func (gc githubCrawler) FetchDocument(_ context.Context, d *doc.Document) error {
repoURL := d.RepositoryURL + "/" + d.FilePath + "?ref=" + d.DefaultBranch
repoSpec, err := git.NewRepoSpecFromUrl(repoURL)

View File

@@ -116,6 +116,8 @@ func (doc *KustomizationDocument) readBytes() ([]map[string]interface{}, error)
return configs, nil
}
// ParseYAML parses doc.Document and sets the following fields of doc:
// Kinds, Values, Identifiers.
func (doc *KustomizationDocument) ParseYAML() error {
doc.Identifiers = make([]string, 0)
doc.Values = make([]string, 0)