mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-10 08:20:59 +00:00
Improve the efficiency of crawling github by skipping the documents
already in the index
This commit is contained in:
@@ -187,6 +187,12 @@ func main() {
|
||||
crawler.CrawlFromSeed(ctx, seedDocs, crawlers, docConverter, indexFunc, seen)
|
||||
case CrawlGithub:
|
||||
crawlers := []crawler.Crawler{ghCrawlerConstructor("", "")}
|
||||
// add all the documents in the index into seen.
|
||||
// this greatly reduces the time overhead of CrawlGithub.
|
||||
getSeedDocsFunc()
|
||||
for _, d := range seedDocs {
|
||||
seen[d.ID()] = d.FileType
|
||||
}
|
||||
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
||||
case CrawlUser:
|
||||
if *githubUserPtr == "" {
|
||||
|
||||
@@ -82,6 +82,8 @@ func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
||||
|
||||
seen.Set(cdoc.ID(), cdoc.GetDocument().FileType)
|
||||
|
||||
match.SetDefaultBranch(cdoc.GetDocument())
|
||||
|
||||
// Insert into index
|
||||
if err := indx(cdoc, index.InsertOrUpdate); err != nil {
|
||||
logger.Printf("Failed to insert or update doc(%s): %v",
|
||||
|
||||
Reference in New Issue
Block a user