mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-11 17:12:51 +00:00
Improve the efficiency of crawling GitHub by skipping documents that are
already in the index
This commit is contained in:
@@ -187,6 +187,12 @@ func main() {
|
|||||||
crawler.CrawlFromSeed(ctx, seedDocs, crawlers, docConverter, indexFunc, seen)
|
crawler.CrawlFromSeed(ctx, seedDocs, crawlers, docConverter, indexFunc, seen)
|
||||||
case CrawlGithub:
|
case CrawlGithub:
|
||||||
crawlers := []crawler.Crawler{ghCrawlerConstructor("", "")}
|
crawlers := []crawler.Crawler{ghCrawlerConstructor("", "")}
|
||||||
|
// add all the documents in the index into seen.
|
||||||
|
// this greatly reduces the time overhead of CrawlGithub.
|
||||||
|
getSeedDocsFunc()
|
||||||
|
for _, d := range seedDocs {
|
||||||
|
seen[d.ID()] = d.FileType
|
||||||
|
}
|
||||||
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
||||||
case CrawlUser:
|
case CrawlUser:
|
||||||
if *githubUserPtr == "" {
|
if *githubUserPtr == "" {
|
||||||
|
|||||||
@@ -82,6 +82,8 @@ func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
|||||||
|
|
||||||
seen.Set(cdoc.ID(), cdoc.GetDocument().FileType)
|
seen.Set(cdoc.ID(), cdoc.GetDocument().FileType)
|
||||||
|
|
||||||
|
match.SetDefaultBranch(cdoc.GetDocument())
|
||||||
|
|
||||||
// Insert into index
|
// Insert into index
|
||||||
if err := indx(cdoc, index.InsertOrUpdate); err != nil {
|
if err := indx(cdoc, index.InsertOrUpdate); err != nil {
|
||||||
logger.Printf("Failed to insert or update doc(%s): %v",
|
logger.Printf("Failed to insert or update doc(%s): %v",
|
||||||
|
|||||||
Reference in New Issue
Block a user