Improve the efficiency of crawling GitHub

Make sure each GitHub file is crawled only once
This commit is contained in:
Haiyan Meng
2020-01-13 14:56:47 -08:00
parent d71d2df364
commit 81d62f90bf
3 changed files with 96 additions and 32 deletions

View File

@@ -75,7 +75,7 @@ func newCrawler(matchPrefix string, err error,
// Crawl implements the Crawler interface for testing.
func (c testCrawler) Crawl(_ context.Context,
output chan<- CrawledDocument) error {
output chan<- CrawledDocument, _ map[string]struct{}) error {
for i, d := range c.docs {
isResource := true
@@ -181,8 +181,9 @@ func TestCrawlGithubRunner(t *testing.T) {
defer close(output)
defer wg.Done()
seen := map[string]struct{}{}
errs := CrawlGithubRunner(context.Background(),
output, test.tc)
output, test.tc, seen)
// Check that errors are returned as they should be.
if !reflect.DeepEqual(errs, test.errs) {