Use flags to specify crawling mode and github user/repo info

This commit is contained in:
Haiyan Meng
2020-01-14 15:36:12 -08:00
parent 7ac573ae51
commit af131c7471
3 changed files with 30 additions and 37 deletions

View File

@@ -46,7 +46,7 @@ func NewCrawlMode(s string) CrawlMode {
return CrawlUser
case "github-repo":
return CrawlRepo
case "":
case "index+github":
return CrawlIndexAndGithub
case "index":
return CrawlIndex
@@ -57,21 +57,20 @@ func NewCrawlMode(s string) CrawlMode {
}
}
func Usage() {
fmt.Printf("Usage: %s [mode] [githubUser|githubRepo]\n", os.Args[0])
fmt.Printf("\tmode can be one of [github-user, github-repo, index, github]\n")
fmt.Printf("%s: crawl all the documents in the index and crawling all the kustomization files on Github\n", os.Args[0])
fmt.Printf("%s index: crawl all the documents in the index\n", os.Args[0])
fmt.Printf("%s gihub: crawl all the kustomization files on Github\n", os.Args[0])
fmt.Printf("%s github-user <github-user>: Crawl all the kustomization files in all the repositories of a Github user\n", os.Args[0])
fmt.Printf("\tFor example, %s github-user kubernetes-sigs\n", os.Args[0])
fmt.Printf("%s github-repo <github-repo>: Crawl all the kustomization files in a Github repo\n", os.Args[0])
fmt.Printf("\tFor example, %s github-repo kubernetes-sigs/kustomize\n", os.Args[0])
}
func main() {
indexNamePtr := flag.String(
"index", "kustomize", "The name of the ElasticSearch index.")
modePtr := flag.String("mode", "index+github",
`The crawling mode, which can be one of [github-user, github-repo, index, github, index+github].
* github-user: crawl all the kustomization files in all the repositories of a Github user (--github-user must be specified for this mode).
* github-repo: crawl all the kustomization files in a Github repository (--github-repo must be specified for this mode).
* index: crawl all the documents in the index.
* gihub: crawl all the kustomization files on Github.
* index+github: crawl all the documents in the index and crawling all the kustomization files on Github.`)
githubUserPtr := flag.String("github-user", "",
"A github user name (e.g., kubernetes-sigs). This flag is required for the `github-user` mode.")
githubRepoPtr := flag.String("github-repo", "",
"A github repository name (e.g., kubernetes-sigs/kustomize). This flag is required for the `github-repo` mode.")
flag.Parse()
githubToken := os.Getenv(githubAccessTokenVar)
@@ -128,12 +127,7 @@ func main() {
// This helps avoid indexing a given document multiple times.
seen := crawler.NewSeenMap()
var mode CrawlMode
if len(os.Args) == 1 {
mode = CrawlIndexAndGithub
} else {
mode = NewCrawlMode(os.Args[1])
}
mode := NewCrawlMode(*modePtr)
ghCrawlerConstructor := func(user, repo string) crawler.Crawler {
if user != "" {
@@ -192,21 +186,21 @@ func main() {
crawlers := []crawler.Crawler{ghCrawlerConstructor("", "")}
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
case CrawlUser:
if len(os.Args) < 3 {
Usage()
log.Fatalf("Please specify a github user!")
if *githubUserPtr == "" {
flag.Usage()
log.Fatalf("Please specify a github user with the github-user flag!")
}
crawlers := []crawler.Crawler{ghCrawlerConstructor(os.Args[2], "")}
crawlers := []crawler.Crawler{ghCrawlerConstructor(*githubUserPtr, "")}
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
case CrawlRepo:
if len(os.Args) < 3 {
Usage()
log.Fatalf("Please specify a github repo!")
if *githubRepoPtr == "" {
flag.Usage()
log.Fatalf("Please specify a github repository with the github-repo flag!")
}
crawlers := []crawler.Crawler{ghCrawlerConstructor("", os.Args[2])}
crawlers := []crawler.Crawler{ghCrawlerConstructor("", *githubRepoPtr)}
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
case CrawlUnknown:
Usage()
log.Fatalf("The crawler mode must be one of [github-user, github-repo, index, github]")
flag.Usage()
log.Fatalf("The --mode flag must be one of [github-user, github-repo, index, github, index+github].")
}
}

View File

@@ -1,4 +1,4 @@
There are three ways of running the crawler job.
The crawler job can run in one of the following mode:
# Crawling all the documents in the index and crawling all the kustomization files on Github
@@ -7,14 +7,13 @@ of the container should be:
```
command: ["/crawler"]
args: []
```
Or
```
command: ["/crawler"]
args: [""]
args: ["--mode=index+github"]
```
# Crawling all the documents in the index
@@ -23,7 +22,7 @@ The `command` and `args` field of the container should be:
```
command: ["/crawler"]
args: ["index"]
args: ["--mode=index"]
```
# Crawling all the kustomization files on Github
@@ -32,7 +31,7 @@ The `command` and `args` field of the container should be:
```
command: ["/crawler"]
args: ["github"]
args: ["--mode=github"]
```
# Crawling all the kustomization files in a Github repo
@@ -41,7 +40,7 @@ The `command` and `args` field of the container should be like:
```
command: ["/crawler"]
args: ["github-repo", "kubernetes-sigs/kustomize"]
args: ["--mode=github-repo", "--github-repo=kubernetes-sigs/kustomize"]
```
# Crawling all the kustomization files in all the repositories of a Github user
@@ -50,5 +49,5 @@ The `command` and `args` field of the container should be like:
```
command: ["/crawler"]
args: ["github-user", "kubernetes-sigs"]
args: ["--github-user", "--github-user=kubernetes-sigs"]
```

View File

@@ -11,7 +11,7 @@ spec:
image: gcr.io/haiyanmeng-gke-dev/crawler:v1
imagePullPolicy: Always
command: ["/crawler"]
args: ["github-repo", "kubernetes-sigs/kustomize"]
args: ["--mode=github-repo", "--github-repo=kubernetes-sigs/kustomize", "--index=kustomize"]
env:
- name: GITHUB_ACCESS_TOKEN
valueFrom: