Use flags to specify crawling mode and github user/repo info

This commit is contained in:
Haiyan Meng
2020-01-14 15:36:12 -08:00
parent 7ac573ae51
commit af131c7471
3 changed files with 30 additions and 37 deletions

View File

@@ -46,7 +46,7 @@ func NewCrawlMode(s string) CrawlMode {
return CrawlUser return CrawlUser
case "github-repo": case "github-repo":
return CrawlRepo return CrawlRepo
case "": case "index+github":
return CrawlIndexAndGithub return CrawlIndexAndGithub
case "index": case "index":
return CrawlIndex return CrawlIndex
@@ -57,21 +57,20 @@ func NewCrawlMode(s string) CrawlMode {
} }
} }
// Usage prints the command-line help for the crawler to standard output.
//
// The program name shown in every line is taken from os.Args[0]. The help
// lists the positional mode argument and, for the github-user and
// github-repo modes, the extra positional argument each one expects.
func Usage() {
	prog := os.Args[0]

	fmt.Printf("Usage: %s [mode] [githubUser|githubRepo]\n", prog)
	fmt.Printf("\tmode can be one of [github-user, github-repo, index, github]\n")

	// Running with no mode argument crawls both the index and Github.
	fmt.Printf("%s: crawl all the documents in the index and crawling all the kustomization files on Github\n", prog)
	fmt.Printf("%s index: crawl all the documents in the index\n", prog)
	// The mode name must be spelled "github" here so that it matches the
	// mode list printed above (it was previously misspelled as "gihub").
	fmt.Printf("%s github: crawl all the kustomization files on Github\n", prog)

	fmt.Printf("%s github-user <github-user>: Crawl all the kustomization files in all the repositories of a Github user\n", prog)
	fmt.Printf("\tFor example, %s github-user kubernetes-sigs\n", prog)
	fmt.Printf("%s github-repo <github-repo>: Crawl all the kustomization files in a Github repo\n", prog)
	fmt.Printf("\tFor example, %s github-repo kubernetes-sigs/kustomize\n", prog)
}
func main() { func main() {
indexNamePtr := flag.String( indexNamePtr := flag.String(
"index", "kustomize", "The name of the ElasticSearch index.") "index", "kustomize", "The name of the ElasticSearch index.")
modePtr := flag.String("mode", "index+github",
`The crawling mode, which can be one of [github-user, github-repo, index, github, index+github].
* github-user: crawl all the kustomization files in all the repositories of a Github user (--github-user must be specified for this mode).
* github-repo: crawl all the kustomization files in a Github repository (--github-repo must be specified for this mode).
* index: crawl all the documents in the index.
* github: crawl all the kustomization files on Github.
* index+github: crawl all the documents in the index and crawling all the kustomization files on Github.`)
githubUserPtr := flag.String("github-user", "",
"A github user name (e.g., kubernetes-sigs). This flag is required for the `github-user` mode.")
githubRepoPtr := flag.String("github-repo", "",
"A github repository name (e.g., kubernetes-sigs/kustomize). This flag is required for the `github-repo` mode.")
flag.Parse() flag.Parse()
githubToken := os.Getenv(githubAccessTokenVar) githubToken := os.Getenv(githubAccessTokenVar)
@@ -128,12 +127,7 @@ func main() {
// This helps avoid indexing a given document multiple times. // This helps avoid indexing a given document multiple times.
seen := crawler.NewSeenMap() seen := crawler.NewSeenMap()
var mode CrawlMode mode := NewCrawlMode(*modePtr)
if len(os.Args) == 1 {
mode = CrawlIndexAndGithub
} else {
mode = NewCrawlMode(os.Args[1])
}
ghCrawlerConstructor := func(user, repo string) crawler.Crawler { ghCrawlerConstructor := func(user, repo string) crawler.Crawler {
if user != "" { if user != "" {
@@ -192,21 +186,21 @@ func main() {
crawlers := []crawler.Crawler{ghCrawlerConstructor("", "")} crawlers := []crawler.Crawler{ghCrawlerConstructor("", "")}
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen) crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
case CrawlUser: case CrawlUser:
if len(os.Args) < 3 { if *githubUserPtr == "" {
Usage() flag.Usage()
log.Fatalf("Please specify a github user!") log.Fatalf("Please specify a github user with the github-user flag!")
} }
crawlers := []crawler.Crawler{ghCrawlerConstructor(os.Args[2], "")} crawlers := []crawler.Crawler{ghCrawlerConstructor(*githubUserPtr, "")}
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen) crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
case CrawlRepo: case CrawlRepo:
if len(os.Args) < 3 { if *githubRepoPtr == "" {
Usage() flag.Usage()
log.Fatalf("Please specify a github repo!") log.Fatalf("Please specify a github repository with the github-repo flag!")
} }
crawlers := []crawler.Crawler{ghCrawlerConstructor("", os.Args[2])} crawlers := []crawler.Crawler{ghCrawlerConstructor("", *githubRepoPtr)}
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen) crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
case CrawlUnknown: case CrawlUnknown:
Usage() flag.Usage()
log.Fatalf("The crawler mode must be one of [github-user, github-repo, index, github]") log.Fatalf("The --mode flag must be one of [github-user, github-repo, index, github, index+github].")
} }
} }

View File

@@ -1,4 +1,4 @@
There are three ways of running the crawler job. The crawler job can run in one of the following modes:
# Crawling all the documents in the index and crawling all the kustomization files on Github # Crawling all the documents in the index and crawling all the kustomization files on Github
@@ -7,14 +7,13 @@ of the container should be:
``` ```
command: ["/crawler"] command: ["/crawler"]
args: []
``` ```
Or Or
``` ```
command: ["/crawler"] command: ["/crawler"]
args: [""] args: ["--mode=index+github"]
``` ```
# Crawling all the documents in the index # Crawling all the documents in the index
@@ -23,7 +22,7 @@ The `command` and `args` field of the container should be:
``` ```
command: ["/crawler"] command: ["/crawler"]
args: ["index"] args: ["--mode=index"]
``` ```
# Crawling all the kustomization files on Github # Crawling all the kustomization files on Github
@@ -32,7 +31,7 @@ The `command` and `args` field of the container should be:
``` ```
command: ["/crawler"] command: ["/crawler"]
args: ["github"] args: ["--mode=github"]
``` ```
# Crawling all the kustomization files in a Github repo # Crawling all the kustomization files in a Github repo
@@ -41,7 +40,7 @@ The `command` and `args` field of the container should be like:
``` ```
command: ["/crawler"] command: ["/crawler"]
args: ["github-repo", "kubernetes-sigs/kustomize"] args: ["--mode=github-repo", "--github-repo=kubernetes-sigs/kustomize"]
``` ```
# Crawling all the kustomization files in all the repositories of a Github user # Crawling all the kustomization files in all the repositories of a Github user
@@ -50,5 +49,5 @@ The `command` and `args` field of the container should be like:
``` ```
command: ["/crawler"] command: ["/crawler"]
args: ["github-user", "kubernetes-sigs"] args: ["--mode=github-user", "--github-user=kubernetes-sigs"]
``` ```

View File

@@ -11,7 +11,7 @@ spec:
image: gcr.io/haiyanmeng-gke-dev/crawler:v1 image: gcr.io/haiyanmeng-gke-dev/crawler:v1
imagePullPolicy: Always imagePullPolicy: Always
command: ["/crawler"] command: ["/crawler"]
args: ["github-repo", "kubernetes-sigs/kustomize"] args: ["--mode=github-repo", "--github-repo=kubernetes-sigs/kustomize", "--index=kustomize"]
env: env:
- name: GITHUB_ACCESS_TOKEN - name: GITHUB_ACCESS_TOKEN
valueFrom: valueFrom: