mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-12 01:14:22 +00:00
Support different modes of running the crawler
This commit is contained in:
@@ -3,6 +3,7 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"time"
|
||||
@@ -23,11 +24,31 @@ const (
|
||||
retryCount = 3
|
||||
githubUserEnv = "GITHUB_USER"
|
||||
githubRepoEnv = "GITHUB_REPO"
|
||||
crawlIndexOnlyEnv = "CRAWL_INDEX_ONLY"
|
||||
crawlGithubOnlyEnv = "CRAWL_GITHUB_ONLY"
|
||||
)
|
||||
|
||||
// countEnvs reports how many of the given values are non-empty strings.
// Callers pass the values read from environment variables, so the result
// is the number of those variables that are set to something.
func countEnvs(envs ...string) int {
	n := 0
	for i := range envs {
		// Skip values that are the empty string; they do not count.
		if len(envs[i]) == 0 {
			continue
		}
		n++
	}
	return n
}
|
||||
|
||||
func main() {
|
||||
githubUser := os.Getenv(githubUserEnv)
|
||||
githubRepo := os.Getenv(githubRepoEnv)
|
||||
crawlIndexOnly := os.Getenv(crawlIndexOnlyEnv)
|
||||
crawlGithubOnly := os.Getenv(crawlGithubOnlyEnv)
|
||||
|
||||
if countEnvs(githubUser, githubRepo, crawlIndexOnly, crawlGithubOnly) > 1 {
|
||||
log.Fatalf("only one of [%s, %s, %s, %s] should be set",
|
||||
githubUserEnv, githubRepoEnv, crawlIndexOnlyEnv, crawlGithubOnlyEnv)
|
||||
}
|
||||
|
||||
githubToken := os.Getenv(githubAccessTokenVar)
|
||||
if githubToken == "" {
|
||||
@@ -122,6 +143,13 @@ func main() {
|
||||
}
|
||||
|
||||
crawlers := []crawler.Crawler{ghCrawler}
|
||||
|
||||
if crawlGithubOnly == "true" || githubRepo != "" || githubUser != "" {
|
||||
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
||||
} else if crawlIndexOnly == "true" {
|
||||
crawler.CrawlFromSeed(ctx, seedDocs, crawlers, docConverter, indexFunc, seen)
|
||||
} else {
|
||||
crawler.CrawlFromSeed(ctx, seedDocs, crawlers, docConverter, indexFunc, seen)
|
||||
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
||||
}
|
||||
}
|
||||
|
||||
41
api/internal/crawl/config/crawler/job/README.md
Normal file
41
api/internal/crawl/config/crawler/job/README.md
Normal file
@@ -0,0 +1,41 @@
|
||||
There are five ways of running the crawler job.
|
||||
|
||||
# Crawling all the documents in the index and crawling all the kustomization files on Github
|
||||
|
||||
This is the default setting of the crawler job.
|
||||
|
||||
# Crawling all the documents in the index
|
||||
|
||||
Set the environment variable `CRAWL_INDEX_ONLY` to `true` like this:
|
||||
|
||||
```
|
||||
- name: CRAWL_INDEX_ONLY
|
||||
value: "true"
|
||||
```
|
||||
|
||||
# Crawling all the kustomization files on Github
|
||||
|
||||
Set the environment variable `CRAWL_GITHUB_ONLY` to `true` like this:
|
||||
|
||||
```
|
||||
- name: CRAWL_GITHUB_ONLY
|
||||
value: "true"
|
||||
```
|
||||
|
||||
# Crawling all the kustomization files in a Github repo
|
||||
|
||||
Add the environment variable `GITHUB_REPO` into the crawler container. For example:
|
||||
|
||||
```
|
||||
- name: GITHUB_REPO
|
||||
value: kubernetes-sigs/kustomize
|
||||
```
|
||||
|
||||
# Crawling all the kustomization files in all the repositories of a Github user
|
||||
|
||||
Add the environment variable `GITHUB_USER` into the crawler container. For example:
|
||||
|
||||
```
|
||||
- name: GITHUB_USER
|
||||
value: kubernetes-sigs
|
||||
```
|
||||
@@ -8,7 +8,7 @@ spec:
|
||||
restartPolicy: OnFailure
|
||||
containers:
|
||||
- name: crawler
|
||||
image: gcr.io/kustomize-search/crawler:latest
|
||||
image: gcr.io/haiyanmeng-gke-dev/crawler:v1
|
||||
imagePullPolicy: Always
|
||||
env:
|
||||
- name: GITHUB_ACCESS_TOKEN
|
||||
|
||||
Reference in New Issue
Block a user