mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-12 01:14:22 +00:00
Support different modes of running the crawler
This commit is contained in:
@@ -3,6 +3,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"time"
|
"time"
|
||||||
@@ -23,11 +24,31 @@ const (
|
|||||||
retryCount = 3
|
retryCount = 3
|
||||||
githubUserEnv = "GITHUB_USER"
|
githubUserEnv = "GITHUB_USER"
|
||||||
githubRepoEnv = "GITHUB_REPO"
|
githubRepoEnv = "GITHUB_REPO"
|
||||||
|
crawlIndexOnlyEnv = "CRAWL_INDEX_ONLY"
|
||||||
|
crawlGithubOnlyEnv = "CRAWL_GITHUB_ONLY"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// countEnvs returns how many of the given environment variable values are non-empty.
|
||||||
|
func countEnvs(envs ...string) int {
|
||||||
|
count := 0
|
||||||
|
for _, env := range envs {
|
||||||
|
if env != "" {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count
|
||||||
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
githubUser := os.Getenv(githubUserEnv)
|
githubUser := os.Getenv(githubUserEnv)
|
||||||
githubRepo := os.Getenv(githubRepoEnv)
|
githubRepo := os.Getenv(githubRepoEnv)
|
||||||
|
crawlIndexOnly := os.Getenv(crawlIndexOnlyEnv)
|
||||||
|
crawlGithubOnly := os.Getenv(crawlGithubOnlyEnv)
|
||||||
|
|
||||||
|
if countEnvs(githubUser, githubRepo, crawlIndexOnly, crawlGithubOnly) > 1 {
|
||||||
|
log.Fatalf("only one of [%s, %s, %s, %s] should be set",
|
||||||
|
githubUserEnv, githubRepoEnv, crawlIndexOnlyEnv, crawlGithubOnlyEnv)
|
||||||
|
}
|
||||||
|
|
||||||
githubToken := os.Getenv(githubAccessTokenVar)
|
githubToken := os.Getenv(githubAccessTokenVar)
|
||||||
if githubToken == "" {
|
if githubToken == "" {
|
||||||
@@ -122,6 +143,13 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
crawlers := []crawler.Crawler{ghCrawler}
|
crawlers := []crawler.Crawler{ghCrawler}
|
||||||
crawler.CrawlFromSeed(ctx, seedDocs, crawlers, docConverter, indexFunc, seen)
|
|
||||||
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
if crawlGithubOnly == "true" || githubRepo != "" || githubUser != "" {
|
||||||
|
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
||||||
|
} else if crawlIndexOnly == "true" {
|
||||||
|
crawler.CrawlFromSeed(ctx, seedDocs, crawlers, docConverter, indexFunc, seen)
|
||||||
|
} else {
|
||||||
|
crawler.CrawlFromSeed(ctx, seedDocs, crawlers, docConverter, indexFunc, seen)
|
||||||
|
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
41
api/internal/crawl/config/crawler/job/README.md
Normal file
41
api/internal/crawl/config/crawler/job/README.md
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
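There are five ways of running the crawler job. At most one of the environment variables described below may be set; the crawler exits with an error if more than one is set.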
|
||||||
|
|
||||||
|
# Crawling all the documents in the index and crawling all the kustomization files on Github
|
||||||
|
|
||||||
|
This is the default setting of the crawler job.
|
||||||
|
|
||||||
|
# Crawling all the documents in the index
|
||||||
|
|
||||||
|
Set the environment variable `CRAWL_INDEX_ONLY` to `true` like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
- name: CRAWL_INDEX_ONLY
|
||||||
|
value: "true"
|
||||||
|
```
|
||||||
|
|
||||||
|
# Crawling all the kustomization files on Github
|
||||||
|
|
||||||
|
Set the environment variable `CRAWL_GITHUB_ONLY` to `true` like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
- name: CRAWL_GITHUB_ONLY
|
||||||
|
value: "true"
|
||||||
|
```
|
||||||
|
|
||||||
|
# Crawling all the kustomization files in a Github repo
|
||||||
|
|
||||||
|
Add the environment variable `GITHUB_REPO` into the crawler container. For example:
|
||||||
|
|
||||||
|
```
|
||||||
|
- name: GITHUB_REPO
|
||||||
|
value: kubernetes-sigs/kustomize
|
||||||
|
```
|
||||||
|
|
||||||
|
# Crawling all the kustomization files in all the repositories of a Github user
|
||||||
|
|
||||||
|
Add the environment variable `GITHUB_USER` into the crawler container. For example:
|
||||||
|
|
||||||
|
```
|
||||||
|
- name: GITHUB_USER
|
||||||
|
value: kubernetes-sigs
|
||||||
|
```
|
||||||
@@ -8,7 +8,7 @@ spec:
|
|||||||
restartPolicy: OnFailure
|
restartPolicy: OnFailure
|
||||||
containers:
|
containers:
|
||||||
- name: crawler
|
- name: crawler
|
||||||
image: gcr.io/kustomize-search/crawler:latest
|
image: gcr.io/haiyanmeng-gke-dev/crawler:v1
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
env:
|
env:
|
||||||
- name: GITHUB_ACCESS_TOKEN
|
- name: GITHUB_ACCESS_TOKEN
|
||||||
|
|||||||
Reference in New Issue
Block a user