mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-13 10:00:56 +00:00
Merge pull request #2102 from haiyanmeng/seed
Use flags for configuring the crawler job
This commit is contained in:
@@ -2,6 +2,7 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -45,7 +46,7 @@ func NewCrawlMode(s string) CrawlMode {
|
|||||||
return CrawlUser
|
return CrawlUser
|
||||||
case "github-repo":
|
case "github-repo":
|
||||||
return CrawlRepo
|
return CrawlRepo
|
||||||
case "":
|
case "index+github":
|
||||||
return CrawlIndexAndGithub
|
return CrawlIndexAndGithub
|
||||||
case "index":
|
case "index":
|
||||||
return CrawlIndex
|
return CrawlIndex
|
||||||
@@ -56,30 +57,33 @@ func NewCrawlMode(s string) CrawlMode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func Usage() {
|
|
||||||
fmt.Printf("Usage: %s [mode] [githubUser|githubRepo]\n", os.Args[0])
|
|
||||||
fmt.Printf("\tmode can be one of [github-user, github-repo, index, github]\n")
|
|
||||||
fmt.Printf("%s: crawl all the documents in the index and crawling all the kustomization files on Github\n", os.Args[0])
|
|
||||||
fmt.Printf("%s index: crawl all the documents in the index\n", os.Args[0])
|
|
||||||
fmt.Printf("%s gihub: crawl all the kustomization files on Github\n", os.Args[0])
|
|
||||||
fmt.Printf("%s github-user <github-user>: Crawl all the kustomization files in all the repositories of a Github user\n", os.Args[0])
|
|
||||||
fmt.Printf("\tFor example, %s github-user kubernetes-sigs\n", os.Args[0])
|
|
||||||
fmt.Printf("%s github-repo <github-repo>: Crawl all the kustomization files in a Github repo\n", os.Args[0])
|
|
||||||
fmt.Printf("\tFor example, %s github-repo kubernetes-sigs/kustomize\n", os.Args[0])
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
indexNamePtr := flag.String(
|
||||||
|
"index", "kustomize", "The name of the ElasticSearch index.")
|
||||||
|
modePtr := flag.String("mode", "index+github",
|
||||||
|
`The crawling mode, which can be one of [github-user, github-repo, index, github, index+github].
|
||||||
|
* github-user: crawl all the kustomization files in all the repositories of a Github user (--github-user must be specified for this mode).
|
||||||
|
* github-repo: crawl all the kustomization files in a Github repository (--github-repo must be specified for this mode).
|
||||||
|
* index: crawl all the documents in the index.
|
||||||
|
* gihub: crawl all the kustomization files on Github.
|
||||||
|
* index+github: crawl all the documents in the index and crawling all the kustomization files on Github.`)
|
||||||
|
githubUserPtr := flag.String("github-user", "",
|
||||||
|
"A github user name (e.g., kubernetes-sigs). This flag is required for the `github-user` mode.")
|
||||||
|
githubRepoPtr := flag.String("github-repo", "",
|
||||||
|
"A github repository name (e.g., kubernetes-sigs/kustomize). This flag is required for the `github-repo` mode.")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
githubToken := os.Getenv(githubAccessTokenVar)
|
githubToken := os.Getenv(githubAccessTokenVar)
|
||||||
if githubToken == "" {
|
if githubToken == "" {
|
||||||
fmt.Printf("Must set the variable '%s' to make github requests.\n",
|
log.Printf("Must set the variable '%s' to make github requests.\n",
|
||||||
githubAccessTokenVar)
|
githubAccessTokenVar)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
idx, err := index.NewKustomizeIndex(ctx)
|
idx, err := index.NewKustomizeIndex(ctx, *indexNamePtr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("Could not create an index: %v\n", err)
|
log.Printf("Could not create an index: %v\n", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -87,7 +91,7 @@ func main() {
|
|||||||
cache, err := redis.DialURL(cacheURL)
|
cache, err := redis.DialURL(cacheURL)
|
||||||
clientCache := &http.Client{}
|
clientCache := &http.Client{}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("Error: redis could not make a connection: %v\n", err)
|
log.Printf("Error: redis could not make a connection: %v\n", err)
|
||||||
} else {
|
} else {
|
||||||
clientCache = httpclient.NewClient(cache)
|
clientCache = httpclient.NewClient(cache)
|
||||||
}
|
}
|
||||||
@@ -108,10 +112,10 @@ func main() {
|
|||||||
case *doc.KustomizationDocument:
|
case *doc.KustomizationDocument:
|
||||||
switch mode {
|
switch mode {
|
||||||
case index.Delete:
|
case index.Delete:
|
||||||
fmt.Println("Deleting: ", d)
|
log.Printf("Deleting: %v", d)
|
||||||
return idx.Delete(d.ID())
|
return idx.Delete(d.ID())
|
||||||
default:
|
default:
|
||||||
fmt.Println("Inserting: ", d)
|
log.Printf("Inserting: %v", d)
|
||||||
return idx.Put(d.ID(), d)
|
return idx.Put(d.ID(), d)
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@@ -123,12 +127,7 @@ func main() {
|
|||||||
// This helps avoid indexing a given document multiple times.
|
// This helps avoid indexing a given document multiple times.
|
||||||
seen := crawler.NewSeenMap()
|
seen := crawler.NewSeenMap()
|
||||||
|
|
||||||
var mode CrawlMode
|
mode := NewCrawlMode(*modePtr)
|
||||||
if len(os.Args) == 1 {
|
|
||||||
mode = CrawlIndexAndGithub
|
|
||||||
} else {
|
|
||||||
mode = NewCrawlMode(os.Args[1])
|
|
||||||
}
|
|
||||||
|
|
||||||
ghCrawlerConstructor := func(user, repo string) crawler.Crawler {
|
ghCrawlerConstructor := func(user, repo string) crawler.Crawler {
|
||||||
if user != "" {
|
if user != "" {
|
||||||
@@ -169,7 +168,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err := it.Err(); err != nil {
|
if err := it.Err(); err != nil {
|
||||||
fmt.Printf("Error iterating: %v\n", err)
|
log.Fatalf("getSeedDocsFunc Error iterating: %v\n", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -187,21 +186,21 @@ func main() {
|
|||||||
crawlers := []crawler.Crawler{ghCrawlerConstructor("", "")}
|
crawlers := []crawler.Crawler{ghCrawlerConstructor("", "")}
|
||||||
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
||||||
case CrawlUser:
|
case CrawlUser:
|
||||||
if len(os.Args) < 3 {
|
if *githubUserPtr == "" {
|
||||||
Usage()
|
flag.Usage()
|
||||||
log.Fatalf("Please specify a github user!")
|
log.Fatalf("Please specify a github user with the github-user flag!")
|
||||||
}
|
}
|
||||||
crawlers := []crawler.Crawler{ghCrawlerConstructor(os.Args[2], "")}
|
crawlers := []crawler.Crawler{ghCrawlerConstructor(*githubUserPtr, "")}
|
||||||
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
||||||
case CrawlRepo:
|
case CrawlRepo:
|
||||||
if len(os.Args) < 3 {
|
if *githubRepoPtr == "" {
|
||||||
Usage()
|
flag.Usage()
|
||||||
log.Fatalf("Please specify a github repo!")
|
log.Fatalf("Please specify a github repository with the github-repo flag!")
|
||||||
}
|
}
|
||||||
crawlers := []crawler.Crawler{ghCrawlerConstructor("", os.Args[2])}
|
crawlers := []crawler.Crawler{ghCrawlerConstructor("", *githubRepoPtr)}
|
||||||
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
|
||||||
case CrawlUnknown:
|
case CrawlUnknown:
|
||||||
Usage()
|
flag.Usage()
|
||||||
log.Fatalf("The crawler mode must be one of [github-user, github-repo, index, github]")
|
log.Fatalf("The --mode flag must be one of [github-user, github-repo, index, github, index+github].")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ func main() {
|
|||||||
|
|
||||||
m := entry.(map[string]interface{})
|
m := entry.(map[string]interface{})
|
||||||
if payload, ok := m["textPayload"]; ok {
|
if payload, ok := m["textPayload"]; ok {
|
||||||
|
// use fmt.Printf here instead of log.Printf to avoid the time and code location info the log package provides
|
||||||
fmt.Printf("%s", payload)
|
fmt.Printf("%s", payload)
|
||||||
} else {
|
} else {
|
||||||
log.Printf("the log entry does not have the `textPayload` field: %s\n", line)
|
log.Printf("the log entry does not have the `textPayload` field: %s\n", line)
|
||||||
|
|||||||
@@ -2,5 +2,4 @@ configmapGenerator:
|
|||||||
- name: elasticsearch-config
|
- name: elasticsearch-config
|
||||||
literals:
|
literals:
|
||||||
- es-url="http://esbasic-master:9200"
|
- es-url="http://esbasic-master:9200"
|
||||||
- kustomize-index-name="kustomize"
|
|
||||||
- plugin-index-name="plugin"
|
- plugin-index-name="plugin"
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
There are three ways of running the crawler job.
|
The crawler job can run in one of the following modes:
|
||||||
|
|
||||||
# Crawling all the documents in the index and crawling all the kustomization files on Github
|
# Crawling all the documents in the index and crawling all the kustomization files on Github
|
||||||
|
|
||||||
@@ -7,14 +7,13 @@ of the container should be:
|
|||||||
|
|
||||||
```
|
```
|
||||||
command: ["/crawler"]
|
command: ["/crawler"]
|
||||||
args: []
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Or
|
Or
|
||||||
|
|
||||||
```
|
```
|
||||||
command: ["/crawler"]
|
command: ["/crawler"]
|
||||||
args: [""]
|
args: ["--mode=index+github"]
|
||||||
```
|
```
|
||||||
|
|
||||||
# Crawling all the documents in the index
|
# Crawling all the documents in the index
|
||||||
@@ -23,7 +22,7 @@ The `command` and `args` field of the container should be:
|
|||||||
|
|
||||||
```
|
```
|
||||||
command: ["/crawler"]
|
command: ["/crawler"]
|
||||||
args: ["index"]
|
args: ["--mode=index"]
|
||||||
```
|
```
|
||||||
|
|
||||||
# Crawling all the kustomization files on Github
|
# Crawling all the kustomization files on Github
|
||||||
@@ -32,7 +31,7 @@ The `command` and `args` field of the container should be:
|
|||||||
|
|
||||||
```
|
```
|
||||||
command: ["/crawler"]
|
command: ["/crawler"]
|
||||||
args: ["github"]
|
args: ["--mode=github"]
|
||||||
```
|
```
|
||||||
|
|
||||||
# Crawling all the kustomization files in a Github repo
|
# Crawling all the kustomization files in a Github repo
|
||||||
@@ -41,7 +40,7 @@ The `command` and `args` field of the container should be like:
|
|||||||
|
|
||||||
```
|
```
|
||||||
command: ["/crawler"]
|
command: ["/crawler"]
|
||||||
args: ["github-repo", "kubernetes-sigs/kustomize"]
|
args: ["--mode=github-repo", "--github-repo=kubernetes-sigs/kustomize"]
|
||||||
```
|
```
|
||||||
|
|
||||||
# Crawling all the kustomization files in all the repositories of a Github user
|
# Crawling all the kustomization files in all the repositories of a Github user
|
||||||
@@ -50,5 +49,5 @@ The `command` and `args` field of the container should be like:
|
|||||||
|
|
||||||
```
|
```
|
||||||
command: ["/crawler"]
|
command: ["/crawler"]
|
||||||
args: ["github-user", "kubernetes-sigs"]
|
args: ["--mode=github-user", "--github-user=kubernetes-sigs"]
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ spec:
|
|||||||
image: gcr.io/haiyanmeng-gke-dev/crawler:v1
|
image: gcr.io/haiyanmeng-gke-dev/crawler:v1
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
command: ["/crawler"]
|
command: ["/crawler"]
|
||||||
args: ["github-repo", "kubernetes-sigs/kustomize"]
|
args: ["--mode=github-repo", "--github-repo=kubernetes-sigs/kustomize", "--index=kustomize"]
|
||||||
env:
|
env:
|
||||||
- name: GITHUB_ACCESS_TOKEN
|
- name: GITHUB_ACCESS_TOKEN
|
||||||
valueFrom:
|
valueFrom:
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"reflect"
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -110,7 +111,7 @@ func (s sortableDocs) Len() int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestCrawlGithubRunner(t *testing.T) {
|
func TestCrawlGithubRunner(t *testing.T) {
|
||||||
fmt.Println("testing CrawlGithubRunner")
|
log.Println("testing CrawlGithubRunner")
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
tc []Crawler
|
tc []Crawler
|
||||||
errs []error
|
errs []error
|
||||||
@@ -216,7 +217,7 @@ func TestCrawlGithubRunner(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestCrawlFromSeed(t *testing.T) {
|
func TestCrawlFromSeed(t *testing.T) {
|
||||||
fmt.Println("testing CrawlFromSeed")
|
log.Println("testing CrawlFromSeed")
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
seed CrawlSeed
|
seed CrawlSeed
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package github
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"reflect"
|
"reflect"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
@@ -11,7 +12,7 @@ type testCachedSearch struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (c testCachedSearch) CountResults(upperBound uint64) (uint64, error) {
|
func (c testCachedSearch) CountResults(upperBound uint64) (uint64, error) {
|
||||||
fmt.Printf("CountResults(%05x)\n", upperBound)
|
log.Printf("CountResults(%05x)\n", upperBound)
|
||||||
count, ok := c.cache[upperBound]
|
count, ok := c.cache[upperBound]
|
||||||
if !ok {
|
if !ok {
|
||||||
return count, fmt.Errorf("cache not set at %x", upperBound)
|
return count, fmt.Errorf("cache not set at %x", upperBound)
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package doc
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@@ -83,7 +84,7 @@ func (doc *KustomizationDocument) GetResources() ([]*Document, error) {
|
|||||||
}
|
}
|
||||||
next, err := doc.Document.FromRelativePath(r)
|
next, err := doc.Document.FromRelativePath(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("GetResources error: %v\n", err)
|
log.Printf("GetResources error: %v\n", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
res = append(res, &next)
|
res = append(res, &next)
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"log"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -97,14 +98,14 @@ type KustomizeIndex struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Create index reference to the index containing the kustomize documents.
|
// Create index reference to the index containing the kustomize documents.
|
||||||
func NewKustomizeIndex(ctx context.Context) (*KustomizeIndex, error) {
|
func NewKustomizeIndex(ctx context.Context, indexName string) (*KustomizeIndex, error) {
|
||||||
idx, err := newIndex(ctx, "kustomize")
|
idx, err := newIndex(ctx, indexName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
indicesExistsOp := idx.client.Indices.Exists
|
indicesExistsOp := idx.client.Indices.Exists
|
||||||
resp, err := indicesExistsOp([]string{"kustomize"},
|
resp, err := indicesExistsOp([]string{indexName},
|
||||||
indicesExistsOp.WithContext(idx.ctx),
|
indicesExistsOp.WithContext(idx.ctx),
|
||||||
indicesExistsOp.WithPretty())
|
indicesExistsOp.WithPretty())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -112,9 +113,9 @@ func NewKustomizeIndex(ctx context.Context) (*KustomizeIndex, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if resp.StatusCode == 200 {
|
if resp.StatusCode == 200 {
|
||||||
fmt.Printf("The kustomize index already exists\n")
|
log.Printf("The %s index already exists", indexName)
|
||||||
} else {
|
} else {
|
||||||
fmt.Printf("Creating the kustomize index\n")
|
log.Printf("Creating the %s index\n", indexName)
|
||||||
if err := idx.CreateIndex([]byte(IndexConfig)); err != nil {
|
if err := idx.CreateIndex([]byte(IndexConfig)); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -252,7 +253,7 @@ func (it *KustomizeIterator) Next() bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if it.err == nil {
|
if it.err == nil {
|
||||||
fmt.Printf("updating scroll: %s\n", *it.scrollImpl.ScrollID)
|
log.Printf("updating scroll: %s\n", *it.scrollImpl.ScrollID)
|
||||||
it.err = it.update(*it.scrollImpl.ScrollID, reader)
|
it.err = it.update(*it.scrollImpl.ScrollID, reader)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -341,7 +342,7 @@ func (ki *KustomizeIndex) Search(query string,
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to format query %s", query)
|
return nil, fmt.Errorf("failed to format query %s", query)
|
||||||
}
|
}
|
||||||
fmt.Printf("formated query: %s\n", data)
|
log.Printf("formated query: %s\n", data)
|
||||||
|
|
||||||
var kr ElasticKustomizeResult
|
var kr ElasticKustomizeResult
|
||||||
err = ki.index.Search(data, opts.SearchOptions, func(results io.Reader) error {
|
err = ki.index.Search(data, opts.SearchOptions, func(results io.Reader) error {
|
||||||
|
|||||||
Reference in New Issue
Block a user