mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-11 17:12:51 +00:00
Make the crawler work
1) Add the crawler binary and fix the crawler library. 2) Remove the readiness probe in the search backend. 3) Add config for the redis keystore. 4) Add a github_api_secret.txt file with instructions.
This commit is contained in:
15
api/internal/crawl/cmd/crawler/Dockerfile
Normal file
15
api/internal/crawl/cmd/crawler/Dockerfile
Normal file
@@ -0,0 +1,15 @@
|
||||
# Build stage: compile the crawler binary with the Go toolchain.
FROM golang:1.11 AS build

# Force module mode for the build commands below.
# NOTE(review): presumably needed because the sources are placed under
# GOPATH (/go/src/...), where Go 1.11 does not enable modules by default.
ARG GO111MODULE=on

# The build context is expected to be api/internal/crawl; it is copied to the
# matching import path, which is also the working directory.
WORKDIR /go/src/sigs.k8s.io/kustomize/api/internal/crawl
COPY . /go/src/sigs.k8s.io/kustomize/api/internal/crawl

RUN go mod download
# CGO is disabled so the resulting binary has no libc dependency and can run
# in the empty "scratch" image used below.
RUN CGO_ENABLED=0 go install -v ./cmd/crawler/crawler.go

# Runtime stage: an empty image containing only the CA bundle and the binary.
FROM scratch
# The CA certificates are copied from the build image because scratch ships
# none of its own.
COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
COPY --from=build /go/bin/crawler /
ENTRYPOINT ["/crawler"]
CMD []
|
||||
99
api/internal/crawl/cmd/crawler/crawler.go
Normal file
99
api/internal/crawl/cmd/crawler/crawler.go
Normal file
@@ -0,0 +1,99 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/crawler"
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/crawler/github"
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/httpclient"
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/index"
|
||||
|
||||
"github.com/gomodule/redigo/redis"
|
||||
)
|
||||
|
||||
// Environment variable names read by main, and the retry setting handed to
// the GitHub crawler.
const (
|
||||
githubAccessTokenVar = "GITHUB_ACCESS_TOKEN" // env var holding the GitHub token; main refuses to crawl when it is empty
|
||||
redisCacheURL = "REDIS_CACHE_URL" // env var holding the URL main dials for the redis-backed HTTP cache
|
||||
redisKeyURL = "REDIS_KEY_URL" // env var holding the URL main dials for the redis keystore
|
||||
retryCount = 3 // passed to github.NewCrawler as its retry count
|
||||
)
|
||||
|
||||
func main() {
|
||||
githubToken := os.Getenv(githubAccessTokenVar)
|
||||
if githubToken == "" {
|
||||
fmt.Printf("Must set the variable '%s' to make github requests.\n",
|
||||
githubAccessTokenVar)
|
||||
return
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
idx, err := index.NewKustomizeIndex(ctx)
|
||||
if err != nil {
|
||||
fmt.Printf("Could not create an index: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
cacheURL := os.Getenv(redisCacheURL)
|
||||
keystoreURL := os.Getenv(redisKeyURL)
|
||||
|
||||
query := []byte(`{ "query":{ "match_all":{} } }`)
|
||||
it := idx.IterateQuery(query, 10000, 60*time.Second)
|
||||
docs := make(crawler.CrawlSeed, 0)
|
||||
for it.Next() {
|
||||
for _, hit := range it.Value().Hits.Hits {
|
||||
docs = append(docs, hit.Document.GetDocument())
|
||||
}
|
||||
}
|
||||
if err := it.Err(); err != nil {
|
||||
fmt.Printf("Error iterating: %v\n", err)
|
||||
}
|
||||
|
||||
cache, err := redis.DialURL(cacheURL)
|
||||
clientCache := &http.Client{}
|
||||
if err != nil {
|
||||
fmt.Printf("Error: redis could not make a connection: %v\n", err)
|
||||
} else {
|
||||
clientCache = httpclient.NewClient(cache)
|
||||
}
|
||||
|
||||
_, err = redis.DialURL(keystoreURL)
|
||||
if err != nil {
|
||||
fmt.Printf("Error: redis could not make a connection: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
ghCrawler := github.NewCrawler(githubToken, retryCount, clientCache,
|
||||
github.QueryWith(
|
||||
github.Filename("kustomization.yaml"),
|
||||
github.Filename("kustomization.yml")),
|
||||
)
|
||||
|
||||
crawler.CrawlFromSeed(ctx, docs, []crawler.Crawler{ghCrawler},
|
||||
// Converter takes in a plain document and processes it for the
|
||||
// index.
|
||||
func(d *doc.Document) (crawler.CrawledDocument, error) {
|
||||
kdoc := doc.KustomizationDocument{
|
||||
Document: *d,
|
||||
}
|
||||
|
||||
err := kdoc.ParseYAML()
|
||||
return &kdoc, err
|
||||
},
|
||||
// IndexFunc updates the value in the index.
|
||||
func(cdoc crawler.CrawledDocument, crwlr crawler.Crawler) error {
|
||||
switch d := cdoc.(type) {
|
||||
case *doc.KustomizationDocument:
|
||||
fmt.Println("Inserting: ", d)
|
||||
_, err := idx.Put("", d)
|
||||
return err
|
||||
default:
|
||||
return fmt.Errorf("Type %T not supported", d)
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,2 @@
|
||||
<ADD YOUR GITHUB PERSONAL ACCESS TOKEN HERE WITHOUT A TRAILING NEWLINE>
|
||||
Run: printf "<your-token>" > github_api_secret.txt
|
||||
@@ -5,7 +5,9 @@ configmapGenerator:
|
||||
- name: crawler-http-cache
|
||||
literals:
|
||||
- redis-cache-url="redis://redis-http-cache:6379"
|
||||
|
||||
- name: redis-keystore
|
||||
literals:
|
||||
- keystore-url="redis://redis-docs-keystore:6379"
|
||||
|
||||
secretGenerator:
|
||||
- name: github-access-token
|
||||
|
||||
@@ -21,10 +21,6 @@ spec:
|
||||
httpGet:
|
||||
path: /liveness
|
||||
port: backend-port
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /readiness
|
||||
port: backend-port
|
||||
ports:
|
||||
- name: backend-port
|
||||
containerPort: 8080
|
||||
|
||||
@@ -16,11 +16,11 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/git"
|
||||
"sigs.k8s.io/kustomize/api/konfig"
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/crawler"
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/httpclient"
|
||||
"sigs.k8s.io/kustomize/api/internal/git"
|
||||
"sigs.k8s.io/kustomize/api/konfig"
|
||||
)
|
||||
|
||||
var logger = log.New(os.Stdout, "Github Crawler: ",
|
||||
@@ -34,11 +34,11 @@ type githubCrawler struct {
|
||||
|
||||
type GhClient struct {
|
||||
RequestConfig
|
||||
retryCount uint64
|
||||
client *http.Client
|
||||
retryCount uint64
|
||||
client *http.Client
|
||||
accessToken string
|
||||
}
|
||||
|
||||
/*
|
||||
func NewCrawler(accessToken string, retryCount uint64, client *http.Client,
|
||||
query Query) githubCrawler {
|
||||
|
||||
@@ -47,14 +47,13 @@ func NewCrawler(accessToken string, retryCount uint64, client *http.Client,
|
||||
retryCount: retryCount,
|
||||
client: client,
|
||||
RequestConfig: RequestConfig{
|
||||
perPage: githubMaxPageSize,
|
||||
accessToken: accessToken,
|
||||
perPage: githubMaxPageSize,
|
||||
},
|
||||
accessToken: accessToken,
|
||||
},
|
||||
query: query,
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// Implements crawler.Crawler.
|
||||
func (gc githubCrawler) Crawl(
|
||||
@@ -64,6 +63,7 @@ func (gc githubCrawler) Crawl(
|
||||
RequestConfig: gc.client.RequestConfig,
|
||||
client: &http.Client{Timeout: gc.client.client.Timeout},
|
||||
retryCount: gc.client.retryCount,
|
||||
accessToken: gc.client.accessToken,
|
||||
}
|
||||
|
||||
// Since Github returns a max of 1000 results per query, we can use
|
||||
@@ -129,7 +129,7 @@ func (gc githubCrawler) FetchDocument(ctx context.Context, d *doc.Document) erro
|
||||
continue
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("file not found: %s", url)
|
||||
return fmt.Errorf("file not found: %s, error: %v", url, err)
|
||||
}
|
||||
|
||||
func (gc githubCrawler) SetCreated(ctx context.Context, d *doc.Document) error {
|
||||
@@ -534,10 +534,20 @@ func (gcl GhClient) GetRawUserContent(query string) (*http.Response, error) {
|
||||
return gcl.getWithRetry(query)
|
||||
}
|
||||
|
||||
func (gcl GhClient) Do(query string) (*http.Response, error) {
|
||||
req, err := http.NewRequest("GET", query, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Add("Authorization", fmt.Sprintf("token %s", gcl.accessToken))
|
||||
return gcl.client.Do(req)
|
||||
}
|
||||
|
||||
func (gcl GhClient) getWithRetry(
|
||||
query string) (resp *http.Response, err error) {
|
||||
|
||||
resp, err = gcl.client.Get(query)
|
||||
resp, err = gcl.Do(query)
|
||||
|
||||
retryCount := gcl.retryCount
|
||||
|
||||
for err == nil &&
|
||||
@@ -556,7 +566,7 @@ func (gcl GhClient) getWithRetry(
|
||||
logger.Printf("waiting %d seconds before retrying\n", i)
|
||||
time.Sleep(time.Second * time.Duration(i))
|
||||
retryCount--
|
||||
resp, err = gcl.client.Get(query)
|
||||
resp, err = gcl.Do(query)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
|
||||
@@ -11,6 +11,8 @@ const (
|
||||
accessTokenArg = "access_token"
|
||||
)
|
||||
|
||||
const githubMaxPageSize = 100
|
||||
|
||||
// Implementation detail, not important to external API.
|
||||
type queryField struct {
|
||||
name string
|
||||
@@ -96,14 +98,12 @@ func Path(p string) queryField {
|
||||
// - CommitsRequests: asks Github to list commits made on a file. Useful to
|
||||
// determine the date of a file.
|
||||
type RequestConfig struct {
|
||||
perPage uint64
|
||||
accessToken string
|
||||
perPage uint64
|
||||
}
|
||||
|
||||
func NewRequestConfig(perPage uint64, accessToken string) RequestConfig {
|
||||
func NewRequestConfig(perPage uint64) RequestConfig {
|
||||
return RequestConfig{
|
||||
perPage: perPage,
|
||||
accessToken: accessToken,
|
||||
perPage: perPage,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -139,9 +139,6 @@ func (rc RequestConfig) CommitsRequest(fullRepoName, path string) string {
|
||||
|
||||
func (rc RequestConfig) makeRequest(path string, query Query) request {
|
||||
vals := url.Values{}
|
||||
if rc.accessToken != "" {
|
||||
vals.Set(accessTokenArg, rc.accessToken)
|
||||
}
|
||||
vals.Set(perPageArg, fmt.Sprint(rc.perPage))
|
||||
|
||||
return request{
|
||||
@@ -183,7 +180,7 @@ func (r request) URL() string {
|
||||
if encoded == "" && query != "" {
|
||||
sep = "?"
|
||||
}
|
||||
r.url.RawQuery = encoded + sep + query
|
||||
r.url.RawQuery = query + sep + encoded
|
||||
return r.url.String()
|
||||
}
|
||||
|
||||
|
||||
@@ -84,7 +84,6 @@ func TestGithubSearchQuery(t *testing.T) {
|
||||
{
|
||||
rc: RequestConfig{
|
||||
perPage: perPage,
|
||||
accessToken: accessToken,
|
||||
},
|
||||
codeQuery: Query{
|
||||
Filename("kustomization.yaml"),
|
||||
@@ -94,13 +93,13 @@ func TestGithubSearchQuery(t *testing.T) {
|
||||
path: "examples/helloWorld/kustomization.yaml",
|
||||
|
||||
expectedCodeQuery: "https://api.github.com/search/code?" +
|
||||
"access_token=random_token&order=desc&per_page=100&sort=indexed&q=filename:kustomization.yaml+size:64..128",
|
||||
"q=filename:kustomization.yaml+size:64..128&order=desc&per_page=100&sort=indexed",
|
||||
|
||||
expectedContentsQuery: "https://api.github.com/repos/kubernetes-sigs/kustomize/contents/" +
|
||||
"examples/helloWorld/kustomization.yaml?access_token=random_token&per_page=100",
|
||||
"examples/helloWorld/kustomization.yaml?per_page=100",
|
||||
|
||||
expectedCommitsQuery: "https://api.github.com/repos/kubernetes-sigs/kustomize/commits?" +
|
||||
"access_token=random_token&per_page=100&q=path:examples/helloWorld/kustomization.yaml",
|
||||
"q=path:examples/helloWorld/kustomization.yaml&per_page=100",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user