Crawler performance improvements, better structure

Refactored the crawler implementation to make the whole thing more
testable. Added a document interface to make the crawler generic.
This will be useful for collecting plugins, and other documents.
This commit is contained in:
Damien Robichaud
2019-08-23 18:11:55 -07:00
parent a66808a10d
commit c2d6f09ef3
13 changed files with 1251 additions and 99 deletions

View File

@@ -6,11 +6,19 @@ package crawler
import (
"context"
"fmt"
"log"
"os"
"sync"
_ "github.com/gomodule/redigo/redis"
"sigs.k8s.io/kustomize/internal/tools/doc"
)
var (
logger = log.New(os.Stdout, "Crawler: ", log.LstdFlags|log.LUTC|log.Llongfile)
)
// Crawler forwards documents from source repositories to index and store them
// for searching. Each crawler is responsible for querying it's source of
// information, and forwarding files that have not been seen before or that need
@@ -19,7 +27,152 @@ type Crawler interface {
// Crawl returns when it is done processing. This method does not take
// ownership of the channel. The channel is write only, and it
// designates where the crawler should forward the documents.
Crawl(ctx context.Context, output chan<- *doc.KustomizationDocument) error
Crawl(ctx context.Context, output chan<- CrawlerDocument) error
// Get the document data given the FilePath, Repo, and Ref/Tag/Branch.
FetchDocument(context.Context, *doc.Document) error
// Write to the document what the created time is.
SetCreated(context.Context, *doc.Document) error
Match(*doc.Document) bool
}
type CrawlerDocument interface {
ID() string
GetDocument() *doc.Document
GetResources() ([]*doc.Document, error)
WasCached() bool
}
type CrawlerSeed []*doc.Document
type IndexFunc func(CrawlerDocument, Crawler) error
type Converter func(*doc.Document) (CrawlerDocument, error)
// Cleaner, more efficient, and more extensible crawler implementation.
// The seed must include the ids of each document in the index.
func CrawlFromSeed(ctx context.Context, seed CrawlerSeed,
crawlers []Crawler, conv Converter, indx IndexFunc) {
seen := make(map[string]struct{})
logIfErr := func(err error) {
if err == nil {
return
}
logger.Println("error: ", err)
}
stack := make(CrawlerSeed, 0)
findMatch := func(d *doc.Document) Crawler {
for _, crawl := range crawlers {
if crawl.Match(d) {
return crawl
}
}
return nil
}
addBranches := func(cdoc CrawlerDocument, match Crawler) {
if _, ok := seen[cdoc.ID()]; ok {
return
}
seen[cdoc.ID()] = struct{}{}
// Insert into index
err := indx(cdoc, match)
logIfErr(err)
if err != nil {
return
}
deps, err := cdoc.GetResources()
logIfErr(err)
if err != nil {
return
}
for _, dep := range deps {
if _, ok := seen[dep.ID()]; ok {
continue
}
stack = append(stack, dep)
}
}
doCrawl := func(docsPtr *CrawlerSeed) {
for len(*docsPtr) > 0 {
back := len(*docsPtr) - 1
next := (*docsPtr)[back]
*docsPtr = (*docsPtr)[:back]
match := findMatch(next)
if match == nil {
logIfErr(fmt.Errorf(
"%v could not match any crawler", next))
continue
}
err := match.FetchDocument(ctx, next)
logIfErr(err)
// If there was no change or there is an error, we don't have
// to branch out, since the dependencies are already in the
// index, or we cannot find the document.
if err != nil || next.WasCached() {
continue
}
cdoc, err := conv(next)
logIfErr(err)
if err != nil {
continue
}
addBranches(cdoc, match)
}
}
// Exploit seed to update bulk of corpus.
logger.Printf("updating %d documents from seed\n", len(seed))
doCrawl(&seed)
// Traverse any new links added while updating corpus.
logger.Printf("crawling %d new documents found in the seed\n", len(stack))
doCrawl(&stack)
ch := make(chan CrawlerDocument, 1<<10)
wg := sync.WaitGroup{}
wg.Add(1)
go func() {
defer wg.Done()
for cdoc := range ch {
if _, ok := seen[cdoc.ID()]; ok {
continue
}
match := findMatch(cdoc.GetDocument())
if match == nil {
logIfErr(fmt.Errorf(
"%v could not match any crawler", cdoc))
continue
}
addBranches(cdoc, match)
}
}()
// Exploration through APIs.
errs := CrawlerRunner(ctx, ch, crawlers)
if errs != nil {
for _, err := range errs {
logIfErr(err)
}
}
close(ch)
logger.Println("Processing the new documents from the crawlers' exploration.")
wg.Wait()
// Handle deps of newly discovered documents.
logger.Printf("crawling the %d new documents from the crawlers' exploration.",
len(stack))
doCrawl(&stack)
}
// CrawlerRunner is a blocking function and only returns once all of the
@@ -32,8 +185,15 @@ type Crawler interface {
// The return value is an array of errors in which each index represents the
// index of the crawler that emitted the error. Although the errors themselves
// can be nil, the array will always be exactly the size of the crawlers array.
//
// Crawler Runner takes in a seed, which represents the documents stored in an
// index somewhere. The document data is not required to be populated. If there
// are many documents, this is preferable. The order of iteration over the seed
// is not garanteed, but the CrawlerRunner does guarantee that every element
// from the seed will be processed before any other documents from the
// crawlers.
func CrawlerRunner(ctx context.Context,
output chan<- *doc.KustomizationDocument, crawlers []Crawler) []error {
output chan<- CrawlerDocument, crawlers []Crawler) []error {
errs := make([]error, len(crawlers))
wg := sync.WaitGroup{}
@@ -41,12 +201,12 @@ func CrawlerRunner(ctx context.Context,
for i, crawler := range crawlers {
// Crawler implementations get their own channels to prevent a
// crawler from closing the main output channel.
docs := make(chan *doc.KustomizationDocument)
docs := make(chan CrawlerDocument)
wg.Add(2)
// Forward all of the documents from this crawler's channel to
// the main output channel.
go func(docs <-chan *doc.KustomizationDocument) {
go func(docs <-chan CrawlerDocument) {
defer wg.Done()
for doc := range docs {
output <- doc
@@ -55,7 +215,7 @@ func CrawlerRunner(ctx context.Context,
// Run this crawler and capture its returned error.
go func(idx int, crawler Crawler,
docs chan<- *doc.KustomizationDocument) {
docs chan<- CrawlerDocument) {
defer func() {
wg.Done()

View File

@@ -3,26 +3,88 @@ package crawler
import (
"context"
"errors"
"fmt"
"reflect"
"sort"
"strings"
"sync"
"testing"
"time"
"sigs.k8s.io/kustomize/internal/tools/doc"
"sigs.k8s.io/kustomize/v3/pkg/pgmconfig"
)
const (
kustomizeRepo = "https://github.com/kubernetes-sigs/kustomize"
)
// Simple crawler that forwards it's list of documents to a provided channel and
// returns it's error to the caller.
type testCrawler struct {
docs []doc.KustomizationDocument
err error
matchPrefix string
err error
docs []doc.KustomizationDocument
lukp map[string]int
}
func (c testCrawler) Match(d *doc.Document) bool {
return d != nil && strings.HasPrefix(d.ID(), c.matchPrefix)
}
func (c testCrawler) FetchDocument(ctx context.Context, d *doc.Document) error {
if i, ok := c.lukp[d.ID()]; ok {
d.DocumentData = c.docs[i].DocumentData
return nil
}
for _, suffix := range pgmconfig.KustomizationFileNames {
fmt.Println(d.ID(), "/", suffix)
i, ok := c.lukp[d.ID()+"/"+suffix]
if !ok {
continue
}
d.FilePath += "/" + suffix
d.DocumentData = c.docs[i].DocumentData
return nil
}
return fmt.Errorf("Document %v does not exist for matcher: %s",
d, c.matchPrefix)
}
func (c testCrawler) SetCreated(ctx context.Context, d *doc.Document) error {
d.CreationTime = &time.Time{}
return nil
}
func newCrawler(matchPrefix string, err error,
docs []doc.KustomizationDocument) testCrawler {
c := testCrawler{
matchPrefix: matchPrefix,
err: err,
docs: docs,
lukp: make(map[string]int),
}
for i, d := range docs {
c.lukp[d.ID()] = i
}
return c
}
// Crawl implements the Crawler interface for testing.
func (c testCrawler) Crawl(ctx context.Context,
output chan<- *doc.KustomizationDocument) error {
output chan<- CrawlerDocument) error {
for i := range c.docs {
for i, d := range c.docs {
isResource := true
for _, suffix := range pgmconfig.KustomizationFileNames {
if strings.HasSuffix(d.FilePath, suffix) {
isResource = false
break
}
}
if isResource {
continue
}
output <- &c.docs[i]
}
return c.err
@@ -45,6 +107,7 @@ func (s sortableDocs) Len() int {
}
func TestCrawlerRunner(t *testing.T) {
fmt.Println("testing CrawlerRunner")
tests := []struct {
tc []Crawler
errs []error
@@ -54,17 +117,27 @@ func TestCrawlerRunner(t *testing.T) {
tc: []Crawler{
testCrawler{
docs: []doc.KustomizationDocument{
{FilePath: "crawler1/doc1"},
{FilePath: "crawler1/doc2"},
{FilePath: "crawler1/doc3"},
{Document: doc.Document{
FilePath: "crawler1/doc1/kustomization.yaml",
}},
{Document: doc.Document{
FilePath: "crawler1/doc2/kustomization.yaml",
}},
{Document: doc.Document{
FilePath: "crawler1/doc3/kustomization.yaml",
}},
},
},
testCrawler{err: errors.New("crawler2")},
testCrawler{},
testCrawler{
docs: []doc.KustomizationDocument{
{FilePath: "crawler4/doc1"},
{FilePath: "crawler4/doc2"},
{Document: doc.Document{
FilePath: "crawler4/doc1/kustomization.yaml",
}},
{Document: doc.Document{
FilePath: "crawler4/doc2/kustomization.yaml",
}},
},
err: errors.New("crawler4"),
},
@@ -76,17 +149,27 @@ func TestCrawlerRunner(t *testing.T) {
errors.New("crawler4"),
},
docs: sortableDocs{
{FilePath: "crawler1/doc1"},
{FilePath: "crawler1/doc2"},
{FilePath: "crawler1/doc3"},
{FilePath: "crawler4/doc1"},
{FilePath: "crawler4/doc2"},
{Document: doc.Document{
FilePath: "crawler1/doc1/kustomization.yaml",
}},
{Document: doc.Document{
FilePath: "crawler1/doc2/kustomization.yaml",
}},
{Document: doc.Document{
FilePath: "crawler1/doc3/kustomization.yaml",
}},
{Document: doc.Document{
FilePath: "crawler4/doc1/kustomization.yaml",
}},
{Document: doc.Document{
FilePath: "crawler4/doc2/kustomization.yaml",
}},
},
},
}
for _, test := range tests {
output := make(chan *doc.KustomizationDocument)
output := make(chan CrawlerDocument)
wg := sync.WaitGroup{}
wg.Add(1)
@@ -95,8 +178,8 @@ func TestCrawlerRunner(t *testing.T) {
defer close(output)
defer wg.Done()
errs := CrawlerRunner(context.Background(), output,
test.tc)
errs := CrawlerRunner(context.Background(),
output, test.tc)
// Check that errors are returned as they should be.
if !reflect.DeepEqual(errs, test.errs) {
@@ -108,8 +191,13 @@ func TestCrawlerRunner(t *testing.T) {
// Iterate over the output channel of Crawler runner.
returned := make(sortableDocs, 0, len(test.docs))
for doc := range output {
returned = append(returned, *doc)
for o := range output {
d, ok := o.(*doc.KustomizationDocument)
if !ok || d == nil {
t.Errorf("%T not expected type (%T)",
o, d)
}
returned = append(returned, *d)
}
// Check that all documents are received.
@@ -122,3 +210,147 @@ func TestCrawlerRunner(t *testing.T) {
wg.Wait()
}
}
func TestCrawlFromSeed(t *testing.T) {
fmt.Println("testing CrawlFromSeed")
tests := []struct {
seed CrawlerSeed
matcher string
corpus []doc.KustomizationDocument
}{
{
seed: CrawlerSeed{
{
RepositoryURL: kustomizeRepo,
FilePath: "examples/helloWorld/kustomization.yaml",
},
{
RepositoryURL: kustomizeRepo,
FilePath: "examples/other/kustomization.yaml",
},
},
matcher: kustomizeRepo,
corpus: []doc.KustomizationDocument{
// Visited from the seed, will be ignored in the crawl.
{Document: doc.Document{
RepositoryURL: kustomizeRepo,
FilePath: "examples/helloWorld/kustomization.yaml",
DocumentData: `
resources:
- deployment.yaml
`,
}},
// Also visited from the seed as a relative resource.
{Document: doc.Document{
RepositoryURL: kustomizeRepo,
FilePath: "examples/helloWorld/deployment.yaml",
DocumentData: `
apiVersion: apps/v1
kind: Deployment
metadata:
name: hello
`,
}},
// Visited from the seed. Has a remote import.
{Document: doc.Document{
RepositoryURL: kustomizeRepo,
FilePath: "examples/other/kustomization.yaml",
DocumentData: `
resources:
- https://github.com/kubernetes-sigs/kustomize/examples/other/overlay
- service.yaml
`,
}},
// Imported as a base from the seed.
{Document: doc.Document{
RepositoryURL: kustomizeRepo,
FilePath: "examples/other/overlay/kustomization.yaml",
DocumentData: `
resources:
- https://github.com/kubernetes-sigs/kustomize/examples/seedcrawl1
- https://github.com/kubernetes-sigs/kustomize/examples/seedcrawl2
`,
}},
// Imported as a resource from the seed.
{Document: doc.Document{
RepositoryURL: kustomizeRepo,
FilePath: "examples/other/service.yaml",
}},
// Visited from crawling seed.
{Document: doc.Document{
RepositoryURL: kustomizeRepo,
FilePath: "examples/seedcrawl1/kustomization.yml",
}},
// Visited from crawling seed.
{Document: doc.Document{
RepositoryURL: kustomizeRepo,
FilePath: "examples/seedcrawl2/kustomization.yaml",
DocumentData: `
resources:
- ../base
- job.yaml
`,
}},
// Visited from crawling seed.
{Document: doc.Document{
RepositoryURL: kustomizeRepo,
FilePath: "examples/base/kustomization.yml",
}},
// Visited from crawling seed imported as resource.
{Document: doc.Document{
RepositoryURL: kustomizeRepo,
FilePath: "examples/seedcrawl2/job.yaml",
}},
// Visited from the crawler runner.
{Document: doc.Document{
RepositoryURL: kustomizeRepo,
FilePath: "examples/other/base/kustomization.yaml",
DocumentData: `
resources:
- ../app
`,
}},
// Visited from the crawler runner.
{Document: doc.Document{
RepositoryURL: kustomizeRepo,
FilePath: "examples/other/app/kustomization.yaml",
DocumentData: `
resources:
- resource.yaml
`,
}},
// Visited from crawling runner imported as resource.
{Document: doc.Document{
RepositoryURL: kustomizeRepo,
FilePath: "examples/other/app/resource.yaml",
}},
},
},
}
for _, tc := range tests {
cr := newCrawler(tc.matcher, nil, tc.corpus)
visited := make(map[string]int)
CrawlFromSeed(context.Background(), tc.seed, []Crawler{cr},
func(d *doc.Document) (CrawlerDocument, error) {
return &doc.KustomizationDocument{
Document: *d,
}, nil
},
func(d CrawlerDocument, cr Crawler) error {
visited[d.ID()]++
return nil
},
)
if lv, lc := len(visited), len(tc.corpus); lv != lc {
t.Errorf("error: %d of %d documents visited.", lv, lc)
t.Errorf("\nvisited (%v)\nexpected (%v).", visited, cr.lukp)
}
for id, cnt := range visited {
if cnt != 1 {
t.Errorf("%s not visited once (%d)", id, cnt)
}
}
}
}

View File

@@ -16,7 +16,11 @@ import (
"strings"
"time"
"sigs.k8s.io/kustomize/internal/tools/crawler"
"sigs.k8s.io/kustomize/internal/tools/doc"
"sigs.k8s.io/kustomize/internal/tools/httpclient"
"sigs.k8s.io/kustomize/v3/pkg/git"
"sigs.k8s.io/kustomize/v3/pkg/pgmconfig"
)
var logger = log.New(os.Stdout, "Github Crawler: ",
@@ -34,6 +38,17 @@ type GitHubClient struct {
client *http.Client
}
func NewClient(accessToken string, retryCount uint64, client *http.Client) GitHubClient {
return GitHubClient{
retryCount: retryCount,
client: client,
RequestConfig: RequestConfig{
perPage: githubMaxPageSize,
accessToken: accessToken,
},
}
}
func NewCrawler(accessToken string, retryCount uint64, client *http.Client,
query Query) githubCrawler {
@@ -52,7 +67,7 @@ func NewCrawler(accessToken string, retryCount uint64, client *http.Client,
// Implements crawler.Crawler.
func (gc githubCrawler) Crawl(
ctx context.Context, output chan<- *doc.KustomizationDocument) error {
ctx context.Context, output chan<- crawler.CrawlerDocument) error {
noETagClient := GitHubClient{
RequestConfig: gc.client.RequestConfig,
@@ -80,13 +95,78 @@ func (gc githubCrawler) Crawl(
}
}
return errs
if len(errs) > 0 {
return errs
}
return nil
}
func (gc githubCrawler) FetchDocument(ctx context.Context, d *doc.Document) error {
repoURL := d.RepositoryURL + "/" + d.FilePath + "?ref=" + d.DefaultBranch
repoSpec, err := git.NewRepoSpecFromUrl(repoURL)
if err != nil {
return fmt.Errorf("invalid repospec: %v", err)
}
url := "https://raw.githubusercontent.com/" + repoSpec.OrgRepo +
"/" + repoSpec.Ref + "/" + repoSpec.Path
handle := func(resp *http.Response, err error, path string) error {
if err == nil && resp.StatusCode == http.StatusOK {
d.IsSame = httpclient.FromCache(resp.Header)
defer resp.Body.Close()
data, err := ioutil.ReadAll(resp.Body)
if err != nil {
return err
}
d.DocumentData = string(data)
d.FilePath = d.FilePath + path
return nil
}
return err
}
resp, err := gc.client.GetRawUserContent(url)
if err := handle(resp, err, ""); err == nil {
return nil
}
for _, file := range pgmconfig.KustomizationFileNames {
resp, err = gc.client.GetRawUserContent(url + "/" + file)
err := handle(resp, err, "/"+file)
if err != nil {
continue
}
}
return fmt.Errorf("File Not Found: %s", url)
}
func (gc githubCrawler) SetCreated(ctx context.Context, d *doc.Document) error {
fs := GithubFileSpec{}
fs.Repository.FullName = d.RepositoryURL + "/" + d.FilePath
creationTime, err := gc.client.GetFileCreationTime(fs)
if err != nil {
return err
}
d.CreationTime = &creationTime
return nil
}
func (gc githubCrawler) Match(d *doc.Document) bool {
url := d.RepositoryURL + "/" + d.FilePath + "?ref=" + "/" +
d.DefaultBranch
repoSpec, err := git.NewRepoSpecFromUrl(url)
if err != nil {
return false
}
return strings.Contains(repoSpec.Host, "github.com")
}
// processQuery follows all of the pages in a query, and updates/adds the
// documents from the crawl to the datastore/index.
func processQuery(ctx context.Context, gcl GitHubClient, query string,
output chan<- *doc.KustomizationDocument) error {
output chan<- crawler.CrawlerDocument) error {
queryPages := make(chan GithubResponseInfo)
@@ -112,13 +192,6 @@ func processQuery(ctx context.Context, gcl GitHubClient, query string,
}
for _, file := range page.Parsed.Items {
// TODO(damienr74) This is where we'd need to
// communicate with redis. Currently always doing a full
// reindex of the documents. Since the documents are in
// sorted order in each bucket, we can short circuit the
// search when we find a file that has been seen, or we
// can choose to selectively update files.
k, err := kustomizationResultAdapter(gcl, file)
if err != nil {
errs = append(errs, err)
@@ -137,23 +210,33 @@ func processQuery(ctx context.Context, gcl GitHubClient, query string,
}
func kustomizationResultAdapter(gcl GitHubClient, k GithubFileSpec) (
*doc.KustomizationDocument, error) {
crawler.CrawlerDocument, error) {
data, err := gcl.GetFileData(k)
if err != nil {
return nil, err
}
creationTime, err := gcl.GetFileCreationTime(k)
if err != nil {
logger.Printf("(error: %v) initializing to current time.", err)
logger.Printf(
"(error: %v) initializing to current time.\n", err)
}
url := gcl.ReposRequest(k.Repository.FullName)
defaultBranch, err := gcl.GetDefaultBranch(url)
if err != nil {
logger.Printf(
"(error: %v) setting default_branch to master\n", err)
defaultBranch = "master"
}
doc := doc.KustomizationDocument{
DocumentData: string(data),
FilePath: k.Path,
RepositoryURL: k.Repository.URL,
CreationTime: creationTime,
Document: doc.Document{
DocumentData: string(data),
FilePath: k.Path,
DefaultBranch: defaultBranch,
RepositoryURL: k.Repository.URL,
},
}
return &doc, nil
@@ -227,7 +310,34 @@ func (gcl GitHubClient) GetFileData(k GithubFileSpec) ([]byte, error) {
}
defer resp.Body.Close()
return ioutil.ReadAll(resp.Body)
data, err = ioutil.ReadAll(resp.Body)
return data, err
}
func (gcl GitHubClient) GetDefaultBranch(url string) (string, error) {
resp, err := gcl.GetReposData(url)
if err != nil {
return "", fmt.Errorf(
"'%s' could not get default_branch: %v", url, err)
}
defer resp.Body.Close()
data, err := ioutil.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf(
"could not read default_branch: %v", err)
}
type defaultBranch struct {
DefaultBranch string `json:"default_branch,omitempty"`
}
var branch defaultBranch
err = json.Unmarshal(data, &branch)
if err != nil {
return "", fmt.Errorf(
"default_branch json malformed: %v", err)
}
return branch.DefaultBranch, nil
}
// GetFileCreationTime gets the earliest date of a file.
@@ -301,29 +411,23 @@ func throttleRepoAPI() {
<-contentRateTicker.C
}
const (
accessTokenKeyword = "access_token="
perPageKeyword = "per_page="
contentSearchURL = "https://api.github.com/repos"
contentKeyword = "contents"
)
type multiError []error
func (me multiError) Error() string {
size := len(me) + 2
strs := make([]string, size)
strs[0] = "Errors [\n\t"
strs[0] = "Errors ["
for i, err := range me {
strs[i+1] = err.Error()
strs[i+1] = "\t" + err.Error()
}
strs[size-1] = "\n]"
return strings.Join(strs, "\n\t")
strs[size-1] = "]"
return strings.Join(strs, "\n")
}
type GithubFileSpec struct {
Path string `json:"path,omitempty"`
Repository struct {
API string `json:"url,omitempty"`
URL string `json:"html_url,omitempty"`
FullName string `json:"full_name,omitempty"`
} `json:"repository,omitempty"`

View File

@@ -127,6 +127,11 @@ func (rc RequestConfig) ContentsRequest(fullRepoName, path string) string {
return rc.makeRequest(uri, Query{}).URL()
}
func (rc RequestConfig) ReposRequest(fullRepoName string) string {
uri := fmt.Sprintf("repos/%s", fullRepoName)
return rc.makeRequest(uri, Query{}).URL()
}
// CommitsRequest given the repo name, and a filepath returns a formatted query
// for the Github API to find the commits that affect this file.
func (rc RequestConfig) CommitsRequest(fullRepoName, path string) string {

View File

@@ -1,5 +1,102 @@
package github
// GitHub only returns at most 1000 results per search query,
// this is problematic if you want to retrieve all the results for a given
// search query. However, GitHub allows you to specify as much as you want per
// query to make things more specific. Specifically for files, GitHub allows
// you to specify their sizes with range queries. This is very convenient
// since it allows us to split the search into disjoint sets/shards of results
// from the different file size ranges.
//
// Some important factors to consider:
//
// - These queries are rate limited by the API to roughly once query every two
// seconds.
//
// - The search space for file sizes is in bytes, from 0B to < 512KiB (this is
// a huge search space that cannot be probed linearly in a timely manner if
// granularity is to be expected).
//
// - If you have K files there will likely be ~K/1000 sets that you have find
// from this search space in order to get all of the results.
//
// - If you have O(K) sets it is unlikely that they are all of the same size,
// since (most files are power law distributed). That means that the range
// might be significantly smaller for 1000 small files, than it is for
// 1000 large files.
//
// - This method is a best effort approach. There are some limitations to what
// it can and can't do, so please note the following:
//
// + There may very well be a filesize that has more than 1000 results.
// this method cannot help in this case. However, requerying over time
// (days/weeks/months) while sorting by last indexed values may be
// sufficient to eventually get all of the results.
//
// + It's possible that the github API returns inconsistent counts. This
// is problematic in most cases, since it can cause many issues if the
// case is not handled properly. For instance, if you requested the
// number of files of an interval from size:0..64 and get that there
// are 900 results, you may query at size:0..96 and get that there
// are 800 results. To guarantee that this approach completes and does
// not get into a query loop over the same intervals, it will retry a few
// times and take the largest of the results or the largest previously
// queried value from another range (in this case, the implementation
// could decide that size:0..96 must have 900) results. This makes the
// approach best effort even if there are no single file sizes of over
// 1000 results.
//
//
// The approach that was taken to solve this problem is the following:
//
// 1. Determine the total number of results by querying from the lower bound
// to the upper bound (size:0..max). If there are less than 1000 files,
// return a single range of values (size:0..max) since all results can be
// retrieved.
//
// 2. Otherwise, set a target number of files to be 1000.
//
// 3. Binary search for the range from 0..r that provides a file count that is
// less than or equal to the target. Once this value is found, store the
// upper bound of range (r). If r is the same as the previous value, (or 0)
// increase r by one (this guarantees progress, but will miss out on some
// results).
//
// 4. Increase the target by 1000.
//
// 5. Repeat steps 3 and 4 until the target is at or exceeds the total number
// of files.
//
//
// In general there are other ways to get all of the files from GitHub. In
// some cases it would be sufficient to just get the files that are being
// updated/indexed by github periodically to update the corpus, so this
// complicated approach does not have to be run every time. However, for
// some searches, there may be too many results on a time interval to do
// this simple update search limited to only 1000 results.
//
// There is also a more sophisticated approach that may yield better
// performance:
// - Perform this search once and create a prior distribution of file sizes.
// Each time you want to retrieve the results of the query, scale the
// prior of expected ranges to the current number of files. From each
// expected range of 1000 files, perform a exponential search to find the
// lower bound of the range. This would likely reduce the total number
// of queries by a significant amount since it would only have to search
// for a small set of values around each likely range boundary.
//
// However, actually retrieving the files will be the bottleneck operation
// since the number of queries to find the ranges will be close to:
// log2(maxFileSize) * totalResults / 1000 ~= totalResults / 50
// whereas the number of queries to actually get all of the search results
// are close to:
// apiCallsPerResult * 10(pages) * 100(resultsPerPage) * totalResults / 1000
// = apiCallsPerResult * totalResults.
//
// So it could very well take apiCallsPerResult * 50 times longer to acutally
// fetch the results (assuming the quotas for the API calls are the same as the
// search API), than it does to perform these range searches.
import (
"fmt"
"math/bits"
@@ -12,14 +109,20 @@ const (
githubMaxResultsPerQuery = uint64(1000)
)
// Interface for testing purposes. Not expecting to have multiple
// implementations.
// Interface instead of struct for testing purposes.
// Not expecting to have multiple implementations.
type cachedSearch interface {
CountResults(uint64) (uint64, error)
RequestString(filesize rangeFormatter) string
}
// Cache uses bit tricks to be more efficient in detecting
// cachedSearch is a simple data structure that maps the upper bound (r) of a
// range from 0 to r to the number of files that have between 0 and r files
// (inclusive). It also guarantees that the counts are monotonically increasing
// (not strict) as the value for r increases, by looking at the maximal
// previous file count for the value that precedes r in the cache.
//
// It uses a bit trick to be more efficient in detecting
// inconsistencies in the returned data from the Github API.
// Therefore, the cache expects a search to always start at 0, and
// it expects the max file size to be a power of 2. If this is to be changed
@@ -36,11 +139,12 @@ type cachedSearch interface {
// problematic). The current cache implementation looks at the
// predecessor entry to find out if the current value is monotonic.
// This is where the bit trick is used, since each step in the binary
// search is adding or ommiting to add a decreasing of 2 to the query value,
// we can remove the least significant set bit to find the predecessor in
// constant time. Ultimately since the search is rate limited, we could also
// easily afford to compute this in linear time by iterating
// over cached values.
// search is adding or ommiting to add a decreasing power of 2 to the query
// value, we can remove the least significant set bit to find the
// predecessor in constant time. Ultimately since the search is rate
// limited, we could also easily afford to compute this in linear time
// by iterating over cached values. So this trick is not crucial to the
// cache's performance.
type githubCachedSearch struct {
cache map[uint64]uint64
gcl GitHubClient
@@ -160,7 +264,7 @@ func FindRangesForRepoSearch(cache cachedSearch) ([]string, error) {
//
// My intuition is that this approach is competitive to a perfectly
// optimal solution, but I didn't actually take the time to do a
// rigurous proof. Intuitively, since files sizes are typically power
// rigorous proof. Intuitively, since files sizes are typically power
// law distibuted the binary search will be very skewed towards the
// smaller file ranges. This means that in practice this approach will
// make fewer than (#files/1000)*(log(n) = 19) queries for

View File

@@ -3,11 +3,16 @@ package doc
import (
"fmt"
"strings"
"time"
"sigs.k8s.io/kustomize/v3/k8sdeps/kunstruct"
"sigs.k8s.io/kustomize/v3/pkg/ifc"
"sigs.k8s.io/kustomize/v3/pkg/pgmconfig"
"sigs.k8s.io/kustomize/v3/pkg/types"
"sigs.k8s.io/yaml"
)
var fileReader ifc.KunstructuredFactory = kunstruct.NewKunstructuredFactoryImpl()
// This document is meant to be used at the elasticsearch document type.
// Fields are serialized as-is to elasticsearch, where indices are built
// to facilitate text search queries. Identifiers, Values, FilePath,
@@ -31,25 +36,122 @@ import (
// facilitates the use of complex text search features from elasticsearch such
// as fuzzy searching, regex, wildcards, etc.
type KustomizationDocument struct {
DocumentData string `json:"document,omitempty"`
Kinds []string `json:"kinds,omitempty"`
Identifiers []string `json:"identifiers,omitempty"`
Values []string `json:"values,omitempty"`
FilePath string `json:"filePath,omitempty"`
RepositoryURL string `json:"repositoryUrl,omitempty"`
CreationTime time.Time `json:"creationTime,omitempty"`
Document
Kinds []string `json:"kinds,omitempty"`
Identifiers []string `json:"identifiers,omitempty"`
Values []string `json:"values,omitempty"`
}
type set map[string]struct{}
// Implements the CrawlerDocument interface.
func (doc *KustomizationDocument) GetResources() ([]*Document, error) {
isResource := true
for _, suffix := range pgmconfig.KustomizationFileNames {
if strings.HasSuffix(doc.FilePath, "/"+suffix) {
isResource = false
}
}
if isResource {
return []*Document{}, nil
}
content := []byte(doc.DocumentData)
content, err := FixKustomizationPreUnmarshallingNonFatal(content)
if err != nil {
return nil, fmt.Errorf("could not fix kustomize file: %v", err)
}
var k types.Kustomization
err = yaml.Unmarshal(content, &k)
if err != nil {
return nil, fmt.Errorf(
"could not parse kustomization: %v", err)
}
k.FixKustomizationPostUnmarshalling()
res := make([]*Document, 0, len(k.Resources))
for _, r := range k.Resources {
next, err := doc.Document.FromRelativePath(r)
if err != nil {
fmt.Printf("GetResources error: %v\n", err)
continue
}
res = append(res, &next)
}
return res, nil
}
func (doc *KustomizationDocument) readBytes() ([]map[string]interface{}, error) {
data := []byte(doc.DocumentData)
for _, suffix := range pgmconfig.KustomizationFileNames {
if !strings.HasSuffix(doc.FilePath, "/"+suffix) {
continue
}
var config map[string]interface{}
err := yaml.Unmarshal(data, &config)
if err != nil {
return nil, fmt.Errorf(
"unable to parse kustomization: %v", err)
}
return []map[string]interface{}{config}, nil
}
configs := make([]map[string]interface{}, 0)
ks, err := fileReader.SliceFromBytes(data)
if err != nil {
return nil, fmt.Errorf("unable to parse resource: %v", err)
}
for _, k := range ks {
configs = append(configs, k.Map())
}
return configs, nil
}
func (doc *KustomizationDocument) ParseYAML() error {
doc.Identifiers = make([]string, 0)
doc.Values = make([]string, 0)
doc.Kinds = make([]string, 0, 1)
var kustomization map[string]interface{}
err := yaml.Unmarshal([]byte(doc.DocumentData), &kustomization)
if err != nil {
return fmt.Errorf("unable to parse kustomization file: %s", err)
identifierSet := make(set)
valueSet := make(set)
getKind := func(m map[string]interface{}) string {
const defaultStr = "Kustomization"
kind, ok := m["kind"]
if !ok {
return defaultStr
}
if str, ok := kind.(string); ok && str != "" {
return str
}
return defaultStr
}
ks, err := doc.readBytes()
if err != nil {
return err
}
for _, contents := range ks {
doc.Kinds = append(doc.Kinds, getKind(contents))
createFlatStructure(identifierSet, valueSet, contents)
}
for val := range valueSet {
doc.Values = append(doc.Values, val)
}
for key := range identifierSet {
doc.Identifiers = append(doc.Identifiers, key)
}
return nil
}
func createFlatStructure(identifierSet set, valueSet set, contents map[string]interface{}) {
type Map struct {
data map[string]interface{}
prefix string
@@ -57,18 +159,15 @@ func (doc *KustomizationDocument) ParseYAML() error {
toVisit := []Map{
{
data: kustomization,
data: contents,
prefix: "",
},
}
identifierSet := make(map[string]struct{})
valueSet := make(map[string]struct{})
for i := 0; i < len(toVisit); i++ {
visiting := toVisit[i]
for k, v := range visiting.data {
identifier := fmt.Sprintf("%s:%s", visiting.prefix,
strings.Replace(k, ":", "%3A", -1))
identifier := fmt.Sprintf("%s:%s", visiting.prefix, k)
// noop after the first iteration.
identifier = strings.TrimLeft(identifier, ":")
@@ -88,8 +187,7 @@ func (doc *KustomizationDocument) ParseYAML() error {
traverseStructure(val)
}
case interface{}:
esc := strings.Replace(fmt.Sprintf("%v",
value), ":", "%3A", -1)
esc := fmt.Sprintf("%v", value)
valuePath := fmt.Sprintf("%s=%v",
identifier, esc)
@@ -99,17 +197,6 @@ func (doc *KustomizationDocument) ParseYAML() error {
traverseStructure(v)
identifierSet[identifier] = struct{}{}
}
}
for val := range valueSet {
doc.Values = append(doc.Values, val)
}
for key := range identifierSet {
doc.Identifiers = append(doc.Identifiers, key)
}
return nil
}

View File

@@ -11,6 +11,8 @@ func TestParseYAML(t *testing.T) {
testCases := []struct {
identifiers []string
values []string
kinds []string
filepath string
yaml string
}{
{
@@ -21,15 +23,19 @@ func TestParseYAML(t *testing.T) {
"kind",
},
values: []string{
"kind=",
"namePrefix=dev-",
"metadata:name=app",
"kind=Deployment",
},
kinds: []string{
"Kustomization",
},
filepath: "some/path/to/kustomization.yaml",
yaml: `
namePrefix: dev-
metadata:
name: app
kind: Deployment
kind: ""
`,
},
{
@@ -49,13 +55,17 @@ kind: Deployment
"namePrefix=dev-",
"metadata:name=n1",
"metadata:spec:replicas=3",
"kind=Deployment",
"kind=Kustomization",
"replicas:name=n1",
"replicas:name=n2",
"replicas:count=3",
"resource=file1.yaml",
"resource=file2.yaml",
},
kinds: []string{
"Kustomization",
},
filepath: "./kustomization.yaml",
yaml: `
namePrefix: dev-
# map of map
@@ -63,7 +73,7 @@ metadata:
name: n1
spec:
replicas: 3
kind: Deployment
kind: Kustomization
#list of map
replicas:
@@ -76,14 +86,51 @@ replicas:
resource:
- file1.yaml
- file2.yaml
`,
},
{
identifiers: []string{
"kind",
"metadata",
"metadata:name",
},
values: []string{
"kind=Deployment",
"kind=Service",
"kind=Custom",
"metadata:name=app",
"metadata:name=app-service",
"metadata:name=app-crd",
},
kinds: []string{
"Deployment",
"Service",
"Custom",
},
filepath: "resources.yaml",
yaml: `
---
kind: Deployment
metadata:
name: app
---
kind: Service
metadata:
name: app-service
---
kind: Custom
metadata:
name: app-crd
`,
},
}
for _, test := range testCases {
doc := KustomizationDocument{
DocumentData: test.yaml,
FilePath: "example/path/kustomization.yaml",
Document: Document{
DocumentData: test.yaml,
FilePath: test.filepath,
},
}
err := doc.ParseYAML()
@@ -106,5 +153,102 @@ resource:
cmpStrings(doc.Identifiers, test.identifiers, "identifiers")
cmpStrings(doc.Values, test.values, "values")
cmpStrings(doc.Kinds, test.kinds, "kinds")
}
}
func TestGetResources(t *testing.T) {
tests := []struct {
doc KustomizationDocument
resources []*Document
}{
{
doc: KustomizationDocument{
Document: Document{
RepositoryURL: "sigs.k8s.io/kustomize",
FilePath: "some/path/to/kdir/kustomization.yaml",
DocumentData: `
bases:
- ../base
- ../otherbase
resources:
- file.yaml
- https://github.com/kubernetes-sigs/kustomize/examples/helloWorld?ref=v3.1.0
`},
},
resources: []*Document{
{
RepositoryURL: "sigs.k8s.io/kustomize",
FilePath: "some/path/to/base",
},
{
RepositoryURL: "sigs.k8s.io/kustomize",
FilePath: "some/path/to/otherbase",
},
{
RepositoryURL: "sigs.k8s.io/kustomize",
FilePath: "some/path/to/kdir/file.yaml",
},
{
RepositoryURL: "https://github.com/kubernetes-sigs/kustomize",
FilePath: "examples/helloWorld",
DefaultBranch: "v3.1.0",
},
},
},
{
doc: KustomizationDocument{
Document: Document{
RepositoryURL: "https://github.com/some/repo",
FilePath: "some/resource.yaml",
DocumentData: `
bases:
- ../base
- ../overlay
resources:
- https://github.com/kubernetes-sigs/kustomize/examples/helloWorld?ref=v3.1.0
- some/file.yaml
`,
},
},
resources: []*Document{},
},
}
for _, test := range tests {
res, err := test.doc.GetResources()
if err != nil {
t.Errorf("Unexpected error: %v\n", err)
continue
}
if len(test.resources) != len(res) {
t.Errorf("Number of resources does not match.")
continue
}
cmp := func(docs []*Document) func(i, j int) bool {
return func(i, j int) bool {
if docs[i].RepositoryURL != docs[j].RepositoryURL {
return docs[i].RepositoryURL <
docs[j].RepositoryURL
}
if docs[i].FilePath != docs[j].FilePath {
return docs[i].FilePath <
docs[j].FilePath
}
return docs[i].DefaultBranch < docs[j].DefaultBranch
}
}
sort.Slice(test.resources, cmp(test.resources))
sort.Slice(res, cmp(res))
for i, r := range test.resources {
if !reflect.DeepEqual(res[i], r) {
t.Errorf("Expected '%+v' to equal '%+v'\n",
res[i], r)
}
}
}
}

View File

@@ -0,0 +1,58 @@
package doc
import (
"path"
"time"
"sigs.k8s.io/kustomize/v3/pkg/git"
)
type Document struct {
RepositoryURL string `json:"repositoryUrl,omitempty"`
FilePath string `json:"filePath,omitempty"`
DefaultBranch string `json:"defaultBranch,omitempty"`
DocumentData string `json:"document,omitempty"`
CreationTime *time.Time `json:"creationTime,omitempty"`
IsSame bool `json:"-"`
}
// Implements the CrawlerDocument interface.
func (doc *Document) GetDocument() *Document {
return doc
}
// Implements the CrawlerDocument interface.
func (doc *Document) WasCached() bool {
return doc.IsSame
}
func (doc *Document) FromRelativePath(newFile string) (Document, error) {
repoSpec, err := git.NewRepoSpecFromUrl(newFile)
if err == nil {
return Document{
RepositoryURL: repoSpec.Host + path.Clean(repoSpec.OrgRepo),
FilePath: path.Clean(repoSpec.Path),
DefaultBranch: repoSpec.Ref,
}, nil
}
// else document is probably relative path.
ret := Document{
RepositoryURL: doc.RepositoryURL,
DefaultBranch: doc.DefaultBranch,
}
ogDir, _ := path.Split(doc.FilePath)
cleaned := path.Clean(newFile)
if !path.IsAbs(cleaned) {
cleaned = path.Clean(ogDir + "/" + cleaned)
}
ret.FilePath = cleaned
return ret, nil
}
func (doc *Document) ID() string {
return doc.RepositoryURL + "/" +
doc.DefaultBranch + "/" + doc.FilePath
}

View File

@@ -0,0 +1,64 @@
package doc
import (
"reflect"
"testing"
)
func TestFromRelativePath(t *testing.T) {
type Case struct {
RelativePath string
Expected Document
}
testCases := []struct {
BaseDoc Document
Cases []Case
}{
{
BaseDoc: Document{
RepositoryURL: "example.com/repo",
FilePath: "path/to/file/kustomization.yaml",
DefaultBranch: "master",
},
Cases: []Case{
{
RelativePath: "../other/file/resource.yaml",
Expected: Document{
RepositoryURL: "example.com/repo",
FilePath: "path/to/other/file/resource.yaml",
DefaultBranch: "master",
},
},
{
RelativePath: "../file/../../something/../to/other/file/patch.yaml",
Expected: Document{
RepositoryURL: "example.com/repo",
FilePath: "path/to/other/file/patch.yaml",
DefaultBranch: "master",
},
},
{
RelativePath: "service.yaml",
Expected: Document{
RepositoryURL: "example.com/repo",
FilePath: "path/to/file/service.yaml",
DefaultBranch: "master",
},
},
},
},
}
for _, tc := range testCases {
for _, c := range tc.Cases {
rd, err := tc.BaseDoc.FromRelativePath(c.RelativePath)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(rd, c.Expected) {
t.Errorf("document mismatch expected %v, got %v", c.Expected, rd)
}
}
}
}

View File

@@ -0,0 +1,51 @@
package doc
import (
"fmt"
"regexp"
"sigs.k8s.io/yaml"
)
func FixKustomizationPreUnmarshallingNonFatal(data []byte) ([]byte, error) {
deprecateFieldsMap := map[string]string{
"imageTags:": "images:",
}
for oldname, newname := range deprecateFieldsMap {
pattern := regexp.MustCompile(oldname)
data = pattern.ReplaceAll(data, []byte(newname))
}
found, err := useLegacyPatch(data)
if err == nil && found {
pattern := regexp.MustCompile("patches:")
data = pattern.ReplaceAll(data, []byte("patchesStrategicMerge:"))
}
return data, err
}
func useLegacyPatch(data []byte) (bool, error) {
found := false
var object map[string]interface{}
err := yaml.Unmarshal(data, &object)
if err != nil {
return false, fmt.Errorf("invalid content from %s",
string(data))
}
if rawPatches, ok := object["patches"]; ok {
patches, ok := rawPatches.([]interface{})
if !ok {
return false, fmt.Errorf("invalid patches from %v",
rawPatches)
}
for _, p := range patches {
_, ok := p.(string)
if ok {
found = true
}
}
}
return found, nil
}

View File

@@ -3,8 +3,11 @@ module sigs.k8s.io/kustomize/internal/tools
go 1.12
require (
github.com/elastic/go-elasticsearch/v6 v6.8.2
github.com/gomodule/redigo v2.0.0+incompatible
github.com/gorilla/mux v1.7.3
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79
gopkg.in/yaml.v2 v2.2.2 // indirect
github.com/rs/cors v1.7.0
sigs.k8s.io/kustomize/v3 v3.1.1-0.20190826160027-84519c236bac
sigs.k8s.io/yaml v1.1.0
)

View File

@@ -1,10 +1,146 @@
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/davecgh/go-spew v0.0.0-20151105211317-5215b55f46b2/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/elastic/go-elasticsearch/v6 v6.8.2 h1:rp5DGrd63V5c6nHLjF6QEXUpZSvs0+QM3ld7m9VhV2g=
github.com/elastic/go-elasticsearch/v6 v6.8.2/go.mod h1:UwaDJsD3rWLM5rKNFzv9hgox93HoX8utj1kxD9aFUcI=
github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs=
github.com/emicklei/go-restful v2.9.6+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs=
github.com/evanphx/json-patch v4.5.0+incompatible h1:ouOWdg56aJriqS0huScTkVXPC5IcNrDCXZ6OoTAWu7M=
github.com/evanphx/json-patch v4.5.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0=
github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg=
github.com/go-openapi/jsonreference v0.0.0-20160704190145-13c6e3589ad9/go.mod h1:W3Z9FmVs9qj+KR4zFKmDPGiLdk1D9Rlm7cyMvf57TTg=
github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc=
github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nAiCcj+friV/PDoE1/3eeccG9LYBs0tYvLOWc=
github.com/go-openapi/spec v0.19.2/go.mod h1:sCxk3jxKgioEJikev4fgkNmwS+3kuYdJtcsZsD5zxMY=
github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I=
github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
github.com/gogo/protobuf v1.2.1 h1:/s5zKNz0uPFCZ5hddgPdo2TK2TVrUNMn0OOX8/aZMTE=
github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4=
github.com/golang/protobuf v0.0.0-20161109072736-4bd1920723d7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/gomodule/redigo v2.0.0+incompatible h1:K/R+8tc58AaqLkqG2Ol3Qk+DR/TlNuhuh457pBFPtt0=
github.com/gomodule/redigo v2.0.0+incompatible/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4=
github.com/google/gofuzz v0.0.0-20161122191042-44d81051d367/go.mod h1:HP5RmnzzSNb993RKQDq4+1A4ia9nllfqcQFTQJedwGI=
github.com/google/gofuzz v1.0.0 h1:A8PeW59pxE9IoFRqBp37U+mSNaQoZ46F1f0f863XSXw=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/googleapis/gnostic v0.0.0-20170426233943-68f4ded48ba9/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY=
github.com/googleapis/gnostic v0.3.0 h1:CcQijm0XKekKjP/YCz28LXVSpgguuB+nCxaSjCe09y0=
github.com/googleapis/gnostic v0.3.0/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY=
github.com/gorilla/mux v1.7.3 h1:gnP5JzjVOuiZD07fKKToCAOjS0yOpj/qPETTXCCS6hw=
github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA=
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/json-iterator/go v0.0.0-20180612202835-f2b4162afba3/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.6 h1:MrUvLMLTMxbqFJ9kzlvat/rYZqZnW3u4wkLzWTaFwKs=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.0.0-20190620125010-da37f6c1e481/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180320133207-05fbef0ca5da/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.8.0 h1:VkHVNpR4iVnU8XQR6DBm8BqYjN7CRzw+xKUbVVbbW9w=
github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
github.com/onsi/gomega v1.5.0 h1:izbySO9zDPmjJ8rDjLvkA2zJHIo+HkYXHnf7eN7SSyo=
github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik=
github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU=
github.com/spf13/cobra v0.0.2/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ=
github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/testify v0.0.0-20151208002404-e3a8ff8ce365/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20170830134202-bb24a47a89ea/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190621203818-d432491b9138 h1:t8BZD9RDjkm9/h7yYN6kE8oaeov5r9aztkB7zKA5Tkg=
golang.org/x/sys v0.0.0-20190621203818-d432491b9138/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190614205625-5aca471b1d59/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
k8s.io/api v0.0.0-20190313235455-40a48860b5ab h1:DG9A67baNpoeweOy2spF1OWHhnVY5KR7/Ek/+U1lVZc=
k8s.io/api v0.0.0-20190313235455-40a48860b5ab/go.mod h1:iuAfoD4hCxJ8Onx9kaTIt30j7jUFS00AXQi6QMi99vA=
k8s.io/apimachinery v0.0.0-20190313205120-d7deff9243b1 h1:IS7K02iBkQXpCeieSiyJjGoLSdVOv2DbPaWHJ+ZtgKg=
k8s.io/apimachinery v0.0.0-20190313205120-d7deff9243b1/go.mod h1:ccL7Eh7zubPUSh9A3USN90/OzHNSVN6zxzde07TDCL0=
k8s.io/client-go v11.0.0+incompatible h1:LBbX2+lOwY9flffWlJM7f1Ct8V2SRNiMRDFeiwnJo9o=
k8s.io/client-go v11.0.0+incompatible/go.mod h1:7vJpHMYJwNQCWgzmNV+VYUl1zCObLyodBc8nIyt8L5s=
k8s.io/gengo v0.0.0-20190128074634-0689ccc1d7d6/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0=
k8s.io/klog v0.0.0-20181102134211-b9b56d5dfc92/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk=
k8s.io/klog v0.3.3 h1:niceAagH1tzskmaie/icWd7ci1wbG7Bf2c6YGcQv+3c=
k8s.io/klog v0.3.3/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk=
k8s.io/kube-openapi v0.0.0-20190603182131-db7b694dc208 h1:5sW+fEHvlJI3Ngolx30CmubFulwH28DhKjGf70Xmtco=
k8s.io/kube-openapi v0.0.0-20190603182131-db7b694dc208/go.mod h1:nfDlWeOsu3pUf4yWGL+ERqohP4YsZcBJXWMK+gkzOA4=
sigs.k8s.io/kustomize/v3 v3.1.1-0.20190826160027-84519c236bac h1:kmfwkekoKBD2cZCiold2zvyTDzycW6ieBstiBo352bk=
sigs.k8s.io/kustomize/v3 v3.1.1-0.20190826160027-84519c236bac/go.mod h1:ztX4zYc/QIww3gSripwF7TBOarBTm5BvyAMem0kCzOE=
sigs.k8s.io/structured-merge-diff v0.0.0-20190525122527-15d366b2352e/go.mod h1:wWxsB5ozmmv/SG7nM11ayaAW51xMvak/t1r0CSlcokI=
sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs=
sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=

View File

@@ -9,6 +9,10 @@ import (
redis_cache "github.com/gregjones/httpcache/redis"
)
func FromCache(header http.Header) bool {
return header.Get(httpcache.XFromCache) != ""
}
func NewClient(conn redis.Conn) *http.Client {
etagCache := redis_cache.NewWithClient(conn)
tr := httpcache.NewTransport(etagCache)