mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-11 17:12:51 +00:00
Move SeenMap to the utils dir
This commit is contained in:
@@ -9,6 +9,8 @@ import (
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/crawler"
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/crawler/github"
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
||||
@@ -26,6 +28,7 @@ const (
|
||||
)
|
||||
|
||||
type CrawlMode int
|
||||
|
||||
const (
|
||||
CrawlUnknown CrawlMode = iota
|
||||
// Crawl all the kustomization files in all the repositories of a Github user
|
||||
@@ -125,13 +128,13 @@ func main() {
|
||||
|
||||
// seen tracks the IDs of all the documents in the index.
|
||||
// This helps avoid indexing a given document multiple times.
|
||||
seen := crawler.NewSeenMap()
|
||||
seen := utils.NewSeenMap()
|
||||
|
||||
mode := NewCrawlMode(*modePtr)
|
||||
|
||||
ghCrawlerConstructor := func(user, repo string) crawler.Crawler {
|
||||
if user != "" {
|
||||
return github.NewCrawler(githubToken, retryCount, clientCache,
|
||||
return github.NewCrawler(githubToken, retryCount, clientCache,
|
||||
github.QueryWith(
|
||||
github.Filename("kustomization.yaml"),
|
||||
github.Filename("kustomization.yml"),
|
||||
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/crawler"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
@@ -51,10 +50,10 @@ func GeneratorAndTransformerStats(ctx context.Context,
|
||||
generatorDocs []*doc.Document, transformerDocs []*doc.Document,
|
||||
idx *index.KustomizeIndex) {
|
||||
// allGenerators includes all the documents referred in the generators field
|
||||
allGenerators := crawler.NewUniqueDocuments()
|
||||
allGenerators := doc.NewUniqueDocuments()
|
||||
|
||||
// allTransformers includes all the documents referred in the transformers field
|
||||
allTransformers := crawler.NewUniqueDocuments()
|
||||
allTransformers := doc.NewUniqueDocuments()
|
||||
|
||||
// docUsingGeneratorCount counts the number of the kustomization files using generators
|
||||
docUsingGeneratorCount := 0
|
||||
|
||||
@@ -10,6 +10,8 @@ import (
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/index"
|
||||
|
||||
_ "github.com/gomodule/redigo/redis"
|
||||
@@ -29,7 +31,7 @@ type Crawler interface {
|
||||
// Crawl returns when it is done processing. This method does not take
|
||||
// ownership of the channel. The channel is write only, and it
|
||||
// designates where the crawler should forward the documents.
|
||||
Crawl(ctx context.Context, output chan<- CrawledDocument, seen SeenMap) error
|
||||
Crawl(ctx context.Context, output chan<- CrawledDocument, seen utils.SeenMap) error
|
||||
|
||||
// Get the document data given the FilePath, Repo, and Ref/Tag/Branch.
|
||||
FetchDocument(context.Context, *doc.Document) error
|
||||
@@ -52,21 +54,6 @@ type CrawledDocument interface {
|
||||
WasCached() bool
|
||||
}
|
||||
|
||||
type SeenMap map[string]struct{}
|
||||
|
||||
func (seen SeenMap) Seen(item string) bool {
|
||||
_, ok := seen[item]
|
||||
return ok
|
||||
}
|
||||
|
||||
func (seen SeenMap) Add(item string) {
|
||||
seen[item] = struct{}{}
|
||||
}
|
||||
|
||||
func NewSeenMap() SeenMap {
|
||||
return make(map[string]struct{})
|
||||
}
|
||||
|
||||
type CrawlSeed []*doc.Document
|
||||
|
||||
type IndexFunc func(CrawledDocument, index.Mode) error
|
||||
@@ -89,7 +76,7 @@ func findMatch(d *doc.Document, crawlers []Crawler) Crawler {
|
||||
}
|
||||
|
||||
func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
||||
seen SeenMap, stack *CrawlSeed) {
|
||||
seen utils.SeenMap, stack *CrawlSeed) {
|
||||
|
||||
seen.Add(cdoc.ID())
|
||||
|
||||
@@ -115,7 +102,7 @@ func addBranches(cdoc CrawledDocument, match Crawler, indx IndexFunc,
|
||||
}
|
||||
|
||||
func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv Converter, indx IndexFunc,
|
||||
seen SeenMap, stack *CrawlSeed) {
|
||||
seen utils.SeenMap, stack *CrawlSeed) {
|
||||
|
||||
UpdatedDocCount := 0
|
||||
seenDocCount := 0
|
||||
@@ -166,7 +153,6 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
||||
// `bases` field.
|
||||
seen.Add(tail.ID())
|
||||
|
||||
|
||||
if err := match.FetchDocument(ctx, tail); err != nil {
|
||||
logger.Printf("FetchDocument failed on doc(%s): %v", tail.Path(), err)
|
||||
FetchDocumentErrCount++
|
||||
@@ -212,7 +198,7 @@ func doCrawl(ctx context.Context, docsPtr *CrawlSeed, crawlers []Crawler, conv C
|
||||
// CrawlFromSeed updates all the documents in seed, and crawls all the new
|
||||
// documents referred in the seed.
|
||||
func CrawlFromSeed(ctx context.Context, seed CrawlSeed, crawlers []Crawler,
|
||||
conv Converter, indx IndexFunc, seen SeenMap) {
|
||||
conv Converter, indx IndexFunc, seen utils.SeenMap) {
|
||||
|
||||
// stack tracks the documents directly referred in other documents.
|
||||
stack := make(CrawlSeed, 0)
|
||||
@@ -248,7 +234,7 @@ func CrawlFromSeed(ctx context.Context, seed CrawlSeed, crawlers []Crawler,
|
||||
// from the seed will be processed before any other documents from the
|
||||
// crawlers.
|
||||
func CrawlGithubRunner(ctx context.Context, output chan<- CrawledDocument,
|
||||
crawlers []Crawler, seen SeenMap) []error {
|
||||
crawlers []Crawler, seen utils.SeenMap) []error {
|
||||
|
||||
errs := make([]error, len(crawlers))
|
||||
wg := sync.WaitGroup{}
|
||||
@@ -292,7 +278,7 @@ func CrawlGithubRunner(ctx context.Context, output chan<- CrawledDocument,
|
||||
|
||||
// CrawlGithub crawls all the kustomization files on Github.
|
||||
func CrawlGithub(ctx context.Context, crawlers []Crawler, conv Converter,
|
||||
indx IndexFunc, seen SeenMap) {
|
||||
indx IndexFunc, seen utils.SeenMap) {
|
||||
// stack tracks the documents directly referred in other documents.
|
||||
stack := make(CrawlSeed, 0)
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/index"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
||||
@@ -76,7 +78,7 @@ func newCrawler(matchPrefix string, err error,
|
||||
|
||||
// Crawl implements the Crawler interface for testing.
|
||||
func (c testCrawler) Crawl(_ context.Context,
|
||||
output chan<- CrawledDocument, _ SeenMap) error {
|
||||
output chan<- CrawledDocument, _ utils.SeenMap) error {
|
||||
|
||||
for i, d := range c.docs {
|
||||
isResource := true
|
||||
@@ -182,7 +184,7 @@ func TestCrawlGithubRunner(t *testing.T) {
|
||||
defer close(output)
|
||||
defer wg.Done()
|
||||
|
||||
seen := NewSeenMap()
|
||||
seen := utils.NewSeenMap()
|
||||
errs := CrawlGithubRunner(context.Background(),
|
||||
output, test.tc, seen)
|
||||
|
||||
@@ -324,7 +326,7 @@ resources:
|
||||
visited[d.ID()]++
|
||||
return nil
|
||||
},
|
||||
NewSeenMap(),
|
||||
utils.NewSeenMap(),
|
||||
)
|
||||
if lv, lc := len(visited), len(tc.corpus); lv != lc {
|
||||
t.Errorf("error: %d of %d documents visited.", lv, lc)
|
||||
|
||||
@@ -16,6 +16,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/crawler"
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/httpclient"
|
||||
@@ -68,7 +70,7 @@ func (gc githubCrawler) DefaultBranch(repo string) string {
|
||||
|
||||
// Implements crawler.Crawler.
|
||||
func (gc githubCrawler) Crawl(ctx context.Context,
|
||||
output chan<- crawler.CrawledDocument, seen crawler.SeenMap) error {
|
||||
output chan<- crawler.CrawledDocument, seen utils.SeenMap) error {
|
||||
|
||||
noETagClient := GhClient{
|
||||
RequestConfig: gc.client.RequestConfig,
|
||||
@@ -195,9 +197,9 @@ func (gc githubCrawler) Match(d *doc.Document) bool {
|
||||
|
||||
type RangeQueryResult struct {
|
||||
totalDocCnt uint64
|
||||
seenDocCnt uint64
|
||||
newDocCnt uint64
|
||||
errorCnt uint64
|
||||
seenDocCnt uint64
|
||||
newDocCnt uint64
|
||||
errorCnt uint64
|
||||
}
|
||||
|
||||
func (r *RangeQueryResult) Add(other RangeQueryResult) {
|
||||
@@ -209,7 +211,7 @@ func (r *RangeQueryResult) Add(other RangeQueryResult) {
|
||||
|
||||
func (r *RangeQueryResult) String() string {
|
||||
return fmt.Sprintf("got %d files from API. "+
|
||||
"%d have been seen before. %d are new and sent to the output channel." +
|
||||
"%d have been seen before. %d are new and sent to the output channel."+
|
||||
" %d have kustomizationResultAdapter errors.",
|
||||
r.totalDocCnt, r.seenDocCnt, r.newDocCnt, r.errorCnt)
|
||||
}
|
||||
@@ -217,7 +219,7 @@ func (r *RangeQueryResult) String() string {
|
||||
// processQuery follows all of the pages in a query, and updates/adds the
|
||||
// documents from the crawl to the datastore/index.
|
||||
func processQuery(ctx context.Context, gcl GhClient, query string,
|
||||
output chan<- crawler.CrawledDocument, seen crawler.SeenMap,
|
||||
output chan<- crawler.CrawledDocument, seen utils.SeenMap,
|
||||
branchMap map[string]string) (RangeQueryResult, error) {
|
||||
|
||||
queryPages := make(chan GhResponseInfo)
|
||||
@@ -271,7 +273,7 @@ func processQuery(ctx context.Context, gcl GhClient, query string,
|
||||
return result, errs
|
||||
}
|
||||
|
||||
func kustomizationResultAdapter(gcl GhClient, k GhFileSpec, seen crawler.SeenMap,
|
||||
func kustomizationResultAdapter(gcl GhClient, k GhFileSpec, seen utils.SeenMap,
|
||||
branchMap map[string]string) (crawler.CrawledDocument, error) {
|
||||
url := gcl.ReposRequest(k.Repository.FullName)
|
||||
defaultBranch, err := gcl.GetDefaultBranch(url, k.Repository.URL, branchMap)
|
||||
|
||||
@@ -117,7 +117,7 @@ type RequestConfig struct {
|
||||
// understand why the request object is useful.
|
||||
func (rc RequestConfig) CodeSearchRequestWith(query Query) request {
|
||||
vals := url.Values{
|
||||
"sort": []string{"indexed"},
|
||||
"sort": []string{"indexed"},
|
||||
"order": []string{"desc"},
|
||||
}
|
||||
req := rc.makeRequest("search/code", query, vals)
|
||||
|
||||
@@ -65,7 +65,7 @@ func TestFromRelativePath(t *testing.T) {
|
||||
|
||||
func TestDocument_RepositoryFullName(t *testing.T) {
|
||||
testCases := []struct {
|
||||
doc Document
|
||||
doc Document
|
||||
expectedRepositoryFullName string
|
||||
}{
|
||||
{
|
||||
|
||||
36
api/internal/crawl/doc/unique_doc.go
Normal file
36
api/internal/crawl/doc/unique_doc.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package doc
|
||||
|
||||
import (
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/utils"
|
||||
)
|
||||
|
||||
// UniqueDocuments make sure a Document with a given ID appears only once
|
||||
type UniqueDocuments struct {
|
||||
docs []*Document
|
||||
docIDs utils.SeenMap
|
||||
}
|
||||
|
||||
func NewUniqueDocuments() UniqueDocuments {
|
||||
return UniqueDocuments{
|
||||
docs: []*Document{},
|
||||
docIDs: utils.NewSeenMap(),
|
||||
}
|
||||
}
|
||||
|
||||
func (uds *UniqueDocuments) Add(d *Document) {
|
||||
if uds.docIDs.Seen(d.ID()) {
|
||||
return
|
||||
}
|
||||
uds.docs = append(uds.docs, d)
|
||||
uds.docIDs.Add(d.ID())
|
||||
}
|
||||
|
||||
func (uds *UniqueDocuments) AddDocuments(docs []*Document) {
|
||||
for _, d := range docs {
|
||||
uds.Add(d)
|
||||
}
|
||||
}
|
||||
|
||||
func (uds *UniqueDocuments) Documents() []*Document {
|
||||
return uds.docs
|
||||
}
|
||||
@@ -18,6 +18,7 @@ const (
|
||||
)
|
||||
|
||||
type Mode int
|
||||
|
||||
const (
|
||||
InsertOrUpdate = iota
|
||||
Delete
|
||||
|
||||
16
api/internal/crawl/utils/utils.go
Normal file
16
api/internal/crawl/utils/utils.go
Normal file
@@ -0,0 +1,16 @@
|
||||
package utils
|
||||
|
||||
// SeenMap is a set of string identifiers that have already been
// encountered. A nil SeenMap can be queried with Seen, but Add panics on
// it; use NewSeenMap to obtain a writable set.
type SeenMap map[string]struct{}

// Seen reports whether item has been recorded in the set.
func (seen SeenMap) Seen(item string) bool {
	_, ok := seen[item]
	return ok
}

// Add records item in the set. Adding an item that is already present
// leaves the set unchanged.
func (seen SeenMap) Add(item string) {
	seen[item] = struct{}{}
}

// NewSeenMap returns an empty, writable SeenMap.
func NewSeenMap() SeenMap {
	return make(SeenMap)
}
|
||||
Reference in New Issue
Block a user