mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-12 17:34:21 +00:00
Multiple improvements to the crawler
1) Set document IDs to avoid duplicating documents; 2) Set the `creationTime` field of each document in the index; 3) Set the `values`, `kinds` and `identifiers` fields for all documents; 4) Add a `Copy` method to the `Document` struct: this fixes the issue where all the documents existing in the index point to the same `Document` object; 5) Avoid using the Redis keystore; 6) Set `imagePullPolicy` to `Always` for crawler jobs.
This commit is contained in:
@@ -133,8 +133,12 @@ func (gc githubCrawler) FetchDocument(_ context.Context, d *doc.Document) error
|
||||
}
|
||||
|
||||
func (gc githubCrawler) SetCreated(_ context.Context, d *doc.Document) error {
|
||||
fs := GhFileSpec{}
|
||||
fs.Repository.FullName = d.RepositoryURL + "/" + d.FilePath
|
||||
fs := GhFileSpec{
|
||||
Path: d.FilePath,
|
||||
Repository: GitRepository{
|
||||
FullName: d.RepositoryFullName(),
|
||||
},
|
||||
}
|
||||
creationTime, err := gc.client.GetFileCreationTime(fs)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -185,9 +189,9 @@ func processQuery(ctx context.Context, gcl GhClient, query string,
|
||||
for _, file := range page.Parsed.Items {
|
||||
k, err := kustomizationResultAdapter(gcl, file)
|
||||
if err != nil {
|
||||
logger.Printf("kustomizationResultAdapter failed: %v", err)
|
||||
errs = append(errs, err)
|
||||
errorCnt++
|
||||
continue
|
||||
}
|
||||
output <- k
|
||||
totalCnt++
|
||||
@@ -224,6 +228,18 @@ func kustomizationResultAdapter(gcl GhClient, k GhFileSpec) (
|
||||
RepositoryURL: k.Repository.URL,
|
||||
},
|
||||
}
|
||||
logger.Printf("Set the creationTime field")
|
||||
creationTime, err := gcl.GetFileCreationTime(k)
|
||||
if err != nil {
|
||||
logger.Printf("GetFileCreationTime failed: %v", err)
|
||||
return &d, err
|
||||
}
|
||||
d.CreationTime = &creationTime
|
||||
|
||||
if err := d.ParseYAML(); err != nil {
|
||||
logger.Printf("ParseYAML failed: %v", err)
|
||||
return &d, err
|
||||
}
|
||||
|
||||
return &d, nil
|
||||
}
|
||||
@@ -410,13 +426,15 @@ func (e multiError) Error() string {
|
||||
return strings.Join(strs, "\n")
|
||||
}
|
||||
|
||||
type GitRepository struct {
|
||||
API string `json:"url,omitempty"`
|
||||
URL string `json:"html_url,omitempty"`
|
||||
FullName string `json:"full_name,omitempty"`
|
||||
}
|
||||
|
||||
type GhFileSpec struct {
|
||||
Path string `json:"path,omitempty"`
|
||||
Repository struct {
|
||||
API string `json:"url,omitempty"`
|
||||
URL string `json:"html_url,omitempty"`
|
||||
FullName string `json:"full_name,omitempty"`
|
||||
} `json:"repository,omitempty"`
|
||||
Path string `json:"path,omitempty"`
|
||||
Repository GitRepository `json:"repository,omitempty"`
|
||||
}
|
||||
|
||||
type githubResponse struct {
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
perPageArg = "per_page"
|
||||
perPageArg = "per_page"
|
||||
)
|
||||
|
||||
const githubMaxPageSize = 100
|
||||
|
||||
@@ -68,7 +68,7 @@ func TestQueryType(t *testing.T) {
|
||||
|
||||
func TestGithubSearchQuery(t *testing.T) {
|
||||
const (
|
||||
perPage = 100
|
||||
perPage = 100
|
||||
)
|
||||
|
||||
testCases := []struct {
|
||||
@@ -82,7 +82,7 @@ func TestGithubSearchQuery(t *testing.T) {
|
||||
}{
|
||||
{
|
||||
rc: RequestConfig{
|
||||
perPage: perPage,
|
||||
perPage: perPage,
|
||||
},
|
||||
codeQuery: Query{
|
||||
Filename("kustomization.yaml"),
|
||||
|
||||
Reference in New Issue
Block a user