mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-11 17:12:51 +00:00
Improve the analysis on generator and transformer
This commit is contained in:
@@ -6,13 +6,9 @@ import (
|
|||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
|
||||||
"os"
|
|
||||||
"sort"
|
"sort"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"sigs.k8s.io/kustomize/api/internal/crawl/crawler/github"
|
|
||||||
|
|
||||||
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
||||||
|
|
||||||
"sigs.k8s.io/kustomize/api/internal/crawl/index"
|
"sigs.k8s.io/kustomize/api/internal/crawl/index"
|
||||||
@@ -35,9 +31,9 @@ func iterateArr(arr []string, countMap map[string]int) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// SortMapKeyByValue takes a map as its input, sorts its keys according to their values
|
// SortMapKeyByValueInt takes a map as its input, sorts its keys according to their values
|
||||||
// in the map, and outputs the sorted keys as a slice.
|
// in the map, and outputs the sorted keys as a slice.
|
||||||
func SortMapKeyByValue(m map[string]int) []string {
|
func SortMapKeyByValueInt(m map[string]int) []string {
|
||||||
keys := make([]string, 0, len(m))
|
keys := make([]string, 0, len(m))
|
||||||
for key := range m {
|
for key := range m {
|
||||||
keys = append(keys, key)
|
keys = append(keys, key)
|
||||||
@@ -47,56 +43,58 @@ func SortMapKeyByValue(m map[string]int) []string {
|
|||||||
return keys
|
return keys
|
||||||
}
|
}
|
||||||
|
|
||||||
func GeneratorOrTransformerStats(ctx context.Context,
|
// SortMapKeyByValueLen takes a map as its input, sorts its keys in descending order of the length of their values
|
||||||
docs []*doc.Document, isGenerator bool, idx *index.KustomizeIndex) {
|
// in the map, and outputs the sorted keys as a slice.
|
||||||
|
func SortMapKeyByValueLen(m map[string][]string) []string {
|
||||||
|
keys := make([]string, 0, len(m))
|
||||||
|
for key := range m {
|
||||||
|
keys = append(keys, key)
|
||||||
|
}
|
||||||
|
// sort keys by the length of their value slices in the map m, longest first
|
||||||
|
sort.Slice(keys, func(i, j int) bool { return len(m[keys[i]]) > len(m[keys[j]]) })
|
||||||
|
return keys
|
||||||
|
}
|
||||||
|
|
||||||
fieldName := "generators"
|
func GeneratorOrTransformerStats(docs []*doc.KustomizationDocument) {
|
||||||
if !isGenerator {
|
n := len(docs)
|
||||||
fieldName = "transformers"
|
if n == 0 {
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// allReferredDocs includes all the documents referred in the field
|
fileType := docs[0].FileType
|
||||||
allReferredDocs := doc.NewUniqueDocuments()
|
fmt.Printf("There are totally %d %s files.\n", n, fileType)
|
||||||
|
|
||||||
// docUsingGeneratorCount counts the number of the kustomization files using generators or transformers
|
GitRepositorySummary(docs, fileType)
|
||||||
docCount := 0
|
|
||||||
|
// key of kindToUrls: a string in the KustomizationDocument.Kinds field
|
||||||
|
// value of kindToUrls: a slice of string urls defining a given kind.
|
||||||
|
kindToUrls := make(map[string][]string)
|
||||||
|
|
||||||
// collect all the documents referred in the field
|
|
||||||
for _, d := range docs {
|
for _, d := range docs {
|
||||||
kdoc := doc.KustomizationDocument{
|
url := fmt.Sprintf("%s/blob/%s/%s", d.RepositoryURL, d.DefaultBranch, d.FilePath)
|
||||||
Document: *d,
|
for _, kind := range d.Kinds {
|
||||||
}
|
if _, ok := kindToUrls[kind]; !ok {
|
||||||
referredDocs, err := kdoc.GetResources(false, !isGenerator, isGenerator)
|
kindToUrls[kind] = []string{url}
|
||||||
if err != nil {
|
} else {
|
||||||
log.Printf("failed to parse the %s field of the Document (%s): %v",
|
kindToUrls[kind] = append(kindToUrls[kind], url)
|
||||||
fieldName, d.Path(), err)
|
}
|
||||||
}
|
}
|
||||||
if len(referredDocs) > 0 {
|
}
|
||||||
docCount++
|
fmt.Printf("There are totally %d kinds of %s\n", len(kindToUrls), fileType)
|
||||||
allReferredDocs.AddDocuments(referredDocs)
|
sortedKeys := SortMapKeyByValueLen(kindToUrls)
|
||||||
|
for _, k := range sortedKeys {
|
||||||
|
sort.Strings(kindToUrls[k])
|
||||||
|
fmt.Printf("%s kind %s appears %d times\n", fileType, k, len(kindToUrls[k]))
|
||||||
|
for _, url := range kindToUrls[k] {
|
||||||
|
fmt.Printf("%s\n", url)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fileCount, dirCount, fileTypeDocs, dirTypeDocs := DocumentTypeSummary(ctx, allReferredDocs.Documents())
|
|
||||||
|
|
||||||
// check whether any of the files are not in the index
|
|
||||||
nonExistFileCount := ExistInIndex(idx, fileTypeDocs, fieldName + " file ")
|
|
||||||
// check whether any of the dirs are not in the index
|
|
||||||
nonExistDirCount := ExistInIndex(idx, dirTypeDocs, fieldName + " dir ")
|
|
||||||
|
|
||||||
GitRepositorySummary(fileTypeDocs, fieldName + " files")
|
|
||||||
GitRepositorySummary(dirTypeDocs, fieldName + " dirs")
|
|
||||||
|
|
||||||
fmt.Printf("%d kustomization files use %s: %d %s are files and %d %s are dirs.\n",
|
|
||||||
docCount, fieldName, fileCount, fieldName, dirCount, fieldName)
|
|
||||||
fmt.Printf("%d %s files do not exist in the index\n", nonExistFileCount, fieldName)
|
|
||||||
fmt.Printf("%d %s dirs do not exist in the index\n", nonExistDirCount, fieldName)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GitRepositorySummary counts the distribution of docs:
|
// GitRepositorySummary counts the distribution of docs:
|
||||||
// 1) how many git repositories are these docs from?
|
// 1) how many git repositories are these docs from?
|
||||||
// 2) how many docs are from each git repository?
|
// 2) how many docs are from each git repository?
|
||||||
func GitRepositorySummary(docs []*doc.Document, msgPrefix string) {
|
func GitRepositorySummary(docs []*doc.KustomizationDocument, fileType string) {
|
||||||
m := make(map[string]int)
|
m := make(map[string]int)
|
||||||
for _, d := range docs {
|
for _, d := range docs {
|
||||||
if _, ok := m[d.RepositoryURL]; ok {
|
if _, ok := m[d.RepositoryURL]; ok {
|
||||||
@@ -105,65 +103,16 @@ func GitRepositorySummary(docs []*doc.Document, msgPrefix string) {
|
|||||||
m[d.RepositoryURL] = 1
|
m[d.RepositoryURL] = 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sortedKeys := SortMapKeyByValue(m)
|
sortedKeys := SortMapKeyByValueInt(m)
|
||||||
|
topN := 10
|
||||||
|
i := 0
|
||||||
for _, k := range sortedKeys {
|
for _, k := range sortedKeys {
|
||||||
fmt.Printf("%d %s are from %s\n", m[k], msgPrefix, k)
|
if i >= topN {
|
||||||
}
|
break
|
||||||
}
|
|
||||||
|
|
||||||
// ExistInIndex goes through each Document in docs, and checks whether it is in the index or not.
|
|
||||||
// It returns the number of documents which do not exist in the index.
|
|
||||||
func ExistInIndex(idx *index.KustomizeIndex, docs []*doc.Document, msgPrefix string) int {
|
|
||||||
nonExistCount := 0
|
|
||||||
for _, d := range docs {
|
|
||||||
exists, err := idx.Exists(d.ID())
|
|
||||||
if err != nil {
|
|
||||||
log.Println(err)
|
|
||||||
}
|
|
||||||
if !exists {
|
|
||||||
log.Printf("%s (%s) does not exist in the index", msgPrefix, d.Path())
|
|
||||||
nonExistCount++
|
|
||||||
}
|
}
|
||||||
|
fmt.Printf("%d %s are from %s\n", m[k], fileType, k)
|
||||||
|
i++
|
||||||
}
|
}
|
||||||
return nonExistCount
|
|
||||||
}
|
|
||||||
|
|
||||||
// DocumentTypeSummary goes through each doc in docs, and determines whether it is a file or dir.
|
|
||||||
func DocumentTypeSummary(ctx context.Context, docs []*doc.Document) (
|
|
||||||
fileCount, dirCount int, files, dirs []*doc.Document) {
|
|
||||||
githubToken := os.Getenv(githubAccessTokenVar)
|
|
||||||
if githubToken == "" {
|
|
||||||
log.Fatalf("Must set the variable '%s' to make github requests.\n",
|
|
||||||
githubAccessTokenVar)
|
|
||||||
}
|
|
||||||
ghCrawler := github.NewCrawler(githubToken, retryCount, &http.Client{}, github.QueryWith())
|
|
||||||
|
|
||||||
for _, d := range docs {
|
|
||||||
oldFilePath := d.FilePath
|
|
||||||
if err := ghCrawler.FetchDocument(ctx, d); err != nil {
|
|
||||||
log.Printf("FetchDocument failed on %s: %v", d.Path(), err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if d.FilePath == oldFilePath {
|
|
||||||
fileCount++
|
|
||||||
files = append(files, d)
|
|
||||||
} else {
|
|
||||||
dirCount++
|
|
||||||
dirs = append(dirs, d)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return fileCount, dirCount, files, dirs
|
|
||||||
}
|
|
||||||
|
|
||||||
// ExistInSlice checks whether target exists in items.
|
|
||||||
func ExistInSlice(items []string, target string) bool {
|
|
||||||
for _, item := range items {
|
|
||||||
if item == target {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -205,11 +154,11 @@ If you only want to list the 10 most popular features, set the flag to 10.`)
|
|||||||
// ids tracks the unique IDs of the documents in the index
|
// ids tracks the unique IDs of the documents in the index
|
||||||
ids := make(map[string]struct{})
|
ids := make(map[string]struct{})
|
||||||
|
|
||||||
// generatorDocs includes all the docs using generators
|
// generatorFiles includes all the non-kustomization files whose FileType is generator
|
||||||
generatorDocs := make([]*doc.Document, 0)
|
generatorFiles := make([]*doc.KustomizationDocument, 0)
|
||||||
|
|
||||||
// transformersDocs includes all the docs using transformers
|
// transformersFiles includes all the non-kustomization files whose FileType is transformer
|
||||||
transformersDocs := make([]*doc.Document, 0)
|
transformersFiles := make([]*doc.KustomizationDocument, 0)
|
||||||
|
|
||||||
checksums := make(map[string]int)
|
checksums := make(map[string]int)
|
||||||
|
|
||||||
@@ -239,11 +188,13 @@ If you only want to list the 10 most popular features, set the flag to 10.`)
|
|||||||
if doc.IsKustomizationFile(hit.Document.FilePath) {
|
if doc.IsKustomizationFile(hit.Document.FilePath) {
|
||||||
kustomizationFilecount++
|
kustomizationFilecount++
|
||||||
iterateArr(hit.Document.Identifiers, kustomizeIdentifiersMap)
|
iterateArr(hit.Document.Identifiers, kustomizeIdentifiersMap)
|
||||||
if ExistInSlice(hit.Document.Identifiers, "generators") {
|
|
||||||
generatorDocs = append(generatorDocs, hit.Document.Copy())
|
} else {
|
||||||
}
|
switch hit.Document.FileType {
|
||||||
if ExistInSlice(hit.Document.Identifiers, "transformers") {
|
case "generator":
|
||||||
transformersDocs = append(transformersDocs, hit.Document.Copy())
|
generatorFiles = append(generatorFiles, hit.Document.Copy())
|
||||||
|
case "transformer":
|
||||||
|
transformersFiles = append(transformersFiles, hit.Document.Copy())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -253,9 +204,9 @@ If you only want to list the 10 most popular features, set the flag to 10.`)
|
|||||||
log.Fatalf("Error iterating: %v\n", err)
|
log.Fatalf("Error iterating: %v\n", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
sortedKindsMapKeys := SortMapKeyByValue(kindsMap)
|
sortedKindsMapKeys := SortMapKeyByValueInt(kindsMap)
|
||||||
sortedIdentifiersMapKeys := SortMapKeyByValue(identifiersMap)
|
sortedIdentifiersMapKeys := SortMapKeyByValueInt(identifiersMap)
|
||||||
sortedKustomizeIdentifiersMapKeys := SortMapKeyByValue(kustomizeIdentifiersMap)
|
sortedKustomizeIdentifiersMapKeys := SortMapKeyByValueInt(kustomizeIdentifiersMap)
|
||||||
|
|
||||||
fmt.Printf(`The count of unique document IDs in the kustomize index: %d
|
fmt.Printf(`The count of unique document IDs in the kustomize index: %d
|
||||||
There are %d documents in the kustomize index.
|
There are %d documents in the kustomize index.
|
||||||
@@ -290,11 +241,11 @@ There are %d documents in the kustomize index.
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
GeneratorOrTransformerStats(ctx, generatorDocs, true, idx)
|
GeneratorOrTransformerStats(generatorFiles)
|
||||||
GeneratorOrTransformerStats(ctx, transformersDocs, false, idx)
|
GeneratorOrTransformerStats(transformersFiles)
|
||||||
|
|
||||||
fmt.Printf("There are total %d checksums of document contents\n", len(checksums))
|
fmt.Printf("There are total %d checksums of document contents\n", len(checksums))
|
||||||
sortedChecksums := SortMapKeyByValue(checksums)
|
sortedChecksums := SortMapKeyByValueInt(checksums)
|
||||||
sortedChecksums = sortedChecksums[:20]
|
sortedChecksums = sortedChecksums[:20]
|
||||||
fmt.Printf("The top 20 checksums are:\n")
|
fmt.Printf("The top 20 checksums are:\n")
|
||||||
for _, key := range sortedChecksums {
|
for _, key := range sortedChecksums {
|
||||||
|
|||||||
Reference in New Issue
Block a user