mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-06-11 17:12:51 +00:00
Improve the analysis on generator and transformer
This commit is contained in:
@@ -6,13 +6,9 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/crawler/github"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/doc"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/internal/crawl/index"
|
||||
@@ -35,9 +31,9 @@ func iterateArr(arr []string, countMap map[string]int) {
|
||||
|
||||
}
|
||||
|
||||
// SortMapKeyByValue takes a map as its input, sorts its keys according to their values
|
||||
// SortMapKeyByValueInt takes a map as its input, sorts its keys according to their values
|
||||
// in the map, and outputs the sorted keys as a slice.
|
||||
func SortMapKeyByValue(m map[string]int) []string {
|
||||
func SortMapKeyByValueInt(m map[string]int) []string {
|
||||
keys := make([]string, 0, len(m))
|
||||
for key := range m {
|
||||
keys = append(keys, key)
|
||||
@@ -47,56 +43,58 @@ func SortMapKeyByValue(m map[string]int) []string {
|
||||
return keys
|
||||
}
|
||||
|
||||
func GeneratorOrTransformerStats(ctx context.Context,
|
||||
docs []*doc.Document, isGenerator bool, idx *index.KustomizeIndex) {
|
||||
// SortMapKeyByValue takes a map as its input, sorts its keys according to their values
|
||||
// in the map, and outputs the sorted keys as a slice.
|
||||
func SortMapKeyByValueLen(m map[string][]string) []string {
|
||||
keys := make([]string, 0, len(m))
|
||||
for key := range m {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
// sort keys according to their values in the map m
|
||||
sort.Slice(keys, func(i, j int) bool { return len(m[keys[i]]) > len(m[keys[j]]) })
|
||||
return keys
|
||||
}
|
||||
|
||||
fieldName := "generators"
|
||||
if !isGenerator {
|
||||
fieldName = "transformers"
|
||||
func GeneratorOrTransformerStats(docs []*doc.KustomizationDocument) {
|
||||
n := len(docs)
|
||||
if n == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// allReferredDocs includes all the documents referred in the field
|
||||
allReferredDocs := doc.NewUniqueDocuments()
|
||||
fileType := docs[0].FileType
|
||||
fmt.Printf("There are totally %d %s files.\n", n, fileType)
|
||||
|
||||
// docUsingGeneratorCount counts the number of the kustomization files using generators or transformers
|
||||
docCount := 0
|
||||
GitRepositorySummary(docs, fileType)
|
||||
|
||||
// key of kindToUrls: a string in the KustomizationDocument.Kinds field
|
||||
// value of kindToUrls: a slice of string urls defining a given kind.
|
||||
kindToUrls := make(map[string][]string)
|
||||
|
||||
// collect all the documents referred in the field
|
||||
for _, d := range docs {
|
||||
kdoc := doc.KustomizationDocument{
|
||||
Document: *d,
|
||||
}
|
||||
referredDocs, err := kdoc.GetResources(false, !isGenerator, isGenerator)
|
||||
if err != nil {
|
||||
log.Printf("failed to parse the %s field of the Document (%s): %v",
|
||||
fieldName, d.Path(), err)
|
||||
}
|
||||
if len(referredDocs) > 0 {
|
||||
docCount++
|
||||
allReferredDocs.AddDocuments(referredDocs)
|
||||
url := fmt.Sprintf("%s/blob/%s/%s", d.RepositoryURL, d.DefaultBranch, d.FilePath)
|
||||
for _, kind := range d.Kinds {
|
||||
if _, ok := kindToUrls[kind]; !ok {
|
||||
kindToUrls[kind] = []string{url}
|
||||
} else {
|
||||
kindToUrls[kind] = append(kindToUrls[kind], url)
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Printf("There are totally %d kinds of %s\n", len(kindToUrls), fileType)
|
||||
sortedKeys := SortMapKeyByValueLen(kindToUrls)
|
||||
for _, k := range sortedKeys {
|
||||
sort.Strings(kindToUrls[k])
|
||||
fmt.Printf("%s kind %s appears %d times\n", fileType, k, len(kindToUrls[k]))
|
||||
for _, url := range kindToUrls[k] {
|
||||
fmt.Printf("%s\n", url)
|
||||
}
|
||||
}
|
||||
|
||||
fileCount, dirCount, fileTypeDocs, dirTypeDocs := DocumentTypeSummary(ctx, allReferredDocs.Documents())
|
||||
|
||||
// check whether any of the files are not in the index
|
||||
nonExistFileCount := ExistInIndex(idx, fileTypeDocs, fieldName + " file ")
|
||||
// check whether any of the dirs are not in the index
|
||||
nonExistDirCount := ExistInIndex(idx, dirTypeDocs, fieldName + " dir ")
|
||||
|
||||
GitRepositorySummary(fileTypeDocs, fieldName + " files")
|
||||
GitRepositorySummary(dirTypeDocs, fieldName + " dirs")
|
||||
|
||||
fmt.Printf("%d kustomization files use %s: %d %s are files and %d %s are dirs.\n",
|
||||
docCount, fieldName, fileCount, fieldName, dirCount, fieldName)
|
||||
fmt.Printf("%d %s files do not exist in the index\n", nonExistFileCount, fieldName)
|
||||
fmt.Printf("%d %s dirs do not exist in the index\n", nonExistDirCount, fieldName)
|
||||
}
|
||||
|
||||
// GitRepositorySummary counts the distribution of docs:
|
||||
// 1) how many git repositories are these docs from?
|
||||
// 2) how many docs are from each git repository?
|
||||
func GitRepositorySummary(docs []*doc.Document, msgPrefix string) {
|
||||
func GitRepositorySummary(docs []*doc.KustomizationDocument, fileType string) {
|
||||
m := make(map[string]int)
|
||||
for _, d := range docs {
|
||||
if _, ok := m[d.RepositoryURL]; ok {
|
||||
@@ -105,65 +103,16 @@ func GitRepositorySummary(docs []*doc.Document, msgPrefix string) {
|
||||
m[d.RepositoryURL] = 1
|
||||
}
|
||||
}
|
||||
sortedKeys := SortMapKeyByValue(m)
|
||||
sortedKeys := SortMapKeyByValueInt(m)
|
||||
topN := 10
|
||||
i := 0
|
||||
for _, k := range sortedKeys {
|
||||
fmt.Printf("%d %s are from %s\n", m[k], msgPrefix, k)
|
||||
}
|
||||
}
|
||||
|
||||
// ExistInIndex goes through each Document in docs, and check whether it is in the index or not.
|
||||
// It returns the number of documents which does not exist in the index.
|
||||
func ExistInIndex(idx *index.KustomizeIndex, docs []*doc.Document, msgPrefix string) int {
|
||||
nonExistCount := 0
|
||||
for _, d := range docs {
|
||||
exists, err := idx.Exists(d.ID())
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
if !exists {
|
||||
log.Printf("%s (%s) does not exist in the index", msgPrefix, d.Path())
|
||||
nonExistCount++
|
||||
if i >= topN {
|
||||
break
|
||||
}
|
||||
fmt.Printf("%d %s are from %s\n", m[k], fileType, k)
|
||||
i++
|
||||
}
|
||||
return nonExistCount
|
||||
}
|
||||
|
||||
// DocumentTypeSummary goes through each doc in docs, and determines whether it is a file or dir.
|
||||
func DocumentTypeSummary(ctx context.Context, docs []*doc.Document) (
|
||||
fileCount, dirCount int, files, dirs []*doc.Document) {
|
||||
githubToken := os.Getenv(githubAccessTokenVar)
|
||||
if githubToken == "" {
|
||||
log.Fatalf("Must set the variable '%s' to make github requests.\n",
|
||||
githubAccessTokenVar)
|
||||
}
|
||||
ghCrawler := github.NewCrawler(githubToken, retryCount, &http.Client{}, github.QueryWith())
|
||||
|
||||
for _, d := range docs {
|
||||
oldFilePath := d.FilePath
|
||||
if err := ghCrawler.FetchDocument(ctx, d); err != nil {
|
||||
log.Printf("FetchDocument failed on %s: %v", d.Path(), err)
|
||||
continue
|
||||
}
|
||||
|
||||
if d.FilePath == oldFilePath {
|
||||
fileCount++
|
||||
files = append(files, d)
|
||||
} else {
|
||||
dirCount++
|
||||
dirs = append(dirs, d)
|
||||
}
|
||||
}
|
||||
return fileCount, dirCount, files, dirs
|
||||
}
|
||||
|
||||
// ExistInSlice checks where target exits in items.
|
||||
func ExistInSlice(items []string, target string) bool {
|
||||
for _, item := range items {
|
||||
if item == target {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func main() {
|
||||
@@ -205,11 +154,11 @@ If you only want to list the 10 most popular features, set the flag to 10.`)
|
||||
// ids tracks the unique IDs of the documents in the index
|
||||
ids := make(map[string]struct{})
|
||||
|
||||
// generatorDocs includes all the docs using generators
|
||||
generatorDocs := make([]*doc.Document, 0)
|
||||
// generatorFiles include all the non-kustomization files whose FileType is generator
|
||||
generatorFiles := make([]*doc.KustomizationDocument, 0)
|
||||
|
||||
// transformersDocs includes all the docs using transformers
|
||||
transformersDocs := make([]*doc.Document, 0)
|
||||
// transformersFiles include all the non-kustomization files whose FileType is transformer
|
||||
transformersFiles := make([]*doc.KustomizationDocument, 0)
|
||||
|
||||
checksums := make(map[string]int)
|
||||
|
||||
@@ -239,11 +188,13 @@ If you only want to list the 10 most popular features, set the flag to 10.`)
|
||||
if doc.IsKustomizationFile(hit.Document.FilePath) {
|
||||
kustomizationFilecount++
|
||||
iterateArr(hit.Document.Identifiers, kustomizeIdentifiersMap)
|
||||
if ExistInSlice(hit.Document.Identifiers, "generators") {
|
||||
generatorDocs = append(generatorDocs, hit.Document.Copy())
|
||||
}
|
||||
if ExistInSlice(hit.Document.Identifiers, "transformers") {
|
||||
transformersDocs = append(transformersDocs, hit.Document.Copy())
|
||||
|
||||
} else {
|
||||
switch hit.Document.FileType {
|
||||
case "generator":
|
||||
generatorFiles = append(generatorFiles, hit.Document.Copy())
|
||||
case "transformer":
|
||||
transformersFiles = append(transformersFiles, hit.Document.Copy())
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -253,9 +204,9 @@ If you only want to list the 10 most popular features, set the flag to 10.`)
|
||||
log.Fatalf("Error iterating: %v\n", err)
|
||||
}
|
||||
|
||||
sortedKindsMapKeys := SortMapKeyByValue(kindsMap)
|
||||
sortedIdentifiersMapKeys := SortMapKeyByValue(identifiersMap)
|
||||
sortedKustomizeIdentifiersMapKeys := SortMapKeyByValue(kustomizeIdentifiersMap)
|
||||
sortedKindsMapKeys := SortMapKeyByValueInt(kindsMap)
|
||||
sortedIdentifiersMapKeys := SortMapKeyByValueInt(identifiersMap)
|
||||
sortedKustomizeIdentifiersMapKeys := SortMapKeyByValueInt(kustomizeIdentifiersMap)
|
||||
|
||||
fmt.Printf(`The count of unique document IDs in the kustomize index: %d
|
||||
There are %d documents in the kustomize index.
|
||||
@@ -290,11 +241,11 @@ There are %d documents in the kustomize index.
|
||||
}
|
||||
}
|
||||
|
||||
GeneratorOrTransformerStats(ctx, generatorDocs, true, idx)
|
||||
GeneratorOrTransformerStats(ctx, transformersDocs, false, idx)
|
||||
GeneratorOrTransformerStats(generatorFiles)
|
||||
GeneratorOrTransformerStats(transformersFiles)
|
||||
|
||||
fmt.Printf("There are total %d checksums of document contents\n", len(checksums))
|
||||
sortedChecksums := SortMapKeyByValue(checksums)
|
||||
sortedChecksums := SortMapKeyByValueInt(checksums)
|
||||
sortedChecksums = sortedChecksums[:20]
|
||||
fmt.Printf("The top 20 checksums are:\n")
|
||||
for _, key := range sortedChecksums {
|
||||
|
||||
Reference in New Issue
Block a user