From 594a3bf0d2f7d9ac7777db28187f6d8ed8b919d3 Mon Sep 17 00:00:00 2001 From: Haiyan Meng Date: Fri, 20 Dec 2019 10:01:15 -0800 Subject: [PATCH] Add a binary for generating the stats of the index 1) how many kinds of objects are being customized? 2) how many times is every kind of object customized? 3) how many kustomization features are being used? 4) how many times is every kustomization feature used? --- .../crawl/cmd/kustomize_stats/Dockerfile | 14 ++ .../crawl/cmd/kustomize_stats/main.go | 150 ++++++++++++++++++ .../config/crawler/kustomize_stats/job.yaml | 20 +++ .../kustomize_stats/kustomization.yaml | 3 + 4 files changed, 187 insertions(+) create mode 100644 api/internal/crawl/cmd/kustomize_stats/Dockerfile create mode 100644 api/internal/crawl/cmd/kustomize_stats/main.go create mode 100644 api/internal/crawl/config/crawler/kustomize_stats/job.yaml create mode 100644 api/internal/crawl/config/crawler/kustomize_stats/kustomization.yaml diff --git a/api/internal/crawl/cmd/kustomize_stats/Dockerfile b/api/internal/crawl/cmd/kustomize_stats/Dockerfile new file mode 100644 index 000000000..80e33948f --- /dev/null +++ b/api/internal/crawl/cmd/kustomize_stats/Dockerfile @@ -0,0 +1,14 @@ +FROM golang:1.11 AS build + +ARG GO111MODULE=on + +WORKDIR /go/src/sigs.k8s.io/kustomize/api/internal/crawl +COPY . /go/src/sigs.k8s.io/kustomize/api/internal/crawl + +RUN go mod download +RUN CGO_ENABLED=0 go install ./cmd/kustomize_stats + +FROM scratch +COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ +COPY --from=build /go/bin/kustomize_stats / +ENTRYPOINT ["/kustomize_stats"] diff --git a/api/internal/crawl/cmd/kustomize_stats/main.go b/api/internal/crawl/cmd/kustomize_stats/main.go new file mode 100644 index 000000000..403989d86 --- /dev/null +++ b/api/internal/crawl/cmd/kustomize_stats/main.go @@ -0,0 +1,150 @@ +package main + +import ( + "context" + "flag" + "fmt" + "log" + "path/filepath" + "sort" + "time" + + "sigs.k8s.io/kustomize/api/internal/crawl/index" + "sigs.k8s.io/kustomize/api/konfig" +) + +// iterateArr adds each item in arr into countMap. +func iterateArr(arr []string, countMap map[string]int) { + for _, item := range arr { + if _, ok := countMap[item]; !ok { + countMap[item] = 1 + } else { + countMap[item]++ + } + } + +} + +// isKustomizationFile determines whether a file path is a kustomization file +func isKustomizationFile(path string) bool { + basename := filepath.Base(path) + for _, name := range konfig.RecognizedKustomizationFileNames() { + if basename == name { + return true + } + } + return false +} + +// SortMapKeyByValue takes a map as its input, sorts its keys according to their values +// in the map, and outputs the sorted keys as a slice. +func SortMapKeyByValue(m map[string]int) []string { + keys := make([]string, 0, len(m)) + for key := range m { + keys = append(keys, key) + } + // sort keys according to their values in the map m + sort.Slice(keys, func(i, j int) bool {return m[keys[i]] > m[keys[j]]}) + return keys +} + +func main() { + topKindsPtr := flag.Int( + "kinds", -1, + `the number of kubernetes object kinds to be listed according to their popularities. +By default, all the kinds will be listed. +If you only want to list the 10 most popular kinds, set the flag to 10.`) + topIdentifiersPtr := flag.Int( + "identifiers", -1, + `the number of identifiers to be listed according to their popularities. +By default, all the identifiers will be listed. +If you only want to list the 10 most popular identifiers, set the flag to 10.`) + topKustomizeFeaturesPtr := flag.Int( + "kustomize-features", -1, + `the number of kustomize features to be listed according to their popularities. +By default, all the features will be listed. +If you only want to list the 10 most popular features, set the flag to 10.`) + flag.Parse() + + ctx := context.Background() + idx, err := index.NewKustomizeIndex(ctx) + if err != nil { + log.Fatalf("Could not create an index: %v\n", err) + } + + // count tracks the number of documents in the index + count := 0 + + // kustomizationFilecount tracks the number of kustomization files in the index + kustomizationFilecount := 0 + + kindsMap := make(map[string]int) + identifiersMap := make(map[string]int) + kustomizeIdentifiersMap := make(map[string]int) + + // ids tracks the unique IDs of the documents in the index + ids := make(map[string]struct{}) + + // get all the documents in the index + query := []byte(`{ "query":{ "match_all":{} } }`) + it := idx.IterateQuery(query, 10000, 60*time.Second) + for it.Next() { + for _, hit := range it.Value().Hits.Hits { + // check whether there is any duplicate IDs in the index + if _, ok := ids[hit.ID]; !ok { + ids[hit.ID] = struct{}{} + } else { + fmt.Printf("Found duplicate ID (%s)\n", hit.ID) + } + + count++ + iterateArr(hit.Document.Kinds, kindsMap) + iterateArr(hit.Document.Identifiers, identifiersMap) + + if isKustomizationFile(hit.Document.FilePath) { + kustomizationFilecount++ + iterateArr(hit.Document.Identifiers, kustomizeIdentifiersMap) + } + } + } + if err := it.Err(); err != nil { + fmt.Printf("Error iterating: %v\n", err) + } + + sortedKindsMapKeys := SortMapKeyByValue(kindsMap) + sortedIdentifiersMapKeys := SortMapKeyByValue(identifiersMap) + sortedKustomizeIdentifiersMapKeys := SortMapKeyByValue(kustomizeIdentifiersMap) + + fmt.Printf(`The count of unique document IDs in the kustomize index: %d +There are %d documents in the kustomize index. +%d kinds of kubernetes objects are customized:`, len(ids), count, len(kindsMap)) + fmt.Printf("\n") + + kindCount := 0 + for _, key := range sortedKindsMapKeys { + if *topKindsPtr < 0 || (*topKindsPtr >= 0 && kindCount < *topKindsPtr) { + fmt.Printf("\tkind `%s` is customimzed in %d documents\n", key, kindsMap[key]) + kindCount++ + } + } + + fmt.Printf("%d kinds of identifiers are found:\n", len(identifiersMap)) + identifierCount := 0 + for _, key := range sortedIdentifiersMapKeys { + if *topIdentifiersPtr < 0 || (*topIdentifiersPtr >= 0 && identifierCount < *topIdentifiersPtr) { + fmt.Printf("\tidentifier `%s` appears in %d documents\n", key, identifiersMap[key]) + identifierCount++ + } + } + + fmt.Printf(`There are %d kustomization files in the kustomize index. +%d kinds of kustomize features are found:`, kustomizationFilecount, len(kustomizeIdentifiersMap)) + fmt.Printf("\n") + kustomizeFeatureCount := 0 + for _, key := range sortedKustomizeIdentifiersMapKeys { + if *topKustomizeFeaturesPtr < 0 || (*topKustomizeFeaturesPtr >= 0 && kustomizeFeatureCount < *topKustomizeFeaturesPtr) { + fmt.Printf("\tfeature `%s` is used in %d documents\n", key, kustomizeIdentifiersMap[key]) + kustomizeFeatureCount++ + } + } +} diff --git a/api/internal/crawl/config/crawler/kustomize_stats/job.yaml b/api/internal/crawl/config/crawler/kustomize_stats/job.yaml new file mode 100644 index 000000000..e7d22cd51 --- /dev/null +++ b/api/internal/crawl/config/crawler/kustomize_stats/job.yaml @@ -0,0 +1,20 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: kustomize-stats +spec: + template: + spec: + restartPolicy: OnFailure + containers: + - name: kustomize-stats + image: gcr.io/haiyanmeng-gke-dev/kustomize_stats:v1 + imagePullPolicy: Always + command: ["/kustomize_stats"] + args: ["--kinds=50", "--identifiers=50", "--kustomize-features=50"] + env: + - name: ELASTICSEARCH_URL + valueFrom: + configMapKeyRef: + name: elasticsearch-config + key: es-url diff --git a/api/internal/crawl/config/crawler/kustomize_stats/kustomization.yaml b/api/internal/crawl/config/crawler/kustomize_stats/kustomization.yaml new file mode 100644 index 000000000..a2933dd58 --- /dev/null +++ b/api/internal/crawl/config/crawler/kustomize_stats/kustomization.yaml @@ -0,0 +1,3 @@ +resources: +- ../base +- job.yaml