From e2b56910f91fdb5bd86a81efab7edd36d6750b86 Mon Sep 17 00:00:00 2001 From: Haiyan Meng Date: Fri, 20 Dec 2019 10:02:27 -0800 Subject: [PATCH] Add ElasticSearch query examples --- .../crawl/search_cmds/creationTime.md | 180 ++++++++++++++++++ .../crawl/search_cmds/defaultBranch.md | 32 ++++ .../crawl/search_cmds/fieldExistence.md | 55 ++++++ .../crawl/search_cmds/keyword_search.md | 66 +++++++ api/internal/crawl/search_cmds/misc.md | 19 ++ .../crawl/search_cmds/repositoryUrl.md | 125 ++++++++++++ api/internal/crawl/search_cmds/text_search.md | 148 ++++++++++++++ 7 files changed, 625 insertions(+) create mode 100644 api/internal/crawl/search_cmds/creationTime.md create mode 100644 api/internal/crawl/search_cmds/defaultBranch.md create mode 100644 api/internal/crawl/search_cmds/fieldExistence.md create mode 100644 api/internal/crawl/search_cmds/keyword_search.md create mode 100644 api/internal/crawl/search_cmds/misc.md create mode 100644 api/internal/crawl/search_cmds/repositoryUrl.md create mode 100644 api/internal/crawl/search_cmds/text_search.md diff --git a/api/internal/crawl/search_cmds/creationTime.md b/api/internal/crawl/search_cmds/creationTime.md new file mode 100644 index 000000000..4029e4b0b --- /dev/null +++ b/api/internal/crawl/search_cmds/creationTime.md @@ -0,0 +1,180 @@ +Find out the largest value of the `creationTime` field: +``` +curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "aggs" : { + "max_creationTime" : { "max" : { "field" : "creationTime" } } + } +} +' +``` + +Find out the smallest value of the `creationTime` field: +``` +curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "aggs" : { + "min_creationTime" : { "min" : { "field" : "creationTime" } } + } +} +' +``` + +Find out the smallest value of the `creationTime` field of all the kustomization files: +``` +curl -X POST 
"${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "filter": [ + { "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }} + ] + } + }, + "aggs" : { + "min_creationTime" : { "min" : { "field" : "creationTime" } } + } +} +' +``` + +Find out the smallest value of the `creationTime` field of all kustomize resource files: +``` +curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "must_not": { + "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" } + } + } + }, + "aggs" : { + "min_creationTime" : { "min" : { "field" : "creationTime" } } + } +} +' +``` + +Query all the documents whose `creationTime` <= `2016-07-29T17:38:26.000Z`: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "range": { + "creationTime": { + "lte": "2016-07-29T17:38:26.000Z" + } + } + } +} +' +``` + +Query all the documents whose `creationTime` falls within the specific range: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "range": { + "creationTime": { + "gte": "2016-07-29T17:38:26.000Z", + "lte": "2016-08-29T17:38:26.000Z" + } + } + } +} +' +``` + +Aggregate how many new kustomization files were added into Github each month: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "filter": [ + { "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }} + ] + } + }, + "aggs" : { + "newFiles_over_time" : { + "date_histogram" : { + "field" : "creationTime", + "interval" : "month" + } + } + } +} +' +``` + +Aggregate how many new kustomize resource files were added into Github each month: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" 
-H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "must_not": [ + { "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }} + ] + } + }, + "aggs" : { + "newFiles_over_time" : { + "date_histogram" : { + "field" : "creationTime", + "interval" : "month" + } + } + } +} +' +``` + +Aggregate how many new kustomization files were added into Github each year: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "filter": [ + { "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }} + ] + } + }, + "aggs" : { + "newFiles_over_time" : { + "date_histogram" : { + "field" : "creationTime", + "interval" : "year" + } + } + } +} +' +``` + +Aggregate how many new kustomize resource files were added into Github each year: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "must_not": [ + { "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }} + ] + } + }, + "aggs" : { + "newFiles_over_time" : { + "date_histogram" : { + "field" : "creationTime", + "interval" : "year" + } + } + } +} +' +``` \ No newline at end of file diff --git a/api/internal/crawl/search_cmds/defaultBranch.md b/api/internal/crawl/search_cmds/defaultBranch.md new file mode 100644 index 000000000..89822d4c6 --- /dev/null +++ b/api/internal/crawl/search_cmds/defaultBranch.md @@ -0,0 +1,32 @@ +Count distinct values of the `defaultBranch` field: +``` +curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "aggs" : { + "defaultBranch_count" : { + "cardinality" : { + "field" : "defaultBranch", + "precision_threshold": 40000 + } + } + } +} +' +``` + +List all the github branches where kustomization files and kustomize resource files live, +and how many kustomization files and kustomize resource files live in each branch: +``` 
+curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "aggs" : { + "defaultBranch" : { + "terms" : { + "field" : "defaultBranch", + "size": 41 + } + } + } +} +' +``` \ No newline at end of file diff --git a/api/internal/crawl/search_cmds/fieldExistence.md b/api/internal/crawl/search_cmds/fieldExistence.md new file mode 100644 index 000000000..591abdbff --- /dev/null +++ b/api/internal/crawl/search_cmds/fieldExistence.md @@ -0,0 +1,55 @@ +Count the documents whose `document` field is empty (the `document` field +of a document is empty because the document itself is empty): +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "size": 10000, + "query": { + "bool": { + "must_not": { + "exists": { + "field": "document" + } + } + } + } +} +' +``` + +Find all the documents having the `creationTime` field set: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "exists": { + "field": "creationTime" + } + } +} +' +``` + +Find all the documents whose `creationTime` field is not set: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "size": 10000, + "query": { + "bool": { + "must_not": { + "exists": { + "field": "creationTime" + } + } + } + } +} +' +``` + +The following fields of a document in the kustomize index are always non-empty: +`repositoryUrl`, `filePath`, `defaultBranch`. + +The following fields of a document in the kustomize index may be empty: +`kinds`, `identifiers`, `values`.
diff --git a/api/internal/crawl/search_cmds/keyword_search.md b/api/internal/crawl/search_cmds/keyword_search.md new file mode 100644 index 000000000..db703e91a --- /dev/null +++ b/api/internal/crawl/search_cmds/keyword_search.md @@ -0,0 +1,66 @@ +Count the documents in the index whose `repositoryUrl` field starts with +`https://github.com/`: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "filter": [ + { "regexp": { "repositoryUrl": "https://github.com/.*" }} + ] + } + } +} +' +``` + +Count the documents in the index whose `repositoryUrl` field does not start with +`https://github.com/`: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "must_not": [ + { "regexp": { "repositoryUrl": "https://github.com/.*" }} + ] + } + } +} +' +``` + +Search all the documents matching the given `repositoryUrl` and `filePath`, and return +a version for each search hit: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "size": 10000, + "version": true, + "query": { + "bool": { + "filter": [ + { "regexp": { "repositoryUrl": "git@github.com:talos-systems/talos-controller-manager" }}, + { "regexp": { "filePath": "hack/config.*" }} + ] + } + } +} +' +``` + +Search all the documents whose filePath ends with one of these following three filenames: +`kustomization.yaml`, `kustomization.yml`, `kustomization`: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "filter": [ + { "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }} + ] + } + } +} +' +``` \ No newline at end of file diff --git a/api/internal/crawl/search_cmds/misc.md b/api/internal/crawl/search_cmds/misc.md new file mode 100644 index 000000000..ac2659d62 --- /dev/null +++ 
b/api/internal/crawl/search_cmds/misc.md @@ -0,0 +1,19 @@ +Check the health status of an ElasticSearch cluster: +``` +curl -X GET "${ElasticSearchURL}:9200/_cat/health?v&pretty" +``` + +Check the indices in an ElasticSearch cluster: +``` +curl "${ElasticSearchURL}:9200/_cat/indices?v" +``` + +Get the mapping of the index: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_mapping?pretty" +``` + +Delete the kustomize index from the ElasticSearch cluster (**Use this command with caution**): +``` +curl -X DELETE "${ElasticSearchURL}:9200/kustomize?pretty" +``` \ No newline at end of file diff --git a/api/internal/crawl/search_cmds/repositoryUrl.md b/api/internal/crawl/search_cmds/repositoryUrl.md new file mode 100644 index 000000000..ef7802e04 --- /dev/null +++ b/api/internal/crawl/search_cmds/repositoryUrl.md @@ -0,0 +1,125 @@ +Count distinct values of the `repositoryUrl` field: +``` +curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "aggs" : { + "repositoryUrl_count" : { + "cardinality" : { + "field" : "repositoryUrl", + "precision_threshold": 40000 + } + } + } +} +' +``` + +Count how many Github repositories include kustomization files: +``` +curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "filter": [ + { "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }} + ] + } + }, + "aggs" : { + "repositoryUrl_count" : { + "cardinality" : { + "field" : "repositoryUrl", + "precision_threshold": 40000 + } + } + } +} +' +``` + +Count how many Github repositories include kustomize resource files: +``` +curl -X POST "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "must_not": { + "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" } + } + } + }, + "aggs" : { + "repositoryUrl_count" : { + "cardinality" : { + "field" : 
"repositoryUrl", + "precision_threshold": 40000 + } + } + } +} +' +``` + +List all the github repositories including kustomization files and kustomize resource files, +and how many kustomization files and kustomize resource files each github repository includes +(the github repository including the most kustomization files is listed first): +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "aggs" : { + "repositoryUrl" : { + "terms" : { + "field" : "repositoryUrl", + "size": 2082 + } + } + } +} +' +``` + +List the top 20 Github repositories including the most amount of kustomization files: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "filter": [ + { "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" }} + ] + } + }, + "aggs" : { + "repositoryUrl" : { + "terms" : { + "field" : "repositoryUrl", + "size": 20 + } + } + } +} +' +``` + +List the top 20 Github repositories including the most amount of kustomize resource files: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?size=0&pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "must_not": { + "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)/*" } + } + } + }, + "aggs" : { + "repositoryUrl" : { + "terms" : { + "field" : "repositoryUrl", + "size": 20 + } + } + } +} +' +``` \ No newline at end of file diff --git a/api/internal/crawl/search_cmds/text_search.md b/api/internal/crawl/search_cmds/text_search.md new file mode 100644 index 000000000..37a7701b5 --- /dev/null +++ b/api/internal/crawl/search_cmds/text_search.md @@ -0,0 +1,148 @@ +Search for all the kustomize resource files including a Deployment object: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "match" : { + "kinds" : { + "query" : "Deployment" + } 
+ } + } +} +' +``` + +Search for all the kustomize resource files including a Deployment object, but only +including the `kinds` field in the result: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "_source": { + "includes": ["kinds"] + }, + "query": { + "match" : { + "kinds" : { + "query" : "Deployment" + } + } + } +} +' +``` + +Search for all the kustomize resource files including both a Deployment object and +a Service object: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "match" : { + "kinds" : { + "query" : "Deployment Service", + "operator" : "and" + } + } + } +} +' +``` + +Count the number of documents including Deployment and the number of documents +including Service: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "size": 0, + "aggs" : { + "messages" : { + "filters" : { + "filters" : { + "Deployment" : { "match" : { "kinds" : "Deployment" }}, + "Service" : { "match" : { "kinds" : "Service" }} + } + } + } + } +} +' +``` + +Search for all the kustomization files involving CRDs: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "size": 10000, + "query": { + "match" : { + "identifiers" : { + "query" : "crds" + } + } + } +} +' +``` + +Search for all the kustomization files defining configMapGenerator: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "size": 10000, + "query": { + "match" : { + "identifiers" : { + "query" : "configMapGenerator" + } + } + } +} +' +``` + +Search for all the documents having a `kind` field: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "filter": [ + { "match" : { "identifiers" : { "query" : "kind" 
}}} + ] + } + } +} +' +``` + +Search for all the kustomization files having a `kind` field: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "bool": { + "filter": [ + { "regexp": { "filePath": ".*/kustomization((.yaml)?|(.yml)?)" }}, + { "match" : { "identifiers" : { "query" : "kind" }}} + ] + } + } +} +' +``` + +Search for all the kustomization files defining the `generatorOptions:disableNameSuffixHash` feature: +``` +curl -X GET "${ElasticSearchURL}:9200/kustomize/_search?pretty" -H 'Content-Type: application/json' -d' +{ + "query": { + "match" : { + "identifiers" : { + "query" : "generatorOptions:disableNameSuffixHash" + } + } + } +} +' +``` \ No newline at end of file