mirror of
https://github.com/kubernetes-sigs/kustomize.git
synced 2026-05-18 03:05:28 +00:00
Move hacks to hack (match k8s pattern).
This commit is contained in:
16
hack/awker.sh
Executable file
16
hack/awker.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
# showDeps DIR
#
# Prints, between a "begin" and an "end" banner, one line per
# (imported kustomize package, importing Go file) pair found under DIR.
#
# Arguments:
#   $1 - directory to scan recursively for *.go files
# Outputs:
#   stdout: banner, then sorted unique "<package> <file>" lines with the
#           package right-aligned in a 40-column field, then banner.
#
# Notes:
#   * Imports whose line contains "/api/" are dropped.
#   * The "sigs.k8s.io/kustomize/v3/" prefix is stripped from the package.
#   * File names are split on whitespace by awk, so paths containing
#     blanks are not supported by the formatting step.
showDeps() {
  local dir=$1

  echo "==== begin $dir =================================="
  # '...' is "any three characters" in a basic regex; a shell-style '???'
  # would be matched literally by grep and never hit a real import path.
  # -H forces the "file:" prefix even when grep receives a single file,
  # which the awk field swap below relies on.
  find "$dir" -name "*.go" \
      -exec grep -H '"sigs.k8s.io/kustomize/...' {} + \
    | grep -v "/api/" \
    | sed 's|"sigs.k8s.io/kustomize/v3/||' \
    | awk '{ printf "%40s %s\n", $2, $1 }' \
    | sed 's|" \./| |' \
    | sed 's|:$||' \
    | sort | uniq
  echo "==== end $dir =================================="
}
|
||||
|
||||
|
||||
# Report the kustomize imports of each source tree, plugin first.
for dep_root in ./plugin ./kustomize; do
  showDeps "$dep_root"
done
|
||||
428
hack/crawl/README.md
Normal file
428
hack/crawl/README.md
Normal file
@@ -0,0 +1,428 @@
|
||||
## What is this?
|
||||
### In short
|
||||
Be the GoDoc.org of k8s configuration files.
|
||||
|
||||
### More explicitly
|
||||
Support k8s document indexing from open-source configurations in order to make
|
||||
it easy for people to learn to use a new feature, explore k8s configs in a
|
||||
central hub, and see some metrics about kustomize use.
|
||||
|
||||
We want to support three main classes of queries:
|
||||
|
||||
1. Structured document queries: how should I use the following fields
|
||||
- Grace periods: `spec:template:spec:terminationGracePeriod`?
|
||||
- Kustomize inline patch: `patches:patch`?
|
||||
|
||||
2. Key value queries: how should I use this more specific use case of a
|
||||
structure configuration.
|
||||
- HorizontalPodAutoScalers: `kind=HorizontalPodAutoScaler`?
|
||||
- Patches on StatefulSets: `patches:target:kind=StatefulSet`?
|
||||
|
||||
3. Full text search: search the comments and the document text from any
|
||||
type of k8s config file.
|
||||
|
||||
## Road map
|
||||
There is a lot that can be added in order to improve the state of this
|
||||
application. Some more details along with general thoughts and comments can be
|
||||
found in the ROADMAP.md file in this directory. This README contains only
|
||||
what can be considered as mostly complete and iterable parts of this project.
|
||||
|
||||
## Running this project
|
||||
Everything is configured using kubernetes, so it should be easy for people to
|
||||
spin this up on any k8s cluster. Everything should just work (TM).
|
||||
|
||||
The config files live in the `config` directory.
|
||||
|
||||
```
|
||||
config
|
||||
├── base
|
||||
│ └── kustomization.yaml
|
||||
├── crawler
|
||||
│ ├── base
|
||||
│ │ ├── github_api_secret.txt
|
||||
│ │ └── kustomization.yaml
|
||||
│ ├── cronjob
|
||||
│ │ ├── cronjob.yaml
|
||||
│ │ └── kustomization.yaml
|
||||
│ └── job
|
||||
│ ├── job.yaml
|
||||
│ └── kustomization.yaml
|
||||
├── elastic
|
||||
│ └── ...
|
||||
├── redis
|
||||
│ ├── document_keystore
|
||||
│ │ ├── kustomization.yaml
|
||||
│ │ ├── redis.yaml
|
||||
│ │ └── service.yaml
|
||||
│ └── http_cache
|
||||
│ ├── kustomization.yaml
|
||||
│ ├── redis.yaml
|
||||
│ └── service.yaml
|
||||
├── webapp
|
||||
│ ├── backend
|
||||
│ │ ├── deployment.yaml
|
||||
│ │ ├── kustomization.yaml
|
||||
│ │ └── service.yaml
|
||||
│ └── frontend
|
||||
│ ├── deployment.yaml
|
||||
│ ├── kustomization.yaml
|
||||
│ └── service.yaml
|
||||
└── schema_files
|
||||
└── kustomization_index
|
||||
├── es_index_mappings.json
|
||||
└── es_index_settings.json
|
||||
```
|
||||
|
||||
To get everything up and running you have to:
|
||||
|
||||
1. Get some instance of elasticsearch working... and configure the
|
||||
configmapGenerator in `config/base` to point to the right endpoint(s). The
|
||||
configurations that need this value to be populated are the following:
|
||||
- `config/crawler/cronjob` to run periodic crawls.
|
||||
- `config/crawler/job` to run crawls on demand.
|
||||
- `config/webapp/backend` to run the search server.
|
||||
|
||||
2. Configure the elasticsearch indices:
|
||||
```
|
||||
kustomize build config/schema_files/kustomization_index | kubectl apply -f -
|
||||
```
|
||||
This will run a `curl` command that reads json data from a ConfigMap. This will
|
||||
setup the schema. If you want to make more complex modifications to the
|
||||
schema, you should refer to the elastic docs to figure out whether the mapping
|
||||
can be added to the current index, or whether you will need to copy the
|
||||
existing index into a different one with the appropriate mappings. Modifications
|
||||
can be made by using the elasticsearch go library and writing a simple program,
|
||||
or it can be made with any http command to the appropriate server endpoint from
|
||||
within the cluster. Unfortunately I did not have the time to write a few helper
|
||||
tools for this. Feel free to contact me if you need help with modifying
|
||||
elasticsearch configs, I'm by no means an expert, but I can try to help.
|
||||
|
||||
3. (Optional) run the redis http cache for the crawler:
|
||||
```
|
||||
kubectl apply -k config/redis/http_cache
|
||||
```
|
||||
This will create a deployment for the cache, and a service. The crawler should
|
||||
be configured to connect to the `http_cache` if it exists, but you can always
|
||||
check the logs to make sure it connects, and that the identifiers match in the
|
||||
crawler configuration and for the service endpoint.
|
||||
|
||||
Please be aware that the cache does not have a persistent volume.
|
||||
|
||||
4. Configure the main redis instance:
|
||||
```
|
||||
kubectl apply -k config/redis/document_keystore
|
||||
```
|
||||
This will create a StatefulSet with a volume of 4GiB for a redis instance.
|
||||
|
||||
5. Get an access token from GitHub.
|
||||
|
||||
To be able to kindly ask GitHub for its data on k8s config files, you'll need
|
||||
to create an access\_token. From my understanding, this is the only way to do
|
||||
these code search queries (without first specifying a repository).
|
||||
|
||||
To generate a token, go to your GitHub account's Settings > Developer
|
||||
Settings > Personal access tokens. It should look like this.
|
||||
|
||||

|
||||
|
||||
From here you want to generate a new token and have the following
|
||||
configuration:
|
||||
|
||||

|
||||
|
||||
If you have uses for any other data from this token, (org data, or something
|
||||
else) you can pick and choose, but be careful since it can grant this
|
||||
application access to your notifications, etc. However, any such extension
|
||||
is explicitly a non-goal and would not be maintained by this project.
|
||||
|
||||
6. Launch the crawler:
|
||||
```
|
||||
kustomize build config/crawler/cronjob | kubectl apply -f -
|
||||
```
|
||||
This will periodically run the crawler every day according to the cron timing
|
||||
rules in the cronjob.yaml file.
|
||||
|
||||
Instead, to get the crawler running now, you can run:
|
||||
```
|
||||
kustomize build config/crawler/job | kubectl apply -f -
|
||||
```
|
||||
which will launch a non-periodic version of the crawler. It will take a few
|
||||
minutes for the crawler to split the search, but then config files should
|
||||
start to get populated within 20 minutes. It may take a while to do the
|
||||
first crawl, since it has to fetch rate-limited endpoints for each new file it
|
||||
finds. It should get significantly faster to update in the future.
|
||||
|
||||
7. Launch the search backend
|
||||
```
|
||||
kustomize build config/webapp/backend | kubectl apply -f -
|
||||
```
|
||||
|
||||
8. Launch the search frontend
|
||||
```
|
||||
kustomize build config/webapp/frontend | kubectl apply -f -
|
||||
```
|
||||
|
||||
## Notes about the components
|
||||
|
||||
### Elasticsearch
|
||||
I will add a basic working setup soon. I just did the lazy thing and used an
|
||||
already packaged solution. Most clouds will provide their own elastic
|
||||
environments, however, Elasticsearch is also working on their own
|
||||
implementation of a Kubernetes operator (Elastic Cloud on Kubernetes)
|
||||
, which might
|
||||
be worth checking out. Please note that it comes with its own license
|
||||
agreement.
|
||||
|
||||
### Redis
|
||||
There are two Redis instances that are used in this application.
|
||||
|
||||
One of them is configured to have on disk persistence, so make sure to have
|
||||
that set up in your kubernetes cluster. Also note that it is running on a
|
||||
single master node (i.e. it does not automatically shard keys to multiple head
|
||||
nodes as part of a highly available cluster). Since it's storing a sparse
|
||||
graph, I can't imagine this being much of an issue, but it's probably worth
|
||||
mentioning.
|
||||
|
||||
The other Redis instance is running as a HTTP (RFC 7234) cache for etags from
|
||||
GitHub (or any other document store from which we could crawl/index). This one
|
||||
does not require full persistent storage on disk. The caching strategy is an
|
||||
LRU cache which is probably a good starting point. It might be worth it to
|
||||
investigate other cache policies, but I think LRU will work well since
|
||||
documents may or may not expire anyway, and the amount of memory allocated for
|
||||
keys is fairly large, so eviction of frequently used documents seems unlikely
|
||||
anyway.
|
||||
|
||||
### Nginx + Angular
|
||||
There is a Dockerfile included for generating the container image with Nginx
|
||||
(using the default package) and adding all of the supporting compiled angular
|
||||
files. Any modifications to the code-base should be compatible with this setup,
|
||||
so all that's needed is to rebuild the container image, and possibly modify
|
||||
the image tags in the k8s file.
|
||||
|
||||
### Supporting Go binaries
|
||||
There are a few go binaries that each have their own Dockerfile to build
|
||||
containers in which to run them on k8s, namely the crawler and the search
|
||||
service. Their configurations are not optimal (read: needs to be cleaned up),
|
||||
but they are functional.
|
||||
|
||||
## Technical details
|
||||
|
||||
### Overall design and implementation
|
||||
|
||||
There are a few components that are all running together in order to get
|
||||
the overall application to work smoothly. This section will provide a brief
|
||||
overview of each component with the following sections going into more details.
|
||||
|
||||
The overall structure is outlined in the following figure:
|
||||

|
||||
|
||||
#### Crawler
|
||||
The leftmost component consists of a crawler with an http cache of GitHub
|
||||
queries. It does two things: it first looks at the list of documents in
|
||||
elasticsearch and tries to update them. In doing so, it maintains a set of
|
||||
newly updated files to exclude them from other parts of the crawl.
|
||||
|
||||
To find newly added documents, the crawler crawls any new dependencies
|
||||
introduced in the document updating step and it also queries GitHub for the
|
||||
most recently indexed kustomization.\* files. Each new file will be processed
|
||||
for efficient text queries and put into the document index. Any new dependency
|
||||
will also incur more crawl operations. Finally, a graphical
|
||||
representation of the documents and their dependencies is built in Redis to be
|
||||
used for graph algorithms such as PageRank and component analysis.
|
||||
|
||||
#### Data library
|
||||
There are a few helper libraries for dealing with Elasticsearch, Redis and
|
||||
documents. This is not persistent, nor is it centralized. They act as small
|
||||
components that help to package common pieces of code. Eventually it may make
|
||||
sense to merge all of it together and make a proper persistent model around
|
||||
this while providing an external API for document insertion/deletion. But
|
||||
that is definitely out of scope in terms of getting this to run. However
|
||||
there are limitations with the current model in terms of minimizing the
|
||||
API surface for the different components of the application. For now this
|
||||
problem is mostly mitigated by having the query server only connected to
|
||||
a data node of the Elasticsearch cluster, but the problem of knowing what
|
||||
is accessible and what isn't is left to the programmer instead of being
|
||||
clearly and explicitly supported by the API.
|
||||
|
||||
#### Server
|
||||
Uses the data library to communicate with the data store and answer queries.
|
||||
Processes the user entered text queries into somewhat optimized elasticsearch
|
||||
queries. Provides a few endpoints to get different metrics and to eventually
|
||||
allow for registration of remote repositories.
|
||||
|
||||
This application has an exposing service in order to allow users of the
|
||||
application access to queries and the results.
|
||||
|
||||
#### Nginx + Angular
|
||||
Communicates directly with the backend server to forward user queries and
|
||||
their results. Presents the results on an interface. It's still pretty simple
|
||||
looking but it seems usable (to me).
|
||||
|
||||
|
||||
### Crawling GitHub
|
||||
With the use of API keys, GitHub allows account owners to search for files
|
||||
using their API.
|
||||
|
||||
The search endpoints allow for the use of metadata search
|
||||
that is fairly useful/powerful. For instance they provide a `filename:` keyword
|
||||
that permits us to look for `kustomization.yaml`, `kustomization.yml`, etc.
|
||||
This enables the fetching of a list of kustomization documents, from which
|
||||
we can get the actual content from another endpoint
|
||||
(raw.githubusercontent.com).
|
||||
|
||||
However, the search API is fairly limited. There is a restriction to the number
|
||||
of documents that can be retrieved from this method. One possible way to
|
||||
mitigate this would be to periodically query GitHub for results, sorted by the
|
||||
last indexed time. This would allow you to collect most documents from this
|
||||
point forwards. The downside to this is that it may require a large number of
|
||||
requests to their API since you cannot know when new files will be added.
|
||||
Furthermore, there is a possibility that you would not be able to get all of
|
||||
the files either, depending on the velocity of growth.
|
||||
|
||||
The approach that was taken to mitigate this is to use the `filesize:` keyword
|
||||
and to shard the search space into contiguous buckets of appropriate size in
|
||||
order to get all of the documents. This is fairly efficient, since you can find
|
||||
a good enough way to shard the documents in
|
||||
`lg(max file size) * number of documents / 1000` API queries. Moreover, since
|
||||
queries are paginated with at most 100 results per query, this solution is
|
||||
competitive with getting the optimal (non-contiguous) sharding of result sets.
|
||||
Furthermore, filesize queries can be cached to minimize the total number of
|
||||
queries called to the API in order to shard the search space. This is done by
|
||||
querying for file size intervals that always start with 0..X and binary
|
||||
searching over the `filesize:` space. This will allow you to reuse a lot of
|
||||
queries when you're looking for the next range, since it is upper bounded and
|
||||
lower bounded to a smaller number of queries within a range that has also been
|
||||
queried. I think this is only true because filesizes are power law distributed,
|
||||
so searches will typically require fewer queries as they progress from left to
|
||||
right.
|
||||
|
||||
However, this method in no way depends on intervals of the form 0..X, as
|
||||
the number of documents in the many intervals of the range search could be
|
||||
added together to also make this work. This approach just seemed simpler to
|
||||
implement, maintain, and debug so it was preferred.
|
||||
|
||||
To get an idea of how efficient this method is, to shard the search space of
|
||||
7000 documents, it will only take ~90 API range queries which should only take
|
||||
a few minutes. While actually fetching the documents and their relevant
|
||||
metadata (creation time, etc.) will take several hours. Furthermore, this
|
||||
could be made more efficient if a prior distribution is approximated.
|
||||
This prior could be scaled to the number of documents that need to be fetched,
|
||||
and then finding a shard that has an adequate number of requests, will only
|
||||
take a few queries per shard. It could probably be supported in a constant
|
||||
number of size queries if the size of each shard is halved which shouldn't
|
||||
have terrible performance impact for the retrieval. However, there were
|
||||
more pressing things to implement. I might revisit this later.
|
||||
|
||||
### Document Indexing and Processing
|
||||
In order to support simple text queries the structured documents must be
|
||||
processed in some way that makes searching them easy. The current method
|
||||
is to recursively traverse the map of configurations to generate each sub-path
|
||||
and each key-value pair for the leaf nodes of the recursion tree.
|
||||
|
||||
However, note that this means that a document has to be valid yaml/json
|
||||
format in order for indexing to happen. The rest of the document is treated
|
||||
as mostly text and uses default text settings from Elasticsearch.
|
||||
|
||||
What this means is that for the following yaml document:
|
||||
|
||||
```yaml
|
||||
resources:
|
||||
- service.yaml
|
||||
- deployment.yaml
|
||||
|
||||
configmapGenerator:
|
||||
- name: app-configuration
|
||||
files:
|
||||
- config.yaml
|
||||
|
||||
patchesJson6902:
|
||||
- target:
|
||||
version: v1
|
||||
kind: StatefulSet
|
||||
name: ss-name
|
||||
path: ss-patch.yaml
|
||||
- target:
|
||||
version: v1
|
||||
kind: Deployment
|
||||
name: dep-name
|
||||
path: dep-patch.yaml
|
||||
```
|
||||
|
||||
the flattened structure would look like the following:
|
||||
```
|
||||
{
|
||||
"identifiers": [
|
||||
"resources",
|
||||
"configmapGenerator",
|
||||
"configmapGenerator:name",
|
||||
"configmapGenerator:files",
|
||||
"patchesJson6902",
|
||||
"patchesJson6902:target",
|
||||
"patchesJson6902:target:version",
|
||||
"patchesJson6902:target:kind",
|
||||
"patchesJson6902:target:name",
|
||||
"patchesJson6902:path",
|
||||
],
|
||||
"values": [
|
||||
"resources=service.yaml",
|
||||
"resources=deployment.yaml",
|
||||
"configmapGenerator:name=app-configuration",
|
||||
"configmapGenerator:files=config.yaml",
|
||||
"patchesJson6902:target:version=v1",
|
||||
"patchesJson6902:target:kind=StatefulSet",
|
||||
"patchesJson6902:target:name=ss-name",
|
||||
"patchesJson6902:path=ss-patch.yaml",
|
||||
"patchesJson6902:target:kind=Deployment",
|
||||
"patchesJson6902:target:name=dep-name",
|
||||
"patchesJson6902:path=dep-patch.yaml",
|
||||
],
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
Note that unique paths and values are deduplicated.
|
||||
|
||||
On the search side, exact queries will be prioritized, but the document paths
|
||||
and key=value pairs will also be analyzed with 3-grams to have some amount of
|
||||
fuzzy search. The reason that a Levenshtein-Distance was not used instead, is due
|
||||
to searching multiple fields at the same time, which is a use case where
|
||||
Elasticsearch does not support proper fuzzy searching.
|
||||
|
||||
### Document Search
|
||||
Given a text query, each token is considered separately. Each token will be fed
|
||||
through a handful of analyzers on the Elasticsearch side, and will be compared
|
||||
with the reverse document index of each document's fields. It will then determine
|
||||
the best matching documents. Text ordering is largely insignificant. This makes
|
||||
sense for the structured search, but may leave room for improvement for the
|
||||
text only search within the document.
|
||||
|
||||
Each token _must_ be matched, so each white space character acts as a
|
||||
conjunction of individual queries. There are also ways of telling
|
||||
Elasticsearch that some things _should_ match, but I think for now it makes
|
||||
more sense to leave it as is.
|
||||
|
||||
I think this behavior is sufficient to make the search feel fairly intuitive
|
||||
while providing support for fairly complex use cases.
|
||||
|
||||
### Metrics Computation
|
||||
From each kustomization document that is indexed, we can find its
|
||||
resources that are publicly available. This includes other kustomizations.
|
||||
From this, we can build a directed graph of dependencies and reverse
|
||||
dependencies.
|
||||
|
||||
This opens up the possibility to add a plethora of graph metrics that can
|
||||
give the project maintainers feedback and insight into how people are using
|
||||
their tools.
|
||||
|
||||
Some of these are useful such as getting an idea for how large the dependency
|
||||
graphs actually grow in practice, and can be used to find _popular_
|
||||
kustomizations within the corpus. This lends itself to implementing PageRank
|
||||
to help bubble up popular results as good search results. I unfortunately
|
||||
did not have the time to implement the algorithm, but I do plan to revisit
|
||||
this sometime soon to add a few good and efficient implementations of useful
|
||||
graph algorithms that would be useful to have. See the ROADMAP.md for a more
|
||||
complete list of features that could be added and how I think they could be
|
||||
implemented.
|
||||
176
hack/crawl/ROADMAP.md
Normal file
176
hack/crawl/ROADMAP.md
Normal file
@@ -0,0 +1,176 @@
|
||||
# Road map and comments about this work
|
||||
|
||||
From working on this project, here is a collection of thoughts and suggestions
|
||||
for future improvements. For any questions about this, or to request help do
|
||||
not hesitate to contact @damienr74 on GitHub, my email should be listed.
|
||||
|
||||
I think this project has the potential for the K8s community to promote best
|
||||
practices. If this becomes popular, it could become easier to find
|
||||
*subjectively good* configurations. This can act as a way to guide newcomers
|
||||
to k8s config features that are easy to maintain, practical, and tested in some
|
||||
real world environment. However, a lot of work remains to be made if this is
|
||||
to happen. Extracting and ranking semantic-level information from the open
|
||||
source configuration files, is definitely not trivial, and will require a lot of
|
||||
thought and consideration from the experts and the patterns that successful k8s
|
||||
projects follow. This is outside of my scope, having little to no experience with
|
||||
k8s other than working on this project; however, if you have ideas I can
|
||||
probably suggest approaches in order to implement it, having worked a lot on
|
||||
this project.
|
||||
|
||||
### Improving configuration files and container configs
|
||||
I did not have a lot of time to refactor the images to use configmaps for
|
||||
everything. This is a good thing to improve, should be fairly easy. Another
|
||||
thing that could improve the user experience of launching this would be to make all
|
||||
of the go utilities be subcommands to the same binary/container image. This
|
||||
would reduce the number of things that would have to be rebuilt, in order to get
|
||||
it running, and it would make the application (and its components) more self
|
||||
contained. (It also has some disadvantages, so I'll let someone else decide.)
|
||||
|
||||
### Adding graph metrics
|
||||
From the Redis graph representation, we are able to run a multitude of graph
|
||||
algorithms (not all of which are implemented).
|
||||
|
||||
The simplest one would be to run Kruskal's algorithm to find connected
|
||||
components, and to compute graph metrics on each component. Here are some of the
|
||||
metrics that may be useful:
|
||||
|
||||
+ Average size and histograms of the sizes of each component.
|
||||
|
||||
+ Average size and histograms of the node with the highest in degree (rdeps) of
|
||||
each component.
|
||||
|
||||
+ Average size and histograms of the number of repositories in a connected
|
||||
component.
|
||||
|
||||
+ Any other metric that may be helpful to measure the scale of the kustomize
|
||||
import graph.
|
||||
|
||||
Another cool thing that may be helpful, would be to output the graph
|
||||
representation of deps/rdeps. This should be fairly easy to do with graphviz/dot
|
||||
so if anyone really wants this, I (damienr74) should be able to do it. Feel free
|
||||
to send me an email or to @ mention me in an issue.
|
||||
|
||||
Note: dfs could also be used to find connected components, but I think union
|
||||
find is preferable, since the results can be stored and modified very
|
||||
efficiently. The only challenging part would be to implement deleting of edges
|
||||
and nodes from a component efficiently, but I know it is possible to support
|
||||
these operations with a union find structure.
|
||||
|
||||
### Implementing PageRank
|
||||
The graph is set up to be able to efficiently compute PageRank since the edge
|
||||
weights are real valued, and the graph representation is sparse which means that
|
||||
it will fit in the memory of a single machine which will make the processing
|
||||
much more efficient.
|
||||
|
||||
It could also be implemented as a Redis script, but I feel like there's
|
||||
something fundamentally wrong with implementing PageRank in lua. :P
|
||||
|
||||
### Implement feature tracking
|
||||
Each day, when the crawler finds and indexes these structured documents,
|
||||
it should insert aggregate data to a separate index. This data could look like the
|
||||
following:
|
||||
|
||||
```
|
||||
{
|
||||
"kind": "kustomization",
|
||||
"added_identifiers": [
|
||||
{
|
||||
"identifier": "some:new:k8s:feature",
|
||||
"addedIn": [
|
||||
"docID1",
|
||||
"docID100",
|
||||
"docID45",
|
||||
...
|
||||
],
|
||||
},
|
||||
{
|
||||
"identifier": "another:k8s:feature",
|
||||
"documents": [
|
||||
...
|
||||
],
|
||||
},
|
||||
...
|
||||
]
|
||||
|
||||
"removed_identifiers": [
|
||||
{
|
||||
"identifier": "some:deprecated:field",
|
||||
"documents": [
|
||||
...
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
This would make it fairly easy to get deep insight into:
|
||||
- the speed at which things can effectively be deprecated.
|
||||
- how many people are migrating to current best practices.
|
||||
- how many documents get updated frequently/rarely.
|
||||
- detailed cross sections of growth/regression over conjunctions of features.
|
||||
- a world of possibilities.
|
||||
|
||||
This is also something that I would be interested to work on sometime soon, so
|
||||
feel free to contact me (damienr74) or ask questions about this.
|
||||
|
||||
As needed, it could be a good idea to also aggregate past data with a larger
|
||||
granularity. For instance, each month, the past 30 days can be aggregated into
|
||||
weekish durations, and every year these weekly aggregations can be converted
|
||||
into monthly summaries depending on how much data this ends up being, and how
|
||||
much you want to pay for the storage of this data.
|
||||
|
||||
Another cool way to compress this data would be to dynamically compress this
|
||||
data into a logarithmic number of buckets with decreasing granularity. But it
|
||||
seems like overkill for the amount of data that we'd likely get.
|
||||
|
||||
### The UI probably needs a lot of work
|
||||
I'm not much of a UI/UX person and have little to no experience in developing
|
||||
these types of applications. If anyone with Angular experience wants to dive in
|
||||
and completely restructure the app to make the UI/UX/Code health better that
|
||||
would be greatly appreciated.
|
||||
|
||||
### Query tuning probably still has to be adjusted
|
||||
I'm also not an expert in Elasticsearch. From what I could read in the docs,
|
||||
I think I've made sane decisions in converting user queries into meaningful
|
||||
Elasticsearch queries, but I'm sure there are a lot of improvements that remain
|
||||
to be done in order to get more accurate results.
|
||||
|
||||
|
||||
### Some other signals that indicate the presence of a good configuration file
|
||||
There are lots of heuristics that could be used to achieve this. Here are a
|
||||
couple in no particular order:
|
||||
|
||||
+ Penalize for the number of yaml `---` document splits. I'm not sure what the
|
||||
general consensus is, but I think it's better to separate them, since it
|
||||
makes git commits less noisy, it's a trivial transformation, and it makes
|
||||
config files smaller. However, I can understand the argument that it's somewhat
|
||||
practical to keep an overall view of the configurations together (maybe).
|
||||
|
||||
+ Penalize the number of unique identifiers in a structured document. I think
|
||||
this makes sense, since we don't want to have someone game the search engine
|
||||
to match documents with every possible path from the k8s docs. PageRank might
|
||||
help with this to some extent, but with a small corpus it would be fairly easy
|
||||
to game.
|
||||
|
||||
+ Assign weights to the usefulness of certain fields. It would be good to
|
||||
promote documents that use `keyRefFromConfigMap`, liveness probes, etc.
|
||||
|
||||
These are the main ones I can think of, but I'm sure there are a *ton* of
|
||||
ways to achieve this.
|
||||
|
||||
If the corpus gets large enough, we might even be able to use *blockchains*,
|
||||
*machine learning*, and maybe even self-driving cars.
|
||||
|
||||
### Add more support for indexing of other k8s/kustomize related data
|
||||
One thing that jumps to mind is the use of kustomize plugins. They are easy
|
||||
to track since they all have an unused global variable: `var KustomizePlugin`;
|
||||
it would be easy to run the pluginator command and generate godocs for each
|
||||
go file with this unique identifier.
|
||||
|
||||
For the sake of completeness, here is the full GitHub query that we can use to
|
||||
find these:
|
||||
`api.github.com/search/code?q=var+KustomizePlugin+extension%3A.go&access_token=access_token`
|
||||
|
||||
Godoc will not show much, since most packages will be using package main, but
|
||||
using pluginator we can make it a properly named package such that Godoc would
|
||||
actually generate the relevant documentation.
|
||||
195
hack/crawl/backend/search_backend.go
Normal file
195
hack/crawl/backend/search_backend.go
Normal file
@@ -0,0 +1,195 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/rs/cors"
|
||||
|
||||
"sigs.k8s.io/kustomize/hack/crawl/index"
|
||||
)
|
||||
|
||||
type kustomizeSearch struct {
|
||||
ctx context.Context
|
||||
// Eventually pIndex *index.PlugginIndex
|
||||
idx *index.KustomizeIndex
|
||||
router *mux.Router
|
||||
log *log.Logger
|
||||
}
|
||||
|
||||
// New server. Creating a server does not launch it. To launch simply:
|
||||
// srv, _ := NewKustomizeSearch(context.Backgroud())
|
||||
// err := srv.Serve()
|
||||
// if err != nil {
|
||||
// // Handle server issues.
|
||||
// }
|
||||
//
|
||||
// The server has three enpoints, two of which are functional:
|
||||
//
|
||||
// /search: processes the ?q= parameter for a text query and
|
||||
// returns a list of 10 resutls starting from the ?from= value provided,
|
||||
// with the default being zero.
|
||||
//
|
||||
// /metrics: returns overall metrics about the files indexed. Returns
|
||||
// timeseries data for kustomization files, and returns breakdown of file
|
||||
// counts by their 'kind' fields
|
||||
//
|
||||
// /register: not implemented, but meant as an endpoint for adding new
|
||||
// kustomization files to the corpus.
|
||||
func NewKustomizeSearch(ctx context.Context) (*kustomizeSearch, error) {
|
||||
idx, err := index.NewKustomizeIndex(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ks := &kustomizeSearch{
|
||||
ctx: ctx,
|
||||
idx: idx,
|
||||
router: mux.NewRouter(),
|
||||
log: log.New(os.Stdout, "Kustomize server: ",
|
||||
log.LstdFlags|log.Llongfile|log.LUTC),
|
||||
}
|
||||
|
||||
return ks, nil
|
||||
}
|
||||
|
||||
// Set up common middleware and the routes for the server.
|
||||
func (ks *kustomizeSearch) routes() {
|
||||
|
||||
// Setup middleware.
|
||||
ks.router.Use(func(handler http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
handler.ServeHTTP(w, r)
|
||||
})
|
||||
})
|
||||
|
||||
ks.router.HandleFunc("/liveness", ks.liveness()).Methods(http.MethodGet)
|
||||
ks.router.HandleFunc("/readiness", ks.readiness()).Methods(http.MethodGet)
|
||||
ks.router.HandleFunc("/search", ks.search()).Methods(http.MethodGet)
|
||||
ks.router.HandleFunc("/metrics", ks.metrics()).Methods(http.MethodGet)
|
||||
ks.router.HandleFunc("/register", ks.register()).Methods(http.MethodPost)
|
||||
}
|
||||
|
||||
// Start listening and serving on the provided port.
|
||||
func (ks *kustomizeSearch) Serve(port int) error {
|
||||
ks.routes()
|
||||
handler := cors.Default().Handler(ks.router)
|
||||
s := &http.Server{
|
||||
Addr: fmt.Sprintf(":%d", port),
|
||||
Handler: handler,
|
||||
// Timeouts/Limits
|
||||
}
|
||||
|
||||
return s.ListenAndServe()
|
||||
}
|
||||
|
||||
// /liveness endpoint
|
||||
func (ks *kustomizeSearch) liveness() http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}
|
||||
}
|
||||
|
||||
// /readyness endpoint
|
||||
func (ks *kustomizeSearch) readiness() http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
opt := index.KustomizeSearchOptions{}
|
||||
_, err := ks.idx.Search("", opt)
|
||||
if err != nil {
|
||||
http.Error(w,
|
||||
`{ "error": "could not connect to database" }`,
|
||||
http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}
|
||||
}
|
||||
|
||||
// /register endpoint.
|
||||
func (ks *kustomizeSearch) register() http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
http.Error(w, "not implemented", http.StatusInternalServerError)
|
||||
}
|
||||
}
|
||||
|
||||
// /search endpoint.
|
||||
func (ks *kustomizeSearch) search() http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
values := r.URL.Query()
|
||||
|
||||
queries := values["q"]
|
||||
ks.log.Println("Query: ", values)
|
||||
|
||||
var from int
|
||||
fromParam := values["from"]
|
||||
if len(fromParam) > 0 {
|
||||
from, _ = strconv.Atoi(fromParam[0])
|
||||
if from < 0 {
|
||||
from = 0
|
||||
}
|
||||
}
|
||||
_, noKinds := values["nokinds"]
|
||||
|
||||
opt := index.KustomizeSearchOptions{
|
||||
SearchOptions: index.SearchOptions{
|
||||
Size: 10,
|
||||
From: from,
|
||||
},
|
||||
KindAggregation: !noKinds,
|
||||
}
|
||||
|
||||
results, err := ks.idx.Search(strings.Join(queries, " "), opt)
|
||||
if err != nil {
|
||||
ks.log.Println("Error: ", err)
|
||||
http.Error(w, fmt.Sprintf(
|
||||
`{ "error": "could not complete the query" }`),
|
||||
http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
enc := json.NewEncoder(w)
|
||||
setIndent(enc)
|
||||
if err = enc.Encode(results); err != nil {
|
||||
http.Error(w, `{ "error": "failed to send back results" }`,
|
||||
http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// metrics endpoint.
|
||||
func (ks *kustomizeSearch) metrics() http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
res, err := ks.idx.Search("", index.KustomizeSearchOptions{
|
||||
KindAggregation: true,
|
||||
TimeseriesAggregation: true,
|
||||
})
|
||||
if err != nil {
|
||||
http.Error(w, `{ "error": "could not perform the search."}`,
|
||||
http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
enc := json.NewEncoder(w)
|
||||
setIndent(enc)
|
||||
if err := enc.Encode(res); err != nil {
|
||||
http.Error(w, `{ "error": "could not format return value" }`,
|
||||
http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// make json response human readable.
|
||||
func setIndent(e *json.Encoder) {
|
||||
e.SetIndent("", " ")
|
||||
}
|
||||
14
hack/crawl/cmd/backend/Dockerfile
Normal file
14
hack/crawl/cmd/backend/Dockerfile
Normal file
@@ -0,0 +1,14 @@
|
||||
FROM golang:1.11 AS build
|
||||
|
||||
ARG GO111MODULE=on
|
||||
|
||||
WORKDIR /go/src/sigs.k8s.io/kustomize/internal/tools
|
||||
COPY . /go/src/sigs.k8s.io/kustomize/internal/tools
|
||||
|
||||
RUN go mod download
|
||||
RUN CGO_ENABLED=0 go install sigs.k8s.io/kustomize/internal/tools/cmd/backend/
|
||||
|
||||
FROM scratch
|
||||
COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
|
||||
COPY --from=build /go/bin/backend /
|
||||
ENTRYPOINT ["/backend"]
|
||||
29
hack/crawl/cmd/backend/main.go
Normal file
29
hack/crawl/cmd/backend/main.go
Normal file
@@ -0,0 +1,29 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"os"
|
||||
server "sigs.k8s.io/kustomize/hack/crawl/backend"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
func main() {
|
||||
portStr := os.Getenv("PORT")
|
||||
port, err := strconv.Atoi(portStr)
|
||||
if portStr == "" || err != nil {
|
||||
log.Fatalf("$PORT(%s) must be set to an integer\n", portStr)
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
ks, err := server.NewKustomizeSearch(ctx)
|
||||
if err != nil {
|
||||
log.Fatalf("Error creating kustomize server: %v", ks)
|
||||
}
|
||||
|
||||
err = ks.Serve(port)
|
||||
if err != nil {
|
||||
log.Fatalf("Error while running server: %v", err)
|
||||
}
|
||||
}
|
||||
6
hack/crawl/config/base/kustomization.yaml
Normal file
6
hack/crawl/config/base/kustomization.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
configmapGenerator:
|
||||
- name: elasticsearch-config
|
||||
literals:
|
||||
- es-url="http://esbasic-master:9200"
|
||||
- kustomize-index-name="kustomize"
|
||||
- plugin-index-name="plugin"
|
||||
13
hack/crawl/config/crawler/base/kustomization.yaml
Normal file
13
hack/crawl/config/crawler/base/kustomization.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
resources:
|
||||
- ../../base
|
||||
|
||||
configmapGenerator:
|
||||
- name: crawler-http-cache
|
||||
literals:
|
||||
- redis-cache-url="redis://redis-http-cache:6379"
|
||||
|
||||
|
||||
secretGenerator:
|
||||
- name: github-access-token
|
||||
files:
|
||||
- token=github_api_secret.txt
|
||||
30
hack/crawl/config/crawler/cronjob/cronjob.yaml
Normal file
30
hack/crawl/config/crawler/cronjob/cronjob.yaml
Normal file
@@ -0,0 +1,30 @@
|
||||
apiVersion: batch/v1beta1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: crawler
|
||||
spec:
|
||||
schedule: "5 0 * * */1"
|
||||
jobTemplate:
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
restartPolicy: OnFailure
|
||||
containers:
|
||||
- name: crawler
|
||||
image: gcr.io/kustomize-search/crawler:latest
|
||||
env:
|
||||
- name: GITHUB_ACCESS_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: github-access-token
|
||||
key: token
|
||||
- name: ELASTICSEARCH_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: elasticsearch-config
|
||||
key: es-url
|
||||
- name: REDIS_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: crawler-http-cache
|
||||
key: redis-cache-url
|
||||
3
hack/crawl/config/crawler/cronjob/kustomization.yaml
Normal file
3
hack/crawl/config/crawler/cronjob/kustomization.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
resources:
|
||||
- ../base
|
||||
- cronjob.yaml
|
||||
32
hack/crawl/config/crawler/job/job.yaml
Normal file
32
hack/crawl/config/crawler/job/job.yaml
Normal file
@@ -0,0 +1,32 @@
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: crawler
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
restartPolicy: OnFailure
|
||||
containers:
|
||||
- name: crawler
|
||||
image: gcr.io/kustomize-search/crawler:latest
|
||||
env:
|
||||
- name: GITHUB_ACCESS_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: github-access-token
|
||||
key: token
|
||||
- name: ELASTICSEARCH_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: elasticsearch-config
|
||||
key: es-url
|
||||
- name: REDIS_CACHE_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: crawler-http-cache
|
||||
key: redis-cache-url
|
||||
- name: REDIS_KEY_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: redis-keystore
|
||||
key: keystore-url
|
||||
3
hack/crawl/config/crawler/job/kustomization.yaml
Normal file
3
hack/crawl/config/crawler/job/kustomization.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
resources:
|
||||
- ../base
|
||||
- job.yaml
|
||||
@@ -0,0 +1,7 @@
|
||||
resources:
|
||||
- redis.yaml
|
||||
- service.yaml
|
||||
|
||||
commonLabels:
|
||||
app: redis
|
||||
tier: document-keystore
|
||||
37
hack/crawl/config/redis/document_keystore/redis.yaml
Normal file
37
hack/crawl/config/redis/document_keystore/redis.yaml
Normal file
@@ -0,0 +1,37 @@
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: redis-docs-keystore
|
||||
spec:
|
||||
serviceName: "redis-docs-keystore"
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: redis
|
||||
image: redis:5-alpine
|
||||
imagePullPolicy: Always
|
||||
args:
|
||||
- "--save"
|
||||
- "900"
|
||||
- "1"
|
||||
- "--save"
|
||||
- "30"
|
||||
- "100"
|
||||
- "--appendonly"
|
||||
- "yes"
|
||||
ports:
|
||||
- name: redis-docs-port
|
||||
containerPort: 6379
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: redis-docs-keystore-data
|
||||
restartPolicy: Always
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: redis-docs-keystore-data
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 4Gi
|
||||
10
hack/crawl/config/redis/document_keystore/service.yaml
Normal file
10
hack/crawl/config/redis/document_keystore/service.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: redis-docs-keystore
|
||||
spec:
|
||||
clusterIP: None
|
||||
ports:
|
||||
- protocol: "TCP"
|
||||
port: 6379
|
||||
targetPort: redis-docs-port
|
||||
7
hack/crawl/config/redis/http_cache/kustomization.yaml
Normal file
7
hack/crawl/config/redis/http_cache/kustomization.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
resources:
|
||||
- redis.yaml
|
||||
- service.yaml
|
||||
|
||||
commonLabels:
|
||||
app: redis
|
||||
tier: http-cache
|
||||
16
hack/crawl/config/redis/http_cache/redis.yaml
Normal file
16
hack/crawl/config/redis/http_cache/redis.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: redis-http-cache
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: redis
|
||||
image: redis:5-alpine
|
||||
imagePullPolicy: Always
|
||||
# see redis.io/topics/lru-cache for other policy options.
|
||||
args: ["--maxmemory", "1gb", "--maxmemory-policy", "allkeys-lru"]
|
||||
ports:
|
||||
- name: http-cache-port
|
||||
containerPort: 6379
|
||||
10
hack/crawl/config/redis/http_cache/service.yaml
Normal file
10
hack/crawl/config/redis/http_cache/service.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: redis-http-cache
|
||||
spec:
|
||||
clusterIP: None
|
||||
ports:
|
||||
- protocol: "TCP"
|
||||
port: 6379
|
||||
targetPort: http-cache-port
|
||||
38
hack/crawl/config/webapp/backend/deployment.yaml
Normal file
38
hack/crawl/config/webapp/backend/deployment.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: kustomize-search
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: kustomize-search
|
||||
tier: backend
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: kustomize-search
|
||||
tier: backend
|
||||
spec:
|
||||
containers:
|
||||
- name: kustomize-search
|
||||
image: gcr.io/kustomize-search/backend:latest
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /liveness
|
||||
port: backend-port
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /readiness
|
||||
port: backend-port
|
||||
ports:
|
||||
- name: backend-port
|
||||
containerPort: 8080
|
||||
env:
|
||||
- name: ELASTICSEARCH_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: elasticsearch-config
|
||||
key: es-url
|
||||
- name: PORT
|
||||
value: "8080"
|
||||
4
hack/crawl/config/webapp/backend/kustomization.yaml
Normal file
4
hack/crawl/config/webapp/backend/kustomization.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
resources:
|
||||
- ../../base
|
||||
- deployment.yaml
|
||||
- service.yaml
|
||||
14
hack/crawl/config/webapp/backend/service.yaml
Normal file
14
hack/crawl/config/webapp/backend/service.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: kustomize-search
|
||||
spec:
|
||||
selector:
|
||||
app: kustomize-search
|
||||
tier: backend
|
||||
ports:
|
||||
- protocol: "TCP"
|
||||
port: 80
|
||||
targetPort: backend-port
|
||||
type: LoadBalancer
|
||||
loadBalancerIP: ""
|
||||
22
hack/crawl/config/webapp/frontend/deployment.yaml
Normal file
22
hack/crawl/config/webapp/frontend/deployment.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: kustomize-search-ui
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: kustomize-search
|
||||
tier: frontend
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: kustomize-search
|
||||
tier: frontend
|
||||
spec:
|
||||
containers:
|
||||
- name: frontend
|
||||
image: gcr.io/kustomize-search/frontend:latest
|
||||
ports:
|
||||
- name: frontend-port
|
||||
containerPort: 80
|
||||
4
hack/crawl/config/webapp/frontend/kustomization.yaml
Normal file
4
hack/crawl/config/webapp/frontend/kustomization.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
resources:
|
||||
- ../../base
|
||||
- deployment.yaml
|
||||
- service.yaml
|
||||
14
hack/crawl/config/webapp/frontend/service.yaml
Normal file
14
hack/crawl/config/webapp/frontend/service.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: kustomize-search-ui
|
||||
spec:
|
||||
selector:
|
||||
app: kustomize-search
|
||||
tier: frontend
|
||||
ports:
|
||||
- protocol: "TCP"
|
||||
port: 80
|
||||
targetPort: frontend-port
|
||||
type: LoadBalancer
|
||||
loadBalancerIP: ""
|
||||
236
hack/crawl/crawler/crawler.go
Normal file
236
hack/crawl/crawler/crawler.go
Normal file
@@ -0,0 +1,236 @@
|
||||
// Package crawler provides helper methods and defines an interface for launching
|
||||
// source repository crawlers that retrieve files from a source and forward them
|
||||
// to a channel for indexing and retrieval.
|
||||
package crawler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
_ "github.com/gomodule/redigo/redis"
|
||||
|
||||
"sigs.k8s.io/kustomize/hack/crawl/doc"
|
||||
)
|
||||
|
||||
var (
|
||||
logger = log.New(os.Stdout, "Crawler: ", log.LstdFlags|log.LUTC|log.Llongfile)
|
||||
)
|
||||
|
||||
// Crawler forwards documents from source repositories to index and store them
|
||||
// for searching. Each crawler is responsible for querying its source of
|
||||
// information, and forwarding files that have not been seen before or that need
|
||||
// updating.
|
||||
type Crawler interface {
|
||||
// Crawl returns when it is done processing. This method does not take
|
||||
// ownership of the channel. The channel is write only, and it
|
||||
// designates where the crawler should forward the documents.
|
||||
Crawl(ctx context.Context, output chan<- CrawledDocument) error
|
||||
|
||||
// Get the document data given the FilePath, Repo, and Ref/Tag/Branch.
|
||||
FetchDocument(context.Context, *doc.Document) error
|
||||
// Write to the document what the created time is.
|
||||
SetCreated(context.Context, *doc.Document) error
|
||||
|
||||
Match(*doc.Document) bool
|
||||
}
|
||||
|
||||
type CrawledDocument interface {
|
||||
ID() string
|
||||
GetDocument() *doc.Document
|
||||
GetResources() ([]*doc.Document, error)
|
||||
WasCached() bool
|
||||
}
|
||||
|
||||
type CrawlSeed []*doc.Document
|
||||
|
||||
type IndexFunc func(CrawledDocument, Crawler) error
|
||||
type Converter func(*doc.Document) (CrawledDocument, error)
|
||||
|
||||
// Cleaner, more efficient, and more extensible crawler implementation.
|
||||
// The seed must include the ids of each document in the index.
|
||||
func CrawlFromSeed(ctx context.Context, seed CrawlSeed,
|
||||
crawlers []Crawler, conv Converter, indx IndexFunc) {
|
||||
|
||||
seen := make(map[string]struct{})
|
||||
|
||||
logIfErr := func(err error) {
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
logger.Println("error: ", err)
|
||||
}
|
||||
|
||||
stack := make(CrawlSeed, 0)
|
||||
|
||||
findMatch := func(d *doc.Document) Crawler {
|
||||
for _, crawl := range crawlers {
|
||||
if crawl.Match(d) {
|
||||
return crawl
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
addBranches := func(cdoc CrawledDocument, match Crawler) {
|
||||
if _, ok := seen[cdoc.ID()]; ok {
|
||||
return
|
||||
}
|
||||
|
||||
seen[cdoc.ID()] = struct{}{}
|
||||
// Insert into index
|
||||
err := indx(cdoc, match)
|
||||
logIfErr(err)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
deps, err := cdoc.GetResources()
|
||||
logIfErr(err)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for _, dep := range deps {
|
||||
if _, ok := seen[dep.ID()]; ok {
|
||||
continue
|
||||
}
|
||||
stack = append(stack, dep)
|
||||
}
|
||||
}
|
||||
|
||||
doCrawl := func(docsPtr *CrawlSeed) {
|
||||
for len(*docsPtr) > 0 {
|
||||
back := len(*docsPtr) - 1
|
||||
next := (*docsPtr)[back]
|
||||
*docsPtr = (*docsPtr)[:back]
|
||||
|
||||
match := findMatch(next)
|
||||
if match == nil {
|
||||
logIfErr(fmt.Errorf(
|
||||
"%v could not match any crawler", next))
|
||||
continue
|
||||
}
|
||||
|
||||
err := match.FetchDocument(ctx, next)
|
||||
logIfErr(err)
|
||||
// If there was no change or there is an error, we don't have
|
||||
// to branch out, since the dependencies are already in the
|
||||
// index, or we cannot find the document.
|
||||
if err != nil || next.WasCached() {
|
||||
continue
|
||||
}
|
||||
|
||||
cdoc, err := conv(next)
|
||||
logIfErr(err)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
addBranches(cdoc, match)
|
||||
}
|
||||
}
|
||||
// Exploit seed to update bulk of corpus.
|
||||
logger.Printf("updating %d documents from seed\n", len(seed))
|
||||
doCrawl(&seed)
|
||||
// Traverse any new links added while updating corpus.
|
||||
logger.Printf("crawling %d new documents found in the seed\n", len(stack))
|
||||
doCrawl(&stack)
|
||||
|
||||
ch := make(chan CrawledDocument, 1<<10)
|
||||
wg := sync.WaitGroup{}
|
||||
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for cdoc := range ch {
|
||||
if _, ok := seen[cdoc.ID()]; ok {
|
||||
continue
|
||||
}
|
||||
match := findMatch(cdoc.GetDocument())
|
||||
if match == nil {
|
||||
logIfErr(fmt.Errorf(
|
||||
"%v could not match any crawler", cdoc))
|
||||
continue
|
||||
}
|
||||
addBranches(cdoc, match)
|
||||
}
|
||||
}()
|
||||
|
||||
// Exploration through APIs.
|
||||
errs := CRunner(ctx, ch, crawlers)
|
||||
if errs != nil {
|
||||
for _, err := range errs {
|
||||
logIfErr(err)
|
||||
}
|
||||
}
|
||||
close(ch)
|
||||
logger.Println("Processing the new documents from the crawlers' exploration.")
|
||||
wg.Wait()
|
||||
// Handle deps of newly discovered documents.
|
||||
logger.Printf("crawling the %d new documents from the crawlers' exploration.",
|
||||
len(stack))
|
||||
doCrawl(&stack)
|
||||
}
|
||||
|
||||
// CRunner is a blocking function and only returns once all of the
|
||||
// crawlers are finished with execution.
|
||||
//
|
||||
// This function uses the output channel to forward kustomization documents
|
||||
// from a list of crawlers. The output is to be consumed by a database/search
|
||||
// indexer for later retrieval.
|
||||
//
|
||||
// The return value is an array of errors in which each index represents the
|
||||
// index of the crawler that emitted the error. Although the errors themselves
|
||||
// can be nil, the array will always be exactly the size of the crawlers array.
|
||||
//
|
||||
// CRunner takes in a seed, which represents the documents stored in an
|
||||
// index somewhere. The document data is not required to be populated. If there
|
||||
// are many documents, this is preferable. The order of iteration over the seed
|
||||
// is not garanteed, but the CRunner does guarantee that every element
|
||||
// from the seed will be processed before any other documents from the
|
||||
// crawlers.
|
||||
func CRunner(ctx context.Context,
|
||||
output chan<- CrawledDocument, crawlers []Crawler) []error {
|
||||
|
||||
errs := make([]error, len(crawlers))
|
||||
wg := sync.WaitGroup{}
|
||||
|
||||
for i, crawler := range crawlers {
|
||||
// Crawler implementations get their own channels to prevent a
|
||||
// crawler from closing the main output channel.
|
||||
docs := make(chan CrawledDocument)
|
||||
wg.Add(2)
|
||||
|
||||
// Forward all of the documents from this crawler's channel to
|
||||
// the main output channel.
|
||||
go func(docs <-chan CrawledDocument) {
|
||||
defer wg.Done()
|
||||
for d := range docs {
|
||||
output <- d
|
||||
}
|
||||
}(docs)
|
||||
|
||||
// Run this crawler and capture its returned error.
|
||||
go func(idx int, crawler Crawler,
|
||||
docs chan<- CrawledDocument) {
|
||||
|
||||
defer func() {
|
||||
wg.Done()
|
||||
if r := recover(); r != nil {
|
||||
errs[idx] = fmt.Errorf(
|
||||
"%+v panicked: %v, additional error %v",
|
||||
crawler, r, errs[idx],
|
||||
)
|
||||
}
|
||||
}()
|
||||
defer close(docs)
|
||||
errs[idx] = crawler.Crawl(ctx, docs)
|
||||
}(i, crawler, docs) // Copies the index and the crawler
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
return errs
|
||||
}
|
||||
356
hack/crawl/crawler/crawler_test.go
Normal file
356
hack/crawl/crawler/crawler_test.go
Normal file
@@ -0,0 +1,356 @@
|
||||
package crawler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/pgmconfig"
|
||||
"sigs.k8s.io/kustomize/hack/crawl/doc"
|
||||
)
|
||||
|
||||
const (
|
||||
kustomizeRepo = "https://github.com/kubernetes-sigs/kustomize"
|
||||
)
|
||||
|
||||
// Simple crawler that forwards its list of documents to a provided channel and
|
||||
// returns its error to the caller.
|
||||
type testCrawler struct {
|
||||
matchPrefix string
|
||||
err error
|
||||
docs []doc.KustomizationDocument
|
||||
lukp map[string]int
|
||||
}
|
||||
|
||||
func (c testCrawler) Match(d *doc.Document) bool {
|
||||
return d != nil && strings.HasPrefix(d.ID(), c.matchPrefix)
|
||||
}
|
||||
|
||||
func (c testCrawler) FetchDocument(ctx context.Context, d *doc.Document) error {
|
||||
if i, ok := c.lukp[d.ID()]; ok {
|
||||
d.DocumentData = c.docs[i].DocumentData
|
||||
return nil
|
||||
}
|
||||
for _, suffix := range pgmconfig.RecognizedKustomizationFileNames() {
|
||||
fmt.Println(d.ID(), "/", suffix)
|
||||
i, ok := c.lukp[d.ID()+"/"+suffix]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
d.FilePath += "/" + suffix
|
||||
d.DocumentData = c.docs[i].DocumentData
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("document %v does not exist for matcher: %s",
|
||||
d, c.matchPrefix)
|
||||
}
|
||||
|
||||
func (c testCrawler) SetCreated(ctx context.Context, d *doc.Document) error {
|
||||
d.CreationTime = &time.Time{}
|
||||
return nil
|
||||
}
|
||||
|
||||
func newCrawler(matchPrefix string, err error,
|
||||
docs []doc.KustomizationDocument) testCrawler {
|
||||
c := testCrawler{
|
||||
matchPrefix: matchPrefix,
|
||||
err: err,
|
||||
docs: docs,
|
||||
lukp: make(map[string]int),
|
||||
}
|
||||
for i, d := range docs {
|
||||
c.lukp[d.ID()] = i
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// Crawl implements the Crawler interface for testing.
|
||||
func (c testCrawler) Crawl(ctx context.Context,
|
||||
output chan<- CrawledDocument) error {
|
||||
|
||||
for i, d := range c.docs {
|
||||
isResource := true
|
||||
for _, suffix := range pgmconfig.RecognizedKustomizationFileNames() {
|
||||
if strings.HasSuffix(d.FilePath, suffix) {
|
||||
isResource = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if isResource {
|
||||
continue
|
||||
}
|
||||
output <- &c.docs[i]
|
||||
}
|
||||
return c.err
|
||||
}
|
||||
|
||||
// Used to make sure that we're comparing documents in order. This is needed
|
||||
// since these documents will be sent concurrently.
|
||||
type sortableDocs []doc.KustomizationDocument
|
||||
|
||||
func (s sortableDocs) Less(i, j int) bool {
|
||||
return s[i].FilePath < s[j].FilePath
|
||||
}
|
||||
|
||||
func (s sortableDocs) Swap(i, j int) {
|
||||
s[i], s[j] = s[j], s[i]
|
||||
}
|
||||
|
||||
func (s sortableDocs) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func TestCrawlerRunner(t *testing.T) {
|
||||
fmt.Println("testing CRunner")
|
||||
tests := []struct {
|
||||
tc []Crawler
|
||||
errs []error
|
||||
docs sortableDocs
|
||||
}{
|
||||
{
|
||||
tc: []Crawler{
|
||||
testCrawler{
|
||||
docs: []doc.KustomizationDocument{
|
||||
{Document: doc.Document{
|
||||
FilePath: "crawler1/doc1/kustomization.yaml",
|
||||
}},
|
||||
{Document: doc.Document{
|
||||
FilePath: "crawler1/doc2/kustomization.yaml",
|
||||
}},
|
||||
{Document: doc.Document{
|
||||
FilePath: "crawler1/doc3/kustomization.yaml",
|
||||
}},
|
||||
},
|
||||
},
|
||||
testCrawler{err: errors.New("crawler2")},
|
||||
testCrawler{},
|
||||
testCrawler{
|
||||
docs: []doc.KustomizationDocument{
|
||||
{Document: doc.Document{
|
||||
FilePath: "crawler4/doc1/kustomization.yaml",
|
||||
}},
|
||||
{Document: doc.Document{
|
||||
FilePath: "crawler4/doc2/kustomization.yaml",
|
||||
}},
|
||||
},
|
||||
err: errors.New("crawler4"),
|
||||
},
|
||||
},
|
||||
errs: []error{
|
||||
nil,
|
||||
errors.New("crawler2"),
|
||||
nil,
|
||||
errors.New("crawler4"),
|
||||
},
|
||||
docs: sortableDocs{
|
||||
{Document: doc.Document{
|
||||
FilePath: "crawler1/doc1/kustomization.yaml",
|
||||
}},
|
||||
{Document: doc.Document{
|
||||
FilePath: "crawler1/doc2/kustomization.yaml",
|
||||
}},
|
||||
{Document: doc.Document{
|
||||
FilePath: "crawler1/doc3/kustomization.yaml",
|
||||
}},
|
||||
{Document: doc.Document{
|
||||
FilePath: "crawler4/doc1/kustomization.yaml",
|
||||
}},
|
||||
{Document: doc.Document{
|
||||
FilePath: "crawler4/doc2/kustomization.yaml",
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
output := make(chan CrawledDocument)
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(1)
|
||||
|
||||
// Run the Crawler runner with a list of crawlers.
|
||||
go func() {
|
||||
defer close(output)
|
||||
defer wg.Done()
|
||||
|
||||
errs := CRunner(context.Background(),
|
||||
output, test.tc)
|
||||
|
||||
// Check that errors are returned as they should be.
|
||||
if !reflect.DeepEqual(errs, test.errs) {
|
||||
t.Errorf("Expected errs (%v) to equal (%v)",
|
||||
errs, test.errs)
|
||||
}
|
||||
|
||||
}()
|
||||
|
||||
// Iterate over the output channel of Crawler runner.
|
||||
returned := make(sortableDocs, 0, len(test.docs))
|
||||
for o := range output {
|
||||
d, ok := o.(*doc.KustomizationDocument)
|
||||
if !ok || d == nil {
|
||||
t.Errorf("%T not expected type (%T)",
|
||||
o, d)
|
||||
}
|
||||
returned = append(returned, *d)
|
||||
}
|
||||
|
||||
// Check that all documents are received.
|
||||
sort.Sort(returned)
|
||||
if !reflect.DeepEqual(returned, test.docs) {
|
||||
t.Errorf("Expected docs (%v) to equal (%v)\n",
|
||||
returned, test.docs)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlFromSeed(t *testing.T) {
|
||||
fmt.Println("testing CrawlFromSeed")
|
||||
|
||||
tests := []struct {
|
||||
seed CrawlSeed
|
||||
matcher string
|
||||
corpus []doc.KustomizationDocument
|
||||
}{
|
||||
{
|
||||
seed: CrawlSeed{
|
||||
{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/helloWorld/kustomization.yaml",
|
||||
},
|
||||
{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/other/kustomization.yaml",
|
||||
},
|
||||
},
|
||||
matcher: kustomizeRepo,
|
||||
corpus: []doc.KustomizationDocument{
|
||||
// Visited from the seed, will be ignored in the crawl.
|
||||
{Document: doc.Document{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/helloWorld/kustomization.yaml",
|
||||
DocumentData: `
|
||||
resources:
|
||||
- deployment.yaml
|
||||
`,
|
||||
}},
|
||||
// Also visited from the seed as a relative resource.
|
||||
{Document: doc.Document{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/helloWorld/deployment.yaml",
|
||||
DocumentData: `
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: hello
|
||||
`,
|
||||
}},
|
||||
// Visited from the seed. Has a remote import.
|
||||
{Document: doc.Document{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/other/kustomization.yaml",
|
||||
DocumentData: `
|
||||
resources:
|
||||
- https://github.com/kubernetes-sigs/kustomize/examples/other/overlay
|
||||
- service.yaml
|
||||
`,
|
||||
}},
|
||||
// Imported as a base from the seed.
|
||||
{Document: doc.Document{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/other/overlay/kustomization.yaml",
|
||||
DocumentData: `
|
||||
resources:
|
||||
- https://github.com/kubernetes-sigs/kustomize/examples/seedcrawl1
|
||||
- https://github.com/kubernetes-sigs/kustomize/examples/seedcrawl2
|
||||
`,
|
||||
}},
|
||||
// Imported as a resource from the seed.
|
||||
{Document: doc.Document{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/other/service.yaml",
|
||||
}},
|
||||
// Visited from crawling seed.
|
||||
{Document: doc.Document{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/seedcrawl1/kustomization.yml",
|
||||
}},
|
||||
// Visited from crawling seed.
|
||||
{Document: doc.Document{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/seedcrawl2/kustomization.yaml",
|
||||
DocumentData: `
|
||||
resources:
|
||||
- ../base
|
||||
- job.yaml
|
||||
`,
|
||||
}},
|
||||
// Visited from crawling seed.
|
||||
{Document: doc.Document{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/base/kustomization.yml",
|
||||
}},
|
||||
// Visited from crawling seed imported as resource.
|
||||
{Document: doc.Document{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/seedcrawl2/job.yaml",
|
||||
}},
|
||||
// Visited from the crawler runner.
|
||||
{Document: doc.Document{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/other/base/kustomization.yaml",
|
||||
DocumentData: `
|
||||
resources:
|
||||
- ../app
|
||||
`,
|
||||
}},
|
||||
// Visited from the crawler runner.
|
||||
{Document: doc.Document{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/other/app/kustomization.yaml",
|
||||
DocumentData: `
|
||||
resources:
|
||||
- resource.yaml
|
||||
`,
|
||||
}},
|
||||
// Visited from crawling runner imported as resource.
|
||||
{Document: doc.Document{
|
||||
RepositoryURL: kustomizeRepo,
|
||||
FilePath: "examples/other/app/resource.yaml",
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
cr := newCrawler(tc.matcher, nil, tc.corpus)
|
||||
visited := make(map[string]int)
|
||||
CrawlFromSeed(context.Background(), tc.seed, []Crawler{cr},
|
||||
func(d *doc.Document) (CrawledDocument, error) {
|
||||
return &doc.KustomizationDocument{
|
||||
Document: *d,
|
||||
}, nil
|
||||
},
|
||||
func(d CrawledDocument, cr Crawler) error {
|
||||
visited[d.ID()]++
|
||||
return nil
|
||||
},
|
||||
)
|
||||
if lv, lc := len(visited), len(tc.corpus); lv != lc {
|
||||
t.Errorf("error: %d of %d documents visited.", lv, lc)
|
||||
t.Errorf("\nvisited (%v)\nexpected (%v).", visited, cr.lukp)
|
||||
}
|
||||
for id, cnt := range visited {
|
||||
if cnt != 1 {
|
||||
t.Errorf("%s not visited once (%d)", id, cnt)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
568
hack/crawl/crawler/github/crawler.go
Normal file
568
hack/crawl/crawler/github/crawler.go
Normal file
@@ -0,0 +1,568 @@
|
||||
// Package github implements the crawler.Crawler interface, getting data
|
||||
// from the Github search API.
|
||||
package github
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"math"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/git"
|
||||
"sigs.k8s.io/kustomize/api/pgmconfig"
|
||||
"sigs.k8s.io/kustomize/hack/crawl/crawler"
|
||||
"sigs.k8s.io/kustomize/hack/crawl/doc"
|
||||
"sigs.k8s.io/kustomize/hack/crawl/httpclient"
|
||||
)
|
||||
|
||||
// logger is the package-wide logger. It writes to stdout and tags every line
// with the crawler prefix, a UTC date/time stamp, and the full path and line
// of the call site.
var logger = log.New(
	os.Stdout,
	"Github Crawler: ",
	log.LstdFlags|log.LUTC|log.Llongfile,
)
|
||||
|
||||
// Implements crawler.Crawler.
|
||||
type githubCrawler struct {
|
||||
client GhClient
|
||||
query Query
|
||||
}
|
||||
|
||||
type GhClient struct {
|
||||
RequestConfig
|
||||
retryCount uint64
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
/*
|
||||
func NewCrawler(accessToken string, retryCount uint64, client *http.Client,
|
||||
query Query) githubCrawler {
|
||||
|
||||
return githubCrawler{
|
||||
client: GhClient{
|
||||
retryCount: retryCount,
|
||||
client: client,
|
||||
RequestConfig: RequestConfig{
|
||||
perPage: githubMaxPageSize,
|
||||
accessToken: accessToken,
|
||||
},
|
||||
},
|
||||
query: query,
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// Implements crawler.Crawler.
|
||||
func (gc githubCrawler) Crawl(
|
||||
ctx context.Context, output chan<- crawler.CrawledDocument) error {
|
||||
|
||||
noETagClient := GhClient{
|
||||
RequestConfig: gc.client.RequestConfig,
|
||||
client: &http.Client{Timeout: gc.client.client.Timeout},
|
||||
retryCount: gc.client.retryCount,
|
||||
}
|
||||
|
||||
// Since Github returns a max of 1000 results per query, we can use
|
||||
// multiple queries that split the search space into chunks of at most
|
||||
// 1000 files to get all of the data.
|
||||
ranges, err := FindRangesForRepoSearch(newCache(noETagClient, gc.query))
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not split %v into ranges, %v\n",
|
||||
gc.query, err)
|
||||
}
|
||||
|
||||
logger.Println("ranges: ", ranges)
|
||||
|
||||
// Query each range for files.
|
||||
errs := make(multiError, 0)
|
||||
for _, query := range ranges {
|
||||
err := processQuery(ctx, gc.client, query, output)
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) > 0 {
|
||||
return errs
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (gc githubCrawler) FetchDocument(ctx context.Context, d *doc.Document) error {
|
||||
repoURL := d.RepositoryURL + "/" + d.FilePath + "?ref=" + d.DefaultBranch
|
||||
repoSpec, err := git.NewRepoSpecFromUrl(repoURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid repospec: %v", err)
|
||||
}
|
||||
|
||||
url := "https://raw.githubusercontent.com/" + repoSpec.OrgRepo +
|
||||
"/" + repoSpec.Ref + "/" + repoSpec.Path
|
||||
|
||||
handle := func(resp *http.Response, err error, path string) error {
|
||||
if err == nil && resp.StatusCode == http.StatusOK {
|
||||
d.IsSame = httpclient.FromCache(resp.Header)
|
||||
defer resp.Body.Close()
|
||||
data, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d.DocumentData = string(data)
|
||||
d.FilePath = d.FilePath + path
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
resp, err := gc.client.GetRawUserContent(url)
|
||||
if err := handle(resp, err, ""); err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, file := range pgmconfig.RecognizedKustomizationFileNames() {
|
||||
resp, err = gc.client.GetRawUserContent(url + "/" + file)
|
||||
err := handle(resp, err, "/"+file)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("file not found: %s", url)
|
||||
}
|
||||
|
||||
func (gc githubCrawler) SetCreated(ctx context.Context, d *doc.Document) error {
|
||||
fs := GhFileSpec{}
|
||||
fs.Repository.FullName = d.RepositoryURL + "/" + d.FilePath
|
||||
creationTime, err := gc.client.GetFileCreationTime(fs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d.CreationTime = &creationTime
|
||||
return nil
|
||||
}
|
||||
|
||||
func (gc githubCrawler) Match(d *doc.Document) bool {
|
||||
url := d.RepositoryURL + "/" + d.FilePath + "?ref=" + "/" +
|
||||
d.DefaultBranch
|
||||
repoSpec, err := git.NewRepoSpecFromUrl(url)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return strings.Contains(repoSpec.Host, "github.com")
|
||||
}
|
||||
|
||||
// processQuery follows all of the pages in a query, and updates/adds the
|
||||
// documents from the crawl to the datastore/index.
|
||||
func processQuery(ctx context.Context, gcl GhClient, query string,
|
||||
output chan<- crawler.CrawledDocument) error {
|
||||
|
||||
queryPages := make(chan GhResponseInfo)
|
||||
|
||||
go func() {
|
||||
// Forward the document metadata to the retrieval channel.
|
||||
// This separation allows for concurrent requests for the code
|
||||
// search, and the retrieval portions of the API.
|
||||
err := gcl.ForwardPaginatedQuery(ctx, query, queryPages)
|
||||
if err != nil {
|
||||
// TODO(damienr74) handle this error with redis?
|
||||
logger.Println(err)
|
||||
}
|
||||
close(queryPages)
|
||||
}()
|
||||
|
||||
errs := make(multiError, 0)
|
||||
errorCnt := 0
|
||||
totalCnt := 0
|
||||
for page := range queryPages {
|
||||
if page.Error != nil {
|
||||
errs = append(errs, page.Error)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, file := range page.Parsed.Items {
|
||||
k, err := kustomizationResultAdapter(gcl, file)
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
errorCnt++
|
||||
continue
|
||||
}
|
||||
output <- k
|
||||
totalCnt++
|
||||
}
|
||||
|
||||
logger.Printf("got %d files out of %d from API. %d of %d had errors\n",
|
||||
totalCnt, page.Parsed.TotalCount, errorCnt, totalCnt)
|
||||
}
|
||||
|
||||
return errs
|
||||
}
|
||||
|
||||
func kustomizationResultAdapter(gcl GhClient, k GhFileSpec) (
|
||||
crawler.CrawledDocument, error) {
|
||||
|
||||
data, err := gcl.GetFileData(k)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
url := gcl.ReposRequest(k.Repository.FullName)
|
||||
defaultBranch, err := gcl.GetDefaultBranch(url)
|
||||
if err != nil {
|
||||
logger.Printf(
|
||||
"(error: %v) setting default_branch to master\n", err)
|
||||
defaultBranch = "master"
|
||||
}
|
||||
|
||||
d := doc.KustomizationDocument{
|
||||
Document: doc.Document{
|
||||
DocumentData: string(data),
|
||||
FilePath: k.Path,
|
||||
DefaultBranch: defaultBranch,
|
||||
RepositoryURL: k.Repository.URL,
|
||||
},
|
||||
}
|
||||
|
||||
return &d, nil
|
||||
}
|
||||
|
||||
// ForwardPaginatedQuery follows the links to the next pages and performs all of
|
||||
// the queries for a given search query, relaying the data from each request
|
||||
// back to an output channel.
|
||||
func (gcl GhClient) ForwardPaginatedQuery(ctx context.Context, query string,
|
||||
output chan<- GhResponseInfo) error {
|
||||
|
||||
logger.Println("querying: ", query)
|
||||
response := gcl.parseGithubResponse(query)
|
||||
|
||||
if response.Error != nil {
|
||||
return response.Error
|
||||
}
|
||||
|
||||
output <- response
|
||||
|
||||
for response.LastURL != "" && response.NextURL != "" {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil
|
||||
default:
|
||||
response = gcl.parseGithubResponse(response.NextURL)
|
||||
if response.Error != nil {
|
||||
return response.Error
|
||||
}
|
||||
|
||||
output <- response
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetFileData gets the bytes from a file.
|
||||
func (gcl GhClient) GetFileData(k GhFileSpec) ([]byte, error) {
|
||||
|
||||
url := gcl.ContentsRequest(k.Repository.FullName, k.Path)
|
||||
|
||||
resp, err := gcl.GetReposData(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%+v: could not get '%s' metadata: %v",
|
||||
k, url, err)
|
||||
}
|
||||
|
||||
data, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%+v: could not read '%s' metadata: %v",
|
||||
k, url, err)
|
||||
}
|
||||
resp.Body.Close()
|
||||
|
||||
type githubContentRawURL struct {
|
||||
DownloadURL string `json:"download_url,omitempty"`
|
||||
}
|
||||
var rawURL githubContentRawURL
|
||||
err = json.Unmarshal(data, &rawURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(
|
||||
"%+v: could not get 'download_url' from '%s' response: %v",
|
||||
k, data, err)
|
||||
}
|
||||
|
||||
resp, err = gcl.GetRawUserContent(rawURL.DownloadURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%+v: could not fetch file raw data '%s': %v",
|
||||
k, rawURL.DownloadURL, err)
|
||||
}
|
||||
|
||||
defer resp.Body.Close()
|
||||
data, err = ioutil.ReadAll(resp.Body)
|
||||
return data, err
|
||||
}
|
||||
|
||||
func (gcl GhClient) GetDefaultBranch(url string) (string, error) {
|
||||
resp, err := gcl.GetReposData(url)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf(
|
||||
"'%s' could not get default_branch: %v", url, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
data, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf(
|
||||
"could not read default_branch: %v", err)
|
||||
}
|
||||
|
||||
type defaultBranch struct {
|
||||
DefaultBranch string `json:"default_branch,omitempty"`
|
||||
}
|
||||
var branch defaultBranch
|
||||
err = json.Unmarshal(data, &branch)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf(
|
||||
"default_branch json malformed: %v", err)
|
||||
}
|
||||
|
||||
return branch.DefaultBranch, nil
|
||||
}
|
||||
|
||||
// GetFileCreationTime gets the earliest date of a file.
|
||||
func (gcl GhClient) GetFileCreationTime(
|
||||
k GhFileSpec) (time.Time, error) {
|
||||
|
||||
url := gcl.CommitsRequest(k.Repository.FullName, k.Path)
|
||||
|
||||
defaultTime := time.Now()
|
||||
|
||||
resp, err := gcl.GetReposData(url)
|
||||
if err != nil {
|
||||
return defaultTime, fmt.Errorf(
|
||||
"%+v: '%s' could not get metadata: %v", k, url, err)
|
||||
}
|
||||
|
||||
type DateSpec struct {
|
||||
Commit struct {
|
||||
Author struct {
|
||||
Date string `json:"date,omitempty"`
|
||||
} `json:"author,omitempty"`
|
||||
} `json:"commit,omitempty"`
|
||||
}
|
||||
|
||||
_, lastURL := parseGithubLinkFormat(resp.Header.Get("link"))
|
||||
if lastURL != "" {
|
||||
resp, err = gcl.GetReposData(lastURL)
|
||||
if err != nil {
|
||||
return defaultTime, fmt.Errorf(
|
||||
"%+v: '%s' could not get metadata: %v",
|
||||
k, lastURL, err)
|
||||
}
|
||||
}
|
||||
|
||||
defer resp.Body.Close()
|
||||
data, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return defaultTime, fmt.Errorf(
|
||||
"%+v: failed to read metadata: %v", k, err)
|
||||
}
|
||||
var earliestDate []DateSpec
|
||||
err = json.Unmarshal(data, &earliestDate)
|
||||
size := len(earliestDate)
|
||||
if err != nil || size == 0 {
|
||||
return defaultTime, fmt.Errorf(
|
||||
"%+v: server response '%s' not in expected format: %v",
|
||||
k, data, err)
|
||||
}
|
||||
|
||||
return time.Parse(time.RFC3339, earliestDate[size-1].Commit.Author.Date)
|
||||
}
|
||||
|
||||
// TODO(damienr74) change the tickers to actually check the API rate limits
// and reset times, and throttle requests dynamically based on the current
// utilization instead of hardcoding the documented values (per the original
// note, those status calls are not quota'd). This is now especially
// important, since caching the API requests will reduce API quota use (so we
// can actually make more requests in the allotted time period).
//
// See https://developer.github.com/v3/rate_limit/ for details.

// searchRateTicker ticks once every two seconds; throttleSearchAPI waits on it.
var searchRateTicker = time.NewTicker(2 * time.Second)

// contentRateTicker ticks once every second; throttleRepoAPI waits on it.
var contentRateTicker = time.NewTicker(1 * time.Second)
|
||||
|
||||
func throttleSearchAPI() {
|
||||
<-searchRateTicker.C
|
||||
}
|
||||
|
||||
func throttleRepoAPI() {
|
||||
<-contentRateTicker.C
|
||||
}
|
||||
|
||||
// multiError is a list of errors that is itself an error.
type multiError []error

// Error renders the list as "Errors [" and "]" lines with one tab-indented
// line per contained error in between, all joined by newlines.
func (e multiError) Error() string {
	lines := make([]string, 0, len(e)+2)
	lines = append(lines, "Errors [")
	for _, err := range e {
		lines = append(lines, "\t"+err.Error())
	}
	lines = append(lines, "]")
	return strings.Join(lines, "\n")
}
|
||||
|
||||
// GhFileSpec identifies a file in a Github repository. The JSON tags name
// the fields it is decoded from in an API response item.
type GhFileSpec struct {
	// Path is decoded from the "path" field.
	Path string `json:"path,omitempty"`
	// Repository is decoded from the "repository" object.
	Repository struct {
		// API is decoded from "url".
		API string `json:"url,omitempty"`
		// URL is decoded from "html_url".
		URL string `json:"html_url,omitempty"`
		// FullName is decoded from "full_name".
		FullName string `json:"full_name,omitempty"`
	} `json:"repository,omitempty"`
}
|
||||
|
||||
type githubResponse struct {
|
||||
// MaxUint is reserved as a sentinel value.
|
||||
// This is the number of files that match the query.
|
||||
TotalCount uint64 `json:"total_count,omitempty"`
|
||||
|
||||
// Github representation of a file.
|
||||
Items []GhFileSpec `json:"items,omitempty"`
|
||||
}
|
||||
|
||||
type GhResponseInfo struct {
|
||||
*http.Response
|
||||
Parsed *githubResponse
|
||||
Error error
|
||||
NextURL string
|
||||
LastURL string
|
||||
}
|
||||
|
||||
// githubLinkRelPattern matches one entry of a Github "link" header and
// captures its URL and its relation ("last" or "next"). It is compiled once
// instead of on every parseGithubLinkFormat call.
var githubLinkRelPattern = regexp.MustCompile(`<(.*)>.*; rel="(last|next)"`)

// parseGithubLinkFormat extracts the "next" and "last" page URLs from the
// value of a Github "link" header, in that order. A relation that does not
// appear in the header yields an empty string; entries with any other
// relation are ignored.
func parseGithubLinkFormat(links string) (string, string) {
	const (
		linkNext    = "next"
		linkLast    = "last"
		linkInfoURL = 1
		linkInfoRel = 2
	)

	next, last := "", ""

	// The header is a comma separated list of `<url>; rel="name"` entries.
	for _, link := range strings.Split(links, ",") {
		linkParse := githubLinkRelPattern.FindStringSubmatch(link)
		if len(linkParse) != 3 {
			continue
		}

		url := linkParse[linkInfoURL]
		switch linkParse[linkInfoRel] {
		case linkNext:
			next = url
		case linkLast:
			last = url
		}
	}

	return next, last
}
|
||||
|
||||
func (gcl GhClient) parseGithubResponse(getRequest string) GhResponseInfo {
|
||||
resp, err := gcl.SearchGithubAPI(getRequest)
|
||||
requestInfo := GhResponseInfo{
|
||||
Response: resp,
|
||||
Error: err,
|
||||
Parsed: nil,
|
||||
}
|
||||
|
||||
if err != nil || resp == nil {
|
||||
return requestInfo
|
||||
}
|
||||
|
||||
var data []byte
|
||||
defer resp.Body.Close()
|
||||
data, requestInfo.Error = ioutil.ReadAll(resp.Body)
|
||||
if requestInfo.Error != nil {
|
||||
return requestInfo
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
logger.Println("query: ", getRequest)
|
||||
logger.Println("status not OK at the source")
|
||||
logger.Println("header dump", resp.Header)
|
||||
logger.Println("body dump", string(data))
|
||||
requestInfo.Error = fmt.Errorf("request rejected, status '%s'",
|
||||
resp.Status)
|
||||
return requestInfo
|
||||
}
|
||||
|
||||
requestInfo.NextURL, requestInfo.LastURL =
|
||||
parseGithubLinkFormat(resp.Header.Get("link"))
|
||||
|
||||
resultCount := githubResponse{
|
||||
TotalCount: math.MaxUint64,
|
||||
}
|
||||
requestInfo.Error = json.Unmarshal(data, &resultCount)
|
||||
if requestInfo.Error != nil {
|
||||
return requestInfo
|
||||
}
|
||||
|
||||
requestInfo.Parsed = &resultCount
|
||||
|
||||
return requestInfo
|
||||
|
||||
}
|
||||
|
||||
// SearchGithubAPI performs a search query and handles rate limitting for
|
||||
// the 'code/search?' endpoint as well as timed retries in the case of abuse
|
||||
// prevention.
|
||||
func (gcl GhClient) SearchGithubAPI(query string) (*http.Response, error) {
|
||||
throttleSearchAPI()
|
||||
return gcl.getWithRetry(query)
|
||||
}
|
||||
|
||||
// GetReposData performs a search query and handles rate limitting for
|
||||
// the '/repos' endpoint as well as timed retries in the case of abuse
|
||||
// prevention.
|
||||
func (gcl GhClient) GetReposData(query string) (*http.Response, error) {
|
||||
throttleRepoAPI()
|
||||
return gcl.getWithRetry(query)
|
||||
}
|
||||
|
||||
// User content (file contents) is not API rate limited, so there's no use in
|
||||
// throttling this call.
|
||||
func (gcl GhClient) GetRawUserContent(query string) (*http.Response, error) {
|
||||
return gcl.getWithRetry(query)
|
||||
}
|
||||
|
||||
func (gcl GhClient) getWithRetry(
|
||||
query string) (resp *http.Response, err error) {
|
||||
|
||||
resp, err = gcl.client.Get(query)
|
||||
retryCount := gcl.retryCount
|
||||
|
||||
for err == nil &&
|
||||
resp.StatusCode == http.StatusForbidden &&
|
||||
retryCount > 0 {
|
||||
|
||||
retryTime := resp.Header.Get("Retry-After")
|
||||
i, err := strconv.Atoi(retryTime)
|
||||
if err != nil {
|
||||
return resp, fmt.Errorf(
|
||||
"query '%s' forbidden without 'Retry-After'", query)
|
||||
}
|
||||
logger.Printf(
|
||||
"status forbidden, retring %d more times\n", retryCount)
|
||||
|
||||
logger.Printf("waiting %d seconds before retrying\n", i)
|
||||
time.Sleep(time.Second * time.Duration(i))
|
||||
retryCount--
|
||||
resp, err = gcl.client.Get(query)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return resp, fmt.Errorf("query '%s' could not be processed, %v",
|
||||
query, err)
|
||||
}
|
||||
|
||||
return resp, err
|
||||
}
|
||||
222
hack/crawl/crawler/github/queries.go
Normal file
222
hack/crawl/crawler/github/queries.go
Normal file
@@ -0,0 +1,222 @@
|
||||
package github
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Names of the URL parameters that makeRequest sets on a request.
const (
	// perPageArg carries the page size.
	perPageArg = "per_page"
	// accessTokenArg carries the API access token.
	accessTokenArg = "access_token"
)
|
||||
|
||||
// queryField is a single term of a search query. It is an implementation
// detail and not important to the external API.
type queryField struct {
	// name is the qualifier printed before the value ("" for a bare term).
	name string
	// value is the term itself; see String for how it is rendered.
	value interface{}
}
|
||||
|
||||
// Formats a query field.
|
||||
func (qf queryField) String() string {
|
||||
var value string
|
||||
switch v := qf.value.(type) {
|
||||
case string:
|
||||
value = v
|
||||
case rangeFormatter:
|
||||
value = v.RangeString()
|
||||
default:
|
||||
value = fmt.Sprint(v)
|
||||
}
|
||||
|
||||
if qf.name == "" {
|
||||
return value
|
||||
}
|
||||
return fmt.Sprint(qf.name, ":", value)
|
||||
}
|
||||
|
||||
// Example of formating a query:
|
||||
// QueryWith(
|
||||
// Filename("kustomization.yaml"),
|
||||
// Filesize(RangeWithin{64, 192}),
|
||||
// Keyword("copyright"),
|
||||
// Keyword("2019"),
|
||||
// ).String()
|
||||
//
|
||||
// Outputs "q=filename:kustomization.yaml+size:64..192+copyright+2018" which
|
||||
// would search for files that have [64, 192] bytes (inclusive range) and that
|
||||
// contain the keywords 'copyright' and '2019' somewhere in the file.
|
||||
type Query []queryField
|
||||
|
||||
func QueryWith(qfs ...queryField) Query {
|
||||
return qfs
|
||||
}
|
||||
|
||||
func (q Query) String() string {
|
||||
strs := make([]string, 0, len(q))
|
||||
for _, elem := range q {
|
||||
str := elem.String()
|
||||
if str == "" {
|
||||
continue
|
||||
}
|
||||
strs = append(strs, str)
|
||||
}
|
||||
|
||||
query := strings.Join(strs, "+")
|
||||
if query == "" {
|
||||
return query
|
||||
}
|
||||
return "q=" + query
|
||||
}
|
||||
|
||||
// Keyword takes a single word, and formats it according to the Github API.
|
||||
func Keyword(k string) queryField {
|
||||
return queryField{value: k}
|
||||
}
|
||||
|
||||
// Filesize takes a rangeFormatter and formats it according to the Github API.
|
||||
func Filesize(r rangeFormatter) queryField {
|
||||
return queryField{name: "size", value: r}
|
||||
}
|
||||
|
||||
// Filename takes a filename and formats it according to the Github API.
|
||||
func Filename(f string) queryField {
|
||||
return queryField{name: "filename", value: f}
|
||||
}
|
||||
|
||||
// Path takes a filepath and formats it according to the Github API.
|
||||
func Path(p string) queryField {
|
||||
return queryField{name: "path", value: p}
|
||||
}
|
||||
|
||||
// RequestConfig stores the variables that are common to all requests, and
// builds the request URLs:
//  - CodeSearchRequestWith: ask Github to check the code indices given a
//    query.
//  - ContentsRequest: ask Github where to download a resource given a repo
//    and a file path.
//  - CommitsRequest: ask Github to list the commits made on a file. Useful to
//    determine the date of a file.
type RequestConfig struct {
	// perPage is sent as the per_page parameter of every request.
	perPage uint64
	// accessToken is sent as the access_token parameter when non-empty.
	accessToken string
}
|
||||
|
||||
func NewRequestConfig(perPage uint64, accessToken string) RequestConfig {
|
||||
return RequestConfig{
|
||||
perPage: perPage,
|
||||
accessToken: accessToken,
|
||||
}
|
||||
}
|
||||
|
||||
// CodeSearchRequestWith given a list of query parameters that specify the
|
||||
// (patial) query, returns a request object with the (parital) query. Must call
|
||||
// the URL method to get the string value of the URL. See request.CopyWith, to
|
||||
// understand why the request object is useful.
|
||||
func (rc RequestConfig) CodeSearchRequestWith(query Query) request {
|
||||
req := rc.makeRequest("search/code", query)
|
||||
req.vals.Set("sort", "indexed")
|
||||
req.vals.Set("order", "desc")
|
||||
return req
|
||||
}
|
||||
|
||||
// ContentsRequest given the repo name, and the filepath returns a formatted
|
||||
// query for the Github API to find the dowload information of this filepath.
|
||||
func (rc RequestConfig) ContentsRequest(fullRepoName, path string) string {
|
||||
uri := fmt.Sprintf("repos/%s/contents/%s", fullRepoName, path)
|
||||
return rc.makeRequest(uri, Query{}).URL()
|
||||
}
|
||||
|
||||
func (rc RequestConfig) ReposRequest(fullRepoName string) string {
|
||||
uri := fmt.Sprintf("repos/%s", fullRepoName)
|
||||
return rc.makeRequest(uri, Query{}).URL()
|
||||
}
|
||||
|
||||
// CommitsRequest given the repo name, and a filepath returns a formatted query
|
||||
// for the Github API to find the commits that affect this file.
|
||||
func (rc RequestConfig) CommitsRequest(fullRepoName, path string) string {
|
||||
uri := fmt.Sprintf("repos/%s/commits", fullRepoName)
|
||||
return rc.makeRequest(uri, Query{Path(path)}).URL()
|
||||
}
|
||||
|
||||
func (rc RequestConfig) makeRequest(path string, query Query) request {
|
||||
vals := url.Values{}
|
||||
if rc.accessToken != "" {
|
||||
vals.Set(accessTokenArg, rc.accessToken)
|
||||
}
|
||||
vals.Set(perPageArg, fmt.Sprint(rc.perPage))
|
||||
|
||||
return request{
|
||||
url: url.URL{
|
||||
Scheme: "https",
|
||||
Host: "api.github.com",
|
||||
Path: path,
|
||||
},
|
||||
vals: vals,
|
||||
query: query,
|
||||
}
|
||||
}
|
||||
|
||||
type request struct {
|
||||
url url.URL
|
||||
vals url.Values
|
||||
query Query
|
||||
}
|
||||
|
||||
// CopyWith copies the requests and adds the extra query parameters. Usefull
|
||||
// for dynamically adding sizes to a filename only query without modifying it.
|
||||
func (r request) CopyWith(queryParams ...queryField) request {
|
||||
cpy := r
|
||||
cpy.query = append(cpy.query, queryParams...)
|
||||
return cpy
|
||||
}
|
||||
|
||||
// URL encodes the variables and the URL representation into a string.
|
||||
func (r request) URL() string {
|
||||
// Github does not handle URL encoding properly in its API for the
|
||||
// q='...', so the query parameter is added without any encoding
|
||||
// manually.
|
||||
encoded := r.vals.Encode()
|
||||
query := r.query.String()
|
||||
sep := "&"
|
||||
if query == "" {
|
||||
sep = ""
|
||||
}
|
||||
if encoded == "" && query != "" {
|
||||
sep = "?"
|
||||
}
|
||||
r.url.RawQuery = encoded + sep + query
|
||||
return r.url.String()
|
||||
}
|
||||
|
||||
// rangeFormatter describes a range of numbers that can print itself in the
// Github range query format, see
// https://help.github.com/en/articles/understanding-the-search-syntax.
type rangeFormatter interface {
	// RangeString returns the range in Github's query syntax.
	RangeString() string
}
|
||||
|
||||
// RangeLessThan is a range of values strictly less than (<) size.
type RangeLessThan struct {
	size uint64
}

// RangeString formats the range as "<size".
func (r RangeLessThan) RangeString() string {
	return fmt.Sprint("<", r.size)
}
|
||||
|
||||
// RangeGreaterThan is a range of values strictly greater than (>) size.
type RangeGreaterThan struct {
	size uint64
}

// RangeString formats the range as ">size".
func (r RangeGreaterThan) RangeString() string {
	return fmt.Sprint(">", r.size)
}
|
||||
|
||||
// RangeWithin is an inclusive range from start to end.
type RangeWithin struct {
	start uint64
	end   uint64
}

// RangeString formats the range as "start..end".
func (r RangeWithin) RangeString() string {
	return fmt.Sprint(r.start, "..", r.end)
}
|
||||
119
hack/crawl/crawler/github/queries_test.go
Normal file
119
hack/crawl/crawler/github/queries_test.go
Normal file
@@ -0,0 +1,119 @@
|
||||
package github
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestQueryFields(t *testing.T) {
|
||||
testCases := []struct {
|
||||
formatter queryField
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
formatter: Keyword("keyword"),
|
||||
expected: "keyword",
|
||||
},
|
||||
{
|
||||
formatter: Filesize(RangeLessThan{23}),
|
||||
expected: "size:<23",
|
||||
},
|
||||
{
|
||||
formatter: Filesize(RangeWithin{24, 64}),
|
||||
expected: "size:24..64",
|
||||
},
|
||||
{
|
||||
formatter: Filesize(RangeGreaterThan{64}),
|
||||
expected: "size:>64",
|
||||
},
|
||||
{
|
||||
formatter: Path("some/path/to/file"),
|
||||
expected: "path:some/path/to/file",
|
||||
},
|
||||
{
|
||||
formatter: Filename("kustomization.yaml"),
|
||||
expected: "filename:kustomization.yaml",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
if result := test.formatter.String(); result != test.expected {
|
||||
t.Errorf("got (%#v = %s), expected %s", test.formatter, result, test.expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestQueryType(t *testing.T) {
|
||||
testCases := []struct {
|
||||
query Query
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
query: QueryWith(
|
||||
Filesize(RangeWithin{24, 64}),
|
||||
Filename("kustomization.yaml"),
|
||||
Keyword("keyword1"),
|
||||
Keyword("keyword2"),
|
||||
),
|
||||
expected: "q=size:24..64+filename:kustomization.yaml+keyword1+keyword2",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
if queryStr := test.query.String(); queryStr != test.expected {
|
||||
t.Errorf("got (%#v = %s), expected %s", test.query, queryStr, test.expected)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func TestGithubSearchQuery(t *testing.T) {
|
||||
const (
|
||||
accessToken = "random_token"
|
||||
perPage = 100
|
||||
)
|
||||
|
||||
testCases := []struct {
|
||||
rc RequestConfig
|
||||
codeQuery Query
|
||||
fullRepoName string
|
||||
path string
|
||||
expectedCodeQuery string
|
||||
expectedContentsQuery string
|
||||
expectedCommitsQuery string
|
||||
}{
|
||||
{
|
||||
rc: RequestConfig{
|
||||
perPage: perPage,
|
||||
accessToken: accessToken,
|
||||
},
|
||||
codeQuery: Query{
|
||||
Filename("kustomization.yaml"),
|
||||
Filesize(RangeWithin{64, 128}),
|
||||
},
|
||||
fullRepoName: "kubernetes-sigs/kustomize",
|
||||
path: "examples/helloWorld/kustomization.yaml",
|
||||
|
||||
expectedCodeQuery: "https://api.github.com/search/code?" +
|
||||
"access_token=random_token&order=desc&per_page=100&sort=indexed&q=filename:kustomization.yaml+size:64..128",
|
||||
|
||||
expectedContentsQuery: "https://api.github.com/repos/kubernetes-sigs/kustomize/contents/" +
|
||||
"examples/helloWorld/kustomization.yaml?access_token=random_token&per_page=100",
|
||||
|
||||
expectedCommitsQuery: "https://api.github.com/repos/kubernetes-sigs/kustomize/commits?" +
|
||||
"access_token=random_token&per_page=100&q=path:examples/helloWorld/kustomization.yaml",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
if result := test.rc.CodeSearchRequestWith(test.codeQuery).URL(); result != test.expectedCodeQuery {
|
||||
t.Errorf("Got code query: %s, expected %s", result, test.expectedCodeQuery)
|
||||
}
|
||||
|
||||
if result := test.rc.ContentsRequest(test.fullRepoName, test.path); result != test.expectedContentsQuery {
|
||||
t.Errorf("Got contents query: %s, expected %s", result, test.expectedContentsQuery)
|
||||
}
|
||||
if result := test.rc.CommitsRequest(test.fullRepoName, test.path); result != test.expectedCommitsQuery {
|
||||
t.Errorf("Got commits query: %s, expected %s", result, test.expectedCommitsQuery)
|
||||
}
|
||||
}
|
||||
}
|
||||
378
hack/crawl/crawler/github/split_search_ranges.go
Normal file
378
hack/crawl/crawler/github/split_search_ranges.go
Normal file
@@ -0,0 +1,378 @@
|
||||
package github
|
||||
|
||||
// GitHub only returns at most 1000 results per search query,
|
||||
// this is problematic if you want to retrieve all the results for a given
|
||||
// search query. However, GitHub allows you to specify as much as you want per
|
||||
// query to make things more specific. Specifically for files, GitHub allows
|
||||
// you to specify their sizes with range queries. This is very convenient
|
||||
// since it allows us to split the search into disjoint sets/shards of results
|
||||
// from the different file size ranges.
|
||||
//
|
||||
// Some important factors to consider:
|
||||
//
|
||||
// - These queries are rate limited by the API to roughly one query every two
|
||||
// seconds.
|
||||
//
|
||||
// - The search space for file sizes is in bytes, from 0B to < 512KiB (this is
|
||||
// a huge search space that cannot be probed linearly in a timely manner if
|
||||
// granularity is to be expected).
|
||||
//
|
||||
// - If you have K files there will likely be ~K/1000 sets that you have to find
|
||||
// from this search space in order to get all of the results.
|
||||
//
|
||||
// - If you have O(K) sets it is unlikely that they are all of the same size,
|
||||
// since file sizes are mostly power law distributed. That means that the range
|
||||
// might be significantly smaller for 1000 small files, than it is for
|
||||
// 1000 large files.
|
||||
//
|
||||
// - This method is a best effort approach. There are some limitations to what
|
||||
// it can and can't do, so please note the following:
|
||||
//
|
||||
// + There may very well be a filesize that has more than 1000 results.
|
||||
// this method cannot help in this case. However, requerying over time
|
||||
// (days/weeks/months) while sorting by last indexed values may be
|
||||
// sufficient to eventually get all of the results.
|
||||
//
|
||||
// + It's possible that the github API returns inconsistent counts. This
|
||||
// is problematic in most cases, since it can cause many issues if the
|
||||
// case is not handled properly. For instance, if you requested the
|
||||
// number of files of an interval from size:0..64 and get that there
|
||||
// are 900 results, you may query at size:0..96 and get that there
|
||||
// are 800 results. To guarantee that this approach completes and does
|
||||
// not get into a query loop over the same intervals, it will retry a few
|
||||
// times and take the largest of the results or the largest previously
|
||||
// queried value from another range (in this case, the implementation
|
||||
// could decide that size:0..96 must have 900 results). This makes the
|
||||
// approach best effort even if there are no single file sizes of over
|
||||
// 1000 results.
|
||||
//
|
||||
//
|
||||
// The approach that was taken to solve this problem is the following:
|
||||
//
|
||||
// 1. Determine the total number of results by querying from the lower bound
|
||||
// to the upper bound (size:0..max). If there are less than 1000 files,
|
||||
// return a single range of values (size:0..max) since all results can be
|
||||
// retrieved.
|
||||
//
|
||||
// 2. Otherwise, set a target number of files to be 1000.
|
||||
//
|
||||
// 3. Binary search for the range from 0..r that provides a file count that is
|
||||
// less than or equal to the target. Once this value is found, store the
|
||||
// upper bound of range (r). If r is the same as the previous value, (or 0)
|
||||
// increase r by one (this guarantees progress, but will miss out on some
|
||||
// results).
|
||||
//
|
||||
// 4. Increase the target by 1000.
|
||||
//
|
||||
// 5. Repeat steps 3 and 4 until the target is at or exceeds the total number
|
||||
// of files.
|
||||
//
|
||||
//
|
||||
// In general there are other ways to get all of the files from GitHub. In
|
||||
// some cases it would be sufficient to just get the files that are being
|
||||
// updated/indexed by github periodically to update the corpus, so this
|
||||
// complicated approach does not have to be run every time. However, for
|
||||
// some searches, there may be too many results on a time interval to do
|
||||
// this simple update search limited to only 1000 results.
|
||||
//
|
||||
// There is also a more sophisticated approach that may yield better
|
||||
// performance:
|
||||
// - Perform this search once and create a prior distribution of file sizes.
|
||||
// Each time you want to retrieve the results of the query, scale the
|
||||
// prior of expected ranges to the current number of files. From each
|
||||
// expected range of 1000 files, perform an exponential search to find the
|
||||
// lower bound of the range. This would likely reduce the total number
|
||||
// of queries by a significant amount since it would only have to search
|
||||
// for a small set of values around each likely range boundary.
|
||||
//
|
||||
// However, actually retrieving the files will be the bottleneck operation
|
||||
// since the number of queries to find the ranges will be close to:
|
||||
// log2(maxFileSize) * totalResults / 1000 ~= totalResults / 50
|
||||
// whereas the number of queries to actually get all of the search results
|
||||
// are close to:
|
||||
// apiCallsPerResult * 10(pages) * 100(resultsPerPage) * totalResults / 1000
|
||||
// = apiCallsPerResult * totalResults.
|
||||
//
|
||||
// So it could very well take apiCallsPerResult * 50 times longer to actually
|
||||
// fetch the results (assuming the quotas for the API calls are the same as the
|
||||
// search API), than it does to perform these range searches.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
// Files cannot be more than 2^19 bytes, according to
// https://help.github.com/en/articles/searching-code#considerations-for-code-search
const (
	// githubMaxFileSize is the upper bound, in bytes, of the file size
	// search space (1 << 19 = 524288).
	githubMaxFileSize = uint64(1 << 19)
	// githubMaxResultsPerQuery is the number of results that a single
	// search query is assumed to be able to return.
	githubMaxResultsPerQuery = uint64(1000)
)
|
||||
|
||||
// Interface instead of struct for testing purposes.
|
||||
// Not expecting to have multiple implementations.
|
||||
type cachedSearch interface {
|
||||
CountResults(uint64) (uint64, error)
|
||||
RequestString(filesize rangeFormatter) string
|
||||
}
|
||||
|
||||
// githubCachedSearch is a simple data structure that maps the upper bound (r) of a
// range from 0 to r to the number of files whose size is between 0 and r bytes
|
||||
// (inclusive). It also guarantees that the counts are monotonically increasing
|
||||
// (not strict) as the value for r increases, by looking at the maximal
|
||||
// previous file count for the value that precedes r in the cache.
|
||||
//
|
||||
// It uses a bit trick to be more efficient in detecting
|
||||
// inconsistencies in the returned data from the Github API.
|
||||
// Therefore, the cache expects a search to always start at 0, and
|
||||
// it expects the max file size to be a power of 2. If this is to be changed
|
||||
// there are a few considerations to keep in mind:
|
||||
//
|
||||
// 1. The cache is only efficient if the queries can be reused, so if
|
||||
// the first chunk of files lives in the range 0..x, continuing the
|
||||
// search for the next chunk from x+1..max (while asymptotically sane)
|
||||
// may actually be less efficient since the cache is essentially reset
|
||||
// at every interval. This leads to a larger number of requests in
|
||||
// practice, and requests are what's expensive (rate limits).
|
||||
//
|
||||
// 2. The github API is not perfectly monotonic.. (this is somewhat
|
||||
// problematic). The current cache implementation looks at the
|
||||
// predecessor entry to find out if the current value is monotonic.
|
||||
// This is where the bit trick is used, since each step in the binary
|
||||
// search either adds or skips adding a decreasing power of 2 to the query
|
||||
// value, we can remove the least significant set bit to find the
|
||||
// predecessor in constant time. Ultimately since the search is rate
|
||||
// limited, we could also easily afford to compute this in linear time
|
||||
// by iterating over cached values. So this trick is not crucial to the
|
||||
// cache's performance.
|
||||
type githubCachedSearch struct {
	// cache maps the upper bound of a size range (starting at 0) to the
	// result count recorded for that range.
	cache map[uint64]uint64
	// gcl is the client used to issue the count queries.
	gcl GhClient
	// baseRequest is the code search request that every range query is
	// derived from (see RequestString).
	baseRequest request
}
|
||||
|
||||
func newCache(client GhClient, query Query) githubCachedSearch {
|
||||
return githubCachedSearch{
|
||||
cache: map[uint64]uint64{
|
||||
0: 0,
|
||||
},
|
||||
gcl: client,
|
||||
baseRequest: client.CodeSearchRequestWith(query),
|
||||
}
|
||||
}
|
||||
|
||||
func (c githubCachedSearch) CountResults(upperBound uint64) (uint64, error) {
|
||||
count, cached := c.cache[upperBound]
|
||||
if cached {
|
||||
return count, nil
|
||||
}
|
||||
|
||||
sizeRange := RangeWithin{0, upperBound}
|
||||
rangeRequest := c.RequestString(sizeRange)
|
||||
|
||||
result := c.gcl.parseGithubResponse(rangeRequest)
|
||||
if result.Error != nil {
|
||||
return count, result.Error
|
||||
}
|
||||
|
||||
// As range search uses powers of 2 for binary search, the previously
|
||||
// cached value is easy to find by removing the least significant set
|
||||
// bit from the current upperBound, since each step of the search adds
|
||||
// least significant set bit.
|
||||
//
|
||||
// Finding the predecessor could also be implemented by iterating over
|
||||
// the map to find the largest key that is smaller than upperBound if
|
||||
// this approach deemed too complex.
|
||||
trail := bits.TrailingZeros64(upperBound)
|
||||
prev := uint64(0)
|
||||
if trail != 64 {
|
||||
prev = upperBound - (1 << uint64(trail))
|
||||
}
|
||||
|
||||
// Sometimes the github API is not monotonically increasing, or ouputs
|
||||
// an erroneous value of 0, or 1. This logic makes sure that it was not
|
||||
// erroneous, and that the sequence continues to be monotonic by setting
|
||||
// the current query count to match the previous value. which at least
|
||||
// guarantees that the range search terminates.
|
||||
//
|
||||
// On the other hand, if files are added, then we way loose out on some
|
||||
// files in a reviously completed range, but these files should be there
|
||||
// the next time the crawler runs, so this is not really problematic.
|
||||
retryMonotonicCount := 4
|
||||
for result.Parsed.TotalCount < c.cache[prev] {
|
||||
logger.Printf(
|
||||
"Retrying query... current lower bound: %d, got: %d\n",
|
||||
c.cache[prev], result.Parsed.TotalCount)
|
||||
|
||||
result = c.gcl.parseGithubResponse(rangeRequest)
|
||||
if result.Error != nil {
|
||||
return count, result.Error
|
||||
}
|
||||
|
||||
retryMonotonicCount--
|
||||
if retryMonotonicCount <= 0 {
|
||||
result.Parsed.TotalCount = c.cache[prev]
|
||||
logger.Println(
|
||||
"Retries for monotonic check exceeded,",
|
||||
" setting value to match predecessor")
|
||||
}
|
||||
}
|
||||
|
||||
count = result.Parsed.TotalCount
|
||||
logger.Printf("Caching new query %s, with count %d\n",
|
||||
sizeRange.RangeString(), count)
|
||||
c.cache[upperBound] = count
|
||||
return count, nil
|
||||
}
|
||||
|
||||
func (c githubCachedSearch) RequestString(filesize rangeFormatter) string {
|
||||
return c.baseRequest.CopyWith(Filesize(filesize)).URL()
|
||||
}
|
||||
|
||||
// Outputs a (possibly incomplete) list of ranges to query to find most search
|
||||
// results as permissible by the search github search API. Github search only
|
||||
// allows 1,000 results per query (paginated).
|
||||
// Source: https://developer.github.com/v3/search/
|
||||
//
|
||||
// This leaves the possibility of having file sizes with more than 1000 results,
|
||||
// This would mean that the search as it is could not find all files. If queries
|
||||
// are sorted by last indexed, and retrieved on regular intervals, it should be
|
||||
// sufficient to get most if not all documents.
|
||||
func FindRangesForRepoSearch(cache cachedSearch) ([]string, error) {
|
||||
totalFiles, err := cache.CountResults(githubMaxFileSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
logger.Println("total files: ", totalFiles)
|
||||
|
||||
if githubMaxResultsPerQuery >= totalFiles {
|
||||
return []string{
|
||||
cache.RequestString(RangeWithin{0, githubMaxFileSize}),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Find all the ranges of file sizes such that all files are queryable
|
||||
// using the Github API. This does not compute an optimal ranges, since
|
||||
// the number of queries needed to get the information required to
|
||||
// compute an optimal range is expected to be much larger than the
|
||||
// number of queries performed this way.
|
||||
//
|
||||
// The number of ranges is k = (number of files)/1000, and finding a
|
||||
// range is logarithmic in the max file size (n = filesize). This means
|
||||
// that preprocessing takes O(k * lg n) queries to find the ranges with
|
||||
// a binary search over file sizes.
|
||||
//
|
||||
// My intuition is that this approach is competitive to a perfectly
|
||||
// optimal solution, but I didn't actually take the time to do a
|
||||
// rigorous proof. Intuitively, since files sizes are typically power
|
||||
// law distibuted the binary search will be very skewed towards the
|
||||
// smaller file ranges. This means that in practice this approach will
|
||||
// make fewer than (#files/1000)*(log(n) = 19) queries for
|
||||
// preprocessing, since it reuses a lot of the queries in the denser
|
||||
// ranges. Furthermore, because of the distribution, it should be very
|
||||
// easy to find ranges that are very close to the upper bound, up to
|
||||
// the limiting factor of having no more than 1000 files accessible per
|
||||
// range.
|
||||
filesAccessible := uint64(0)
|
||||
sizes := make([]uint64, 0)
|
||||
for filesAccessible < totalFiles {
|
||||
target := filesAccessible + githubMaxResultsPerQuery
|
||||
if target >= totalFiles {
|
||||
break
|
||||
}
|
||||
|
||||
logger.Printf("%d accessible files, next target = %d\n",
|
||||
filesAccessible, target)
|
||||
|
||||
cur, err := lowerBoundFileCount(cache, target)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there are more than 1000 files in the next bucket, we must
|
||||
// advance anyway and lose out on some files :(.
|
||||
if l := len(sizes); l > 0 && sizes[l-1] == cur {
|
||||
cur++
|
||||
}
|
||||
|
||||
nextAccessible, err := cache.CountResults(cur)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(
|
||||
"cache should be populated at %d already, got %v",
|
||||
cur, err)
|
||||
}
|
||||
if nextAccessible < filesAccessible {
|
||||
return nil, fmt.Errorf(
|
||||
"number of results dropped from %d to %d within range search",
|
||||
filesAccessible, nextAccessible)
|
||||
}
|
||||
|
||||
filesAccessible = nextAccessible
|
||||
if nextAccessible < totalFiles {
|
||||
sizes = append(sizes, cur)
|
||||
}
|
||||
}
|
||||
|
||||
return formatFilesizeRanges(cache, sizes), nil
|
||||
}
|
||||
|
||||
// lowerBoundFileCount finds the filesize range from [0, return value] that has
|
||||
// the largest file count that is smaller than or equal to
|
||||
// githubMaxResultsPerQuery. It is important to note that this returned value
|
||||
// could already be in a previous range if the next file size has more than 1000
|
||||
// results. It is left to the caller to handle this bit of logic and guarantee
|
||||
// forward progession in this case.
|
||||
func lowerBoundFileCount(
|
||||
cache cachedSearch, targetFileCount uint64) (uint64, error) {
|
||||
|
||||
// Binary search for file sizes that make up the next <=1000 element
|
||||
// chunk.
|
||||
cur := uint64(0)
|
||||
increase := githubMaxFileSize / 2
|
||||
|
||||
for increase > 0 {
|
||||
mid := cur + increase
|
||||
|
||||
count, err := cache.CountResults(mid)
|
||||
if err != nil {
|
||||
return count, err
|
||||
}
|
||||
|
||||
if count <= targetFileCount {
|
||||
cur = mid
|
||||
}
|
||||
|
||||
if count == targetFileCount {
|
||||
break
|
||||
}
|
||||
|
||||
increase /= 2
|
||||
}
|
||||
|
||||
return cur, nil
|
||||
}
|
||||
|
||||
func formatFilesizeRanges(cache cachedSearch, sizes []uint64) []string {
|
||||
ranges := make([]string, 0, len(sizes)+1)
|
||||
|
||||
if len(sizes) > 0 {
|
||||
ranges = append(ranges, cache.RequestString(
|
||||
RangeLessThan{sizes[0] + 1},
|
||||
))
|
||||
}
|
||||
|
||||
for i := 0; i < len(sizes)-1; i += 1 {
|
||||
ranges = append(ranges, cache.RequestString(
|
||||
RangeWithin{sizes[i] + 1, sizes[i+1]},
|
||||
))
|
||||
|
||||
if i != len(sizes)-2 {
|
||||
continue
|
||||
}
|
||||
ranges = append(ranges, cache.RequestString(
|
||||
RangeGreaterThan{sizes[i+1]},
|
||||
))
|
||||
}
|
||||
|
||||
return ranges
|
||||
}
|
||||
90
hack/crawl/crawler/github/split_search_ranges_test.go
Normal file
90
hack/crawl/crawler/github/split_search_ranges_test.go
Normal file
@@ -0,0 +1,90 @@
|
||||
package github
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// testCachedSearch is a cachedSearch fake backed entirely by a fixed map.
type testCachedSearch struct {
	// cache maps an upper bound to the count CountResults returns for it.
	cache map[uint64]uint64
}
|
||||
|
||||
func (c testCachedSearch) CountResults(upperBound uint64) (uint64, error) {
|
||||
fmt.Printf("CountResults(%05x)\n", upperBound)
|
||||
count, ok := c.cache[upperBound]
|
||||
if !ok {
|
||||
return count, fmt.Errorf("cache not set at %x", upperBound)
|
||||
}
|
||||
return count, nil
|
||||
}
|
||||
|
||||
func (c testCachedSearch) RequestString(filesize rangeFormatter) string {
|
||||
return filesize.RangeString()
|
||||
}
|
||||
|
||||
// TODO(damienr74) make tests easier to write.. I'm thinking I can make the test
|
||||
// cache take in a list of (filesize, count) pairs and it can populate the cache
|
||||
// without relying on how the implementation will create queries. This was only
|
||||
// a quick and dirty test to make sure that modifications are not going to break
|
||||
// the functionality.
|
||||
func TestRangeSplitting(t *testing.T) {
|
||||
// Keys follow the binary search depending on whether or not the range
|
||||
// is too small/large to find close to optimal filesize ranges. This
|
||||
// test is heavily tied to the fact that the search is using powers of two
|
||||
// to make progress in the search (hence the use of hexadecimal values).
|
||||
cache := testCachedSearch{
|
||||
map[uint64]uint64{
|
||||
0x80000: 5000,
|
||||
0x40000: 5000,
|
||||
0x20000: 5000,
|
||||
0x10000: 5000,
|
||||
0x08000: 5000,
|
||||
0x04000: 5000,
|
||||
0x02000: 5000,
|
||||
0x01000: 5000,
|
||||
0x00fff: 3950,
|
||||
0x00ffe: 3950,
|
||||
0x00ffc: 3950,
|
||||
0x00ff8: 3950,
|
||||
0x00ff0: 3950,
|
||||
0x00fe0: 3950,
|
||||
0x00fc0: 3950,
|
||||
0x00f80: 3950,
|
||||
0x00f00: 3950,
|
||||
0x00e00: 3950,
|
||||
0x00c00: 3950,
|
||||
0x00800: 3950,
|
||||
0x00400: 3950,
|
||||
0x00200: 3688,
|
||||
0x00180: 3028,
|
||||
0x00100: 2999,
|
||||
0x000c0: 2448,
|
||||
0x00080: 1999,
|
||||
0x00070: 1600,
|
||||
0x0006c: 1003,
|
||||
0x0006b: 1001,
|
||||
0x0006a: 999,
|
||||
0x00068: 999,
|
||||
0x00060: 999,
|
||||
0x00040: 999,
|
||||
0x00000: 0,
|
||||
},
|
||||
}
|
||||
|
||||
requests, err := FindRangesForRepoSearch(cache)
|
||||
if err != nil {
|
||||
t.Errorf("Error while finding ranges: %v", err)
|
||||
}
|
||||
expected := []string{
|
||||
"<107", // cache.RequestString(RangeLessThan{0x6b}),
|
||||
"107..128", // cache.RequestString(RangeWithin{0x6b, 0x80}),
|
||||
"129..256", // cache.RequestString(RangeWithin{0x81, 0x100}),
|
||||
"257..4095", // cache.RequestString(RangeWithin{0x101, 0xfff}),
|
||||
">4095", // cache.RequestString(RangeGreaterThan{0xfff}),
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(requests, expected) {
|
||||
t.Errorf("Expected requests (%v) to equal (%v)", requests, expected)
|
||||
}
|
||||
}
|
||||
201
hack/crawl/doc/doc.go
Normal file
201
hack/crawl/doc/doc.go
Normal file
@@ -0,0 +1,201 @@
|
||||
package doc
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/k8sdeps/kunstruct"
|
||||
"sigs.k8s.io/kustomize/api/pgmconfig"
|
||||
"sigs.k8s.io/kustomize/api/types"
|
||||
"sigs.k8s.io/yaml"
|
||||
)
|
||||
|
||||
// fileReader splits raw resource bytes into individual objects; readBytes
// uses it for files that are not kustomization files.
var fileReader = kunstruct.NewKunstructuredFactoryImpl()
|
||||
|
||||
// KustomizationDocument is meant to be used as the elasticsearch document
// type. Fields are serialized as-is to elasticsearch, where indices are built
// to facilitate text search queries. Identifiers, Values, FilePath,
// RepositoryURL and DocumentData are meant to be searched for text queries
// directly, while the other fields can either be used as a filter, or as
// additional metadata displayed in the UI.
//
// The fields of the document and their purpose are listed below:
//   - DocumentData contains the contents of the kustomization file.
//   - Kinds represents the kubernetes Kinds that are in this file.
//   - Identifiers are a list of (partial and full) identifier paths that can
//     be found by users. Each part of a path is delimited by ":" e.g.
//     spec:replicas.
//   - Values are a list of identifier paths and their values that can be
//     found by search queries. The path is delimited by ":" and the value
//     follows the "=" symbol e.g. spec:replicas=4.
//   - FilePath is the path of the file.
//   - RepositoryURL is the URL of the source repository.
//   - CreationTime is the time at which the file was created.
//
// Representing each Identifier and Value as a flat string representation
// facilitates the use of complex text search features from elasticsearch such
// as fuzzy searching, regex, wildcards, etc.
type KustomizationDocument struct {
	Document
	Kinds       []string `json:"kinds,omitempty"`
	Identifiers []string `json:"identifiers,omitempty"`
	Values      []string `json:"values,omitempty"`
}
|
||||
|
||||
// set is a collection of unique strings; only the keys carry information.
type set map[string]struct{}
|
||||
|
||||
// Implements the CrawlerDocument interface.
|
||||
func (doc *KustomizationDocument) GetResources() ([]*Document, error) {
|
||||
isResource := true
|
||||
for _, suffix := range pgmconfig.RecognizedKustomizationFileNames() {
|
||||
if strings.HasSuffix(doc.FilePath, "/"+suffix) {
|
||||
isResource = false
|
||||
}
|
||||
}
|
||||
if isResource {
|
||||
return []*Document{}, nil
|
||||
}
|
||||
|
||||
content := []byte(doc.DocumentData)
|
||||
content, err := FixKustomizationPreUnmarshallingNonFatal(content)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not fix kustomize file: %v", err)
|
||||
}
|
||||
|
||||
var k types.Kustomization
|
||||
err = yaml.Unmarshal(content, &k)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(
|
||||
"could not parse kustomization: %v", err)
|
||||
}
|
||||
k.FixKustomizationPostUnmarshalling()
|
||||
|
||||
res := make([]*Document, 0, len(k.Resources))
|
||||
for _, r := range k.Resources {
|
||||
next, err := doc.Document.FromRelativePath(r)
|
||||
if err != nil {
|
||||
fmt.Printf("GetResources error: %v\n", err)
|
||||
continue
|
||||
}
|
||||
res = append(res, &next)
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (doc *KustomizationDocument) readBytes() ([]map[string]interface{}, error) {
|
||||
data := []byte(doc.DocumentData)
|
||||
|
||||
for _, suffix := range pgmconfig.RecognizedKustomizationFileNames() {
|
||||
if !strings.HasSuffix(doc.FilePath, "/"+suffix) {
|
||||
continue
|
||||
}
|
||||
var config map[string]interface{}
|
||||
err := yaml.Unmarshal(data, &config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(
|
||||
"unable to parse kustomization: %v", err)
|
||||
}
|
||||
return []map[string]interface{}{config}, nil
|
||||
}
|
||||
|
||||
configs := make([]map[string]interface{}, 0)
|
||||
ks, err := fileReader.SliceFromBytes(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to parse resource: %v", err)
|
||||
}
|
||||
for _, k := range ks {
|
||||
configs = append(configs, k.Map())
|
||||
}
|
||||
|
||||
return configs, nil
|
||||
}
|
||||
|
||||
func (doc *KustomizationDocument) ParseYAML() error {
|
||||
doc.Identifiers = make([]string, 0)
|
||||
doc.Values = make([]string, 0)
|
||||
doc.Kinds = make([]string, 0, 1)
|
||||
|
||||
identifierSet := make(set)
|
||||
valueSet := make(set)
|
||||
getKind := func(m map[string]interface{}) string {
|
||||
const defaultStr = "Kustomization"
|
||||
kind, ok := m["kind"]
|
||||
if !ok {
|
||||
return defaultStr
|
||||
}
|
||||
if str, ok := kind.(string); ok && str != "" {
|
||||
return str
|
||||
}
|
||||
return defaultStr
|
||||
}
|
||||
|
||||
ks, err := doc.readBytes()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, contents := range ks {
|
||||
doc.Kinds = append(doc.Kinds, getKind(contents))
|
||||
createFlatStructure(identifierSet, valueSet, contents)
|
||||
}
|
||||
|
||||
for val := range valueSet {
|
||||
doc.Values = append(doc.Values, val)
|
||||
}
|
||||
|
||||
for key := range identifierSet {
|
||||
doc.Identifiers = append(doc.Identifiers, key)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func createFlatStructure(identifierSet set, valueSet set, contents map[string]interface{}) {
|
||||
type Map struct {
|
||||
data map[string]interface{}
|
||||
prefix string
|
||||
}
|
||||
|
||||
toVisit := []Map{
|
||||
{
|
||||
data: contents,
|
||||
prefix: "",
|
||||
},
|
||||
}
|
||||
|
||||
for i := 0; i < len(toVisit); i++ {
|
||||
visiting := toVisit[i]
|
||||
for k, v := range visiting.data {
|
||||
identifier := fmt.Sprintf("%s:%s", visiting.prefix, k)
|
||||
// noop after the first iteration.
|
||||
identifier = strings.TrimLeft(identifier, ":")
|
||||
|
||||
// Recursive function traverses structure to find
|
||||
// identifiers and values. These later get formatted
|
||||
// into doc.Identifiers and doc.Values respectively.
|
||||
var traverseStructure func(interface{})
|
||||
traverseStructure = func(arg interface{}) {
|
||||
switch value := arg.(type) {
|
||||
case map[string]interface{}:
|
||||
toVisit = append(toVisit, Map{
|
||||
data: value,
|
||||
prefix: identifier,
|
||||
})
|
||||
case []interface{}:
|
||||
for _, val := range value {
|
||||
traverseStructure(val)
|
||||
}
|
||||
case interface{}:
|
||||
esc := fmt.Sprintf("%v", value)
|
||||
|
||||
valuePath := fmt.Sprintf("%s=%v",
|
||||
identifier, esc)
|
||||
valueSet[valuePath] = struct{}{}
|
||||
}
|
||||
}
|
||||
traverseStructure(v)
|
||||
|
||||
identifierSet[identifier] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
254
hack/crawl/doc/doc_test.go
Normal file
254
hack/crawl/doc/doc_test.go
Normal file
@@ -0,0 +1,254 @@
|
||||
package doc
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseYAML(t *testing.T) {
|
||||
testCases := []struct {
|
||||
identifiers []string
|
||||
values []string
|
||||
kinds []string
|
||||
filepath string
|
||||
yaml string
|
||||
}{
|
||||
{
|
||||
identifiers: []string{
|
||||
"namePrefix",
|
||||
"metadata",
|
||||
"metadata:name",
|
||||
"kind",
|
||||
},
|
||||
values: []string{
|
||||
"kind=",
|
||||
"namePrefix=dev-",
|
||||
"metadata:name=app",
|
||||
},
|
||||
kinds: []string{
|
||||
"Kustomization",
|
||||
},
|
||||
filepath: "some/path/to/kustomization.yaml",
|
||||
yaml: `
|
||||
namePrefix: dev-
|
||||
metadata:
|
||||
name: app
|
||||
kind: ""
|
||||
`,
|
||||
},
|
||||
{
|
||||
identifiers: []string{
|
||||
"namePrefix",
|
||||
"metadata",
|
||||
"metadata:name",
|
||||
"metadata:spec",
|
||||
"metadata:spec:replicas",
|
||||
"kind",
|
||||
"replicas",
|
||||
"replicas:name",
|
||||
"replicas:count",
|
||||
"resource",
|
||||
},
|
||||
values: []string{
|
||||
"namePrefix=dev-",
|
||||
"metadata:name=n1",
|
||||
"metadata:spec:replicas=3",
|
||||
"kind=Kustomization",
|
||||
"replicas:name=n1",
|
||||
"replicas:name=n2",
|
||||
"replicas:count=3",
|
||||
"resource=file1.yaml",
|
||||
"resource=file2.yaml",
|
||||
},
|
||||
kinds: []string{
|
||||
"Kustomization",
|
||||
},
|
||||
filepath: "./kustomization.yaml",
|
||||
yaml: `
|
||||
namePrefix: dev-
|
||||
# map of map
|
||||
metadata:
|
||||
name: n1
|
||||
spec:
|
||||
replicas: 3
|
||||
kind: Kustomization
|
||||
|
||||
#list of map
|
||||
replicas:
|
||||
- name: n1
|
||||
count: 3
|
||||
- name: n2
|
||||
count: 3
|
||||
|
||||
# list
|
||||
resource:
|
||||
- file1.yaml
|
||||
- file2.yaml
|
||||
`,
|
||||
},
|
||||
{
|
||||
identifiers: []string{
|
||||
"kind",
|
||||
"metadata",
|
||||
"metadata:name",
|
||||
},
|
||||
values: []string{
|
||||
"kind=Deployment",
|
||||
"kind=Service",
|
||||
"kind=Custom",
|
||||
"metadata:name=app",
|
||||
"metadata:name=app-service",
|
||||
"metadata:name=app-crd",
|
||||
},
|
||||
kinds: []string{
|
||||
"Deployment",
|
||||
"Service",
|
||||
"Custom",
|
||||
},
|
||||
filepath: "resources.yaml",
|
||||
yaml: `
|
||||
---
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: app
|
||||
---
|
||||
kind: Service
|
||||
metadata:
|
||||
name: app-service
|
||||
---
|
||||
kind: Custom
|
||||
metadata:
|
||||
name: app-crd
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
doc := KustomizationDocument{
|
||||
Document: Document{
|
||||
DocumentData: test.yaml,
|
||||
FilePath: test.filepath,
|
||||
},
|
||||
}
|
||||
|
||||
err := doc.ParseYAML()
|
||||
if err != nil {
|
||||
t.Errorf("Document error error: %s", err)
|
||||
}
|
||||
|
||||
cmpStrings := func(got, expected []string, label string) {
|
||||
sort.Strings(got)
|
||||
sort.Strings(expected)
|
||||
|
||||
if !reflect.DeepEqual(got, expected) {
|
||||
t.Errorf("Expected %s (%v) to be equal to (%v)\n",
|
||||
label,
|
||||
strings.Join(got, ","),
|
||||
strings.Join(expected, ","))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
cmpStrings(doc.Identifiers, test.identifiers, "identifiers")
|
||||
cmpStrings(doc.Values, test.values, "values")
|
||||
cmpStrings(doc.Kinds, test.kinds, "kinds")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetResources(t *testing.T) {
|
||||
tests := []struct {
|
||||
doc KustomizationDocument
|
||||
resources []*Document
|
||||
}{
|
||||
{
|
||||
doc: KustomizationDocument{
|
||||
Document: Document{
|
||||
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||
FilePath: "some/path/to/kdir/kustomization.yaml",
|
||||
DocumentData: `
|
||||
bases:
|
||||
- ../base
|
||||
- ../otherbase
|
||||
|
||||
resources:
|
||||
- file.yaml
|
||||
- https://github.com/kubernetes-sigs/kustomize/examples/helloWorld?ref=v3.1.0
|
||||
`},
|
||||
},
|
||||
resources: []*Document{
|
||||
{
|
||||
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||
FilePath: "some/path/to/base",
|
||||
},
|
||||
{
|
||||
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||
FilePath: "some/path/to/otherbase",
|
||||
},
|
||||
{
|
||||
RepositoryURL: "sigs.k8s.io/kustomize",
|
||||
FilePath: "some/path/to/kdir/file.yaml",
|
||||
},
|
||||
{
|
||||
RepositoryURL: "https://github.com/kubernetes-sigs/kustomize",
|
||||
FilePath: "examples/helloWorld",
|
||||
DefaultBranch: "v3.1.0",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
doc: KustomizationDocument{
|
||||
Document: Document{
|
||||
RepositoryURL: "https://github.com/some/repo",
|
||||
FilePath: "some/resource.yaml",
|
||||
DocumentData: `
|
||||
bases:
|
||||
- ../base
|
||||
- ../overlay
|
||||
|
||||
resources:
|
||||
- https://github.com/kubernetes-sigs/kustomize/examples/helloWorld?ref=v3.1.0
|
||||
- some/file.yaml
|
||||
`,
|
||||
},
|
||||
},
|
||||
resources: []*Document{},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
res, err := test.doc.GetResources()
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %v\n", err)
|
||||
continue
|
||||
}
|
||||
if len(test.resources) != len(res) {
|
||||
t.Errorf("Number of resources does not match.")
|
||||
continue
|
||||
}
|
||||
cmp := func(docs []*Document) func(i, j int) bool {
|
||||
return func(i, j int) bool {
|
||||
if docs[i].RepositoryURL != docs[j].RepositoryURL {
|
||||
return docs[i].RepositoryURL <
|
||||
docs[j].RepositoryURL
|
||||
}
|
||||
|
||||
if docs[i].FilePath != docs[j].FilePath {
|
||||
return docs[i].FilePath <
|
||||
docs[j].FilePath
|
||||
}
|
||||
|
||||
return docs[i].DefaultBranch < docs[j].DefaultBranch
|
||||
}
|
||||
}
|
||||
sort.Slice(test.resources, cmp(test.resources))
|
||||
sort.Slice(res, cmp(res))
|
||||
for i, r := range test.resources {
|
||||
if !reflect.DeepEqual(res[i], r) {
|
||||
t.Errorf("Expected '%+v' to equal '%+v'\n",
|
||||
res[i], r)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
58
hack/crawl/doc/docname.go
Normal file
58
hack/crawl/doc/docname.go
Normal file
@@ -0,0 +1,58 @@
|
||||
package doc
|
||||
|
||||
import (
|
||||
"path"
|
||||
"time"
|
||||
|
||||
"sigs.k8s.io/kustomize/api/git"
|
||||
)
|
||||
|
||||
// Document identifies a file by repository, branch and path, and optionally
// carries its contents. All fields except IsSame are serialized to JSON.
type Document struct {
	// RepositoryURL is the URL of the repository that holds the file.
	RepositoryURL string `json:"repositoryUrl,omitempty"`
	// FilePath is the path of the file.
	FilePath string `json:"filePath,omitempty"`
	// DefaultBranch is the branch or ref used as part of the document ID.
	DefaultBranch string `json:"defaultBranch,omitempty"`
	// DocumentData is the content of the file.
	DocumentData string `json:"document,omitempty"`
	// CreationTime is optional and omitted from JSON when nil.
	CreationTime *time.Time `json:"creationTime,omitempty"`
	// IsSame is the value reported by WasCached; it is never serialized.
	IsSame bool `json:"-"`
}
|
||||
|
||||
// Implements the CrawlerDocument interface.
|
||||
func (doc *Document) GetDocument() *Document {
|
||||
return doc
|
||||
}
|
||||
|
||||
// Implements the CrawlerDocument interface.
|
||||
func (doc *Document) WasCached() bool {
|
||||
return doc.IsSame
|
||||
}
|
||||
|
||||
func (doc *Document) FromRelativePath(newFile string) (Document, error) {
|
||||
repoSpec, err := git.NewRepoSpecFromUrl(newFile)
|
||||
if err == nil {
|
||||
return Document{
|
||||
RepositoryURL: repoSpec.Host + path.Clean(repoSpec.OrgRepo),
|
||||
FilePath: path.Clean(repoSpec.Path),
|
||||
DefaultBranch: repoSpec.Ref,
|
||||
}, nil
|
||||
}
|
||||
// else document is probably relative path.
|
||||
|
||||
ret := Document{
|
||||
RepositoryURL: doc.RepositoryURL,
|
||||
DefaultBranch: doc.DefaultBranch,
|
||||
}
|
||||
ogDir, _ := path.Split(doc.FilePath)
|
||||
|
||||
cleaned := path.Clean(newFile)
|
||||
if !path.IsAbs(cleaned) {
|
||||
cleaned = path.Clean(ogDir + "/" + cleaned)
|
||||
}
|
||||
|
||||
ret.FilePath = cleaned
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (doc *Document) ID() string {
|
||||
return doc.RepositoryURL + "/" +
|
||||
doc.DefaultBranch + "/" + doc.FilePath
|
||||
}
|
||||
64
hack/crawl/doc/docname_test.go
Normal file
64
hack/crawl/doc/docname_test.go
Normal file
@@ -0,0 +1,64 @@
|
||||
package doc
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFromRelativePath(t *testing.T) {
|
||||
type Case struct {
|
||||
RelativePath string
|
||||
Expected Document
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
BaseDoc Document
|
||||
Cases []Case
|
||||
}{
|
||||
{
|
||||
BaseDoc: Document{
|
||||
RepositoryURL: "example.com/repo",
|
||||
FilePath: "path/to/file/kustomization.yaml",
|
||||
DefaultBranch: "master",
|
||||
},
|
||||
Cases: []Case{
|
||||
{
|
||||
RelativePath: "../other/file/resource.yaml",
|
||||
Expected: Document{
|
||||
RepositoryURL: "example.com/repo",
|
||||
FilePath: "path/to/other/file/resource.yaml",
|
||||
DefaultBranch: "master",
|
||||
},
|
||||
},
|
||||
{
|
||||
RelativePath: "../file/../../something/../to/other/file/patch.yaml",
|
||||
Expected: Document{
|
||||
RepositoryURL: "example.com/repo",
|
||||
FilePath: "path/to/other/file/patch.yaml",
|
||||
DefaultBranch: "master",
|
||||
},
|
||||
},
|
||||
{
|
||||
RelativePath: "service.yaml",
|
||||
Expected: Document{
|
||||
RepositoryURL: "example.com/repo",
|
||||
FilePath: "path/to/file/service.yaml",
|
||||
DefaultBranch: "master",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
for _, c := range tc.Cases {
|
||||
rd, err := tc.BaseDoc.FromRelativePath(c.RelativePath)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(rd, c.Expected) {
|
||||
t.Errorf("document mismatch expected %v, got %v", c.Expected, rd)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
51
hack/crawl/doc/kustomize_util.go
Normal file
51
hack/crawl/doc/kustomize_util.go
Normal file
@@ -0,0 +1,51 @@
|
||||
package doc
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
|
||||
"sigs.k8s.io/yaml"
|
||||
)
|
||||
|
||||
func FixKustomizationPreUnmarshallingNonFatal(data []byte) ([]byte, error) {
|
||||
deprecateFieldsMap := map[string]string{
|
||||
"imageTags:": "images:",
|
||||
}
|
||||
for oldname, newname := range deprecateFieldsMap {
|
||||
pattern := regexp.MustCompile(oldname)
|
||||
data = pattern.ReplaceAll(data, []byte(newname))
|
||||
}
|
||||
|
||||
found, err := useLegacyPatch(data)
|
||||
if err == nil && found {
|
||||
pattern := regexp.MustCompile("patches:")
|
||||
data = pattern.ReplaceAll(data, []byte("patchesStrategicMerge:"))
|
||||
}
|
||||
|
||||
return data, err
|
||||
}
|
||||
|
||||
func useLegacyPatch(data []byte) (bool, error) {
|
||||
found := false
|
||||
|
||||
var object map[string]interface{}
|
||||
err := yaml.Unmarshal(data, &object)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("invalid content from %s",
|
||||
string(data))
|
||||
}
|
||||
if rawPatches, ok := object["patches"]; ok {
|
||||
patches, ok := rawPatches.([]interface{})
|
||||
if !ok {
|
||||
return false, fmt.Errorf("invalid patches from %v",
|
||||
rawPatches)
|
||||
}
|
||||
for _, p := range patches {
|
||||
_, ok := p.(string)
|
||||
if ok {
|
||||
found = true
|
||||
}
|
||||
}
|
||||
}
|
||||
return found, nil
|
||||
}
|
||||
14
hack/crawl/go.mod
Normal file
14
hack/crawl/go.mod
Normal file
@@ -0,0 +1,14 @@
|
||||
module sigs.k8s.io/kustomize/hack/crawl
|
||||
|
||||
go 1.13
|
||||
|
||||
require (
|
||||
github.com/elastic/go-elasticsearch/v6 v6.8.2
|
||||
github.com/gomodule/redigo v2.0.0+incompatible
|
||||
github.com/googleapis/gnostic v0.3.0 // indirect
|
||||
github.com/gorilla/mux v1.7.3
|
||||
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79
|
||||
github.com/rs/cors v1.7.0
|
||||
sigs.k8s.io/kustomize/api v0.1.1
|
||||
sigs.k8s.io/yaml v1.1.0
|
||||
)
|
||||
165
hack/crawl/go.sum
Normal file
165
hack/crawl/go.sum
Normal file
@@ -0,0 +1,165 @@
|
||||
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
|
||||
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/davecgh/go-spew v0.0.0-20151105211317-5215b55f46b2/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM=
|
||||
github.com/elastic/go-elasticsearch/v6 v6.8.2 h1:rp5DGrd63V5c6nHLjF6QEXUpZSvs0+QM3ld7m9VhV2g=
|
||||
github.com/elastic/go-elasticsearch/v6 v6.8.2/go.mod h1:UwaDJsD3rWLM5rKNFzv9hgox93HoX8utj1kxD9aFUcI=
|
||||
github.com/elazarl/goproxy v0.0.0-20170405201442-c4fc26588b6e/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc=
|
||||
github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs=
|
||||
github.com/evanphx/json-patch v4.2.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
|
||||
github.com/evanphx/json-patch v4.5.0+incompatible h1:ouOWdg56aJriqS0huScTkVXPC5IcNrDCXZ6OoTAWu7M=
|
||||
github.com/evanphx/json-patch v4.5.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
|
||||
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
|
||||
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
||||
github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
||||
github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas=
|
||||
github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0=
|
||||
github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg=
|
||||
github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
|
||||
github.com/go-openapi/jsonreference v0.0.0-20160704190145-13c6e3589ad9/go.mod h1:W3Z9FmVs9qj+KR4zFKmDPGiLdk1D9Rlm7cyMvf57TTg=
|
||||
github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc=
|
||||
github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nAiCcj+friV/PDoE1/3eeccG9LYBs0tYvLOWc=
|
||||
github.com/go-openapi/spec v0.19.4/go.mod h1:FpwSN1ksY1eteniUU7X0N/BgJ7a4WvBFVA8Lj9mJglo=
|
||||
github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I=
|
||||
github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
|
||||
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
|
||||
github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d h1:3PaI8p3seN09VjbTYC/QWlUZdZ1qS1zGjy7LH2Wt07I=
|
||||
github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o=
|
||||
github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
|
||||
github.com/golang/protobuf v0.0.0-20161109072736-4bd1920723d7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
|
||||
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/gomodule/redigo v2.0.0+incompatible h1:K/R+8tc58AaqLkqG2Ol3Qk+DR/TlNuhuh457pBFPtt0=
|
||||
github.com/gomodule/redigo v2.0.0+incompatible/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4=
|
||||
github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY=
|
||||
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/gofuzz v0.0.0-20161122191042-44d81051d367/go.mod h1:HP5RmnzzSNb993RKQDq4+1A4ia9nllfqcQFTQJedwGI=
|
||||
github.com/google/gofuzz v1.0.0 h1:A8PeW59pxE9IoFRqBp37U+mSNaQoZ46F1f0f863XSXw=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY=
|
||||
github.com/googleapis/gnostic v0.3.0 h1:CcQijm0XKekKjP/YCz28LXVSpgguuB+nCxaSjCe09y0=
|
||||
github.com/googleapis/gnostic v0.3.0/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY=
|
||||
github.com/gorilla/mux v1.7.3 h1:gnP5JzjVOuiZD07fKKToCAOjS0yOpj/qPETTXCCS6hw=
|
||||
github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
|
||||
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA=
|
||||
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
|
||||
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
|
||||
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
||||
github.com/json-iterator/go v0.0.0-20180612202835-f2b4162afba3/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
||||
github.com/json-iterator/go v1.1.7 h1:KfgG9LzI+pYjr4xvmz/5H4FXjokeP+rlHLhv3iH62Fo=
|
||||
github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA=
|
||||
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
||||
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
||||
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v0.0.0-20180320133207-05fbef0ca5da/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
|
||||
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
|
||||
github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/ginkgo v1.10.1 h1:q/mM8GF/n0shIN8SaAZ0V+jnLPzen6WIVZdiwrRlMlo=
|
||||
github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
|
||||
github.com/onsi/gomega v1.7.0 h1:XPnZz8VVBHjVsy1vzJmRwIcSwiUO+JFfrv/xGiigmME=
|
||||
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
||||
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
|
||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik=
|
||||
github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU=
|
||||
github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
|
||||
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
|
||||
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
|
||||
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
|
||||
github.com/stretchr/testify v0.0.0-20151208002404-e3a8ff8ce365/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20190812203447-cdfb69ac37fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20190923162816-aa69164e4478 h1:l5EDrHhldLYb3ZRHDUhXF7Om7MvYXnkV9/iQNo1lX6g=
|
||||
golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20170830134202-bb24a47a89ea/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190922100055-0a153f010e69 h1:rOhMmluY6kLMhdnrivzec6lLgaVbMHMn2ISQXJeJ5EM=
|
||||
golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
|
||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190614205625-5aca471b1d59/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
|
||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
|
||||
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
|
||||
gopkg.in/inf.v0 v0.9.0 h1:3zYtXIO92bvsdS3ggAdA8Gb4Azj0YU+TVY1uGYNFA8o=
|
||||
gopkg.in/inf.v0 v0.9.0/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
|
||||
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
|
||||
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
k8s.io/api v0.0.0-20191016225839-816a9b7df678 h1:z/0BV/tMBIvdwZvqBH/f7TWjQX9y3dj1nMNhrSK0h/8=
|
||||
k8s.io/api v0.0.0-20191016225839-816a9b7df678/go.mod h1:LZQaT8MvVpl7Bg2lYFcQm7+Mpdxq8p1NFl3yh+5DCwY=
|
||||
k8s.io/apimachinery v0.0.0-20191016225534-b1267f8c42b4/go.mod h1:92mWDd8Ji2sw2157KIgino5wCxffA8KSvhW2oY4ypdw=
|
||||
k8s.io/apimachinery v0.0.0-20191020214737-6c8691705fc5 h1:r3/YL3+t1U46lJF5zUSArskUpnLyWuM28rQDpM1qQPI=
|
||||
k8s.io/apimachinery v0.0.0-20191020214737-6c8691705fc5/go.mod h1:92mWDd8Ji2sw2157KIgino5wCxffA8KSvhW2oY4ypdw=
|
||||
k8s.io/client-go v11.0.0+incompatible h1:LBbX2+lOwY9flffWlJM7f1Ct8V2SRNiMRDFeiwnJo9o=
|
||||
k8s.io/client-go v11.0.0+incompatible/go.mod h1:7vJpHMYJwNQCWgzmNV+VYUl1zCObLyodBc8nIyt8L5s=
|
||||
k8s.io/gengo v0.0.0-20190128074634-0689ccc1d7d6/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0=
|
||||
k8s.io/klog v0.0.0-20181102134211-b9b56d5dfc92/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk=
|
||||
k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8=
|
||||
k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I=
|
||||
k8s.io/kube-openapi v0.0.0-20190816220812-743ec37842bf/go.mod h1:1TqjTSzOxsLGIKfj0lK8EeCP7K1iUG65v09OM0/WG5E=
|
||||
k8s.io/kube-openapi v0.0.0-20190918143330-0270cf2f1c1d h1:Xpe6sK+RY4ZgCTyZ3y273UmFmURhjtoJiwOMbQsXitY=
|
||||
k8s.io/kube-openapi v0.0.0-20190918143330-0270cf2f1c1d/go.mod h1:1TqjTSzOxsLGIKfj0lK8EeCP7K1iUG65v09OM0/WG5E=
|
||||
sigs.k8s.io/kustomize/api v0.1.1 h1:W2dWXex2MhF4/EZNokZllvet2RejCHqdAFklufN7VTg=
|
||||
sigs.k8s.io/kustomize/api v0.1.1/go.mod h1:FyfJD1q1QMjC/TvK78b6cCtZB+mbpnGIo9YOvbucJes=
|
||||
sigs.k8s.io/structured-merge-diff v0.0.0-20190525122527-15d366b2352e/go.mod h1:wWxsB5ozmmv/SG7nM11ayaAW51xMvak/t1r0CSlcokI=
|
||||
sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs=
|
||||
sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=
|
||||
23
hack/crawl/httpclient/httpclient.go
Normal file
23
hack/crawl/httpclient/httpclient.go
Normal file
@@ -0,0 +1,23 @@
|
||||
package httpclient
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/gomodule/redigo/redis"
|
||||
"github.com/gregjones/httpcache"
|
||||
rediscache "github.com/gregjones/httpcache/redis"
|
||||
)
|
||||
|
||||
func FromCache(header http.Header) bool {
|
||||
return header.Get(httpcache.XFromCache) != ""
|
||||
}
|
||||
|
||||
func NewClient(conn redis.Conn) *http.Client {
|
||||
etagCache := rediscache.NewWithClient(conn)
|
||||
tr := httpcache.NewTransport(etagCache)
|
||||
return &http.Client{
|
||||
Transport: tr,
|
||||
Timeout: 10 * time.Second,
|
||||
}
|
||||
}
|
||||
266
hack/crawl/index/elasticsearch.go
Normal file
266
hack/crawl/index/elasticsearch.go
Normal file
@@ -0,0 +1,266 @@
|
||||
package index
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"time"
|
||||
|
||||
es "github.com/elastic/go-elasticsearch/v6"
|
||||
"github.com/elastic/go-elasticsearch/v6/esapi"
|
||||
)
|
||||
|
||||
// TODO(damienr74) Split index into reader and writer?
|
||||
type index struct {
|
||||
ctx context.Context
|
||||
client *es.Client
|
||||
name string
|
||||
}
|
||||
|
||||
func newIndex(ctx context.Context, name string) (*index, error) {
|
||||
client, err := es.NewDefaultClient()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &index{
|
||||
ctx: ctx,
|
||||
client: client,
|
||||
name: name,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// readerFunc consumes a response body and reports any failure doing so.
type readerFunc func(io.Reader) error

// ignoreResponseBody is a readerFunc that reads nothing and always succeeds.
func ignoreResponseBody(io.Reader) error { return nil }
|
||||
|
||||
// checks that elastic returned successfully. If it has not, it will read the
|
||||
// body and return it in an error message.
|
||||
//
|
||||
// Otherwise, it will use the readerFunc to read the body. This function is a
|
||||
// mechanism for getting relevant data from the response only if it was successful.
|
||||
func (idx *index) responseErrorOrNil(info string, res *esapi.Response,
|
||||
err error, reader readerFunc) error {
|
||||
|
||||
messageStart := fmt.Sprintf("index %s error: %s", idx.name, info)
|
||||
if err != nil || res == nil {
|
||||
return fmt.Errorf("%s: %v", messageStart, err)
|
||||
}
|
||||
|
||||
defer res.Body.Close()
|
||||
if res.IsError() {
|
||||
return fmt.Errorf("%s: %s", messageStart, res.String())
|
||||
}
|
||||
|
||||
if reader != nil {
|
||||
err = reader(res.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s: %v", messageStart, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// byteJoin converts each argument to bytes and joins them with single spaces.
// []byte values are used as-is, strings are converted, and anything else is
// rendered with the %v verb.
func byteJoin(bts ...interface{}) []byte {
	parts := make([][]byte, 0, len(bts))
	for _, item := range bts {
		var chunk []byte
		if raw, isBytes := item.([]byte); isBytes {
			chunk = raw
		} else if text, isString := item.(string); isString {
			chunk = []byte(text)
		} else {
			chunk = []byte(fmt.Sprintf("%v", item))
		}
		parts = append(parts, chunk)
	}

	return bytes.Join(parts, []byte(" "))
}
|
||||
|
||||
// Update the elasticsearch index mappings. (describes how to index/search for the documents).
|
||||
func (idx *index) UpdateMapping(mappings []byte) error {
|
||||
request := byteJoin(`{ "mappings":`, mappings, `}`)
|
||||
|
||||
op := idx.client.Indices.PutMapping
|
||||
res, err := op(
|
||||
bytes.NewReader(request),
|
||||
op.WithContext(idx.ctx),
|
||||
op.WithIndex(idx.name),
|
||||
op.WithIncludeTypeName(true),
|
||||
op.WithPretty(),
|
||||
)
|
||||
|
||||
return idx.responseErrorOrNil(
|
||||
fmt.Sprintf("could not update index mappings '%s'", request),
|
||||
res, err, ignoreResponseBody)
|
||||
}
|
||||
|
||||
// Update the elasticsearch index settings. (describes default parameters and
|
||||
// some analyzer definitions, etc.)
|
||||
func (idx *index) UpdateSetting(settings []byte) error {
|
||||
request := byteJoin(`{ "settings": `, settings, `}`)
|
||||
op := idx.client.Indices.PutSettings
|
||||
res, err := op(
|
||||
bytes.NewReader(request),
|
||||
op.WithContext(idx.ctx),
|
||||
op.WithIndex(idx.name),
|
||||
op.WithPretty(),
|
||||
)
|
||||
|
||||
return idx.responseErrorOrNil(
|
||||
fmt.Sprintf("could not update index settings '%s'", request),
|
||||
res, err, ignoreResponseBody)
|
||||
}
|
||||
|
||||
// Create an index providing both the mappings and the settings.
|
||||
func (idx *index) CreateIndex(mappings []byte, settings []byte) error {
|
||||
request := byteJoin(`{ "mappings":`, mappings, `, "settings":`, settings, `}`)
|
||||
op := idx.client.Indices.Create
|
||||
res, err := op(
|
||||
idx.name,
|
||||
op.WithBody(bytes.NewReader(request)),
|
||||
op.WithContext(idx.ctx),
|
||||
op.WithHuman(),
|
||||
op.WithPretty(),
|
||||
op.WithIncludeTypeName(true),
|
||||
)
|
||||
|
||||
return idx.responseErrorOrNil(
|
||||
fmt.Sprintf("could not create index with config '%s'", request),
|
||||
res, err, ignoreResponseBody)
|
||||
}
|
||||
|
||||
// Delete an index.
|
||||
func (idx *index) DeleteIndex() error {
|
||||
res, err := idx.client.Indices.Delete(
|
||||
[]string{idx.name},
|
||||
)
|
||||
|
||||
return idx.responseErrorOrNil("could not delete index",
|
||||
res, err, ignoreResponseBody)
|
||||
}
|
||||
|
||||
// Insert or update the document by ID.
|
||||
func (idx *index) Put(uniqueID string, doc interface{}) (string, error) {
|
||||
body, err := json.Marshal(doc)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req := esapi.IndexRequest{
|
||||
Index: idx.name,
|
||||
Body: bytes.NewReader(body),
|
||||
DocumentID: uniqueID,
|
||||
}
|
||||
res, err := req.Do(idx.ctx, idx.client)
|
||||
|
||||
var id string
|
||||
readId := func(reader io.Reader) error {
|
||||
type InsertResult struct {
|
||||
ID string `json:"_id,omitempty"`
|
||||
}
|
||||
var ir InsertResult
|
||||
data, err := ioutil.ReadAll(reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = json.Unmarshal(data, &ir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
id = ir.ID
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// populates the id field.
|
||||
err = idx.responseErrorOrNil("could not insert document",
|
||||
res, err, readId)
|
||||
|
||||
return id, err
|
||||
}
|
||||
|
||||
type scrollUpdater func(string, readerFunc) error
|
||||
|
||||
// Update the scroll for iteration. If no scroll exists, create one.
|
||||
func (idx *index) scrollUpdater(query []byte, batchSize int,
|
||||
timeout time.Duration) scrollUpdater {
|
||||
|
||||
return func(scrollID string, reader readerFunc) error {
|
||||
var res *esapi.Response
|
||||
var err error
|
||||
|
||||
if scrollID == "" {
|
||||
search := idx.client.Search
|
||||
res, err = search(
|
||||
search.WithContext(idx.ctx),
|
||||
search.WithIndex(idx.name),
|
||||
search.WithBody(bytes.NewBuffer(query)),
|
||||
search.WithScroll(timeout),
|
||||
search.WithSize(batchSize),
|
||||
)
|
||||
} else {
|
||||
scroll := idx.client.Scroll
|
||||
res, err = scroll(
|
||||
scroll.WithContext(idx.ctx),
|
||||
scroll.WithScroll(timeout),
|
||||
scroll.WithScrollID(scrollID),
|
||||
)
|
||||
}
|
||||
|
||||
return idx.responseErrorOrNil(
|
||||
fmt.Sprintf("could not scroll for query %s", query),
|
||||
res, err, reader)
|
||||
}
|
||||
}
|
||||
|
||||
// Simple search options. Size is the number of elements to return, From is the
|
||||
// rank of the results according to the query. Used as a simple (stateless)
|
||||
// pagination technique.
|
||||
type SearchOptions struct {
|
||||
Size int
|
||||
From int
|
||||
}
|
||||
|
||||
// Search for a query (json query dsl) with some options, and use the reader func
|
||||
// to extract the response.
|
||||
func (idx *index) Search(query []byte, opts SearchOptions,
|
||||
responseReader readerFunc) error {
|
||||
|
||||
op := idx.client.Search
|
||||
res, err := op(
|
||||
op.WithContext(idx.ctx),
|
||||
op.WithIndex(idx.name),
|
||||
op.WithBody(bytes.NewBuffer(query)),
|
||||
op.WithTrackTotalHits(true),
|
||||
op.WithSize(opts.Size),
|
||||
op.WithFrom(opts.From),
|
||||
op.WithPretty(),
|
||||
)
|
||||
|
||||
return idx.responseErrorOrNil(
|
||||
fmt.Sprintf("could not complete search query %v", query),
|
||||
res, err, responseReader)
|
||||
}
|
||||
|
||||
// Delete an element from elasticsearch by Id.
|
||||
func (idx *index) Delete(id string) error {
|
||||
op := idx.client.Delete
|
||||
res, err := op(
|
||||
idx.name,
|
||||
id,
|
||||
op.WithContext(idx.ctx),
|
||||
op.WithPretty(),
|
||||
)
|
||||
|
||||
return idx.responseErrorOrNil(
|
||||
fmt.Sprintf("could not delete id(%s) from index(%s)", id, idx.name),
|
||||
res, err, ignoreResponseBody)
|
||||
}
|
||||
337
hack/crawl/index/kustomize.go
Normal file
337
hack/crawl/index/kustomize.go
Normal file
@@ -0,0 +1,337 @@
|
||||
package index
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"sigs.k8s.io/kustomize/hack/crawl/doc"
|
||||
)
|
||||
|
||||
// AggregationKeyword is the literal "aggs".
// NOTE(review): presumably the key under which aggregations are placed in a
// query body; confirm against the callers.
const AggregationKeyword = "aggs"
|
||||
|
||||
// KustomizeHits is a named redefinition of the element list found at
// KustomizeResult.Hits.Hits, declared as a convenience for iteration.
// Its fields and JSON tags must stay in sync with that inline struct.
// Redefinition of Hits structure. Must match the json string of
|
||||
// KustomizeResult.Hits.Hits. Declared as a convenience for iteration.
|
||||
type KustomizeHits []struct {
|
||||
// ID is serialized under the "id" key.
ID string `json:"id"`
|
||||
// Document is serialized under the "result" key.
Document doc.KustomizationDocument `json:"result"`
|
||||
}
|
||||
|
||||
// KustomizeResult is a search result serialized with this package's own JSON
// labels ("id", "result", "count", "otherResults", ...). It has the same
// field structure as ElasticKustomizeResult and differs only in its JSON
// tags, so keep the two declarations in sync field-for-field.
type KustomizeResult struct {
|
||||
// ScrollID is never written to JSON output.
ScrollID *string `json:"-"`
|
||||
|
||||
// Hits holds the total hit count and the matching documents.
Hits *struct {
|
||||
Total int `json:"total"`
|
||||
Hits []struct {
|
||||
ID string `json:"id"`
|
||||
Document doc.KustomizationDocument `json:"result"`
|
||||
} `json:"hits"`
|
||||
} `json:"hits,omitempty"`
|
||||
|
||||
// Aggregations holds the optional "timeseries" and "kinds" bucket lists.
Aggregations *struct {
|
||||
Timeseries *struct {
|
||||
Buckets []struct {
|
||||
Key string `json:"key"`
|
||||
Count int `json:"count"`
|
||||
} `json:"buckets"`
|
||||
} `json:"timeseries,omitempty"`
|
||||
|
||||
Kinds *struct {
|
||||
OtherCount int `json:"otherResults"`
|
||||
Buckets []struct {
|
||||
Key string `json:"key"`
|
||||
Count int `json:"count"`
|
||||
} `json:"buckets"`
|
||||
} `json:"kinds,omitempty"`
|
||||
} `json:"aggregations,omitempty"`
|
||||
}
|
||||
|
||||
// ElasticKustomizeResult is the shape of a raw elasticsearch response, using
// elasticsearch's own JSON labels ("_scroll_id", "_id", "_source",
// "doc_count", "key_as_string", "sum_other_doc_count").
//
// Elasticsearch's labels are sometimes inconsistent, and some label choices are pretty ugly.
|
||||
// However, the response structure is reasonable, so it is reused here. To expose different
|
||||
// json labels, two copies of the type are needed (this one and KustomizeResult). The copies
|
||||
// must have the exact same type/structure, so the nested types are declared inline. Go checks
|
||||
// at compile time that they are convertible, and the conversion is a no-op at runtime.
|
||||
type ElasticKustomizeResult struct {
|
||||
ScrollID *string `json:"_scroll_id,omitempty"`
|
||||
|
||||
Hits *struct {
|
||||
Total int `json:"total"`
|
||||
Hits []struct {
|
||||
ID string `json:"_id"`
|
||||
Document doc.KustomizationDocument `json:"_source"`
|
||||
} `json:"hits"`
|
||||
} `json:"hits,omitempty"`
|
||||
|
||||
Aggregations *struct {
|
||||
Timeseries *struct {
|
||||
// Buckets carries no tag here.
Buckets []struct {
|
||||
Key string `json:"key_as_string"`
|
||||
Count int `json:"doc_count"`
|
||||
}
|
||||
} `json:"timeseries,omitempty"`
|
||||
|
||||
Kinds *struct {
|
||||
OtherCount int `json:"sum_other_doc_count"`
|
||||
Buckets []struct {
|
||||
Key string `json:"key"`
|
||||
Count int `json:"doc_count"`
|
||||
}
|
||||
} `json:"kinds,omitempty"`
|
||||
} `json:"aggregations,omitempty"`
|
||||
}
|
||||
|
||||
type KustomizeIndex struct {
|
||||
*index
|
||||
}
|
||||
|
||||
// Create index reference to the index containing the kustomize documents.
|
||||
func NewKustomizeIndex(ctx context.Context) (*KustomizeIndex, error) {
|
||||
idx, err := newIndex(ctx, "kustomize")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &KustomizeIndex{idx}, nil
|
||||
}
|
||||
|
||||
// TimeseriesAggregation returns the aggregation name ("timeseries") together
// with an elasticsearch date_histogram definition that buckets kustomization
// files by the day of their creationTime field.
func TimeseriesAggregation() (string, map[string]interface{}) {
	histogram := map[string]interface{}{
		"field":    "creationTime",
		"interval": "day",
		// XXX Only return buckets that actually contain documents;
		// otherwise every day in the covered range is added to the
		// output. This matters if a zero valued time is ever stored in
		// creationTime: the response would then hold >600k entries
		// (one for every day since year 0).
		"min_doc_count": 1,
	}

	return "timeseries", map[string]interface{}{
		"date_histogram": histogram,
	}
}
|
||||
|
||||
// KindAggregation returns the aggregation name ("kinds") together with an
// elasticsearch terms aggregation over the kinds.keyword field. maxBuckets is
// sent as the terms "size"; values below 1 are raised to 1.
func KindAggregation(maxBuckets int) (string, map[string]interface{}) {
	size := maxBuckets
	if size < 1 {
		size = 1
	}

	terms := map[string]interface{}{
		"field": "kinds.keyword",
		"size":  size,
	}

	return "kinds", map[string]interface{}{"terms": terms}
}
|
||||
|
||||
// multiMatch wraps query in an elasticsearch multi_match clause of type
// cross_fields. Per the elasticsearch multi_match semantics, each listed field
// is checked according to its own analyzer; the keyword fields carry a ^3
// boost.
func multiMatch(query string) map[string]interface{} {
	searchedFields := []string{
		"values.keyword^3",
		"identifiers.keyword^3",
		"values.ngram",
		"identifiers.ngram",
		// TODO(damienr74) remove document with default analyzer. It
		// does not handle special (=,: etc) characters properly, and
		// matches with false positives. document.whitespace does not
		// exist yet, but should use the whitespace analyzer.
		"document",
		"document.whitespace",
	}

	clause := map[string]interface{}{
		"type":   "cross_fields",
		"fields": searchedFields,
		"query":  query,
	}

	return map[string]interface{}{"multi_match": clause}
}
|
||||
|
||||
// Build an elasticsearch query from a user query.
|
||||
func BuildQuery(query string) map[string]interface{} {
|
||||
queryTokens := strings.Fields(query)
|
||||
if len(queryTokens) == 0 {
|
||||
return map[string]interface{}{
|
||||
"size": 0,
|
||||
}
|
||||
}
|
||||
|
||||
mustMatch := make([]map[string]interface{}, len(queryTokens))
|
||||
|
||||
for i, tok := range queryTokens {
|
||||
if strings.HasPrefix(strings.ToLower(tok), "kind=") {
|
||||
mustMatch[i] = map[string]interface{}{
|
||||
"term": map[string]interface{}{
|
||||
"kinds.keyword": tok[5:],
|
||||
},
|
||||
}
|
||||
continue
|
||||
}
|
||||
mustMatch[i] = multiMatch(tok)
|
||||
}
|
||||
|
||||
structuredQuery := map[string]interface{}{
|
||||
"query": map[string]interface{}{
|
||||
"bool": map[string]interface{}{
|
||||
"must": mustMatch,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return structuredQuery
|
||||
}
|
||||
|
||||
// Iterator based off of the way bufio.Scanner works.
|
||||
//
|
||||
// Example:
|
||||
// for it.Next() {
|
||||
// for _, doc := range it.Value().Hits {
|
||||
// // Handle KustomizationDocument.
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if err := it.Err(); err != nil {
|
||||
// // Handle err.
|
||||
// }
|
||||
type KustomizeIterator struct {
|
||||
update scrollUpdater
|
||||
err error
|
||||
// Matches the return definition of elasticsearch search results. The
|
||||
// scroll ID is practically a database cursor.
|
||||
scrollImpl KustomizeResult
|
||||
}
|
||||
|
||||
// Get the next batch of results. Note that this returns multiple results that
|
||||
// can be iterated.
|
||||
func (it *KustomizeIterator) Next() bool {
|
||||
reader := func(reader io.Reader) error {
|
||||
data, err := ioutil.ReadAll(reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not read from body: %v", err)
|
||||
}
|
||||
var scrollInput ElasticKustomizeResult
|
||||
err = json.Unmarshal(data, &scrollInput)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cloud not marshal %s into %T: %v",
|
||||
data, scrollInput, err)
|
||||
}
|
||||
it.scrollImpl = KustomizeResult(scrollInput)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
if it.err == nil {
|
||||
fmt.Printf("updating scroll: %s\n", *it.scrollImpl.ScrollID)
|
||||
it.err = it.update(*it.scrollImpl.ScrollID, reader)
|
||||
}
|
||||
|
||||
// if there is no error and the array is not empty, then Value is
|
||||
// obligated to return a valid result.
|
||||
return it.err == nil &&
|
||||
it.scrollImpl.Hits != nil &&
|
||||
len(it.scrollImpl.Hits.Hits) > 0
|
||||
}
|
||||
|
||||
// Get the value from this batch of iterations.
|
||||
func (it *KustomizeIterator) Value() KustomizeResult {
|
||||
return it.scrollImpl
|
||||
}
|
||||
|
||||
// Check if any errors have occurred.
|
||||
func (it *KustomizeIterator) Err() error {
|
||||
return it.err
|
||||
}
|
||||
|
||||
// Create an iterator over query. Iterate in chunks of batchSize, each batch
|
||||
// should take no longer than timeout to read (otherwise, elasticsearch will
|
||||
// delete the context).
|
||||
//
|
||||
// XXX Important to set a reasonable amount of time to read the documents. If
|
||||
// a lot of processing must be done, consider loading everything in memory
|
||||
// before doing it so that, a short timeout period can be set. Scrolling creates
|
||||
// a consistent DB context, so this can be costly.
|
||||
//
|
||||
// Scrolling is also not meant to be used for real time purposes. If you need
|
||||
// results quickly, consider using the From: field in SearchOptions and a normal
|
||||
// search. This will not guarantee that the values will not change but is more
|
||||
// suitable for lower latencies/long execution timeouts.
|
||||
func (ki *KustomizeIndex) IterateQuery(query []byte, batchSize int,
|
||||
timeout time.Duration) *KustomizeIterator {
|
||||
|
||||
emptyScroll := ""
|
||||
return &KustomizeIterator{
|
||||
update: ki.scrollUpdater(query, batchSize, timeout),
|
||||
scrollImpl: KustomizeResult{
|
||||
ScrollID: &emptyScroll,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// type specific Put for inserting structured kustomization documents.
|
||||
func (ki *KustomizeIndex) Put(id string, doc *doc.KustomizationDocument) (string, error) {
|
||||
id, err := ki.index.Put(id, doc)
|
||||
if err != nil {
|
||||
return id, fmt.Errorf("could not insert in elastic: %v", err)
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// Kustomize search options: What metrics should be returned? Kind Aggregation,
|
||||
// TimeseriesAggregation, etc. Also embeds the SearchOptions field to specify
|
||||
// the position in the sorted list of results and the number of results to return.
|
||||
type KustomizeSearchOptions struct {
|
||||
SearchOptions
|
||||
KindAggregation bool
|
||||
TimeseriesAggregation bool
|
||||
}
|
||||
|
||||
// Search the index with the given query string. Returns a structured result and possible
|
||||
// aggregates.
|
||||
func (ki *KustomizeIndex) Search(query string,
|
||||
opts KustomizeSearchOptions) (*KustomizeResult, error) {
|
||||
|
||||
aggMap := make(map[string]interface{})
|
||||
if opts.KindAggregation {
|
||||
k, kAgg := KindAggregation(15)
|
||||
aggMap[k] = kAgg
|
||||
}
|
||||
if opts.TimeseriesAggregation {
|
||||
t, tAgg := TimeseriesAggregation()
|
||||
aggMap[t] = tAgg
|
||||
}
|
||||
|
||||
esQuery := BuildQuery(query)
|
||||
if len(aggMap) > 0 {
|
||||
esQuery[AggregationKeyword] = aggMap
|
||||
}
|
||||
|
||||
data, err := json.Marshal(&esQuery)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to format query %s", query)
|
||||
}
|
||||
fmt.Printf("formated query: %s\n", data)
|
||||
|
||||
var kr ElasticKustomizeResult
|
||||
err = ki.index.Search(data, opts.SearchOptions, func(results io.Reader) error {
|
||||
data, err = ioutil.ReadAll(results)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not read results from search: %v", err)
|
||||
}
|
||||
|
||||
if err = json.Unmarshal(data, &kr); err != nil {
|
||||
return fmt.Errorf("could not parse results from search: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
res := KustomizeResult(kr)
|
||||
|
||||
return &res, err
|
||||
}
|
||||
72
hack/crawl/index/kustomize_test.go
Normal file
72
hack/crawl/index/kustomize_test.go
Normal file
@@ -0,0 +1,72 @@
|
||||
package index
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBuildQuery(t *testing.T) {
|
||||
testCases := []struct {
|
||||
query string
|
||||
result map[string]interface{}
|
||||
}{
|
||||
{
|
||||
query: " \t\n\r",
|
||||
result: map[string]interface{}{"size": 0},
|
||||
},
|
||||
{
|
||||
query: "\tidentifier1 identifier2\nidentifier3\r",
|
||||
result: map[string]interface{}{
|
||||
"query": map[string]interface{}{
|
||||
"bool": map[string]interface{}{
|
||||
"must": []map[string]interface{}{
|
||||
multiMatch("identifier1"),
|
||||
multiMatch("identifier2"),
|
||||
multiMatch("identifier3"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
query: "kind=Kustomization",
|
||||
result: map[string]interface{}{
|
||||
"query": map[string]interface{}{
|
||||
"bool": map[string]interface{}{
|
||||
"must": []map[string]interface{}{
|
||||
{
|
||||
"term": map[string]interface{}{
|
||||
"kinds.keyword": "Kustomization",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
query: "kind=Kustomization identifier2",
|
||||
result: map[string]interface{}{
|
||||
"query": map[string]interface{}{
|
||||
"bool": map[string]interface{}{
|
||||
"must": []map[string]interface{}{
|
||||
{
|
||||
"term": map[string]interface{}{
|
||||
"kinds.keyword": "Kustomization",
|
||||
},
|
||||
},
|
||||
multiMatch("identifier2"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
result := BuildQuery(tc.query)
|
||||
if !reflect.DeepEqual(tc.result, result) {
|
||||
t.Errorf("Expected %#v to match %#v", result, tc.result)
|
||||
}
|
||||
}
|
||||
}
|
||||
BIN
hack/crawl/pictures/github_token.png
Normal file
BIN
hack/crawl/pictures/github_token.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 53 KiB |
BIN
hack/crawl/pictures/sys_arch.png
Normal file
BIN
hack/crawl/pictures/sys_arch.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 44 KiB |
BIN
hack/crawl/pictures/token_config.png
Normal file
BIN
hack/crawl/pictures/token_config.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 32 KiB |
2
hack/crawl/ui/.dockerignore
Normal file
2
hack/crawl/ui/.dockerignore
Normal file
@@ -0,0 +1,2 @@
|
||||
node_modules
|
||||
dist
|
||||
2
hack/crawl/ui/.gcloudignore
Normal file
2
hack/crawl/ui/.gcloudignore
Normal file
@@ -0,0 +1,2 @@
|
||||
node_modules
|
||||
dist
|
||||
2
hack/crawl/ui/.gitignore
vendored
Normal file
2
hack/crawl/ui/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
node_modules
|
||||
dist
|
||||
16
hack/crawl/ui/Dockerfile
Normal file
16
hack/crawl/ui/Dockerfile
Normal file
@@ -0,0 +1,16 @@
|
||||
# Build stage: install the dependencies and build the UI bundle.
# The Node version is pinned because the toolchain in package.json is
# Angular 8; node:latest moves to releases that toolchain was never built for.
FROM node:12 AS builder

WORKDIR /app
# Copy the manifests first so the dependency layer is cached until they change.
COPY package.json package-lock.json /app/
RUN npm set progress=false && npm install
COPY . /app

RUN npm run build

# Runtime stage: serve the built files with nginx.
FROM nginx:alpine
RUN rm -rf /usr/share/nginx/html/*
# todo(damienr74), put this in configmap.
COPY nginx.conf /etc/nginx/nginx.conf
COPY --from=builder /app/dist/kustomize-search/ /usr/share/nginx/html
EXPOSE 80
CMD ["nginx", "-g", "daemon off;"]
|
||||
25
hack/crawl/ui/README.md
Normal file
25
hack/crawl/ui/README.md
Normal file
@@ -0,0 +1,25 @@
|
||||
There is a Dockerfile for building container images.
|
||||
|
||||
## Development server
|
||||
|
||||
Run `ng serve` for a dev server. Navigate to `http://localhost:4200/`. The app will automatically reload if you change any of the source files.
|
||||
|
||||
## Code scaffolding
|
||||
|
||||
Run `ng generate component component-name` to generate a new component. You can also use `ng generate directive|pipe|service|class|guard|interface|enum|module`.
|
||||
|
||||
## Build
|
||||
|
||||
Run `ng build` to build the project. The build artifacts will be stored in the `dist/` directory. Use the `--prod` flag for a production build.
|
||||
|
||||
## Running unit tests
|
||||
|
||||
Run `ng test` to execute the unit tests via [Karma](https://karma-runner.github.io).
|
||||
|
||||
## Running end-to-end tests
|
||||
|
||||
Run `ng e2e` to execute the end-to-end tests via [Protractor](http://www.protractortest.org/).
|
||||
|
||||
## Further help
|
||||
|
||||
To get more help on the Angular CLI use `ng help` or go check out the [Angular CLI README](https://github.com/angular/angular-cli/blob/master/README.md).
|
||||
123
hack/crawl/ui/angular.json
Normal file
123
hack/crawl/ui/angular.json
Normal file
@@ -0,0 +1,123 @@
|
||||
{
|
||||
"$schema": "./node_modules/@angular/cli/lib/config/schema.json",
|
||||
"version": 1,
|
||||
"newProjectRoot": "projects",
|
||||
"projects": {
|
||||
"kustomize-search": {
|
||||
"projectType": "application",
|
||||
"schematics": {},
|
||||
"root": "",
|
||||
"sourceRoot": "src",
|
||||
"prefix": "app",
|
||||
"architect": {
|
||||
"build": {
|
||||
"builder": "@angular-devkit/build-angular:browser",
|
||||
"options": {
|
||||
"outputPath": "dist/kustomize-search",
|
||||
"index": "src/index.html",
|
||||
"main": "src/main.ts",
|
||||
"polyfills": "src/polyfills.ts",
|
||||
"tsConfig": "tsconfig.app.json",
|
||||
"aot": false,
|
||||
"assets": [
|
||||
"src/favicon.ico",
|
||||
"src/assets"
|
||||
],
|
||||
"styles": [
|
||||
"./node_modules/@angular/material/prebuilt-themes/deeppurple-amber.css",
|
||||
"src/styles.css"
|
||||
],
|
||||
"scripts": []
|
||||
},
|
||||
"configurations": {
|
||||
"production": {
|
||||
"fileReplacements": [
|
||||
{
|
||||
"replace": "src/environments/environment.ts",
|
||||
"with": "src/environments/environment.prod.ts"
|
||||
}
|
||||
],
|
||||
"optimization": true,
|
||||
"outputHashing": "all",
|
||||
"sourceMap": false,
|
||||
"extractCss": true,
|
||||
"namedChunks": false,
|
||||
"aot": true,
|
||||
"extractLicenses": true,
|
||||
"vendorChunk": false,
|
||||
"buildOptimizer": true,
|
||||
"budgets": [
|
||||
{
|
||||
"type": "initial",
|
||||
"maximumWarning": "2mb",
|
||||
"maximumError": "5mb"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"serve": {
|
||||
"builder": "@angular-devkit/build-angular:dev-server",
|
||||
"options": {
|
||||
"browserTarget": "kustomize-search:build"
|
||||
},
|
||||
"configurations": {
|
||||
"production": {
|
||||
"browserTarget": "kustomize-search:build:production"
|
||||
}
|
||||
}
|
||||
},
|
||||
"extract-i18n": {
|
||||
"builder": "@angular-devkit/build-angular:extract-i18n",
|
||||
"options": {
|
||||
"browserTarget": "kustomize-search:build"
|
||||
}
|
||||
},
|
||||
"test": {
|
||||
"builder": "@angular-devkit/build-angular:karma",
|
||||
"options": {
|
||||
"main": "src/test.ts",
|
||||
"polyfills": "src/polyfills.ts",
|
||||
"tsConfig": "tsconfig.spec.json",
|
||||
"karmaConfig": "karma.conf.js",
|
||||
"assets": [
|
||||
"src/favicon.ico",
|
||||
"src/assets"
|
||||
],
|
||||
"styles": [
|
||||
"./node_modules/@angular/material/prebuilt-themes/deeppurple-amber.css",
|
||||
"src/styles.css"
|
||||
],
|
||||
"scripts": []
|
||||
}
|
||||
},
|
||||
"lint": {
|
||||
"builder": "@angular-devkit/build-angular:tslint",
|
||||
"options": {
|
||||
"tsConfig": [
|
||||
"tsconfig.app.json",
|
||||
"tsconfig.spec.json",
|
||||
"e2e/tsconfig.json"
|
||||
],
|
||||
"exclude": [
|
||||
"**/node_modules/**"
|
||||
]
|
||||
}
|
||||
},
|
||||
"e2e": {
|
||||
"builder": "@angular-devkit/build-angular:protractor",
|
||||
"options": {
|
||||
"protractorConfig": "e2e/protractor.conf.js",
|
||||
"devServerTarget": "kustomize-search:serve"
|
||||
},
|
||||
"configurations": {
|
||||
"production": {
|
||||
"devServerTarget": "kustomize-search:serve:production"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"defaultProject": "kustomize-search"
|
||||
}
|
||||
12
hack/crawl/ui/browserslist
Normal file
12
hack/crawl/ui/browserslist
Normal file
@@ -0,0 +1,12 @@
|
||||
# This file is used by the build system to adjust CSS and JS output to support the specified browsers below.
|
||||
# For additional information regarding the format and rule options, please see:
|
||||
# https://github.com/browserslist/browserslist#queries
|
||||
|
||||
# You can see what browsers were selected by your queries by running:
|
||||
# npx browserslist
|
||||
|
||||
> 0.5%
|
||||
last 2 versions
|
||||
Firefox ESR
|
||||
not dead
|
||||
not IE 9-11 # For IE 9-11 support, remove 'not'.
|
||||
5
hack/crawl/ui/cloudbuild.yaml
Normal file
5
hack/crawl/ui/cloudbuild.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
steps:
|
||||
- name: 'gcr.io/cloud-builders/docker'
|
||||
args: ['build', '-t', 'gcr.io/kustomize-search/frontend', '.']
|
||||
images:
|
||||
- 'gcr.io/kustomize-search/frontend'
|
||||
32
hack/crawl/ui/e2e/protractor.conf.js
Normal file
32
hack/crawl/ui/e2e/protractor.conf.js
Normal file
@@ -0,0 +1,32 @@
|
||||
// @ts-check
|
||||
// Protractor configuration file, see link for more information
|
||||
// https://github.com/angular/protractor/blob/master/lib/config.ts
|
||||
|
||||
const { SpecReporter } = require('jasmine-spec-reporter');
|
||||
|
||||
/**
|
||||
* @type { import("protractor").Config }
|
||||
*/
|
||||
exports.config = {
|
||||
allScriptsTimeout: 11000,
|
||||
specs: [
|
||||
'./src/**/*.e2e-spec.ts'
|
||||
],
|
||||
capabilities: {
|
||||
'browserName': 'chrome'
|
||||
},
|
||||
directConnect: true,
|
||||
baseUrl: 'http://localhost:4200/',
|
||||
framework: 'jasmine',
|
||||
jasmineNodeOpts: {
|
||||
showColors: true,
|
||||
defaultTimeoutInterval: 30000,
|
||||
print: function() {}
|
||||
},
|
||||
onPrepare() {
|
||||
require('ts-node').register({
|
||||
project: require('path').join(__dirname, './tsconfig.json')
|
||||
});
|
||||
jasmine.getEnv().addReporter(new SpecReporter({ spec: { displayStacktrace: true } }));
|
||||
}
|
||||
};
|
||||
23
hack/crawl/ui/e2e/src/app.e2e-spec.ts
Normal file
23
hack/crawl/ui/e2e/src/app.e2e-spec.ts
Normal file
@@ -0,0 +1,23 @@
|
||||
import { AppPage } from './app.po';
|
||||
import { browser, logging } from 'protractor';
|
||||
|
||||
describe('workspace-project App', () => {
  let page: AppPage;

  beforeEach(() => {
    page = new AppPage();
  });

  // The root component renders its title property ('k8s Search') inside the
  // <h1> that AppPage.getTitleText() reads.
  it('should display the application title', () => {
    page.navigateTo();
    expect(page.getTitleText()).toEqual('k8s Search');
  });

  afterEach(async () => {
    // Assert that there are no errors emitted from the browser
    const logs = await browser.manage().logs().get(logging.Type.BROWSER);
    expect(logs).not.toContain(jasmine.objectContaining({
      level: logging.Level.SEVERE,
    } as logging.Entry));
  });
});
|
||||
11
hack/crawl/ui/e2e/src/app.po.ts
Normal file
11
hack/crawl/ui/e2e/src/app.po.ts
Normal file
@@ -0,0 +1,11 @@
|
||||
import { browser, by, element } from 'protractor';
|
||||
|
||||
export class AppPage {
|
||||
navigateTo() {
|
||||
return browser.get(browser.baseUrl) as Promise<any>;
|
||||
}
|
||||
|
||||
getTitleText() {
|
||||
return element(by.css('app-root h1')).getText() as Promise<string>;
|
||||
}
|
||||
}
|
||||
13
hack/crawl/ui/e2e/tsconfig.json
Normal file
13
hack/crawl/ui/e2e/tsconfig.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"extends": "../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"outDir": "../out-tsc/e2e",
|
||||
"module": "commonjs",
|
||||
"target": "es5",
|
||||
"types": [
|
||||
"jasmine",
|
||||
"jasminewd2",
|
||||
"node"
|
||||
]
|
||||
}
|
||||
}
|
||||
32
hack/crawl/ui/karma.conf.js
Normal file
32
hack/crawl/ui/karma.conf.js
Normal file
@@ -0,0 +1,32 @@
|
||||
// Karma configuration file, see link for more information
|
||||
// https://karma-runner.github.io/1.0/config/configuration-file.html
|
||||
|
||||
module.exports = function (config) {
|
||||
config.set({
|
||||
basePath: '',
|
||||
frameworks: ['jasmine', '@angular-devkit/build-angular'],
|
||||
plugins: [
|
||||
require('karma-jasmine'),
|
||||
require('karma-chrome-launcher'),
|
||||
require('karma-jasmine-html-reporter'),
|
||||
require('karma-coverage-istanbul-reporter'),
|
||||
require('@angular-devkit/build-angular/plugins/karma')
|
||||
],
|
||||
client: {
|
||||
clearContext: false // leave Jasmine Spec Runner output visible in browser
|
||||
},
|
||||
coverageIstanbulReporter: {
|
||||
dir: require('path').join(__dirname, './coverage/kustomize-search'),
|
||||
reports: ['html', 'lcovonly', 'text-summary'],
|
||||
fixWebpackSourcePaths: true
|
||||
},
|
||||
reporters: ['progress', 'kjhtml'],
|
||||
port: 9876,
|
||||
colors: true,
|
||||
logLevel: config.LOG_INFO,
|
||||
autoWatch: true,
|
||||
browsers: ['Chrome'],
|
||||
singleRun: false,
|
||||
restartOnFileChange: true
|
||||
});
|
||||
};
|
||||
25
hack/crawl/ui/nginx.conf
Normal file
25
hack/crawl/ui/nginx.conf
Normal file
@@ -0,0 +1,25 @@
|
||||
worker_processes 1;

events {
  worker_connections 1024;
}

http {
  server {
    listen 80;
    server_name 0.0.0.0;

    root /usr/share/nginx/html;
    index index.html index.htm;
    include /etc/nginx/mime.types;

    gzip on;
    gzip_min_length 1000;
    gzip_proxied expired no-cache no-store private auth;
    # "application/javascript" was previously misspelled as
    # "applications/javascript", an entry that could never match a response.
    gzip_types text/plain text/css application/json application/javascript application/x-javascript text/javascript;

    location / {
      # Fall back to index.html when the URI matches no file or directory.
      try_files $uri $uri/ /index.html;
    }
  }
}
|
||||
12104
hack/crawl/ui/package-lock.json
generated
Normal file
12104
hack/crawl/ui/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
53
hack/crawl/ui/package.json
Normal file
53
hack/crawl/ui/package.json
Normal file
@@ -0,0 +1,53 @@
|
||||
{
|
||||
"name": "kustomize-search",
|
||||
"version": "0.0.0",
|
||||
"scripts": {
|
||||
"ng": "ng",
|
||||
"start": "ng serve",
|
||||
"build": "ng build --prod --aot",
|
||||
"test": "ng test",
|
||||
"lint": "ng lint",
|
||||
"e2e": "ng e2e"
|
||||
},
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"@angular/animations": "~8.1.0",
|
||||
"@angular/cdk": "~8.0.2",
|
||||
"@angular/common": "~8.1.0",
|
||||
"@angular/compiler": "~8.1.0",
|
||||
"@angular/core": "~8.1.0",
|
||||
"@angular/forms": "~8.1.0",
|
||||
"@angular/http": "^7.2.15",
|
||||
"@angular/material": "^8.0.2",
|
||||
"@angular/platform-browser": "~8.1.0",
|
||||
"@angular/platform-browser-dynamic": "~8.1.0",
|
||||
"@angular/router": "~8.1.0",
|
||||
"angular-google-charts": "^0.1.6",
|
||||
"chart.js": "^2.8.0",
|
||||
"hammerjs": "^2.0.8",
|
||||
"rxjs": "~6.4.0",
|
||||
"tslib": "^1.9.0",
|
||||
"zone.js": "~0.9.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@angular-devkit/build-angular": "~0.801.0",
|
||||
"@angular/cli": "^8.1.2",
|
||||
"@angular/compiler-cli": "~8.1.0",
|
||||
"@angular/language-service": "~8.1.0",
|
||||
"@types/jasmine": "~3.3.8",
|
||||
"@types/jasminewd2": "~2.0.3",
|
||||
"@types/node": "~8.9.4",
|
||||
"codelyzer": "^5.0.0",
|
||||
"jasmine-core": "~3.4.0",
|
||||
"jasmine-spec-reporter": "~4.2.1",
|
||||
"karma": "~4.1.0",
|
||||
"karma-chrome-launcher": "~2.2.0",
|
||||
"karma-coverage-istanbul-reporter": "~2.0.1",
|
||||
"karma-jasmine": "~2.0.1",
|
||||
"karma-jasmine-html-reporter": "^1.4.0",
|
||||
"protractor": "~5.4.0",
|
||||
"ts-node": "~7.0.0",
|
||||
"tslint": "~5.15.0",
|
||||
"typescript": "~3.4.3"
|
||||
}
|
||||
}
|
||||
0
hack/crawl/ui/src/app/app.component.css
Normal file
0
hack/crawl/ui/src/app/app.component.css
Normal file
2
hack/crawl/ui/src/app/app.component.html
Normal file
2
hack/crawl/ui/src/app/app.component.html
Normal file
@@ -0,0 +1,2 @@
|
||||
<h1>{{ title }}</h1>
|
||||
<router-outlet></router-outlet>
|
||||
31
hack/crawl/ui/src/app/app.component.spec.ts
Normal file
31
hack/crawl/ui/src/app/app.component.spec.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
import { TestBed, async } from '@angular/core/testing';
import { RouterTestingModule } from '@angular/router/testing';
import { AppComponent } from './app.component';

// Unit tests for the root component, which renders its title in an <h1>
// followed by a <router-outlet>.
describe('AppComponent', () => {
  beforeEach(async(() => {
    TestBed.configureTestingModule({
      // The template contains <router-outlet>, so the router directives have
      // to be available while the component is compiled.
      imports: [
        RouterTestingModule
      ],
      declarations: [
        AppComponent
      ],
    }).compileComponents();
  }));

  it('should create the app', () => {
    const fixture = TestBed.createComponent(AppComponent);
    const app = fixture.debugElement.componentInstance;
    expect(app).toBeTruthy();
  });

  it(`should have as title 'k8s Search'`, () => {
    const fixture = TestBed.createComponent(AppComponent);
    const app = fixture.debugElement.componentInstance;
    expect(app.title).toEqual('k8s Search');
  });

  it('should render title in a h1 tag', () => {
    const fixture = TestBed.createComponent(AppComponent);
    fixture.detectChanges();
    const compiled = fixture.debugElement.nativeElement;
    expect(compiled.querySelector('h1').textContent).toContain('k8s Search');
  });
});
|
||||
10
hack/crawl/ui/src/app/app.component.ts
Normal file
10
hack/crawl/ui/src/app/app.component.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import { Component } from '@angular/core';
|
||||
|
||||
@Component({
|
||||
selector: 'app-root',
|
||||
templateUrl: './app.component.html',
|
||||
styleUrls: ['./app.component.css']
|
||||
})
|
||||
export class AppComponent {
|
||||
title = 'k8s Search';
|
||||
}
|
||||
58
hack/crawl/ui/src/app/app.module.ts
Normal file
58
hack/crawl/ui/src/app/app.module.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
import { BrowserModule } from '@angular/platform-browser';
|
||||
import { Routes, RouterModule } from '@angular/router';
|
||||
import { NgModule } from '@angular/core';
|
||||
import { FormsModule } from '@angular/forms';
|
||||
import { HttpClientModule } from '@angular/common/http';
|
||||
|
||||
import { MatExpansionModule } from '@angular/material/expansion';
|
||||
import { MatInputModule } from '@angular/material/input';
|
||||
import { MatListModule } from '@angular/material/list';
|
||||
import { MatButtonModule } from '@angular/material/button';
|
||||
|
||||
import { AppComponent } from './app.component';
|
||||
import { SearchComponent } from './search/search.component';
|
||||
import { BrowserAnimationsModule } from '@angular/platform-browser/animations';
|
||||
import { HistogramComponent } from './histogram/histogram.component';
|
||||
import { TimeseriesComponent } from './timeseries/timeseries.component';
|
||||
|
||||
const appRoutes: Routes = [
|
||||
{
|
||||
path: 'search',
|
||||
component: SearchComponent,
|
||||
runGuardsAndResolvers: 'always'
|
||||
},
|
||||
// Always redirect to the search endpoint for now.
|
||||
{
|
||||
path: '',
|
||||
redirectTo: 'search',
|
||||
pathMatch: 'full',
|
||||
},
|
||||
];
|
||||
|
||||
// Root module: declares the application's components and wires up the
// Angular Material, forms, HTTP and router modules they rely on.
@NgModule({
  declarations: [
    AppComponent,
    SearchComponent,
    HistogramComponent,
    TimeseriesComponent,
  ],
  imports: [
    BrowserModule,
    BrowserAnimationsModule,
    HttpClientModule,
    MatExpansionModule,
    MatInputModule,
    MatListModule,
    MatButtonModule,
    FormsModule,
    RouterModule.forRoot(
      appRoutes,
      { onSameUrlNavigation: 'reload', }
    )
  ],
  // HttpClientModule is a module, not an injectable: it is made available
  // through `imports` above. The former `{provide: HttpClientModule}` entry
  // had no use* recipe and has been removed.
  providers: [],
  bootstrap: [AppComponent]
})
export class AppModule {}
|
||||
41
hack/crawl/ui/src/app/documents.ts
Normal file
41
hack/crawl/ui/src/app/documents.ts
Normal file
@@ -0,0 +1,41 @@
|
||||
export interface SearchResults {
|
||||
hits: SearchResults.Hits;
|
||||
aggregations?: SearchResults.Aggregations;
|
||||
};
|
||||
|
||||
export namespace SearchResults {
|
||||
export class Hits {
|
||||
total: number;
|
||||
hits: SearchResults.InnerHits[];
|
||||
};
|
||||
|
||||
export class InnerHits {
|
||||
id: string;
|
||||
result: SearchResults.Result;
|
||||
};
|
||||
|
||||
export class Result {
|
||||
repositoryUrl: string;
|
||||
filePath: string;
|
||||
defaultBranch: string;
|
||||
document: string;
|
||||
creationTime: Date;
|
||||
values: string;
|
||||
kinds: string;
|
||||
};
|
||||
|
||||
export interface Aggregations {
|
||||
timeseries?: SearchResults.BucketAggregation;
|
||||
kinds?: SearchResults.BucketAggregation;
|
||||
};
|
||||
|
||||
export interface BucketAggregation {
|
||||
otherResults?: number;
|
||||
buckets: SearchResults.Bucket[];
|
||||
};
|
||||
|
||||
export class Bucket {
|
||||
key: string;
|
||||
count: number;
|
||||
};
|
||||
};
|
||||
1
hack/crawl/ui/src/app/histogram/histogram.component.html
Normal file
1
hack/crawl/ui/src/app/histogram/histogram.component.html
Normal file
@@ -0,0 +1 @@
|
||||
<div><canvas id="histogram">{{hist}}</canvas></div>
|
||||
25
hack/crawl/ui/src/app/histogram/histogram.component.spec.ts
Normal file
25
hack/crawl/ui/src/app/histogram/histogram.component.spec.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
import { async, ComponentFixture, TestBed } from '@angular/core/testing';
|
||||
|
||||
import { HistogramComponent } from './histogram.component';
|
||||
|
||||
describe('HistogramComponent', () => {
|
||||
let component: HistogramComponent;
|
||||
let fixture: ComponentFixture<HistogramComponent>;
|
||||
|
||||
beforeEach(async(() => {
|
||||
TestBed.configureTestingModule({
|
||||
declarations: [ HistogramComponent ]
|
||||
})
|
||||
.compileComponents();
|
||||
}));
|
||||
|
||||
beforeEach(() => {
|
||||
fixture = TestBed.createComponent(HistogramComponent);
|
||||
component = fixture.componentInstance;
|
||||
fixture.detectChanges();
|
||||
});
|
||||
|
||||
it('should create', () => {
|
||||
expect(component).toBeTruthy();
|
||||
});
|
||||
});
|
||||
61
hack/crawl/ui/src/app/histogram/histogram.component.ts
Normal file
61
hack/crawl/ui/src/app/histogram/histogram.component.ts
Normal file
@@ -0,0 +1,61 @@
|
||||
import { Chart } from 'chart.js';
|
||||
import { SearchResults } from '../documents';
|
||||
|
||||
import { Component, OnInit } from '@angular/core';
|
||||
import { Subject, Observable } from 'rxjs';
|
||||
|
||||
const otherLabel = 'Other Kinds';
|
||||
|
||||
// Renders SearchResults.BucketAggregation data as a bar chart on the
// 'histogram' canvas.
@Component({
  selector: 'app-histogram',
  templateUrl: './histogram.component.html',
  styleUrls: ['./histogram.component.css']
})
export class HistogramComponent implements OnInit {
  hist;

  constructor() {}
  ngOnInit() {}

  // Replaces the current chart with one drawn from agg. A positive
  // otherResults count is appended as an extra 'Other Kinds' bar. The returned
  // observable emits the label of every clicked bar except that extra one.
  public update(agg: SearchResults.BucketAggregation): Observable<string> {
    if (this.hist) {
      this.hist.destroy();
    }

    const labels = [];
    const counts = [];
    for (const bucket of agg.buckets) {
      labels.push(bucket.key);
      counts.push(bucket.count);
    }
    if (agg.otherResults && agg.otherResults > 0) {
      labels.push(otherLabel);
      counts.push(agg.otherResults);
    }

    const selectedLabel = new Subject<string>();

    this.hist = new Chart('histogram', {
      type: 'bar',
      data: {
        datasets: [ { data: counts } ],
        labels: labels,
      },
      options: {
        legend: { display: false },
        'onClick' : (e, it) => {
          // Ignore clicks that did not land on a labelled bar.
          const clicked = it && it[0];
          const label = clicked && clicked._model && clicked._model.label;
          if (!label) {
            return;
          }
          if (label != otherLabel) {
            selectedLabel.next(label);
          }
        },
        scales: {
          // no floating point
          yAxes: [ { ticks: { precision: 0, beginAtZero: true } } ],
        },
      },
    });

    return selectedLabel;
  }
}
|
||||
8
hack/crawl/ui/src/app/search/search.component.css
Normal file
8
hack/crawl/ui/src/app/search/search.component.css
Normal file
@@ -0,0 +1,8 @@
|
||||
.json_query > * {
|
||||
width: 100%;
|
||||
font-family: monospace;
|
||||
}
|
||||
|
||||
mat-expansion-panel-header {
|
||||
padding: 20px;
|
||||
}
|
||||
36
hack/crawl/ui/src/app/search/search.component.html
Normal file
36
hack/crawl/ui/src/app/search/search.component.html
Normal file
@@ -0,0 +1,36 @@
|
||||
<div class="json_query">
|
||||
<mat-form-field>
|
||||
<input matInput (keydown.enter)="search()" placeholder="Search" [(ngModel)]='inputQueryValue'>
|
||||
</mat-form-field>
|
||||
</div>
|
||||
|
||||
<br>
|
||||
<mat-expansion-panel
|
||||
[disabled]="docs.hits.total == 0"
|
||||
[expanded]="docs.hits.hits.length == 0 && docs.hits.total != 0">
|
||||
|
||||
<mat-expansion-panel-header>
|
||||
{{docs.hits.total}} matching config files
|
||||
<div *ngIf="docs.hits.total > 0">, expand to see a breakdown by kind.</div>
|
||||
</mat-expansion-panel-header>
|
||||
<app-histogram></app-histogram>
|
||||
<app-timeseries></app-timeseries>
|
||||
</mat-expansion-panel>
|
||||
|
||||
<br>
|
||||
<mat-expansion-panel class="result" *ngFor="let doc of docs.hits.hits">
|
||||
<mat-expansion-panel-header class="result" [collapsedHeight]="'auto'" [expandedHeight]="'auto'">
|
||||
{{ doc.result.repositoryUrl }}/{{ doc.result.filePath }}
|
||||
</mat-expansion-panel-header>
|
||||
<div mat-line>
|
||||
<h3>File Contents</h3>
|
||||
<pre><code>{{doc.result.document}}</code></pre>
|
||||
</div>
|
||||
</mat-expansion-panel>
|
||||
|
||||
<button mat-button [disabled]="first()" (click)="prev()">
|
||||
Previous
|
||||
</button>
|
||||
<button mat-button [disabled]="last()" (click)="next()">
|
||||
Next
|
||||
</button>
|
||||
25
hack/crawl/ui/src/app/search/search.component.spec.ts
Normal file
25
hack/crawl/ui/src/app/search/search.component.spec.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
import { async, ComponentFixture, TestBed } from '@angular/core/testing';
|
||||
|
||||
import { SearchComponent } from './search.component';
|
||||
|
||||
describe('SearchComponent', () => {
  let fixture: ComponentFixture<SearchComponent>;
  let component: SearchComponent;

  // Register the component under test and compile its external template and
  // stylesheet before each spec.
  // NOTE(review): SearchComponent injects SearchService, Router and
  // ActivatedRoute, and its template uses Material, ngModel and child
  // components; this testing module declares none of them -- confirm this
  // spec actually passes.
  beforeEach(async(() => {
    TestBed
      .configureTestingModule({ declarations: [ SearchComponent ] })
      .compileComponents();
  }));

  // Build a fresh component instance for every spec and run the initial
  // change detection pass.
  beforeEach(() => {
    fixture = TestBed.createComponent(SearchComponent);
    component = fixture.componentInstance;
    fixture.detectChanges();
  });

  // Smoke test: the component can be instantiated.
  it('should create', () => {
    expect(component).toBeTruthy();
  });
});
|
||||
111
hack/crawl/ui/src/app/search/search.component.ts
Normal file
111
hack/crawl/ui/src/app/search/search.component.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
import { Component, OnInit, NgModule } from '@angular/core';
|
||||
import { Router, ActivatedRoute } from '@angular/router';
|
||||
import { SearchResults } from '../documents';
|
||||
import { HistogramComponent } from '../histogram/histogram.component';
|
||||
import { TimeseriesComponent } from '../timeseries/timeseries.component';
|
||||
import { SearchService } from './search.service';
|
||||
|
||||
const perPage = 10;
|
||||
|
||||
/**
 * Search page: keeps the query and paging offset in the URL query string
 * (`q`, `from`), runs the search whenever they change, and feeds the result
 * aggregations to the histogram and timeseries charts.
 */
@Component({
  selector: 'app-search',
  templateUrl: './search.component.html',
  styleUrls: ['./search.component.css'],
  providers: [SearchService]
})
export class SearchComponent implements OnInit {
  // Query terms; mirrors the `q` URL parameter (which may repeat).
  inputQuery: string[] = [];
  // Offset of the first result shown; mirrors the `from` URL parameter.
  from: number = 0;
  disableNav: boolean = false;

  docs: SearchResults = {
    hits: {
      total: 0,
      hits: [],
    },
  };

  kindBreakdown = new HistogramComponent();
  timeseries = new TimeseriesComponent();

  // Subscription of the search request currently in flight, if any.
  private pendingSearch: { unsubscribe(): void } | null = null;

  constructor(
    private searcher: SearchService,
    private router: Router,
    private route: ActivatedRoute
  ) {}

  /**
   * Subscribes to URL query-parameter changes and runs a search for each one.
   */
  ngOnInit() {
    this.route.queryParams.subscribe(params => {
      // Copy the array: `addToQuery` mutates `inputQuery`, and the array in
      // `params` belongs to the router.
      if (params.q instanceof Array) {
        this.inputQuery = params.q.slice();
      } else {
        this.inputQuery = [params.q || ''];
      }

      this.from = parseInt(params.from, 10) || 0;
      if (this.from < 0) {
        // Rewrite the URL with a valid offset and stop here: the navigation
        // re-triggers this handler, so searching now would send the invalid
        // offset to the backend and duplicate the request.
        this.from = 0;
        this.searchWithParams();
        return;
      }

      // Drop the previous request's subscription so a slow, stale response
      // cannot overwrite the results of a newer query.
      if (this.pendingSearch) {
        this.pendingSearch.unsubscribe();
      }

      this.pendingSearch = this.searcher.search(params).subscribe(sr => {
        this.docs = sr;

        // `aggregations` is optional on SearchResults (the initial `docs`
        // value above has none), so guard before dereferencing it.
        const aggs = sr.aggregations;
        if (aggs && aggs.kinds) {
          this.kindBreakdown.update(aggs.kinds).subscribe(selectedKind => {
            this.addToQuery('kind=' + selectedKind);
            this.search();
          });
        }
        this.timeseries.update(aggs ? aggs.timeseries : null);
      });
    });
  }

  /** Appends `q` to the query terms unless it is already present. */
  public addToQuery(q: string) {
    if (this.inputQuery.indexOf(q) === -1) {
      this.inputQuery.push(q);
    }
  }

  /** Starts a new search from the first page. */
  search(): void {
    this.from = 0;
    this.searchWithParams();
  }

  /**
   * Writes the current query and offset into the URL; the `queryParams`
   * subscription set up in `ngOnInit` then performs the actual search.
   */
  searchWithParams(): void {
    const params = {
      q: this.inputQuery,
      from: this.from,
    };
    this.router.navigate([], {
      relativeTo: this.route,
      queryParams: params,
    });
  }

  /** True when the "Previous" button must be disabled. */
  first(): boolean {
    return this.from <= 0 || this.disableNav;
  }

  /** True when the "Next" button must be disabled. */
  last(): boolean {
    return this.from + perPage >= this.docs.hits.total || this.disableNav;
  }

  /** Moves one page forward. */
  next(): void {
    this.from += perPage;
    this.searchWithParams();
  }

  /** Moves one page back. */
  prev(): void {
    this.from -= perPage;
    this.searchWithParams();
  }

  /** Query terms joined by spaces, for two-way binding with the search box. */
  get inputQueryValue(): string {
    return this.inputQuery.join(' ');
  }

  /** Replaces all query terms with the raw text of the search box. */
  set inputQueryValue(input: string) {
    this.inputQuery = [input];
  }
}
|
||||
41
hack/crawl/ui/src/app/search/search.service.ts
Normal file
41
hack/crawl/ui/src/app/search/search.service.ts
Normal file
@@ -0,0 +1,41 @@
|
||||
import { SearchResults } from '../documents';
|
||||
|
||||
import { Injectable } from '@angular/core';
|
||||
import {
|
||||
HttpClient,
|
||||
HttpResponse,
|
||||
HttpParams } from '@angular/common/http';
|
||||
import { Params, convertToParamMap } from '@angular/router';
|
||||
|
||||
import { Observable } from 'rxjs';
|
||||
import { filter, map, catchError } from 'rxjs/operators';
|
||||
|
||||
/**
 * Thin HTTP client for the search backend.
 */
@Injectable()
export class SearchService {
  // Base URL of the backend; endpoint names are appended directly, so it
  // must end with '/'.
  private serviceUrl = "https://www.example.com/";

  constructor(private http: HttpClient) {}

  /**
   * Forwards the given route query parameters to the backend.
   *
   * Every key/value pair (including repeated keys) is sent as a request
   * parameter. The `search` endpoint is used when at least one non-empty `q`
   * value is present; otherwise the `metrics` endpoint is used.
   *
   * @param params route query parameters, e.g. `{ q: ['kind=Foo'], from: '10' }`.
   * @returns the backend response as an observable of SearchResults.
   */
  public search(params: Params): Observable<SearchResults> {
    const pmap = convertToParamMap(params);
    let requestParams = new HttpParams();
    let hasQuery = false;

    for (const key of pmap.keys) {
      for (const value of pmap.getAll(key)) {
        if (key === 'q' && value !== '') {
          hasQuery = true;
        }
        // HttpParams is immutable: append() returns a new instance, so the
        // result has to be re-assigned or the parameter is silently lost.
        requestParams = requestParams.append(key, value);
      }
    }

    const endpoint = hasQuery ? 'search' : 'metrics';
    return this.http.get<SearchResults>(
      this.serviceUrl + endpoint, { params: requestParams });
  }
}
|
||||
@@ -0,0 +1,9 @@
|
||||
<!--
|
||||
TODO(someone who knows angular) Canvas is still populated when the chart
|
||||
is empty. I'm not sure how to do this.
|
||||
-->
|
||||
<div>
|
||||
<canvas max-height="100%" max-width="100%" id="timeseries">
|
||||
{{timeseries}}
|
||||
</canvas>
|
||||
</div>
|
||||
@@ -0,0 +1,25 @@
|
||||
import { async, ComponentFixture, TestBed } from '@angular/core/testing';
|
||||
|
||||
import { TimeseriesComponent } from './timeseries.component';
|
||||
|
||||
describe('TimeseriesComponent', () => {
  let fixture: ComponentFixture<TimeseriesComponent>;
  let component: TimeseriesComponent;

  // Register the component under test and compile its external template and
  // stylesheet before each spec.
  beforeEach(async(() => {
    TestBed
      .configureTestingModule({ declarations: [ TimeseriesComponent ] })
      .compileComponents();
  }));

  // Build a fresh component instance for every spec and run the initial
  // change detection pass.
  beforeEach(() => {
    fixture = TestBed.createComponent(TimeseriesComponent);
    component = fixture.componentInstance;
    fixture.detectChanges();
  });

  // Smoke test: the component can be instantiated.
  it('should create', () => {
    expect(component).toBeTruthy();
  });
});
|
||||
65
hack/crawl/ui/src/app/timeseries/timeseries.component.ts
Normal file
65
hack/crawl/ui/src/app/timeseries/timeseries.component.ts
Normal file
@@ -0,0 +1,65 @@
|
||||
import { Chart } from 'chart.js';
|
||||
import { SearchResults } from '../documents';
|
||||
|
||||
import { Component, OnInit } from '@angular/core';
|
||||
import { Subject, Observable } from 'rxjs';
|
||||
|
||||
/**
 * Draws a cumulative line chart from SearchResults.BucketAggregation data
 * whose bucket keys are dates. The chart is rendered by Chart.js into the
 * element whose id is `timeseries`.
 */
@Component({
  selector: 'app-timeseries',
  templateUrl: './timeseries.component.html',
  styleUrls: ['./timeseries.component.css']
})
export class TimeseriesComponent implements OnInit {
  // Chart.js instance currently drawn, or null when there is nothing to draw.
  timeseries;

  constructor() {}

  ngOnInit() {}

  /**
   * Replaces the current chart with one built from `agg`.
   *
   * Buckets dated on or before the cut-off are dropped; the remaining counts
   * are plotted as a running total over time. When `agg` is missing or has
   * no buckets, the previous chart is removed and nothing is drawn.
   */
  update(agg: SearchResults.BucketAggregation) {
    if (this.timeseries) {
      this.timeseries.destroy();
    }

    const hasBuckets = agg && agg.buckets.length != 0;
    if (!hasBuckets) {
      this.timeseries = null;
      return;
    }

    // Date months are zero-based, so this is 1 February 2017 (local time).
    // NOTE(review): confirm February, rather than January, is the intended
    // cut-off.
    const cutoff = new Date(2017, 1);
    const recent = agg.buckets.filter(bucket => new Date(bucket.key) > cutoff);

    // Build the x-axis dates and the running totals in one pass.
    const labels = [];
    const counts = [];
    let runningTotal = 0;
    for (const bucket of recent) {
      labels.push(new Date(bucket.key));
      runningTotal += bucket.count;
      counts.push(runningTotal);
    }

    const dataset = {
      label: 'Kustomizations Over time',
      data: counts,
      type: 'line',
      pointRadius: 0,
      lineTension: 0,
    };

    const timeAxis = {
      type: 'time',
      distribution: 'linear',
      ticks: {
        autoSkip: true,
      },
    };

    this.timeseries = new Chart('timeseries', {
      type: 'line',
      data: {
        datasets: [ dataset ],
        labels: labels,
      },
      options: {
        scales: {
          xAxes: [ timeAxis ],
        }
      },
    });
  }
}
|
||||
0
hack/crawl/ui/src/assets/.gitkeep
Normal file
0
hack/crawl/ui/src/assets/.gitkeep
Normal file
3
hack/crawl/ui/src/environments/environment.prod.ts
Normal file
3
hack/crawl/ui/src/environments/environment.prod.ts
Normal file
@@ -0,0 +1,3 @@
|
||||
export const environment = {
|
||||
production: true
|
||||
};
|
||||
16
hack/crawl/ui/src/environments/environment.ts
Normal file
16
hack/crawl/ui/src/environments/environment.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
// This file can be replaced during build by using the `fileReplacements` array.
|
||||
// `ng build --prod` replaces `environment.ts` with `environment.prod.ts`.
|
||||
// The list of file replacements can be found in `angular.json`.
|
||||
|
||||
export const environment = {
|
||||
production: false
|
||||
};
|
||||
|
||||
/*
|
||||
* For easier debugging in development mode, you can import the following file
|
||||
* to ignore zone related error stack frames such as `zone.run`, `zoneDelegate.invokeTask`.
|
||||
*
|
||||
* This import should be commented out in production mode because it will have a negative impact
|
||||
* on performance if an error is thrown.
|
||||
*/
|
||||
// import 'zone.js/dist/zone-error'; // Included with Angular CLI.
|
||||
BIN
hack/crawl/ui/src/favicon.ico
Normal file
BIN
hack/crawl/ui/src/favicon.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 5.3 KiB |
16
hack/crawl/ui/src/index.html
Normal file
16
hack/crawl/ui/src/index.html
Normal file
@@ -0,0 +1,16 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Kustomize Search</title>
|
||||
<base href="/">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="icon" type="image/x-icon" href="favicon.ico">
|
||||
<link href="https://fonts.googleapis.com/css?family=Roboto:300,400,500" rel="stylesheet">
|
||||
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
|
||||
</head>
|
||||
<body>
|
||||
<app-root></app-root>
|
||||
</body>
|
||||
</html>
|
||||
13
hack/crawl/ui/src/main.ts
Normal file
13
hack/crawl/ui/src/main.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import 'hammerjs';
|
||||
import { enableProdMode } from '@angular/core';
|
||||
import { platformBrowserDynamic } from '@angular/platform-browser-dynamic';
|
||||
|
||||
import { AppModule } from './app/app.module';
|
||||
import { environment } from './environments/environment';
|
||||
|
||||
if (environment.production) {
|
||||
enableProdMode();
|
||||
}
|
||||
|
||||
platformBrowserDynamic().bootstrapModule(AppModule)
|
||||
.catch(err => console.error(err));
|
||||
63
hack/crawl/ui/src/polyfills.ts
Normal file
63
hack/crawl/ui/src/polyfills.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
/**
|
||||
* This file includes polyfills needed by Angular and is loaded before the app.
|
||||
* You can add your own extra polyfills to this file.
|
||||
*
|
||||
* This file is divided into 2 sections:
|
||||
* 1. Browser polyfills. These are applied before loading ZoneJS and are sorted by browsers.
|
||||
* 2. Application imports. Files imported after ZoneJS that should be loaded before your main
|
||||
* file.
|
||||
*
|
||||
* The current setup is for so-called "evergreen" browsers; the last versions of browsers that
|
||||
* automatically update themselves. This includes Safari >= 10, Chrome >= 55 (including Opera),
|
||||
* Edge >= 13 on the desktop, and iOS 10 and Chrome on mobile.
|
||||
*
|
||||
* Learn more in https://angular.io/guide/browser-support
|
||||
*/
|
||||
|
||||
/***************************************************************************************************
|
||||
* BROWSER POLYFILLS
|
||||
*/
|
||||
|
||||
/** IE10 and IE11 requires the following for NgClass support on SVG elements */
|
||||
// import 'classlist.js'; // Run `npm install --save classlist.js`.
|
||||
|
||||
/**
|
||||
* Web Animations `@angular/platform-browser/animations`
|
||||
* Only required if AnimationBuilder is used within the application and using IE/Edge or Safari.
|
||||
* Standard animation support in Angular DOES NOT require any polyfills (as of Angular 6.0).
|
||||
*/
|
||||
// import 'web-animations-js'; // Run `npm install --save web-animations-js`.
|
||||
|
||||
/**
|
||||
* By default, zone.js will patch all possible macroTask and DomEvents
|
||||
* user can disable parts of macroTask/DomEvents patch by setting following flags
|
||||
* because those flags need to be set before `zone.js` being loaded, and webpack
|
||||
* will put import in the top of bundle, so user need to create a separate file
|
||||
* in this directory (for example: zone-flags.ts), and put the following flags
|
||||
* into that file, and then add the following code before importing zone.js.
|
||||
* import './zone-flags.ts';
|
||||
*
|
||||
* The flags allowed in zone-flags.ts are listed here.
|
||||
*
|
||||
* The following flags will work for all browsers.
|
||||
*
|
||||
* (window as any).__Zone_disable_requestAnimationFrame = true; // disable patch requestAnimationFrame
|
||||
* (window as any).__Zone_disable_on_property = true; // disable patch onProperty such as onclick
|
||||
* (window as any).__zone_symbol__UNPATCHED_EVENTS = ['scroll', 'mousemove']; // disable patch specified eventNames
|
||||
*
|
||||
* in IE/Edge developer tools, the addEventListener will also be wrapped by zone.js
|
||||
* with the following flag, it will bypass `zone.js` patch for IE/Edge
|
||||
*
|
||||
* (window as any).__Zone_enable_cross_context_check = true;
|
||||
*
|
||||
*/
|
||||
|
||||
/***************************************************************************************************
|
||||
* Zone JS is required by default for Angular itself.
|
||||
*/
|
||||
import 'zone.js/dist/zone'; // Included with Angular CLI.
|
||||
|
||||
|
||||
/***************************************************************************************************
|
||||
* APPLICATION IMPORTS
|
||||
*/
|
||||
36
hack/crawl/ui/src/styles.css
Normal file
36
hack/crawl/ui/src/styles.css
Normal file
@@ -0,0 +1,36 @@
|
||||
/* You can add global styles to this file, and also import other style files */
|
||||
@import '~@angular/material/prebuilt-themes/deeppurple-amber.css';
|
||||
html, body { height: 100%; }
|
||||
|
||||
body {
|
||||
font-family: Roboto, "Helvetica Neue", sans-serif;
|
||||
font-style: normal;
|
||||
max-width: 800px;
|
||||
margin-right: auto;
|
||||
margin-left: auto;
|
||||
padding-left: 16px;
|
||||
padding-right: 16px;
|
||||
}
|
||||
|
||||
h1, .h1, h2, .h2, h3, .h3 {
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
margin-top: 20px;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
pre {
|
||||
overflow: auto;
|
||||
padding: 10px;
|
||||
background-color: #f0f0f0;
|
||||
border-radius: 4px;
|
||||
border: 1px solid #ddd;
|
||||
}
|
||||
|
||||
mat-expansion-panel.result {
|
||||
margin-top: 8px;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
.mat-expansion-panel-header {
|
||||
padding: 20px;
|
||||
}
|
||||
20
hack/crawl/ui/src/test.ts
Normal file
20
hack/crawl/ui/src/test.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
// This file is required by karma.conf.js and loads recursively all the .spec and framework files
|
||||
|
||||
import 'zone.js/dist/zone-testing';
|
||||
import { getTestBed } from '@angular/core/testing';
|
||||
import {
|
||||
BrowserDynamicTestingModule,
|
||||
platformBrowserDynamicTesting
|
||||
} from '@angular/platform-browser-dynamic/testing';
|
||||
|
||||
declare const require: any;
|
||||
|
||||
// First, initialize the Angular testing environment.
|
||||
getTestBed().initTestEnvironment(
|
||||
BrowserDynamicTestingModule,
|
||||
platformBrowserDynamicTesting()
|
||||
);
|
||||
// Then we find all the tests.
|
||||
const context = require.context('./', true, /\.spec\.ts$/);
|
||||
// And load the modules.
|
||||
context.keys().map(context);
|
||||
14
hack/crawl/ui/tsconfig.app.json
Normal file
14
hack/crawl/ui/tsconfig.app.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"extends": "./tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"outDir": "./out-tsc/app",
|
||||
"types": []
|
||||
},
|
||||
"include": [
|
||||
"src/**/*.ts"
|
||||
],
|
||||
"exclude": [
|
||||
"src/test.ts",
|
||||
"src/**/*.spec.ts"
|
||||
]
|
||||
}
|
||||
26
hack/crawl/ui/tsconfig.json
Normal file
26
hack/crawl/ui/tsconfig.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"compileOnSave": false,
|
||||
"compilerOptions": {
|
||||
"baseUrl": "./",
|
||||
"outDir": "./dist/out-tsc",
|
||||
"sourceMap": true,
|
||||
"declaration": false,
|
||||
"downlevelIteration": true,
|
||||
"experimentalDecorators": true,
|
||||
"module": "esnext",
|
||||
"moduleResolution": "node",
|
||||
"importHelpers": true,
|
||||
"target": "es2015",
|
||||
"typeRoots": [
|
||||
"node_modules/@types"
|
||||
],
|
||||
"lib": [
|
||||
"es2018",
|
||||
"dom"
|
||||
]
|
||||
},
|
||||
"angularCompilerOptions": {
|
||||
"fullTemplateTypeCheck": true,
|
||||
"strictInjectionParameters": true
|
||||
}
|
||||
}
|
||||
18
hack/crawl/ui/tsconfig.spec.json
Normal file
18
hack/crawl/ui/tsconfig.spec.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"extends": "./tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"outDir": "./out-tsc/spec",
|
||||
"types": [
|
||||
"jasmine",
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"files": [
|
||||
"src/test.ts",
|
||||
"src/polyfills.ts"
|
||||
],
|
||||
"include": [
|
||||
"src/**/*.spec.ts",
|
||||
"src/**/*.d.ts"
|
||||
]
|
||||
}
|
||||
92
hack/crawl/ui/tslint.json
Normal file
92
hack/crawl/ui/tslint.json
Normal file
@@ -0,0 +1,92 @@
|
||||
{
|
||||
"extends": "tslint:recommended",
|
||||
"rules": {
|
||||
"array-type": false,
|
||||
"arrow-parens": false,
|
||||
"deprecation": {
|
||||
"severity": "warning"
|
||||
},
|
||||
"component-class-suffix": true,
|
||||
"contextual-lifecycle": true,
|
||||
"directive-class-suffix": true,
|
||||
"directive-selector": [
|
||||
true,
|
||||
"attribute",
|
||||
"app",
|
||||
"camelCase"
|
||||
],
|
||||
"component-selector": [
|
||||
true,
|
||||
"element",
|
||||
"app",
|
||||
"kebab-case"
|
||||
],
|
||||
"import-blacklist": [
|
||||
true,
|
||||
"rxjs/Rx"
|
||||
],
|
||||
"interface-name": false,
|
||||
"max-classes-per-file": false,
|
||||
"max-line-length": [
|
||||
true,
|
||||
140
|
||||
],
|
||||
"member-access": false,
|
||||
"member-ordering": [
|
||||
true,
|
||||
{
|
||||
"order": [
|
||||
"static-field",
|
||||
"instance-field",
|
||||
"static-method",
|
||||
"instance-method"
|
||||
]
|
||||
}
|
||||
],
|
||||
"no-consecutive-blank-lines": false,
|
||||
"no-console": [
|
||||
true,
|
||||
"debug",
|
||||
"info",
|
||||
"time",
|
||||
"timeEnd",
|
||||
"trace"
|
||||
],
|
||||
"no-empty": false,
|
||||
"no-inferrable-types": [
|
||||
true,
|
||||
"ignore-params"
|
||||
],
|
||||
"no-non-null-assertion": true,
|
||||
"no-redundant-jsdoc": true,
|
||||
"no-switch-case-fall-through": true,
|
||||
"no-use-before-declare": true,
|
||||
"no-var-requires": false,
|
||||
"object-literal-key-quotes": [
|
||||
true,
|
||||
"as-needed"
|
||||
],
|
||||
"object-literal-sort-keys": false,
|
||||
"ordered-imports": false,
|
||||
"quotemark": [
|
||||
true,
|
||||
"single"
|
||||
],
|
||||
"trailing-comma": false,
|
||||
"no-conflicting-lifecycle": true,
|
||||
"no-host-metadata-property": true,
|
||||
"no-input-rename": true,
|
||||
"no-inputs-metadata-property": true,
|
||||
"no-output-native": true,
|
||||
"no-output-on-prefix": true,
|
||||
"no-output-rename": true,
|
||||
"no-outputs-metadata-property": true,
|
||||
"template-banana-in-box": true,
|
||||
"template-no-negated-async": true,
|
||||
"use-lifecycle-interface": true,
|
||||
"use-pipe-transform-interface": true
|
||||
},
|
||||
"rulesDirectory": [
|
||||
"codelyzer"
|
||||
]
|
||||
}
|
||||
10
hack/doGoMod.sh
Executable file
10
hack/doGoMod.sh
Executable file
@@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env bash
#
# Runs `go mod <operation>` in every directory at or below the current
# directory that contains a go.mod file.
#
# Usage: From repo root:
#  ./hack/doGoMod.sh tidy
#  ./hack/doGoMod.sh verify

#######################################
# Runs `go mod <operation>` next to every go.mod found under ./ .
# Arguments: $1 - the `go mod` subcommand to run (e.g. tidy, verify)
# Outputs:   the path of each go.mod file, one per line, on stdout
# Returns:   0 if every invocation succeeded, 1 if any failed,
#            2 if no operation was given
#######################################
main() {
  if [[ $# -lt 1 || -z "$1" ]]; then
    printf 'usage: %s <operation>  (e.g. tidy, verify)\n' "${0##*/}" >&2
    return 2
  fi

  local operation=$1
  local status=0
  local f d

  # NUL-delimited traversal keeps paths containing whitespace or glob
  # characters intact. The list is read on fd 3 so that `go` still sees the
  # caller's stdin.
  while IFS= read -r -d '' -u 3 f; do
    printf '%s\n' "$f"
    d=$(dirname "$f")
    # The subshell keeps the directory change local to this iteration; `&&`
    # stops `go mod` from running in the wrong directory if `cd` fails.
    # Keep going after a failure, but remember it for the exit status.
    (cd "$d" && go mod "$operation") || status=1
  done 3< <(find ./ -name 'go.mod' -print0)

  return "$status"
}

main "$@"
|
||||
5
hack/imports.sh
Executable file
5
hack/imports.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env bash
#
# Rewrites every .go file at or below the current directory in place with
# `goimports -w`.
#
# The tool defaults to $HOME/gopath/bin/goimports; set GOIMPORTS to use a
# different binary.

#######################################
# Runs goimports on every .go file found under ./ .
# Globals:   GOIMPORTS (read, optional) - path of the goimports binary
# Outputs:   the path of each processed file, one per line, on stdout
# Returns:   0 if every invocation succeeded, 1 if any failed
#######################################
main() {
  # Alternative invocation left in the original script:
  # go run go.coder.com/go-tools/cmd/goimports
  local goimports=${GOIMPORTS:-"$HOME/gopath/bin/goimports"}
  local status=0
  local f

  # NUL-delimited traversal keeps paths containing whitespace or glob
  # characters intact. The list is read on fd 3 so the tool still sees the
  # caller's stdin.
  while IFS= read -r -d '' -u 3 f; do
    printf '%s\n' "$f"
    # Keep going after a failure, but remember it for the exit status.
    "$goimports" -w "$f" || status=1
  done 3< <(find ./ -name '*.go' -print0)

  return "$status"
}

main "$@"
|
||||
Reference in New Issue
Block a user