Crawler performance improvements, better structure

Refactored the crawler implementation to make the whole thing more
testable. Added a document interface to make the crawler generic.
This will be useful for collecting plugins, and other documents.
This commit is contained in:
Damien Robichaud
2019-08-23 18:11:55 -07:00
parent a66808a10d
commit c2d6f09ef3
13 changed files with 1251 additions and 99 deletions

View File

@@ -9,6 +9,10 @@ import (
redis_cache "github.com/gregjones/httpcache/redis"
)
func FromCache(header http.Header) bool {
return header.Get(httpcache.XFromCache) != ""
}
func NewClient(conn redis.Conn) *http.Client {
etagCache := redis_cache.NewWithClient(conn)
tr := httpcache.NewTransport(etagCache)