Use an RWMutex to guard access to the map

Without the RWMutex, concurrent goroutines may crash the program with "fatal error: concurrent map read and map write".
This commit is contained in:
Haiyan Meng
2020-06-14 13:40:08 -07:00
parent a895220743
commit 171412cc98
2 changed files with 22 additions and 6 deletions

View File

@@ -191,7 +191,7 @@ func main() {
// this greatly reduces the time overhead of CrawlGithub. // this greatly reduces the time overhead of CrawlGithub.
getSeedDocsFunc() getSeedDocsFunc()
for _, d := range seedDocs { for _, d := range seedDocs {
seen[d.ID()] = d.FileType seen.Set(d.ID(), d.FileType)
} }
crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen) crawler.CrawlGithub(ctx, crawlers, docConverter, indexFunc, seen)
case CrawlUser: case CrawlUser:

View File

@@ -1,21 +1,37 @@
package utils package utils
type SeenMap map[string]string import "sync"
type SeenMap struct {
data map[string]string
lock sync.RWMutex
}
// TODO: add lock to avoid race condition
func (seen SeenMap) Seen(item string) bool { func (seen SeenMap) Seen(item string) bool {
_, ok := seen[item] seen.lock.RLock()
_, ok := seen.data[item]
seen.lock.RUnlock()
return ok return ok
} }
func (seen SeenMap) Set(k, v string) { func (seen SeenMap) Set(k, v string) {
seen[k] = v seen.lock.Lock()
seen.data[k] = v
seen.lock.Unlock()
} }
// The caller should make sure that key is in the map. // The caller should make sure that key is in the map.
func (seen SeenMap) Value(k string) string { func (seen SeenMap) Value(k string) string {
return seen[k] seen.lock.RLock()
v := seen.data[k]
seen.lock.RUnlock()
return v
} }
func NewSeenMap() SeenMap { func NewSeenMap() SeenMap {
return make(map[string]string) return SeenMap{
data: make(map[string]string),
lock: sync.RWMutex{},
}
} }