package main

import (
	"log"
	"os"
	"path/filepath"
	"strings"
	"time"

	goutil "git.fireandbrimst.one/aw/goutil/html"
	"git.fireandbrimst.one/aw/goutil/misc"
	"github.com/fsnotify/fsnotify"
	"golang.org/x/net/html"
)

const (
	// NAME is the program name used in log output and desktop notifications.
	NAME = "ambrowse"
	// DIR is the watched directory, created inside the home directory.
	DIR = "ambrowse"
	// BOOKFILE is the file inside DIR that extracted book records are appended to.
	BOOKFILE = "books.yaml"
)

// book holds the metadata extracted from one saved product page.
type book struct {
	title       string
	authors     []string
	publisher   string
	isbn10      string
	isbn13      string
	amazonPrice string
}

// String renders the book as one field per line, with the authors joined by
// ", ". It is used as the notification text.
func (b book) String() string {
	return strings.Join([]string{
		b.title,
		strings.Join(b.authors, ", "),
		b.publisher,
		b.isbn10,
		b.isbn13,
		b.amazonPrice,
	}, "\n")
}

// yamlEscaper escapes the two characters that would otherwise terminate or
// corrupt a YAML double-quoted scalar. Line breaks are deliberately left
// alone: they are legal inside double-quoted scalars.
var yamlEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`)

// yamlQuote returns s as a YAML double-quoted scalar.
func yamlQuote(s string) string {
	return `"` + yamlEscaper.Replace(s) + `"`
}

// yaml renders the book as a YAML document introduced by a "---" separator,
// ready to be appended to BOOKFILE.
//
// Every value is written as a double-quoted scalar so that quotes,
// backslashes, commas or colons in the scraped text cannot produce invalid
// YAML or split an author name, and so that all-digit ISBNs are kept as
// strings instead of being interpreted as numbers.
func (b book) yaml() string {
	quotedAuthors := make([]string, 0, len(b.authors))
	for _, a := range b.authors {
		quotedAuthors = append(quotedAuthors, yamlQuote(a))
	}
	return "\n---\n\n" +
		"title: " + yamlQuote(b.title) + "\n" +
		"authors: [" + strings.Join(quotedAuthors, ", ") + "]\n" +
		"publisher: " + yamlQuote(b.publisher) + "\n" +
		"isbn10: " + yamlQuote(b.isbn10) + "\n" +
		"isbn13: " + yamlQuote(b.isbn13) + "\n" +
		"amazonPrice: " + yamlQuote(b.amazonPrice) + "\n"
}

// optFatal logs err and terminates the program if err is non-nil.
func optFatal(err error) {
	if err != nil {
		log.Fatal(err)
	}
}

// optLog logs err if it is non-nil and otherwise does nothing.
func optLog(err error) {
	if err != nil {
		log.Println(err)
	}
}

// readLine returns the value part of a "label: value" line: everything after
// the first colon, with U+200E (left-to-right mark) characters removed and
// surrounding whitespace trimmed. It returns "" when the line has no colon.
func readLine(line string) string {
	line = strings.TrimSpace(line)
	colon := strings.Index(line, ":")
	if colon < 0 {
		return ""
	}
	value := strings.Replace(line[colon+1:], "\u200e", "", -1)
	return strings.TrimSpace(value)
}

// getDirectory returns the path of DIR inside the user's home directory,
// creating it (mode 0750) if necessary. It terminates the program when the
// home directory cannot be determined or the directory cannot be created.
func getDirectory() string {
	home, err := misc.HomeDir()
	if err != nil {
		log.Fatal("Cannot identify home directory: ", err)
	}
	dir := filepath.Join(home, DIR)
	if err := os.MkdirAll(dir, 0750); err != nil {
		log.Fatal("Cannot create "+dir+": ", err)
	}
	return dir
}

// getAuthors collects the distinct author names found in elements of class
// "contributorNameID" and "author". Names are returned in the order they are
// first encountered, so repeated runs over the same page give the same result.
func getAuthors(doc *goutil.HtmlNode) []string {
	nodes := doc.GetElementsByClass("contributorNameID")
	nodes = append(nodes, doc.GetElementsByClass("author")...)

	seen := make(map[string]struct{}, len(nodes))
	result := make([]string, 0, len(nodes))
	for _, n := range nodes {
		var name string
		if decl := n.GetElementsByClass("a-declarative"); len(decl) > 0 {
			// Prefer the nested "a-declarative" element when there is one.
			name = decl[0].Text()
		} else {
			name = strings.TrimSuffix(n.Text(), ",")
		}
		if _, dup := seen[name]; dup {
			continue
		}
		seen[name] = struct{}{}
		result = append(result, name)
	}
	return result
}

// setDetails fills b.publisher, b.isbn10 and b.isbn13 from the <li> entries of
// the element with id "productDetailsTable", falling back to
// "detailBullets_feature_div". It leaves b untouched when neither exists.
func setDetails(b *book, doc *goutil.HtmlNode) {
	details := doc.GetElementById("productDetailsTable")
	if details == nil {
		details = doc.GetElementById("detailBullets_feature_div")
	}
	if details == nil {
		return
	}

	items := details.FindAll(func(n *goutil.HtmlNode) bool {
		return n.Type == html.ElementNode && n.Data == "li"
	})
	for _, item := range items {
		line := item.Text()
		switch {
		case strings.Contains(line, "Verlag"),
			strings.Contains(line, "Publisher"),
			strings.Contains(line, "Herausgeber"):
			b.publisher = readLine(line)
		case strings.Contains(line, "ISBN-10"):
			b.isbn10 = readLine(line)
		case strings.Contains(line, "ISBN-13"):
			b.isbn13 = readLine(line)
		}
	}
}

// getPrice returns the trimmed text of the first element of class
// "a-color-price", or "" when the document has none.
func getPrice(doc *goutil.HtmlNode) string {
	prices := doc.GetElementsByClass("a-color-price")
	if len(prices) == 0 {
		return ""
	}
	return strings.TrimSpace(prices[0].Text())
}

// readHTML parses the HTML file at filename and extracts its book metadata.
// The program is terminated when the file cannot be opened or parsed. A page
// without a "productTitle" element yields a book with an empty title.
func readHTML(filename string) book {
	b := book{}
	log.Println("Reading metadata from " + filename)

	f, err := os.Open(filename)
	optFatal(err)
	defer f.Close()

	tmpdoc, err := html.Parse(f)
	optFatal(err)
	doc := (*goutil.HtmlNode)(tmpdoc)

	// GetElementById is treated as nullable elsewhere in this file (see
	// setDetails), so guard before dereferencing the result.
	if title := doc.GetElementById("productTitle"); title != nil {
		b.title = title.Text()
	}
	b.authors = getAuthors(doc)
	setDetails(&b, doc)
	b.amazonPrice = getPrice(doc)
	return b
}

// clean waits 30 seconds and then removes every path in files, logging any
// removal error.
func clean(files []string) {
	time.Sleep(30 * time.Second)
	for _, file := range files {
		log.Println("Removing", file)
		optLog(os.RemoveAll(file))
	}
}

// digest reads every regular file in created, appends the YAML record of each
// book that has a title to BOOKFILE inside dir and sends a notification for
// it. Afterwards it starts clean on all created paths in the background.
func digest(dir string, created []string) {
	for _, file := range created {
		log.Println("Digesting", file)
		info, err := os.Stat(file)
		if err != nil || info.IsDir() {
			continue
		}
		b := readHTML(file)
		if b.title == "" {
			continue
		}
		optLog(misc.AppendToFile(filepath.Join(dir, BOOKFILE), b.yaml()))
		misc.Notify(NAME, b.String())
	}
	go clean(created)
}
func main() { dir := getDirectory() watcher, err := fsnotify.NewWatcher() optFatal(err) defer watcher.Close() done := make(chan bool) go func() { created := []string{} for { select { case event, ok := <-watcher.Events: if !ok { return } if event.Op&fsnotify.Create == fsnotify.Create && filepath.Base(event.Name) != BOOKFILE { created = append(created, event.Name) } case err, ok := <-watcher.Errors: if !ok { return } log.Println("error:", err) case <-time.After(1 * time.Second): // 1s after last file system event: munch data if len(created) > 0 { tmpcreated := created created = []string{} go digest(dir, tmpcreated) } } } }() err = watcher.Add(dir) optFatal(err) log.Println(NAME + " is watching " + dir) <-done }