ambrowse/ambrowse.go
2020-10-11 13:39:17 +02:00

204 lines
4.5 KiB
Go

package main
import (
goutil "git.gutmet.org/goutil.git/html"
"git.gutmet.org/goutil.git/misc"
"github.com/fsnotify/fsnotify"
"golang.org/x/net/html"
"log"
"os"
"path/filepath"
"strings"
"time"
)
// Program-wide settings.
const (
	// NAME is the program name; it prefixes the startup log line and is
	// passed to misc.Notify as the first argument.
	NAME = "ambrowse"

	// DIR is the name of the watched directory, joined onto the user's
	// home directory.
	DIR = "ambrowse"

	// BOOKFILE is the file inside the watched directory that extracted
	// book records are appended to.
	BOOKFILE = "books.yaml"
)
// book holds the metadata extracted from one saved product page.
type book struct {
	title       string
	authors     []string
	publisher   string
	isbn10      string
	isbn13      string
	amazonPrice string
}

// yamlEscaper escapes the characters that would otherwise terminate or
// corrupt a YAML double-quoted scalar.
var yamlEscaper = strings.NewReplacer(
	`\`, `\\`,
	`"`, `\"`,
	"\n", `\n`,
	"\r", `\r`,
	"\t", `\t`,
)

// yamlQuote returns s as a YAML double-quoted scalar, escaping backslashes,
// double quotes and line breaks so arbitrary scraped text stays valid YAML.
func yamlQuote(s string) string {
	return `"` + yamlEscaper.Replace(s) + `"`
}

// String renders the book as plain text: one field per line, in declaration
// order, with the authors joined by ", ".
func (b book) String() string {
	return strings.Join([]string{
		b.title,
		strings.Join(b.authors, ", "),
		b.publisher,
		b.isbn10,
		b.isbn13,
		b.amazonPrice,
	}, "\n")
}

// yaml renders the book as a YAML document, preceded by a "---" separator
// so that records can be appended to one file.
//
// Free-text fields (title, authors, publisher, price) are emitted as escaped
// double-quoted scalars; each author is quoted individually so names that
// contain commas or other flow-sequence syntax stay single entries.
func (b book) yaml() string {
	authors := make([]string, len(b.authors))
	for i, a := range b.authors {
		authors[i] = yamlQuote(a)
	}
	// NOTE(review): the ISBN fields are still written unquoted, as before, so
	// readers of the file keep seeing the same scalar types — confirm whether
	// consumers would prefer them quoted as strings.
	return "\n---\n\n" +
		"title: " + yamlQuote(b.title) + "\n" +
		"authors: [" + strings.Join(authors, ", ") + "]\n" +
		"publisher: " + yamlQuote(b.publisher) + "\n" +
		"isbn10: " + b.isbn10 + "\n" +
		"isbn13: " + b.isbn13 + "\n" +
		"amazonPrice: " + yamlQuote(b.amazonPrice) + "\n"
}
// optFatal hands err to log.Fatal when it is non-nil and does nothing
// otherwise.
func optFatal(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}
// optLog writes err to the standard logger when it is non-nil and does
// nothing otherwise.
func optLog(err error) {
	if err == nil {
		return
	}
	log.Println(err)
}
// readLine returns the value part of a "label: value" line: everything after
// the first colon, with surrounding whitespace removed. A line without a
// colon yields the empty string.
func readLine(line string) string {
	trimmed := strings.TrimSpace(line)
	colon := strings.Index(trimmed, ":")
	if colon < 0 {
		return ""
	}
	// Later colons belong to the value and are kept as-is.
	return strings.TrimSpace(trimmed[colon+1:])
}
// getDirectory returns the path of the watched directory (DIR inside the
// user's home directory), creating it with mode 0750 if it does not exist.
// The program is terminated via log.Fatal when the home directory cannot be
// determined or the directory cannot be created.
func getDirectory() string {
	home, err := misc.HomeDir()
	if err != nil {
		// log.Fatal formats like fmt.Sprint, which adds no space next to a
		// string operand, so the separator has to be explicit.
		log.Fatal("Cannot identify home directory: ", err)
	}
	dir := filepath.Join(home, DIR)
	if err := os.MkdirAll(dir, 0750); err != nil {
		log.Fatal("Cannot create "+dir+": ", err)
	}
	return dir
}
// getAuthors collects the distinct author names found in doc.
//
// Candidate nodes are the elements with class "contributorNameID" followed by
// those with class "author". For each node the text of its first
// "a-declarative" descendant is used when present; otherwise the node's own
// text with one trailing comma removed.
//
// Names are returned in the order they are first encountered, so the result
// is deterministic for a given document. Empty names are skipped. The
// returned slice is never nil.
func getAuthors(doc *goutil.HtmlNode) []string {
	authorNodes := doc.GetElementsByClass("contributorNameID")
	authorNodes = append(authorNodes, doc.GetElementsByClass("author")...)

	seen := make(map[string]struct{}, len(authorNodes))
	result := make([]string, 0, len(authorNodes))
	for _, n := range authorNodes {
		var name string
		if decl := n.GetElementsByClass("a-declarative"); len(decl) > 0 {
			name = decl[0].Text()
		} else {
			name = strings.TrimSuffix(n.Text(), ",")
		}
		if name == "" {
			// A node without text carries no author information.
			continue
		}
		if _, dup := seen[name]; dup {
			continue
		}
		seen[name] = struct{}{}
		result = append(result, name)
	}
	return result
}
// setDetails fills b.publisher, b.isbn10 and b.isbn13 from the product
// details section of doc.
//
// The section is looked up by id "productDetailsTable", falling back to
// "detailBullets_feature_div"; if neither exists b is left untouched. Every
// <li> inside the section is classified by the label it contains and its
// value (the text after the first colon) is stored in the matching field.
func setDetails(b *book, doc *goutil.HtmlNode) {
	section := doc.GetElementById("productDetailsTable")
	if section == nil {
		section = doc.GetElementById("detailBullets_feature_div")
	}
	if section == nil {
		return
	}

	isListItem := func(n *goutil.HtmlNode) bool {
		return n.Type == html.ElementNode && n.Data == "li"
	}
	for _, item := range section.FindAll(isListItem) {
		text := item.Text()
		switch {
		case strings.Contains(text, "Verlag"),
			strings.Contains(text, "Publisher"),
			strings.Contains(text, "Herausgeber"):
			b.publisher = readLine(text)
		case strings.Contains(text, "ISBN-10"):
			b.isbn10 = readLine(text)
		case strings.Contains(text, "ISBN-13"):
			b.isbn13 = readLine(text)
		}
	}
}
// getPrice returns the whitespace-trimmed text of the first element in doc
// with class "a-color-price", or the empty string if there is none.
func getPrice(doc *goutil.HtmlNode) string {
	matches := doc.GetElementsByClass("a-color-price")
	if len(matches) == 0 {
		return ""
	}
	return strings.TrimSpace(matches[0].Text())
}
// readHTML parses the HTML file at filename and returns the book metadata
// found in it: title (element with id "productTitle"), authors, publisher
// and ISBNs, and price.
//
// Failure to open or parse the file terminates the program via optFatal.
// A page without a "productTitle" element yields a book with an empty title,
// which digest uses to skip the page.
func readHTML(filename string) book {
	b := book{}
	log.Println("Reading metadata from " + filename)

	f, err := os.Open(filename)
	optFatal(err)
	// The watcher runs indefinitely, so every handle must be released; an
	// open handle can also get in the way of the later removal in clean.
	defer f.Close()

	tmpdoc, err := html.Parse(f)
	optFatal(err)
	doc := (*goutil.HtmlNode)(tmpdoc)

	// GetElementById can return nil (setDetails guards against it as well),
	// so do not dereference the title node blindly.
	if title := doc.GetElementById("productTitle"); title != nil {
		b.title = title.Text()
	}
	b.authors = getAuthors(doc)
	setDetails(&b, doc)
	b.amazonPrice = getPrice(doc)
	return b
}
// clean waits 30 seconds and then removes every path in files, including
// directory trees. Removal is best-effort: a failure is logged and the
// remaining paths are still processed. Paths that no longer exist are not an
// error for os.RemoveAll.
func clean(files []string) {
	time.Sleep(30 * time.Second)
	for _, file := range files {
		if err := os.RemoveAll(file); err != nil {
			log.Println(err)
		}
	}
}
func digest(dir string, created []string) {
for _, file := range created {
if filepath.Ext(file) == ".html" && misc.PathExists(file) {
b := readHTML(file)
if b.title != "" {
err := misc.AppendToFile(filepath.Join(dir, BOOKFILE), b.yaml())
if err != nil {
log.Println(err)
}
misc.Notify(NAME, b.String())
}
}
}
go clean(created)
}
// main watches the ambrowse directory for newly created files and hands
// them to digest in batches.
//
// Create events (except for BOOKFILE itself) are accumulated; once one
// second passes without any watcher event or error, the accumulated paths
// are passed to digest in a new goroutine and the accumulator is reset.
func main() {
	dir := getDirectory()

	watcher, err := fsnotify.NewWatcher()
	optFatal(err)
	defer watcher.Close()

	// Nothing ever sends on done; receiving from it below keeps main blocked
	// while the watcher goroutine runs.
	done := make(chan bool)

	go func() {
		var pending []string
		for {
			select {
			case ev, open := <-watcher.Events:
				if !open {
					return
				}
				isCreate := ev.Op&fsnotify.Create == fsnotify.Create
				if isCreate && filepath.Base(ev.Name) != BOOKFILE {
					pending = append(pending, ev.Name)
				}
			case watchErr, open := <-watcher.Errors:
				if !open {
					return
				}
				log.Println("error:", watchErr)
			case <-time.After(1 * time.Second):
				// A fresh timer is created on every pass, so this fires only
				// after one quiet second following the last event.
				if len(pending) == 0 {
					continue
				}
				batch := pending
				pending = nil
				go digest(dir, batch)
			}
		}
	}()

	optFatal(watcher.Add(dir))
	log.Println(NAME + " is watching " + dir)
	<-done
}