2019-01-13 19:57:34 +01:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2020-10-11 13:39:17 +02:00
|
|
|
goutil "git.gutmet.org/goutil.git/html"
|
|
|
|
"git.gutmet.org/goutil.git/misc"
|
2019-01-13 19:57:34 +01:00
|
|
|
"github.com/fsnotify/fsnotify"
|
|
|
|
"golang.org/x/net/html"
|
|
|
|
"log"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Program-wide names and locations.
const (
	// NAME is the application name; it is used in log output and as the
	// first argument to misc.Notify.
	NAME = "ambrowse"

	// DIR is the name of the watched directory.
	DIR = "ambrowse" // inside home directory

	// BOOKFILE is the file, inside DIR, that book records are appended to.
	BOOKFILE = "books.yaml"
)
|
|
|
|
|
|
|
|
// book holds the metadata extracted from one saved product page.
type book struct {
	// title is the text of the page's "productTitle" element.
	title string
	// authors lists the contributor names found on the page.
	authors []string
	// publisher is the value of the publisher line in the details section.
	publisher string
	// isbn10 and isbn13 are the values of the respective ISBN detail lines.
	isbn10, isbn13 string
	// amazonPrice is the price text as displayed on the page.
	amazonPrice string
}
|
|
|
|
|
|
|
|
func (b book) String() string {
|
|
|
|
return strings.Join([]string{b.title, strings.Join(b.authors, ", "), b.publisher, b.isbn10, b.isbn13, b.amazonPrice}, "\n")
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b book) yaml() string {
|
|
|
|
return "\n---\n\n" +
|
|
|
|
"title: \"" + b.title + "\"\n" +
|
|
|
|
"authors: [" + strings.Join(b.authors, ", ") + "]\n" +
|
|
|
|
"publisher: \"" + b.publisher + "\"\n" +
|
|
|
|
"isbn10: " + b.isbn10 + "\n" +
|
|
|
|
"isbn13: " + b.isbn13 + "\n" +
|
|
|
|
"amazonPrice: \"" + b.amazonPrice + "\"\n"
|
|
|
|
}
|
|
|
|
|
|
|
|
// optFatal terminates the program via log.Fatal when err is non-nil and does
// nothing otherwise.
func optFatal(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}
|
|
|
|
|
|
|
|
// optLog writes err to the standard logger when it is non-nil and does
// nothing otherwise.
func optLog(err error) {
	if err == nil {
		return
	}
	log.Println(err)
}
|
|
|
|
|
|
|
|
// readLine extracts the value part of a "key: value" line: everything after
// the first colon, with surrounding whitespace removed. Later colons stay
// part of the value. A line without any colon yields the empty string.
func readLine(line string) string {
	trimmed := strings.TrimSpace(line)
	colon := strings.Index(trimmed, ":")
	if colon < 0 {
		return ""
	}
	return strings.TrimSpace(trimmed[colon+1:])
}
|
|
|
|
|
|
|
|
func getDirectory() string {
|
2020-10-11 13:39:17 +02:00
|
|
|
home, err := misc.HomeDir()
|
2019-01-13 19:57:34 +01:00
|
|
|
if err != nil {
|
|
|
|
log.Fatal("Cannot identify home directory", err)
|
|
|
|
}
|
|
|
|
dir := filepath.Join(home, DIR)
|
|
|
|
err = os.MkdirAll(dir, 0750)
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal("Cannot create "+dir, err)
|
|
|
|
}
|
|
|
|
return dir
|
|
|
|
}
|
|
|
|
|
|
|
|
func getAuthors(doc *goutil.HtmlNode) []string {
|
|
|
|
authors := make(map[string]interface{})
|
|
|
|
authorNodes := doc.GetElementsByClass("contributorNameID")
|
|
|
|
authorNodes = append(authorNodes, doc.GetElementsByClass("author")...)
|
|
|
|
for _, n := range authorNodes {
|
|
|
|
decl := n.GetElementsByClass("a-declarative")
|
|
|
|
if len(decl) > 0 {
|
|
|
|
authors[decl[0].Text()] = nil
|
|
|
|
} else {
|
|
|
|
authors[strings.TrimSuffix(n.Text(), ",")] = nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
result := []string{}
|
|
|
|
for k, _ := range authors {
|
|
|
|
result = append(result, k)
|
|
|
|
}
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
|
|
|
func setDetails(b *book, doc *goutil.HtmlNode) {
|
2020-08-27 22:45:01 +02:00
|
|
|
details := doc.GetElementById("productDetailsTable")
|
2019-01-13 19:57:34 +01:00
|
|
|
if details == nil {
|
2020-08-27 22:45:01 +02:00
|
|
|
details = doc.GetElementById("detailBullets_feature_div")
|
2019-01-13 19:57:34 +01:00
|
|
|
}
|
|
|
|
if details != nil {
|
2020-08-27 22:45:01 +02:00
|
|
|
cont := details.FindAll(func(n *goutil.HtmlNode) bool { return n.Type == html.ElementNode && n.Data == "li" })
|
|
|
|
for _, c := range cont {
|
|
|
|
line := c.Text()
|
|
|
|
if strings.Contains(line, "Verlag") || strings.Contains(line, "Publisher") || strings.Contains(line, "Herausgeber") {
|
|
|
|
b.publisher = readLine(line)
|
|
|
|
} else if strings.Contains(line, "ISBN-10") {
|
|
|
|
b.isbn10 = readLine(line)
|
|
|
|
} else if strings.Contains(line, "ISBN-13") {
|
|
|
|
b.isbn13 = readLine(line)
|
2019-01-13 19:57:34 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func getPrice(doc *goutil.HtmlNode) string {
|
|
|
|
price := doc.GetElementsByClass("a-color-price")
|
|
|
|
if len(price) > 0 {
|
|
|
|
return strings.TrimSpace(price[0].Text())
|
|
|
|
} else {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func readHTML(filename string) book {
|
|
|
|
b := book{}
|
|
|
|
log.Println("Reading metadata from " + filename)
|
|
|
|
f, err := os.Open(filename)
|
|
|
|
optFatal(err)
|
|
|
|
tmpdoc, err := html.Parse(f)
|
|
|
|
optFatal(err)
|
|
|
|
doc := (*goutil.HtmlNode)(tmpdoc)
|
|
|
|
title := doc.GetElementById("productTitle")
|
|
|
|
b.title = title.Text()
|
|
|
|
b.authors = getAuthors(doc)
|
|
|
|
setDetails(&b, doc)
|
|
|
|
b.amazonPrice = getPrice(doc)
|
|
|
|
return b
|
|
|
|
}
|
|
|
|
|
|
|
|
// clean waits 30 seconds and then removes every path in files, including the
// contents of directories (os.RemoveAll). Removal failures are logged rather
// than silently dropped; a path that no longer exists is not an error.
//
// The function blocks for the whole delay, so callers that must not wait run
// it in its own goroutine.
func clean(files []string) {
	time.Sleep(30 * time.Second)
	for _, file := range files {
		if err := os.RemoveAll(file); err != nil {
			log.Println(err)
		}
	}
}
|
|
|
|
|
|
|
|
func digest(dir string, created []string) {
|
|
|
|
for _, file := range created {
|
2020-10-11 13:39:17 +02:00
|
|
|
if filepath.Ext(file) == ".html" && misc.PathExists(file) {
|
2019-01-13 19:57:34 +01:00
|
|
|
b := readHTML(file)
|
|
|
|
if b.title != "" {
|
2020-10-11 13:39:17 +02:00
|
|
|
err := misc.AppendToFile(filepath.Join(dir, BOOKFILE), b.yaml())
|
2019-01-13 19:57:34 +01:00
|
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
}
|
2020-10-11 13:39:17 +02:00
|
|
|
misc.Notify(NAME, b.String())
|
2019-01-13 19:57:34 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
go clean(created)
|
|
|
|
}
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
dir := getDirectory()
|
|
|
|
watcher, err := fsnotify.NewWatcher()
|
|
|
|
optFatal(err)
|
|
|
|
defer watcher.Close()
|
|
|
|
done := make(chan bool)
|
|
|
|
go func() {
|
|
|
|
created := []string{}
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case event, ok := <-watcher.Events:
|
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if event.Op&fsnotify.Create == fsnotify.Create && filepath.Base(event.Name) != BOOKFILE {
|
|
|
|
created = append(created, event.Name)
|
|
|
|
}
|
|
|
|
case err, ok := <-watcher.Errors:
|
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
log.Println("error:", err)
|
2019-02-03 23:08:59 +01:00
|
|
|
case <-time.After(1 * time.Second):
|
|
|
|
// 1s after last file system event: munch data
|
2019-01-13 19:57:34 +01:00
|
|
|
if len(created) > 0 {
|
|
|
|
tmpcreated := created
|
|
|
|
created = []string{}
|
|
|
|
go digest(dir, tmpcreated)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
err = watcher.Add(dir)
|
|
|
|
optFatal(err)
|
|
|
|
log.Println(NAME + " is watching " + dir)
|
|
|
|
<-done
|
|
|
|
}
|