initial commit

This commit is contained in:
gutmet 2019-01-13 19:57:34 +01:00
commit 5229888846
4 changed files with 277 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
.gitdist
go.sum
ambrowse

61
README.md Normal file
View File

@ -0,0 +1,61 @@
ambrowse
========
ambrowse extracts book metadata from saved amazon.com, amazon.co.uk or amazon.de pages. It watches the directory 'ambrowse'
inside your HOME, digests newly written files and adds the metadata to a list in 'books.yaml'. It then removes those files
after a time delay, leaving only the metadata list.
You can find releases on [releases.gutmet.org](https://releases.gutmet.org) or build it yourself.
build
=====
If you want to build ambrowse yourself, you need Go 1.11 or newer with module support. Clone this repository and run 'go build ambrowse.go'.
quick start
===========
Download a release from [releases.gutmet.org](https://releases.gutmet.org) and unzip it. Put the correct file for your operating system
somewhere you can run it. Then start a command line:
```
> ambrowse
2019/01/13 19:06:34 ambrowse is watching /home/johndoe/ambrowse
```
Go to Amazon, open a book page and save it into the folder ambrowse is watching.
```
2019/01/13 19:06:40 Reading metadata from /home/johndoe/ambrowse/Metallbearbeitung II: Verbindungstechniken, Gebrauchsgegenstände aus Metall ALS-Werk- und Arbeitsmappen: Amazon.de: Ingrid Kreide, Wolfram Enders: Bücher.html
```
If you are on Unix or macOS, you will get a desktop notification about the extracted metadata. To reduce the number of clicks, you might want to add the watched folder to your favorites. You can also add ambrowse to the services that are started automatically.
example metadata
================
The file 'books.yaml' inside the watched folder might look like this:
```
---
title: "Essential Bushcraft"
authors: [Ray Mears (Autor)]
publisher: "Hodder & Stoughton General Division; Auflage: Reprint (23. Juni 2003)"
isbn10: 0340829710
isbn13: 978-0340829714
amazonPrice: "EUR 24,01"
---
title: "Ray Mears Outdoor Survival Handbook: The Classic Indispensable Guide to
Surviving the Outdoors"
authors: [Ray Mears (Autor)]
publisher: "Ebury Press (30. Mai 2001)"
isbn10: 0091878861
isbn13: 978-0091878863
amazonPrice: "EUR 20,35"
```
So far it seems like this format is bookseller-parseable.

205
ambrowse.go Normal file
View File

@ -0,0 +1,205 @@
package main
import (
"git.gutmet.org/goutil.git"
"github.com/fsnotify/fsnotify"
"golang.org/x/net/html"
"log"
"os"
"path/filepath"
"strings"
"time"
)
// Fixed names used throughout the program.
const (
// NAME is the program label used in the startup log line and as the first
// argument to goutil.Notify.
NAME = "ambrowse"
DIR = "ambrowse" // inside home directory
// BOOKFILE is the file inside the watched directory that extracted metadata
// is appended to; create events for it are ignored by the watcher loop.
BOOKFILE = "books.yaml"
)
// book holds the metadata extracted from a single saved product page.
type book struct {
	title       string   // text of the "productTitle" element
	authors     []string // contributor names found on the page
	publisher   string   // value of the "Verlag"/"Publisher" detail line
	isbn10      string   // value of the "ISBN-10" detail line
	isbn13      string   // value of the "ISBN-13" detail line
	amazonPrice string   // text of the first "a-color-price" element
}

// yamlEscaper escapes the two characters that would otherwise terminate or
// corrupt a YAML double-quoted scalar: the backslash and the double quote.
var yamlEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`)

// String renders the book as one field per line (authors comma-separated),
// which is the text handed to the desktop notification.
func (b book) String() string {
	return strings.Join([]string{b.title, strings.Join(b.authors, ", "), b.publisher, b.isbn10, b.isbn13, b.amazonPrice}, "\n")
}

// yaml renders the book as a YAML document, preceded by a "---" separator,
// ready to be appended to the book list.
//
// Title, publisher and price are emitted as double-quoted scalars, so any
// backslash or double quote inside them is escaped; without that a title
// such as `The "Real" Story` would produce an unparseable document.
func (b book) yaml() string {
	return "\n---\n\n" +
		"title: \"" + yamlEscaper.Replace(b.title) + "\"\n" +
		"authors: [" + strings.Join(b.authors, ", ") + "]\n" +
		"publisher: \"" + yamlEscaper.Replace(b.publisher) + "\"\n" +
		"isbn10: " + b.isbn10 + "\n" +
		"isbn13: " + b.isbn13 + "\n" +
		"amazonPrice: \"" + yamlEscaper.Replace(b.amazonPrice) + "\"\n"
}
// optFatal hands a non-nil err to log.Fatal, which logs it and exits the
// program. A nil err is a no-op.
func optFatal(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}
// optLog writes a non-nil err to the standard logger and carries on.
// A nil err is a no-op.
func optLog(err error) {
	if err == nil {
		return
	}
	log.Println(err)
}
// readLine returns the value part of a "label: value" line: everything
// after the first colon, with surrounding whitespace removed. Lines without
// a colon yield the empty string.
func readLine(line string) string {
	trimmed := strings.TrimSpace(line)
	colon := strings.Index(trimmed, ":")
	if colon < 0 {
		return ""
	}
	// Only the first colon separates label from value; later ones belong to
	// the value itself.
	return strings.TrimSpace(trimmed[colon+1:])
}
// getDirectory returns the path of the watched directory (DIR inside the
// user's home directory), creating it with mode 0750 if necessary.
//
// The program cannot work without this directory, so failing to locate the
// home directory or to create the folder terminates the process.
func getDirectory() string {
	home, err := goutil.HomeDir()
	if err != nil {
		// log.Fatal follows Print semantics and inserts no space between a
		// string and an error, so format the message explicitly.
		log.Fatalf("Cannot identify home directory: %v", err)
	}
	dir := filepath.Join(home, DIR)
	if err := os.MkdirAll(dir, 0750); err != nil {
		log.Fatalf("Cannot create %s: %v", dir, err)
	}
	return dir
}
// getAuthors collects the contributor names from a parsed product page.
//
// Candidates are all elements with class "contributorNameID" followed by
// all elements with class "author". For each candidate the text of its
// first "a-declarative" descendant is used when present; otherwise the
// candidate's own text with a trailing comma removed.
//
// Duplicates are dropped and the remaining names are returned in the order
// they were first encountered, so the result is stable between runs. The
// returned slice is never nil.
func getAuthors(doc *goutil.HtmlNode) []string {
	authorNodes := doc.GetElementsByClass("contributorNameID")
	authorNodes = append(authorNodes, doc.GetElementsByClass("author")...)

	seen := make(map[string]struct{}, len(authorNodes))
	result := make([]string, 0, len(authorNodes))
	for _, n := range authorNodes {
		var name string
		if decl := n.GetElementsByClass("a-declarative"); len(decl) > 0 {
			name = decl[0].Text()
		} else {
			name = strings.TrimSuffix(n.Text(), ",")
		}
		if _, dup := seen[name]; dup {
			continue
		}
		seen[name] = struct{}{}
		result = append(result, name)
	}
	return result
}
// setDetails fills in publisher, ISBN-10 and ISBN-13 on b from the product
// details list of the page.
//
// The list is looked up by id "detail-bullets", falling back to
// "detail_bullets_id". Inside its first "content" element every <li> is
// inspected; the first matching label decides which field receives the
// text after the colon. If the list or its content is missing, b is left
// untouched.
func setDetails(b *book, doc *goutil.HtmlNode) {
	details := doc.GetElementById("detail-bullets")
	if details == nil {
		details = doc.GetElementById("detail_bullets_id")
	}
	if details == nil {
		return
	}

	content := details.GetElementsByClass("content")
	if len(content) == 0 {
		return
	}

	isListItem := func(n *goutil.HtmlNode) bool {
		return n.Type == html.ElementNode && n.Data == "li"
	}
	for _, item := range content[0].FindAll(isListItem) {
		line := item.Text()
		switch {
		case strings.Contains(line, "Verlag"), strings.Contains(line, "Publisher"):
			b.publisher = readLine(line)
		case strings.Contains(line, "ISBN-10"):
			b.isbn10 = readLine(line)
		case strings.Contains(line, "ISBN-13"):
			b.isbn13 = readLine(line)
		}
	}
}
// getPrice returns the whitespace-trimmed text of the first element with
// class "a-color-price", or the empty string when the page has none.
func getPrice(doc *goutil.HtmlNode) string {
	matches := doc.GetElementsByClass("a-color-price")
	if len(matches) == 0 {
		return ""
	}
	return strings.TrimSpace(matches[0].Text())
}
// readHTML parses the saved page at filename and returns the book metadata
// found in it.
//
// A file that cannot be opened or parsed is logged and yields a book with
// an empty title, as does a page without a "productTitle" element; callers
// treat an empty title as "not a book page". A single bad file therefore
// no longer terminates the whole watcher.
func readHTML(filename string) book {
	b := book{}
	log.Println("Reading metadata from " + filename)

	f, err := os.Open(filename)
	if err != nil {
		log.Println(err)
		return b
	}
	// Release the handle once parsing is done; the file is deleted later.
	defer f.Close()

	tmpdoc, err := html.Parse(f)
	if err != nil {
		log.Println(err)
		return b
	}
	doc := (*goutil.HtmlNode)(tmpdoc)

	// GetElementById can return nil for pages lacking the element, so guard
	// the dereference instead of calling Text on a nil node.
	if title := doc.GetElementById("productTitle"); title != nil {
		b.title = title.Text()
	}
	b.authors = getAuthors(doc)
	setDetails(&b, doc)
	b.amazonPrice = getPrice(doc)
	return b
}
// clean waits 30 seconds and then removes every path in files, recursively
// for directories. Removal failures are logged and do not stop the
// remaining paths from being processed.
func clean(files []string) {
	time.Sleep(30 * time.Second)
	for _, file := range files {
		if err := os.RemoveAll(file); err != nil {
			log.Println(err)
		}
	}
}
func digest(dir string, created []string) {
for _, file := range created {
if filepath.Ext(file) == ".html" && goutil.PathExists(file) {
b := readHTML(file)
if b.title != "" {
err := goutil.AppendToFile(filepath.Join(dir, BOOKFILE), b.yaml())
if err != nil {
log.Println(err)
}
goutil.Notify(NAME, b.String())
}
}
}
go clean(created)
}
// main watches the ambrowse directory and feeds newly created files to
// digest in batches.
//
// A background goroutine records the name of every created entry except
// BOOKFILE. Once 100ms pass without any watcher event or error, the
// recorded names are handed to digest in a new goroutine and recording
// starts afresh. main itself blocks on a channel that is never written to,
// so the program runs until it is killed.
func main() {
	dir := getDirectory()

	watcher, err := fsnotify.NewWatcher()
	optFatal(err)
	defer watcher.Close()

	done := make(chan bool)
	go func() {
		pending := []string{}
		for {
			select {
			case ev, open := <-watcher.Events:
				if !open {
					return
				}
				isCreate := ev.Op&fsnotify.Create == fsnotify.Create
				if isCreate && filepath.Base(ev.Name) != BOOKFILE {
					pending = append(pending, ev.Name)
				}
			case watchErr, open := <-watcher.Errors:
				if !open {
					return
				}
				log.Println("error:", watchErr)
			case <-time.After(100 * time.Millisecond):
				// Quiet for 100ms since the last event: process the batch.
				if len(pending) == 0 {
					continue
				}
				batch := pending
				pending = []string{}
				go digest(dir, batch)
			}
		}
	}()

	optFatal(watcher.Add(dir))
	log.Println(NAME + " is watching " + dir)
	<-done
}

8
go.mod Normal file
View File

@ -0,0 +1,8 @@
module git.gutmet.org/ambrowse.git
require (
git.gutmet.org/goutil.git v0.0.0-20190113180148-bb2d3e26ea6c
github.com/fsnotify/fsnotify v1.4.7
golang.org/x/net v0.0.0-20190110200230-915654e7eabc
golang.org/x/sys v0.0.0-20190109145017-48ac38b7c8cb // indirect
)