initial commit
This commit is contained in:
commit
5229888846
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
.gitdist
|
||||
go.sum
|
||||
ambrowse
|
61
README.md
Normal file
61
README.md
Normal file
|
@ -0,0 +1,61 @@
|
|||
ambrowse
|
||||
========
|
||||
|
||||
ambrowse extracts book metadata from saved amazon.com, amazon.co.uk or amazon.de pages. It watches the directory 'ambrowse'
|
||||
inside your HOME, digests newly written files and adds the metadata to a list in 'books.yaml'. It then removes those files
|
||||
after a time delay, leaving only the metadata list.
|
||||
|
||||
You can find releases on [releases.gutmet.org](https://releases.gutmet.org) or build it yourself.
|
||||
|
||||
build
|
||||
=====
|
||||
|
||||
If you want to build ambrowse yourself, you need Go 1.11 or later with module support. Clone this repository and run 'go build ambrowse.go'.
|
||||
|
||||
quick start
|
||||
===========
|
||||
|
||||
Download a release from [releases.gutmet.org](https://releases.gutmet.org) and unzip it. Put the correct file for your operating system
|
||||
somewhere you can run it. Then start a command line:
|
||||
|
||||
```
|
||||
> ambrowse
|
||||
2019/01/13 19:06:34 ambrowse is watching /home/johndoe/ambrowse
|
||||
```
|
||||
|
||||
Go to Amazon, visit a book page and save it to the folder ambrowse is watching.
|
||||
|
||||
```
|
||||
2019/01/13 19:06:40 Reading metadata from /home/johndoe/ambrowse/Metallbearbeitung II: Verbindungstechniken, Gebrauchsgegenstände aus Metall ALS-Werk- und Arbeitsmappen: Amazon.de: Ingrid Kreide, Wolfram Enders: Bücher.html
|
||||
```
|
||||
|
||||
If you are on Unix or macOS, you will get a desktop notification about the extracted metadata. You might want to add the watched folder to your favorites to reduce the number of clicks, and add ambrowse to the services that are started automatically.
|
||||
|
||||
|
||||
example metadata
|
||||
================
|
||||
|
||||
The file 'books.yaml' inside the watched folder might look like this:
|
||||
|
||||
```
|
||||
---
|
||||
|
||||
title: "Essential Bushcraft"
|
||||
authors: [Ray Mears (Autor)]
|
||||
publisher: "Hodder & Stoughton General Division; Auflage: Reprint (23. Juni 2003)"
|
||||
isbn10: 0340829710
|
||||
isbn13: 978-0340829714
|
||||
amazonPrice: "EUR 24,01"
|
||||
|
||||
---
|
||||
|
||||
title: "Ray Mears Outdoor Survival Handbook: The Classic Indispensable Guide to
|
||||
Surviving the Outdoors"
|
||||
authors: [Ray Mears (Autor)]
|
||||
publisher: "Ebury Press (30. Mai 2001)"
|
||||
isbn10: 0091878861
|
||||
isbn13: 978-0091878863
|
||||
amazonPrice: "EUR 20,35"
|
||||
```
|
||||
|
||||
So far it seems like this format is bookseller-parseable.
|
205
ambrowse.go
Normal file
205
ambrowse.go
Normal file
|
@ -0,0 +1,205 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"git.gutmet.org/goutil.git"
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"golang.org/x/net/html"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// NAME is the program name used in log output and desktop notifications.
const NAME = "ambrowse"

// DIR is the name of the watched directory inside the user's home directory.
const DIR = "ambrowse"

// BOOKFILE is the file inside the watched directory that collects the
// extracted metadata.
const BOOKFILE = "books.yaml"
|
||||
|
||||
// book holds the metadata extracted from one saved product page.
type book struct {
	title       string   // product title
	authors     []string // contributor names
	publisher   string   // publisher line from the product details
	isbn10      string   // ISBN-10 as printed on the page
	isbn13      string   // ISBN-13 as printed on the page
	amazonPrice string   // price text as printed on the page
}

// yamlEscaper escapes the two characters that would otherwise terminate or
// corrupt a double-quoted YAML scalar.
var yamlEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`)

// yamlQuote returns s as a double-quoted YAML scalar.
func yamlQuote(s string) string {
	return `"` + yamlEscaper.Replace(s) + `"`
}

// String renders the book as one field per line, in declaration order, with
// the authors joined by ", ". It is used as the notification text.
func (b book) String() string {
	return strings.Join([]string{
		b.title,
		strings.Join(b.authors, ", "),
		b.publisher,
		b.isbn10,
		b.isbn13,
		b.amazonPrice,
	}, "\n")
}

// yaml renders the book as one YAML document, introduced by a "---"
// separator, ready to be appended to the book list.
//
// title, publisher and amazonPrice are emitted as double-quoted scalars;
// embedded double quotes and backslashes are escaped so that a title such as
// `The "Real" Book` still yields valid YAML.
//
// NOTE(review): authors and the ISBNs are still written unquoted to keep the
// existing file format; an author name containing "," or "]" would break the
// flow sequence — confirm whether quoting them is acceptable to consumers.
func (b book) yaml() string {
	return "\n---\n\n" +
		"title: " + yamlQuote(b.title) + "\n" +
		"authors: [" + strings.Join(b.authors, ", ") + "]\n" +
		"publisher: " + yamlQuote(b.publisher) + "\n" +
		"isbn10: " + b.isbn10 + "\n" +
		"isbn13: " + b.isbn13 + "\n" +
		"amazonPrice: " + yamlQuote(b.amazonPrice) + "\n"
}
|
||||
|
||||
// optFatal terminates the program via log.Fatal when err is non-nil and does
// nothing otherwise.
func optFatal(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}
|
||||
|
||||
// optLog writes err to the standard logger when it is non-nil and does
// nothing otherwise.
func optLog(err error) {
	if err == nil {
		return
	}
	log.Println(err)
}
|
||||
|
||||
// readLine returns the value part of a "label: value" line: everything after
// the first colon, with surrounding whitespace removed. A line without any
// colon yields the empty string.
func readLine(line string) string {
	trimmed := strings.TrimSpace(line)
	colon := strings.Index(trimmed, ":")
	if colon < 0 {
		return ""
	}
	// Later colons belong to the value and are kept as they are.
	return strings.TrimSpace(trimmed[colon+1:])
}
|
||||
|
||||
func getDirectory() string {
|
||||
home, err := goutil.HomeDir()
|
||||
if err != nil {
|
||||
log.Fatal("Cannot identify home directory", err)
|
||||
}
|
||||
dir := filepath.Join(home, DIR)
|
||||
err = os.MkdirAll(dir, 0750)
|
||||
if err != nil {
|
||||
log.Fatal("Cannot create "+dir, err)
|
||||
}
|
||||
return dir
|
||||
}
|
||||
|
||||
func getAuthors(doc *goutil.HtmlNode) []string {
|
||||
authors := make(map[string]interface{})
|
||||
authorNodes := doc.GetElementsByClass("contributorNameID")
|
||||
authorNodes = append(authorNodes, doc.GetElementsByClass("author")...)
|
||||
for _, n := range authorNodes {
|
||||
decl := n.GetElementsByClass("a-declarative")
|
||||
if len(decl) > 0 {
|
||||
authors[decl[0].Text()] = nil
|
||||
} else {
|
||||
authors[strings.TrimSuffix(n.Text(), ",")] = nil
|
||||
}
|
||||
}
|
||||
result := []string{}
|
||||
for k, _ := range authors {
|
||||
result = append(result, k)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func setDetails(b *book, doc *goutil.HtmlNode) {
|
||||
details := doc.GetElementById("detail-bullets")
|
||||
if details == nil {
|
||||
details = doc.GetElementById("detail_bullets_id")
|
||||
}
|
||||
if details != nil {
|
||||
tmpcont := details.GetElementsByClass("content")
|
||||
if len(tmpcont) > 0 {
|
||||
cont := tmpcont[0].FindAll(func(n *goutil.HtmlNode) bool { return n.Type == html.ElementNode && n.Data == "li" })
|
||||
for _, c := range cont {
|
||||
line := c.Text()
|
||||
if strings.Contains(line, "Verlag") || strings.Contains(line, "Publisher") {
|
||||
b.publisher = readLine(line)
|
||||
} else if strings.Contains(line, "ISBN-10") {
|
||||
b.isbn10 = readLine(line)
|
||||
} else if strings.Contains(line, "ISBN-13") {
|
||||
b.isbn13 = readLine(line)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func getPrice(doc *goutil.HtmlNode) string {
|
||||
price := doc.GetElementsByClass("a-color-price")
|
||||
if len(price) > 0 {
|
||||
return strings.TrimSpace(price[0].Text())
|
||||
} else {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
func readHTML(filename string) book {
|
||||
b := book{}
|
||||
log.Println("Reading metadata from " + filename)
|
||||
f, err := os.Open(filename)
|
||||
optFatal(err)
|
||||
tmpdoc, err := html.Parse(f)
|
||||
optFatal(err)
|
||||
doc := (*goutil.HtmlNode)(tmpdoc)
|
||||
title := doc.GetElementById("productTitle")
|
||||
b.title = title.Text()
|
||||
b.authors = getAuthors(doc)
|
||||
setDetails(&b, doc)
|
||||
b.amazonPrice = getPrice(doc)
|
||||
return b
|
||||
}
|
||||
|
||||
// clean waits 30 seconds and then removes every path in files, including
// directories and their contents. Removal failures are logged and do not
// stop the remaining paths from being processed.
func clean(files []string) {
	time.Sleep(30 * time.Second)
	for _, file := range files {
		if err := os.RemoveAll(file); err != nil {
			log.Println(err)
		}
	}
}
|
||||
|
||||
func digest(dir string, created []string) {
|
||||
for _, file := range created {
|
||||
if filepath.Ext(file) == ".html" && goutil.PathExists(file) {
|
||||
b := readHTML(file)
|
||||
if b.title != "" {
|
||||
err := goutil.AppendToFile(filepath.Join(dir, BOOKFILE), b.yaml())
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
goutil.Notify(NAME, b.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
go clean(created)
|
||||
}
|
||||
|
||||
func main() {
|
||||
dir := getDirectory()
|
||||
watcher, err := fsnotify.NewWatcher()
|
||||
optFatal(err)
|
||||
defer watcher.Close()
|
||||
done := make(chan bool)
|
||||
go func() {
|
||||
created := []string{}
|
||||
for {
|
||||
select {
|
||||
case event, ok := <-watcher.Events:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if event.Op&fsnotify.Create == fsnotify.Create && filepath.Base(event.Name) != BOOKFILE {
|
||||
created = append(created, event.Name)
|
||||
}
|
||||
case err, ok := <-watcher.Errors:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
log.Println("error:", err)
|
||||
case <-time.After(100 * time.Millisecond):
|
||||
// 100ms after last file system event: munch data
|
||||
if len(created) > 0 {
|
||||
tmpcreated := created
|
||||
created = []string{}
|
||||
go digest(dir, tmpcreated)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}()
|
||||
err = watcher.Add(dir)
|
||||
optFatal(err)
|
||||
log.Println(NAME + " is watching " + dir)
|
||||
<-done
|
||||
}
|
8
go.mod
Normal file
8
go.mod
Normal file
|
@ -0,0 +1,8 @@
|
|||
module git.gutmet.org/ambrowse.git
|
||||
|
||||
require (
|
||||
git.gutmet.org/goutil.git v0.0.0-20190113180148-bb2d3e26ea6c
|
||||
github.com/fsnotify/fsnotify v1.4.7
|
||||
golang.org/x/net v0.0.0-20190110200230-915654e7eabc
|
||||
golang.org/x/sys v0.0.0-20190109145017-48ac38b7c8cb // indirect
|
||||
)
|
Loading…
Reference in New Issue
Block a user