snippets/unicodedata.go
2021-03-14 20:45:47 +01:00

99 lines
2.1 KiB
Go

package main
import (
"bufio"
"bytes"
_ "embed"
"flag"
"fmt"
"os"
"strconv"
"strings"
)
// unicodedata holds the raw bytes of the UnicodeData.txt file, compiled into
// the binary by the go:embed directive below. main scans it line by line.
//go:embed UnicodeData.txt
var unicodedata []byte
// die reports a fatal command-line problem and terminates the process.
//
// It writes msg followed by a one-line usage summary to standard error,
// asks the flag package to print the defaults of all registered flags,
// and then exits with status -1. It never returns to the caller.
func die(msg string) {
	stderr := os.Stderr
	fmt.Fprintln(stderr, msg)
	fmt.Fprintln(stderr, "USAGE:", os.Args[0], "[-ranges RANGES]")
	flag.PrintDefaults()
	os.Exit(-1)
}
// rangeInterval is a closed interval [from, to] of code point values.
type rangeInterval struct {
	from, to int64 // inclusive lower and upper bounds
}

// contains reports whether i lies inside the interval, both ends included.
// An interval whose from is greater than its to contains no value.
func (r *rangeInterval) contains(i int64) bool {
	if i < r.from {
		return false
	}
	return i <= r.to
}
// parseHex interprets s as a base-16 integer and returns it as an int64.
//
// The digits are passed straight to strconv.ParseInt with a 32-bit size
// limit, so an optional leading sign is accepted, a "0x" prefix is not, and
// values that do not fit in a signed 32-bit integer produce an error.
// The value and error are returned exactly as ParseInt reports them.
func parseHex(s string) (int64, error) {
	const (
		hexBase = 16
		maxBits = 32
	)
	return strconv.ParseInt(s, hexBase, maxBits)
}
// parseRanges parses a whitespace-separated list of "FROM..TO" hexadecimal
// code point ranges (for example "41..5A 61..7A") into intervals.
//
// Blank input yields an empty, non-nil slice. Every entry must consist of
// exactly two values accepted by parseHex joined by ".."; any other entry
// aborts the program through die("invalid ranges").
//
// Entries are separated with strings.Fields rather than a split on a single
// space, so leading, trailing, or repeated whitespace between ranges is
// ignored instead of being misread as an empty (and therefore invalid) range.
func parseRanges(ranges string) []rangeInterval {
	fields := strings.Fields(ranges)
	intervals := make([]rangeInterval, 0, len(fields))
	for _, field := range fields {
		bounds := strings.Split(field, "..")
		if len(bounds) != 2 {
			die("invalid ranges")
		}
		from, err := parseHex(bounds[0])
		if err != nil {
			die("invalid ranges")
		}
		to, err := parseHex(bounds[1])
		if err != nil {
			die("invalid ranges")
		}
		intervals = append(intervals, rangeInterval{from, to})
	}
	return intervals
}
// printCodepoint writes one line to standard output: the character encoded
// by code point i, a single space, and then description.
func printCodepoint(i int64, description string) {
	fmt.Println(string(rune(i)), description)
}
// main prints the characters listed in the embedded UnicodeData.txt, one
// per line together with their description, optionally restricted to the
// code point ranges given with the -ranges flag.
//
// Each data line is treated as ';'-separated: field 0 is the hexadecimal
// code point and field 1 its description. A line is skipped when it has
// fewer than two fields or an empty description, when the description is
// wrapped in '<' ... '>', when it contains "MODIFIER" or
// "VARIATION SELECTOR", or when the code point does not parse.
//
// If scanning the embedded data fails, the error is reported on standard
// error and the program exits with status 1.
func main() {
	var ranges string
	flag.StringVar(&ranges, "ranges", "", "define multiple codepoint ranges with \"N1..N2 N3..N4\"")
	flag.Parse()
	rangeIntervals := parseRanges(ranges)

	scanner := bufio.NewScanner(bytes.NewReader(unicodedata))
	for scanner.Scan() {
		// Only the first two fields are used, so stop splitting after them.
		fields := strings.SplitN(scanner.Text(), ";", 3)
		if len(fields) < 2 || len(fields[1]) == 0 {
			continue
		}
		codepoint, description := fields[0], fields[1]
		if (description[0] == '<' && description[len(description)-1] == '>') ||
			strings.Contains(description, "MODIFIER") ||
			strings.Contains(description, "VARIATION SELECTOR") {
			continue
		}
		i, err := parseHex(codepoint)
		if err != nil {
			continue
		}
		// Without any -ranges filter every remaining entry is printed.
		if len(rangeIntervals) == 0 {
			printCodepoint(i, description)
			continue
		}
		// Print at most once even if several ranges overlap at i.
		for _, r := range rangeIntervals {
			if r.contains(i) {
				printCodepoint(i, description)
				break
			}
		}
	}
	// Scan returns false on both EOF and failure; only Err tells them apart.
	if err := scanner.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "reading UnicodeData.txt:", err)
		os.Exit(1)
	}
}