add unicodedata
This commit is contained in:
parent
60ff521ec1
commit
c735447a81
34627
UnicodeData.txt
Normal file
34627
UnicodeData.txt
Normal file
File diff suppressed because it is too large
Load Diff
98
unicodedata.go
Normal file
98
unicodedata.go
Normal file
|
@ -0,0 +1,98 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
_ "embed"
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// unicodedata holds the raw bytes of UnicodeData.txt, embedded into the
// binary by the go:embed directive below. main scans it line by line and
// reads the first two ';'-separated fields of each line.
//
//go:embed UnicodeData.txt
|
||||
var unicodedata []byte
|
||||
|
||||
// die reports a fatal command-line error and terminates the program.
//
// It writes msg and a one-line usage summary to standard error, prints the
// registered flag defaults via flag.PrintDefaults, and then exits with
// status -1. It never returns to the caller.
func die(msg string) {
	stderr := os.Stderr
	program := os.Args[0]

	fmt.Fprintln(stderr, msg)
	fmt.Fprintln(stderr, "USAGE:", program, "[-ranges RANGES]")
	flag.PrintDefaults()

	os.Exit(-1)
}
|
||||
|
||||
// rangeInterval is a closed interval of integer values; both from and to are
// treated as members of the interval.
type rangeInterval struct {
	from, to int64 // inclusive lower and upper bounds
}

// contains reports whether i lies inside the interval, bounds included.
// An interval whose from is greater than its to contains nothing.
func (r *rangeInterval) contains(i int64) bool {
	if i < r.from {
		return false
	}
	return i <= r.to
}
|
||||
|
||||
// parseHex interprets s as a base-16 integer that must fit into a signed
// 32-bit value. The result and the error are passed through exactly as
// strconv.ParseInt produces them.
func parseHex(s string) (int64, error) {
	const (
		base    = 16
		bitSize = 32
	)
	value, err := strconv.ParseInt(s, base, bitSize)
	return value, err
}
|
||||
|
||||
func parseRanges(ranges string) []rangeInterval {
|
||||
intervals := []rangeInterval{}
|
||||
if strings.TrimSpace(ranges) != "" {
|
||||
splitranges := strings.Split(ranges, " ")
|
||||
for _, r := range splitranges {
|
||||
fromto := strings.Split(r, "..")
|
||||
if len(fromto) != 2 {
|
||||
die("invalid ranges")
|
||||
}
|
||||
from, err := parseHex(fromto[0])
|
||||
if err != nil {
|
||||
die("invalid ranges")
|
||||
}
|
||||
to, err := parseHex(fromto[1])
|
||||
if err != nil {
|
||||
die("invalid ranges")
|
||||
}
|
||||
intervals = append(intervals, rangeInterval{from, to})
|
||||
}
|
||||
}
|
||||
return intervals
|
||||
}
|
||||
|
||||
// printCodepoint writes one line to standard output: the UTF-8 encoding of
// the code point i, followed by a space and description.
func printCodepoint(i int64, description string) {
	fmt.Println(string(rune(i)), description)
}
|
||||
|
||||
func main() {
|
||||
var ranges string
|
||||
flag.StringVar(&ranges, "ranges", "", "define multiple codepoint ranges with \"N1..N2 N3..N4\"")
|
||||
flag.Parse()
|
||||
rangeIntervals := parseRanges(ranges)
|
||||
|
||||
scanner := bufio.NewScanner(bytes.NewReader(unicodedata))
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
splitline := strings.Split(line, ";")
|
||||
if len(splitline) < 2 || len(splitline[1]) == 0 {
|
||||
continue
|
||||
}
|
||||
codepoint, description := splitline[0], splitline[1]
|
||||
if (description[0] == '<' && description[len(description)-1] == '>') || strings.Contains(description, "MODIFIER") || strings.Contains(description, "VARIATION SELECTOR") {
|
||||
continue
|
||||
}
|
||||
|
||||
i, err := parseHex(codepoint)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if len(rangeIntervals) == 0 {
|
||||
printCodepoint(i, description)
|
||||
}
|
||||
for _, r := range rangeIntervals {
|
||||
if r.contains(i) {
|
||||
printCodepoint(i, description)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user