// Command-line tool that walks the embedded UnicodeData.txt and prints, for
// each accepted line, the character followed by the text of the line's second
// field. An optional -ranges flag restricts the output to code points that
// fall inside at least one of the given inclusive hexadecimal ranges.
package main

import (
	"bufio"
	"bytes"
	_ "embed"
	"flag"
	"fmt"
	"os"
	"strconv"
	"strings"
)

// unicodedata holds the raw bytes of UnicodeData.txt, embedded at build time.
//
//go:embed UnicodeData.txt
var unicodedata []byte

// die prints msg followed by a usage summary and the flag defaults to stderr,
// then terminates the process with exit status -1. It never returns.
func die(msg string) {
	fmt.Fprintln(os.Stderr, msg)
	fmt.Fprintln(os.Stderr, "USAGE:", os.Args[0], "[-ranges RANGES]")
	flag.PrintDefaults()
	os.Exit(-1)
}

// rangeInterval is an inclusive interval [from, to] of code point values.
type rangeInterval struct {
	from int64
	to   int64
}

// contains reports whether i lies within the interval, both ends included.
func (r *rangeInterval) contains(i int64) bool {
	return r.from <= i && i <= r.to
}

// parseHex parses s as a base-16 integer that must fit in 32 bits. No "0x"
// prefix is accepted because the base is given explicitly.
func parseHex(s string) (int64, error) {
	return strconv.ParseInt(s, 16, 32)
}

// parseRanges converts a whitespace-separated list of "FROM..TO" hexadecimal
// pairs into intervals. An empty or all-whitespace argument yields no
// intervals. Any malformed entry, including one whose FROM is greater than
// its TO, terminates the program through die.
func parseRanges(ranges string) []rangeInterval {
	var intervals []rangeInterval
	// strings.Fields tolerates leading, trailing and repeated whitespace,
	// which a plain split on " " would turn into empty, invalid entries.
	for _, r := range strings.Fields(ranges) {
		fromto := strings.Split(r, "..")
		if len(fromto) != 2 {
			die("invalid ranges")
		}
		from, err := parseHex(fromto[0])
		if err != nil {
			die("invalid ranges")
		}
		to, err := parseHex(fromto[1])
		if err != nil {
			die("invalid ranges")
		}
		// A reversed range can never match anything; reject it instead of
		// silently producing no output for it.
		if from > to {
			die("invalid ranges")
		}
		intervals = append(intervals, rangeInterval{from: from, to: to})
	}
	return intervals
}

// printCodepoint writes the character for code point i, a space, and the
// description to stdout, followed by a newline.
func printCodepoint(i int64, description string) {
	fmt.Println(string(rune(i)), description)
}

// inRanges reports whether i should be printed: always when no intervals were
// requested, otherwise only when at least one interval contains i.
func inRanges(intervals []rangeInterval, i int64) bool {
	if len(intervals) == 0 {
		return true
	}
	for idx := range intervals {
		if intervals[idx].contains(i) {
			return true
		}
	}
	return false
}

// main parses the -ranges flag, scans the embedded data line by line and
// prints every entry that passes the description filter and range selection.
func main() {
	var ranges string
	flag.StringVar(&ranges, "ranges", "", "define multiple codepoint ranges with \"N1..N2 N3..N4\"")
	flag.Parse()

	rangeIntervals := parseRanges(ranges)

	scanner := bufio.NewScanner(bytes.NewReader(unicodedata))
	for scanner.Scan() {
		// Only the first two ';'-separated fields are used, so stop
		// splitting after them.
		fields := strings.SplitN(scanner.Text(), ";", 3)
		if len(fields) < 2 || len(fields[1]) == 0 {
			continue
		}
		codepoint, description := fields[0], fields[1]

		// Skip entries whose description is wrapped in angle brackets, and
		// entries described as modifiers or variation selectors.
		if (strings.HasPrefix(description, "<") && strings.HasSuffix(description, ">")) ||
			strings.Contains(description, "MODIFIER") ||
			strings.Contains(description, "VARIATION SELECTOR") {
			continue
		}

		i, err := parseHex(codepoint)
		if err != nil {
			// Lines whose first field is not valid hex are ignored.
			continue
		}
		if inRanges(rangeIntervals, i) {
			printCodepoint(i, description)
		}
	}
	// Scan returns false on error as well as at end of input; surface the
	// error rather than exiting successfully with truncated output.
	if err := scanner.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "reading UnicodeData.txt:", err)
		os.Exit(1)
	}
}