freetype/truetype: parse UCS-4 encoded cmap tables.

R=bsiegert
CC=golang-dev
https://codereview.appspot.com/14548046
This commit is contained in:
Nigel Tao 2013-10-11 07:29:40 +11:00
parent ba07cbfbc8
commit 0a778f7f02
2 changed files with 182 additions and 53 deletions

View File

@ -87,7 +87,7 @@ const (
// A cm holds a parsed cmap entry.
type cm struct {
start, end, delta, offset uint16
start, end, delta, offset uint32
}
// A Font represents a Truetype font.
@ -111,12 +111,14 @@ type Font struct {
func (f *Font) parseCmap() error {
const (
cmapFormat4 = 4
cmapFormat12 = 12
languageIndependent = 0
// A 32-bit encoding consists of a most-significant 16-bit Platform ID and a
// least-significant 16-bit Platform Specific ID.
unicodeEncoding = 0x00000003 // PID = 0 (Unicode), PSID = 3 (Unicode 2.0)
microsoftEncoding = 0x00030001 // PID = 3 (Microsoft), PSID = 1 (UCS-2)
unicodeEncoding = 0x00000003 // PID = 0 (Unicode), PSID = 3 (Unicode 2.0)
microsoftUCS2Encoding = 0x00030001 // PID = 3 (Microsoft), PSID = 1 (UCS-2)
microsoftUCS4Encoding = 0x0003000a // PID = 3 (Microsoft), PSID = 10 (UCS-4)
)
if len(f.cmap) < 4 {
@ -137,7 +139,7 @@ func (f *Font) parseCmap() error {
if pidPsid == unicodeEncoding {
offset, found = int(o), true
break
} else if pidPsid == microsoftEncoding {
} else if pidPsid == microsoftUCS2Encoding || pidPsid == microsoftUCS4Encoding {
offset, found = int(o), true
// We don't break out of the for loop, so that Unicode can override Microsoft.
}
@ -150,39 +152,63 @@ func (f *Font) parseCmap() error {
}
cmapFormat := u16(f.cmap, offset)
if cmapFormat != cmapFormat4 {
return UnsupportedError(fmt.Sprintf("cmap format: %d", cmapFormat))
}
language := u16(f.cmap, offset+4)
if language != languageIndependent {
return UnsupportedError(fmt.Sprintf("language: %d", language))
}
segCountX2 := int(u16(f.cmap, offset+6))
if segCountX2%2 == 1 {
return FormatError(fmt.Sprintf("bad segCountX2: %d", segCountX2))
}
segCount := segCountX2 / 2
offset += 14
f.cm = make([]cm, segCount)
for i := 0; i < segCount; i++ {
f.cm[i].end = u16(f.cmap, offset)
switch cmapFormat {
case cmapFormat4:
language := u16(f.cmap, offset+4)
if language != languageIndependent {
return UnsupportedError(fmt.Sprintf("language: %d", language))
}
segCountX2 := int(u16(f.cmap, offset+6))
if segCountX2%2 == 1 {
return FormatError(fmt.Sprintf("bad segCountX2: %d", segCountX2))
}
segCount := segCountX2 / 2
offset += 14
f.cm = make([]cm, segCount)
for i := 0; i < segCount; i++ {
f.cm[i].end = uint32(u16(f.cmap, offset))
offset += 2
}
offset += 2
for i := 0; i < segCount; i++ {
f.cm[i].start = uint32(u16(f.cmap, offset))
offset += 2
}
for i := 0; i < segCount; i++ {
f.cm[i].delta = uint32(u16(f.cmap, offset))
offset += 2
}
for i := 0; i < segCount; i++ {
f.cm[i].offset = uint32(u16(f.cmap, offset))
offset += 2
}
f.cmapIndexes = f.cmap[offset:]
return nil
case cmapFormat12:
if u16(f.cmap, offset+2) != 0 {
return FormatError(fmt.Sprintf("cmap format: % x", f.cmap[offset:offset+4]))
}
length := u32(f.cmap, offset+4)
language := u32(f.cmap, offset+8)
if language != languageIndependent {
return UnsupportedError(fmt.Sprintf("language: %d", language))
}
nGroups := u32(f.cmap, offset+12)
if length != 12*nGroups+16 {
return FormatError("inconsistent cmap length")
}
offset += 16
f.cm = make([]cm, nGroups)
for i := uint32(0); i < nGroups; i++ {
f.cm[i].start = u32(f.cmap, offset+0)
f.cm[i].end = u32(f.cmap, offset+4)
f.cm[i].delta = u32(f.cmap, offset+8) - f.cm[i].start
offset += 12
}
return nil
}
offset += 2
for i := 0; i < segCount; i++ {
f.cm[i].start = u16(f.cmap, offset)
offset += 2
}
for i := 0; i < segCount; i++ {
f.cm[i].delta = u16(f.cmap, offset)
offset += 2
}
for i := 0; i < segCount; i++ {
f.cm[i].offset = u16(f.cmap, offset)
offset += 2
}
f.cmapIndexes = f.cmap[offset:]
return nil
return UnsupportedError(fmt.Sprintf("cmap format: %d", cmapFormat))
}
func (f *Font) parseHead() error {
@ -296,8 +322,9 @@ func (f *Font) FUnitsPerEm() int32 {
// Index returns a Font's index for the given rune.
func (f *Font) Index(x rune) Index {
c := uint16(x)
c := uint32(x)
n := len(f.cm)
// TODO: binary search.
for i := 0; i < n; i++ {
if f.cm[i].start <= c && c <= f.cm[i].end {
if f.cm[i].offset == 0 {

View File

@ -16,14 +16,24 @@ import (
"testing"
)
// parseTestdataFont reads ../../testdata/<name>.ttf and parses it as a Font.
//
// On failure it returns a nil font and a non-nil error whose message is
// prefixed with the font name and the step that failed ("ReadFile" or
// "Parse"). testdataIsOptional is true only when the file could not be read
// and name starts with "x-"; callers use it to decide whether the error
// should merely be logged instead of failing the test.
func parseTestdataFont(name string) (font *Font, testdataIsOptional bool, err error) {
	b, err := ioutil.ReadFile(fmt.Sprintf("../../testdata/%s.ttf", name))
	if err != nil {
		// The "x-foo" fonts are optional tests, as they are not checked
		// in for copyright or file size reasons.
		return nil, strings.HasPrefix(name, "x-"), fmt.Errorf("%s: ReadFile: %v", name, err)
	}
	font, err = Parse(b)
	if err != nil {
		// The file was read successfully, so a parse failure is a genuine
		// failure and must not be reported as optional.
		return nil, false, fmt.Errorf("%s: Parse: %v", name, err)
	}
	return font, false, nil
}
// TestParse tests that the luxisr.ttf metrics and glyphs are parsed correctly.
// The numerical values can be manually verified by examining luxisr.ttx.
func TestParse(t *testing.T) {
b, err := ioutil.ReadFile("../../testdata/luxisr.ttf")
if err != nil {
t.Fatal(err)
}
font, err := Parse(b)
font, _, err := parseTestdataFont("luxisr")
if err != nil {
t.Fatal(err)
}
@ -74,6 +84,105 @@ func TestParse(t *testing.T) {
}
}
func TestIndex(t *testing.T) {
testCases := map[string]map[rune]Index{
"luxisr": {
' ': 3,
'!': 4,
'A': 36,
'V': 57,
'É': 101,
'fl': 193,
'\u22c5': 385,
'中': 0,
},
"x-arial-bold": {
' ': 3,
'+': 14,
'0': 19,
'_': 66,
'w': 90,
'~': 97,
'Ä': 98,
'fl': 192,
'½': 242,
'σ': 305,
'λ': 540,
'ỹ': 1275,
'\u04e9': 1319,
'中': 0,
},
"x-deja-vu-sans-oblique": {
' ': 3,
'*': 13,
'Œ': 276,
'ω': 861,
'‡': 2571,
'⊕': 3109,
'fl': 4560,
'\ufb03': 4561,
'\ufffd': 4645,
// TODO: '\U0001f640': ???,
'中': 0,
},
"x-droid-sans-japanese": {
' ': 0,
'\u3000': 3,
'\u3041': 25,
'\u30fe': 201,
'\uff61': 202,
'\uff67': 208,
'\uff9e': 263,
'\uff9f': 264,
'\u4e00': 265,
'\u557e': 1000,
'\u61b6': 2024,
'\u6ede': 3177,
'\u7505': 3555,
'\u81e3': 4602,
'\u81e5': 4603,
'\u81e7': 4604,
'\u81e8': 4605,
'\u81ea': 4606,
'\u81ed': 4607,
'\u81f3': 4608,
'\u81f4': 4609,
'\u91c7': 5796,
'\u9fa0': 6620,
'\u203e': 12584,
},
"x-times-new-roman": {
' ': 3,
':': 29,
'fl': 192,
'Ŀ': 273,
'♠': 388,
'Ŗ': 451,
'Σ': 520,
'\u200D': 745,
'Ẽ': 1216,
'\u04e9': 1319,
'中': 0,
},
}
for name, wants := range testCases {
font, testdataIsOptional, err := parseTestdataFont(name)
if err != nil {
if testdataIsOptional {
t.Log(err)
} else {
t.Fatal(err)
}
continue
}
for r, want := range wants {
if got := font.Index(r); got != want {
t.Errorf("%s: Index(%q): got %d, want %d", name, r, got, want)
}
}
}
}
var scalingTestCases = []struct {
name string
size int32
@ -87,29 +196,22 @@ var scalingTestCases = []struct {
// GlyphBuf.Load, and the unhinted values match C Freetype.
{"x-arial-bold", 11, 0},
//{"x-deja-vu-sans-oblique", 17, 0},
//{"x-droid-sans-japanese", 9, 0},
{"x-droid-sans-japanese", 9, 0},
//{"x-times-new-roman", 13, 0},
}
func testScaling(t *testing.T, hinter *Hinter) {
loop:
for _, tc := range scalingTestCases {
b, err := ioutil.ReadFile(fmt.Sprintf("../../testdata/%s.ttf", tc.name))
font, testdataIsOptional, err := parseTestdataFont(tc.name)
if err != nil {
// The "x-foo" fonts are optional tests, as they are not checked
// in for copyright or file size reasons.
if strings.HasPrefix(tc.name, "x-") {
t.Logf("%s: ReadFile: %v", tc.name, err)
if testdataIsOptional {
t.Log(err)
} else {
t.Errorf("%s: ReadFile: %v", tc.name, err)
t.Error(err)
}
continue loop
}
font, err := Parse(b)
if err != nil {
t.Errorf("%s: Parse: %v", tc.name, err)
continue loop
}
hinting := "sans"
if hinter != nil {
hinting = "with"