font/sfnt: parse the CFF table.

Parsing the per-glyph Charstrings will be a follow-up commit.

Change-Id: I52f849a77dd7fa14b6e07420820bdfb4347c2438
Reviewed-on: https://go-review.googlesource.com/33593
Reviewed-by: Dave Day <djd@golang.org>
This commit is contained in:
Nigel Tao 2016-11-27 20:19:50 +11:00
parent c39d899a5b
commit 5286ed5c2a
6 changed files with 614 additions and 15 deletions

389
font/sfnt/postscript.go Normal file
View File

@ -0,0 +1,389 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sfnt
// Compact Font Format (CFF) fonts are written in PostScript, a stack-based
// programming language.
//
// A fundamental concept is a DICT, or a key-value map, expressed in reverse
// Polish notation. For example, this sequence of operations:
// - push the number 379
// - version operator
// - push the number 392
// - Notice operator
// - etc
// - push the number 100
// - push the number 0
// - push the number 500
// - push the number 800
// - FontBBox operator
// - etc
// defines a DICT that maps "version" to the String ID (SID) 379, the copyright
// "Notice" to the SID 392, the font bounding box "FontBBox" to the four
// numbers [100, 0, 500, 800], etc.
//
// The first 391 String IDs (starting at 0) are predefined as per the CFF spec
// Appendix A, in 5176.CFF.pdf referenced below. For example, 379 means
// "001.000". String ID 392 is not predefined, and is mapped by a separate
// structure, the "String INDEX", inside the CFF data. (String ID 391 is also
// not predefined. Specifically for ../testdata/CFFTest.otf, 391 means
// "uni4E2D", as this font contains a glyph for U+4E2D).
//
// The actual glyph vectors are similarly encoded (in PostScript), in a format
// called Type 2 Charstrings. The wire encoding is similar to but not exactly
// the same as CFF's. For example, the byte 0x05 means FontBBox for CFF DICTs,
// but means rlineto (relative line-to) for Type 2 Charstrings. See
// 5176.CFF.pdf Appendix H and 5177.Type2.pdf Appendix A in the PDF files
// referenced below.
//
// CFF is a stand-alone format, but CFF as used in SFNT fonts has further
// restrictions. For example, a stand-alone CFF can contain multiple fonts, but
// https://www.microsoft.com/typography/OTSPEC/cff.htm says that "The Name
// INDEX in the CFF must contain only one entry; that is, there must be only
// one font in the CFF FontSet".
//
// The relevant specifications are:
// - http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
// - http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
import (
"fmt"
)
const (
// psStackSize is the maximum operand stack depth for a PostScript
// interpreter. 5176.CFF.pdf section 4 "DICT Data" says that "An operator may
// be preceded by up to a maximum of 48 operands". Similarly, 5177.Type2.pdf
// Appendix B "Type 2 Charstring Implementation Limits" lists an argument
// stack limit of 48.
psStackSize = 48
)
// bigEndian decodes b, which must hold between 1 and 4 bytes, as an unsigned
// big-endian integer (most significant byte first).
//
// It panics if len(b) is outside the range [1, 4]; callers are expected to
// have validated the width beforehand.
func bigEndian(b []byte) uint32 {
	if n := len(b); n < 1 || n > 4 {
		panic("unreachable")
	}
	var v uint32
	for _, x := range b {
		// Shift the accumulated value up one byte and append the next byte.
		v = v<<8 | uint32(x)
	}
	return v
}
// cffParser parses the CFF table from an SFNT font.
//
// It combines a read cursor over the table with the state of a small DICT
// interpreter: an operand stack, the instruction bytes still to be executed,
// and the values that operators have saved for later use.
type cffParser struct {
// src is the font data that read views bytes from.
src *source
// base is the position that the saved CharStrings offset is added to when
// parse seeks to the CharStrings INDEX.
base int
// offset is the position of the next byte that read will view. read
// advances it by the number of bytes requested.
offset int
// end bounds the parser's reads: read fails with errInvalidCFFTable if
// fewer than the requested number of bytes lie between offset and end.
end int
// buf is the view produced by the most recent read call.
buf []byte
// err holds the error, if any, set by read, parseIndexHeader,
// parseIndexLocations or step when they fail.
err error
// locBuf is scratch space for the two locations of a single-entry INDEX
// (the Name INDEX and the Top DICT INDEX).
locBuf [2]uint32
// instructions holds the DICT bytes that have not been executed yet. step
// and parseNumber consume from the front of this slice.
instructions []byte
// stack is the interpreter's operand stack. top is the number of operands
// currently held in a, i.e. the index of the next free slot.
stack struct {
a [psStackSize]int32
top int32
}
// saved holds operands captured by operators while the Top DICT is
// interpreted.
saved struct {
// charStrings is the operand of the CharStrings operator. parse treats it
// as the offset of the CharStrings INDEX relative to base.
charStrings int32
}
}
// parse walks the CFF table: the header, the Name INDEX, the Top DICT INDEX
// (whose single DICT is interpreted to find the CharStrings offset) and
// finally the CharStrings INDEX.
//
// On success it returns count+1 locations for the count entries of the
// CharStrings INDEX, so that entry i occupies the source bytes
// [locations[i], locations[i+1]). On failure it returns a nil slice and a
// non-nil error.
func (p *cffParser) parse() (locations []uint32, err error) {
	// Header: only major version 1, minor version 0 with a 4-byte header is
	// accepted.
	if !p.read(4) {
		return nil, p.err
	}
	if hdr := p.buf; hdr[0] != 1 || hdr[1] != 0 || hdr[2] != 4 {
		return nil, errUnsupportedCFFVersion
	}

	// Name INDEX.
	nameCount, nameOffSize, ok := p.parseIndexHeader()
	if !ok {
		return nil, p.err
	}
	// https://www.microsoft.com/typography/OTSPEC/cff.htm says that "The
	// Name INDEX in the CFF must contain only one entry".
	if nameCount != 1 {
		return nil, errInvalidCFFTable
	}
	if !p.parseIndexLocations(p.locBuf[:2], nameCount, nameOffSize) {
		return nil, p.err
	}
	// Skip over the single name; its contents are not needed.
	p.offset = int(p.locBuf[1])

	// Top DICT INDEX.
	dictCount, dictOffSize, ok := p.parseIndexHeader()
	if !ok {
		return nil, p.err
	}
	// 5176.CFF.pdf section 8 "Top DICT INDEX" says that the count here
	// should match the count of the Name INDEX, which is 1.
	if dictCount != 1 {
		return nil, errInvalidCFFTable
	}
	if !p.parseIndexLocations(p.locBuf[:2], dictCount, dictOffSize) {
		return nil, p.err
	}
	dictLen := int(p.locBuf[1] - p.locBuf[0])
	if !p.read(dictLen) {
		return nil, p.err
	}
	// Interpret the Top DICT one operation at a time until it is exhausted.
	p.instructions = p.buf
	for len(p.instructions) > 0 {
		if p.step(); p.err != nil {
			return nil, p.err
		}
	}

	// CharStrings INDEX, whose location was found in the Top DICT. The offset
	// must be positive and must not point past the end of the table.
	if cs := p.saved.charStrings; cs <= 0 || cs > int32(p.end-p.base) {
		return nil, errInvalidCFFTable
	}
	p.offset = p.base + int(p.saved.charStrings)
	glyphCount, glyphOffSize, ok := p.parseIndexHeader()
	if !ok {
		return nil, p.err
	}
	if glyphCount == 0 {
		return nil, errInvalidCFFTable
	}
	locations = make([]uint32, glyphCount+1)
	if !p.parseIndexLocations(locations, glyphCount, glyphOffSize) {
		return nil, p.err
	}
	return locations, nil
}
// read points p.buf at the n bytes starting at p.offset and then moves
// p.offset forward by n. It reports whether the read succeeded; on failure
// p.err is set.
//
// If fewer than n bytes remain before p.end, read fails with
// errInvalidCFFTable and leaves p.buf and p.offset untouched.
//
// As with the source.view method, the contents of p.buf must not be modified
// once read returns, other than by calling read again.
//
// Callers should also leave the p.buf slice header itself (pointer, length
// and capacity) alone, so that later read calls get the best chance of
// re-using its allocated memory when viewing the underlying source data.
func (p *cffParser) read(n int) (ok bool) {
	if remaining := p.end - p.offset; n > remaining {
		p.err = errInvalidCFFTable
		return false
	}
	view, err := p.src.view(p.buf, p.offset, n)
	p.buf, p.err = view, err
	p.offset += n
	return err == nil
}
// parseIndexHeader reads the header of an INDEX at p.offset: a 2-byte entry
// count followed, for a non-empty INDEX, by a 1-byte offset size.
//
// It returns the count, the offset size (between 1 and 4) and ok == true on
// success. An empty INDEX yields (0, 0, true). On failure it returns
// (0, 0, false) with p.err set.
func (p *cffParser) parseIndexHeader() (count, offSize int32, ok bool) {
	if !p.read(2) {
		return 0, 0, false
	}
	n := int32(u16(p.buf[:2]))
	// 5176.CFF.pdf section 5 "INDEX Data" says that "An empty INDEX is
	// represented by a count field with a 0 value and no additional fields.
	// Thus, the total size of an empty INDEX is 2 bytes".
	if n == 0 {
		return 0, 0, true
	}
	if !p.read(1) {
		return 0, 0, false
	}
	switch size := int32(p.buf[0]); size {
	case 1, 2, 3, 4:
		return n, size, true
	}
	// Any other offset size is malformed.
	p.err = errInvalidCFFTable
	return 0, 0, false
}
// parseIndexLocations reads the offset array of an INDEX and stores the
// resulting positions in dst, which must have exactly count+1 elements (it
// panics otherwise). Each stored position is the encoded offset, corrected
// for the format's off-by-one, added to the position just past the offset
// array.
//
// A zero count is a no-op that returns true. Otherwise it returns false, with
// p.err set, if the array cannot be read or if any offset is zero, the first
// offset is not 1, the offsets are not strictly increasing, or an offset
// points past p.end.
func (p *cffParser) parseIndexLocations(dst []uint32, count, offSize int32) (ok bool) {
	if count == 0 {
		return true
	}
	if int(count+1) != len(dst) {
		panic("unreachable")
	}
	if !p.read(int(offSize) * len(dst)) {
		return false
	}

	// After the read, p.offset is the position just past the offset array.
	// Neither it nor p.end changes for the rest of this function.
	origin := uint32(p.offset)
	limit := uint32(p.end - p.offset)

	raw := p.buf
	var last uint32
	for i := 0; i < len(dst); i++ {
		entry := bigEndian(raw[:offSize])
		raw = raw[offSize:]

		// Locations are off by 1 byte. 5176.CFF.pdf section 5 "INDEX Data"
		// says that "Offsets in the offset array are relative to the byte that
		// precedes the object data... This ensures that every object has a
		// corresponding offset which is always nonzero".
		valid := entry != 0
		if valid {
			entry--
			if i == 0 {
				// In the same paragraph, "Therefore the first element of the
				// offset array is always 1" before correcting for the
				// off-by-1.
				valid = entry == 0
			} else {
				// Later locations must be strictly increasing.
				valid = entry > last
			}
		}
		// Locations must also be in bounds.
		if !valid || entry > limit {
			p.err = errInvalidCFFTable
			return false
		}

		dst[i] = origin + entry
		last = entry
	}
	return true
}
// step performs one operation from the front of p.instructions: it either
// pushes a numeric operand onto the stack or executes a 1-byte operator.
//
// Failures are reported through p.err: errInvalidCFFTable for a malformed
// number, a stack overflow or too few operands for the operator, and a
// formatted error for an operator that is not in cff1ByteOperators.
//
// p.instructions must be non-empty when step is called.
func (p *cffParser) step() {
	switch number, res := p.parseNumber(); {
	case res == prBad, res != prNone && p.stack.top == psStackSize:
		// Truncated number, or no room left on the operand stack.
		p.err = errInvalidCFFTable
		return
	case res != prNone:
		p.stack.a[p.stack.top] = number
		p.stack.top++
		return
	}

	// Not a number, so the next byte is an operator.
	opcode := p.instructions[0]
	p.instructions = p.instructions[1:]

	if int(opcode) >= len(cff1ByteOperators) || cff1ByteOperators[opcode].name == "" {
		p.err = fmt.Errorf("sfnt: unrecognized CFF 1-byte operator %d", opcode)
		return
	}
	op := cff1ByteOperators[opcode]
	if op.numPop > p.stack.top {
		p.err = errInvalidCFFTable
		return
	}
	if op.run != nil {
		op.run(p)
	}
	// A negative numPop marks a variable-length operand list, which consumes
	// the whole stack.
	if op.numPop >= 0 {
		p.stack.top -= op.numPop
	} else {
		p.stack.top = 0
	}
}
// parseResult is the outcome of cffParser.parseNumber.
type parseResult int32
const (
// prBad means that the next bytes start a number whose encoding is
// truncated.
prBad parseResult = -1
// prNone means that no number was consumed: either there are no
// instructions left or the next byte does not start a number.
prNone parseResult = +0
// prGood means that a number was decoded and its bytes were consumed.
prGood parseResult = +1
)
// parseNumber tries to decode an integer operand from the front of
// p.instructions, as per 5176.CFF.pdf section 4 "DICT Data".
//
// It returns (number, prGood) and consumes the number's bytes on success. It
// returns (0, prBad) if the leading byte starts a number but too few bytes
// follow, and (0, prNone), consuming nothing, if p.instructions is empty or
// the leading byte does not start one of the integer encodings handled here.
func (p *cffParser) parseNumber() (number int32, res parseResult) {
	in := p.instructions
	if len(in) == 0 {
		return 0, prNone
	}

	b0 := in[0]
	switch {
	case b0 == 28:
		// 3 bytes: marker plus a signed 16-bit big-endian value.
		if len(in) < 3 {
			return 0, prBad
		}
		p.instructions = in[3:]
		return int32(int16(u16(in[1:]))), prGood

	case b0 == 29:
		// 5 bytes: marker plus a signed 32-bit big-endian value.
		if len(in) < 5 {
			return 0, prBad
		}
		p.instructions = in[5:]
		return int32(u32(in[1:])), prGood

	case 32 <= b0 && b0 <= 246:
		// 1 byte, covering -107 to +107.
		p.instructions = in[1:]
		return int32(b0) - 139, prGood

	case 247 <= b0 && b0 <= 250:
		// 2 bytes, covering +108 to +1131.
		if len(in) < 2 {
			return 0, prBad
		}
		p.instructions = in[2:]
		return (int32(b0)-247)*256 + int32(in[1]) + 108, prGood

	case 251 <= b0 && b0 <= 254:
		// 2 bytes, covering -1131 to -108.
		if len(in) < 2 {
			return 0, prBad
		}
		p.instructions = in[2:]
		return -(int32(b0)-251)*256 - int32(in[1]) - 108, prGood
	}

	// Every remaining byte value (below 32 other than 28 and 29, or 255) is
	// left for the caller to handle.
	return 0, prNone
}
// cffOperator describes how the DICT interpreter handles one operator.
type cffOperator struct {
// numPop is the number of stack values to pop. -1 means "array" and -2
// means "delta" as per 5176.CFF.pdf Table 6 "Operand Types". The
// interpreter's step method empties the whole stack for any negative value.
numPop int32
// name is the operator name. An empty name (i.e. the zero value for the
// struct overall) means an unrecognized 1-byte operator.
name string
// run is the function that implements the operator. Nil means that we
// ignore the operator, other than popping its arguments off the stack.
// step calls run before the operands are popped.
run func(*cffParser)
}
// cff1ByteOperators is indexed by operator byte and holds the subset of
// 5176.CFF.pdf Table 9 "Top DICT Operator Entries" used by this
// implementation. Indexes without an entry are left as the zero cffOperator,
// whose empty name marks the operator as unrecognized.
var cff1ByteOperators = [...]cffOperator{
	0:  {numPop: +1, name: "version"},
	1:  {numPop: +1, name: "Notice"},
	2:  {numPop: +1, name: "FullName"},
	3:  {numPop: +1, name: "FamilyName"},
	4:  {numPop: +1, name: "Weight"},
	5:  {numPop: -1, name: "FontBBox"},
	13: {numPop: +1, name: "UniqueID"},
	14: {numPop: -1, name: "XUID"},
	15: {numPop: +1, name: "charset"},
	16: {numPop: +1, name: "Encoding"},
	17: {
		numPop: +1,
		name:   "CharStrings",
		// Remember the operand on top of the stack; parse later uses it to
		// locate the CharStrings INDEX.
		run: func(p *cffParser) {
			top := p.stack.top
			p.saved.charStrings = p.stack.a[top-1]
		},
	},
	18: {numPop: +2, name: "Private"},
}
// TODO: 2-byte operators.

View File

@ -10,6 +10,9 @@ package sfnt // import "golang.org/x/image/font/sfnt"
// https://www.microsoft.com/en-us/Typography/OpenTypeSpecification.aspx
// specification. Additional documentation is at
// http://developer.apple.com/fonts/TTRefMan/
//
// The pyftinspect tool from https://github.com/fonttools/fonttools is useful
// for inspecting SFNT fonts.
import (
"errors"
@ -26,17 +29,21 @@ const (
)
var (
errGlyphIndexOutOfRange = errors.New("sfnt: glyph index out of range")
errInvalidBounds = errors.New("sfnt: invalid bounds")
errInvalidCFFTable = errors.New("sfnt: invalid CFF table")
errInvalidHeadTable = errors.New("sfnt: invalid head table")
errInvalidLocationData = errors.New("sfnt: invalid location data")
errInvalidMaxpTable = errors.New("sfnt: invalid maxp table")
errInvalidSourceData = errors.New("sfnt: invalid source data")
errInvalidTableOffset = errors.New("sfnt: invalid table offset")
errInvalidTableTagOrder = errors.New("sfnt: invalid table tag order")
errInvalidVersion = errors.New("sfnt: invalid version")
errUnsupportedCFFVersion = errors.New("sfnt: unsupported CFF version")
errUnsupportedNumberOfTables = errors.New("sfnt: unsupported number of tables")
errUnsupportedTableOffsetLength = errors.New("sfnt: unsupported table offset or length")
errUnsupportedVersion = errors.New("sfnt: unsupported version")
)
// Units are an integral number of abstract, scalable "font units". The em
@ -174,7 +181,8 @@ type Font struct {
// https://www.microsoft.com/typography/otspec/otff.htm#otttables
// "Tables Related to PostScript Outlines".
//
// TODO: cff, cff2, vorg?
// TODO: cff2, vorg?
cff table
// https://www.microsoft.com/typography/otspec/otff.htm#otttables
// "Advanced Typographic Tables".
@ -187,13 +195,17 @@ type Font struct {
// TODO: hdmx, kern, vmtx? Others?
cached struct {
numGlyphs int
isPostScript bool
unitsPerEm Units
// The glyph data for the glyph index i is in
// src[locations[i+0]:locations[i+1]].
locations []uint32
}
}
// NumGlyphs returns the number of glyphs in f.
func (f *Font) NumGlyphs() int { return f.cached.numGlyphs }
func (f *Font) NumGlyphs() int { return len(f.cached.locations) - 1 }
// UnitsPerEm returns the number of units per em for f.
func (f *Font) UnitsPerEm() Units { return f.cached.unitsPerEm }
@ -217,8 +229,7 @@ func (f *Font) initialize() error {
case 0x00010000:
// No-op.
case 0x4f54544f: // "OTTO".
// TODO: support CFF fonts.
return errUnsupportedVersion
f.cached.isPostScript = true
}
numTables := int(u16(buf[4:]))
if numTables > maxNumTables {
@ -252,6 +263,8 @@ func (f *Font) initialize() error {
// Match the 4-byte tag as a uint32. For example, "OS/2" is 0x4f532f32.
switch tag {
case 0x43464620:
f.cff = table{o, n}
case 0x4f532f32:
f.os2 = table{o, n}
case 0x636d6170:
@ -291,14 +304,47 @@ func (f *Font) initialize() error {
f.cached.unitsPerEm = Units(u)
// https://www.microsoft.com/typography/otspec/maxp.htm
if f.cached.isPostScript {
if f.maxp.length != 6 {
return errInvalidMaxpTable
}
} else {
if f.maxp.length != 32 {
return errInvalidMaxpTable
}
}
u, err = f.src.u16(buf, f.maxp, 4)
if err != nil {
return err
}
f.cached.numGlyphs = int(u)
numGlyphs := int(u)
if f.cached.isPostScript {
p := cffParser{
src: &f.src,
base: int(f.cff.offset),
offset: int(f.cff.offset),
end: int(f.cff.offset + f.cff.length),
}
f.cached.locations, err = p.parse()
if err != nil {
return err
}
} else {
// TODO: locaParser for TrueType fonts.
f.cached.locations = make([]uint32, numGlyphs+1)
}
if len(f.cached.locations) != numGlyphs+1 {
return errInvalidLocationData
}
return nil
}
// viewGlyphData returns a view of the raw data for the glyph at glyphIndex,
// i.e. the source bytes between f.cached.locations[glyphIndex] and
// f.cached.locations[glyphIndex+1]. buf is passed through to the source's
// view method.
//
// It returns errGlyphIndexOutOfRange if glyphIndex is negative or not less
// than f.NumGlyphs().
func (f *Font) viewGlyphData(buf []byte, glyphIndex int) ([]byte, error) {
	if glyphIndex < 0 || glyphIndex >= f.NumGlyphs() {
		return nil, errGlyphIndexOutOfRange
	}
	locs := f.cached.locations
	lo := locs[glyphIndex]
	hi := locs[glyphIndex+1]
	return f.src.view(buf, int(lo), int(hi-lo))
}

View File

@ -6,28 +6,30 @@ package sfnt
import (
"bytes"
"io/ioutil"
"path/filepath"
"testing"
"golang.org/x/image/font/gofont/goregular"
)
func TestParse(t *testing.T) {
func TestTrueTypeParse(t *testing.T) {
f, err := Parse(goregular.TTF)
if err != nil {
t.Fatalf("Parse: %v", err)
}
testFont(t, f)
testTrueType(t, f)
}
func TestParseReaderAt(t *testing.T) {
func TestTrueTypeParseReaderAt(t *testing.T) {
f, err := ParseReaderAt(bytes.NewReader(goregular.TTF))
if err != nil {
t.Fatalf("ParseReaderAt: %v", err)
}
testFont(t, f)
testTrueType(t, f)
}
func testFont(t *testing.T, f *Font) {
func testTrueType(t *testing.T, f *Font) {
if got, want := f.UnitsPerEm(), Units(2048); got != want {
t.Errorf("UnitsPerEm: got %d, want %d", got, want)
}
@ -38,3 +40,39 @@ func testFont(t *testing.T, f *Font) {
t.Errorf("NumGlyphs: got %d, want > %d", got, want)
}
}
// TestPostScript parses ../testdata/CFFTest.otf and checks that the number of
// glyphs and the raw data viewed for each glyph index match the expected
// byte strings below.
func TestPostScript(t *testing.T) {
data, err := ioutil.ReadFile(filepath.Join("..", "testdata", "CFFTest.otf"))
if err != nil {
t.Fatal(err)
}
f, err := Parse(data)
if err != nil {
t.Fatal(err)
}
// TODO: replace this by a higher level test, once we parse Type 2
// Charstrings.
//
// As a sanity check for now, note that each string ends in '\x0e', which
// 5177.Type2.pdf Appendix A defines as "endchar".
wants := [...]string{
"\xf7\x63\x8b\xbd\xf8\x45\xbd\x01\xbd\xbd\xf7\xc0\xbd\x03\xbd\x16\xf8\x24\xf8\xa9\xfc\x24\x06\xbd\xfc\x77\x15\xf8\x45\xf7\xc0\xfc\x45\x07\x0e",
"\x8b\xef\xf8\xec\xef\x01\xef\xdb\xf7\x84\xdb\x03\xf7\xc0\xf9\x50\x15\xdb\xb3\xfb\x0c\x3b\xfb\x2a\x6d\xfb\x8e\x31\x3b\x63\xf7\x0c\xdb\xf7\x2a\xa9\xf7\x8e\xe5\x1f\xef\x04\x27\x27\xfb\x70\xfb\x48\xfb\x48\xef\xfb\x70\xef\xef\xef\xf7\x70\xf7\x48\xf7\x48\x27\xf7\x70\x27\x1f\x0e",
"\xf6\xa0\x76\x01\xef\xf7\x5c\x03\xef\x16\xf7\x5c\xf9\xb4\xfb\x5c\x06\x0e",
"\xf7\x89\xe1\x03\xf7\x21\xf8\x9c\x15\x87\xfb\x38\xf7\x00\xb7\xe1\xfc\x0a\xa3\xf8\x18\xf7\x00\x9f\x81\xf7\x4e\xfb\x04\x6f\x81\xf7\x3a\x33\x85\x83\xfb\x52\x05\x0e",
}
// A glyph count mismatch is fatal, since the per-glyph comparison below
// indexes by position in wants.
if ng := f.NumGlyphs(); ng != len(wants) {
t.Fatalf("NumGlyphs: got %d, want %d", ng, len(wants))
}
for i, want := range wants {
gd, err := f.viewGlyphData(nil, i)
if err != nil {
t.Errorf("i=%d: %v", i, err)
continue
}
// Compare as strings; mismatches are reported as space-separated hex.
if got := string(gd); got != want {
t.Errorf("i=%d:\ngot % x\nwant % x", i, got, want)
}
}
}

BIN
font/testdata/CFFTest.otf vendored Normal file

Binary file not shown.

124
font/testdata/CFFTest.sfd vendored Normal file
View File

@ -0,0 +1,124 @@
SplineFontDB: 3.0
FontName: CFFTest
FullName: CFFTest
FamilyName: CFFTest
Weight: Regular
Copyright: Copyright 2016 The Go Authors. All rights reserved.\nUse of this font is governed by a BSD-style license that can be found at https://golang.org/LICENSE.
Version: 001.000
ItalicAngle: 0
UnderlinePosition: -100
UnderlineWidth: 50
Ascent: 800
Descent: 200
LayerCount: 2
Layer: 0 0 "Back" 1
Layer: 1 0 "Fore" 0
XUID: [1021 367 888937226 7862908]
FSType: 8
OS2Version: 0
OS2_WeightWidthSlopeOnly: 0
OS2_UseTypoMetrics: 1
CreationTime: 1479626795
ModificationTime: 1480238616
PfmFamily: 17
TTFWeight: 400
TTFWidth: 5
LineGap: 90
VLineGap: 0
OS2TypoAscent: 0
OS2TypoAOffset: 1
OS2TypoDescent: 0
OS2TypoDOffset: 1
OS2TypoLinegap: 90
OS2WinAscent: 0
OS2WinAOffset: 1
OS2WinDescent: 0
OS2WinDOffset: 1
HheadAscent: 0
HheadAOffset: 1
HheadDescent: 0
HheadDOffset: 1
OS2Vendor: 'PfEd'
MarkAttachClasses: 1
DEI: 91125
LangName: 1033
Encoding: UnicodeBmp
UnicodeInterp: none
NameList: Adobe Glyph List
DisplaySize: -24
AntiAlias: 1
FitToEm: 1
WinInfo: 19712 32 23
BeginPrivate: 0
EndPrivate
TeXData: 1 0 0 346030 173015 115343 0 1048576 115343 783286 444596 497025 792723 393216 433062 380633 303038 157286 324010 404750 52429 2506097 1059062 262144
BeginChars: 65536 3
StartChar: zero
Encoding: 48 48 0
Width: 600
VWidth: 0
HStem: 0 100<248.223 341.575> 700 100<258.425 351.777>
VStem: 100 80<243.925 531.374> 420 80<268.627 556.075>
LayerCount: 2
Fore
SplineSet
300 700 m 0
210 700 180 450 180 300 c 24
180 220 220 100 300 100 c 0
390 100 420 350 420 500 c 24
420 580 380 700 300 700 c 0
300 800 m 0
400 800 500 580 500 400 c 0
500 220 400 0 300 0 c 0
200 0 100 220 100 400 c 0
100 580 200 800 300 800 c 0
EndSplineSet
Validated: 1
EndChar
StartChar: one
Encoding: 49 49 1
Width: 400
VWidth: 0
Flags: W
HStem: 0 21G<100 300>
VStem: 100 200<0 800>
LayerCount: 2
Fore
SplineSet
100 0 m 25
100 800 l 25
300 800 l 29
300 0 l 29
100 0 l 25
EndSplineSet
Validated: 1
EndChar
StartChar: uni4E2D
Encoding: 20013 20013 2
Width: 600
VWidth: 0
Flags: W
VStem: 245 86<641.8 752>
LayerCount: 2
Fore
SplineSet
141 520 m 25
235 562 l 25
243 752 l 25
331 758 l 25
341 592 l 25
453 620 l 25
463 434 l 25
355 414 l 25
331 26 l 25
245 400 l 25
137 356 l 25
141 520 l 25
EndSplineSet
Validated: 1
EndChar
EndChars
EndSplineFont

2
font/testdata/README vendored Normal file
View File

@ -0,0 +1,2 @@
CFFTest.sfd is a FontForge file for creating CFFTest.otf, a custom OpenType
font for testing the golang.org/x/image/font/sfnt package's CFF support.