local readfile = require'luametalatex-readfile'
local sfnt = require'luametalatex-font-sfnt'
local stdStrings = require'luametalatex-font-cff-data'

-- Template for the string.unpack format of an INDEX offset; %i is the offset size in bytes.
local offsetfmt = ">I%i"

-- Parse a CFF INDEX starting at byte position `i` of `buf`.
-- Returns
--   * a table with count+1 entries holding positions in `buf`: entry j is the
--     first byte of object j and entry j+1 is the byte after it,
--   * the position of the first byte after the INDEX.
-- For an empty INDEX (count == 0) an empty table is returned together with the
-- position directly after the two byte count field.
local function parse_index(buf, i)
  local count, offsize
  count, offsize, i = string.unpack(">I2B", buf, i)
  -- An empty INDEX has no offSize byte, so step back over the byte read as `offsize`.
  if count == 0 then return {}, i-1 end
  local fmt = offsetfmt:format(offsize)
  local offsets = {}
  for j=1,count+1 do
    offsets[j], i = string.unpack(fmt, buf, i)
  end
  -- `i` now points at the first data byte; stored offsets are relative to the byte before it.
  for j=1,count+1 do
    offsets[j] = offsets[j] + i - 1
  end
  return offsets, offsets[#offsets]
end

-- Text fragment for every nibble of a packed real number operand.
-- Nibbles without an entry (0xD and 0xF) stop the decoding in parse_real.
local real_mapping = { [0] = '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                       '.', 'E', 'E-', nil, '-', nil}

-- Decode the nibbles of a real number operand starting at byte `offset` of `cs`.
-- Returns the text fragments as individual values, followed by the offset of
-- the byte at which decoding stopped (parse_dict reads that offset from the
-- last slot of the collected results).
local function parse_real(cs, offset)
  local c = cs:byte(offset)
  if not c then return offset end
  local c1, c2 = real_mapping[c>>4], real_mapping[c&0xF]
  if not c1 or not c2 then
    return c1 or offset, c1 and offset
  else
    return c1, c2, parse_real(cs, offset+1) --Warning: This is not a tail-call,
    -- so we are affected by the stack limit. On the other hand, as long as
    -- there are less than ~50 bytes we should be safe.
  end
end

-- Operand readers. Each one consumes the operands collected in the array part
-- of `result` (clearing them) and returns the decoded operator value.

-- Consume exactly one operand and return it. Raises an error for any other operand count.
local function get_number(result)
  if #result ~= 1 then
    error(("CFF DICT operator expects exactly one operand, got %d"):format(#result))
  end
  local num = result[1]
  result[1] = nil
  return num
end

-- Consume one operand and return whether it equals 1.
local function get_bool(result)
  return get_number(result) == 1
end

-- Consume one operand as a string id and resolve it, first in the table of
-- predefined strings and otherwise in the font's own `strings` table.
local function get_string(result, strings)
  local sid = get_number(result)
  return stdStrings[sid] or strings[sid-#stdStrings]
end

-- Consume all operands and return them as a new array.
local function get_array(result)
  local arr = table.move(result, 1, #result, 1, {})
  for i=1,#result do result[i] = nil end
  return arr
end

-- Consume all operands as a delta encoded array and return the running sums.
local function get_delta(result)
  local arr = get_array(result)
  local last = 0
  for i=1,#arr do
    arr[i] = arr[i]+last
    last = arr[i]
  end
  return arr
end

-- Consume the two operands of the Private operator. myfunc uses entry 1 as the
-- size and entry 2 as the offset of the Private DICT.
local function get_private(result)
  local arr = get_array(result)
  assert(#arr == 2)
  return arr
end

-- Consume the three operands of the ROS operator. The first two are resolved
-- as string ids, the third one is returned unchanged.
local function get_ros(result, strings)
  local arr = get_array(result)
  assert(#arr == 3)
  -- get_string pops from `result`, so feed it the ids one at a time.
  result[1] = arr[1]
  arr[1] = get_string(result, strings)
  result[1] = arr[2]
  arr[2] = get_string(result, strings)
  return arr
end

-- Apply the six element matrix `m` to the point (x, y) and scale the result by 1000.
local function apply_matrix(m, x, y)
  return (m[1] * x + m[3] * y + m[5])*1000, (m[2] * x + m[4] * y + m[6])*1000
end

-- DICT operators: operator code -> {name, operand reader}.
-- Two byte operators "12 n" are stored under the key -n-1 (see parse_dict).
local operators = {
  [0] = {'version', get_string},
  {'Notice', get_string},
  {'FullName', get_string},
  {'FamilyName', get_string},
  {'Weight', get_string},
  {'FontBBox', get_array},
  {'BlueValues', get_delta},
  {'OtherBlues', get_delta},
  {'FamilyBlues', get_delta},
  {'FamilyOtherBlues', get_delta},
  {'StdHW', get_number},
  {'StdVW', get_number},
  nil, -- 12, escape
  {'UniqueID', get_number},
  {'XUID', get_array},
  {'charset', get_number},
  {'Encoding', get_number},
  {'CharStrings', get_number},
  {'Private', get_private},
  {'Subrs', get_number},
  {'defaultWidthX', get_number},
  {'nominalWidthX', get_number},
  [-1] = {'Copyright', get_string},
  [-2] = {'isFixedPitch', get_bool},
  [-3] = {'ItalicAngle', get_number},
  [-4] = {'UnderlinePosition', get_number},
  [-5] = {'UnderlineThickness', get_number},
  [-6] = {'PaintType', get_number},
  [-7] = {'CharstringType', get_number},
  [-8] = {'FontMatrix', get_array},
  [-9] = {'StrokeWidth', get_number},
  [-10] = {'BlueScale', get_number},
  [-11] = {'BlueShift', get_number},
  [-12] = {'BlueFuzz', get_number},
  [-13] = {'StemSnapH', get_delta},
  [-14] = {'StemSnapV', get_delta},
  [-15] = {'ForceBold', get_bool},
  [-18] = {'LanguageGroup', get_number},
  [-19] = {'ExpansionFactor', get_number},
  [-20] = {'initialRandomSeed', get_number},
  [-21] = {'SyntheticBase', get_number},
  [-22] = {'PostScript', get_string},
  [-23] = {'BaseFontName', get_string},
  [-24] = {'BaseFontBlend', get_delta},
  [-31] = {'ROS', get_ros},
  [-32] = {'CIDFontVersion', get_number},
  [-33] = {'CIDFontRevision', get_number},
  [-34] = {'CIDFontType', get_number},
  [-35] = {'CIDCount', get_number},
  [-36] = {'UIDBase', get_number},
  [-37] = {'FDArray', get_number},
  [-38] = {'FDSelect', get_number},
  [-39] = {'FontName', get_string},
}

-- Parse the DICT data stored in bytes i..j of `buf`.
-- Operands are collected in the array part of the result table until an
-- operator is found; the operator's reader then consumes them and the decoded
-- value is stored under the operator's name.
-- `strings` is the font's string table used to resolve string ids.
-- Raises "Unknown CFF operator" for operators missing from `operators`.
local function parse_dict(buf, i, j, strings)
  local result = {}
  while i<=j do
    local cmd = buf:byte(i)
    if cmd == 29 then
      result[#result+1] = string.unpack(">i4", buf:sub(i+1, i+4))
      i = i+4
    elseif cmd == 28 then
      result[#result+1] = string.unpack(">i2", buf:sub(i+1, i+2))
      i = i+2
    elseif cmd >= 251 then -- Actually "and cmd ~= 255", but 255 is reserved
      result[#result+1] = -((cmd-251)*256)-string.byte(buf, i+1)-108
      i = i+1
    elseif cmd >= 247 then
      result[#result+1] = (cmd-247)*256+string.byte(buf, i+1)+108
      i = i+1
    elseif cmd >= 32 then
      result[#result+1] = cmd-139
    elseif cmd == 30 then -- 31 is reserved again
      local real = {parse_real(buf, i+1)}
      -- The last value returned by parse_real is the offset where it stopped.
      i = real[#real]
      real[#real] = nil
      result[#result+1] = tonumber(table.concat(real))
    else
      if cmd == 12 then
        -- Escape: the real operator is in the next byte, mapped to a negative key.
        i = i+1
        cmd = -buf:byte(i)-1
      end
      local op = operators[cmd]
      if not op then error[[Unknown CFF operator]] end
      result[op[1]] = op[2](result, strings)
    end
    i = i+1
  end
  return result
end

-- Split the charstring `cs` into a list of commands.
-- Every entry of the returned list is a table whose first element is the
-- operator (escaped operators "12 n" are stored as -n-1, the entry still being
-- filled has `false`) followed by its operands. Calls to local (10) and global
-- (29) subroutines are expanded in place and the called index is marked in the
-- `used` table of `subrs` / `globalsubrs`.
-- While handling the first operator a separate first entry is inserted; if the
-- operand count indicates a leading width operand, it is moved to that entry's
-- second slot (myfunc reads it from parsed[1][2]).
-- `result.stemcount` counts the stem hints seen so far; it determines the
-- length of the mask following hintmask/cntrmask (19/20).
-- `result` is only passed in by the recursive calls for subroutines.
local function parse_charstring(cs, globalsubrs, subrs, result)
  result = result or {{false}, stemcount = 0}
  local lastresult = result[#result]
  local i = 1
  while i~=#cs+1 do
    local cmd = cs:byte(i)
    if cmd == 28 then
      lastresult[#lastresult+1] = string.unpack(">i2", cs:sub(i+1, i+2))
      i = i+2
    elseif cmd == 255 then
      lastresult[#lastresult+1] = string.unpack(">i4", cs:sub(i+1, i+4))/0x10000
      i = i+4
    elseif cmd >= 251 then
      lastresult[#lastresult+1] = -((cmd-251)*256)-string.byte(cs, i+1)-108
      i = i+1
    elseif cmd >= 247 then
      lastresult[#lastresult+1] = (cmd-247)*256+string.byte(cs, i+1)+108
      i = i+1
    elseif cmd >= 32 then
      lastresult[#lastresult+1] = cmd-139
    elseif cmd == 10 then
      local idx = lastresult[#lastresult]+subrs.bias
      local subr = subrs[idx]
      subrs.used[idx] = true
      lastresult[#lastresult] = nil
      parse_charstring(subr, globalsubrs, subrs, result)
      lastresult = result[#result]
    elseif cmd == 29 then
      local idx = lastresult[#lastresult]+globalsubrs.bias
      local subr = globalsubrs[idx]
      globalsubrs.used[idx] = true
      lastresult[#lastresult] = nil
      parse_charstring(subr, globalsubrs, subrs, result)
      lastresult = result[#result]
    elseif cmd == 11 then
      break -- We do not keep subroutines, so drop returns and continue with the outer commands
    elseif cmd == 12 then
      i = i+1
      cmd = cs:byte(i)
      lastresult[1] = -cmd-1
      lastresult = {false}
      result[#result+1] = lastresult
    elseif cmd == 19 or cmd == 20 then
      if #result == 1 then
        lastresult = {}
        result[#result+1] = lastresult
      end
      lastresult[1] = cmd
      -- The mask has one bit per stem hint, rounded up to whole bytes.
      local newi = i+(result.stemcount+7)//8
      lastresult[2] = cs:sub(i+1, newi)
      i = newi
    else
      if cmd == 21 and #result == 1 then
        table.insert(result, 1, {false})
        if #lastresult == 4 then
          result[1][2] = lastresult[2]
          table.remove(lastresult, 2)
        end
      elseif (cmd == 4 or cmd == 22) and #result == 1 then
        table.insert(result, 1, {false})
        if #lastresult == 3 then
          result[1][2] = lastresult[2]
          table.remove(lastresult, 2)
        end
      elseif cmd == 14 and #result == 1 then
        table.insert(result, 1, {false})
        if #lastresult == 2 or #lastresult == 6 then
          result[1][2] = lastresult[2]
          table.remove(lastresult, 2)
        end
      elseif cmd == 1 or cmd == 3 or cmd == 18 or cmd == 23 then
        if #result == 1 then
          table.insert(result, 1, {false})
          if #lastresult % 2 == 0 then
            result[1][2] = lastresult[2]
            table.remove(lastresult, 2)
          end
        end
        result.stemcount = result.stemcount + #lastresult//2
      end
      lastresult[1] = cmd
      lastresult = {false}
      result[#result+1] = lastresult
    end
    i = i+1
  end
  return result
end

-- Parse the charset table located `offset` bytes after position `i0` of `buf`.
-- `num` is the number of glyphs. Returns a table indexed by glyph id (starting
-- at 0, which always maps to 0). The values are string ids, or, if `strings`
-- is given, the resolved glyph names.
-- NOTE(review): ISOAdobe, Expert and ExpertSubset are not defined anywhere in
-- this file, so unless they are provided as globals elsewhere the predefined
-- charsets (offset 0, 1, 2 or missing) make this function return nil.
local function parse_charset(buf, i0, offset, strings, num)
  if not offset then offset = 0 end
  if offset == 0 then
    return ISOAdobe
  elseif offset == 1 then
    return Expert
  elseif offset == 2 then
    return ExpertSubset
  else
    offset = i0+offset
  end
  local format
  format, offset = string.unpack(">B", buf, offset)
  local charset = {[0] = 0}
  if format == 0 then
    for i=1,num-1 do
      charset[i], offset = string.unpack(">I2", buf, offset)
    end
  elseif format == 1 then
    local i = 1
    while i < num do
      local first, nLeft
      first, nLeft, offset = string.unpack(">I2I1", buf, offset)
      for j=0,nLeft do
        charset[i+j] = first+j
      end
      i = i+1+nLeft
    end
  elseif format == 2 then
    local i = 1
    while i < num do
      local first, nLeft
      first, nLeft, offset = string.unpack(">I2I2", buf, offset)
      for j=0,nLeft do
        charset[i+j] = first+j
      end
      i = i+1+nLeft
    end
  else
    error[[Invalid Charset format]]
  end
  if strings then -- We are not CID-keyed, so we should use strings instead of numbers
    local string_charset = {}
    for i=#charset,0,-1 do
      local sid = charset[i]
      charset[i] = nil
      string_charset[i] = stdStrings[sid] or strings[sid-#stdStrings]
    end
    charset = string_charset
  end
  return charset
end

-- Parse the encoding table located `offset` bytes after position `i0` of `buf`
-- and return a table mapping character codes to the matching entries of
-- `CharStrings`.
-- The predefined encodings (offset 0, 1 or missing) are not implemented and
-- raise the error "TODO".
local function parse_encoding(buf, i0, offset, CharStrings)
  if not offset then offset = 0 end
  if offset == 0 then
    error[[TODO]]
    return "StandardEncoding"
  elseif offset == 1 then
    error[[TODO]]
    return "ExpertEncoding"
  else
    offset = i0+offset
  end
  local format, num
  format, num, offset = string.unpack(">BB", buf, offset)
  local encoding = {}
  if format == 0 then
    for i=1,num do
      local code
      code, offset = string.unpack(">B", buf, offset)
      encoding[code] = CharStrings[i]
    end
  elseif format == 1 then
    local i = 1
    while i <= num do
      local first, nLeft
      first, nLeft, offset = string.unpack(">BB", buf, offset)
      for j=0,nLeft do
        encoding[first + j] = CharStrings[i + j]
      end
      i = i+1+nLeft
    end
  else
    error[[Invalid Encoding format]]
  end
  return encoding
end

-- Parse the FDSelect table at position `offset` of `buf` and store the
-- 1-based font dictionary index of every glyph in slot 3 of its `CharStrings`
-- entry. Only formats 0 and 3 are supported.
local function parse_fdselect(buf, offset, CharStrings)
  local format
  format, offset = string.unpack(">B", buf, offset)
  if format == 0 then
    -- NOTE(review): assignment starts at key 1 while the tables built by
    -- myfunc for GID based access start at key 0 -- confirm this is intended.
    for i=1,#CharStrings-1 do
      local code
      code, offset = string.unpack(">B", buf, offset)
      CharStrings[i][3] = code + 1
    end -- Reimplement with string.byte
  elseif format == 3 then
    local count
    count, offset = string.unpack(">I2", buf, offset)
    for i=1,count do
      -- Every range record is three bytes long; `after` is the first glyph
      -- of the following record (or the sentinel after the last one).
      local first, code, after = string.unpack(">I2BI2", buf, offset)
      for j=first, after-1 do
        CharStrings[j][3] = code + 1
      end
      offset = offset + 3
    end
  else
    error[[Invalid FDSelect format]]
  end
end

-- Map the entries of `usedcids` to glyph indices by glyph name.
-- `buf` at position `i` contains a two byte glyph count, one two byte name
-- index per glyph and, directly after them, the custom names as strings with
-- a one byte length prefix. `encoding` maps the first entry of every used cid
-- to a glyph name.
-- Returns a new list parallel to `usedcids`; entries with a name get the
-- zero-based glyph index in slot 1 and the original entry in `old`.
-- Raises "Missing character" if a requested name does not occur in the table.
-- NOTE(review): `stdnames` is not defined in this file and nothing visible
-- here calls this function.
local function applyencoding(buf, i, usedcids, encoding)
  local usednames = {}
  local numglyphs
  numglyphs, i = string.unpack(">I2", buf, i)
  local stroffset = 2*numglyphs + i
  -- The custom names are read lazily, in order, the first time they are requested.
  local names = setmetatable({}, {__index = function(t, i)
    for j=#t+1,i do
      t[j], stroffset = string.unpack("s1", buf, stroffset)
    end
    return t[i]
  end})
  local newusedcids = {}
  for j=1,#usedcids do
    local name = encoding[usedcids[j][1]]
    if name then
      local new = {old = usedcids[j]}
      usednames[name], newusedcids[j] = new, new
    else
      newusedcids[j] = {j} -- FIXME: Someone used a character which does not exist in the encoding.
      -- This should probably at least trigger a warning.
    end
  end
  for j=1,numglyphs do
    local name
    name, i = string.unpack(">I2", buf, i)
    if name < 258 then
      name = stdnames[name]
    else
      name = names[name-257]
    end
    if usednames[name] then
      usednames[name][1] = j-1
      usednames[name] = nil
    end
  end
  if next(usednames) then error[[Missing character]] end
  return newusedcids
end

-- Parse the CFF font number `fontid` (default 1) of the FontSet starting at
-- position `i0` of `buf` and, if `usedcids` is given, restrict it to the
-- listed glyphs. In that case the second slot of every `usedcids` entry is
-- overwritten with the glyph width read from the charstring, and unused
-- subroutines are replaced by empty strings.
--
-- The encoding parameter might be:
--   an encoding dictionary - Use the supplied encoding
--   true - Use the built-in encoding
--   false - Use GIDs
--   nil - Use CIDs, falling back to GIDs in name-based fonts
--
-- `trust_widths` is not used by the code visible here.
-- NOTE(review): this function is defined as a global.
function myfunc(buf, i0, fontid, usedcids, encoding, trust_widths)
-- return function(filename, fontid)
  fontid = fontid or 1
  local major, minor, hdrSize, offSize = string.unpack(">BBBB", buf, i0)
  if major ~= 1 then error[[Unsupported CFF version]] end
  -- local offfmt = offsetfmt:format(offSize)
  local nameoffsets, topoffsets, stringoffsets, globalsubrs
  local i = i0+hdrSize
  nameoffsets, i = parse_index(buf, i)
  topoffsets, i = parse_index(buf, i)
  stringoffsets, i = parse_index(buf, i)
  globalsubrs, i = parse_index(buf, i)
  local strings = {}
  for j=1,#stringoffsets-1 do
    strings[j] = buf:sub(stringoffsets[j], stringoffsets[j+1]-1)
  end
  if #nameoffsets ~= #topoffsets then error[[Inconsistant size of FontSet]] end
  if fontid >= #nameoffsets then error[[Invalid font id]] end
  local top = parse_dict(buf, topoffsets[fontid], topoffsets[fontid+1]-1, strings)
  top.FontName = buf:sub(nameoffsets[fontid], nameoffsets[fontid+1]-1)
  local gsubrsdict = {}
  for i=1,#globalsubrs-1 do
    gsubrsdict[i] = buf:sub(globalsubrs[i], globalsubrs[i+1]-1)
  end
  gsubrsdict.used = {}
  gsubrsdict.bias = #gsubrsdict < 1240 and 108 or #gsubrsdict < 33900 and 1132 or 32769
  top.GlobalSubrs = gsubrsdict
  local CharStrings = parse_index(buf, i0+top.CharStrings)
  -- The first operand is true exactly if `encoding` is not a boolean.
  if not not encoding ~= encoding and (encoding or top.ROS) then
    -- If we use the built-in encoding *or* GIDs, we do not need to waste our
    -- time making sense of the charset
    local charset = parse_charset(buf, i0, top.charset, not top.ROS and strings, #CharStrings-1)
    local named_charstrings = {}
    for i=1,#CharStrings-1 do
      named_charstrings[charset[i-1]] = {CharStrings[i], CharStrings[i+1]-1}
    end
    CharStrings = named_charstrings
  else
    -- Convert the offset list in place into {first, last} pairs indexed by GID (from 0).
    for i=1,#CharStrings-1 do
      CharStrings[i-1] = {CharStrings[i], CharStrings[i+1]-1}
    end
    -- Drop the two offsets left over at the end of the list.
    CharStrings[#CharStrings] = nil
    CharStrings[#CharStrings] = nil
  end
  -- top.CharStrings = named_charstrings
  if not top.ROS then
    -- if encoding == true and top.Encoding < 3 then
    --   if not reencode and parsed_t1.Encoding == "StandardEncoding" then
    --     reencode = kpse.find_file("8a.enc", "enc files")
    --   end
    -- end
    if encoding == true then -- Use the built-in encoding
      CharStrings = parse_encoding(buf, i0, top.Encoding, CharStrings)
    elseif encoding then
      encoding = require'luametalatex-font-enc'(encoding)
      local encoded = {}
      for i, n in pairs(encoding) do
        encoded[i] = CharStrings[n]
      end
      CharStrings = encoded
    end -- else: Use GIDs
    top.Privates = {parse_dict(buf, i0+top.Private[2], i0+top.Private[2]+top.Private[1]-1, strings)}
    local subrs = top.Privates[1].Subrs
    if subrs then
      subrs = parse_index(buf, i0+top.Private[2]+subrs)
      local subrsdict ={}
      for j=1,#subrs-1 do
        subrsdict[j] = buf:sub(subrs[j], subrs[j+1]-1)
      end
      subrsdict.used = {}
      subrsdict.bias = #subrsdict < 1240 and 108 or #subrsdict < 33900 and 1132 or 32769
      top.Privates[1].Subrs = subrsdict
    end
    top.Private = nil
  else
    assert(not encoding) -- FIXME: If we actually get these from OpenType, the glyph names might be hidden there...
    -- Would that even be allowed?
    local fonts = parse_index(buf, i0+top.FDArray)
    local privates = {}
    top.Privates = privates
    for i=1,#fonts-1 do
      local fontdir = parse_dict(buf, fonts[i], fonts[i+1]-1, strings)
      privates[i] = parse_dict(buf, i0+fontdir.Private[2], i0+fontdir.Private[2]+fontdir.Private[1]-1, strings)
      privates[i].FontName = fontdir.FontName
      local subrs = privates[i].Subrs
      if subrs then
        subrs = parse_index(buf, i0+fontdir.Private[2]+subrs)
        local subrsdict ={}
        for j=1,#subrs-1 do
          subrsdict[j] = buf:sub(subrs[j], subrs[j+1]-1)
        end
        subrsdict.used = {}
        subrsdict.bias = #subrsdict < 1240 and 108 or #subrsdict < 33900 and 1132 or 32769
        privates[i].Subrs = subrsdict
      end
    end
    top.FDArray = nil
    parse_fdselect(buf, i0+top.FDSelect, CharStrings)
  end
  local glyphs = {}
  -- if false and usedcids then -- Subsetting FIXME: Disabled, because other tables have to be fixed up first
  if usedcids then -- Subsetting FIXME: Should be Disabled, because other tables have to be fixed up first
    -- Actually seems to work now, let's test it a bit more
    local usedfonts = {}
    for i=1,#usedcids do
      local cid = usedcids[i][1]
      local cs = CharStrings[cid]
      glyphs[i] = {cs = buf:sub(cs[1], cs[2]), index = cid, cidfont = cs[3], usedcid = usedcids[i]}
      usedfonts[CharStrings[cid][3] or 1] = true
    end
    -- Keep only the Private DICTs which are actually used and renumber them.
    local lastfont = 0
    for i=1,#top.Privates do
      if usedfonts[i] then
        lastfont = lastfont + 1
        usedfonts[i] = lastfont
        top.Privates[lastfont] = top.Privates[i]
      end
    end
    for i=lastfont+1,#top.Privates do
      top.Privates[i] = nil
    end
    for i=1,#glyphs do
      glyphs[i].cidfont = usedfonts[glyphs[i].cidfont]
    end
    -- TODO: CIDFont / Privates subsetting... DONE(?)
    -- TODO: Subrs subsetting... Instead of deleting unused SubRs, we only make them empty.
    -- This avoids problems with renumberings which would have to be consistent across
    -- Fonts in some odd way, because they might be used by globalsubrs.
    for i=1,#glyphs do
      local g = glyphs[i]
      local private = top.Privates[g.cidfont or 1]
      local parsed = parse_charstring(g.cs, top.GlobalSubrs, private.Subrs) -- TODO: Implement
      local width = parsed[1][2]
      if width then
        width = width + (private.nominalWidthX or 0)
      else
        width = private.defaultWidthX or 0
      end
      local m = top.FontMatrix or {.001, 0, 0, .001, 0, 0}
      width = width * m[1] + m[3] -- I really have no idea why m[3] /= 0 might happen, but why not?
      width = math.floor(width*1000+.5) -- Thats rescale into "PDF glyph space"
      if g.usedcid[2] ~= width then print("MISMATCH:", g.usedcid[1], g.usedcid[2], width) end
      g.usedcid[2] = width
    end
    for i=1,#top.GlobalSubrs do
      if not top.GlobalSubrs.used[i] then
        top.GlobalSubrs[i] = ""
      end
    end
    for _, priv in ipairs(top.Privates) do
      if priv.Subrs then
        for i=1,#priv.Subrs do
          if not priv.Subrs.used[i] then
            priv.Subrs[i] = ""
          end
        end
      end
    end
  else
    for i, cs in pairs(CharStrings) do -- Not subsetting
      -- NOTE(review): nothing in this file sets a `font` field; parse_fdselect
      -- stores the font dictionary index in cs[3] -- confirm which one is meant.
      glyphs[#glyphs+1] = {cs = buf:sub(cs[1], cs[2]), index = i, cidfont = cs.font}
    end
  end
  top.glyphs = glyphs
  -- NOTE(review): the source text is corrupted at this point. Everything
  -- between `a.index` and `= readfile(` is missing: presumably the remainder
  -- of the sort comparator, the end of myfunc (which has to return two values,
  -- since the code below unpacks `content, bbox` from it) and the header of
  -- the exported loader function which provides `filename`, `fontid`,
  -- `usedcids`, `encoding`, `fontdir` and `file`. The surviving tokens are
  -- kept verbatim instead of guessing the lost code; restore this span from
  -- version control. Until then the file does not parse.
  table.sort(glyphs, function(a,b)return a.index = readfile('subset', filename, nil)
  local buf = file()
  local i = 1
  local magic = buf:sub(1, 4)
  if magic == "ttcf" or magic == "OTTO" then
    -- assert(not encoding) -- nil or false
    encoding = encoding or false
    local magic, tables = sfnt.parse(buf, fontid) -- TODO: Interpret widths etc, they might differ from the CFF ones.
    assert(magic == "OTTO") -- Also CFF2 would be nice to have
    i = tables['CFF '][1]
  end
  local content, bbox = myfunc(buf, i, fontid, usedcids, encoding)
  fontdir.bbox = bbox
  return content
end end