Improve T1 parser
This commit is contained in:
parent
5dead1b1a0
commit
46cada8666
@ -1,6 +1,8 @@
|
|||||||
local white = (lpeg.S'\0\9\10\12\13\32' + '%' * (1 - lpeg.S'\r\n')^0)^1
|
local white = (lpeg.S'\0\9\10\12\13\32' + '%' * (1 - lpeg.S'\r\n')^0)^1 -- Whitespace
|
||||||
|
|
||||||
local regular = 1-lpeg.S'()<>[]{}/%\0\9\10\12\13\32'
|
local regular = 1-lpeg.S'()<>[]{}/%\0\9\10\12\13\32'
|
||||||
local lastbase = '123456789abcdefghiklmnopqrstuvwxyz'
|
local lastbase = '123456789abcdefghiklmnopqrstuvwxyz'
|
||||||
|
|
||||||
local number = lpeg.Cmt(lpeg.R'09'^1/tonumber * '#', function(s, p, base)
|
local number = lpeg.Cmt(lpeg.R'09'^1/tonumber * '#', function(s, p, base)
|
||||||
if base < 2 then return end
|
if base < 2 then return end
|
||||||
local pattern
|
local pattern
|
||||||
@ -13,6 +15,7 @@ local number = lpeg.Cmt(lpeg.R'09'^1/tonumber * '#', function(s, p, base)
|
|||||||
return p, num and tonumber(num, base)
|
return p, num and tonumber(num, base)
|
||||||
end)
|
end)
|
||||||
+ (lpeg.S'+-'^-1 * ('.' * lpeg.R'09'^1 + lpeg.R'09'^1 * lpeg.P'.'^-1 * lpeg.R'09'^0) * (lpeg.S'eE' * lpeg.S'+-'^-1 * lpeg.R'09'^1)^-1)/tonumber
|
+ (lpeg.S'+-'^-1 * ('.' * lpeg.R'09'^1 + lpeg.R'09'^1 * lpeg.P'.'^-1 * lpeg.R'09'^0) * (lpeg.S'eE' * lpeg.S'+-'^-1 * lpeg.R'09'^1)^-1)/tonumber
|
||||||
|
|
||||||
local literalstring = lpeg.P{'(' * lpeg.Cs((
|
local literalstring = lpeg.P{'(' * lpeg.Cs((
|
||||||
lpeg.P'\\n'/'\n'+lpeg.P'\\r'/'\r'+lpeg.P'\\t'/'\t'+lpeg.P'\\b'/'\b'+lpeg.P'\\f'/'\f'
|
lpeg.P'\\n'/'\n'+lpeg.P'\\r'/'\r'+lpeg.P'\\t'/'\t'+lpeg.P'\\b'/'\b'+lpeg.P'\\f'/'\f'
|
||||||
+'\\'*lpeg.C(lpeg.R'07'*lpeg.R'07'^-2)/function(n)return string.char(tonumber(n, 8))end
|
+'\\'*lpeg.C(lpeg.R'07'*lpeg.R'07'^-2)/function(n)return string.char(tonumber(n, 8))end
|
||||||
@ -20,16 +23,27 @@ local literalstring = lpeg.P{'(' * lpeg.Cs((
|
|||||||
+'\\'*lpeg.C(1)/1
|
+'\\'*lpeg.C(1)/1
|
||||||
+('\n' + ('\r' * lpeg.P'\n'^-1))/'\n'
|
+('\n' + ('\r' * lpeg.P'\n'^-1))/'\n'
|
||||||
+(1-lpeg.S'()\\')+lpeg.V(1))^0) * ')'}
|
+(1-lpeg.S'()\\')+lpeg.V(1))^0) * ')'}
|
||||||
|
|
||||||
local hexstring = '<' * lpeg.Cs((
|
local hexstring = '<' * lpeg.Cs((
|
||||||
lpeg.C(lpeg.R'09'+lpeg.R'af'+lpeg.R'AF')*(lpeg.C(lpeg.R'09'+lpeg.R'af'+lpeg.R'AF')+lpeg.Cc'0')/function(a,b)return string.char(tonumber(a..b, 16))end)^0) * '>'
|
lpeg.C(lpeg.R'09'+lpeg.R'af'+lpeg.R'AF')*(lpeg.C(lpeg.R'09'+lpeg.R'af'+lpeg.R'AF')+lpeg.Cc'0')/function(a,b)return string.char(tonumber(a..b, 16))end)^0) * '>'
|
||||||
|
|
||||||
local name = lpeg.C(regular^1)
|
local name = lpeg.C(regular^1)
|
||||||
local lname = '/' * name / 1
|
local lname = '/' * name / 1
|
||||||
|
|
||||||
|
local boolean = (lpeg.P'true' + 'false')/{["true"] = true, ["false"] = false}
|
||||||
|
|
||||||
|
-- Everything above this line works pretty reliably and can be understood by reading the PostScript specs.
|
||||||
|
|
||||||
|
-- This is Type1 specific. The only thing which might need adjustment is adding alternative spellings for -|, RD, |-, |, etc.
|
||||||
|
local binary_bytes = lpeg.Cmt(number*white^-1*(lpeg.P'-| ' + 'RD '), function(s, p, l)return p+l, s:sub(p, p+l-1) end)*white^-1*(lpeg.P"|-"+"|"+"ND"+"NP")
|
||||||
|
-- Attention: |- and ND already contain an implicit `def`; | and NP contain an implicit `put`
|
||||||
|
|
||||||
local function decrypt(key, n, cipher)
|
local function decrypt(key, n, cipher)
|
||||||
-- Generally you should never implement your own crypto. So we call a well known, peer reviewed,
|
-- Generally you should never implement your own crypto. So we call a well known, peer reviewed,
|
||||||
-- high-quality cryptographic library. --- Ha-Ha, of course we are implementing it ourselves.
|
-- high-quality cryptographic library. --- Ha-Ha, of course we are implementing it ourselves.
|
||||||
-- That might be completely insecure, but given that the encryption keys are well known constants
|
-- That might be completely insecure, but given that the encryption keys are well known constants
|
||||||
-- documented in the T1 Spec, there is no need to worry about it.
|
-- documented in the T1 Spec, there is no need to worry about it.
|
||||||
-- Also I do not think any cryptorgraphic library would implement this anyway, it doesn't even
|
-- Also I do not think any cryptographic library would implement this anyway, it doesn't even
|
||||||
-- really deserve the term encryption.
|
-- really deserve the term encryption.
|
||||||
local decoded = {string.byte(cipher, 1,-1)}
|
local decoded = {string.byte(cipher, 1,-1)}
|
||||||
for i=1,#decoded do
|
for i=1,#decoded do
|
||||||
@ -40,66 +54,77 @@ local function decrypt(key, n, cipher)
|
|||||||
return string.char(table.unpack(decoded, n+1))
|
return string.char(table.unpack(decoded, n+1))
|
||||||
end
|
end
|
||||||
|
|
||||||
-- io.stdout:write(decrypt(55665, 4, string.sub(io.stdin:read'a', 7)))
|
local anytype = {
|
||||||
local boolean = (lpeg.P'true' + 'false')/{["true"] = true, ["false"] = false}
|
hexstring
|
||||||
local anytype = {hexstring + literalstring + number + lname + boolean + lpeg.V(2) + name, lpeg.Ct('[' * (white^-1 * lpeg.V(1))^0 * white^-1 * ']' + '{' * (white^-1 * lpeg.V(1))^0 * white^-1 * '}' * white^-1 * lpeg.P"executeonly"^-1)}
|
+ literalstring
|
||||||
local dict = lpeg.Cf(lpeg.Carg(1) * lpeg.Cg(white^-1*lname*white^-1*(anytype)*white^-1*lpeg.P"readonly"^-1*white^-1*lpeg.P"noaccess"^-1*white^-1*(lpeg.P"def"+"ND"+"|-"))^0, rawset)
|
+ number
|
||||||
local encoding = (white+anytype-("dup"*white))^0/0
|
+ lname
|
||||||
|
+ boolean
|
||||||
|
+ lpeg.V'array'
|
||||||
|
+ name,
|
||||||
|
array = lpeg.Ct( '[' * (white^-1 * lpeg.V(1))^0 * white^-1 * ']' -- Arrays have two possible syntaxes
|
||||||
|
+ '{' * (white^-1 * lpeg.V(1))^0 * white^-1 * '}') * (white * "executeonly")^-1
|
||||||
|
}
|
||||||
|
|
||||||
|
local function skip_until(p)
|
||||||
|
if type(p) == 'string' then p = p * -name end
|
||||||
|
return (white + anytype - p)^0/0
|
||||||
|
end
|
||||||
|
local skip_to_begin = skip_until'begin' * 'begin'
|
||||||
|
|
||||||
|
local def_like = (lpeg.P'def' + 'ND' + '|-') * -name
|
||||||
|
|
||||||
|
local encoding = '/' * lpeg.C'Encoding' * -name
|
||||||
|
* skip_until'dup'
|
||||||
* lpeg.Cf(lpeg.Ct''
|
* lpeg.Cf(lpeg.Ct''
|
||||||
* lpeg.Cg("dup"*white*number*white^-1*lname*white^-1*"put"*white)^0
|
* lpeg.Cg("dup"*white*number*white^-1*lname*white^-1*"put"*white)^0
|
||||||
, rawset)
|
, rawset)
|
||||||
* lpeg.P"readonly"^-1*white*"def"
|
* ("readonly"*white)^-1 * "def"
|
||||||
local function parse_encoding(offset, str)
|
|
||||||
local found
|
local charstr = '/' * lpeg.C'CharStrings' * -name
|
||||||
found, offset = (encoding*lpeg.Cp()):match(str, offset)
|
* skip_until(lname) -- sometimes we get weird stuff in between. Just make sure that we don't swallow a charname
|
||||||
return found, offset
|
|
||||||
end
|
|
||||||
local function parse_fontinfo(offset, str)
|
|
||||||
local found
|
|
||||||
repeat
|
|
||||||
found, offset = ((white+(anytype-name))^0/0*name*lpeg.Cp()):match(str, offset)
|
|
||||||
until found == 'begin'
|
|
||||||
found, offset = (dict*lpeg.Cp()):match(str, offset, {})
|
|
||||||
offset = (white^-1*"end"*white^-1*lpeg.P"readonly"^-1*white^-1*"def"):match(str, offset)
|
|
||||||
return found, offset
|
|
||||||
end
|
|
||||||
local binary_bytes = lpeg.Cmt(number*white^-1*(lpeg.P'-| ' + 'RD '), function(s, p, l)return p+l, s:sub(p, p+l-1) end)*white^-1*(lpeg.P"|-"+"|"+"ND"+"NP")
|
|
||||||
local charstr = white^-1*lname*(white^-1*(anytype-lname))^0/0*white^-1
|
|
||||||
* lpeg.Cf(lpeg.Ct''
|
* lpeg.Cf(lpeg.Ct''
|
||||||
* lpeg.Cg(lname*white^-1*binary_bytes*white)^0
|
* lpeg.Cg(lname*white^-1*binary_bytes*white)^0 -- Remember: binary_bytes includes a `def`
|
||||||
, rawset)
|
, rawset)
|
||||||
* lpeg.P"end"*white
|
* lpeg.P"end"*white
|
||||||
local subrs = (white^-1*(anytype-("dup"*white)))^0/0*white^-1
|
|
||||||
* lpeg.Cf(lpeg.Ct''
|
local subrs = '/' * lpeg.C'Subrs' * -name
|
||||||
* lpeg.Cg("dup"*white^-1*number*white^-1*binary_bytes*white)^0
|
* skip_until'dup'
|
||||||
, rawset)
|
* lpeg.Cf(lpeg.Ct''
|
||||||
* (lpeg.P"readonly"*white)^-1 * (lpeg.P"noaccess"*white)^-1*(lpeg.P"def"+"ND"+"|-")
|
* lpeg.Cg("dup"*white^-1*number*white^-1*binary_bytes*white)^0
|
||||||
|
, rawset)
|
||||||
|
* (lpeg.P"readonly"*white)^-1 * (lpeg.P"noaccess"*white)^-1*(lpeg.P"def"+"ND"+"|-")
|
||||||
|
|
||||||
|
-- lpeg.V(2) == dict_entries
|
||||||
|
local dict = skip_to_begin * lpeg.V(2) * white^-1 * 'end' * white * ('readonly' * white)^-1 * ('noaccess' * white)^-1 * def_like
|
||||||
|
local dict_entry = encoding + subrs +
|
||||||
|
'/' * lpeg.C'FontInfo' * dict +
|
||||||
|
lname -- key
|
||||||
|
* white^-1
|
||||||
|
* anytype -- value
|
||||||
|
* ((white + anytype - (def_like + 'dict' + 'array') * -name)/0 * white^-1)^0 -- Sometimes we get Postscript code in between.
|
||||||
|
* def_like
|
||||||
|
local dict_entries = lpeg.P{
|
||||||
|
lpeg.Cf(lpeg.Carg(1) * lpeg.Cg(white^-1*lpeg.V(3))^0, rawset),
|
||||||
|
lpeg.Cf(lpeg.Ct'' * lpeg.Cg(white^-1*lpeg.V(3))^0, rawset),
|
||||||
|
dict_entry,
|
||||||
|
}
|
||||||
local function parse_private(offset, str)
|
local function parse_private(offset, str)
|
||||||
local mydict, found
|
local mydict, found
|
||||||
repeat
|
offset = (skip_to_begin * lpeg.Cp()):match(str, offset)
|
||||||
found, offset = ((white+(anytype-name))^0/0*name*lpeg.Cp()):match(str, offset)
|
|
||||||
until found == 'begin'
|
-- Scan the dictionary
|
||||||
mydict, offset = (dict*lpeg.Cp()):match(str, offset, {})
|
mydict, offset = (dict_entries*lpeg.Cp()):match(str, offset, {})
|
||||||
found = (white^-1*lname):match(str, offset)
|
|
||||||
if found == "Subrs" then
|
|
||||||
mydict.Subrs, offset = (subrs*lpeg.Cp()):match(str, offset)
|
|
||||||
end
|
|
||||||
return mydict, offset
|
return mydict, offset
|
||||||
end
|
end
|
||||||
local function continue_maintable(offset, str, mydict)
|
local function continue_maintable(offset, str, mydict)
|
||||||
mydict, offset = (dict*lpeg.Cp()):match(str, offset, mydict)
|
mydict, offset = (dict_entries*lpeg.Cp()):match(str, offset, mydict)
|
||||||
local found = (white^-1*lname):match(str, offset)
|
local found = (white^-1*lname):match(str, offset)
|
||||||
if found == "FontInfo" then
|
if found == "Private" then -- Scanned separately because it isn't always ended in a regular way
|
||||||
mydict.FontInfo, offset = parse_fontinfo(offset, str)
|
|
||||||
return continue_maintable(offset, str, mydict)
|
|
||||||
elseif found == "Encoding" then
|
|
||||||
mydict.Encoding, offset = parse_encoding(offset, str)
|
|
||||||
return continue_maintable(offset, str, mydict)
|
|
||||||
elseif found == "Private" then
|
|
||||||
mydict.Private, offset = parse_private(offset, str)
|
mydict.Private, offset = parse_private(offset, str)
|
||||||
return continue_maintable(offset, str, mydict)
|
return continue_maintable(offset, str, mydict)
|
||||||
elseif found == "CharStrings" then
|
elseif found == "CharStrings" then -- This could be included in normal scanning, but it is our signal to terminate
|
||||||
mydict.CharStrings, offset = (charstr*lpeg.Cp()):match(str, offset)
|
found, mydict.CharStrings, offset = (charstr*lpeg.Cp()):match(str, offset)
|
||||||
return mydict
|
return mydict
|
||||||
else
|
else
|
||||||
local newoffset = ((white+name)^1/0*lpeg.Cp()):match(str, offset)
|
local newoffset = ((white+name)^1/0*lpeg.Cp()):match(str, offset)
|
||||||
@ -107,23 +132,18 @@ local function continue_maintable(offset, str, mydict)
|
|||||||
return continue_maintable(newoffset, str, mydict)
|
return continue_maintable(newoffset, str, mydict)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
print(str:sub(offset))
|
|
||||||
error[[Unable to read Type 1 font]]
|
error[[Unable to read Type 1 font]]
|
||||||
end
|
end
|
||||||
local function parse_maintable(offset, str)
|
local function parse_maintable(offset, str)
|
||||||
local found
|
local found
|
||||||
repeat
|
offset = (skip_to_begin * lpeg.Cp()):match(str, offset)
|
||||||
found, offset = ((white+(anytype-name))^0/0*name*lpeg.Cp()):match(str, offset)
|
|
||||||
until found == 'begin'
|
|
||||||
return continue_maintable(offset, str, {})
|
return continue_maintable(offset, str, {})
|
||||||
end
|
end
|
||||||
|
|
||||||
return function(filename)
|
return function(filename)
|
||||||
local file = io.open(filename, 'rb')
|
local file = io.open(filename, 'rb')
|
||||||
local _, length = string.unpack("<I2I4", file:read(6))
|
local preface, private = string.unpack("<xxs4xxs4", file:read'a')
|
||||||
local preface = file:read(length)
|
private = decrypt(55665, 4, private)
|
||||||
_, length = string.unpack("<I2I4", file:read(6))
|
|
||||||
local private = decrypt(55665, 4, file:read(length))
|
|
||||||
file:close()
|
file:close()
|
||||||
local after = parse_maintable(1, preface .. private)
|
local after = parse_maintable(1, preface .. private)
|
||||||
local lenIV = after.Private.lenIV or 4
|
local lenIV = after.Private.lenIV or 4
|
||||||
|
Loading…
Reference in New Issue
Block a user