New escaping system
This commit is contained in:
parent
f3af2599da
commit
e75211162c
@ -75,10 +75,9 @@ local function write_infodir(p)
|
|||||||
return p:indirect(nil, string.format("<<%s%s>>", infodir, additional))
|
return p:indirect(nil, string.format("<<%s%s>>", infodir, additional))
|
||||||
end
|
end
|
||||||
|
|
||||||
local function pdf_string(s)
|
local pdf_escape = require'luametalatex-pdf-escape'
|
||||||
-- Emulate other engines here: If looks like an escaped string, treat it as such. Otherwise, add parenthesis.
|
local pdf_bytestring = pdf_escape.escape_bytes
|
||||||
return s:match("^%(.*%)$") or s:match("^<.*>$") or '(' .. s .. ')'
|
local pdf_text = pdf_escape.escape_text
|
||||||
end
|
|
||||||
|
|
||||||
callback.register("stop_run", function()
|
callback.register("stop_run", function()
|
||||||
if not pfile then
|
if not pfile then
|
||||||
@ -185,7 +184,7 @@ local function get_action_attr(p, action, is_link)
|
|||||||
local action_attr = is_link and "/Subtype/Link/A<<" or "<<"
|
local action_attr = is_link and "/Subtype/Link/A<<" or "<<"
|
||||||
local file = action.file
|
local file = action.file
|
||||||
if file then
|
if file then
|
||||||
action_attr = action_attr .. '/F' .. pdf_string(file)
|
action_attr = action_attr .. '/F' .. pdf_bytestring(file)
|
||||||
local newwindow = action.new_window
|
local newwindow = action.new_window
|
||||||
if newwindow and newwindow > 0 then
|
if newwindow and newwindow > 0 then
|
||||||
action_attr = action_attr .. '/NewWindow ' .. (newwindow == 1 and 'true' or 'false')
|
action_attr = action_attr .. '/NewWindow ' .. (newwindow == 1 and 'true' or 'false')
|
||||||
@ -199,7 +198,7 @@ local function get_action_attr(p, action, is_link)
|
|||||||
local id = action.id
|
local id = action.id
|
||||||
if file then
|
if file then
|
||||||
assert(type(id) == "string")
|
assert(type(id) == "string")
|
||||||
action_attr = action_attr .. "/S/GoToR/D" .. pdf_string(id) .. ">>"
|
action_attr = action_attr .. "/S/GoToR/D" .. pdf_bytestring(id) .. ">>"
|
||||||
else
|
else
|
||||||
local dest = dests[id]
|
local dest = dests[id]
|
||||||
if not dest then
|
if not dest then
|
||||||
@ -207,7 +206,7 @@ local function get_action_attr(p, action, is_link)
|
|||||||
dests[id] = dest
|
dests[id] = dest
|
||||||
end
|
end
|
||||||
if type(id) == "string" then
|
if type(id) == "string" then
|
||||||
action_attr = action_attr .. "/S/GoTo/D" .. pdf_string(id) .. ">>"
|
action_attr = action_attr .. "/S/GoTo/D" .. pdf_bytestring(id) .. ">>"
|
||||||
else
|
else
|
||||||
action_attr = string.format("%s/S/GoTo/D %i 0 R>>", action_attr, dest)
|
action_attr = string.format("%s/S/GoTo/D %i 0 R>>", action_attr, dest)
|
||||||
end
|
end
|
||||||
@ -577,11 +576,11 @@ token.luacmd("pdfextension", function(_, imm)
|
|||||||
local level = token.scan_int()
|
local level = token.scan_int()
|
||||||
local open = token.scan_keyword'open'
|
local open = token.scan_keyword'open'
|
||||||
local title = token.scan_string()
|
local title = token.scan_string()
|
||||||
outline:add(pdf_string(title), action, level, open, attr)
|
outline:add(pdf_text(title), action, level, open, attr)
|
||||||
else
|
else
|
||||||
local count = token.scan_keyword'count' and token.scan_int() or 0
|
local count = token.scan_keyword'count' and token.scan_int() or 0
|
||||||
local title = token.scan_string()
|
local title = token.scan_string()
|
||||||
outline:add_legacy(pdf_string(title), action, count, attr)
|
outline:add_legacy(pdf_text(title), action, count, attr)
|
||||||
end
|
end
|
||||||
elseif token.scan_keyword"dest" then
|
elseif token.scan_keyword"dest" then
|
||||||
local id
|
local id
|
||||||
|
@ -324,7 +324,8 @@ function nodehandler.glue(p, n, x, y, outer, origin, level) -- Naturally this is
|
|||||||
end
|
end
|
||||||
function nodehandler.kern() end
|
function nodehandler.kern() end
|
||||||
function nodehandler.penalty() end
|
function nodehandler.penalty() end
|
||||||
local literalescape = lpeg.Cs((lpeg.S'\\()\r'/{['\\'] = '\\\\', ['('] = '\\(', [')'] = '\\)', ['\r'] = '\\r'}+1)^0)
|
|
||||||
|
local pdf_escape = require'luametalatex-pdf-escape'.escape_raw
|
||||||
local match = lpeg.match
|
local match = lpeg.match
|
||||||
local function do_commands(p, c, f, fid, x, y, outer, ...)
|
local function do_commands(p, c, f, fid, x, y, outer, ...)
|
||||||
local fonts = f.fonts
|
local fonts = f.fonts
|
||||||
@ -407,20 +408,20 @@ function nodehandler.glyph(p, n, x, y, ...)
|
|||||||
-- if f.encodingbytes == -3 then
|
-- if f.encodingbytes == -3 then
|
||||||
if false then
|
if false then
|
||||||
if index < 0x80 then
|
if index < 0x80 then
|
||||||
p.pending[#p.pending+1] = match(literalescape, string.pack('>B', index))
|
p.pending[#p.pending+1] = pdf_escape(string.pack('>B', index))
|
||||||
elseif index < 0x7F80 then
|
elseif index < 0x7F80 then
|
||||||
p.pending[#p.pending+1] = match(literalescape, string.pack('>H', index+0x7F80))
|
p.pending[#p.pending+1] = pdf_escape(string.pack('>H', index+0x7F80))
|
||||||
else
|
else
|
||||||
p.pending[#p.pending+1] = match(literalescape, string.pack('>BH', 0xFF, index-0x7F80))
|
p.pending[#p.pending+1] = pdf_escape(string.pack('>BH', 0xFF, index-0x7F80))
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
p.pending[#p.pending+1] = match(literalescape, string.pack('>H', index))
|
p.pending[#p.pending+1] = pdf_escape(string.pack('>H', index))
|
||||||
end
|
end
|
||||||
if not p.usedglyphs[index] then
|
if not p.usedglyphs[index] then
|
||||||
p.usedglyphs[index] = {index, math.floor(c.width * 1000 / f.size + .5), c.tounicode}
|
p.usedglyphs[index] = {index, math.floor(c.width * 1000 / f.size + .5), c.tounicode}
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
p.pending[#p.pending+1] = match(literalescape, string.char(getchar(n)))
|
p.pending[#p.pending+1] = pdf_escape(string.char(getchar(n)))
|
||||||
if not p.usedglyphs[getchar(n)] then
|
if not p.usedglyphs[getchar(n)] then
|
||||||
p.usedglyphs[getchar(n)] = {getchar(n), math.floor(c.width * 1000 / f.size + .5), c.tounicode}
|
p.usedglyphs[getchar(n)] = {getchar(n), math.floor(c.width * 1000 / f.size + .5), c.tounicode}
|
||||||
end
|
end
|
||||||
|
77
luametalatex-pdf-escape.lua
Normal file
77
luametalatex-pdf-escape.lua
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
local mode = 6
|
||||||
|
-- Control how much escaping is done... the mode is a bitset:
|
||||||
|
-- Bit 0: Disable auto-detection of pre-escaped input
|
||||||
|
-- Bit 1: Convert UTF-8 input to UTF-16
|
||||||
|
-- Bit 2: Actually escape unescaped input instead of assuming that it is safe
|
||||||
|
--
|
||||||
|
-- This currently results in 8 modes. Mode 7 is recommended if you can control
|
||||||
|
-- all new code, otherwise Mode 6 might be required. Mode 0 is (mostly) compatible
|
||||||
|
-- with other engines.
|
||||||
|
--
|
||||||
|
-- Also we have three distinct functions which relate to different uses.
|
||||||
|
-- escape_text is for text strings and fully respects the mode.
|
||||||
|
-- escape_bytes is for non-text byte strings and always acts as if Bit 1 is unset
|
||||||
|
-- (after all, UTF-16 doesn't make sense for non-text strings)
|
||||||
|
-- escape_raw always acts like mode 5 without the parens: Just escape, without any
|
||||||
|
-- other auto-detection or conversion. (This is used for actual content text)
|
||||||
|
--- Replace the active escaping mode.
-- The value is stored as given, without validation; it is read as a bitset
-- by the escaping functions of this module.
-- @param new The new mode bitset
local function setmode(new)
  mode = new
end
|
||||||
|
--- Check whether a string already looks like a serialized PDF string.
-- A string counts as pre-escaped if it is wrapped in parentheses (literal
-- string) or in angle brackets (hexadecimal string). Only the first and last
-- byte are inspected, the content in between is not validated.
-- @param s The string to inspect
-- @return true if s should be passed through unchanged, false otherwise
local function is_escaped(s)
  -- Bit 0 of the mode disables the auto-detection completely.
  if mode & 1 == 1 then
    return false
  end
  -- Bit 0 is known to be unset here, so only the shape of the input decides.
  -- (Testing the bit again at this point would make the result always false.)
  if s:match("^%(.*%)$") or s:match("^<.*>$") then
    return true
  end
  return false
end
|
||||||
|
--- Convert a UTF-8 string to UTF-16BE, prefixed with a byte order mark.
-- Code points below 0x10000 are written as one big-endian 16-bit unit, all
-- other code points as a surrogate pair. Input which is not valid UTF-8 makes
-- utf8.codes raise an error.
-- @param s UTF-8 encoded string
-- @return Byte string starting with 0xFE 0xFF followed by the UTF-16BE units
local function to_utf16(s)
  -- The pieces are collected in a table and joined once at the end. Passing
  -- every byte as a separate argument to string.char would hit the limit on
  -- the number of values which can be unpacked for long inputs.
  local units = {'\xFE\xFF'}
  local n = 1
  for _, c in utf8.codes(s) do
    n = n + 1
    if c < 0x10000 then
      -- assert(c < 0xD800 or c >= 0xE000)
      units[n] = string.pack('>I2', c)
    else
      c = c - 0x10000
      -- High surrogate carries the upper 10 bits, low surrogate the lower 10.
      units[n] = string.pack('>I2I2', 0xD800 | (c >> 10), 0xDC00 | (c & 0x3FF))
    end
  end
  return table.concat(units)
end
|
||||||
|
-- This is pretty much the minimal escaping possible: Only escape bytes which are
-- required to be escaped. These are backslashes, carriage returns (a raw end-of-line
-- inside a literal string would be read back as a line feed) and parentheses which
-- are not balanced. Balanced parentheses are kept as they are.
local l = lpeg
-- Bytes which can be copied without any change
local simple_char = l.P(1) - l.S'()\\\r'
-- Bytes which are either copied or replaced by a fixed escape sequence
local semi_simple_char = simple_char + l.P'\\' / '\\\\' + l.P'\r' / '\\r'
-- A balanced group of parentheses, possibly containing further balanced groups
local nested = l.P{l.P'(' * (semi_simple_char + l.V(1))^0 * l.P')'}
-- The whole input: A parenthesis which does not start a balanced group gets a
-- backslash inserted in front of it.
local inner = (semi_simple_char + nested + l.Cc'\\' * l.S'()')^0 * l.P(-1)
-- Escaped content only
local raw = l.Cs(inner)
-- Escaped content wrapped in the parentheses delimiting a literal string
local patt = l.Cs(l.Cc'(' * inner * l.Cc')')
|
||||||
|
--- Serialize a non-text byte string as a PDF string object.
-- Input which is_escaped reports as pre-escaped is returned unchanged. Otherwise
-- the string is wrapped in parentheses; the content is only escaped if Bit 2 of
-- the mode is set. No UTF-16 conversion is ever applied here.
-- @param s The byte string
-- @return The serialized string
local function escape_bytes(s)
  if is_escaped(s) then
    return s
  end
  local escaping_enabled = (mode & 4) ~= 0
  if escaping_enabled then
    return patt:match(s)
  end
  -- Escaping is disabled: Trust the caller that the content is safe.
  return '(' .. s .. ')'
end
|
||||||
|
--- Serialize a text string as a PDF string object.
-- Input which is_escaped reports as pre-escaped is returned unchanged. If Bit 1
-- of the mode is set, the string is converted to UTF-16 and then always escaped.
-- Without Bit 1, the content is escaped only if Bit 2 is set and otherwise just
-- wrapped in parentheses.
-- @param s The text string
-- @return The serialized string
local function escape_text(s)
  if is_escaped(s) then
    return s
  end
  local convert = (mode & 2) ~= 0
  local escaping_enabled = (mode & 4) ~= 0
  if not convert and not escaping_enabled then
    return '(' .. s .. ')'
  end
  -- to_utf16 always returns a string, so the and/or selection is safe here.
  local content = convert and to_utf16(s) or s
  return patt:match(content)
end
|
||||||
|
--- Escape a string without adding delimiters.
-- The mode is ignored: No detection of pre-escaped input and no conversion
-- takes place, the input is just run through the escaping pattern.
-- @param s The string to escape
-- @return The escaped string without surrounding parentheses
local function escape_raw(s)
  return lpeg.match(raw, s)
end
|
||||||
|
|
||||||
|
-- Public interface of the module
return {
  -- Change the mode bitset
  setmode = setmode,
  -- Text strings, fully respects the mode
  escape_text = escape_text,
  -- Non-text byte strings
  escape_bytes = escape_bytes,
  -- Escaping only, without delimiters
  escape_raw = escape_raw,
}
|
@ -14,10 +14,9 @@ local function write(pdf, tree, escaped, step)
|
|||||||
move(tree, #tree+1, 2*#tree-nextcount, nextcount+1)
|
move(tree, #tree+1, 2*#tree-nextcount, nextcount+1)
|
||||||
return write(pdf, tree, escaped, step*6)
|
return write(pdf, tree, escaped, step*6)
|
||||||
end
|
end
|
||||||
local function pdf_string(s)
|
|
||||||
-- Emulate other engines here: If looks like an escaped string, treat it as such. Otherwise, add parenthesis.
|
local pdf_bytestring = require'luametalatex-pdf-escape'.escape_bytes
|
||||||
return s:match("^%(.*%)$") or s:match("^<.*>$") or '(' .. s .. ')'
|
|
||||||
end
|
|
||||||
local serialized = {}
|
local serialized = {}
|
||||||
return function(values, pdf)
|
return function(values, pdf)
|
||||||
local tree = {}
|
local tree = {}
|
||||||
@ -35,7 +34,7 @@ return function(values, pdf)
|
|||||||
local key = tree[6*i+j]
|
local key = tree[6*i+j]
|
||||||
if key then
|
if key then
|
||||||
local value = values[key]
|
local value = values[key]
|
||||||
key = pdf_string(key)
|
key = pdf_bytestring(key)
|
||||||
tree[6*i+j] = key
|
tree[6*i+j] = key
|
||||||
serialized[2*j-1] = key
|
serialized[2*j-1] = key
|
||||||
serialized[2*j] = value
|
serialized[2*j] = value
|
||||||
|
@ -14,7 +14,8 @@ local function written(pdf, num)
|
|||||||
if not num or num == assigned then return end
|
if not num or num == assigned then return end
|
||||||
return num ~= delayed
|
return num ~= delayed
|
||||||
end
|
end
|
||||||
local function stream(pdf, num, dict, content, isfile)
|
-- raw: Pass on preencoded stream. Currently ignored.
|
||||||
|
local function stream(pdf, num, dict, content, isfile, raw)
|
||||||
if not num then num = pdf:getobj() end
|
if not num then num = pdf:getobj() end
|
||||||
if pdf[num] ~= assigned then
|
if pdf[num] ~= assigned then
|
||||||
error[[Invalid object]]
|
error[[Invalid object]]
|
||||||
|
Loading…
Reference in New Issue
Block a user