texmod.read: Unescaped ^ in (...) laxness

This commit is contained in:
Lars Mueller 2023-06-02 16:19:56 +02:00
parent 7d35b4d9e8
commit b56f030127
2 changed files with 25 additions and 14 deletions

@@ -17,13 +17,13 @@ function metatable:__tostring()
     return table.concat(rope)
 end
-function texmod.read_string(str)
+function texmod.read_string(str, warn --[[function(warn_str)]])
     local i = 0
     return texmod.read(function()
         i = i + 1
         if i > #str then return end
         return str:sub(i, i)
-    end)
+    end, warn)
 end
 return texmod

@@ -250,18 +250,24 @@ end
 -- Reader methods. We use `r` instead of the `self` "sugar" for consistency (and to save us some typing).
 local rm = {}
-function rm.peek(r)
+function rm.peek(r, parenthesized)
     if r.eof then return end
     local expected_escapes = 0
     if r.level > 0 then
         -- Premature optimization my beloved (this is `2^(level-1)`)
         expected_escapes = math.ldexp(0.5, r.level)
     end
-    if r.character:match"[&^:]" then
-        if r.escapes == expected_escapes then return r.character end
+    if r.character:match"[&^:]" then -- "special" characters - these need to be escaped
+        if r.escapes == expected_escapes then
+            return r.character
+        elseif parenthesized and r.character == "^" and r.escapes < expected_escapes then
+            -- Special handling for `^` inside `(...)`: This is undocumented behavior but works in Minetest
+            r.warn"parenthesized caret (`^`) with too few escapes"
+            return r.character
+        end
     elseif r.escapes <= expected_escapes then
         return r.character
-    elseif r.escapes >= 2*expected_escapes then
+    end if r.escapes >= 2*expected_escapes then
         return "\\"
     end
 end
@@ -301,8 +307,11 @@ function rm.expect(r, char)
         error(("%q expected"):format(char))
     end
 end
-function rm.hat(r)
-    return r:match(r.invcube and "&" or "^")
+function rm.hat(r, parenthesized)
+    if r:peek(parenthesized) == (r.invcube and "&" or "^") then
+        r:pop()
+        return true
+    end
 end
 function rm.match_charset(r, set)
     local char = r:peek()
@@ -354,7 +363,7 @@ function rm.invcubeside(r)
 end
 function rm.basexp(r)
     if r:match"(" then
-        local res = r:texp()
+        local res = r:texp(true)
         r:expect")"
         return res
     end
@@ -372,9 +381,9 @@ function rm.colorspec(r)
     -- Leave exact validation up to colorspec, only do a rough greedy charset matching
     return assert(colorspec.from_string(r:match_str"[#%x%a]"))
 end
-function rm.texp(r)
-    local base = r:basexp()
-    while r:hat() do
+function rm.texp(r, parenthesized)
+    local base = r:basexp() -- TODO (?) make optional - warn about omitting the base
+    while r:hat(parenthesized) do
         if r:match"[" then
             local reader_subtrie = texmod_reader_trie
             while true do
@@ -404,15 +413,17 @@ function rm.texp(r)
 end
 local mt = {__index = rm}
-return function(read_char)
+return function(read_char, warn --[[function(str)]])
     local r = setmetatable({
         level = 0,
         invcube = false,
+        parenthesized = false,
         eof = false,
         read_char = read_char,
+        warn = warn or error,
     }, mt)
     r:popchar()
-    local res = r:texp()
+    local res = r:texp(false)
     assert(r.eof, "eof expected")
     return res
 end