Fixed bug in `parse_string`'s escape sequence handling

Fixes #22
pull/24/head
rxi 2020-04-05 11:53:21 +01:00
parent 8aa60078ca
commit 2e76cfb067
1 changed files with 36 additions and 48 deletions

View File

@ -31,23 +31,23 @@ local json = { _version = "0.1.2" }
local encode
local escape_char_map = {
[ "\\" ] = "\\\\",
[ "\"" ] = "\\\"",
[ "\b" ] = "\\b",
[ "\f" ] = "\\f",
[ "\n" ] = "\\n",
[ "\r" ] = "\\r",
[ "\t" ] = "\\t",
[ "\\" ] = "\\",
[ "\"" ] = "\"",
[ "\b" ] = "b",
[ "\f" ] = "f",
[ "\n" ] = "n",
[ "\r" ] = "r",
[ "\t" ] = "t",
}
local escape_char_map_inv = { [ "\\/" ] = "/" }
local escape_char_map_inv = { [ "/" ] = "/" }
for k, v in pairs(escape_char_map) do
escape_char_map_inv[v] = k
end
local function escape_char(c)
return escape_char_map[c] or string.format("\\u%04x", c:byte())
return "\\" .. (escape_char_map[c] or string.format("u%04x", c:byte()))
end
@ -204,8 +204,8 @@ end
local function parse_unicode_escape(s)
local n1 = tonumber( s:sub(3, 6), 16 )
local n2 = tonumber( s:sub(9, 12), 16 )
local n1 = tonumber( s:sub(1, 4), 16 )
local n2 = tonumber( s:sub(7, 10), 16 )
-- Surrogate pair?
if n2 then
return codepoint_to_utf8((n1 - 0xd800) * 0x400 + (n2 - 0xdc00) + 0x10000)
@ -216,54 +216,42 @@ end
local function parse_string(str, i)
local has_unicode_escape = false
local has_surrogate_escape = false
local has_escape = false
local last
for j = i + 1, #str do
local res = ""
local j = i + 1
local k = j
while j <= #str do
local x = str:byte(j)
if x < 32 then
decode_error(str, j, "control character in string")
end
if last == 92 then -- "\\" (escape char)
if x == 117 then -- "u" (unicode escape sequence)
local hex = str:sub(j + 1, j + 5)
if not hex:find("%x%x%x%x") then
decode_error(str, j, "invalid unicode escape in string")
end
if hex:find("^[dD][89aAbB]") then
has_surrogate_escape = true
elseif x == 92 then -- `\`: Escape
res = res .. str:sub(k, j - 1)
j = j + 1
local c = str:sub(j, j)
if c == "u" then
local hex = str:match("^[dD][89aAbB]%x%x\\u%x%x%x%x", j + 1)
or str:match("^%x%x%x%x", j + 1)
or decode_error(str, j - 1, "invalid unicode escape in string")
res = res .. parse_unicode_escape(hex)
j = j + #hex
else
has_unicode_escape = true
end
else
local c = string.char(x)
if not escape_chars[c] then
decode_error(str, j, "invalid escape char '" .. c .. "' in string")
decode_error(str, j - 1, "invalid escape char '" .. c .. "' in string")
end
has_escape = true
res = res .. escape_char_map_inv[c]
end
last = nil
k = j + 1
elseif x == 34 then -- '"' (end of string)
local s = str:sub(i + 1, j - 1)
if has_surrogate_escape then
s = s:gsub("\\u[dD][89aAbB]..\\u....", parse_unicode_escape)
elseif x == 34 then -- `"`: End of string
res = res .. str:sub(k, j - 1)
return res, j + 1
end
if has_unicode_escape then
s = s:gsub("\\u....", parse_unicode_escape)
end
if has_escape then
s = s:gsub("\\.", escape_char_map_inv)
end
return s, j + 1
else
last = x
end
j = j + 1
end
decode_error(str, i, "expected closing quote for string")
end