From 2e76cfb067718d59ef7026fcf2b8e1586622d749 Mon Sep 17 00:00:00 2001 From: rxi Date: Sun, 5 Apr 2020 11:53:21 +0100 Subject: [PATCH] Fixed bug in `parse_string`'s escape sequence handling Fixes #22 --- json.lua | 84 ++++++++++++++++++++++++-------------------------------- 1 file changed, 36 insertions(+), 48 deletions(-) diff --git a/json.lua b/json.lua index 098e7b2..7a4f20e 100644 --- a/json.lua +++ b/json.lua @@ -31,23 +31,23 @@ local json = { _version = "0.1.2" } local encode local escape_char_map = { - [ "\\" ] = "\\\\", - [ "\"" ] = "\\\"", - [ "\b" ] = "\\b", - [ "\f" ] = "\\f", - [ "\n" ] = "\\n", - [ "\r" ] = "\\r", - [ "\t" ] = "\\t", + [ "\\" ] = "\\", + [ "\"" ] = "\"", + [ "\b" ] = "b", + [ "\f" ] = "f", + [ "\n" ] = "n", + [ "\r" ] = "r", + [ "\t" ] = "t", } -local escape_char_map_inv = { [ "\\/" ] = "/" } +local escape_char_map_inv = { [ "/" ] = "/" } for k, v in pairs(escape_char_map) do escape_char_map_inv[v] = k end local function escape_char(c) - return escape_char_map[c] or string.format("\\u%04x", c:byte()) + return "\\" .. (escape_char_map[c] or string.format("u%04x", c:byte())) end @@ -204,9 +204,9 @@ end local function parse_unicode_escape(s) - local n1 = tonumber( s:sub(3, 6), 16 ) - local n2 = tonumber( s:sub(9, 12), 16 ) - -- Surrogate pair? + local n1 = tonumber( s:sub(1, 4), 16 ) + local n2 = tonumber( s:sub(7, 10), 16 ) + -- Surrogate pair? if n2 then return codepoint_to_utf8((n1 - 0xd800) * 0x400 + (n2 - 0xdc00) + 0x10000) else @@ -216,54 +216,42 @@ end local function parse_string(str, i) - local has_unicode_escape = false - local has_surrogate_escape = false - local has_escape = false - local last - for j = i + 1, #str do + local res = "" + local j = i + 1 + local k = j + + while j <= #str do local x = str:byte(j) if x < 32 then decode_error(str, j, "control character in string") - end - if last == 92 then -- "\\" (escape char) - if x == 117 then -- "u" (unicode escape sequence) - local hex = str:sub(j + 1, j + 5) - if not hex:find("%x%x%x%x") then - decode_error(str, j, "invalid unicode escape in string") - end - if hex:find("^[dD][89aAbB]") then - has_surrogate_escape = true - else - has_unicode_escape = true - end + elseif x == 92 then -- `\`: Escape + res = res .. str:sub(k, j - 1) + j = j + 1 + local c = str:sub(j, j) + if c == "u" then + local hex = str:match("^[dD][89aAbB]%x%x\\u%x%x%x%x", j + 1) + or str:match("^%x%x%x%x", j + 1) + or decode_error(str, j - 1, "invalid unicode escape in string") + res = res .. parse_unicode_escape(hex) + j = j + #hex else - local c = string.char(x) if not escape_chars[c] then - decode_error(str, j, "invalid escape char '" .. c .. "' in string") + decode_error(str, j - 1, "invalid escape char '" .. c .. "' in string") end - has_escape = true + res = res .. escape_char_map_inv[c] end - last = nil + k = j + 1 - elseif x == 34 then -- '"' (end of string) - local s = str:sub(i + 1, j - 1) - if has_surrogate_escape then - s = s:gsub("\\u[dD][89aAbB]..\\u....", parse_unicode_escape) - end - if has_unicode_escape then - s = s:gsub("\\u....", parse_unicode_escape) - end - if has_escape then - s = s:gsub("\\.", escape_char_map_inv) - end - return s, j + 1 - - else - last = x + elseif x == 34 then -- `"`: End of string + res = res .. str:sub(k, j - 1) + return res, j + 1 end + + j = j + 1 end + decode_error(str, i, "expected closing quote for string") end