Fix escape sequence parser for decoding strings

Improve the logic for parsing escape sequences when decoding strings. The former gsub-based post-processing is replaced by a clean single-pass token parser that decodes each escape sequence as it is encountered.

This fixes a bug where the parser tried to decode every ``\u`` sequence in a string, including invalid Unicode escape sequences (for example, an escaped backslash followed by ``u``), as soon as a valid Unicode escape sequence was found anywhere else in the string.

Minimal working example (MWE):

```lua
-- Load the JSON module under test.
json = require("json")
-- The JSON string value below contains an escaped backslash followed by
-- "url{...}" (i.e. the text \url{...}, which is NOT a unicode escape) and,
-- later in the same string, a valid unicode escape \u0023.
json.decode("{\"fail\":\"\\\\url{http://www.example.com/} vs. \\u0023\"}")
```
pull/22/head
sfeuerstein-op 2020-03-12 13:49:19 +01:00 committed by GitHub
parent d1e3b0f5d0
commit 673d41a4fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 15 additions and 24 deletions

View File

@ -216,52 +216,43 @@ end
local function parse_string(str, i)
local has_unicode_escape = false
local has_surrogate_escape = false
local has_escape = false
local last
for j = i + 1, #str do
local s = ""
local j = i + 1
while j <= #str do
local x = str:byte(j)
if x < 32 then
decode_error(str, j, "control character in string")
end
if last == 92 then -- "\\" (escape char)
if x == 117 then -- "u" (unicode escape sequence)
local hex = str:sub(j + 1, j + 5)
if x == 92 then -- "\\" (escape char)
if str:byte(j + 1) == 117 then -- "u" (unicode escape sequence)
local hex = str:sub(j + 2, j + 6)
if not hex:find("%x%x%x%x") then
decode_error(str, j, "invalid unicode escape in string")
end
if hex:find("^[dD][89aAbB]") then
has_surrogate_escape = true
s = s .. parse_unicode_escape(str:sub(j, j + 12))
j = j + 12
else
has_unicode_escape = true
s = s .. parse_unicode_escape(str:sub(j, j + 6))
j = j + 6
end
else
local c = string.char(x)
local c = str:sub(j + 1, j + 1)
if not escape_chars[c] then
decode_error(str, j, "invalid escape char '" .. c .. "' in string")
end
has_escape = true
s = s .. escape_char_map_inv[str:sub(j, j + 1)]
j = j + 2
end
last = nil
elseif x == 34 then -- '"' (end of string)
local s = str:sub(i + 1, j - 1)
if has_surrogate_escape then
s = s:gsub("\\u[dD][89aAbB]..\\u....", parse_unicode_escape)
end
if has_unicode_escape then
s = s:gsub("\\u....", parse_unicode_escape)
end
if has_escape then
s = s:gsub("\\.", escape_char_map_inv)
end
return s, j + 1
else
last = x
s = s .. string.char(x)
j = j + 1
end
end
decode_error(str, i, "expected closing quote for string")