Improved indentation by using tabs. Added the module variable SPARSELIMIT to control the length threshold for sparse arrays. Included some bug fixes and improvements suggested by @appgurueu.

pull/28/head
Milind Gupta 2023-11-27 20:38:05 -08:00
parent 75127d2c7d
commit 0e6634a54e
1 changed file with 210 additions and 212 deletions

422
json.lua
View File

@ -22,7 +22,10 @@
-- SOFTWARE. -- SOFTWARE.
-- --
-- Module table for this library.
local json = {
	_version = "0.1.2",
	-- Length threshold for sparse arrays: the encoder treats an array-like
	-- table longer than this, whose element count differs from its length,
	-- as an object instead of an array.
	SPARSELIMIT = 100,
}
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
-- Encode -- Encode
@ -31,28 +34,28 @@ local json = { _version = "0.1.2" }
-- Forward declaration: `encode` is assigned further down, after the
-- per-type encoders that call it recursively.
local encode

-- Characters that have a short JSON escape, mapped to the letter that
-- follows the backslash in the escaped form.
local escape_char_map = {
	[ "\\" ] = "\\",
	[ "\"" ] = "\"",
	[ "\b" ] = "b",
	[ "\f" ] = "f",
	[ "\n" ] = "n",
	[ "\r" ] = "r",
	[ "\t" ] = "t",
}

-- Inverse of escape_char_map (escape letter -> character), used when
-- decoding. "/" is seeded by hand because escape_char_map has no entry
-- for it.
local escape_char_map_inv = { [ "/" ] = "/" }
for k, v in pairs(escape_char_map) do
	escape_char_map_inv[v] = k
end
-- Returns the JSON escape sequence for the single character `c`: the short
-- form from escape_char_map when one exists, otherwise a \uXXXX escape built
-- from the character's byte value.
local function escape_char(c)
	local short = escape_char_map[c]
	if short then
		return "\\" .. short
	end
	return string.format("\\u%04x", c:byte())
end
-- Encodes nil as the JSON literal null. The argument is ignored; it exists
-- so this function has the same call shape as the other encoders.
local function encode_nil(val)
	return "null"
end
@ -70,7 +73,7 @@ local function encode_table(val, stack)
local nLen = 0 local nLen = 0
local count = 0 local count = 0
for k,v in pairs(val) do for k,v in pairs(val) do
if (type(k) ~= "number" or k<=0) and not (k == "n" and type(v) == "number") then if (type(k) ~= "number" or k<=0 or k%1 ~= 0) and not (k == "n" and type(v) == "number") then
array = nil array = nil
break -- Treat as object break -- Treat as object
else else
@ -87,7 +90,7 @@ local function encode_table(val, stack)
if nLen > length then if nLen > length then
length = nLen length = nLen
end end
if array and not (length > 100 and count ~= length) then -- Check Array detected but sparse > 100 length then treat as object if array and not (length > SPARSELIMIT and count ~= length) then -- Check Array detected but sparse > 100 length then treat as object
-- Encode -- Encode
for i=1,length do for i=1,length do
table.insert(res, encode(val[i], stack)) table.insert(res, encode(val[i], stack))
@ -97,11 +100,6 @@ local function encode_table(val, stack)
else else
-- Treat as an object -- Treat as an object
for k, v in pairs(val) do for k, v in pairs(val) do
--[[
if type(k) ~= "string" then
error("invalid table: mixed or invalid key types")
end
]]
table.insert(res, encode(k, stack) .. ":" .. encode(v, stack)) table.insert(res, encode(k, stack) .. ":" .. encode(v, stack))
end end
stack[val] = nil stack[val] = nil
@ -116,35 +114,35 @@ end
-- Encodes a Lua number as a JSON number string.
-- Raises an error for NaN and +/-infinity, which JSON cannot represent.
local function encode_number(val)
	-- Check for NaN, -inf and inf
	if val ~= val or val <= -math.huge or val >= math.huge then
		error("unexpected number value '" .. tostring(val) .. "'")
	end
	-- Lua 5.3+ integer subtype: print every digit exactly.
	if math.type and math.type(val) == "integer" then
		return string.format("%d", val)
	end
	-- Integer-valued floats below 2^53 are exactly representable, but
	-- "%.14g" would round anything past 14 significant digits, so print
	-- them in full instead.
	if val % 1 == 0 and val > -2^53 and val < 2^53 then
		return string.format("%.0f", val)
	end
	return string.format("%.14g", val)
end
-- Dispatch table: Lua type name -> encoder for values of that type.
local type_func_map = {
	[ "nil" ] = encode_nil,
	[ "table" ] = encode_table,
	[ "string" ] = encode_string,
	[ "number" ] = encode_number,
	[ "boolean" ] = tostring,
}
-- Encodes `val` by dispatching on its Lua type through type_func_map.
-- `stack` is handed unchanged to the selected encoder.
-- Raises an error for any type that has no entry in type_func_map.
encode = function(val, stack)
	local kind = type(val)
	local encoder = type_func_map[kind]
	if not encoder then
		error("unexpected type '" .. kind .. "'")
	end
	return encoder(val, stack)
end
-- Public entry point: returns the encoded form of `val`.
function json.encode(val)
	-- The parentheses truncate the result to exactly one return value.
	return ( encode(val) )
end
@ -155,11 +153,11 @@ end
-- Forward declaration: `parse` is assigned after the individual parsers
-- that call it recursively.
local parse
-- Builds a set from the arguments: a table that maps each argument to true.
local function create_set(...)
	local set = {}
	for idx = 1, select("#", ...) do
		-- Assigning to a single local keeps only the idx-th argument.
		local member = select(idx, ...)
		set[member] = true
	end
	return set
end
-- Whitespace characters that the parsers skip between tokens.
local space_chars = create_set(" ", "\t", "\r", "\n")
@ -168,232 +166,232 @@ local escape_chars = create_set("\\", "/", '"', "b", "f", "n", "r", "t", "u")
-- Words accepted as JSON literals.
local literals = create_set("true", "false", "null")

-- Lua value for each literal. The "null" entry stores nothing (assigning nil
-- in a constructor creates no key), so looking it up yields nil.
local literal_map = {
	[ "true" ] = true,
	[ "false" ] = false,
	[ "null" ] = nil,
}
-- Scans `str` from index `idx` and returns the first index whose character
-- satisfies `set[char] ~= negate`; returns #str + 1 when none does.
-- With `negate` omitted this finds the first character that IS in `set`;
-- with `negate` = true it finds the first character that is NOT in `set`.
local function next_char(str, idx, set, negate)
	local len = #str
	local pos = idx
	while pos <= len do
		if set[str:sub(pos, pos)] ~= negate then
			return pos
		end
		pos = pos + 1
	end
	return len + 1
end
-- Raises an error of the form "<msg> at line L col C", where L and C are
-- the 1-based line and column of index `idx` in `str`. Never returns.
local function decode_error(str, idx, msg)
	local line_count = 1
	local col_count = 1
	for i = 1, idx - 1 do
		if str:sub(i, i) == "\n" then
			-- A newline starts the next line at column 1.
			line_count = line_count + 1
			col_count = 1
		else
			col_count = col_count + 1
		end
	end
	error( string.format("%s at line %d col %d", msg, line_count, col_count) )
end
-- Converts the Unicode codepoint `n` to its UTF-8 byte sequence (1-4 bytes).
-- Raises an error when `n` is greater than 0x10ffff.
local function codepoint_to_utf8(n)
	-- http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=iws-appendixa
	local f = math.floor
	if n <= 0x7f then
		-- One byte: the codepoint itself.
		return string.char(n)
	elseif n <= 0x7ff then
		-- Two bytes: 110xxxxx 10xxxxxx
		return string.char(f(n / 64) + 192, n % 64 + 128)
	elseif n <= 0xffff then
		-- Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
		return string.char(f(n / 4096) + 224, f(n % 4096 / 64) + 128, n % 64 + 128)
	elseif n <= 0x10ffff then
		-- Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		return string.char(f(n / 262144) + 240, f(n % 262144 / 4096) + 128,
			f(n % 4096 / 64) + 128, n % 64 + 128)
	end
	error( string.format("invalid unicode codepoint '%x'", n) )
end
-- Converts the hex payload of a \u escape to UTF-8.
-- `s` is either four hex digits ("XXXX") or a surrogate pair with the second
-- escape's "\u" still embedded ("XXXX\uYYYY"), so the low surrogate's digits
-- sit at positions 7-10.
local function parse_unicode_escape(s)
	local n1 = tonumber( s:sub(1, 4), 16 )
	-- Yields nil when `s` holds only four digits.
	local n2 = tonumber( s:sub(7, 10), 16 )
	-- Surrogate pair?
	if n2 then
		return codepoint_to_utf8((n1 - 0xd800) * 0x400 + (n2 - 0xdc00) + 0x10000)
	else
		return codepoint_to_utf8(n1)
	end
end
-- Parses the JSON string whose opening quote is at index `i` of `str`.
-- Returns the decoded string and the index just past the closing quote.
-- Raises (via decode_error) on a raw control character, an invalid escape,
-- or a missing closing quote.
local function parse_string(str, i)
	-- Pieces are collected here and joined once at the end, instead of
	-- growing a string by repeated concatenation.
	local parts = {}
	local j = i + 1
	-- Start of the current run of literal (unescaped) characters.
	local k = j

	while j <= #str do
		local x = str:byte(j)

		if x < 32 then
			decode_error(str, j, "control character in string")

		elseif x == 92 then -- `\`: Escape
			parts[#parts + 1] = str:sub(k, j - 1)
			j = j + 1
			local c = str:sub(j, j)
			if c == "u" then
				-- Try a surrogate pair first, then a plain 4-digit escape.
				local hex = str:match("^[dD][89aAbB]%x%x\\u%x%x%x%x", j + 1)
					or str:match("^%x%x%x%x", j + 1)
					or decode_error(str, j - 1, "invalid unicode escape in string")
				parts[#parts + 1] = parse_unicode_escape(hex)
				j = j + #hex
			else
				if not escape_chars[c] then
					decode_error(str, j - 1, "invalid escape char '" .. c .. "' in string")
				end
				parts[#parts + 1] = escape_char_map_inv[c]
			end
			k = j + 1

		elseif x == 34 then -- `"`: End of string
			parts[#parts + 1] = str:sub(k, j - 1)
			return table.concat(parts), j + 1
		end

		j = j + 1
	end

	decode_error(str, i, "expected closing quote for string")
end
-- Parses the number starting at index `i` of `str`.
-- Returns the number and the index of the delimiter that ended it.
-- Raises (via decode_error) when the token is not a valid number.
local function parse_number(str, i)
	local x = next_char(str, i, delim_chars)
	local s = str:sub(i, x - 1)
	-- tonumber alone also accepts forms JSON forbids (hex such as "0x10", and
	-- "-inf"/"-nan" on some Lua builds), so require an optional minus sign,
	-- a digit, then only digit/./exponent characters before converting.
	local n = s:find("^%-?%d[%d%.eE+%-]*$") and tonumber(s)
	if not n then
		decode_error(str, i, "invalid number '" .. s .. "'")
	end
	return n, x
end
-- Parses the literal (true, false or null) starting at index `i` of `str`.
-- Returns the matching Lua value (nil for null) and the index of the
-- delimiter that ended the word. Raises (via decode_error) on any other word.
local function parse_literal(str, i)
	local x = next_char(str, i, delim_chars)
	local word = str:sub(i, x - 1)
	if not literals[word] then
		decode_error(str, i, "invalid literal '" .. word .. "'")
	end
	return literal_map[word], x
end
-- Parses the JSON array whose "[" is at index `i` of `str`.
-- Returns a table with the elements at keys 1..n (a null element leaves its
-- key unset) and the index just past the closing "]".
-- Raises (via decode_error) when an element is not followed by "]" or ",".
local function parse_array(str, i)
	local res = {}
	local n = 1
	i = i + 1
	while true do
		local x
		i = next_char(str, i, space_chars, true)
		-- Empty / end of array?
		if str:sub(i, i) == "]" then
			i = i + 1
			break
		end
		-- Read token
		x, i = parse(str, i)
		res[n] = x
		n = n + 1
		-- Next token
		i = next_char(str, i, space_chars, true)
		local chr = str:sub(i, i)
		-- Validate before advancing so the error points at the offending
		-- character rather than the one after it.
		if chr ~= "]" and chr ~= "," then
			decode_error(str, i, "expected ']' or ','")
		end
		i = i + 1
		if chr == "]" then break end
	end
	return res, i
end
-- Parses the JSON object whose "{" is at index `i` of `str`.
-- Returns a table of key/value pairs and the index just past the closing "}".
-- Raises (via decode_error) when a key is not a string, the ":" is missing,
-- or a pair is not followed by "}" or ",".
local function parse_object(str, i)
	local res = {}
	i = i + 1
	while true do
		local key, val
		i = next_char(str, i, space_chars, true)
		-- Empty / end of object?
		if str:sub(i, i) == "}" then
			i = i + 1
			break
		end
		-- Read key
		if str:sub(i, i) ~= '"' then
			decode_error(str, i, "expected string for key")
		end
		key, i = parse(str, i)
		-- Read ':' delimiter
		i = next_char(str, i, space_chars, true)
		if str:sub(i, i) ~= ":" then
			decode_error(str, i, "expected ':' after key")
		end
		i = next_char(str, i + 1, space_chars, true)
		-- Read value
		val, i = parse(str, i)
		-- Set
		res[key] = val
		-- Next token
		i = next_char(str, i, space_chars, true)
		local chr = str:sub(i, i)
		-- Validate before advancing so the error points at the offending
		-- character rather than the one after it.
		if chr ~= "}" and chr ~= "," then
			decode_error(str, i, "expected '}' or ','")
		end
		i = i + 1
		if chr == "}" then break end
	end
	return res, i
end
-- Dispatch table: first character of a JSON value -> parser for that value.
local char_func_map = {
	[ '"' ] = parse_string,
	[ "0" ] = parse_number,
	[ "1" ] = parse_number,
	[ "2" ] = parse_number,
	[ "3" ] = parse_number,
	[ "4" ] = parse_number,
	[ "5" ] = parse_number,
	[ "6" ] = parse_number,
	[ "7" ] = parse_number,
	[ "8" ] = parse_number,
	[ "9" ] = parse_number,
	[ "-" ] = parse_number,
	[ "t" ] = parse_literal,
	[ "f" ] = parse_literal,
	[ "n" ] = parse_literal,
	[ "[" ] = parse_array,
	[ "{" ] = parse_object,
}
-- Parses the JSON value starting at index `idx` of `str` by dispatching on
-- its first character. Returns whatever the selected parser returns (the
-- value and the next index). Raises (via decode_error) when no parser is
-- registered for that character.
parse = function(str, idx)
	local chr = str:sub(idx, idx)
	local parser = char_func_map[chr]
	if not parser then
		decode_error(str, idx, "unexpected character '" .. chr .. "'")
	end
	return parser(str, idx)
end
-- Public entry point: decodes the JSON text `str` and returns the Lua value.
-- Raises an error when `str` is not a string, when parsing fails, or when
-- anything other than whitespace follows the first value.
function json.decode(str)
	if type(str) ~= "string" then
		error("expected argument of type string, got " .. type(str))
	end
	-- Skip leading whitespace, then parse exactly one value.
	local start = next_char(str, 1, space_chars, true)
	local res, idx = parse(str, start)
	-- Only whitespace may remain after the value.
	idx = next_char(str, idx, space_chars, true)
	if idx <= #str then
		decode_error(str, idx, "trailing garbage")
	end
	return res
end