Merge branch 'master' into kikeflare

This commit is contained in:
Thomas Lynch
2022-11-06 13:43:57 +11:00

View File

@ -1,17 +1,41 @@
-- neturl.lua - a robust url parser and builder -- net/url.lua - a robust url parser and builder
-- --
-- Bertrand Mansion, 2011-2013; License MIT -- Bertrand Mansion, 2011-2021; License MIT
-- @module neturl -- @module net.url
-- @alias M -- @alias M
local M = {} local M = {}
M.version = "0.9.0" M.version = "1.1.0"
--- url options --- url options
-- separator is set to `&` by default but could be anything like `&` or `;` -- - `separator` is set to `&` by default but could be anything like `&` or `;`
-- @todo Add an option to limit the size of the argument table -- - `cumulative_parameters` is false by default. If true, query parameters with the same name will be stored in a table.
-- - `legal_in_path` is a table of characters that will not be url encoded in path components
-- - `legal_in_query` is a table of characters that will not be url encoded in query values. Query parameters only support a small set of legal characters (-_.).
-- - `query_plus_is_space` is true by default, so a plus sign in a query value will be converted to %20 (space), not %2B (plus)
-- @todo Add option to limit the size of the argument table
-- @todo Add option to limit the depth of the argument table
-- @todo Add option to process dots in parameter names, ie. `param.filter=1`
M.options = { M.options = {
separator = '&' separator = '&',
cumulative_parameters = false,
square_bracket_key = false,
max_query_parse_keys = 50,
max_query_parse_length = 32 * 1024,
legal_in_path = {
[":"] = true, ["-"] = true, ["_"] = true, ["."] = true,
["!"] = true, ["~"] = true, ["*"] = true, ["'"] = true,
["("] = true, [")"] = true, ["@"] = true, ["&"] = true,
["="] = true, ["$"] = true, [","] = true,
[";"] = true
},
legal_in_query = {
[":"] = true, ["-"] = true, ["_"] = true, ["."] = true,
[","] = true, ["!"] = true, ["~"] = true, ["*"] = true,
["'"] = true, [";"] = true, ["("] = true, [")"] = true,
["@"] = true, ["$"] = true,
},
query_plus_is_space = true
} }
--- list of known and common scheme ports --- list of known and common scheme ports
@ -47,48 +71,42 @@ M.services = {
videotex = 516 videotex = 516
} }
local legal = { local function decode(str)
["-"] = true, ["_"] = true, ["."] = true, ["!"] = true,
["~"] = true, ["*"] = true, ["'"] = true, ["("] = true,
[")"] = true, [":"] = true, ["@"] = true, ["&"] = true,
["="] = true, ["+"] = true, ["$"] = true, [","] = true,
[";"] = true -- can be used for parameters in path
}
local function decode(str, path)
local str = str
if not path then
str = str:gsub('+', ' ')
end
return (str:gsub("%%(%x%x)", function(c) return (str:gsub("%%(%x%x)", function(c)
return string.char(tonumber(c, 16)) return string.char(tonumber(c, 16))
end)) end))
end end
local function encode(str) local function encode(str, legal)
return (str:gsub("([^A-Za-z0-9%_%.%-%~])", function(v) return (str:gsub("([^%w])", function(v)
if legal[v] then
return v
end
return string.upper(string.format("%%%02x", string.byte(v))) return string.upper(string.format("%%%02x", string.byte(v)))
end)) end))
end end
-- for query values, prefer + instead of %20 for spaces -- for query values, + can mean space if configured as such
local function encodeValue(str) local function decodeValue(str)
local str = encode(str) if M.options.query_plus_is_space then
return str:gsub('%%20', '+') str = str:gsub('+', ' ')
end
return decode(str)
end end
local function encodeSegment(s) local function concat(a, b)
local legalEncode = function(c) if type(a) == 'table' then
if legal[c] then return a:build() .. b
return c else
return a .. b:build()
end end
return encode(c)
end
return s:gsub('([^a-zA-Z0-9])', legalEncode)
end end
local function concat(s, u) function M:addSegment(path)
return s .. u:build() if type(path) == 'string' then
self.path = self.path .. '/' .. encode(path:gsub("^/+", ""), M.options.legal_in_path)
end
return self
end end
--- builds the url --- builds the url
@ -97,7 +115,6 @@ function M:build()
local url = '' local url = ''
if self.path then if self.path then
local path = self.path local path = self.path
path:gsub("([^/]+)", function (s) return encodeSegment(s) end)
url = url .. tostring(path) url = url .. tostring(path)
end end
if self.query then if self.query then
@ -152,17 +169,24 @@ function M.buildQuery(tab, sep, key)
for k in pairs(tab) do for k in pairs(tab) do
keys[#keys+1] = k keys[#keys+1] = k
end end
table.sort(keys) table.sort(keys, function (a, b)
local function padnum(n, rest) return ("%03d"..rest):format(tonumber(n)) end
return tostring(a):gsub("(%d+)(%.)",padnum) < tostring(b):gsub("(%d+)(%.)",padnum)
end)
for _,name in ipairs(keys) do for _,name in ipairs(keys) do
local value = tab[name] local value = tab[name]
name = encode(tostring(name)) name = encode(tostring(name), {["-"] = true, ["_"] = true, ["."] = true})
if key then if key then
if M.options.cumulative_parameters and string.find(name, '^%d+$') then
name = tostring(key)
else
name = string.format('%s[%s]', tostring(key), tostring(name)) name = string.format('%s[%s]', tostring(key), tostring(name))
end end
end
if type(value) == 'table' then if type(value) == 'table' then
query[#query+1] = M.buildQuery(value, sep, name) query[#query+1] = M.buildQuery(value, sep, name)
else else
local value = encodeValue(tostring(value)) local value = encode(tostring(value), M.options.legal_in_query)
if value ~= "" then if value ~= "" then
query[#query+1] = string.format('%s=%s', name, value) query[#query+1] = string.format('%s=%s', name, value)
else else
@ -181,24 +205,34 @@ end
-- @todo limit the max number of parameters with M.options.max_parameters -- @todo limit the max number of parameters with M.options.max_parameters
-- @return a table representing the query key/value pairs -- @return a table representing the query key/value pairs
function M.parseQuery(str, sep) function M.parseQuery(str, sep)
if #str > M.options.max_query_parse_length then
return
end
if not sep then if not sep then
sep = M.options.separator or '&' sep = M.options.separator or '&'
end end
local values = {} local values = {}
local parts = 0
for key,val in str:gmatch(string.format('([^%q=]+)(=*[^%q=]*)', sep, sep)) do for key,val in str:gmatch(string.format('([^%q=]+)(=*[^%q=]*)', sep, sep)) do
local key = decode(key) if parts > M.options.max_query_parse_keys then
break
end
local key = decodeValue(key)
local keys = {} local keys = {}
if M.options.square_bracket_key then
key = key:gsub('%[([^%]]*)%]', function(v) key = key:gsub('%[([^%]]*)%]', function(v)
-- extract keys between balanced brackets -- extract keys between balanced brackets
if string.find(v, "^-?%d+$") then if string.find(v, "^-?%d+$") then
v = tonumber(v) v = tonumber(v)
else else
v = decode(v) v = decodeValue(v)
end end
table.insert(keys, v) table.insert(keys, v)
return "=" return "="
end) end)
end
key = key:gsub('=+.*$', "") key = key:gsub('=+.*$', "")
key = key:gsub('%s', "_") -- remove spaces in parameter name key = key:gsub('%s', "_") -- remove spaces in parameter name
val = val:gsub('^=+', "") val = val:gsub('^=+', "")
@ -209,7 +243,11 @@ function M.parseQuery(str, sep)
if #keys > 0 and type(values[key]) ~= 'table' then if #keys > 0 and type(values[key]) ~= 'table' then
values[key] = {} values[key] = {}
elseif #keys == 0 and type(values[key]) == 'table' then elseif #keys == 0 and type(values[key]) == 'table' then
values[key] = decode(val) values[key] = decodeValue(val)
elseif M.options.cumulative_parameters
and type(values[key]) == 'string' then
values[key] = { values[key] }
table.insert(values[key], decodeValue(val))
end end
local t = values[key] local t = values[key]
@ -224,10 +262,11 @@ function M.parseQuery(str, sep)
t[k] = {} t[k] = {}
end end
if i == #keys then if i == #keys then
t[k] = decode(val) t[k] = val
end end
t = t[k] t = t[k]
end end
parts = parts + 1
end end
setmetatable(values, { __tostring = M.buildQuery }) setmetatable(values, { __tostring = M.buildQuery })
return values return values
@ -261,26 +300,69 @@ function M:setAuthority(authority)
self.userinfo = v self.userinfo = v
return '' return ''
end) end)
authority = authority:gsub("^%[[^%]]+%]", function(v)
-- ipv6 authority = authority:gsub(':(%d+)$', function(v)
self.host = v
return ''
end)
authority = authority:gsub(':([^:]*)$', function(v)
self.port = tonumber(v) self.port = tonumber(v)
return '' return ''
end) end)
if authority ~= '' and not self.host then
self.host = authority:lower() local function getIP(str)
-- ipv4
local chunks = { str:match("^(%d+)%.(%d+)%.(%d+)%.(%d+)$") }
if #chunks == 4 then
for _, v in pairs(chunks) do
if tonumber(v) > 255 then
return false
end end
end
return str
end
-- ipv6
local chunks = { str:match("^%["..(("([a-fA-F0-9]*):"):rep(8):gsub(":$","%%]$"))) }
if #chunks == 8 or #chunks < 8 and
str:match('::') and not str:gsub("::", "", 1):match('::') then
for _,v in pairs(chunks) do
if #v > 0 and tonumber(v, 16) > 65535 then
return false
end
end
return str
end
return nil
end
local ip = getIP(authority)
if ip then
self.host = ip
elseif type(ip) == 'nil' then
-- domain
if authority ~= '' and not self.host then
local host = authority:lower()
if string.match(host, '^[%d%a%-%.]+$') ~= nil and
string.sub(host, 0, 1) ~= '.' and
string.sub(host, -1) ~= '.' and
string.find(host, '%.%.') == nil then
self.host = host
end
end
end
if self.userinfo then if self.userinfo then
local userinfo = self.userinfo local userinfo = self.userinfo
userinfo = userinfo:gsub(':([^:]*)$', function(v) userinfo = userinfo:gsub(':([^:]*)$', function(v)
self.password = v self.password = v
return '' return ''
end) end)
if string.find(userinfo, "^[%w%+%.]+$") then
self.user = userinfo self.user = userinfo
else
-- incorrect userinfo
self.userinfo = nil
self.user = nil
self.password = nil
end end
end
return authority return authority
end end
@ -312,13 +394,15 @@ function M.parse(url)
M.setAuthority(comp, v) M.setAuthority(comp, v)
return '' return ''
end) end)
comp.path = decode(url, true)
comp.path = url:gsub("([^/]+)", function (s) return encode(decode(s), M.options.legal_in_path) end)
setmetatable(comp, { setmetatable(comp, {
__index = M, __index = M,
__tostring = M.build,
__concat = concat, __concat = concat,
__tostring = M.build} __div = M.addSegment
) })
return comp return comp
end end
@ -370,13 +454,14 @@ function M.removeDotSegments(path)
return ret return ret
end end
local function absolutePath(base_path, relative_path) local function reducePath(base_path, relative_path)
if string.sub(relative_path, 1, 1) == "/" then if string.sub(relative_path, 1, 1) == "/" then
return '/' .. string.gsub(relative_path, '^[%./]+', '') return '/' .. string.gsub(relative_path, '^[%./]+', '')
end end
local path = base_path local path = base_path
local startslash = string.sub(path, 1, 1) ~= "/";
if relative_path ~= "" then if relative_path ~= "" then
path = '/'..path:gsub("[^/]*$", "") path = (startslash and '' or '/') .. path:gsub("[^/]*$", "")
end end
path = path .. relative_path path = path .. relative_path
path = path:gsub("([^/]*%./)", function (s) path = path:gsub("([^/]*%./)", function (s)
@ -398,7 +483,7 @@ local function absolutePath(base_path, relative_path)
reduced = path reduced = path
path = string.gsub(reduced, '^/?%.%./', '') path = string.gsub(reduced, '^/?%.%./', '')
end end
return '/' .. path return (startslash and '' or '/') .. path
end end
--- builds a new url by using the one given as parameter and resolving paths --- builds a new url by using the one given as parameter and resolving paths
@ -424,7 +509,7 @@ function M:resolve(other)
other.query = self.query other.query = self.query
end end
else else
other.path = absolutePath(self.path, other.path) other.path = reducePath(self.path, other.path)
end end
end end
return other return other
@ -440,7 +525,7 @@ function M:normalize()
end end
if self.path then if self.path then
local path = self.path local path = self.path
path = absolutePath(path, "") path = reducePath(path, "")
-- normalize multiple slashes -- normalize multiple slashes
path = string.gsub(path, "//+", "/") path = string.gsub(path, "//+", "/")
self.path = path self.path = path