diff --git a/nselib/idna.lua b/nselib/idna.lua index 1d186c7e7..55ddc464a 100644 --- a/nselib/idna.lua +++ b/nselib/idna.lua @@ -236,6 +236,33 @@ function validate(tableOfTables, checkHyphens) end +--- Breaks the tables of codepoints using a delimiter. +-- +-- @param A table is given as an input which contains codepoints. +-- @param ASCII value of delimiter is provided. +-- @return Returns table of tables after breaking the give table using delimiter. +local function breakInput(codepoints, delimiter) + + local tbl = {} + local output = {} + + local delimiter = delimiter or 0x002E + + for _, v in ipairs(codepoints) do + if v == delimiter then + table.insert(output, tbl) + tbl = {} + else + table.insert(tbl, v) + end + end + + table.insert(output, tbl) + + return output + +end + --- Converts the input codepoints into ASCII text based on IDNA rules. -- -- @param codepoints Table of codepoints of decoded input. @@ -249,13 +276,11 @@ end -- Default: false. -- @param useSTD3ASCIIRules Boolean value to represent ASCII rules. Default: true. -- @param delimiter codepoint of the character to be used as delimiter. --- @param encoder Encoder function to convert a Unicode codepoint into a --- string of bytes. -- @param An decoder function to decode the input string -- into an array of code points. -- @return Returns the IDNA ASCII format of the input. -- @return Throws nil, if there is any error in conversion. -function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules, delimiter, encoder, decoder) +function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules, delimiter, decoder) -- Assigns default values if not specified. if transitionalProcessing == nil then @@ -282,7 +307,6 @@ function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, ch end delimiter = delimiter or 0x002E - encoder = encoder or unicode.utf8_enc decoder = decoder or unicode.utf8_dec local decoded_tbl, disallowedCodePoints = map(codepoints, useSTD3ASCIIRules, transitionalProcessing) @@ -297,7 +321,7 @@ function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, ch end -- Breaks the codepoints into multiple tables using delimiter. - decoded_tbl = punycode.breakInput(decoded_tbl, delimiter) + decoded_tbl = breakInput(decoded_tbl, delimiter) if decoded_tbl == nil then return nil @@ -308,20 +332,16 @@ function toASCII(codepoints, transitionalProcessing, checkHyphens, checkBidi, ch return nil end - local stringLabels = {} - - -- Convert the codepoints into Unicode strings before passing them to mapLabels function. - for _, label in ipairs(decoded_tbl) do - table.insert(stringLabels, unicode.encode(label, encoder)) + for i, label in ipairs(decoded_tbl) do + decoded_tbl[i] = punycode.encode_label(label) end - - return punycode.mapLabels(stringLabels, punycode.encode_label, decoder, unicode.encode({0x002E}, encoder)) + return table.concat(decoded_tbl, ".") end ---- Converts the input into Unicode codepoitns based on IDNA rules. +--- Converts the input into Unicode codepoints based on IDNA rules. -- --- @param codepoints Table of codepoints of decoded input. +-- @param name A domain name in string format -- @param transitionalProcessing Boolean value. Default: true. -- @param checkHyphens Boolean flag for checking hyphens presence in input. -- Default: true. @@ -333,11 +353,9 @@ end -- @param delimiter, codepoint of the character to be used as delimiter. -- @param encoder Encoder function to convert a Unicode codepoint into a -- string of bytes. --- @param An decoder function to decode the input string --- into an array of code points. -- @return Returns the Unicode format of the input based on IDNA rules. -- @return Throws nil, if there is any error in conversion. -function toUnicode(decoded_tbl, transitionalProcessing, checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules, delimiter, encoder, decoder) +function toUnicode(decoded_tbl, transitionalProcessing, checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules, delimiter, encoder) -- Assigns default values if not specified. if transitionalProcessing == nil then @@ -358,22 +376,25 @@ function toUnicode(decoded_tbl, transitionalProcessing, checkHyphens, checkBidi, delimiter = delimiter or 0x002E encoder = encoder or unicode.utf8_enc - decoder = decoder or unicode.utf8_dec -- Breaks the codepoints into multiple tables using delimiter. - decoded_tbl = punycode.breakInput(decoded_tbl, delimiter) + decoded_tbl = stdnse.strsplit('%'.. string.char(delimiter), decoded_tbl) if decoded_tbl == nil then return nil end - local stringLabels = {} - - -- Format the codepoints into strings before passing to punycode.mapLabels - for _, label in ipairs(decoded_tbl) do - table.insert(stringLabels, unicode.encode(label, encoder)) + local output = {} + for i, label in ipairs(decoded_tbl) do + local decoded = punycode.decode_label(label) + for j = 1, #decoded do + output[#output+1] = decoded[j] + end + if i < #decoded_tbl then + output[#output+1] = delimiter + end end - return punycode.mapLabels(stringLabels, punycode.decode_label, encoder, unicode.encode({0x002E}, encoder)) + return unicode.encode(output, encoder) end @@ -528,7 +549,7 @@ end for _, v in ipairs(encodingAndDecodingTestCases) do test_suite:add_test(unittest.equal(toASCII(unicode.decode(v[1], unicode.utf8_dec)), v[2])) - test_suite:add_test(unittest.equal(toUnicode(unicode.decode(v[2], unicode.utf8_dec)), v[1])) + test_suite:add_test(unittest.equal(toUnicode(v[2],nil,nil,nil,nil,nil,nil,unicode.utf8_enc), v[1])) end for _, v in ipairs(multipleProcessingTestCases) do diff --git a/nselib/punycode.lua b/nselib/punycode.lua index 41eb264c3..ec389d45a 100644 --- a/nselib/punycode.lua +++ b/nselib/punycode.lua @@ -61,25 +61,6 @@ local delimiter = char("0x2D") -- Convenience shortcuts local baseMinusTMin = base - tMin --- This function finds and replaces matched values in a table. --- --- @param tbl Table of values. --- @param val Value to to be replaced in the table. --- @param new_val Value to be replaced with. --- @return Returns a new table with new values. -local function find_and_replace(tbl, val, new_val) - - for index, data in pairs(tbl) do - if data == val then - tbl[index] = new_val - end - end - - return tbl - -end - - -- Bias adaptation function as per section 3.4 of RFC 3492. -- https://tools.ietf.org/html/rfc3492#section-3.4 -- The following function is adapted from punycode.js by Mathias Bynens @@ -162,18 +143,15 @@ end -- Creates a string based on an array of numeric code points. -- --- @param input String of input to be encoded. +-- @param input list-table of Unicode code points -- @param decoder Sets the decoding format to be used. -- @return The new encoded string -- The following function is adapted from punycode.js by Mathias Bynens -- under the MIT License. -function encode_input(input, decoder) +function encode_input(input) local output = {} - -- Convert the input into an array of Unicode code points. - input = unicode.decode(input, decoder) - -- Cache the length. local inputLength = #input @@ -283,14 +261,13 @@ function encode_input(input, decoder) end -- Converts a Punycode string of ASCII-only symbols to a --- string of Unicode symbols. +-- list-table of Unicode code points. -- -- @param input The Punycode string of ASCII-only symbols. --- @param encoder Defines the type of encoding format to be used. --- @return The resulting string of Unicode symbols. +-- @return The resulting list-table of Unicode code points. -- The following function is adapted from punycode.js by Mathias Bynens -- under the MIT License. -function decode_input(input, encoder) +function decode_input(input) local output = {} local inputLength = #input @@ -397,23 +374,23 @@ function decode_input(input, encoder) i = i + 1 end - return unicode.encode(output, encoder) + return output end -- Performs punycode encoding on a label -- --- @param s String of input to be encoded. --- @param decoder A decoder function to convert the domain into a --- table of Unicode code points. --- @return Returns encoded string. -function encode_label(s, decoder) +-- If the label is already ASCII, it is returned as a string. If any encoding +-- was required, the "xn--" prefix is added. +-- +-- @param u A list-table of Unicode code points representing a domain label +-- @return A punycode-encoded ASCII string +function encode_label(u) local flag = false - local decoded_tbl = unicode.decode(s, decoder) -- Looks for non-ASCII character - for _, val in pairs(decoded_tbl) do + for _, val in pairs(u) do if not (val >=0 and val <= 127) then flag = true @@ -424,7 +401,7 @@ function encode_label(s, decoder) if flag then - local res, err = encode_input(s, decoder) + local res, err = encode_input(u) if err then return nil, err end @@ -432,22 +409,24 @@ function encode_label(s, decoder) return 'xn--' .. res else - return s + return unicode.encode(u, unicode.utf8_enc) end end --- Decodes a punycode-encoded label to Unicode. -- --- @param s String of input --- @param encoder An encoder function to convert a Unicode code point --- into a string of bytes. Default: unicode.utf8_enc --- @return Returns decoded string. -function decode_label(s, encoder) +-- If the label starts with "xn--", it will be punycode-decoded. Otherwise, it +-- will be decoded as UTF-8 (ASCII). The return value is always a table of +-- Unicode code points. +-- +-- @param s String of input. +-- @return A table of Unicode code points. +function decode_label(s) if match(s, "^xn%-%-") then - local res, err = decode_input(sub(s, 5):lower(), encoder) + local res, err = decode_input(sub(s, 5)) if err then return nil, err end @@ -455,67 +434,11 @@ function decode_label(s, encoder) return res else - return s + return unicode.decode(s, unicode.utf8_dec) end end ---- Splits the domain name and maps it with the corresponding data. --- --- @param s The domain name to be processed. --- @param fn The function to be called for every label. --- @param formatter The type of encoder/decoder to be used. --- @param delimiter delimiter character for concatinating output. --- @return Returns encoded/decoded string based on the formatter. --- The following function is adapted from punycode.js by Mathias Bynens --- under the MIT License. -function mapLabels(labels, fn, formatter, delimiter) - - local encoded = {} - - for index, v in ipairs(labels) do - - local res, err = fn(labels[index], formatter) - - if err then - stdnse.debug2(err) - return nil - end - - encoded[index] = res - end - - return table.concat(encoded, delimiter) - -end - ---- Breaks the tables of codepoints using a delimiter. --- --- @param A table is given as an input which contains codepoints. --- @param ASCII value of delimiter is provided. --- @return Returns table of tables after breaking the give table using delimiter. -function breakInput(codepoints, delimiter) - - local tbl = {} - local output = {} - - local delimiter = delimiter or 0x002E - - for _, v in ipairs(codepoints) do - if v == delimiter then - table.insert(output, tbl) - tbl = {} - else - table.insert(tbl, v) - end - end - - table.insert(output, tbl) - - return output - -end - --Ignore the rest if we are not testing. if not unittest.testing() then return _ENV @@ -549,8 +472,8 @@ test_suite = unittest.TestSuite:new() -- Running test cases against Encoding function. for i, v in ipairs(testCases) do - test_suite:add_test(unittest.equal(decode_label(v[1], unicode.utf8_enc), v[2])) - test_suite:add_test(unittest.equal(encode_label(v[2], unicode.utf8_dec), v[1])) + test_suite:add_test(unittest.equal(unicode.encode(decode_label(v[1]), unicode.utf8_enc), v[2])) + test_suite:add_test(unittest.equal(encode_label(unicode.decode(v[2], unicode.utf8_dec)), v[1])) end return _ENV