diff --git a/deps/ada.cpp b/deps/ada.cpp index 5eb0a3f..449046f 100644 --- a/deps/ada.cpp +++ b/deps/ada.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2023-09-05 16:55:45 -0400. Do not edit! */ +/* auto-generated on 2023-11-19 13:35:02 -0500. Do not edit! */ /* begin file src/ada.cpp */ #include "ada.h" /* begin file src/checkers.cpp */ @@ -7,62 +7,79 @@ namespace ada::checkers { ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept { - size_t last_dot = view.rfind('.'); - if (last_dot == view.size() - 1) { + // The string is not empty and does not contain upper case ASCII characters. + // + // Optimization. To be considered as a possible ipv4, the string must end + // with 'x' or a lowercase hex character. + // Most of the time, this will be false so this simple check will save a lot + // of effort. + char last_char = view.back(); + // If the address ends with a dot, we need to prune it (special case). + if (last_char == '.') { view.remove_suffix(1); - last_dot = view.rfind('.'); + if (view.empty()) { + return false; + } + last_char = view.back(); } - std::string_view number = - (last_dot == std::string_view::npos) ? view : view.substr(last_dot + 1); - if (number.empty()) { + bool possible_ipv4 = (last_char >= '0' && last_char <= '9') || + (last_char >= 'a' && last_char <= 'f') || + last_char == 'x'; + if (!possible_ipv4) { return false; } + // From the last character, find the last dot. + size_t last_dot = view.rfind('.'); + if (last_dot != std::string_view::npos) { + // We have at least one dot. + view = view.substr(last_dot + 1); + } /** Optimization opportunity: we have basically identified the last number of the ipv4 if we return true here. We might as well parse it and have at least one number parsed when we get to parse_ipv4. 
*/ - if (std::all_of(number.begin(), number.end(), ada::checkers::is_digit)) { + if (std::all_of(view.begin(), view.end(), ada::checkers::is_digit)) { + return true; + } + // It could be hex (0x), but not if there is a single character. + if (view.size() == 1) { + return false; + } + // It must start with 0x. + if (!std::equal(view.begin(), view.begin() + 2, "0x")) { + return false; + } + // We must allow "0x". + if (view.size() == 2) { return true; } - return (checkers::has_hex_prefix(number) && - std::all_of(number.begin() + 2, number.end(), - ada::unicode::is_lowercase_hex)); + // We have 0x followed by some characters, we need to check that they are + // hexadecimals. + return std::all_of(view.begin() + 2, view.end(), + ada::unicode::is_lowercase_hex); } // for use with path_signature, we include all characters that need percent // encoding. -static constexpr uint8_t path_signature_table[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; -static_assert(path_signature_table[uint8_t('?')] == 1); -static_assert(path_signature_table[uint8_t('`')] == 1); -static_assert(path_signature_table[uint8_t('{')] == 1); -static_assert(path_signature_table[uint8_t('}')] == 1); -// -static_assert(path_signature_table[uint8_t(' ')] == 1); 
-static_assert(path_signature_table[uint8_t('?')] == 1); -static_assert(path_signature_table[uint8_t('"')] == 1); -static_assert(path_signature_table[uint8_t('#')] == 1); -static_assert(path_signature_table[uint8_t('<')] == 1); -static_assert(path_signature_table[uint8_t('>')] == 1); -static_assert(path_signature_table[uint8_t('\\')] == 2); -static_assert(path_signature_table[uint8_t('.')] == 4); -static_assert(path_signature_table[uint8_t('%')] == 8); - -// -static_assert(path_signature_table[0] == 1); -static_assert(path_signature_table[31] == 1); -static_assert(path_signature_table[127] == 1); -static_assert(path_signature_table[128] == 1); -static_assert(path_signature_table[255] == 1); +static constexpr std::array path_signature_table = + []() constexpr { + std::array result{}; + for (size_t i = 0; i < 256; i++) { + if (i <= 0x20 || i == 0x22 || i == 0x23 || i == 0x3c || i == 0x3e || + i == 0x3f || i == 0x60 || i == 0x7b || i == 0x7b || i == 0x7d || + i > 0x7e) { + result[i] = 1; + } else if (i == 0x25) { + result[i] = 8; + } else if (i == 0x2e) { + result[i] = 4; + } else if (i == 0x5c) { + result[i] = 2; + } else { + result[i] = 0; + } + } + return result; + }(); ada_really_inline constexpr uint8_t path_signature( std::string_view input) noexcept { @@ -116,7 +133,7 @@ ada_really_inline constexpr bool verify_dns_length( ADA_PUSH_DISABLE_ALL_WARNINGS /* begin file src/ada_idna.cpp */ -/* auto-generated on 2023-08-29 15:28:19 -0400. Do not edit! */ +/* auto-generated on 2023-09-19 15:58:51 -0400. Do not edit! 
*/ /* begin file src/idna.cpp */ /* begin file src/unicode_transcoding.cpp */ @@ -9505,18 +9522,19 @@ bool is_label_valid(const std::u32string_view label) { namespace ada::idna { -bool constexpr begins_with(std::u32string_view view, - std::u32string_view prefix) { +bool begins_with(std::u32string_view view, std::u32string_view prefix) { if (view.size() < prefix.size()) { return false; } + // constexpr as of C++20 return std::equal(prefix.begin(), prefix.end(), view.begin()); } -bool constexpr begins_with(std::string_view view, std::string_view prefix) { +bool begins_with(std::string_view view, std::string_view prefix) { if (view.size() < prefix.size()) { return false; } + // constexpr as of C++20 return std::equal(prefix.begin(), prefix.end(), view.begin()); } @@ -9809,6 +9827,17 @@ constexpr bool to_lower_ascii(char* input, size_t length) noexcept { #if ADA_NEON ada_really_inline bool has_tabs_or_newline( std::string_view user_input) noexcept { + // first check for short strings in which case we do it naively. 
+ if (user_input.size() < 16) { // slow path + for (size_t i = 0; i < user_input.size(); i++) { + if (user_input[i] == '\r' || user_input[i] == '\n' || + user_input[i] == '\t') { + return true; + } + } + return false; + } + // fast path for long strings (expected to be common) size_t i = 0; const uint8x16_t mask1 = vmovq_n_u8('\r'); const uint8x16_t mask2 = vmovq_n_u8('\n'); @@ -9821,9 +9850,8 @@ ada_really_inline bool has_tabs_or_newline( vceqq_u8(word, mask3)); } if (i < user_input.size()) { - uint8_t buffer[16]{}; - memcpy(buffer, user_input.data() + i, user_input.size() - i); - uint8x16_t word = vld1q_u8((const uint8_t*)user_input.data() + i); + uint8x16_t word = + vld1q_u8((const uint8_t*)user_input.data() + user_input.length() - 16); running = vorrq_u8(vorrq_u8(running, vorrq_u8(vceqq_u8(word, mask1), vceqq_u8(word, mask2))), vceqq_u8(word, mask3)); @@ -9833,6 +9861,17 @@ ada_really_inline bool has_tabs_or_newline( #elif ADA_SSE2 ada_really_inline bool has_tabs_or_newline( std::string_view user_input) noexcept { + // first check for short strings in which case we do it naively. 
+ if (user_input.size() < 16) { // slow path + for (size_t i = 0; i < user_input.size(); i++) { + if (user_input[i] == '\r' || user_input[i] == '\n' || + user_input[i] == '\t') { + return true; + } + } + return false; + } + // fast path for long strings (expected to be common) size_t i = 0; const __m128i mask1 = _mm_set1_epi8('\r'); const __m128i mask2 = _mm_set1_epi8('\n'); @@ -9846,9 +9885,8 @@ ada_really_inline bool has_tabs_or_newline( _mm_cmpeq_epi8(word, mask3)); } if (i < user_input.size()) { - alignas(16) uint8_t buffer[16]{}; - memcpy(buffer, user_input.data() + i, user_input.size() - i); - __m128i word = _mm_load_si128((const __m128i*)buffer); + __m128i word = _mm_loadu_si128( + (const __m128i*)(user_input.data() + user_input.length() - 16)); running = _mm_or_si128( _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1), _mm_cmpeq_epi8(word, mask2))), @@ -9891,56 +9929,36 @@ ada_really_inline bool has_tabs_or_newline( // U+0020 SPACE, U+0023 (#), U+002F (/), U+003A (:), U+003C (<), U+003E (>), // U+003F (?), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), U+005E (^), or // U+007C (|). 
-constexpr static bool is_forbidden_host_code_point_table[] = { - 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -static_assert(sizeof(is_forbidden_host_code_point_table) == 256); +constexpr static std::array is_forbidden_host_code_point_table = + []() constexpr { + std::array result{}; + for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<', + '>', '?', '@', '[', '\\', ']', '^', '|'}) { + result[c] = true; + } + return result; + }(); ada_really_inline constexpr bool is_forbidden_host_code_point( const char c) noexcept { return is_forbidden_host_code_point_table[uint8_t(c)]; } -static_assert(unicode::is_forbidden_host_code_point('\0')); -static_assert(unicode::is_forbidden_host_code_point('\t')); -static_assert(unicode::is_forbidden_host_code_point('\n')); -static_assert(unicode::is_forbidden_host_code_point('\r')); -static_assert(unicode::is_forbidden_host_code_point(' ')); -static_assert(unicode::is_forbidden_host_code_point('#')); -static_assert(unicode::is_forbidden_host_code_point('/')); -static_assert(unicode::is_forbidden_host_code_point(':')); -static_assert(unicode::is_forbidden_host_code_point('?')); -static_assert(unicode::is_forbidden_host_code_point('@')); 
-static_assert(unicode::is_forbidden_host_code_point('[')); -static_assert(unicode::is_forbidden_host_code_point('?')); -static_assert(unicode::is_forbidden_host_code_point('<')); -static_assert(unicode::is_forbidden_host_code_point('>')); -static_assert(unicode::is_forbidden_host_code_point('\\')); -static_assert(unicode::is_forbidden_host_code_point(']')); -static_assert(unicode::is_forbidden_host_code_point('^')); -static_assert(unicode::is_forbidden_host_code_point('|')); - -constexpr static uint8_t is_forbidden_domain_code_point_table[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; +constexpr static std::array is_forbidden_domain_code_point_table = + []() constexpr { + std::array result{}; + for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<', + '>', '?', '@', '[', '\\', ']', '^', '|', '%'}) { + result[c] = true; + } + for (uint8_t c = 0; c <= 32; c++) { + result[c] = true; + } + for (size_t c = 127; c < 255; c++) { + result[c] = true; + } + return result; + }(); static_assert(sizeof(is_forbidden_domain_code_point_table) == 256); @@ -9965,22 +9983,24 @@ ada_really_inline constexpr bool contains_forbidden_domain_code_point( return accumulator; } -constexpr static uint8_t 
is_forbidden_domain_code_point_table_or_upper[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - -static_assert(sizeof(is_forbidden_domain_code_point_table_or_upper) == 256); -static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('A')] == 2); -static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('Z')] == 2); +constexpr static std::array + is_forbidden_domain_code_point_table_or_upper = []() constexpr { + std::array result{}; + for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<', + '>', '?', '@', '[', '\\', ']', '^', '|', '%'}) { + result[c] = 1; + } + for (uint8_t c = 'A'; c <= 'Z'; c++) { + result[c] = 2; + } + for (uint8_t c = 0; c <= 32; c++) { + result[c] = 1; + } + for (size_t c = 127; c < 255; c++) { + result[c] = 1; + } + return result; + }(); ada_really_inline constexpr uint8_t contains_forbidden_domain_code_point_or_upper(const char* input, @@ -10004,41 +10024,22 @@ contains_forbidden_domain_code_point_or_upper(const char* input, return accumulator; } -static_assert(unicode::is_forbidden_domain_code_point('%')); -static_assert(unicode::is_forbidden_domain_code_point('\x7f')); -static_assert(unicode::is_forbidden_domain_code_point('\0')); 
-static_assert(unicode::is_forbidden_domain_code_point('\t')); -static_assert(unicode::is_forbidden_domain_code_point('\n')); -static_assert(unicode::is_forbidden_domain_code_point('\r')); -static_assert(unicode::is_forbidden_domain_code_point(' ')); -static_assert(unicode::is_forbidden_domain_code_point('#')); -static_assert(unicode::is_forbidden_domain_code_point('/')); -static_assert(unicode::is_forbidden_domain_code_point(':')); -static_assert(unicode::is_forbidden_domain_code_point('?')); -static_assert(unicode::is_forbidden_domain_code_point('@')); -static_assert(unicode::is_forbidden_domain_code_point('[')); -static_assert(unicode::is_forbidden_domain_code_point('?')); -static_assert(unicode::is_forbidden_domain_code_point('<')); -static_assert(unicode::is_forbidden_domain_code_point('>')); -static_assert(unicode::is_forbidden_domain_code_point('\\')); -static_assert(unicode::is_forbidden_domain_code_point(']')); -static_assert(unicode::is_forbidden_domain_code_point('^')); -static_assert(unicode::is_forbidden_domain_code_point('|')); - -constexpr static bool is_alnum_plus_table[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - -static_assert(sizeof(is_alnum_plus_table) == 256); +// std::isalnum(c) || c == '+' || c == '-' 
|| c == '.') is true for +constexpr static std::array is_alnum_plus_table = []() constexpr { + std::array result{}; + for (size_t c = 0; c < 256; c++) { + if (c >= '0' && c <= '9') { + result[c] = true; + } else if (c >= 'a' && c <= 'z') { + result[c] = true; + } else if (c >= 'A' && c <= 'Z') { + result[c] = true; + } else if (c == '+' || c == '-' || c == '.') { + result[c] = true; + } + } + return result; +}(); ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept { return is_alnum_plus_table[uint8_t(c)]; @@ -10046,13 +10047,6 @@ ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept { // following under most compilers: return // return (std::isalnum(c) || c == '+' || c == '-' || c == '.'); } -static_assert(unicode::is_alnum_plus('+')); -static_assert(unicode::is_alnum_plus('-')); -static_assert(unicode::is_alnum_plus('.')); -static_assert(unicode::is_alnum_plus('0')); -static_assert(unicode::is_alnum_plus('1')); -static_assert(unicode::is_alnum_plus('a')); -static_assert(unicode::is_alnum_plus('b')); ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept { return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || @@ -10596,155 +10590,302 @@ ada_really_inline void resize(std::string_view& input, size_t pos) noexcept { input.remove_suffix(input.size() - pos); } -// Reverse the byte order. 
-ada_really_inline uint64_t swap_bytes(uint64_t val) noexcept { - // performance: this often compiles to a single instruction (e.g., bswap) - return ((((val)&0xff00000000000000ull) >> 56) | - (((val)&0x00ff000000000000ull) >> 40) | - (((val)&0x0000ff0000000000ull) >> 24) | - (((val)&0x000000ff00000000ull) >> 8) | - (((val)&0x00000000ff000000ull) << 8) | - (((val)&0x0000000000ff0000ull) << 24) | - (((val)&0x000000000000ff00ull) << 40) | - (((val)&0x00000000000000ffull) << 56)); -} - -ada_really_inline uint64_t swap_bytes_if_big_endian(uint64_t val) noexcept { - // performance: under little-endian systems (most systems), this function - // is free (just returns the input). -#if ADA_IS_BIG_ENDIAN - return swap_bytes(val); -#else - return val; // unchanged (trivial) -#endif +// computes the number of trailing zeroes +// this is a private inline function only defined in this source file. +ada_really_inline int trailing_zeroes(uint32_t input_num) noexcept { +#ifdef ADA_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward(&ret, input_num); + return (int)ret; +#else // ADA_REGULAR_VISUAL_STUDIO + return __builtin_ctzl(input_num); +#endif // ADA_REGULAR_VISUAL_STUDIO } // starting at index location, this finds the next location of a character // :, /, \\, ? or [. If none is found, view.size() is returned. // For use within get_host_delimiter_location. +#if ADA_NEON +// The ada_make_uint8x16_t macro is necessary because Visual Studio does not +// support direct initialization of uint8x16_t. 
See +// https://developercommunity.visualstudio.com/t/error-C2078:-too-many-initializers-whe/402911?q=backend+neon +#ifndef ada_make_uint8x16_t +#define ada_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + static uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_u8(array); \ + }()) +#endif + ada_really_inline size_t find_next_host_delimiter_special( std::string_view view, size_t location) noexcept { - // performance: if you plan to call find_next_host_delimiter more than once, - // you *really* want find_next_host_delimiter to be inlined, because - // otherwise, the constants may get reloaded each time (bad). - auto has_zero_byte = [](uint64_t v) { - return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); - }; - auto index_of_first_set_byte = [](uint64_t v) { - return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; - }; - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; + // first check for short strings in which case we do it naively. + if (view.size() - location < 16) { // slow path + for (size_t i = location; i < view.size(); i++) { + if (view[i] == ':' || view[i] == '/' || view[i] == '\\' || + view[i] == '?' 
|| view[i] == '[') { + return i; + } + } + return size_t(view.size()); + } + auto to_bitmask = [](uint8x16_t input) -> uint16_t { + uint8x16_t bit_mask = + ada_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, + 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); + uint8x16_t minput = vandq_u8(input, bit_mask); + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); }; + + // fast path for long strings (expected to be common) size_t i = location; - uint64_t mask1 = broadcast(':'); - uint64_t mask2 = broadcast('/'); - uint64_t mask3 = broadcast('\\'); - uint64_t mask4 = broadcast('?'); - uint64_t mask5 = broadcast('['); - // This loop will get autovectorized under many optimizing compilers, - // so you get actually SIMD! - for (; i + 7 < view.size(); i += 8) { - uint64_t word{}; - // performance: the next memcpy translates into a single CPU instruction. - memcpy(&word, view.data() + i, sizeof(word)); - // performance: on little-endian systems (most systems), this next line is - // free. 
- word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t xor4 = word ^ mask4; - uint64_t xor5 = word ^ mask5; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor3) | has_zero_byte(xor4) | - has_zero_byte(xor5); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + uint8x16_t low_mask = + ada_make_uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x04, 0x04, 0x00, 0x00, 0x03); + uint8x16_t high_mask = + ada_make_uint8x16_t(0x00, 0x00, 0x02, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + uint8x16_t fmask = vmovq_n_u8(0xf); + uint8x16_t zero{0}; + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t*)view.data() + i); + uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask)); + uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4)); + uint8x16_t classify = vandq_u8(lowpart, highpart); + if (vmaxvq_u8(classify) != 0) { + uint8x16_t is_zero = vceqq_u8(classify, zero); + uint16_t is_non_zero = ~to_bitmask(is_zero); + return i + trailing_zeroes(is_non_zero); } } + if (i < view.size()) { - uint64_t word{}; - // performance: the next memcpy translates into a function call, but - // that is difficult to avoid. Might be a bit expensive. 
- memcpy(&word, view.data() + i, view.size() - i); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t xor4 = word ^ mask4; - uint64_t xor5 = word ^ mask5; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor3) | has_zero_byte(xor4) | - has_zero_byte(xor5); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + uint8x16_t word = + vld1q_u8((const uint8_t*)view.data() + view.length() - 16); + uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask)); + uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4)); + uint8x16_t classify = vandq_u8(lowpart, highpart); + if (vmaxvq_u8(classify) != 0) { + uint8x16_t is_zero = vceqq_u8(classify, zero); + uint16_t is_non_zero = ~to_bitmask(is_zero); + return view.length() - 16 + trailing_zeroes(is_non_zero); } } - return view.size(); + return size_t(view.size()); } +#elif ADA_SSE2 +ada_really_inline size_t find_next_host_delimiter_special( + std::string_view view, size_t location) noexcept { + // first check for short strings in which case we do it naively. + if (view.size() - location < 16) { // slow path + for (size_t i = location; i < view.size(); i++) { + if (view[i] == ':' || view[i] == '/' || view[i] == '\\' || + view[i] == '?' 
|| view[i] == '[') { + return i; + } + } + return size_t(view.size()); + } + // fast path for long strings (expected to be common) + size_t i = location; + const __m128i mask1 = _mm_set1_epi8(':'); + const __m128i mask2 = _mm_set1_epi8('/'); + const __m128i mask3 = _mm_set1_epi8('\\'); + const __m128i mask4 = _mm_set1_epi8('?'); + const __m128i mask5 = _mm_set1_epi8('['); + + for (; i + 15 < view.size(); i += 16) { + __m128i word = _mm_loadu_si128((const __m128i*)(view.data() + i)); + __m128i m1 = _mm_cmpeq_epi8(word, mask1); + __m128i m2 = _mm_cmpeq_epi8(word, mask2); + __m128i m3 = _mm_cmpeq_epi8(word, mask3); + __m128i m4 = _mm_cmpeq_epi8(word, mask4); + __m128i m5 = _mm_cmpeq_epi8(word, mask5); + __m128i m = _mm_or_si128( + _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m3, m4)), m5); + int mask = _mm_movemask_epi8(m); + if (mask != 0) { + return i + trailing_zeroes(mask); + } + } + if (i < view.size()) { + __m128i word = + _mm_loadu_si128((const __m128i*)(view.data() + view.length() - 16)); + __m128i m1 = _mm_cmpeq_epi8(word, mask1); + __m128i m2 = _mm_cmpeq_epi8(word, mask2); + __m128i m3 = _mm_cmpeq_epi8(word, mask3); + __m128i m4 = _mm_cmpeq_epi8(word, mask4); + __m128i m5 = _mm_cmpeq_epi8(word, mask5); + __m128i m = _mm_or_si128( + _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m3, m4)), m5); + int mask = _mm_movemask_epi8(m); + if (mask != 0) { + return view.length() - 16 + trailing_zeroes(mask); + } + } + return size_t(view.length()); +} +#else +// : / [ \\ ? 
+static constexpr std::array special_host_delimiters = + []() constexpr { + std::array result{}; + for (int i : {':', '/', '[', '\\', '?'}) { + result[i] = 1; + } + return result; + }(); +// credit: @the-moisrex recommended a table-based approach +ada_really_inline size_t find_next_host_delimiter_special( + std::string_view view, size_t location) noexcept { + auto const str = view.substr(location); + for (auto pos = str.begin(); pos != str.end(); ++pos) { + if (special_host_delimiters[(uint8_t)*pos]) { + return pos - str.begin() + location; + } + } + return size_t(view.size()); +} +#endif // starting at index location, this finds the next location of a character // :, /, ? or [. If none is found, view.size() is returned. // For use within get_host_delimiter_location. +#if ADA_NEON ada_really_inline size_t find_next_host_delimiter(std::string_view view, size_t location) noexcept { - // performance: if you plan to call find_next_host_delimiter more than once, - // you *really* want find_next_host_delimiter to be inlined, because - // otherwise, the constants may get reloaded each time (bad). - auto has_zero_byte = [](uint64_t v) { - return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); - }; - auto index_of_first_set_byte = [](uint64_t v) { - return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; - }; - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; + // first check for short strings in which case we do it naively. + if (view.size() - location < 16) { // slow path + for (size_t i = location; i < view.size(); i++) { + if (view[i] == ':' || view[i] == '/' || view[i] == '?' 
|| + view[i] == '[') { + return i; + } + } + return size_t(view.size()); + } + auto to_bitmask = [](uint8x16_t input) -> uint16_t { + uint8x16_t bit_mask = + ada_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, + 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); + uint8x16_t minput = vandq_u8(input, bit_mask); + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); }; + + // fast path for long strings (expected to be common) size_t i = location; - uint64_t mask1 = broadcast(':'); - uint64_t mask2 = broadcast('/'); - uint64_t mask4 = broadcast('?'); - uint64_t mask5 = broadcast('['); - // This loop will get autovectorized under many optimizing compilers, - // so you get actually SIMD! - for (; i + 7 < view.size(); i += 8) { - uint64_t word{}; - // performance: the next memcpy translates into a single CPU instruction. - memcpy(&word, view.data() + i, sizeof(word)); - // performance: on little-endian systems (most systems), this next line is - // free. 
- word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor4 = word ^ mask4; - uint64_t xor5 = word ^ mask5; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor4) | has_zero_byte(xor5); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + uint8x16_t low_mask = + ada_make_uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x04, 0x00, 0x00, 0x00, 0x03); + uint8x16_t high_mask = + ada_make_uint8x16_t(0x00, 0x00, 0x02, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + uint8x16_t fmask = vmovq_n_u8(0xf); + uint8x16_t zero{0}; + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t*)view.data() + i); + uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask)); + uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4)); + uint8x16_t classify = vandq_u8(lowpart, highpart); + if (vmaxvq_u8(classify) != 0) { + uint8x16_t is_zero = vceqq_u8(classify, zero); + uint16_t is_non_zero = ~to_bitmask(is_zero); + return i + trailing_zeroes(is_non_zero); } } + if (i < view.size()) { - uint64_t word{}; - // performance: the next memcpy translates into a function call, but - // that is difficult to avoid. Might be a bit expensive. - memcpy(&word, view.data() + i, view.size() - i); - // performance: on little-endian systems (most systems), this next line is - // free. 
- word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor4 = word ^ mask4; - uint64_t xor5 = word ^ mask5; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor4) | has_zero_byte(xor5); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + uint8x16_t word = + vld1q_u8((const uint8_t*)view.data() + view.length() - 16); + uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask)); + uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4)); + uint8x16_t classify = vandq_u8(lowpart, highpart); + if (vmaxvq_u8(classify) != 0) { + uint8x16_t is_zero = vceqq_u8(classify, zero); + uint16_t is_non_zero = ~to_bitmask(is_zero); + return view.length() - 16 + trailing_zeroes(is_non_zero); } } - return view.size(); + return size_t(view.size()); } +#elif ADA_SSE2 +ada_really_inline size_t find_next_host_delimiter(std::string_view view, + size_t location) noexcept { + // first check for short strings in which case we do it naively. + if (view.size() - location < 16) { // slow path + for (size_t i = location; i < view.size(); i++) { + if (view[i] == ':' || view[i] == '/' || view[i] == '?' 
|| + view[i] == '[') { + return i; + } + } + return size_t(view.size()); + } + // fast path for long strings (expected to be common) + size_t i = location; + const __m128i mask1 = _mm_set1_epi8(':'); + const __m128i mask2 = _mm_set1_epi8('/'); + const __m128i mask4 = _mm_set1_epi8('?'); + const __m128i mask5 = _mm_set1_epi8('['); + + for (; i + 15 < view.size(); i += 16) { + __m128i word = _mm_loadu_si128((const __m128i*)(view.data() + i)); + __m128i m1 = _mm_cmpeq_epi8(word, mask1); + __m128i m2 = _mm_cmpeq_epi8(word, mask2); + __m128i m4 = _mm_cmpeq_epi8(word, mask4); + __m128i m5 = _mm_cmpeq_epi8(word, mask5); + __m128i m = _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m4, m5)); + int mask = _mm_movemask_epi8(m); + if (mask != 0) { + return i + trailing_zeroes(mask); + } + } + if (i < view.size()) { + __m128i word = + _mm_loadu_si128((const __m128i*)(view.data() + view.length() - 16)); + __m128i m1 = _mm_cmpeq_epi8(word, mask1); + __m128i m2 = _mm_cmpeq_epi8(word, mask2); + __m128i m4 = _mm_cmpeq_epi8(word, mask4); + __m128i m5 = _mm_cmpeq_epi8(word, mask5); + __m128i m = _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m4, m5)); + int mask = _mm_movemask_epi8(m); + if (mask != 0) { + return view.length() - 16 + trailing_zeroes(mask); + } + } + return size_t(view.length()); +} +#else +// : / [ ? 
+static constexpr std::array host_delimiters = []() constexpr { + std::array result{}; + for (int i : {':', '/', '?', '['}) { + result[i] = 1; + } + return result; +}(); +// credit: @the-moisrex recommended a table-based approach +ada_really_inline size_t find_next_host_delimiter(std::string_view view, + size_t location) noexcept { + auto const str = view.substr(location); + for (auto pos = str.begin(); pos != str.end(); ++pos) { + if (host_delimiters[(uint8_t)*pos]) { + return pos - str.begin() + location; + } + } + return size_t(view.size()); +} +#endif ada_really_inline std::pair get_host_delimiter_location( const bool is_special, std::string_view& view) noexcept { @@ -11019,101 +11160,47 @@ ada_really_inline void strip_trailing_spaces_from_opaque_path( url.update_base_pathname(path); } +// @ / \\ ? +static constexpr std::array authority_delimiter_special = + []() constexpr { + std::array result{}; + for (int i : {'@', '/', '\\', '?'}) { + result[i] = 1; + } + return result; + }(); +// credit: @the-moisrex recommended a table-based approach ada_really_inline size_t find_authority_delimiter_special(std::string_view view) noexcept { - auto has_zero_byte = [](uint64_t v) { - return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); - }; - auto index_of_first_set_byte = [](uint64_t v) { - return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; - }; - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; - }; - size_t i = 0; - uint64_t mask1 = broadcast('@'); - uint64_t mask2 = broadcast('/'); - uint64_t mask3 = broadcast('?'); - uint64_t mask4 = broadcast('\\'); - - for (; i + 7 < view.size(); i += 8) { - uint64_t word{}; - memcpy(&word, view.data() + i, sizeof(word)); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t xor4 = word ^ mask4; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor3) 
| has_zero_byte(xor4); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); - } - } - - if (i < view.size()) { - uint64_t word{}; - memcpy(&word, view.data() + i, view.size() - i); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t xor4 = word ^ mask4; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor3) | has_zero_byte(xor4); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + // performance note: we might be able to gain further performance + // with SIMD instrinsics. + for (auto pos = view.begin(); pos != view.end(); ++pos) { + if (authority_delimiter_special[(uint8_t)*pos]) { + return pos - view.begin(); } } - - return view.size(); + return size_t(view.size()); } +// @ / ? +static constexpr std::array authority_delimiter = []() constexpr { + std::array result{}; + for (int i : {'@', '/', '?'}) { + result[i] = 1; + } + return result; +}(); +// credit: @the-moisrex recommended a table-based approach ada_really_inline size_t find_authority_delimiter(std::string_view view) noexcept { - auto has_zero_byte = [](uint64_t v) { - return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); - }; - auto index_of_first_set_byte = [](uint64_t v) { - return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; - }; - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; - }; - size_t i = 0; - uint64_t mask1 = broadcast('@'); - uint64_t mask2 = broadcast('/'); - uint64_t mask3 = broadcast('?'); - - for (; i + 7 < view.size(); i += 8) { - uint64_t word{}; - memcpy(&word, view.data() + i, sizeof(word)); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t is_match = - has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3); - if (is_match) { - return size_t(i + 
index_of_first_set_byte(is_match)); + // performance note: we might be able to gain further performance + // with SIMD instrinsics. + for (auto pos = view.begin(); pos != view.end(); ++pos) { + if (authority_delimiter[(uint8_t)*pos]) { + return pos - view.begin(); } } - - if (i < view.size()) { - uint64_t word{}; - memcpy(&word, view.data() + i, view.size() - i); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t is_match = - has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); - } - } - - return view.size(); + return size_t(view.size()); } } // namespace ada::helpers @@ -11122,6 +11209,7 @@ namespace ada { ada_warn_unused std::string to_string(ada::state state) { return ada::helpers::get_state(state); } +#undef ada_make_uint8x16_t } // namespace ada /* end file src/helpers.cpp */ /* begin file src/url.cpp */ @@ -11653,10 +11741,9 @@ ada_really_inline void url::parse_path(std::string_view input) { path = "/"; } } - return; } -std::string url::to_string() const { +[[nodiscard]] std::string url::to_string() const { if (!is_valid) { return "null"; } @@ -11776,7 +11863,7 @@ namespace ada { return host.value_or(""); } -[[nodiscard]] const std::string_view url::get_pathname() const noexcept { +[[nodiscard]] std::string_view url::get_pathname() const noexcept { return path; } @@ -11864,7 +11951,7 @@ bool url::set_host_or_hostname(const std::string_view input) { } // Let host be the result of host parsing host_view with url is not special. 
- if (host_view.empty()) { + if (host_view.empty() && !is_special()) { host = ""; return true; } @@ -12962,7 +13049,7 @@ template url_aggregator parse_url( namespace ada { -bool url_components::check_offset_consistency() const noexcept { +[[nodiscard]] bool url_components::check_offset_consistency() const noexcept { /** * https://user:pass@example.com:1234/foo/bar?baz#quux * | | | | ^^^^| | | @@ -13038,7 +13125,7 @@ bool url_components::check_offset_consistency() const noexcept { return true; } -std::string url_components::to_string() const { +[[nodiscard]] std::string url_components::to_string() const { std::string answer; auto back = std::back_insert_iterator(answer); answer.append("{\n"); @@ -13625,13 +13712,12 @@ bool url_aggregator::set_host_or_hostname(const std::string_view input) { // empty string, and either url includes credentials or url's port is // non-null, return. else if (host_view.empty() && - (is_special() || has_credentials() || - components.port != url_components::omitted)) { + (is_special() || has_credentials() || has_port())) { return false; } // Let host be the result of host parsing host_view with url is not special. - if (host_view.empty()) { + if (host_view.empty() && !is_special()) { if (has_hostname()) { clear_hostname(); // easy! 
} else if (has_dash_dot()) { @@ -13827,7 +13913,7 @@ bool url_aggregator::set_hostname(const std::string_view input) { return helpers::substring(buffer, 0, components.protocol_end); } -std::string ada::url_aggregator::to_string() const { +[[nodiscard]] std::string ada::url_aggregator::to_string() const { ada_log("url_aggregator::to_string buffer:", buffer, "[", buffer.size(), " bytes]"); if (!is_valid) { @@ -14272,7 +14358,7 @@ bool url_aggregator::parse_opaque_host(std::string_view input) { return true; } -std::string url_aggregator::to_diagram() const { +[[nodiscard]] std::string url_aggregator::to_diagram() const { if (!is_valid) { return "invalid"; } @@ -14429,7 +14515,7 @@ std::string url_aggregator::to_diagram() const { return answer; } -bool url_aggregator::validate() const noexcept { +[[nodiscard]] bool url_aggregator::validate() const noexcept { if (!is_valid) { return true; } @@ -14797,6 +14883,11 @@ ada::result& get_instance(void* result) noexcept { extern "C" { typedef void* ada_url; +typedef void* ada_url_search_params; +typedef void* ada_strings; +typedef void* ada_url_search_params_keys_iter; +typedef void* ada_url_search_params_values_iter; +typedef void* ada_url_search_params_entries_iter; struct ada_string { const char* data; @@ -14808,6 +14899,11 @@ struct ada_owned_string { size_t length; }; +struct ada_string_pair { + ada_string key; + ada_string value; +}; + ada_string ada_string_create(const char* data, size_t length) { ada_string out{}; out.data = data; @@ -15247,6 +15343,256 @@ ada_owned_string ada_idna_to_ascii(const char* input, size_t length) { return owned; } +ada_url_search_params ada_parse_search_params(const char* input, + size_t length) { + return new ada::result( + ada::url_search_params(std::string_view(input, length))); +} + +void ada_free_search_params(ada_url_search_params result) { + ada::result* r = + (ada::result*)result; + delete r; +} + +ada_owned_string ada_search_params_to_string(ada_url_search_params result) { + 
ada::result& r = + *(ada::result*)result; + if (!r) return ada_owned_string{NULL, 0}; + std::string out = r->to_string(); + ada_owned_string owned{}; + owned.length = out.size(); + owned.data = new char[owned.length]; + memcpy((void*)owned.data, out.data(), owned.length); + return owned; +} + +size_t ada_search_params_size(ada_url_search_params result) { + ada::result& r = + *(ada::result*)result; + if (!r) return 0; + return r->size(); +} + +void ada_search_params_sort(ada_url_search_params result) { + ada::result& r = + *(ada::result*)result; + if (r) r->sort(); +} + +void ada_search_params_append(ada_url_search_params result, const char* key, + size_t key_length, const char* value, + size_t value_length) { + ada::result& r = + *(ada::result*)result; + if (r) { + r->append(std::string_view(key, key_length), + std::string_view(value, value_length)); + } +} + +void ada_search_params_set(ada_url_search_params result, const char* key, + size_t key_length, const char* value, + size_t value_length) { + ada::result& r = + *(ada::result*)result; + if (r) { + r->set(std::string_view(key, key_length), + std::string_view(value, value_length)); + } +} + +void ada_search_params_remove(ada_url_search_params result, const char* key, + size_t key_length) { + ada::result& r = + *(ada::result*)result; + if (r) { + r->remove(std::string_view(key, key_length)); + } +} + +void ada_search_params_remove_value(ada_url_search_params result, + const char* key, size_t key_length, + const char* value, size_t value_length) { + ada::result& r = + *(ada::result*)result; + if (r) { + r->remove(std::string_view(key, key_length), + std::string_view(value, value_length)); + } +} + +bool ada_search_params_has(ada_url_search_params result, const char* key, + size_t key_length) { + ada::result& r = + *(ada::result*)result; + if (!r) return false; + return r->has(std::string_view(key, key_length)); +} + +bool ada_search_params_has_value(ada_url_search_params result, const char* key, + size_t 
key_length, const char* value, + size_t value_length) { + ada::result& r = + *(ada::result*)result; + if (!r) return false; + return r->has(std::string_view(key, key_length), + std::string_view(value, value_length)); +} + +ada_string ada_search_params_get(ada_url_search_params result, const char* key, + size_t key_length) { + ada::result& r = + *(ada::result*)result; + if (!r) return ada_string_create(NULL, 0); + auto found = r->get(std::string_view(key, key_length)); + if (!found.has_value()) return ada_string_create(NULL, 0); + return ada_string_create(found->data(), found->length()); +} + +ada_strings ada_search_params_get_all(ada_url_search_params result, + const char* key, size_t key_length) { + ada::result& r = + *(ada::result*)result; + if (!r) { + return new ada::result>( + std::vector()); + } + return new ada::result>( + r->get_all(std::string_view(key, key_length))); +} + +ada_url_search_params_keys_iter ada_search_params_get_keys( + ada_url_search_params result) { + ada::result& r = + *(ada::result*)result; + if (!r) { + return new ada::result( + ada::url_search_params_keys_iter()); + } + return new ada::result(r->get_keys()); +} + +ada_url_search_params_values_iter ada_search_params_get_values( + ada_url_search_params result) { + ada::result& r = + *(ada::result*)result; + if (!r) { + return new ada::result( + ada::url_search_params_values_iter()); + } + return new ada::result(r->get_values()); +} + +ada_url_search_params_entries_iter ada_search_params_get_entries( + ada_url_search_params result) { + ada::result& r = + *(ada::result*)result; + if (!r) { + return new ada::result( + ada::url_search_params_entries_iter()); + } + return new ada::result(r->get_entries()); +} + +void ada_free_strings(ada_strings result) { + ada::result>* r = + (ada::result>*)result; + delete r; +} + +size_t ada_strings_size(ada_strings result) { + ada::result>* r = + (ada::result>*)result; + if (!r) return 0; + return (*r)->size(); +} + +ada_string ada_strings_get(ada_strings 
result, size_t index) { + ada::result>* r = + (ada::result>*)result; + if (!r) return ada_string_create(NULL, 0); + std::string_view view = (*r)->at(index); + return ada_string_create(view.data(), view.length()); +} + +void ada_free_search_params_keys_iter(ada_url_search_params_keys_iter result) { + ada::result* r = + (ada::result*)result; + delete r; +} + +ada_string ada_search_params_keys_iter_next( + ada_url_search_params_keys_iter result) { + ada::result* r = + (ada::result*)result; + if (!r) return ada_string_create(NULL, 0); + auto next = (*r)->next(); + if (!next.has_value()) return ada_string_create(NULL, 0); + return ada_string_create(next->data(), next->length()); +} + +bool ada_search_params_keys_iter_has_next( + ada_url_search_params_keys_iter result) { + ada::result* r = + (ada::result*)result; + if (!r) return false; + return (*r)->has_next(); +} + +void ada_free_search_params_values_iter( + ada_url_search_params_values_iter result) { + ada::result* r = + (ada::result*)result; + delete r; +} + +ada_string ada_search_params_values_iter_next( + ada_url_search_params_values_iter result) { + ada::result* r = + (ada::result*)result; + if (!r) return ada_string_create(NULL, 0); + auto next = (*r)->next(); + if (!next.has_value()) return ada_string_create(NULL, 0); + return ada_string_create(next->data(), next->length()); +} + +bool ada_search_params_values_iter_has_next( + ada_url_search_params_values_iter result) { + ada::result* r = + (ada::result*)result; + if (!r) return false; + return (*r)->has_next(); +} + +void ada_free_search_params_entries_iter( + ada_url_search_params_entries_iter result) { + ada::result* r = + (ada::result*)result; + delete r; +} + +ada_string_pair ada_search_params_entries_iter_next( + ada_url_search_params_entries_iter result) { + ada::result* r = + (ada::result*)result; + if (!r) return {ada_string_create(NULL, 0), ada_string_create(NULL, 0)}; + auto next = (*r)->next(); + if (!next.has_value()) + return 
{ada_string_create(NULL, 0), ada_string_create(NULL, 0)}; + return ada_string_pair{ + ada_string_create(next->first.data(), next->first.length()), + ada_string_create(next->second.data(), next->second.length())}; +} + +bool ada_search_params_entries_iter_has_next( + ada_url_search_params_entries_iter result) { + ada::result* r = + (ada::result*)result; + if (!r) return false; + return (*r)->has_next(); +} + } // extern "C" /* end file src/ada_c.cpp */ /* end file src/ada.cpp */ diff --git a/deps/ada.h b/deps/ada.h index 1485e37..a4aa39d 100644 --- a/deps/ada.h +++ b/deps/ada.h @@ -1,4 +1,4 @@ -/* auto-generated on 2023-09-05 16:55:45 -0400. Do not edit! */ +/* auto-generated on 2023-11-19 13:35:02 -0500. Do not edit! */ /* begin file include/ada.h */ /** * @file ada.h @@ -8,7 +8,7 @@ #define ADA_H /* begin file include/ada/ada_idna.h */ -/* auto-generated on 2023-08-29 15:28:19 -0400. Do not edit! */ +/* auto-generated on 2023-09-19 15:58:51 -0400. Do not edit! */ /* begin file include/idna.h */ #ifndef ADA_IDNA_H #define ADA_IDNA_H @@ -98,7 +98,7 @@ namespace ada::idna { /** * @see https://www.unicode.org/reports/tr46/#Validity_Criteria */ -bool is_label_valid(const std::u32string_view label); +bool is_label_valid(std::u32string_view label); } // namespace ada::idna @@ -129,9 +129,8 @@ std::string to_ascii(std::string_view ut8_string); // https://url.spec.whatwg.org/#forbidden-domain-code-point bool contains_forbidden_domain_code_point(std::string_view ascii_string); -bool constexpr begins_with(std::u32string_view view, - std::u32string_view prefix); -bool constexpr begins_with(std::string_view view, std::string_view prefix); +bool begins_with(std::u32string_view view, std::u32string_view prefix); +bool begins_with(std::string_view view, std::string_view prefix); bool constexpr is_ascii(std::u32string_view view); bool constexpr is_ascii(std::string_view view); @@ -480,14 +479,14 @@ namespace ada { #endif // ADA_COMMON_DEFS_H /* end file include/ada/common_defs.h 
*/ -#include +#include /** * @namespace ada::character_sets * @brief Includes the definitions for unicode character sets. */ namespace ada::character_sets { -ada_really_inline bool bit_at(const uint8_t a[], const uint8_t i); +ada_really_inline bool bit_at(const uint8_t a[], uint8_t i); } // namespace ada::character_sets #endif // ADA_CHARACTER_SETS_H @@ -997,7 +996,7 @@ ada_really_inline bool bit_at(const uint8_t a[], const uint8_t i) { } // namespace ada::character_sets -#endif // ADA_CHARACTER_SETS_H +#endif // ADA_CHARACTER_SETS_INL_H /* end file include/ada/character_sets-inl.h */ /* begin file include/ada/checkers-inl.h */ /** @@ -1055,9 +1054,10 @@ inline constexpr bool is_normalized_windows_drive_letter( return input.size() >= 2 && (is_alpha(input[0]) && (input[1] == ':')); } -ada_really_inline constexpr bool begins_with(std::string_view view, - std::string_view prefix) { +ada_really_inline bool begins_with(std::string_view view, + std::string_view prefix) { // in C++20, you have view.begins_with(prefix) + // std::equal is constexpr in C++20 return view.size() >= prefix.size() && std::equal(prefix.begin(), prefix.end(), view.begin()); } @@ -1312,12 +1312,12 @@ struct url_components { * @return true if the offset values are * consistent with a possible URL string */ - bool check_offset_consistency() const noexcept; + [[nodiscard]] bool check_offset_consistency() const noexcept; /** * Converts a url_components to JSON stringified version. */ - std::string to_string() const; + [[nodiscard]] std::string to_string() const; }; // struct url_components @@ -1505,13 +1505,17 @@ struct url_base { * @return On failure, it returns zero. 
* @see https://url.spec.whatwg.org/#host-parsing */ - virtual ada_really_inline size_t parse_port( - std::string_view view, bool check_trailing_content = false) noexcept = 0; + virtual size_t parse_port(std::string_view view, + bool check_trailing_content) noexcept = 0; + + virtual ada_really_inline size_t parse_port(std::string_view view) noexcept { + return this->parse_port(view, false); + } /** * Returns a JSON string representation of this URL. */ - virtual std::string to_string() const = 0; + [[nodiscard]] virtual std::string to_string() const = 0; /** @private */ virtual inline void clear_pathname() = 0; @@ -1520,10 +1524,10 @@ struct url_base { virtual inline void clear_search() = 0; /** @private */ - virtual inline bool has_hash() const noexcept = 0; + [[nodiscard]] virtual inline bool has_hash() const noexcept = 0; /** @private */ - virtual inline bool has_search() const noexcept = 0; + [[nodiscard]] virtual inline bool has_search() const noexcept = 0; }; // url_base @@ -1593,7 +1597,7 @@ ada_really_inline bool shorten_path(std::string_view& path, * * @see https://url.spec.whatwg.org/ */ -ada_really_inline void parse_prepared_path(const std::string_view input, +ada_really_inline void parse_prepared_path(std::string_view input, ada::scheme::type type, std::string& path); @@ -1666,18 +1670,6 @@ template ada_really_inline void strip_trailing_spaces_from_opaque_path( url_type& url) noexcept; -/** - * @private - * Reverse the order of the bytes. - */ -ada_really_inline uint64_t swap_bytes(uint64_t val) noexcept; - -/** - * @private - * Reverse the order of the bytes but only if the system is big endian - */ -ada_really_inline uint64_t swap_bytes_if_big_endian(uint64_t val) noexcept; - /** * @private * Finds the delimiter of a view in authority state. 
@@ -4381,7 +4373,7 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept { } // namespace ada::scheme -#endif // ADA_SCHEME_H +#endif // ADA_SCHEME_INL_H /* end file include/ada/scheme-inl.h */ /* begin file include/ada/serializers.h */ /** @@ -4423,7 +4415,7 @@ std::string ipv6(const std::array& address) noexcept; * network address. * @see https://url.spec.whatwg.org/#concept-ipv4-serializer */ -std::string ipv4(const uint64_t address) noexcept; +std::string ipv4(uint64_t address) noexcept; } // namespace ada::serializers @@ -4508,8 +4500,7 @@ ada_really_inline bool has_tabs_or_newline( * Checks if the input is a forbidden host code point. * @see https://url.spec.whatwg.org/#forbidden-host-code-point */ -ada_really_inline constexpr bool is_forbidden_host_code_point( - const char c) noexcept; +ada_really_inline constexpr bool is_forbidden_host_code_point(char c) noexcept; /** * Checks if the input contains a forbidden domain code point. @@ -4533,12 +4524,12 @@ contains_forbidden_domain_code_point_or_upper(const char* input, * @see https://url.spec.whatwg.org/#forbidden-domain-code-point */ ada_really_inline constexpr bool is_forbidden_domain_code_point( - const char c) noexcept; + char c) noexcept; /** * Checks if the input is alphanumeric, '+', '-' or '.' */ -ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept; +ada_really_inline constexpr bool is_alnum_plus(char c) noexcept; /** * @details An ASCII hex digit is an ASCII upper hex digit or ASCII lower hex @@ -4546,7 +4537,7 @@ ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept; * range U+0041 (A) to U+0046 (F), inclusive. An ASCII lower hex digit is an * ASCII digit or a code point in the range U+0061 (a) to U+0066 (f), inclusive. */ -ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept; +ada_really_inline constexpr bool is_ascii_hex_digit(char c) noexcept; /** * Checks if the input is a C0 control or space character. 
@@ -4555,33 +4546,33 @@ ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept; * A C0 control is a code point in the range U+0000 NULL to U+001F INFORMATION * SEPARATOR ONE, inclusive. */ -ada_really_inline constexpr bool is_c0_control_or_space(const char c) noexcept; +ada_really_inline constexpr bool is_c0_control_or_space(char c) noexcept; /** * Checks if the input is a ASCII tab or newline character. * * @details An ASCII tab or newline is U+0009 TAB, U+000A LF, or U+000D CR. */ -ada_really_inline constexpr bool is_ascii_tab_or_newline(const char c) noexcept; +ada_really_inline constexpr bool is_ascii_tab_or_newline(char c) noexcept; /** * @details A double-dot path segment must be ".." or an ASCII case-insensitive * match for ".%2e", "%2e.", or "%2e%2e". */ ada_really_inline ada_constexpr bool is_double_dot_path_segment( - const std::string_view input) noexcept; + std::string_view input) noexcept; /** * @details A single-dot path segment must be "." or an ASCII case-insensitive * match for "%2e". */ ada_really_inline constexpr bool is_single_dot_path_segment( - const std::string_view input) noexcept; + std::string_view input) noexcept; /** * @details ipv4 character might contain 0-9 or a-f character ranges. */ -ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept; +ada_really_inline constexpr bool is_lowercase_hex(char c) noexcept; /** * @details Convert hex to binary. Caller is responsible to ensure that @@ -4597,20 +4588,20 @@ ada_really_inline unsigned constexpr convert_hex_to_binary(char c) noexcept; * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L245 * @see https://encoding.spec.whatwg.org/#utf-8-decode-without-bom */ -std::string percent_decode(const std::string_view input, size_t first_percent); +std::string percent_decode(std::string_view input, size_t first_percent); /** * Returns a percent-encoding string whether percent encoding was needed or not. 
* @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226 */ -std::string percent_encode(const std::string_view input, +std::string percent_encode(std::string_view input, const uint8_t character_set[]); /** * Returns a percent-encoded string version of input, while starting the percent * encoding at the provided index. * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226 */ -std::string percent_encode(const std::string_view input, +std::string percent_encode(std::string_view input, const uint8_t character_set[], size_t index); /** * Returns true if percent encoding was needed, in which case, we store @@ -4620,13 +4611,13 @@ std::string percent_encode(const std::string_view input, * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226 */ template -bool percent_encode(const std::string_view input, const uint8_t character_set[], +bool percent_encode(std::string_view input, const uint8_t character_set[], std::string& out); /** * Returns the index at which percent encoding should start, or (equivalently), * the length of the prefix that does not require percent encoding. */ -ada_really_inline size_t percent_encode_index(const std::string_view input, +ada_really_inline size_t percent_encode_index(std::string_view input, const uint8_t character_set[]); /** * Lowers the string in-place, assuming that the content is ASCII. 
@@ -4673,18 +4664,18 @@ struct url_aggregator : url_base { url_aggregator(url_aggregator &&u) noexcept = default; url_aggregator &operator=(url_aggregator &&u) noexcept = default; url_aggregator &operator=(const url_aggregator &u) = default; - ~url_aggregator() = default; - - bool set_href(const std::string_view input); - bool set_host(const std::string_view input); - bool set_hostname(const std::string_view input); - bool set_protocol(const std::string_view input); - bool set_username(const std::string_view input); - bool set_password(const std::string_view input); - bool set_port(const std::string_view input); - bool set_pathname(const std::string_view input); - void set_search(const std::string_view input); - void set_hash(const std::string_view input); + ~url_aggregator() override = default; + + bool set_href(std::string_view input); + bool set_host(std::string_view input); + bool set_hostname(std::string_view input); + bool set_protocol(std::string_view input); + bool set_username(std::string_view input); + bool set_password(std::string_view input); + bool set_port(std::string_view input); + bool set_pathname(std::string_view input); + void set_search(std::string_view input); + void set_hash(std::string_view input); [[nodiscard]] bool has_valid_domain() const noexcept override; /** @@ -4702,7 +4693,7 @@ struct url_aggregator : url_base { * @see https://url.spec.whatwg.org/#dom-url-href * @see https://url.spec.whatwg.org/#concept-url-serializer */ - inline std::string_view get_href() const noexcept; + [[nodiscard]] inline std::string_view get_href() const noexcept; /** * The username getter steps are to return this's URL's username. * This function does not allocate memory. 
@@ -4762,7 +4753,7 @@ struct url_aggregator : url_base { * @return size of the pathname in bytes * @see https://url.spec.whatwg.org/#dom-url-pathname */ - ada_really_inline uint32_t get_pathname_length() const noexcept; + [[nodiscard]] ada_really_inline uint32_t get_pathname_length() const noexcept; /** * Return U+003F (?), followed by this's URL's query. * This function does not allocate memory. @@ -4811,18 +4802,18 @@ struct url_aggregator : url_base { /** * Returns a string representation of this URL. */ - std::string to_string() const override; + [[nodiscard]] std::string to_string() const override; /** * Returns a string diagram of this URL. */ - std::string to_diagram() const; + [[nodiscard]] std::string to_diagram() const; /** * Verifies that the parsed URL could be valid. Useful for debugging purposes. * @return true if the URL is valid, otherwise return true of the offsets are * possible. */ - bool validate() const noexcept; + [[nodiscard]] bool validate() const noexcept; /** @return true if it has an host but it is the empty string */ [[nodiscard]] inline bool has_empty_hostname() const noexcept; @@ -4869,9 +4860,12 @@ struct url_aggregator : url_base { */ inline void reserve(uint32_t capacity); - ada_really_inline size_t - parse_port(std::string_view view, - bool check_trailing_content = false) noexcept override; + ada_really_inline size_t parse_port( + std::string_view view, bool check_trailing_content) noexcept override; + + ada_really_inline size_t parse_port(std::string_view view) noexcept override { + return this->parse_port(view, false); + } /** * Return true on success. 
@@ -4900,7 +4894,7 @@ struct url_aggregator : url_base { [[nodiscard]] inline bool cannot_have_credentials_or_port() const; template - bool set_host_or_hostname(const std::string_view input); + bool set_host_or_hostname(std::string_view input); ada_really_inline bool parse_host(std::string_view input); @@ -4911,26 +4905,26 @@ struct url_aggregator : url_base { inline void update_base_search(std::string_view input); inline void update_base_search(std::string_view input, const uint8_t *query_percent_encode_set); - inline void update_base_pathname(const std::string_view input); - inline void update_base_username(const std::string_view input); - inline void append_base_username(const std::string_view input); - inline void update_base_password(const std::string_view input); - inline void append_base_password(const std::string_view input); + inline void update_base_pathname(std::string_view input); + inline void update_base_username(std::string_view input); + inline void append_base_username(std::string_view input); + inline void update_base_password(std::string_view input); + inline void append_base_password(std::string_view input); inline void update_base_port(uint32_t input); - inline void append_base_pathname(const std::string_view input); - inline uint32_t retrieve_base_port() const; + inline void append_base_pathname(std::string_view input); + [[nodiscard]] inline uint32_t retrieve_base_port() const; inline void clear_hostname(); inline void clear_password(); inline void clear_pathname() override; - inline bool has_dash_dot() const noexcept; + [[nodiscard]] inline bool has_dash_dot() const noexcept; void delete_dash_dot(); inline void consume_prepared_path(std::string_view input); template [[nodiscard]] ada_really_inline bool parse_scheme_with_colon( - const std::string_view input); + std::string_view input); ada_really_inline uint32_t replace_and_resize(uint32_t start, uint32_t end, std::string_view input); - inline bool has_authority() const noexcept; + 
[[nodiscard]] inline bool has_authority() const noexcept; inline void set_protocol_as_file(); inline void set_scheme(std::string_view new_scheme) noexcept; /** @@ -5020,13 +5014,15 @@ inline constexpr bool is_normalized_windows_drive_letter( std::string_view input) noexcept; /** - * @warning Will be removed when Ada supports C++20. + * @warning Will be removed when Ada requires C++20. */ -ada_really_inline constexpr bool begins_with(std::string_view view, - std::string_view prefix); +ada_really_inline bool begins_with(std::string_view view, + std::string_view prefix); /** - * Returns true if an input is an ipv4 address. + * Returns true if an input is an ipv4 address. It is assumed that the string + * does not contain uppercase ASCII characters (the input should have been + * lowered cased before calling this function) and is not empty. */ ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept; @@ -5092,7 +5088,7 @@ struct url : url_base { url(url &&u) noexcept = default; url &operator=(url &&u) noexcept = default; url &operator=(const url &u) = default; - ~url() = default; + ~url() override = default; /** * @private @@ -5153,7 +5149,7 @@ struct url : url_base { /** * Returns a JSON string representation of this URL. */ - std::string to_string() const override; + [[nodiscard]] std::string to_string() const override; /** * @see https://url.spec.whatwg.org/#dom-url-href @@ -5200,7 +5196,7 @@ struct url : url_base { * @return a newly allocated string. 
* @see https://url.spec.whatwg.org/#dom-url-pathname */ - [[nodiscard]] const std::string_view get_pathname() const noexcept; + [[nodiscard]] std::string_view get_pathname() const noexcept; /** * Compute the pathname length in bytes without instantiating a view or a @@ -5208,7 +5204,7 @@ struct url : url_base { * @return size of the pathname in bytes * @see https://url.spec.whatwg.org/#dom-url-pathname */ - ada_really_inline size_t get_pathname_length() const noexcept; + [[nodiscard]] ada_really_inline size_t get_pathname_length() const noexcept; /** * Return U+003F (?), followed by this's URL's query. @@ -5228,60 +5224,60 @@ struct url : url_base { * @return Returns true on successful operation. * @see https://url.spec.whatwg.org/#dom-url-username */ - bool set_username(const std::string_view input); + bool set_username(std::string_view input); /** * @return Returns true on success. * @see https://url.spec.whatwg.org/#dom-url-password */ - bool set_password(const std::string_view input); + bool set_password(std::string_view input); /** * @return Returns true on success. * @see https://url.spec.whatwg.org/#dom-url-port */ - bool set_port(const std::string_view input); + bool set_port(std::string_view input); /** * This function always succeeds. * @see https://url.spec.whatwg.org/#dom-url-hash */ - void set_hash(const std::string_view input); + void set_hash(std::string_view input); /** * This function always succeeds. * @see https://url.spec.whatwg.org/#dom-url-search */ - void set_search(const std::string_view input); + void set_search(std::string_view input); /** * @return Returns true on success. * @see https://url.spec.whatwg.org/#dom-url-search */ - bool set_pathname(const std::string_view input); + bool set_pathname(std::string_view input); /** * @return Returns true on success. * @see https://url.spec.whatwg.org/#dom-url-host */ - bool set_host(const std::string_view input); + bool set_host(std::string_view input); /** * @return Returns true on success. 
* @see https://url.spec.whatwg.org/#dom-url-hostname */ - bool set_hostname(const std::string_view input); + bool set_hostname(std::string_view input); /** * @return Returns true on success. * @see https://url.spec.whatwg.org/#dom-url-protocol */ - bool set_protocol(const std::string_view input); + bool set_protocol(std::string_view input); /** * @see https://url.spec.whatwg.org/#dom-url-href */ - bool set_href(const std::string_view input); + bool set_href(std::string_view input); /** * The password getter steps are to return this's URL's password. @@ -5352,9 +5348,9 @@ struct url : url_base { inline void update_base_search(std::string_view input, const uint8_t query_percent_encode_set[]); inline void update_base_search(std::optional input); - inline void update_base_pathname(const std::string_view input); - inline void update_base_username(const std::string_view input); - inline void update_base_password(const std::string_view input); + inline void update_base_pathname(std::string_view input); + inline void update_base_username(std::string_view input); + inline void update_base_password(std::string_view input); inline void update_base_port(std::optional input); /** @@ -5400,9 +5396,12 @@ struct url : url_base { */ [[nodiscard]] inline bool cannot_have_credentials_or_port() const; - ada_really_inline size_t - parse_port(std::string_view view, - bool check_trailing_content = false) noexcept override; + ada_really_inline size_t parse_port( + std::string_view view, bool check_trailing_content) noexcept override; + + ada_really_inline size_t parse_port(std::string_view view) noexcept override { + return this->parse_port(view, false); + } /** * Take the scheme from another URL. 
The scheme string is copied from the @@ -5421,8 +5420,7 @@ struct url : url_base { [[nodiscard]] ada_really_inline bool parse_host(std::string_view input); template - [[nodiscard]] ada_really_inline bool parse_scheme( - const std::string_view input); + [[nodiscard]] ada_really_inline bool parse_scheme(std::string_view input); inline void clear_pathname() override; inline void clear_search() override; @@ -5438,7 +5436,7 @@ struct url : url_base { * * @see https://url.spec.whatwg.org/ */ - ada_really_inline void parse_path(const std::string_view input); + ada_really_inline void parse_path(std::string_view input); /** * Set the scheme for this URL. The provided scheme should be a valid @@ -5525,7 +5523,9 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url &u) { return out << u.to_string(); } -size_t url::get_pathname_length() const noexcept { return path.size(); } +[[nodiscard]] size_t url::get_pathname_length() const noexcept { + return path.size(); +} [[nodiscard]] ada_really_inline ada::url_components url::get_components() const noexcept { @@ -5902,7 +5902,7 @@ inline void url_aggregator::update_base_hostname(const std::string_view input) { ADA_ASSERT_TRUE(validate()); } -ada_really_inline uint32_t +[[nodiscard]] ada_really_inline uint32_t url_aggregator::get_pathname_length() const noexcept { ada_log("url_aggregator::get_pathname_length"); uint32_t ending_index = uint32_t(buffer.size()); @@ -6337,7 +6337,7 @@ inline void url_aggregator::clear_port() { ADA_ASSERT_TRUE(validate()); } -inline uint32_t url_aggregator::retrieve_base_port() const { +[[nodiscard]] inline uint32_t url_aggregator::retrieve_base_port() const { ada_log("url_aggregator::retrieve_base_port"); return components.port; } @@ -6557,31 +6557,45 @@ inline bool url_aggregator::has_hostname() const noexcept { inline bool url_aggregator::has_port() const noexcept { ada_log("url_aggregator::has_port"); - return components.pathname_start != components.host_end; + // A URL cannot have a 
username/password/port if its host is null or the empty + // string, or its scheme is "file". + return has_hostname() && components.pathname_start != components.host_end; } -inline bool url_aggregator::has_dash_dot() const noexcept { +[[nodiscard]] inline bool url_aggregator::has_dash_dot() const noexcept { // If url's host is null, url does not have an opaque path, url's path's size // is greater than 1, and url's path[0] is the empty string, then append // U+002F (/) followed by U+002E (.) to output. ada_log("url_aggregator::has_dash_dot"); - // Performance: instead of doing this potentially expensive check, we could - // just have a boolean value in the structure. #if ADA_DEVELOPMENT_CHECKS - if (components.pathname_start + 1 < buffer.size() && - components.pathname_start == components.host_end + 2) { - ADA_ASSERT_TRUE(buffer[components.host_end] == '/'); - ADA_ASSERT_TRUE(buffer[components.host_end + 1] == '.'); + // If pathname_start and host_end are exactly two characters apart, then we + // either have a one-digit port such as http://test.com:5?param=1 or else we + // have a /.: sequence such as "non-spec:/.//". We test that this is the case. 
+ if (components.pathname_start == components.host_end + 2) { + ADA_ASSERT_TRUE((buffer[components.host_end] == '/' && + buffer[components.host_end + 1] == '.') || + (buffer[components.host_end] == ':' && + checkers::is_digit(buffer[components.host_end + 1]))); + } + if (components.pathname_start == components.host_end + 2 && + buffer[components.host_end] == '/' && + buffer[components.host_end + 1] == '.') { + ADA_ASSERT_TRUE(components.pathname_start + 1 < buffer.size()); ADA_ASSERT_TRUE(buffer[components.pathname_start] == '/'); ADA_ASSERT_TRUE(buffer[components.pathname_start + 1] == '/'); } #endif - return !has_opaque_path && - components.pathname_start == components.host_end + 2 && - components.pathname_start + 1 < buffer.size(); + // Performance: it should be uncommon for components.pathname_start == + // components.host_end + 2 to be true. So we put this check first in the + // sequence. Most times, we do not have an opaque path. Checking for '/.' is + // more expensive, but should be uncommon. 
+ return components.pathname_start == components.host_end + 2 && + !has_opaque_path && buffer[components.host_end] == '/' && + buffer[components.host_end + 1] == '.'; } -inline std::string_view url_aggregator::get_href() const noexcept { +[[nodiscard]] inline std::string_view url_aggregator::get_href() + const noexcept { ada_log("url_aggregator::get_href"); return buffer; } @@ -6673,6 +6687,26 @@ inline std::ostream &operator<<(std::ostream &out, namespace ada { +enum class url_search_params_iter_type { + KEYS, + VALUES, + ENTRIES, +}; + +template +struct url_search_params_iter; + +typedef std::pair key_value_view_pair; + +using url_search_params_keys_iter = + url_search_params_iter; +using url_search_params_values_iter = + url_search_params_iter; +using url_search_params_entries_iter = + url_search_params_iter; + /** * @see https://url.spec.whatwg.org/#interface-urlsearchparams */ @@ -6735,6 +6769,42 @@ struct url_search_params { */ inline std::string to_string(); + /** + * Returns a simple JS-style iterator over all of the keys in this + * url_search_params. The keys in the iterator are not unique. The valid + * lifespan of the iterator is tied to the url_search_params. The iterator + * must be freed when you're done with it. + * @see https://url.spec.whatwg.org/#interface-urlsearchparams + */ + inline url_search_params_keys_iter get_keys(); + + /** + * Returns a simple JS-style iterator over all of the values in this + * url_search_params. The valid lifespan of the iterator is tied to the + * url_search_params. The iterator must be freed when you're done with it. + * @see https://url.spec.whatwg.org/#interface-urlsearchparams + */ + inline url_search_params_values_iter get_values(); + + /** + * Returns a simple JS-style iterator over all of the entries in this + * url_search_params. The entries are pairs of keys and corresponding values. + * The valid lifespan of the iterator is tied to the url_search_params. 
The + * iterator must be freed when you're done with it. + * @see https://url.spec.whatwg.org/#interface-urlsearchparams + */ + inline url_search_params_entries_iter get_entries(); + + /** + * C++ style conventional iterator support. const only because we + * do not really want the params to be modified via the iterator. + */ + inline auto begin() const { return params.begin(); } + inline auto end() const { return params.end(); } + inline auto front() const { return params.front(); } + inline auto back() const { return params.back(); } + inline auto operator[](size_t index) const { return params[index]; } + private: typedef std::pair key_value_pair; std::vector params{}; @@ -6743,8 +6813,44 @@ struct url_search_params { * @see https://url.spec.whatwg.org/#concept-urlencoded-parser */ void initialize(std::string_view init); + + template + friend struct url_search_params_iter; }; // url_search_params +/** + * Implements a non-conventional iterator pattern that is closer in style to + * JavaScript's definition of an iterator. + * + * @see https://webidl.spec.whatwg.org/#idl-iterable + */ +template +struct url_search_params_iter { + inline url_search_params_iter() : params(EMPTY) {} + url_search_params_iter(const url_search_params_iter &u) = default; + url_search_params_iter(url_search_params_iter &&u) noexcept = default; + url_search_params_iter &operator=(url_search_params_iter &&u) noexcept = + default; + url_search_params_iter &operator=(const url_search_params_iter &u) = default; + ~url_search_params_iter() = default; + + /** + * Return the next item in the iterator or std::nullopt if done. 
+ */ + inline std::optional next(); + + inline bool has_next(); + + private: + static url_search_params EMPTY; + inline url_search_params_iter(url_search_params &params_) : params(params_) {} + + url_search_params &params; + size_t pos = 0; + + friend struct url_search_params; +}; + } // namespace ada #endif /* end file include/ada/url_search_params.h */ @@ -6765,6 +6871,10 @@ struct url_search_params { namespace ada { +// A default, empty url_search_params for use with empty iterators. +template +url_search_params url_search_params_iter::EMPTY; + inline void url_search_params::initialize(std::string_view input) { if (!input.empty() && input.front() == '?') { input.remove_prefix(1); @@ -6912,6 +7022,48 @@ inline void url_search_params::sort() { }); } +inline url_search_params_keys_iter url_search_params::get_keys() { + return url_search_params_keys_iter(*this); +} + +/** + * @see https://url.spec.whatwg.org/#interface-urlsearchparams + */ +inline url_search_params_values_iter url_search_params::get_values() { + return url_search_params_values_iter(*this); +} + +/** + * @see https://url.spec.whatwg.org/#interface-urlsearchparams + */ +inline url_search_params_entries_iter url_search_params::get_entries() { + return url_search_params_entries_iter(*this); +} + +template +inline bool url_search_params_iter::has_next() { + return pos < params.params.size(); +} + +template <> +inline std::optional url_search_params_keys_iter::next() { + if (!has_next()) return std::nullopt; + return params.params[pos++].first; +} + +template <> +inline std::optional url_search_params_values_iter::next() { + if (!has_next()) return std::nullopt; + return params.params[pos++].second; +} + +template <> +inline std::optional +url_search_params_entries_iter::next() { + if (!has_next()) return std::nullopt; + return params.params[pos++]; +} + } // namespace ada #endif // ADA_URL_SEARCH_PARAMS_INL_H @@ -6926,14 +7078,14 @@ inline void url_search_params::sort() { #ifndef ADA_ADA_VERSION_H #define
ADA_ADA_VERSION_H -#define ADA_VERSION "2.6.7" +#define ADA_VERSION "2.7.4" namespace ada { enum { ADA_VERSION_MAJOR = 2, - ADA_VERSION_MINOR = 6, - ADA_VERSION_REVISION = 7, + ADA_VERSION_MINOR = 7, + ADA_VERSION_REVISION = 4, }; } // namespace ada diff --git a/deps/ada_c.h b/deps/ada_c.h index 0409155..173e27b 100644 --- a/deps/ada_c.h +++ b/deps/ada_c.h @@ -109,4 +109,77 @@ const ada_url_components* ada_get_components(ada_url result); ada_owned_string ada_idna_to_unicode(const char* input, size_t length); ada_owned_string ada_idna_to_ascii(const char* input, size_t length); +// url search params +typedef void* ada_url_search_params; + +// Represents an std::vector +typedef void* ada_strings; +typedef void* ada_url_search_params_keys_iter; +typedef void* ada_url_search_params_values_iter; + +typedef struct { + ada_string key; + ada_string value; +} ada_string_pair; + +typedef void* ada_url_search_params_entries_iter; + +ada_url_search_params ada_parse_search_params(const char* input, size_t length); +void ada_free_search_params(ada_url_search_params result); + +size_t ada_search_params_size(ada_url_search_params result); +void ada_search_params_sort(ada_url_search_params result); +ada_owned_string ada_search_params_to_string(ada_url_search_params result); + +void ada_search_params_append(ada_url_search_params result, const char* key, + size_t key_length, const char* value, + size_t value_length); +void ada_search_params_set(ada_url_search_params result, const char* key, + size_t key_length, const char* value, + size_t value_length); +void ada_search_params_remove(ada_url_search_params result, const char* key, + size_t key_length); +void ada_search_params_remove_value(ada_url_search_params result, + const char* key, size_t key_length, + const char* value, size_t value_length); +bool ada_search_params_has(ada_url_search_params result, const char* key, + size_t key_length); +bool ada_search_params_has_value(ada_url_search_params result, const char* key, + size_t 
key_length, const char* value, + size_t value_length); +ada_string ada_search_params_get(ada_url_search_params result, const char* key, + size_t key_length); +ada_strings ada_search_params_get_all(ada_url_search_params result, + const char* key, size_t key_length); +ada_url_search_params_keys_iter ada_search_params_get_keys( + ada_url_search_params result); +ada_url_search_params_values_iter ada_search_params_get_values( + ada_url_search_params result); +ada_url_search_params_entries_iter ada_search_params_get_entries( + ada_url_search_params result); + +void ada_free_strings(ada_strings result); +size_t ada_strings_size(ada_strings result); +ada_string ada_strings_get(ada_strings result, size_t index); + +void ada_free_search_params_keys_iter(ada_url_search_params_keys_iter result); +ada_string ada_search_params_keys_iter_next( + ada_url_search_params_keys_iter result); +bool ada_search_params_keys_iter_has_next( + ada_url_search_params_keys_iter result); + +void ada_free_search_params_values_iter( + ada_url_search_params_values_iter result); +ada_string ada_search_params_values_iter_next( + ada_url_search_params_values_iter result); +bool ada_search_params_values_iter_has_next( + ada_url_search_params_values_iter result); + +void ada_free_search_params_entries_iter( + ada_url_search_params_entries_iter result); +ada_string_pair ada_search_params_entries_iter_next( + ada_url_search_params_entries_iter result); +bool ada_search_params_entries_iter_has_next( + ada_url_search_params_entries_iter result); + #endif // ADA_C_H