diff --git a/include/asm/charmap.hpp b/include/asm/charmap.hpp index f861903f8..34a578dae 100644 --- a/include/asm/charmap.hpp +++ b/include/asm/charmap.hpp @@ -21,7 +21,9 @@ void charmap_Pop(); void charmap_CheckStack(); void charmap_Add(std::string const &mapping, std::vector &&value); bool charmap_HasChar(std::string const &mapping); +size_t charmap_CharSize(std::string const &mapping); std::vector charmap_Convert(std::string const &input); size_t charmap_ConvertNext(std::string_view &input, std::vector *output); +std::string charmap_Reverse(std::vector const &value, bool &unique); #endif // RGBDS_ASM_CHARMAP_HPP diff --git a/man/rgbasm.5 b/man/rgbasm.5 index ed5509174..b025011eb 100644 --- a/man/rgbasm.5 +++ b/man/rgbasm.5 @@ -564,22 +564,17 @@ is equivalent to the regular string (Note that this prevents raw strings from including the double quote character.) Raw strings also may be contained in triple quotes for them to be multi-line, so they can include literal newline or quote characters (although still not three quotes in a row). .Pp -The following functions operate on string expressions. -Most of them return a string, however some of these functions actually return an integer and can be used as part of an integer expression! -.Bl -column "STRSUB(str, pos, len)" +The following functions operate on string expressions, and return strings themselves. +.Bl -column "STRSLICE(str, start, stop)" .It Sy Name Ta Sy Operation -.It Fn STRLEN str Ta Returns the number of characters in Ar str . .It Fn STRCAT strs... Ta Concatenates Ar strs . -.It Fn STRCMP str1 str2 Ta Returns -1 if Ar str1 No is alphabetically lower than Ar str2 No , zero if they match, 1 if Ar str1 No is greater than Ar str2 . -.It Fn STRIN str1 str2 Ta Returns the first position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 . -.It Fn STRRIN str1 str2 Ta Returns the last position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 . -.It Fn STRSUB str pos len Ta Returns a substring from Ar str No starting at Ar pos No (first character is position 1, last is position -1) and Ar len No characters long. If Ar len No is not specified the substring continues to the end of Ar str . .It Fn STRUPR str Ta Returns Ar str No with all ASCII letters .Pq Ql a-z in uppercase. .It Fn STRLWR str Ta Returns Ar str No with all ASCII letters .Pq Ql A-Z in lowercase. +.It Fn STRSLICE str start stop Ta Returns a substring of Ar str No starting at Ar start No and ending at Ar stop No (exclusive). If Ar stop No is not specified, the substring continues to the end of Ar str Ns . .It Fn STRRPL str old new Ta Returns Ar str No with each non-overlapping occurrence of the substring Ar old No replaced with Ar new . .It Fn STRFMT fmt args... Ta Returns the string Ar fmt No with each .Ql %spec @@ -589,9 +584,35 @@ pattern replaced by interpolating the format with its corresponding argument in .Ar args .Pq So %% Sc is replaced by the So % Sc character . -.It Fn INCHARMAP str Ta Returns 1 if Ar str No has an entry in the current charmap, and 0 otherwise . +.It Fn STRCHAR str idx Ta Returns the substring of Ar str No for the charmap entry at Ar idx No with the current charmap . Pq Ar idx No counts charmap entries, not characters. +.It Fn REVCHAR vals... Ta Returns the string that is mapped to Ar vals No with the current charmap. If there is no unique charmap entry for Ar vals Ns , an error occurs. +.El +.Pp +The following functions operate on string expressions, but return integers. +.Bl -column "STRRFIND(str, sub)" +.It Sy Name Ta Sy Operation +.It Fn STRLEN str Ta Returns the number of characters in Ar str . +.It Fn STRCMP str1 str2 Ta Compares Ar str1 No and Ar str2 No according to ASCII ordering of their characters. Returns -1 if Ar str1 No is lower than Ar str2 Ns , 1 if Ar str1 No is greater than Ar str2 Ns , or 0 if they match. +.It Fn STRFIND str sub Ta Returns the first index of Ar sub No in Ar str Ns , or -1 if it's not present. +.It Fn STRRFIND str sub Ta Returns the last index of Ar sub No in Ar str Ns , or -1 if it's not present. +.It Fn INCHARMAP str Ta Returns 1 if Ar str No has an entry in the current charmap, or 0 otherwise . .It Fn CHARLEN str Ta Returns the number of charmap entries in Ar str No with the current charmap . -.It Fn CHARSUB str pos Ta Returns the substring for the charmap entry at Ar pos No in Ar str No (first character is position 1, last is position -1) with the current charmap . +.It Fn CHARCMP str1 str2 Ta Compares Ar str1 No and Ar str2 No according to their charmap entry values with the current charmap. Returns -1 if Ar str1 No is lower than Ar str2 Ns , 1 if Ar str1 No is greater than Ar str2 Ns , or 0 if they match. +.It Fn CHARSIZE char Ta Returns how many values are in the charmap entry for Ar char No with the current charmap. +.El +.Pp +Note that the first character of a string is at index 0, and the last is at index -1. +.Pp +The following legacy functions are similar to other functions that operate on string expressions, but for historical reasons, they count characters starting from +.Em position 1 , +not from index 0! +(Position -1 still counts from the last character.) +.Bl -column "STRSUB(str, pos, len)" +.It Sy Name Ta Sy Operation +.It Fn STRSUB str pos len Ta Returns a substring of Ar str No starting at Ar pos No and Ar len No characters long. If Ar len No is not specified, the substring continues to the end of Ar str No . +.It Fn STRIN str sub Ta Returns the first position of Ar sub No in Ar str Ns , or 0 if it's not present. +.It Fn STRRIN str sub Ta Returns the last position of Ar sub No in Ar str Ns , or 0 if it's not present. +.It Fn CHARSUB str pos Ta Returns the substring of Ar str No for the charmap entry at Ar pos No with the current charmap . Pq Ar pos No counts charmap entries, not characters. .El .Ss Character maps When writing text strings that are meant to be displayed on the Game Boy, the character encoding in the ROM may need to be different than the source file encoding. diff --git a/src/asm/charmap.cpp b/src/asm/charmap.cpp index 53aabb8f1..4f02194c2 100644 --- a/src/asm/charmap.cpp +++ b/src/asm/charmap.cpp @@ -31,6 +31,29 @@ struct CharmapNode { struct Charmap { std::string name; std::vector nodes; // first node is reserved for the root node + + // Traverse the trie depth-first to derive the character mappings in definition order + template + bool forEachChar(F callback) const { + // clang-format off: nested initializers + for (std::stack> prefixes({{0, ""}}); !prefixes.empty();) { + // clang-format on + auto [nodeIdx, mapping] = std::move(prefixes.top()); + prefixes.pop(); + CharmapNode const &node = nodes[nodeIdx]; + if (node.isTerminal()) { + if (!callback(nodeIdx, mapping)) { + return false; + } + } + for (unsigned c = 0; c < std::size(node.next); c++) { + if (size_t nextIdx = node.next[c]; nextIdx) { + prefixes.push({nextIdx, mapping + static_cast(c)}); + } + } + } + return true; + } }; static std::deque charmapList; @@ -44,24 +67,12 @@ bool charmap_ForEach( void (*charFunc)(std::string const &, std::vector) ) { for (Charmap const &charmap : charmapList) { - // Traverse the trie depth-first to derive the character mappings in definition order std::map mappings; - // clang-format off: nested initializers - for (std::stack> prefixes({{0, ""}}); - !prefixes.empty();) { - // clang-format on - auto [nodeIdx, mapping] = std::move(prefixes.top()); - prefixes.pop(); - CharmapNode const &node = charmap.nodes[nodeIdx]; - if (node.isTerminal()) { - mappings[nodeIdx] = mapping; - } - for (unsigned c = 0; c < 256; c++) { - if (size_t nextIdx = node.next[c]; nextIdx) { - prefixes.push({nextIdx, mapping + static_cast(c)}); - } - } - } + charmap.forEachChar([&mappings](size_t nodeIdx, std::string const &mapping) { + mappings[nodeIdx] = mapping; + return true; + }); + mapFunc(charmap.name); for (auto [nodeIdx, mapping] : mappings) { charFunc(mapping, charmap.nodes[nodeIdx].value); @@ -178,6 +189,22 @@ bool charmap_HasChar(std::string const &mapping) { return charmap.nodes[nodeIdx].isTerminal(); } +size_t charmap_CharSize(std::string const &mapping) { + Charmap const &charmap = *currentCharmap; + size_t nodeIdx = 0; + + for (char c : mapping) { + nodeIdx = charmap.nodes[nodeIdx].next[static_cast(c)]; + + if (!nodeIdx) { + return 0; + } + } + + CharmapNode const &node = charmap.nodes[nodeIdx]; + return node.isTerminal() ? node.value.size() : 0; +} + std::vector charmap_Convert(std::string const &input) { std::vector output; for (std::string_view inputView = input; charmap_ConvertNext(inputView, &output);) {} @@ -263,3 +290,20 @@ size_t charmap_ConvertNext(std::string_view &input, std::vector *output input = input.substr(inputIdx); return matchLen; } + +std::string charmap_Reverse(std::vector const &value, bool &unique) { + Charmap const &charmap = *currentCharmap; + std::string revMapping; + unique = charmap.forEachChar([&](size_t nodeIdx, std::string const &mapping) { + if (charmap.nodes[nodeIdx].value == value) { + if (revMapping.empty()) { + revMapping = mapping; + } else { + revMapping.clear(); + return false; + } + } + return true; + }); + return revMapping; +} diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 2f3e66a4b..46f3419a5 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -240,19 +240,26 @@ static std::unordered_map ke {"TZCOUNT", T_(OP_TZCOUNT) }, {"STRCAT", T_(OP_STRCAT) }, + {"STRCHAR", T_(OP_STRCHAR) }, {"STRCMP", T_(OP_STRCMP) }, + {"STRFIND", T_(OP_STRFIND) }, {"STRFMT", T_(OP_STRFMT) }, {"STRIN", T_(OP_STRIN) }, {"STRLEN", T_(OP_STRLEN) }, {"STRLWR", T_(OP_STRLWR) }, + {"STRRFIND", T_(OP_STRRFIND) }, {"STRRIN", T_(OP_STRRIN) }, {"STRRPL", T_(OP_STRRPL) }, + {"STRSLICE", T_(OP_STRSLICE) }, {"STRSUB", T_(OP_STRSUB) }, {"STRUPR", T_(OP_STRUPR) }, + {"CHARCMP", T_(OP_CHARCMP) }, {"CHARLEN", T_(OP_CHARLEN) }, + {"CHARSIZE", T_(OP_CHARSIZE) }, {"CHARSUB", T_(OP_CHARSUB) }, {"INCHARMAP", T_(OP_INCHARMAP) }, + {"REVCHAR", T_(OP_REVCHAR) }, {"INCLUDE", T_(POP_INCLUDE) }, {"PRINT", T_(POP_PRINT) }, diff --git a/src/asm/parser.y b/src/asm/parser.y index 4f45d87bc..b18fea29d 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -64,9 +64,13 @@ static uint32_t strToNum(std::vector const &s); static void errorInvalidUTF8Byte(uint8_t byte, char const *functionName); static size_t strlenUTF8(std::string const &str, bool printErrors); + static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t stop); static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len); static size_t charlenUTF8(std::string const &str); + static std::string strcharUTF8(std::string const &str, uint32_t idx); static std::string charsubUTF8(std::string const &str, uint32_t pos); + static int32_t charcmp(std::string_view str1, std::string_view str2); + static uint32_t adjustNegativeIndex(int32_t idx, size_t len, char const *functionName); static uint32_t adjustNegativePos(int32_t pos, size_t len, char const *functionName); static std::string strrpl(std::string_view str, std::string const &old, std::string const &rep); static std::string strfmt( @@ -260,7 +264,9 @@ %token OP_BANK "BANK" %token OP_BITWIDTH "BITWIDTH" %token OP_CEIL "CEIL" +%token OP_CHARCMP "CHARCMP" %token OP_CHARLEN "CHARLEN" +%token OP_CHARSIZE "CHARSIZE" %token OP_CHARSUB "CHARSUB" %token OP_COS "COS" %token OP_DEF "DEF" @@ -274,18 +280,23 @@ %token OP_LOG "LOG" %token OP_LOW "LOW" %token OP_POW "POW" +%token OP_REVCHAR "REVCHAR" %token OP_ROUND "ROUND" %token OP_SIN "SIN" %token OP_SIZEOF "SIZEOF" %token OP_STARTOF "STARTOF" %token OP_STRCAT "STRCAT" +%token OP_STRCHAR "STRCHAR" %token OP_STRCMP "STRCMP" +%token OP_STRFIND "STRFIND" %token OP_STRFMT "STRFMT" %token OP_STRIN "STRIN" %token OP_STRLEN "STRLEN" %token OP_STRLWR "STRLWR" +%token OP_STRRFIND "STRRFIND" %token OP_STRRIN "STRRIN" %token OP_STRRPL "STRRPL" +%token OP_STRSLICE "STRSLICE" %token OP_STRSUB "STRSUB" %token OP_STRUPR "STRUPR" %token OP_TAN "TAN" @@ -1461,6 +1472,14 @@ relocexpr_no_str: | OP_STRCMP LPAREN string COMMA string RPAREN { $$.makeNumber($3.compare($5)); } + | OP_STRFIND LPAREN string COMMA string RPAREN { + size_t pos = $3.find($5); + $$.makeNumber(pos != std::string::npos ? pos : -1); + } + | OP_STRRFIND LPAREN string COMMA string RPAREN { + size_t pos = $3.rfind($5); + $$.makeNumber(pos != std::string::npos ? pos : -1); + } | OP_STRIN LPAREN string COMMA string RPAREN { size_t pos = $3.find($5); $$.makeNumber(pos != std::string::npos ? pos + 1 : 0); @@ -1478,6 +1497,16 @@ relocexpr_no_str: | OP_INCHARMAP LPAREN string RPAREN { $$.makeNumber(charmap_HasChar($3)); } + | OP_CHARCMP LPAREN string COMMA string RPAREN { + $$.makeNumber(charcmp($3, $5)); + } + | OP_CHARSIZE LPAREN string RPAREN { + size_t charSize = charmap_CharSize($3); + if (charSize == 0) { + ::error("CHARSIZE: No character mapping for \"%s\"\n", $3.c_str()); + } + $$.makeNumber(charSize); + } | LPAREN relocexpr RPAREN { $$ = std::move($2); } @@ -1515,6 +1544,17 @@ string: STRING { $$ = std::move($1); } + | OP_STRSLICE LPAREN string COMMA iconst COMMA iconst RPAREN { + size_t len = strlenUTF8($3, false); + uint32_t start = adjustNegativeIndex($5, len, "STRSLICE"); + uint32_t stop = adjustNegativeIndex($7, len, "STRSLICE"); + $$ = strsliceUTF8($3, start, stop); + } + | OP_STRSLICE LPAREN string COMMA iconst RPAREN { + size_t len = strlenUTF8($3, false); + uint32_t start = adjustNegativeIndex($5, len, "STRSLICE"); + $$ = strsliceUTF8($3, start, len - 1); + } | OP_STRSUB LPAREN string COMMA iconst COMMA uconst RPAREN { size_t len = strlenUTF8($3, false); uint32_t pos = adjustNegativePos($5, len, "STRSUB"); @@ -1525,11 +1565,25 @@ string: uint32_t pos = adjustNegativePos($5, len, "STRSUB"); $$ = strsubUTF8($3, pos, pos > len ? 0 : len + 1 - pos); } + | OP_STRCHAR LPAREN string COMMA iconst RPAREN { + size_t len = charlenUTF8($3); + uint32_t idx = adjustNegativeIndex($5, len, "STRCHAR"); + $$ = strcharUTF8($3, idx); + } | OP_CHARSUB LPAREN string COMMA iconst RPAREN { size_t len = charlenUTF8($3); uint32_t pos = adjustNegativePos($5, len, "CHARSUB"); $$ = charsubUTF8($3, pos); } + | OP_REVCHAR LPAREN charmap_args RPAREN { + bool unique; + $$ = charmap_Reverse($3, unique); + if (!unique) { + ::error("REVCHAR: Multiple character mappings to values\n"); + } else if ($$.empty()) { + ::error("REVCHAR: No character mapping to values\n"); + } + } | OP_STRCAT LPAREN RPAREN { $$.clear(); } @@ -2516,6 +2570,70 @@ static size_t strlenUTF8(std::string const &str, bool printErrors) { return len; } +static std::string strsliceUTF8(std::string const &str, uint32_t start, uint32_t stop) { + char const *ptr = str.c_str(); + size_t index = 0; + uint32_t state = 0; + uint32_t codepoint = 0; + uint32_t curIdx = 0; + + // Advance to starting index in source string. + while (ptr[index] && curIdx < start) { + switch (decode(&state, &codepoint, ptr[index])) { + case 1: + errorInvalidUTF8Byte(ptr[index], "STRSLICE"); + state = 0; + // fallthrough + case 0: + curIdx++; + break; + } + index++; + } + + // An index 1 past the end of the string is allowed, but will trigger the + // "Length too big" warning below if the length is nonzero. + if (!ptr[index] && start > curIdx) { + warning( + WARNING_BUILTIN_ARG, + "STRSLICE: Start index %" PRIu32 " is past the end of the string\n", + start + ); + } + + size_t startIndex = index; + + // Advance to ending index in source string. + while (ptr[index] && curIdx < stop) { + switch (decode(&state, &codepoint, ptr[index])) { + case 1: + errorInvalidUTF8Byte(ptr[index], "STRSLICE"); + state = 0; + // fallthrough + case 0: + curIdx++; + break; + } + index++; + } + + // Check for partial code point. + if (state != 0) { + error("STRSLICE: Incomplete UTF-8 character\n"); + curIdx++; + } + + if (curIdx < stop) { + warning( + WARNING_BUILTIN_ARG, + "STRSLICE: Stop index %" PRIu32 " is past the end of the string\n", + stop + ); + } + + return std::string(ptr + startIndex, ptr + index); +} + static std::string strsubUTF8(std::string const &str, uint32_t pos, uint32_t len) { char const *ptr = str.c_str(); size_t index = 0; @@ -2584,6 +2702,29 @@ static size_t charlenUTF8(std::string const &str) { return len; } +static std::string strcharUTF8(std::string const &str, uint32_t idx) { + std::string_view view = str; + size_t charLen = 1; + + // Advance to starting index in source string. + for (uint32_t curIdx = 0; charLen && curIdx < idx; curIdx++) { + charLen = charmap_ConvertNext(view, nullptr); + } + + std::string_view start = view; + + if (!charmap_ConvertNext(view, nullptr)) { + warning( + WARNING_BUILTIN_ARG, + "STRCHAR: Index %" PRIu32 " is past the end of the string\n", + idx + ); + } + + start = start.substr(0, start.length() - view.length()); + return std::string(start); +} + static std::string charsubUTF8(std::string const &str, uint32_t pos) { std::string_view view = str; size_t charLen = 1; @@ -2607,6 +2748,46 @@ static std::string charsubUTF8(std::string const &str, uint32_t pos) { return std::string(start); } +static int32_t charcmp(std::string_view str1, std::string_view str2) { + std::vector seq1, seq2; + size_t idx1 = 0, idx2 = 0; + for (;;) { + if (idx1 >= seq1.size()) { + idx1 = 0; + seq1.clear(); + charmap_ConvertNext(str1, &seq1); + } + if (idx2 >= seq2.size()) { + idx2 = 0; + seq2.clear(); + charmap_ConvertNext(str2, &seq2); + } + if (seq1.empty() != seq2.empty()) { + return seq1.empty() ? -1 : 1; + } else if (seq1.empty()) { + return 0; + } else { + int32_t value1 = seq1[idx1++], value2 = seq2[idx2++]; + if (value1 != value2) { + return (value1 > value2) - (value1 < value2); + } + } + } +} + +static uint32_t adjustNegativeIndex(int32_t idx, size_t len, char const *functionName) { + // String functions adjust negative index arguments the same way, + // such that position -1 is the last character of a string. + if (idx < 0) { + idx += len; + } + if (idx < 0) { + warning(WARNING_BUILTIN_ARG, "%s: Index starts at 0\n", functionName); + idx = 0; + } + return static_cast(idx); +} + static uint32_t adjustNegativePos(int32_t pos, size_t len, char const *functionName) { // STRSUB and CHARSUB adjust negative position arguments the same way, // such that position -1 is the last character of a string. diff --git a/test/asm/charcmp.asm b/test/asm/charcmp.asm new file mode 100644 index 000000000..0551507a4 --- /dev/null +++ b/test/asm/charcmp.asm @@ -0,0 +1,27 @@ +charmap "a", 1 +charmap "b", 2 +charmap "c", 0 +charmap "w", 3, 2, 1 +charmap "x", 1, 2 +charmap "y", 2, 1 +charmap "z", 1, 2, 3 + +macro test + println strfmt("\"%#s\" <=> \"%#s\" == %d", \1, \2, charcmp(\1, \2)) +endm + +test "", "" +test "a", "a" +test "aa", "aaa" +test "aaa", "aa" +test "a", "b" +test "b", "a" +test "", "b" +test "c", "" +test "abc", "cba" +test "cabc", "cxc" +test "zy", "abw" +test "abab", "xx" +test "abab", "ww" +test "w", "z" +test "xcy", "zw" diff --git a/test/asm/charcmp.out b/test/asm/charcmp.out new file mode 100644 index 000000000..7f7e4084d --- /dev/null +++ b/test/asm/charcmp.out @@ -0,0 +1,15 @@ +"" <=> "" == 0 +"a" <=> "a" == 0 +"aa" <=> "aaa" == -1 +"aaa" <=> "aa" == 1 +"a" <=> "b" == -1 +"b" <=> "a" == 1 +"" <=> "b" == -1 +"c" <=> "" == 1 +"abc" <=> "cba" == 1 +"cabc" <=> "cxc" == 0 +"zy" <=> "abw" == 0 +"abab" <=> "xx" == 0 +"abab" <=> "ww" == -1 +"w" <=> "z" == 1 +"xcy" <=> "zw" == -1 diff --git a/test/asm/charlen-charsub.asm b/test/asm/charlen-charsub.asm deleted file mode 100644 index 4736d76d0..000000000 --- a/test/asm/charlen-charsub.asm +++ /dev/null @@ -1,28 +0,0 @@ - opt Wno-unmapped-char - charmap "", $00 - charmap "A", $10 - charmap "B", $20 - charmap "C", $30 - charmap "Bold", $88 - -SECTION "test", ROM0 - -DEF S EQUS "XBoldABC" - - assert CHARLEN("{S}") == 6 - println CHARSUB("{S}", 2) - assert !STRCMP(CHARSUB("{S}", 2), "Bold") - assert CHARSUB("{S}", -5) == CHARSUB("{S}", CHARLEN("{S}") + 1 - 5) - assert CHARSUB("{S}", 2) == "Bold" && "Bold" == $88 - assert CHARSUB("{S}", 1) == $58 ; ASCII "X" - db "{S}" - - newcharmap ascii - - assert CHARLEN("{S}") == 14 - println CHARSUB("{S}", 2) - assert !STRCMP(CHARSUB("{S}", 2), "B") - assert CHARSUB("{S}", -5) == CHARSUB("{S}", CHARLEN("{S}") + 1 - 5) - assert CHARSUB("{S}", 2) == "B" && "B" == $42 ; ASCII "B" - assert CHARSUB("{S}", 1) == $58 ; ASCII "X" - db "{S}" diff --git a/test/asm/charlen-strchar.asm b/test/asm/charlen-strchar.asm new file mode 100644 index 000000000..94258c64e --- /dev/null +++ b/test/asm/charlen-strchar.asm @@ -0,0 +1,34 @@ + opt Wno-unmapped-char + charmap "", $00 + charmap "A", $10 + charmap "B", $20 + charmap "C", $30 + charmap "Bold", $88 + +SECTION "test", ROM0 + +DEF S EQUS "XBoldABC" + + assert CHARLEN("{S}") == 6 + println STRCHAR("{S}", 1) + + assert !STRCMP(STRCHAR("{S}", 1), "Bold") + assert STRCHAR("{S}", -5) == STRCHAR("{S}", CHARLEN("{S}") - 5) + assert STRCHAR("{S}", 1) == "Bold" && "Bold" == $88 + assert STRCHAR("{S}", 0) == $58 ; ASCII "X" + db "{S}" + + for n, CHARLEN("{S}") + assert STRCHAR("{S}", n) == CHARSUB("{S}", n + 1) + assert STRCHAR("{S}", -n - 1) == CHARSUB("{S}", -n - 1) + endr + + newcharmap ascii + + assert CHARLEN("{S}") == 14 + println STRCHAR("{S}", 1) + assert !STRCMP(STRCHAR("{S}", 1), "B") + assert STRCHAR("{S}", -5) == STRCHAR("{S}", CHARLEN("{S}") - 5) + assert STRCHAR("{S}", 1) == "B" && "B" == $42 ; ASCII "B" + assert STRCHAR("{S}", 0) == $58 ; ASCII "X" + db "{S}" diff --git a/test/asm/charlen-charsub.out b/test/asm/charlen-strchar.out similarity index 100% rename from test/asm/charlen-charsub.out rename to test/asm/charlen-strchar.out diff --git a/test/asm/charlen-charsub.out.bin b/test/asm/charlen-strchar.out.bin similarity index 100% rename from test/asm/charlen-charsub.out.bin rename to test/asm/charlen-strchar.out.bin diff --git a/test/asm/charsize.asm b/test/asm/charsize.asm new file mode 100644 index 000000000..c71c7fafd --- /dev/null +++ b/test/asm/charsize.asm @@ -0,0 +1,20 @@ +charmap "a", 1 +charmap "b", 2, 3 +charmap "cdef", 4 +charmap "ghi", 5, 6, 7, 8, 9 +charmap "jkl", 123, 456, 789 +charmap "mno", 123456789 +charmap "¡Pokémon!", 2, 3 + +assert charsize("a") == 1 +assert charsize("b") == 2 +assert charsize("cdef") == 1 +assert charsize("ghi") == 5 +assert charsize("jkl") == 3 +assert charsize("mno") == 1 +assert charsize("¡Pokémon!") == 2 + +assert charsize("") == 0 +assert charsize("hello world") == 0 +assert charsize("abcdef") == 0 +assert charsize("é") == 0 diff --git a/test/asm/charsize.err b/test/asm/charsize.err new file mode 100644 index 000000000..ad026a139 --- /dev/null +++ b/test/asm/charsize.err @@ -0,0 +1,9 @@ +error: charsize.asm(17): + CHARSIZE: No character mapping for "" +error: charsize.asm(18): + CHARSIZE: No character mapping for "hello world" +error: charsize.asm(19): + CHARSIZE: No character mapping for "abcdef" +error: charsize.asm(20): + CHARSIZE: No character mapping for "é" +error: Assembly aborted (4 errors)! diff --git a/test/asm/invalid-charsub.asm b/test/asm/invalid-charsub.asm deleted file mode 100644 index 52b452f38..000000000 --- a/test/asm/invalid-charsub.asm +++ /dev/null @@ -1 +0,0 @@ -DEF S EQUS CHARSUB("ABC", 4) diff --git a/test/asm/invalid-charsub.err b/test/asm/invalid-charsub.err deleted file mode 100644 index ea6cb9737..000000000 --- a/test/asm/invalid-charsub.err +++ /dev/null @@ -1,2 +0,0 @@ -warning: invalid-charsub.asm(1): [-Wbuiltin-args] - CHARSUB: Position 4 is past the end of the string diff --git a/test/asm/invalid-strchar-charsub.asm b/test/asm/invalid-strchar-charsub.asm new file mode 100644 index 000000000..3acffe9c7 --- /dev/null +++ b/test/asm/invalid-strchar-charsub.asm @@ -0,0 +1,3 @@ +DEF S EQUS STRCHAR("ABC", 3) +DEF T EQUS CHARSUB("ABC", 4) +DEF U EQUS CHARSUB("ABC", 0) diff --git a/test/asm/invalid-strchar-charsub.err b/test/asm/invalid-strchar-charsub.err new file mode 100644 index 000000000..8ae4381c8 --- /dev/null +++ b/test/asm/invalid-strchar-charsub.err @@ -0,0 +1,6 @@ +warning: invalid-strchar-charsub.asm(1): [-Wbuiltin-args] + STRCHAR: Index 3 is past the end of the string +warning: invalid-strchar-charsub.asm(2): [-Wbuiltin-args] + CHARSUB: Position 4 is past the end of the string +warning: invalid-strchar-charsub.asm(3): [-Wbuiltin-args] + CHARSUB: Position starts at 1 diff --git a/test/asm/invalid-utf-8-strings.asm b/test/asm/invalid-utf-8-strings.asm index db722c596..0404f6396 100644 --- a/test/asm/invalid-utf-8-strings.asm +++ b/test/asm/invalid-utf-8-strings.asm @@ -36,8 +36,8 @@ DEF n = STRLEN("{invalid}") DEF r = CHARLEN("{invalid}") println "\"{#s:invalid}\": {d:n} == {d:r}" -REDEF mid1 EQUS CHARSUB("{invalid}", 4) -REDEF mid2 EQUS CHARSUB("{invalid}", 7) +REDEF mid1 EQUS STRCHAR("{invalid}", 3) +REDEF mid2 EQUS STRCHAR("{invalid}", 6) println "\"{#s:mid2}{#s:mid1}\"" ; characters: diff --git a/test/asm/revchar.asm b/test/asm/revchar.asm new file mode 100644 index 000000000..fbb7774ed --- /dev/null +++ b/test/asm/revchar.asm @@ -0,0 +1,23 @@ +charmap "a", 1 +charmap "b", 2 +charmap "c", 3 +charmap "d", 3 +charmap "eeeee", $12345678 +charmap "x", 1, 2, 3 +charmap "y", 4, 5, 6, 7, 8, $99999999 +charmap "zed", $1234, $5678, $9abc, $def0 + +macro test + redef expected equs \1 + shift + assert !strcmp(revchar(\#), "{expected}") +endm + +test "a", 1 +test "b", 2 +test "eeeee", 305419896 +test "x", 1, 2, 3 +test "y", 4, 5, 6, 7, 8, $99999999 +test "zed", 4660, 22136, 39612, 57072 +test "", 3 ; multiple +test "", 4 ; none diff --git a/test/asm/revchar.err b/test/asm/revchar.err new file mode 100644 index 000000000..9e70eee94 --- /dev/null +++ b/test/asm/revchar.err @@ -0,0 +1,5 @@ +error: revchar.asm(22) -> revchar.asm::test(13): + REVCHAR: Multiple character mappings to values +error: revchar.asm(23) -> revchar.asm::test(13): + REVCHAR: No character mapping to values +error: Assembly aborted (2 errors)! diff --git a/test/asm/strin-strrin.asm b/test/asm/strfind-strin.asm similarity index 50% rename from test/asm/strin-strrin.asm rename to test/asm/strfind-strin.asm index 943e3ef72..d229871e0 100644 --- a/test/asm/strin-strrin.asm +++ b/test/asm/strfind-strin.asm @@ -1,15 +1,28 @@ SECTION "Test", ROM0 + assert STRFIND("foo bar baz", "bar") == STRRFIND("foo bar baz", "bar") assert STRIN("foo bar baz", "bar") == STRRIN("foo bar baz", "bar") + assert STRFIND("foo bar bargain", "bar") == 4 assert STRIN("foo bar bargain", "bar") == 5 + + assert STRRFIND("foo bar bargain", "bar") == 8 assert STRRIN("foo bar bargain", "bar") == 9 + assert STRFIND("foo bar", "qux") == -1 assert STRIN("foo bar", "qux") == 0 + + assert STRRFIND("foo bar", "qux") == -1 assert STRRIN("foo bar", "qux") == 0 + assert STRFIND("foo", "foobar") == -1 assert STRIN("foo", "foobar") == 0 + + assert STRRFIND("foo", "foobar") == -1 assert STRRIN("foo", "foobar") == 0 + assert STRFIND("foobar", "") == 0 assert STRIN("foobar", "") == 1 + + assert STRRFIND("foobar", "") == STRLEN("foobar") assert STRRIN("foobar", "") == STRLEN("foobar") + 1 diff --git a/test/asm/strslice-strsub.asm b/test/asm/strslice-strsub.asm new file mode 100644 index 000000000..c8e5341f4 --- /dev/null +++ b/test/asm/strslice-strsub.asm @@ -0,0 +1,57 @@ +MACRO xstrslice + PRINTLN "STRSLICE(\#): ", STRSLICE(\#) +ENDM + + xstrslice "ABC", 0, 1 + xstrslice "ABC", 1, 2 + xstrslice "ABC", 2, 3 + xstrslice "ABC", -3, -2 + xstrslice "ABC", -2, -1 + xstrslice "ABC", -1, -0 ; lol + xstrslice "ABC", -1, 3 + xstrslice "ABC", 1 + xstrslice "ABC", -2 + xstrslice "ABC", 4 + xstrslice "ABC", -4 + xstrslice "ABC", 0, 2 + xstrslice "ABC", 1, 3 + xstrslice "ABC", 1, 31 + xstrslice "ABC", 1, 300 + xstrslice "ABC", -4, 300 + xstrslice "ABC", 3, 3 + xstrslice "ABC", 4, 4 + xstrslice "ABC", 3, 4 + xstrslice "カタカナ", 0, 2 + xstrslice "カタカナ", 2, 4 + xstrslice "カタカナ", 2, 12 + xstrslice "g̈", 0, 1 + xstrslice "g̈", 0, 2 + +MACRO xstrsub + PRINTLN "STRSUB(\#): ", STRSUB(\#) +ENDM + + xstrsub "ABC", 1, 1 + xstrsub "ABC", 2, 1 + xstrsub "ABC", 3, 1 + xstrsub "ABC", -3, 1 + xstrsub "ABC", -2, 1 + xstrsub "ABC", -1, 1 + xstrsub "ABC", 2 + xstrsub "ABC", 0 + xstrsub "ABC", -2 + xstrsub "ABC", 5 + xstrsub "ABC", -5 + xstrsub "ABC", 1, 2 + xstrsub "ABC", 2, 2 + xstrsub "ABC", 2, 32 + xstrsub "ABC", 2, 300 + xstrsub "ABC", -4, 300 + xstrsub "ABC", 4, 0 + xstrsub "ABC", 5, 0 + xstrsub "ABC", 4, 1 + xstrsub "カタカナ", 1, 2 + xstrsub "カタカナ", 3, 2 + xstrsub "カタカナ", 3, 10 + xstrsub "g̈", 1, 1 + xstrsub "g̈", 1, 2 diff --git a/test/asm/strslice-strsub.err b/test/asm/strslice-strsub.err new file mode 100644 index 000000000..952db44ea --- /dev/null +++ b/test/asm/strslice-strsub.err @@ -0,0 +1,40 @@ +warning: strslice-strsub.asm(14) -> strslice-strsub.asm::xstrslice(2): [-Wbuiltin-args] + STRSLICE: Start index 4 is past the end of the string +warning: strslice-strsub.asm(15) -> strslice-strsub.asm::xstrslice(2): [-Wbuiltin-args] + STRSLICE: Index starts at 0 +warning: strslice-strsub.asm(18) -> strslice-strsub.asm::xstrslice(2): [-Wbuiltin-args] + STRSLICE: Stop index 31 is past the end of the string +warning: strslice-strsub.asm(19) -> strslice-strsub.asm::xstrslice(2): [-Wbuiltin-args] + STRSLICE: Stop index 300 is past the end of the string +warning: strslice-strsub.asm(20) -> strslice-strsub.asm::xstrslice(2): [-Wbuiltin-args] + STRSLICE: Index starts at 0 +warning: strslice-strsub.asm(20) -> strslice-strsub.asm::xstrslice(2): [-Wbuiltin-args] + STRSLICE: Stop index 300 is past the end of the string +warning: strslice-strsub.asm(22) -> strslice-strsub.asm::xstrslice(2): [-Wbuiltin-args] + STRSLICE: Start index 4 is past the end of the string +warning: strslice-strsub.asm(22) -> strslice-strsub.asm::xstrslice(2): [-Wbuiltin-args] + STRSLICE: Stop index 4 is past the end of the string +warning: strslice-strsub.asm(23) -> strslice-strsub.asm::xstrslice(2): [-Wbuiltin-args] + STRSLICE: Stop index 4 is past the end of the string +warning: strslice-strsub.asm(26) -> strslice-strsub.asm::xstrslice(2): [-Wbuiltin-args] + STRSLICE: Stop index 12 is past the end of the string +warning: strslice-strsub.asm(41) -> strslice-strsub.asm::xstrsub(31): [-Wbuiltin-args] + STRSUB: Position starts at 1 +warning: strslice-strsub.asm(43) -> strslice-strsub.asm::xstrsub(31): [-Wbuiltin-args] + STRSUB: Position 5 is past the end of the string +warning: strslice-strsub.asm(44) -> strslice-strsub.asm::xstrsub(31): [-Wbuiltin-args] + STRSUB: Position starts at 1 +warning: strslice-strsub.asm(47) -> strslice-strsub.asm::xstrsub(31): [-Wbuiltin-args] + STRSUB: Length too big: 32 +warning: strslice-strsub.asm(48) -> strslice-strsub.asm::xstrsub(31): [-Wbuiltin-args] + STRSUB: Length too big: 300 +warning: strslice-strsub.asm(49) -> strslice-strsub.asm::xstrsub(31): [-Wbuiltin-args] + STRSUB: Position starts at 1 +warning: strslice-strsub.asm(49) -> strslice-strsub.asm::xstrsub(31): [-Wbuiltin-args] + STRSUB: Length too big: 300 +warning: strslice-strsub.asm(51) -> strslice-strsub.asm::xstrsub(31): [-Wbuiltin-args] + STRSUB: Position 5 is past the end of the string +warning: strslice-strsub.asm(52) -> strslice-strsub.asm::xstrsub(31): [-Wbuiltin-args] + STRSUB: Length too big: 1 +warning: strslice-strsub.asm(55) -> strslice-strsub.asm::xstrsub(31): [-Wbuiltin-args] + STRSUB: Length too big: 10 diff --git a/test/asm/strslice-strsub.out b/test/asm/strslice-strsub.out new file mode 100644 index 000000000..2c814f42b --- /dev/null +++ b/test/asm/strslice-strsub.out @@ -0,0 +1,48 @@ +STRSLICE("ABC",0,1): A +STRSLICE("ABC",1,2): B +STRSLICE("ABC",2,3): C +STRSLICE("ABC",-3,-2): A +STRSLICE("ABC",-2,-1): B +STRSLICE("ABC",-1,-0): +STRSLICE("ABC",-1,3): C +STRSLICE("ABC",1): B +STRSLICE("ABC",-2): B +STRSLICE("ABC",4): +STRSLICE("ABC",-4): AB +STRSLICE("ABC",0,2): AB +STRSLICE("ABC",1,3): BC +STRSLICE("ABC",1,31): BC +STRSLICE("ABC",1,300): BC +STRSLICE("ABC",-4,300): ABC +STRSLICE("ABC",3,3): +STRSLICE("ABC",4,4): +STRSLICE("ABC",3,4): +STRSLICE("カタカナ",0,2): カタ +STRSLICE("カタカナ",2,4): カナ +STRSLICE("カタカナ",2,12): カナ +STRSLICE("g̈",0,1): g +STRSLICE("g̈",0,2): g̈ +STRSUB("ABC",1,1): A +STRSUB("ABC",2,1): B +STRSUB("ABC",3,1): C +STRSUB("ABC",-3,1): A +STRSUB("ABC",-2,1): B +STRSUB("ABC",-1,1): C +STRSUB("ABC",2): BC +STRSUB("ABC",0): ABC +STRSUB("ABC",-2): BC +STRSUB("ABC",5): +STRSUB("ABC",-5): ABC +STRSUB("ABC",1,2): AB +STRSUB("ABC",2,2): BC +STRSUB("ABC",2,32): BC +STRSUB("ABC",2,300): BC +STRSUB("ABC",-4,300): ABC +STRSUB("ABC",4,0): +STRSUB("ABC",5,0): +STRSUB("ABC",4,1): +STRSUB("カタカナ",1,2): カタ +STRSUB("カタカナ",3,2): カナ +STRSUB("カタカナ",3,10): カナ +STRSUB("g̈",1,1): g +STRSUB("g̈",1,2): g̈ diff --git a/test/asm/strsub.asm b/test/asm/strsub.asm deleted file mode 100644 index 866e52311..000000000 --- a/test/asm/strsub.asm +++ /dev/null @@ -1,30 +0,0 @@ -SECTION "sec", ROM0 - -MACRO xstrsub - PRINTLN STRSUB(\#) -ENDM - - xstrsub "ABC", 1, 1 - xstrsub "ABC", 2, 1 - xstrsub "ABC", 3, 1 - xstrsub "ABC", -3, 1 - xstrsub "ABC", -2, 1 - xstrsub "ABC", -1, 1 - xstrsub "ABC", 2 - xstrsub "ABC", 0 - xstrsub "ABC", -2 - xstrsub "ABC", 5 - xstrsub "ABC", -5 - xstrsub "ABC", 1, 2 - xstrsub "ABC", 2, 2 - xstrsub "ABC", 2, 32 - xstrsub "ABC", 2, 300 - xstrsub "ABC", -4, 300 - xstrsub "ABC", 4, 0 - xstrsub "ABC", 5, 0 - xstrsub "ABC", 4, 1 - xstrsub "カタカナ", 1, 2 - xstrsub "カタカナ", 3, 2 - xstrsub "カタカナ", 3, 10 - xstrsub "g̈", 1, 1 - xstrsub "g̈", 1, 2 diff --git a/test/asm/strsub.err b/test/asm/strsub.err deleted file mode 100644 index a40b28ecf..000000000 --- a/test/asm/strsub.err +++ /dev/null @@ -1,20 +0,0 @@ -warning: strsub.asm(14) -> strsub.asm::xstrsub(4): [-Wbuiltin-args] - STRSUB: Position starts at 1 -warning: strsub.asm(16) -> strsub.asm::xstrsub(4): [-Wbuiltin-args] - STRSUB: Position 5 is past the end of the string -warning: strsub.asm(17) -> strsub.asm::xstrsub(4): [-Wbuiltin-args] - STRSUB: Position starts at 1 -warning: strsub.asm(20) -> strsub.asm::xstrsub(4): [-Wbuiltin-args] - STRSUB: Length too big: 32 -warning: strsub.asm(21) -> strsub.asm::xstrsub(4): [-Wbuiltin-args] - STRSUB: Length too big: 300 -warning: strsub.asm(22) -> strsub.asm::xstrsub(4): [-Wbuiltin-args] - STRSUB: Position starts at 1 -warning: strsub.asm(22) -> strsub.asm::xstrsub(4): [-Wbuiltin-args] - STRSUB: Length too big: 300 -warning: strsub.asm(24) -> strsub.asm::xstrsub(4): [-Wbuiltin-args] - STRSUB: Position 5 is past the end of the string -warning: strsub.asm(25) -> strsub.asm::xstrsub(4): [-Wbuiltin-args] - STRSUB: Length too big: 1 -warning: strsub.asm(28) -> strsub.asm::xstrsub(4): [-Wbuiltin-args] - STRSUB: Length too big: 10 diff --git a/test/asm/strsub.out b/test/asm/strsub.out deleted file mode 100644 index 7270b3f87..000000000 --- a/test/asm/strsub.out +++ /dev/null @@ -1,24 +0,0 @@ -A -B -C -A -B -C -BC -ABC -BC - -ABC -AB -BC -BC -BC -ABC - - - -カタ -カナ -カナ -g -g̈