From b6c70b3c41ddf42da0ecf3801182bfe26be9d74b Mon Sep 17 00:00:00 2001 From: Rangi42 Date: Tue, 4 Feb 2025 10:40:17 +0100 Subject: [PATCH] Disable `EQUS` expansion for raw symbols --- man/rgbasm.5 | 6 ++++- src/asm/lexer.cpp | 18 ++++++++------ src/asm/parser.y | 63 +++++++++++++++++++++++++---------------------- 3 files changed, 49 insertions(+), 38 deletions(-) diff --git a/man/rgbasm.5 b/man/rgbasm.5 index e8b7e291e..1589c43de 100644 --- a/man/rgbasm.5 +++ b/man/rgbasm.5 @@ -1104,7 +1104,9 @@ Additionally, label names can contain up to a single dot .Ql \&. , which may not be the first character. .Pp -A symbol cannot have the same name as a reserved keyword, unless it is prefixed by a hash +A symbol cannot have the same name as a reserved keyword, unless it is a +.Dq raw symbol +prefixed by a hash .Sq # . For example, .Ql #load @@ -1387,6 +1389,8 @@ This expansion is disabled in a few contexts: and .Ql MACRO name will not expand string constants in their names. +Expansion is also disabled for raw string constant symbols (string constant symbols prefixed by a hash +.Sq # ) . .Bd -literal -offset indent DEF COUNTREG EQUS "[hl+]" ld a, COUNTREG diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 3027baf7d..826385c07 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -1910,17 +1910,21 @@ static Token yylex_NORMAL() { // `token` is either an `ID` or a `LOCAL_ID`, and both have a `std::string` value. 
assume(token.value.holds()); - // Local symbols cannot be string expansions + // Raw symbols and local symbols cannot be string expansions if (token.type == T_(ID) && lexerState->expandStrings) { // Attempt string expansion Symbol const *sym = sym_FindExactSymbol(token.value.get()); if (sym && sym->type == SYM_EQUS) { - std::shared_ptr str = sym->getEqus(); - - assume(str); - beginExpansion(str, sym->name); - continue; // Restart, reading from the new buffer + if (raw) { + token.type = T_(STR_ID); + } else { + std::shared_ptr str = sym->getEqus(); + + assume(str); + beginExpansion(str, sym->name); + continue; // Restart, reading from the new buffer + } } } @@ -1936,7 +1940,7 @@ static Token yylex_NORMAL() { // character *immediately* follows the identifier. Thus, at the beginning of a line, // "Label:" and "mac:" are treated as label definitions, but "Label :" and "mac :" // are treated as macro invocations. - if (token.type == T_(ID) && peek() == ':') { + if ((token.type == T_(ID) || token.type == T_(STR_ID)) && peek() == ':') { token.type = T_(LABEL); } diff --git a/src/asm/parser.y b/src/asm/parser.y index e6b82c5ca..1d2c00246 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -315,6 +315,7 @@ %token STRING "string" %token LABEL "label" %token ID "identifier" +%token STR_ID "string identifier" %token LOCAL_ID "local identifier" %token ANON "anonymous label" @@ -344,6 +345,7 @@ %type string %type strcat_args // Strings used for identifiers +%type sym_id %type def_id %type redef_id %type def_numeric @@ -357,6 +359,7 @@ %type redef_equs %type scoped_id %type scoped_anon_id +%type scoped_anon_id_no_str // SM83 instruction parameters %type reg_r @@ -510,10 +513,12 @@ endc: } ; +sym_id: ID | STR_ID; + def_id: OP_DEF { lexer_ToggleStringExpansion(false); - } ID { + } sym_id { lexer_ToggleStringExpansion(true); $$ = std::move($3); } @@ -522,7 +527,7 @@ def_id: redef_id: POP_REDEF { lexer_ToggleStringExpansion(false); - } ID { + } sym_id { 
lexer_ToggleStringExpansion(true); $$ = std::move($3); } @@ -530,26 +535,11 @@ redef_id: // LABEL covers identifiers followed by a double colon (e.g. `call Function::ret`, // to be read as `call Function :: ret`). This should not conflict with anything. -scoped_id: - ID { - $$ = std::move($1); - } - | LOCAL_ID { - $$ = std::move($1); - } - | LABEL { - $$ = std::move($1); - } -; +scoped_id: sym_id | LOCAL_ID | LABEL; -scoped_anon_id: - scoped_id { - $$ = std::move($1); - } - | ANON { - $$ = std::move($1); - } -; +scoped_anon_id_no_str: ID | LOCAL_ID | LABEL | ANON; + +scoped_anon_id: scoped_anon_id_no_str | STR_ID; label: %empty @@ -576,7 +566,7 @@ label: ; macro: - ID { + sym_id { // Parsing 'macro_args' will restore the lexer's normal mode lexer_SetMode(LEXER_RAW); } macro_args { @@ -862,7 +852,7 @@ rept: for: POP_FOR { lexer_ToggleStringExpansion(false); - } ID { + } sym_id { lexer_ToggleStringExpansion(true); } COMMA for_args NEWLINE capture_rept endofline { if ($8.span.ptr) { @@ -906,7 +896,7 @@ break: def_macro: POP_MACRO { lexer_ToggleStringExpansion(false); - } ID { + } sym_id { lexer_ToggleStringExpansion(true); } NEWLINE capture_macro endofline { if ($6.span.ptr) { @@ -1171,16 +1161,16 @@ charmap_args: ; newcharmap: - POP_NEWCHARMAP ID { + POP_NEWCHARMAP sym_id { charmap_New($2, nullptr); } - | POP_NEWCHARMAP ID COMMA ID { + | POP_NEWCHARMAP sym_id COMMA sym_id { charmap_New($2, &$4); } ; setcharmap: - POP_SETCHARMAP ID { + POP_SETCHARMAP sym_id { charmap_Set($2); } ; @@ -1192,7 +1182,7 @@ pushc: ; pushc_setcharmap: - POP_PUSHC ID { + POP_PUSHC sym_id { charmap_Push(); charmap_Set($2); } @@ -1325,7 +1315,7 @@ relocexpr: ; relocexpr_no_str: - scoped_anon_id { + scoped_anon_id_no_str { $$.makeSymbol($1); } | NUMBER { @@ -1418,7 +1408,7 @@ relocexpr_no_str: | OP_ISCONST LPAREN relocexpr RPAREN { $$.makeNumber($3.isKnown()); } - | OP_BANK LPAREN scoped_anon_id RPAREN { + | OP_BANK LPAREN scoped_anon_id_no_str RPAREN { // '@' is also an ID; it is handled here 
$$.makeBankSymbol($3); } @@ -1547,6 +1537,19 @@ string: STRING { $$ = std::move($1); } + | STR_ID { + if (Symbol *sym = sym_FindExactSymbol($1); !sym) { + if (sym_IsPurgedExact($1)) { + ::error("Unknown symbol \"%s\"; it was purged\n", $1.c_str()); + } else { + ::error("Unknown symbol \"%s\"\n", $1.c_str()); + } + } else if (sym->type != SYM_EQUS) { + ::error("Symbol \"%s\" is not a string symbol\n", $1.c_str()); + } else { + $$ = *sym->getEqus(); + } + } | OP_STRSUB LPAREN string COMMA iconst COMMA uconst RPAREN { size_t len = strlenUTF8($3, false); uint32_t pos = adjustNegativePos($5, len, "STRSUB");