From 556d232813012dde960713873185cc6d9c92fb63 Mon Sep 17 00:00:00 2001 From: Robert Einhorn Date: Mon, 10 Feb 2025 18:26:25 +0100 Subject: [PATCH] update to Python 3.13.2 --- python/python2_7_18/CSharp/AssemblyInfo.cs | 2 + python/python2_7_18/CSharp/PythonLexerBase.cs | 24 +- python/python2_7_18/Python3/README.md | 5 - .../python2_7_18/Python3/transformGrammar.py | 29 - python/python2_7_18/PythonLexer.g4 | 115 +- python/python2_7_18/PythonParser.g4 | 5 +- python/python2_7_18/README.md | 11 +- python/python2_7_18/changes.md | 3 + python/python2_7_18/changes.txt | 4 - python/python2_7_18/tests/test_empty_file.py | 0 .../test_error_first_statement_indented.py | 10 - .../tests/test_error_inconsistent_dedent.py | 10 - .../tests/test_error_not_indented.py | 8 - ...test_error_tab_and_space_in_indentation.py | 11 - .../tests/test_error_unexpected_indent.py | 9 - .../tests/test_explicit_line_joining.py | 9 - .../tests/test_formfeed_as_separator.py | 6 - .../tests/test_formfeed_at_start_of_line.py | 6 - .../tests/test_formfeed_in_indent.py | 8 - .../test_hidden_NEWLINE_before_blank_line.py | 9 - .../test_hidden_NEWLINE_before_comment.py | 11 - .../tests/test_hidden_leading_NEWLINEs.py | 7 - .../tests/test_implicit_line_joining.py | 9 - .../tests/test_insert_trailing_NEWLINE_1.py | 8 - .../tests/test_insert_trailing_NEWLINE_2.py | 10 - .../tests/test_no_trailing_NEWLINE.py | 4 - .../test_trailing_inconsistent_dedent.py | 9 - .../tests/test_trailing_indent.py | 9 - .../tests/test_trailing_unexpected_indent.py | 9 - python/python3_12/CSharp/PythonLexerBase.cs | 475 --- python/python3_12/CSharp/PythonParserBase.cs | 25 - python/python3_12/Java/PythonLexerBase.java | 397 --- python/python3_12/Java/PythonParserBase.java | 16 - .../python3_12/JavaScript/PythonLexerBase.js | 392 --- .../python3_12/JavaScript/PythonParserBase.js | 15 - python/python3_12/Python3/PythonLexerBase.py | 309 -- python/python3_12/Python3/PythonParserBase.py | 13 - python/python3_12/Python3/README.md | 5 - python/python3_12/Python3/transformGrammar.py | 29 - python/python3_12/README.md | 13 - .../python3_12/TypeScript/PythonLexerBase.ts | 392 --- .../python3_12/TypeScript/PythonParserBase.ts | 16 - python/python3_12/changes.md | 12 - python/python3_12/examples/abc.py | 188 -- python/python3_12/examples/aifc.py | 984 ------ python/python3_12/examples/antigravity.py | 17 - python/python3_12/examples/argparse.py | 2655 ----------------- python/python3_12/examples/ast.py | 1829 ------------ python/python3_12/examples/base64.py | 586 ---- python/python3_12/examples/bdb.py | 893 ------ python/python3_12/examples/bisect.py | 118 - python/python3_12/examples/bz2.py | 344 --- python/python3_12/examples/calendar.py | 798 ----- python/python3_12/examples/cgi.py | 1012 ------- python/python3_12/examples/cgitb.py | 332 --- python/python3_12/examples/chunk.py | 173 -- python/python3_12/tests/test_empty_file.py | 0 .../test_error_first_statement_indented.py | 10 - .../tests/test_error_inconsistent_dedent.py | 10 - .../tests/test_error_not_indented.py | 8 - ...test_error_tab_and_space_in_indentation.py | 11 - .../tests/test_error_unexpected_indent.py | 9 - .../tests/test_explicit_line_joining.py | 9 - .../tests/test_formfeed_as_separator.py | 6 - .../tests/test_formfeed_at_start_of_line.py | 6 - .../tests/test_formfeed_in_indent.py | 8 - .../test_hidden_NEWLINE_before_blank_line.py | 9 - .../test_hidden_NEWLINE_before_comment.py | 11 - .../tests/test_hidden_leading_NEWLINEs.py | 7 - .../tests/test_implicit_line_joining.py | 9 - 
.../tests/test_insert_trailing_NEWLINE_1.py | 8 - .../tests/test_insert_trailing_NEWLINE_2.py | 10 - python/python3_12/tests/test_match_case.py | 16 - .../tests/test_no_trailing_NEWLINE.py | 4 - .../test_trailing_inconsistent_dedent.py | 9 - .../python3_12/tests/test_trailing_indent.py | 9 - .../tests/test_trailing_unexpected_indent.py | 9 - python/python3_13/CSharp/AssemblyInfo.cs | 2 + python/python3_13/CSharp/PythonLexerBase.cs | 797 +++++ python/python3_13/Java/PythonLexerBase.java | 684 +++++ .../python3_13/JavaScript/PythonLexerBase.js | 676 +++++ python/python3_13/Python3/PythonLexerBase.py | 557 ++++ .../Python3_13_2_official_grammar.peg} | 31 +- .../{python3_12 => python3_13}/PythonLexer.g4 | 375 ++- .../PythonParser.g4 | 133 +- python/python3_13/README.md | 37 + .../python3_13/TypeScript/PythonLexerBase.ts | 677 +++++ python/python3_13/changes.md | 30 + python/{python3_12 => python3_13}/desc.xml | 0 .../examples/__future__.py | 0 .../examples/__hello__.py | 0 .../examples/_aix_support.py | 0 .../python3_13/examples/_android_support.py | 181 ++ .../examples/_collections_abc.py | 7 +- python/python3_13/examples/_colorize.py | 64 + .../examples/_compat_pickle.py | 1 - .../examples/_compression.py | 0 python/python3_13/examples/_ios_support.py | 71 + .../examples/_markupbase.py | 0 .../python3_13/examples/_opcode_metadata.py | 343 +++ .../examples/_osx_support.py | 5 + .../examples/_py_abc.py | 0 .../examples/_pydatetime.py | 64 +- .../examples/_pydecimal.py | 120 +- .../examples/_pyio.py | 38 +- .../examples/_pylong.py | 204 +- .../examples/_sitebuiltins.py | 0 .../examples/_strptime.py | 231 +- .../examples/_threading_local.py | 0 .../examples/_weakrefset.py | 0 python/python3_13/pom.xml | 56 + 111 files changed, 4984 insertions(+), 12976 deletions(-) create mode 100644 python/python2_7_18/CSharp/AssemblyInfo.cs delete mode 100644 python/python2_7_18/Python3/README.md delete mode 100644 python/python2_7_18/Python3/transformGrammar.py create mode 100644 python/python2_7_18/changes.md delete mode 100644 python/python2_7_18/changes.txt delete mode 100644 python/python2_7_18/tests/test_empty_file.py delete mode 100644 python/python2_7_18/tests/test_error_first_statement_indented.py delete mode 100644 python/python2_7_18/tests/test_error_inconsistent_dedent.py delete mode 100644 python/python2_7_18/tests/test_error_not_indented.py delete mode 100644 python/python2_7_18/tests/test_error_tab_and_space_in_indentation.py delete mode 100644 python/python2_7_18/tests/test_error_unexpected_indent.py delete mode 100644 python/python2_7_18/tests/test_explicit_line_joining.py delete mode 100644 python/python2_7_18/tests/test_formfeed_as_separator.py delete mode 100644 python/python2_7_18/tests/test_formfeed_at_start_of_line.py delete mode 100644 python/python2_7_18/tests/test_formfeed_in_indent.py delete mode 100644 python/python2_7_18/tests/test_hidden_NEWLINE_before_blank_line.py delete mode 100644 python/python2_7_18/tests/test_hidden_NEWLINE_before_comment.py delete mode 100644 python/python2_7_18/tests/test_hidden_leading_NEWLINEs.py delete mode 100644 python/python2_7_18/tests/test_implicit_line_joining.py delete mode 100644 python/python2_7_18/tests/test_insert_trailing_NEWLINE_1.py delete mode 100644 python/python2_7_18/tests/test_insert_trailing_NEWLINE_2.py delete mode 100644 python/python2_7_18/tests/test_no_trailing_NEWLINE.py delete mode 100644 python/python2_7_18/tests/test_trailing_inconsistent_dedent.py delete mode 100644 python/python2_7_18/tests/test_trailing_indent.py delete 
mode 100644 python/python2_7_18/tests/test_trailing_unexpected_indent.py delete mode 100644 python/python3_12/CSharp/PythonLexerBase.cs delete mode 100644 python/python3_12/CSharp/PythonParserBase.cs delete mode 100644 python/python3_12/Java/PythonLexerBase.java delete mode 100644 python/python3_12/Java/PythonParserBase.java delete mode 100644 python/python3_12/JavaScript/PythonLexerBase.js delete mode 100644 python/python3_12/JavaScript/PythonParserBase.js delete mode 100644 python/python3_12/Python3/PythonLexerBase.py delete mode 100644 python/python3_12/Python3/PythonParserBase.py delete mode 100644 python/python3_12/Python3/README.md delete mode 100644 python/python3_12/Python3/transformGrammar.py delete mode 100644 python/python3_12/README.md delete mode 100644 python/python3_12/TypeScript/PythonLexerBase.ts delete mode 100644 python/python3_12/TypeScript/PythonParserBase.ts delete mode 100644 python/python3_12/changes.md delete mode 100644 python/python3_12/examples/abc.py delete mode 100644 python/python3_12/examples/aifc.py delete mode 100644 python/python3_12/examples/antigravity.py delete mode 100644 python/python3_12/examples/argparse.py delete mode 100644 python/python3_12/examples/ast.py delete mode 100644 python/python3_12/examples/base64.py delete mode 100644 python/python3_12/examples/bdb.py delete mode 100644 python/python3_12/examples/bisect.py delete mode 100644 python/python3_12/examples/bz2.py delete mode 100644 python/python3_12/examples/calendar.py delete mode 100644 python/python3_12/examples/cgi.py delete mode 100644 python/python3_12/examples/cgitb.py delete mode 100644 python/python3_12/examples/chunk.py delete mode 100644 python/python3_12/tests/test_empty_file.py delete mode 100644 python/python3_12/tests/test_error_first_statement_indented.py delete mode 100644 python/python3_12/tests/test_error_inconsistent_dedent.py delete mode 100644 python/python3_12/tests/test_error_not_indented.py delete mode 100644 python/python3_12/tests/test_error_tab_and_space_in_indentation.py delete mode 100644 python/python3_12/tests/test_error_unexpected_indent.py delete mode 100644 python/python3_12/tests/test_explicit_line_joining.py delete mode 100644 python/python3_12/tests/test_formfeed_as_separator.py delete mode 100644 python/python3_12/tests/test_formfeed_at_start_of_line.py delete mode 100644 python/python3_12/tests/test_formfeed_in_indent.py delete mode 100644 python/python3_12/tests/test_hidden_NEWLINE_before_blank_line.py delete mode 100644 python/python3_12/tests/test_hidden_NEWLINE_before_comment.py delete mode 100644 python/python3_12/tests/test_hidden_leading_NEWLINEs.py delete mode 100644 python/python3_12/tests/test_implicit_line_joining.py delete mode 100644 python/python3_12/tests/test_insert_trailing_NEWLINE_1.py delete mode 100644 python/python3_12/tests/test_insert_trailing_NEWLINE_2.py delete mode 100644 python/python3_12/tests/test_match_case.py delete mode 100644 python/python3_12/tests/test_no_trailing_NEWLINE.py delete mode 100644 python/python3_12/tests/test_trailing_inconsistent_dedent.py delete mode 100644 python/python3_12/tests/test_trailing_indent.py delete mode 100644 python/python3_12/tests/test_trailing_unexpected_indent.py create mode 100644 python/python3_13/CSharp/AssemblyInfo.cs create mode 100644 python/python3_13/CSharp/PythonLexerBase.cs create mode 100644 python/python3_13/Java/PythonLexerBase.java create mode 100644 python/python3_13/JavaScript/PythonLexerBase.js create mode 100644 python/python3_13/Python3/PythonLexerBase.py rename 
python/{python3_12/Python3_12_6_official_grammar.peg => python3_13/Python3_13_2_official_grammar.peg} (95%) rename python/{python3_12 => python3_13}/PythonLexer.g4 (70%) rename python/{python3_12 => python3_13}/PythonParser.g4 (85%) create mode 100644 python/python3_13/README.md create mode 100644 python/python3_13/TypeScript/PythonLexerBase.ts create mode 100644 python/python3_13/changes.md rename python/{python3_12 => python3_13}/desc.xml (100%) rename python/{python3_12 => python3_13}/examples/__future__.py (100%) rename python/{python3_12 => python3_13}/examples/__hello__.py (100%) rename python/{python3_12 => python3_13}/examples/_aix_support.py (100%) create mode 100644 python/python3_13/examples/_android_support.py rename python/{python3_12 => python3_13}/examples/_collections_abc.py (99%) create mode 100644 python/python3_13/examples/_colorize.py rename python/{python3_12 => python3_13}/examples/_compat_pickle.py (99%) rename python/{python3_12 => python3_13}/examples/_compression.py (100%) create mode 100644 python/python3_13/examples/_ios_support.py rename python/{python3_12 => python3_13}/examples/_markupbase.py (100%) create mode 100644 python/python3_13/examples/_opcode_metadata.py rename python/{python3_12 => python3_13}/examples/_osx_support.py (98%) rename python/{python3_12 => python3_13}/examples/_py_abc.py (100%) rename python/{python3_12 => python3_13}/examples/_pydatetime.py (98%) rename python/{python3_12 => python3_13}/examples/_pydecimal.py (98%) rename python/{python3_12 => python3_13}/examples/_pyio.py (99%) rename python/{python3_12 => python3_13}/examples/_pylong.py (58%) rename python/{python3_12 => python3_13}/examples/_sitebuiltins.py (100%) rename python/{python3_12 => python3_13}/examples/_strptime.py (75%) rename python/{python3_12 => python3_13}/examples/_threading_local.py (100%) rename python/{python3_12 => python3_13}/examples/_weakrefset.py (100%) create mode 100644 python/python3_13/pom.xml diff --git a/python/python2_7_18/CSharp/AssemblyInfo.cs b/python/python2_7_18/CSharp/AssemblyInfo.cs new file mode 100644 index 0000000000..d2e34cef12 --- /dev/null +++ b/python/python2_7_18/CSharp/AssemblyInfo.cs @@ -0,0 +1,2 @@ +[assembly: CLSCompliant(true)] + diff --git a/python/python2_7_18/CSharp/PythonLexerBase.cs b/python/python2_7_18/CSharp/PythonLexerBase.cs index 7902984380..51371dcbb5 100644 --- a/python/python2_7_18/CSharp/PythonLexerBase.cs +++ b/python/python2_7_18/CSharp/PythonLexerBase.cs @@ -34,10 +34,10 @@ THE SOFTWARE. 
public abstract class PythonLexerBase : Lexer { // A stack that keeps track of the indentation lengths - private Stack indentLengthStack; + private Stack indentLengthStack = new(); // A list where tokens are waiting to be loaded into the token stream - private LinkedList pendingTokens; - + private LinkedList pendingTokens = new(); + // last pending token types private int previousPendingTokenType; private int lastPendingTokenTypeFromDefaultChannel; @@ -49,26 +49,24 @@ public abstract class PythonLexerBase : Lexer private bool wasTabIndentation; private bool wasIndentationMixedWithSpacesAndTabs; - private IToken curToken; // current (under processing) token - private IToken ffgToken; // following (look ahead) token + private IToken curToken = null!; // current (under processing) token + private IToken ffgToken = null!; // following (look ahead) token private const int INVALID_LENGTH = -1; private const string ERR_TXT = " ERROR: "; protected PythonLexerBase(ICharStream input) : base(input) { - this.Init(); } protected PythonLexerBase(ICharStream input, TextWriter output, TextWriter errorOutput) : base(input, output, errorOutput) { - this.Init(); } public override IToken NextToken() // reading the input stream until a return EOF { this.CheckNextToken(); - IToken firstPendingToken = this.pendingTokens.First.Value; + IToken firstPendingToken = this.pendingTokens.First!.Value; this.pendingTokens.RemoveFirst(); return firstPendingToken; // add the queued token to the token stream } @@ -78,11 +76,11 @@ public override void Reset() this.Init(); base.Reset(); } - + private void Init() { - this.indentLengthStack = new Stack(); - this.pendingTokens = new LinkedList(); + this.indentLengthStack = new(); + this.pendingTokens = new(); this.previousPendingTokenType = 0; this.lastPendingTokenTypeFromDefaultChannel = 0; this.opened = 0; @@ -180,7 +178,7 @@ private void InsertLeadingIndentToken() { if (this.previousPendingTokenType == PythonLexer.WS) { - var prevToken = this.pendingTokens.Last.Value; + var prevToken = this.pendingTokens.Last!.Value; if (this.GetIndentationLength(prevToken.Text) != 0) // there is an "indentation" before the first statement { const string errMsg = "first statement indented"; @@ -302,7 +300,7 @@ private void HideAndAddPendingToken(IToken tkn) this.AddPendingToken(ctkn); } - private void CreateAndAddPendingToken(int ttype, int channel, string text, IToken sampleToken) + private void CreateAndAddPendingToken(int ttype, int channel, string? 
text, IToken sampleToken) { CommonToken ctkn = new CommonToken(sampleToken); ctkn.Type = ttype; diff --git a/python/python2_7_18/Python3/README.md b/python/python2_7_18/Python3/README.md deleted file mode 100644 index 8fa39eaa17..0000000000 --- a/python/python2_7_18/Python3/README.md +++ /dev/null @@ -1,5 +0,0 @@ - -- first run the transformGrammar.py to modify the grammar files for the Python target: -```bash - python transformGrammar.py -``` diff --git a/python/python2_7_18/Python3/transformGrammar.py b/python/python2_7_18/Python3/transformGrammar.py deleted file mode 100644 index ad336fcd12..0000000000 --- a/python/python2_7_18/Python3/transformGrammar.py +++ /dev/null @@ -1,29 +0,0 @@ -import sys, os, re, shutil - -def main(argv): - fix("PythonLexer.g4") - fix("PythonParser.g4") - -def fix(file_path): - print("Altering " + file_path) - if not os.path.exists(file_path): - print(f"Could not find file: {file_path}") - sys.exit(1) - parts = os.path.split(file_path) - file_name = parts[-1] - shutil.move(file_path, file_path + ".bak") - input_file = open(file_path + ".bak",'r') - output_file = open(file_path, 'w') - for x in input_file: - if '!this.' in x: - x = x.replace('!this.', 'not self.') - if 'this.' in x: - x = x.replace('this.', 'self.') - output_file.write(x) - output_file.flush() - print("Writing ...") - input_file.close() - output_file.close() - -if __name__ == '__main__': - main(sys.argv) diff --git a/python/python2_7_18/PythonLexer.g4 b/python/python2_7_18/PythonLexer.g4 index 5146572012..3e74f32b2f 100644 --- a/python/python2_7_18/PythonLexer.g4 +++ b/python/python2_7_18/PythonLexer.g4 @@ -28,46 +28,17 @@ THE SOFTWARE. */ lexer grammar PythonLexer; + options { superClass=PythonLexerBase; } -tokens { INDENT, DEDENT } // https://docs.python.org/2.7/reference/lexical_analysis.html#indentation + +tokens { + INDENT, DEDENT // https://docs.python.org/2.7/reference/lexical_analysis.html#indentation +} /* * lexer rules // https://docs.python.org/2.7/library/tokenize.html */ -// https://docs.python.org/2.7/reference/lexical_analysis.html#keywords -AND : 'and'; -AS : 'as'; -ASSERT : 'assert'; -BREAK : 'break'; -CLASS : 'class'; -CONTINUE : 'continue'; -DEF : 'def'; -DEL : 'del'; -ELIF : 'elif'; -ELSE : 'else'; -EXCEPT : 'except'; -EXEC : 'exec'; -FINALLY : 'finally'; -FOR : 'for'; -FROM : 'from'; -GLOBAL : 'global'; -IF : 'if'; -IMPORT : 'import'; -IN : 'in'; -IS : 'is'; -LAMBDA : 'lambda'; -NOT : 'not'; -OR : 'or'; -PASS : 'pass'; -PRINT : 'print'; -RAISE : 'raise'; -RETURN : 'return'; -TRY : 'try'; -WHILE : 'while'; -WITH : 'with'; -YIELD : 'yield'; - // https://docs.python.org/2.7/library/token.html#token.OP LPAR : '('; // OPEN_PAREN LSQB : '['; // OPEN_BRACK @@ -115,6 +86,38 @@ DOUBLESLASH : '//'; DOUBLESLASHEQUAL : '//='; AT : '@'; +// https://docs.python.org/2.7/reference/lexical_analysis.html#keywords +AND : 'and'; +AS : 'as'; +ASSERT : 'assert'; +BREAK : 'break'; +CLASS : 'class'; +CONTINUE : 'continue'; +DEF : 'def'; +DEL : 'del'; +ELIF : 'elif'; +ELSE : 'else'; +EXCEPT : 'except'; +EXEC : 'exec'; +FINALLY : 'finally'; +FOR : 'for'; +FROM : 'from'; +GLOBAL : 'global'; +IF : 'if'; +IMPORT : 'import'; +IN : 'in'; +IS : 'is'; +LAMBDA : 'lambda'; +NOT : 'not'; +OR : 'or'; +PASS : 'pass'; +PRINT : 'print'; +RAISE : 'raise'; +RETURN : 'return'; +TRY : 'try'; +WHILE : 'while'; +WITH : 'with'; +YIELD : 'yield'; // https://docs.python.org/2.7/reference/lexical_analysis.html#identifiers NAME : IDENTIFIER; @@ -134,15 +137,16 @@ STRING : STRING_LITERAL; NEWLINE : '\r'? 
'\n'; // Unix, Windows // https://docs.python.org/2.7/reference/lexical_analysis.html#comments -COMMENT : '#' ~[\r\n]* -> channel(HIDDEN); +COMMENT : '#' ~[\r\n]* -> channel(HIDDEN); // https://docs.python.org/2.7/reference/lexical_analysis.html#whitespace-between-tokens -WS : [ \t\f]+ -> channel(HIDDEN); +WS : [ \t\f]+ -> channel(HIDDEN); // https://docs.python.org/2.7/reference/lexical_analysis.html#explicit-line-joining -EXPLICIT_LINE_JOINING : '\\' NEWLINE -> channel(HIDDEN); +EXPLICIT_LINE_JOINING : BACKSLASH_NEWLINE -> channel(HIDDEN); -ERRORTOKEN : . ; // catch unrecognized characters and redirect these errors to the parser +// catch the unrecognized character(s) +ERRORTOKEN : . ; // PythonLexerBase class will report an error about this (the ERRORTOKEN will also cause an error in the parser) /* @@ -153,30 +157,35 @@ ERRORTOKEN : . ; // catch unrecognized characters and redirect these errors to t // https://docs.python.org/2.7/reference/lexical_analysis.html#string-literals fragment STRING_LITERAL : STRING_PREFIX? (SHORT_STRING | LONG_STRING); -fragment STRING_PREFIX : 'r' | 'u' | 'ur' | 'R' | 'U' | 'UR' | 'Ur' | 'uR' | 'b' | 'B' | 'br' | 'Br' | 'bR' | 'BR'; + +// 'r' | 'u' | 'ur' | 'R' | 'U' | 'UR' | 'Ur' | 'uR' | 'b' | 'B' | 'br' | 'Br' | 'bR' | 'BR'; +fragment STRING_PREFIX options { caseInsensitive=true; } : 'r' | 'u' | 'ur' | 'b' | 'br'; fragment SHORT_STRING - : '\'' SHORT_STRING_ITEM_FOR_SINGLE_QUOTE* '\'' - | '"' SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE* '"' - ; + : ['] SHORT_STRING_ITEM_FOR_SINGLE_QUOTE* ['] + | ["] SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE* ["] + ; fragment LONG_STRING - : '\'\'\'' LONG_STRING_ITEM*? '\'\'\'' - | '"""' LONG_STRING_ITEM*? '"""' - ; + : ['][']['] LONG__STRING_ITEM*? ['][']['] // nongreedy + | ["]["]["] LONG__STRING_ITEM*? ["]["]["] // nongreedy + ; fragment SHORT_STRING_ITEM_FOR_SINGLE_QUOTE : SHORT_STRING_CHAR_NO_SINGLE_QUOTE | ESCAPE_SEQ; fragment SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE : SHORT_STRING_CHAR_NO_DOUBLE_QUOTE | ESCAPE_SEQ; -fragment LONG_STRING_ITEM : LONG_STRING_CHAR | ESCAPE_SEQ; +fragment LONG__STRING_ITEM : LONG_STRING_CHAR | ESCAPE_SEQ; -fragment SHORT_STRING_CHAR_NO_SINGLE_QUOTE : ~[\\\r\n']; // -fragment SHORT_STRING_CHAR_NO_DOUBLE_QUOTE : ~[\\\r\n"]; // -fragment LONG_STRING_CHAR : ~'\\'; // -fragment ESCAPE_SEQ // https://docs.python.org/2.7/reference/lexical_analysis.html#string-literals - : '\\' '\r' '\n' // for the two-character Windows line break: \ escape sequence (string literal line continuation) - | '\\' [\u0000-\u007F] // "\" - ; +fragment SHORT_STRING_CHAR_NO_SINGLE_QUOTE : ~[\\\r\n']; // +fragment SHORT_STRING_CHAR_NO_DOUBLE_QUOTE : ~[\\\r\n"]; // +fragment LONG_STRING_CHAR : ~'\\'; // + +// https://docs.python.org/2.7/reference/lexical_analysis.html#string-literals +fragment ESCAPE_SEQ : ESCAPE_SEQ_NEWLINE | '\\' [\u0000-\u007F]; // "\" + +fragment ESCAPE_SEQ_NEWLINE : BACKSLASH_NEWLINE; // it is a kind of line continuation for string literals (backslash and newline will be ignored) + +fragment BACKSLASH_NEWLINE : '\\' NEWLINE; // https://docs.python.org/2.7/reference/lexical_analysis.html#integer-and-long-integer-literals fragment LONG_INTEGER : INTEGER ('l' | 'L'); diff --git a/python/python2_7_18/PythonParser.g4 b/python/python2_7_18/PythonParser.g4 index adb24d77d4..e476569486 100644 --- a/python/python2_7_18/PythonParser.g4 +++ b/python/python2_7_18/PythonParser.g4 @@ -26,8 +26,11 @@ THE SOFTWARE.
* Developed by : Robert Einhorn */ -parser grammar PythonParser; // https://docs.python.org/2.7/reference/grammar.html +// https://docs.python.org/2.7/reference/grammar.html +parser grammar PythonParser; + options { tokenVocab=PythonLexer; } + // ANTLR4 grammar for Python // Start symbols for the grammar: diff --git a/python/python2_7_18/README.md b/python/python2_7_18/README.md index a8575da314..af19931676 100644 --- a/python/python2_7_18/README.md +++ b/python/python2_7_18/README.md @@ -1,13 +1,14 @@ # Python 2.7.18 parser ### About files: - - PythonParser.g4 +- PythonParser.g4 is the ANTLR4 parser grammar that is based on the last official [Python 2 grammar](https://docs.python.org/2.7/reference/grammar.html) - - PythonLexerBase - handles the Python indentations - - - Example files: [Python 2.7.18 Standard Lib](https://www.python.org/downloads/release/python-2718/) +- PythonLexerBase: + - handles the Python indentations + - and manages many other things + +- Example files from: [Python 2.7.18 Standard Lib](https://www.python.org/downloads/release/python-2718/) ### Related link: [ANTLR4-parser-for-Python-2.7.18](https://github.com/RobEin/ANTLR4-parser-for-Python-2.7.18) \ No newline at end of file diff --git a/python/python2_7_18/changes.md b/python/python2_7_18/changes.md new file mode 100644 index 0000000000..540442cb10 --- /dev/null +++ b/python/python2_7_18/changes.md @@ -0,0 +1,3 @@ +# Sept. 05, 2024 +- Line continuation for string literals (backslash followed by a newline) is no longer resolved. + (backslash+newline is no longer removed from string literals) diff --git a/python/python2_7_18/changes.txt b/python/python2_7_18/changes.txt deleted file mode 100644 index e945e969ee..0000000000 --- a/python/python2_7_18/changes.txt +++ /dev/null @@ -1,4 +0,0 @@ -Szept 05, 2024 -------------- -Line continuation for string literals (backslash followed by a newline) is no longer resolved. 
-(backslash+newline is no longer removed from string literals) diff --git a/python/python2_7_18/tests/test_empty_file.py b/python/python2_7_18/tests/test_empty_file.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/python/python2_7_18/tests/test_error_first_statement_indented.py b/python/python2_7_18/tests/test_error_first_statement_indented.py deleted file mode 100644 index 39431ac786..0000000000 --- a/python/python2_7_18/tests/test_error_first_statement_indented.py +++ /dev/null @@ -1,10 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_error_first_statement_indented.py -# -# EXPECTATIONS: -# - inserted leading INDENT token -# - hidden NEWLINE tokens (channel=1) before the first statement -# - lexer error message: "line 10:3 LEXER ERROR: first statement indented" - - - i = 1 # first statement begins with space diff --git a/python/python2_7_18/tests/test_error_inconsistent_dedent.py b/python/python2_7_18/tests/test_error_inconsistent_dedent.py deleted file mode 100644 index 660f59ff65..0000000000 --- a/python/python2_7_18/tests/test_error_inconsistent_dedent.py +++ /dev/null @@ -1,10 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_error_inconsistent_dedent.py -# -# EXPECTATIONS: -# - inserted ERROR_TOKEN instead of the DEDENT token -# - lexer error message: "line 10:0 LEXER ERROR: inconsistent dedent" - -if True: - i = 0 - j = 0 # inconsistent dedent diff --git a/python/python2_7_18/tests/test_error_not_indented.py b/python/python2_7_18/tests/test_error_not_indented.py deleted file mode 100644 index fb6b451f94..0000000000 --- a/python/python2_7_18/tests/test_error_not_indented.py +++ /dev/null @@ -1,8 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_error_not_indented.py -# -# EXPECTATION: -# - parser error message: "line 8:0 missing INDENT at 'i'" - -if True: -i = 1 # no indentation diff --git a/python/python2_7_18/tests/test_error_tab_and_space_in_indentation.py b/python/python2_7_18/tests/test_error_tab_and_space_in_indentation.py deleted file mode 100644 index 7d77a9bc0e..0000000000 --- a/python/python2_7_18/tests/test_error_tab_and_space_in_indentation.py +++ /dev/null @@ -1,11 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_error_tab_and_space_in_indentation.py -# -# EXPECTATIONS: -# - inserted ERROR_TOKEN instead of the WS token -# - lexer error message: "line 11:0 LEXER ERROR: inconsistent use of tabs and spaces in indentation" - -if True: - i = 0 # indented by spaces -if True: - j = 0 # indented by a tab diff --git a/python/python2_7_18/tests/test_error_unexpected_indent.py b/python/python2_7_18/tests/test_error_unexpected_indent.py deleted file mode 100644 index 9fca02bf5d..0000000000 --- a/python/python2_7_18/tests/test_error_unexpected_indent.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_error_unexpected_indent.py -# -# EXPECTATION: -# - parser error message: "line 9:7 mismatched input '' ..." 
- -if True: - i = 0 - j = 1 # invalid indentation diff --git a/python/python2_7_18/tests/test_explicit_line_joining.py b/python/python2_7_18/tests/test_explicit_line_joining.py deleted file mode 100644 index 55be1bd964..0000000000 --- a/python/python2_7_18/tests/test_explicit_line_joining.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_explicit_line_joining.py -# -# EXPECTATIONS: -# - hiden (channel=1) EXPLICIT_LINE_JOINING token -# - no error message - -i = 1 \ - + 2 diff --git a/python/python2_7_18/tests/test_formfeed_as_separator.py b/python/python2_7_18/tests/test_formfeed_as_separator.py deleted file mode 100644 index 31c9da82cd..0000000000 --- a/python/python2_7_18/tests/test_formfeed_as_separator.py +++ /dev/null @@ -1,6 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_formfeed_as_separator.py -# -# EXPECTATION: no error message - -import io # formfeed character as whitespace diff --git a/python/python2_7_18/tests/test_formfeed_at_start_of_line.py b/python/python2_7_18/tests/test_formfeed_at_start_of_line.py deleted file mode 100644 index 0fd599a62d..0000000000 --- a/python/python2_7_18/tests/test_formfeed_at_start_of_line.py +++ /dev/null @@ -1,6 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_formfeed_at_start_of_line.py -# -# EXPECTATION: no error message - - i = 1 # line starts with formfeed diff --git a/python/python2_7_18/tests/test_formfeed_in_indent.py b/python/python2_7_18/tests/test_formfeed_in_indent.py deleted file mode 100644 index ff12eb6bf0..0000000000 --- a/python/python2_7_18/tests/test_formfeed_in_indent.py +++ /dev/null @@ -1,8 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_formfeed_in_indent.py -# -# EXPECTATION: no error message - -if True: - i = 1 # the indentation length starts after the last formfeed - j = 1 diff --git a/python/python2_7_18/tests/test_hidden_NEWLINE_before_blank_line.py b/python/python2_7_18/tests/test_hidden_NEWLINE_before_blank_line.py deleted file mode 100644 index 2fbded528c..0000000000 --- a/python/python2_7_18/tests/test_hidden_NEWLINE_before_blank_line.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_hidden_NEWLINE_before_blank_line.py -# -# EXPECTATIONS: -# - hidden NEWLINE token (channel=1) before the blank line -# - no error message -i = 1 - -j = 1 diff --git a/python/python2_7_18/tests/test_hidden_NEWLINE_before_comment.py b/python/python2_7_18/tests/test_hidden_NEWLINE_before_comment.py deleted file mode 100644 index 9db3798954..0000000000 --- a/python/python2_7_18/tests/test_hidden_NEWLINE_before_comment.py +++ /dev/null @@ -1,11 +0,0 @@ -def inc(value): -# this is a comment (or type comment) - return value + 1 - -# COMMAND LINE: -# grun Python file_input -tokens test_hidden_NEWLINE_before_comment.py -# -# EXPECTATIONS: -# - hidden NEWLINE tokens (channel=1) before a COMMENT token -# - hidden NEWLINE token (channel=1) before the blank line -# - no error message diff --git a/python/python2_7_18/tests/test_hidden_leading_NEWLINEs.py b/python/python2_7_18/tests/test_hidden_leading_NEWLINEs.py deleted file mode 100644 index 27ec70615d..0000000000 --- a/python/python2_7_18/tests/test_hidden_leading_NEWLINEs.py +++ /dev/null @@ -1,7 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_hidden_leading_NEWLINEs.py -# -# EXPECTATIONS: -# - hidden NEWLINE tokens (channel=1) before the first statement -# - no error message -i = 1 diff --git a/python/python2_7_18/tests/test_implicit_line_joining.py 
b/python/python2_7_18/tests/test_implicit_line_joining.py deleted file mode 100644 index 2ce500dc49..0000000000 --- a/python/python2_7_18/tests/test_implicit_line_joining.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_implicit_line_joining.py -# -# EXPECTATIONS: -# - hidden NEWLINE token (channel=1) after the opening parenthesis -# - no error message - -print(1 - + 2) diff --git a/python/python2_7_18/tests/test_insert_trailing_NEWLINE_1.py b/python/python2_7_18/tests/test_insert_trailing_NEWLINE_1.py deleted file mode 100644 index ae3e442512..0000000000 --- a/python/python2_7_18/tests/test_insert_trailing_NEWLINE_1.py +++ /dev/null @@ -1,8 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_insert_trailing_NEWLINE_1.py -# -# EXPECTATIONS: -# - inserted trailing NEWLINE token -# - no error message - -i = 1 # there is no newline at the end of this code \ No newline at end of file diff --git a/python/python2_7_18/tests/test_insert_trailing_NEWLINE_2.py b/python/python2_7_18/tests/test_insert_trailing_NEWLINE_2.py deleted file mode 100644 index 487d9cdce2..0000000000 --- a/python/python2_7_18/tests/test_insert_trailing_NEWLINE_2.py +++ /dev/null @@ -1,10 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_insert_trailing_NEWLINE_2.py -# -# EXPECTATIONS: -# - inserted trailing NEWLINE token -# - inserted trailing DEDENT token -# - no error message - -if True: - j = 0 # there is no newline at the end of this code \ No newline at end of file diff --git a/python/python2_7_18/tests/test_no_trailing_NEWLINE.py b/python/python2_7_18/tests/test_no_trailing_NEWLINE.py deleted file mode 100644 index 9962d0e8e8..0000000000 --- a/python/python2_7_18/tests/test_no_trailing_NEWLINE.py +++ /dev/null @@ -1,4 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_no_trailing_NEWLINE.py -# -# EXPECTATION: no trailing NEWLINE token, no error message \ No newline at end of file diff --git a/python/python2_7_18/tests/test_trailing_inconsistent_dedent.py b/python/python2_7_18/tests/test_trailing_inconsistent_dedent.py deleted file mode 100644 index c517187583..0000000000 --- a/python/python2_7_18/tests/test_trailing_inconsistent_dedent.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_trailing_inconsistent_dedent.py -# -# EXPECTATION: -# - no error message - -if True: - i = 0 # the last line (next line) is an inconsistent dedent - \ No newline at end of file diff --git a/python/python2_7_18/tests/test_trailing_indent.py b/python/python2_7_18/tests/test_trailing_indent.py deleted file mode 100644 index 715fd033cc..0000000000 --- a/python/python2_7_18/tests/test_trailing_indent.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_trailing_indent.py -# -# EXPECTATION: -# - no error message - -if True: - j = 0 # the last line (next line) is an indent - \ No newline at end of file diff --git a/python/python2_7_18/tests/test_trailing_unexpected_indent.py b/python/python2_7_18/tests/test_trailing_unexpected_indent.py deleted file mode 100644 index 57affec3df..0000000000 --- a/python/python2_7_18/tests/test_trailing_unexpected_indent.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_trailing_unexpected_indent.py -# -# EXPECTATION: -# - no error message - -if True: - j = 0 # the last line (next line) is an unexpected indent - \ No newline at end of file diff --git a/python/python3_12/CSharp/PythonLexerBase.cs 
b/python/python3_12/CSharp/PythonLexerBase.cs deleted file mode 100644 index f67f3a1c62..0000000000 --- a/python/python3_12/CSharp/PythonLexerBase.cs +++ /dev/null @@ -1,475 +0,0 @@ -/* -The MIT License (MIT) -Copyright (c) 2021 Robert Einhorn - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - */ - -/* - * Project : Python Indent/Dedent handler for ANTLR4 grammars - * - * Developed by : Robert Einhorn - */ - -using System; -using System.Collections.Generic; -using System.IO; -using System.Text.RegularExpressions; -using Antlr4.Runtime; - -public abstract class PythonLexerBase : Lexer -{ - // A stack that keeps track of the indentation lengths - private Stack indentLengthStack; - // A list where tokens are waiting to be loaded into the token stream - private LinkedList pendingTokens; - - // last pending token types - private int previousPendingTokenType; - private int lastPendingTokenTypeFromDefaultChannel; - - // The amount of opened parentheses, square brackets, or curly braces - private int opened; - // The amount of opened parentheses and square brackets in the current lexer mode - private Stack paren_or_bracket_openedStack; - - private bool wasSpaceIndentation; - private bool wasTabIndentation; - private bool wasIndentationMixedWithSpacesAndTabs; - - private IToken curToken; // current (under processing) token - private IToken ffgToken; // following (look ahead) token - - private const int INVALID_LENGTH = -1; - private const string ERR_TXT = " ERROR: "; - - protected PythonLexerBase(ICharStream input) : base(input) - { - this.Init(); - } - - protected PythonLexerBase(ICharStream input, TextWriter output, TextWriter errorOutput) : base(input, output, errorOutput) - { - this.Init(); - } - - public override IToken NextToken() // reading the input stream until a return EOF - { - this.CheckNextToken(); - IToken firstPendingToken = this.pendingTokens.First.Value; - this.pendingTokens.RemoveFirst(); - return firstPendingToken; // add the queued token to the token stream - } - - public override void Reset() - { - this.Init(); - base.Reset(); - } - - private void Init() - { - this.indentLengthStack = new Stack(); - this.pendingTokens = new LinkedList(); - this.previousPendingTokenType = 0; - this.lastPendingTokenTypeFromDefaultChannel = 0; - this.opened = 0; - this.paren_or_bracket_openedStack = new Stack(); - this.wasSpaceIndentation = false; - this.wasTabIndentation = false; - this.wasIndentationMixedWithSpacesAndTabs = false; - this.curToken = null!; - this.ffgToken = null!; - } - - private void CheckNextToken() - { - if 
(this.previousPendingTokenType != TokenConstants.EOF) - { - this.SetCurrentAndFollowingTokens(); - if (this.indentLengthStack.Count == 0) // We're at the first token - { - this.HandleStartOfInput(); - } - - switch (this.curToken.Type) - { - case PythonLexer.LPAR: - case PythonLexer.LSQB: - case PythonLexer.LBRACE: - this.opened++; - this.AddPendingToken(this.curToken); - break; - case PythonLexer.RPAR: - case PythonLexer.RSQB: - case PythonLexer.RBRACE: - this.opened--; - this.AddPendingToken(this.curToken); - break; - case PythonLexer.NEWLINE: - this.HandleNEWLINEtoken(); - break; - case PythonLexer.FSTRING_MIDDLE: - this.HandleFSTRING_MIDDLE_token(); - break; - case PythonLexer.ERRORTOKEN: - this.ReportLexerError("token recognition error at: '" + this.curToken.Text + "'"); - this.AddPendingToken(this.curToken); - break; - case TokenConstants.EOF: - this.HandleEOFtoken(); - break; - default: - this.AddPendingToken(this.curToken); - break; - } - this.HandleFORMAT_SPECIFICATION_MODE(); - } - } - - private void SetCurrentAndFollowingTokens() - { - this.curToken = this.ffgToken == null ? - base.NextToken() : - this.ffgToken; - - this.HandleFStringLexerModes(); - - this.ffgToken = this.curToken.Type == TokenConstants.EOF ? - this.curToken : - base.NextToken(); - } - - // initialize the _indentLengths - // hide the leading NEWLINE token(s) - // if exists, find the first statement (not NEWLINE, not EOF token) that comes from the default channel - // insert a leading INDENT token if necessary - private void HandleStartOfInput() - { - // initialize the stack with a default 0 indentation length - this.indentLengthStack.Push(0); // this will never be popped off - while (this.curToken.Type != TokenConstants.EOF) - { - if (this.curToken.Channel == TokenConstants.DefaultChannel) - { - if (this.curToken.Type == PythonLexer.NEWLINE) - { - // all the NEWLINE tokens must be ignored before the first statement - this.HideAndAddPendingToken(this.curToken); - } - else - { // We're at the first statement - this.InsertLeadingIndentToken(); - return; // continue the processing of the current token with CheckNextToken() - } - } - else - { - this.AddPendingToken(this.curToken); // it can be WS, EXPLICIT_LINE_JOINING, or COMMENT token - } - this.SetCurrentAndFollowingTokens(); - } // continue the processing of the EOF token with CheckNextToken() - } - - private void InsertLeadingIndentToken() - { - if (this.previousPendingTokenType == PythonLexer.WS) - { - var prevToken = this.pendingTokens.Last.Value; - if (this.GetIndentationLength(prevToken.Text) != 0) // there is an "indentation" before the first statement - { - const string errMsg = "first statement indented"; - this.ReportLexerError(errMsg); - // insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser - this.CreateAndAddPendingToken(PythonLexer.INDENT, TokenConstants.DefaultChannel, PythonLexerBase.ERR_TXT + errMsg, this.curToken); - } - } - } - - private void HandleNEWLINEtoken() - { - if (this.opened > 0) - { - // We're in an implicit line joining, ignore the current NEWLINE token - this.HideAndAddPendingToken(this.curToken); - } - else - { - IToken nlToken = new CommonToken(this.curToken); // save the current NEWLINE token - bool isLookingAhead = this.ffgToken.Type == PythonLexer.WS; - if (isLookingAhead) - { - this.SetCurrentAndFollowingTokens(); // set the next two tokens - } - - switch (this.ffgToken.Type) - { - case PythonLexer.NEWLINE: // We're before a blank line - case PythonLexer.COMMENT: // We're 
before a comment - this.HideAndAddPendingToken(nlToken); - if (isLookingAhead) - { - this.AddPendingToken(this.curToken); // WS token - } - break; - default: - this.AddPendingToken(nlToken); - if (isLookingAhead) - { // We're on whitespace(s) followed by a statement - int indentationLength = this.ffgToken.Type == TokenConstants.EOF ? - 0 : - this.GetIndentationLength(this.curToken.Text); - - if (indentationLength != PythonLexerBase.INVALID_LENGTH) - { - this.AddPendingToken(this.curToken); // WS token - this.InsertIndentOrDedentToken(indentationLength); // may insert INDENT token or DEDENT token(s) - } - else - { - this.ReportError("inconsistent use of tabs and spaces in indentation"); - } - } - else - { - // We're at a newline followed by a statement (there is no whitespace before the statement) - this.InsertIndentOrDedentToken(0); // may insert DEDENT token(s) - } - break; - } - } - } - - private void InsertIndentOrDedentToken(int indentLength) - { - int prevIndentLength = this.indentLengthStack.Peek(); - if (indentLength > prevIndentLength) - { - this.CreateAndAddPendingToken(PythonLexer.INDENT, TokenConstants.DefaultChannel, null, this.ffgToken); - this.indentLengthStack.Push(indentLength); - } - else - { - while (indentLength < prevIndentLength) - { // more than 1 DEDENT token may be inserted into the token stream - this.indentLengthStack.Pop(); - prevIndentLength = this.indentLengthStack.Peek(); - if (indentLength <= prevIndentLength) - { - this.CreateAndAddPendingToken(PythonLexer.DEDENT, TokenConstants.DefaultChannel, null, this.ffgToken); - } - else - { - this.ReportError("inconsistent dedent"); - } - } - } - } - - private void HandleFSTRING_MIDDLE_token() // replace the double braces '{{' or '}}' to single braces and hide the second braces - { - string fsMid = this.curToken.Text; - fsMid = fsMid.Replace("{{", "{_").Replace("}}", "}_"); // replace: {{ --> {_ and }} --> }_ - Regex regex = new Regex(@"(?<=[{}])_"); - string[] arrOfStr = regex.Split(fsMid); // split by {_ or }_ - foreach (string s in arrOfStr) - { - if (!String.IsNullOrEmpty(s)) - { - this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.DefaultChannel, s, this.ffgToken); - string lastCharacter = s.Substring(s.Length - 1); - if ("{}".Contains(lastCharacter)) - { - this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.HiddenChannel, lastCharacter, this.ffgToken); - // this inserted hidden token allows to restore the original f-string literal with the double braces - } - } - } - } - - private void HandleFStringLexerModes() // https://peps.python.org/pep-0498/#specification - { - if (this.ModeStack.Count > 0) - { - switch (this.curToken.Type) - { - case PythonLexer.LBRACE: - this.PushMode(Lexer.DEFAULT_MODE); - this.paren_or_bracket_openedStack.Push(0); - break; - case PythonLexer.LPAR: - case PythonLexer.LSQB: - // https://peps.python.org/pep-0498/#lambdas-inside-expressions - this.paren_or_bracket_openedStack.Push(this.paren_or_bracket_openedStack.Pop() + 1); // increment the last element - break; - case PythonLexer.RPAR: - case PythonLexer.RSQB: - this.paren_or_bracket_openedStack.Push(this.paren_or_bracket_openedStack.Pop() - 1); // decrement the last element - break; - case PythonLexer.COLON: // colon can only come from DEFAULT_MODE - if (this.paren_or_bracket_openedStack.Peek() == 0) - { - switch (this.ModeStack.Peek()) // check the previous lexer mode (the current is DEFAULT_MODE) - { - case PythonLexer.SINGLE_QUOTE_FSTRING_MODE: - case 
PythonLexer.LONG_SINGLE_QUOTE_FSTRING_MODE: - case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE: - this.Mode(PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode - break; - case PythonLexer.DOUBLE_QUOTE_FSTRING_MODE: - case PythonLexer.LONG_DOUBLE_QUOTE_FSTRING_MODE: - case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE: - this.Mode(PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode - break; - } - } - break; - case PythonLexer.RBRACE: - switch (CurrentMode) - { - case Lexer.DEFAULT_MODE: - case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE: - case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE: - this.PopMode(); - this.paren_or_bracket_openedStack.Pop(); - break; - default: - this.ReportLexerError("f-string: single '}' is not allowed"); - break; - } - break; - } - } - } - - private void HandleFORMAT_SPECIFICATION_MODE() - { - if (this.ModeStack.Count > 0 && this.ffgToken.Type == PythonLexer.RBRACE) - { - switch (this.curToken.Type) - { - case PythonLexer.COLON: - case PythonLexer.RBRACE: - // insert an empty FSTRING_MIDDLE token instead of the missing format specification - this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.DefaultChannel, "", this.ffgToken); - break; - } - } - } - - private void InsertTrailingTokens() - { - switch (this.lastPendingTokenTypeFromDefaultChannel) - { - case PythonLexer.NEWLINE: - case PythonLexer.DEDENT: - break; // no trailing NEWLINE token is needed - default: - // insert an extra trailing NEWLINE token that serves as the end of the last statement - this.CreateAndAddPendingToken(PythonLexer.NEWLINE, TokenConstants.DefaultChannel, null, this.ffgToken); // ffgToken is EOF - break; - } - this.InsertIndentOrDedentToken(0); // Now insert as many trailing DEDENT tokens as needed - } - - private void HandleEOFtoken() - { - if (this.lastPendingTokenTypeFromDefaultChannel > 0) - { // there was a statement in the input (leading NEWLINE tokens are hidden) - this.InsertTrailingTokens(); - } - this.AddPendingToken(this.curToken); - } - - private void HideAndAddPendingToken(IToken tkn) - { - CommonToken ctkn = new CommonToken(tkn); - ctkn.Channel = TokenConstants.HiddenChannel; - this.AddPendingToken(ctkn); - } - - private void CreateAndAddPendingToken(int ttype, int channel, string text, IToken sampleToken) - { - CommonToken ctkn = new CommonToken(sampleToken); - ctkn.Type = ttype; - ctkn.Channel = channel; - ctkn.StopIndex = sampleToken.StartIndex - 1; - - ctkn.Text = text == null - ? 
"<" + Vocabulary.GetSymbolicName(ttype) + ">" - : text; - - this.AddPendingToken(ctkn); - } - - private void AddPendingToken(IToken tkn) - { - // save the last pending token type because the pendingTokens linked list can be empty by the nextToken() - this.previousPendingTokenType = tkn.Type; - if (tkn.Channel == TokenConstants.DefaultChannel) - { - this.lastPendingTokenTypeFromDefaultChannel = this.previousPendingTokenType; - } - this.pendingTokens.AddLast(tkn); - } - - private int GetIndentationLength(string indentText) // the indentText may contain spaces, tabs or form feeds - { - const int TAB_LENGTH = 8; // the standard number of spaces to replace a tab with spaces - int length = 0; - foreach (char ch in indentText) - { - switch (ch) - { - case ' ': - this.wasSpaceIndentation = true; - length += 1; - break; - case '\t': - this.wasTabIndentation = true; - length += TAB_LENGTH - (length % TAB_LENGTH); - break; - case '\f': // form feed - length = 0; - break; - } - } - - if (this.wasTabIndentation && this.wasSpaceIndentation) - { - if (!this.wasIndentationMixedWithSpacesAndTabs) - { - this.wasIndentationMixedWithSpacesAndTabs = true; - length = PythonLexerBase.INVALID_LENGTH; // only for the first inconsistent indent - } - } - return length; - } - - private void ReportLexerError(string errMsg) - { - this.ErrorListenerDispatch.SyntaxError(this.ErrorOutput, this, this.curToken.Type, this.curToken.Line, this.curToken.Column, " LEXER" + PythonLexerBase.ERR_TXT + errMsg, null); - } - - private void ReportError(string errMsg) - { - this.ReportLexerError(errMsg); - - // the ERRORTOKEN will raise an error in the parser - this.CreateAndAddPendingToken(PythonLexer.ERRORTOKEN, TokenConstants.DefaultChannel, PythonLexerBase.ERR_TXT + errMsg, this.ffgToken); - } -} diff --git a/python/python3_12/CSharp/PythonParserBase.cs b/python/python3_12/CSharp/PythonParserBase.cs deleted file mode 100644 index 51f0ee3525..0000000000 --- a/python/python3_12/CSharp/PythonParserBase.cs +++ /dev/null @@ -1,25 +0,0 @@ -using Antlr4.Runtime; -using System.IO; - -public abstract class PythonParserBase : Parser -{ - protected PythonParserBase(ITokenStream input) : base(input) - { - } - - protected PythonParserBase(ITokenStream input, TextWriter output, TextWriter errorOutput) - : base(input, output, errorOutput) - { - } - - // https://docs.python.org/3/reference/lexical_analysis.html#soft-keywords - public bool isEqualToCurrentTokenText(string tokenText) - { - return this.CurrentToken.Text == tokenText; - } - - public bool isnotEqualToCurrentTokenText(string tokenText) - { - return !this.isEqualToCurrentTokenText(tokenText); // for compatibility with the Python 'not' logical operator - } -} diff --git a/python/python3_12/Java/PythonLexerBase.java b/python/python3_12/Java/PythonLexerBase.java deleted file mode 100644 index 7e4f059d7d..0000000000 --- a/python/python3_12/Java/PythonLexerBase.java +++ /dev/null @@ -1,397 +0,0 @@ -/* -The MIT License (MIT) -Copyright (c) 2021 Robert Einhorn - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial 
portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - */ - -/* - * - * Project : Python Indent/Dedent handler for ANTLR4 grammars - * - * Developed by : Robert Einhorn, robert.einhorn.hu@gmail.com - * - */ - -import java.util.*; - -import org.antlr.v4.runtime.*; - -public abstract class PythonLexerBase extends Lexer { - // A stack that keeps track of the indentation lengths - private Deque indentLengthStack; - // A list where tokens are waiting to be loaded into the token stream - private LinkedList pendingTokens; - - // last pending token types - private int previousPendingTokenType; - private int lastPendingTokenTypeFromDefaultChannel; - - // The amount of opened parentheses, square brackets or curly braces - private int opened; - // The amount of opened parentheses and square brackets in the current lexer mode - private Deque paren_or_bracket_openedStack; - - private boolean wasSpaceIndentation; - private boolean wasTabIndentation; - private boolean wasIndentationMixedWithSpacesAndTabs; - - private Token curToken; // current (under processing) token - private Token ffgToken; // following (look ahead) token - - private final int INVALID_LENGTH = -1; - private final String ERR_TXT = " ERROR: "; - - protected PythonLexerBase(CharStream input) { - super(input); - this.init(); - } - - @Override - public Token nextToken() { // reading the input stream until a return EOF - this.checkNextToken(); - return this.pendingTokens.pollFirst(); // add the queued token to the token stream - } - - @Override - public void reset() { - this.init(); - super.reset(); - } - - private void init() { - this.indentLengthStack = new ArrayDeque<>(); - this.pendingTokens = new LinkedList<>(); - this.previousPendingTokenType = 0; - this.lastPendingTokenTypeFromDefaultChannel = 0; - this.opened = 0; - this.paren_or_bracket_openedStack = new ArrayDeque<>(); - this.wasSpaceIndentation = false; - this.wasTabIndentation = false; - this.wasIndentationMixedWithSpacesAndTabs = false; - this.curToken = null; - this.ffgToken = null; - } - - private void checkNextToken() { - if (this.previousPendingTokenType != Token.EOF) { - this.setCurrentAndFollowingTokens(); - if (this.indentLengthStack.isEmpty()) { // We're at the first token - this.handleStartOfInput(); - } - - switch (this.curToken.getType()) { - case PythonLexer.LPAR: - case PythonLexer.LSQB: - case PythonLexer.LBRACE: - this.opened++; - this.addPendingToken(this.curToken); - break; - case PythonLexer.RPAR: - case PythonLexer.RSQB: - case PythonLexer.RBRACE: - this.opened--; - this.addPendingToken(this.curToken); - break; - case PythonLexer.NEWLINE: - this.handleNEWLINEtoken(); - break; - case PythonLexer.FSTRING_MIDDLE: - this.handleFSTRING_MIDDLE_token(); - break; - case PythonLexer.ERRORTOKEN: - this.reportLexerError("token recognition error at: '" + this.curToken.getText() + "'"); - this.addPendingToken(this.curToken); - break; - case Token.EOF: - this.handleEOFtoken(); - break; - default: - this.addPendingToken(this.curToken); - } - this.handleFORMAT_SPECIFICATION_MODE(); - } - } - - private void setCurrentAndFollowingTokens() { - 
this.curToken = this.ffgToken == null ? - super.nextToken() : - this.ffgToken; - - this.handleFStringLexerModes(); - - this.ffgToken = this.curToken.getType() == Token.EOF ? - this.curToken : - super.nextToken(); - } - - // initialize the indentLengthStack - // hide the leading NEWLINE token(s) - // if exists, find the first statement (not NEWLINE, not EOF token) that comes from the default channel - // insert a leading INDENT token if necessary - private void handleStartOfInput() { - // initialize the stack with a default 0 indentation length - this.indentLengthStack.push(0); // this will never be popped off - while (this.curToken.getType() != Token.EOF) { - if (this.curToken.getChannel() == Token.DEFAULT_CHANNEL) { - if (this.curToken.getType() == PythonLexer.NEWLINE) { - // all the NEWLINE tokens must be ignored before the first statement - this.hideAndAddPendingToken(this.curToken); - } else { // We're at the first statement - this.insertLeadingIndentToken(); - return; // continue the processing of the current token with checkNextToken() - } - } else { - this.addPendingToken(this.curToken); // it can be WS, EXPLICIT_LINE_JOINING or COMMENT token - } - this.setCurrentAndFollowingTokens(); - } - // continue the processing of the EOF token with checkNextToken() - } - - private void insertLeadingIndentToken() { - if (this.previousPendingTokenType == PythonLexer.WS) { - Token prevToken = this.pendingTokens.peekLast(); // WS token - if (this.getIndentationLength(prevToken.getText()) != 0) { // there is an "indentation" before the first statement - final String errMsg = "first statement indented"; - this.reportLexerError(errMsg); - // insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser - this.createAndAddPendingToken(PythonLexer.INDENT, Token.DEFAULT_CHANNEL, this.ERR_TXT + errMsg, this.curToken); - } - } - } - - private void handleNEWLINEtoken() { - if (this.opened > 0) { // We're in an implicit line joining, ignore the current NEWLINE token - this.hideAndAddPendingToken(this.curToken); - } else { - final Token nlToken = new CommonToken(this.curToken); // save the current NEWLINE token - final boolean isLookingAhead = this.ffgToken.getType() == PythonLexer.WS; - if (isLookingAhead) { - this.setCurrentAndFollowingTokens(); // set the next two tokens - } - - switch (this.ffgToken.getType()) { - case PythonLexer.NEWLINE: // We're before a blank line - case PythonLexer.COMMENT: // We're before a comment - this.hideAndAddPendingToken(nlToken); - if (isLookingAhead) { - this.addPendingToken(this.curToken); // WS token - } - break; - default: - this.addPendingToken(nlToken); - if (isLookingAhead) { // We're on whitespace(s) followed by a statement - final int indentationLength = this.ffgToken.getType() == Token.EOF ? 
- 0 : - this.getIndentationLength(this.curToken.getText()); - - if (indentationLength != this.INVALID_LENGTH) { - this.addPendingToken(this.curToken); // WS token - this.insertIndentOrDedentToken(indentationLength); // may insert INDENT token or DEDENT token(s) - } else { - this.reportError("inconsistent use of tabs and spaces in indentation"); - } - } else { // We're at a newline followed by a statement (there is no whitespace before the statement) - this.insertIndentOrDedentToken(0); // may insert DEDENT token(s) - } - } - } - } - - private void insertIndentOrDedentToken(final int indentLength) { - int prevIndentLength = this.indentLengthStack.peek(); - if (indentLength > prevIndentLength) { - this.createAndAddPendingToken(PythonLexer.INDENT, Token.DEFAULT_CHANNEL, null, this.ffgToken); - this.indentLengthStack.push(indentLength); - } else { - while (indentLength < prevIndentLength) { // more than 1 DEDENT token may be inserted to the token stream - this.indentLengthStack.pop(); - prevIndentLength = this.indentLengthStack.peek(); - if (indentLength <= prevIndentLength) { - this.createAndAddPendingToken(PythonLexer.DEDENT, Token.DEFAULT_CHANNEL, null, this.ffgToken); - } else { - this.reportError("inconsistent dedent"); - } - } - } - } - - private void handleFSTRING_MIDDLE_token() { // replace the double braces '{{' or '}}' to single braces and hide the second braces - String fsMid = this.curToken.getText(); - fsMid = fsMid.replaceAll("\\{\\{", "{_").replaceAll("}}", "}_"); // replace: {{ --> {_ and }} --> }_ - String[] arrOfStr = fsMid.split("(?<=[{}])_"); // split by {_ or }_ - for (String s : arrOfStr) { - if (!s.isEmpty()) { - this.createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, s, this.ffgToken); - final String lastCharacter = s.substring(s.length() - 1); - if ("{}".contains(lastCharacter)) { - this.createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.HIDDEN_CHANNEL, lastCharacter, this.ffgToken); - // this inserted hidden token allows to restore the original f-string literal with the double braces - } - } - } - } - - private void handleFStringLexerModes() { // https://peps.python.org/pep-0498/#specification - if (!this._modeStack.isEmpty()) { - switch (this.curToken.getType()) { - case PythonLexer.LBRACE: - this.pushMode(Lexer.DEFAULT_MODE); - this.paren_or_bracket_openedStack.push(0); - break; - case PythonLexer.LPAR: - case PythonLexer.LSQB: - // https://peps.python.org/pep-0498/#lambdas-inside-expressions - this.paren_or_bracket_openedStack.push(this.paren_or_bracket_openedStack.pop() + 1); // increment the last element - break; - case PythonLexer.RPAR: - case PythonLexer.RSQB: - this.paren_or_bracket_openedStack.push(this.paren_or_bracket_openedStack.pop() - 1); // decrement the last element - break; - case PythonLexer.COLON: // colon can only come from DEFAULT_MODE - if (this.paren_or_bracket_openedStack.peek() == 0) { - switch (_modeStack.peek()) { // check the previous lexer mode (the current is DEFAULT_MODE) - case PythonLexer.SINGLE_QUOTE_FSTRING_MODE: - case PythonLexer.LONG_SINGLE_QUOTE_FSTRING_MODE: - case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE: - this.mode(PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode - break; - case PythonLexer.DOUBLE_QUOTE_FSTRING_MODE: - case PythonLexer.LONG_DOUBLE_QUOTE_FSTRING_MODE: - case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE: - this.mode(PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec. 
mode - break; - } - } - break; - case PythonLexer.RBRACE: - switch (this._mode) { - case Lexer.DEFAULT_MODE: - case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE: - case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE: - this.popMode(); - this.paren_or_bracket_openedStack.pop(); - break; - default: - this.reportLexerError("f-string: single '}' is not allowed"); - } - break; - } - } - } - - private void handleFORMAT_SPECIFICATION_MODE() { - if (!this._modeStack.isEmpty() && - this.ffgToken.getType() == PythonLexer.RBRACE) { - - switch (this.curToken.getType()) { - case PythonLexer.COLON: - case PythonLexer.RBRACE: - // insert an empty FSTRING_MIDDLE token instead of the missing format specification - this.createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, "", this.ffgToken); - break; - } - } - } - - private void insertTrailingTokens() { - switch (this.lastPendingTokenTypeFromDefaultChannel) { - case PythonLexer.NEWLINE: - case PythonLexer.DEDENT: - break; // no trailing NEWLINE token is needed - default: - // insert an extra trailing NEWLINE token that serves as the end of the last statement - this.createAndAddPendingToken(PythonLexer.NEWLINE, Token.DEFAULT_CHANNEL, null, this.ffgToken); // ffgToken is EOF - } - this.insertIndentOrDedentToken(0); // Now insert as much trailing DEDENT tokens as needed - } - - private void handleEOFtoken() { - if (this.lastPendingTokenTypeFromDefaultChannel > 0) { - // there was statement in the input (leading NEWLINE tokens are hidden) - this.insertTrailingTokens(); - } - this.addPendingToken(this.curToken); - } - - private void hideAndAddPendingToken(final Token tkn) { - CommonToken ctkn = new CommonToken(tkn); - ctkn.setChannel(Token.HIDDEN_CHANNEL); - this.addPendingToken(ctkn); - } - - private void createAndAddPendingToken(final int ttype, final int channel, final String text, Token sampleToken) { - CommonToken ctkn = new CommonToken(sampleToken); - ctkn.setType(ttype); - ctkn.setChannel(channel); - ctkn.setStopIndex(sampleToken.getStartIndex() - 1); - ctkn.setText(text == null - ? 
"<" + this.getVocabulary().getDisplayName(ttype) + ">" - : text); - - this.addPendingToken(ctkn); - } - - private void addPendingToken(final Token tkn) { - // save the last pending token type because the pendingTokens linked list can be empty by the nextToken() - this.previousPendingTokenType = tkn.getType(); - if (tkn.getChannel() == Token.DEFAULT_CHANNEL) { - this.lastPendingTokenTypeFromDefaultChannel = this.previousPendingTokenType; - } - this.pendingTokens.addLast(tkn); - } - - private int getIndentationLength(final String indentText) { // the indentText may contain spaces, tabs or form feeds - final int TAB_LENGTH = 8; // the standard number of spaces to replace a tab to spaces - int length = 0; - for (char ch : indentText.toCharArray()) { - switch (ch) { - case ' ': - this.wasSpaceIndentation = true; - length += 1; - break; - case '\t': - this.wasTabIndentation = true; - length += TAB_LENGTH - (length % TAB_LENGTH); - break; - case '\f': // form feed - length = 0; - break; - } - } - - if (this.wasTabIndentation && this.wasSpaceIndentation) { - if (!(this.wasIndentationMixedWithSpacesAndTabs)) { - this.wasIndentationMixedWithSpacesAndTabs = true; - length = this.INVALID_LENGTH; // only for the first inconsistent indent - } - } - return length; - } - - private void reportLexerError(final String errMsg) { - this.getErrorListenerDispatch().syntaxError(this, this.curToken, this.curToken.getLine(), this.curToken.getCharPositionInLine(), " LEXER" + this.ERR_TXT + errMsg, null); - } - - private void reportError(final String errMsg) { - this.reportLexerError(errMsg); - - // the ERRORTOKEN will raise an error in the parser - this.createAndAddPendingToken(PythonLexer.ERRORTOKEN, Token.DEFAULT_CHANNEL, this.ERR_TXT + errMsg, this.ffgToken); - } -} diff --git a/python/python3_12/Java/PythonParserBase.java b/python/python3_12/Java/PythonParserBase.java deleted file mode 100644 index e93674f8ef..0000000000 --- a/python/python3_12/Java/PythonParserBase.java +++ /dev/null @@ -1,16 +0,0 @@ -import org.antlr.v4.runtime.*; - -public abstract class PythonParserBase extends Parser { - protected PythonParserBase(TokenStream input) { - super(input); - } - - // https://docs.python.org/3/reference/lexical_analysis.html#soft-keywords - public boolean isEqualToCurrentTokenText(String tokenText) { - return this.getCurrentToken().getText().equals(tokenText); - } - - public boolean isnotEqualToCurrentTokenText(String tokenText) { - return !this.isEqualToCurrentTokenText(tokenText); // for compatibility with the Python 'not' logical operator - } -} diff --git a/python/python3_12/JavaScript/PythonLexerBase.js b/python/python3_12/JavaScript/PythonLexerBase.js deleted file mode 100644 index 2c1ea71d47..0000000000 --- a/python/python3_12/JavaScript/PythonLexerBase.js +++ /dev/null @@ -1,392 +0,0 @@ -/* -The MIT License (MIT) -Copyright (c) 2021 Robert Einhorn - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - */ - -/* - * - * Project : Python Indent/Dedent handler for ANTLR4 grammars - * - * Developed by : Robert Einhorn, robert.einhorn.hu@gmail.com - * - */ - -import { Token, Lexer } from "antlr4"; -import PythonLexer from "./PythonLexer.js"; - -export default class PythonLexerBase extends Lexer { - constructor(input) { - super(input); - - // A stack that keeps track of the indentation lengths - this.indentLengthStack; - // A list where tokens are waiting to be loaded into the token stream - this.pendingTokens; - - // last pending token types - this.previousPendingTokenType; - this.lastPendingTokenTypeFromDefaultChannel; - - // The amount of opened parentheses, square brackets or curly braces - this.opened; - // The amount of opened parentheses and square brackets in the current lexer mode - this.paren_or_bracket_openedStack; - - this.wasSpaceIndentation; - this.wasTabIndentation; - this.wasIndentationMixedWithSpacesAndTabs; - - this.curToken; // current (under processing) token - this.ffgToken; // following (look ahead) token - - this.#init(); - } - - get #INVALID_LENGTH() { return -1; } - get #ERR_TXT() { return " ERROR: "; } - - nextToken() { // reading the input stream until a return EOF - this.#checkNextToken(); - return this.pendingTokens.shift() /* .pollFirst() */; // add the queued token to the token stream - } - - reset() { - this.#init(); - super.reset(); - } - - #init() { - this.indentLengthStack = []; - this.pendingTokens = []; - this.previousPendingTokenType = 0; - this.lastPendingTokenTypeFromDefaultChannel = 0; - this.opened = 0; - this.paren_or_bracket_openedStack = []; - this.wasSpaceIndentation = false; - this.wasTabIndentation = false; - this.wasIndentationMixedWithSpacesAndTabs = false; - this.curToken = null; - this.ffgToken = null; - } - - #checkNextToken() { - if (this.previousPendingTokenType !== Token.EOF) { - this.#setCurrentAndFollowingTokens(); - if (this.indentLengthStack.length === 0) { // We're at the first token - this.#handleStartOfInput(); - } - - switch (this.curToken.type) { - case PythonLexer.LPAR: - case PythonLexer.LSQB: - case PythonLexer.LBRACE: - this.opened++; - this.#addPendingToken(this.curToken); - break; - case PythonLexer.RPAR: - case PythonLexer.RSQB: - case PythonLexer.RBRACE: - this.opened--; - this.#addPendingToken(this.curToken); - break; - case PythonLexer.NEWLINE: - this.#handleNEWLINEtoken(); - break; - case PythonLexer.FSTRING_MIDDLE: - this.#handleFSTRING_MIDDLE_token(); - break; - case PythonLexer.ERRORTOKEN: - this.#reportLexerError(`token recognition error at: '${this.curToken.text}'`); - this.#addPendingToken(this.curToken); - break; - case Token.EOF: - this.#handleEOFtoken(); - break; - default: - this.#addPendingToken(this.curToken); - } - this.#handleFORMAT_SPECIFICATION_MODE(); - } - } - - #setCurrentAndFollowingTokens() { - this.curToken = this.ffgToken == undefined ? - super.nextToken() : - this.ffgToken; - - this.#handleFStringLexerModes(); - - this.ffgToken = this.curToken.type === Token.EOF ? 
- this.curToken : - super.nextToken(); - } - - // initialize the _indentLengthStack - // hide the leading NEWLINE token(s) - // if exists, find the first statement (not NEWLINE, not EOF token) that comes from the default channel - // insert a leading INDENT token if necessary - #handleStartOfInput() { - // initialize the stack with a default 0 indentation length - this.indentLengthStack.push(0); // this will never be popped off - while (this.curToken.type !== Token.EOF) { - if (this.curToken.channel === Token.DEFAULT_CHANNEL) { - if (this.curToken.type === PythonLexer.NEWLINE) { - // all the NEWLINE tokens must be ignored before the first statement - this.#hideAndAddPendingToken(this.curToken); - } else { // We're at the first statement - this.#insertLeadingIndentToken(); - return; // continue the processing of the current token with #checkNextToken() - } - } else { - this.#addPendingToken(this.curToken); // it can be WS, EXPLICIT_LINE_JOINING or COMMENT token - } - this.#setCurrentAndFollowingTokens(); - } // continue the processing of the EOF token with #checkNextToken() - } - - #insertLeadingIndentToken() { - if (this.previousPendingTokenType === PythonLexer.WS) { - let prevToken = this.pendingTokens.at(- 1) /* .peekLast() */; // WS token - if (this.#getIndentationLength(prevToken.text) !== 0) { // there is an "indentation" before the first statement - const errMsg = "first statement indented"; - this.#reportLexerError(errMsg); - // insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser - this.#createAndAddPendingToken(PythonLexer.INDENT, Token.DEFAULT_CHANNEL, this.#ERR_TXT + errMsg, this.curToken); - } - } - } - - #handleNEWLINEtoken() { - if (this.opened > 0) { // We're in an implicit line joining, ignore the current NEWLINE token - this.#hideAndAddPendingToken(this.curToken); - } else { - let nlToken = this.curToken.clone(); // save the current NEWLINE token - const isLookingAhead = this.ffgToken.type === PythonLexer.WS; - if (isLookingAhead) { - this.#setCurrentAndFollowingTokens(); // set the next two tokens - } - - switch (this.ffgToken.type) { - case PythonLexer.NEWLINE: // We're before a blank line - case PythonLexer.COMMENT: // We're before a comment - this.#hideAndAddPendingToken(nlToken); - if (isLookingAhead) { - this.#addPendingToken(this.curToken); // WS token - } - break; - default: - this.#addPendingToken(nlToken); - if (isLookingAhead) { // We're on whitespace(s) followed by a statement - const indentationLength = this.ffgToken.type === Token.EOF ? 
- 0 : - this.#getIndentationLength(this.curToken.text); - - if (indentationLength !== this.#INVALID_LENGTH) { - this.#addPendingToken(this.curToken); // WS token - this.#insertIndentOrDedentToken(indentationLength); // may insert INDENT token or DEDENT token(s) - } else { - this.#reportError("inconsistent use of tabs and spaces in indentation"); - } - } else { // We're at a newline followed by a statement (there is no whitespace before the statement) - this.#insertIndentOrDedentToken(0); // may insert DEDENT token(s) - } - } - } - } - - #insertIndentOrDedentToken(curIndentLength) { - let prevIndentLength = this.indentLengthStack.at(-1) /* peek() */; - if (curIndentLength > prevIndentLength) { - this.#createAndAddPendingToken(PythonLexer.INDENT, Token.DEFAULT_CHANNEL, null, this.ffgToken); - this.indentLengthStack.push(curIndentLength); - } else { - while (curIndentLength < prevIndentLength) { // more than 1 DEDENT token may be inserted to the token stream - this.indentLengthStack.pop(); - prevIndentLength = this.indentLengthStack.at(-1) /* peek() */; - if (curIndentLength <= prevIndentLength) { - this.#createAndAddPendingToken(PythonLexer.DEDENT, Token.DEFAULT_CHANNEL, null, this.ffgToken); - } else { - this.#reportError("inconsistent dedent"); - } - } - } - } - - #handleFSTRING_MIDDLE_token() { // replace the double braces '{{' or '}}' to single braces and hide the second braces - let fsMid = this.curToken.text; - fsMid = fsMid.replaceAll(/\{\{/g, "{_").replaceAll(/\}\}/g, "}_"); // replace: {{ --> {_ and }} --> }_ - let arrOfStr = fsMid.split(/(?<=[{}])_/); // split by {_ or }_ - for (let s of arrOfStr) { - if (s) { - this.#createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, s, this.ffgToken); - let lastCharacter = s.charAt(s.length - 1); - if ("{}".includes(lastCharacter)) { - this.#createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.HIDDEN_CHANNEL, lastCharacter, this.ffgToken); - // this inserted hidden token allows to restore the original f-string literal with the double braces - } - } - } - } - - #handleFStringLexerModes() { // https://peps.python.org/pep-0498/#specification - if (this._modeStack.length > 0) { - switch (this.curToken.type) { - case PythonLexer.LBRACE: - this.pushMode(Lexer.DEFAULT_MODE); - this.paren_or_bracket_openedStack.push(0); - break; - case PythonLexer.LPAR: - case PythonLexer.LSQB: - // https://peps.python.org/pep-0498/#lambdas-inside-expressions - this.paren_or_bracket_openedStack.push(this.paren_or_bracket_openedStack.pop() + 1); // increment the last element - break; - case PythonLexer.RPAR: - case PythonLexer.RSQB: - this.paren_or_bracket_openedStack.push(this.paren_or_bracket_openedStack.pop() - 1); // decrement the last element - break; - case PythonLexer.COLON: // colon can only come from DEFAULT_MODE - if (this.paren_or_bracket_openedStack.at(-1) /* peek() */ == 0) { - switch (this._modeStack.at(-1) /* peek() */) { // check the previous lexer mode (the current is DEFAULT_MODE) - case PythonLexer.SINGLE_QUOTE_FSTRING_MODE: - case PythonLexer.LONG_SINGLE_QUOTE_FSTRING_MODE: - case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE: - this.setMode(PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode - break; - case PythonLexer.DOUBLE_QUOTE_FSTRING_MODE: - case PythonLexer.LONG_DOUBLE_QUOTE_FSTRING_MODE: - case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE: - this.setMode(PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec.
mode - break; - } - } - break; - case PythonLexer.RBRACE: - switch (this._mode) { - case Lexer.DEFAULT_MODE: - case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE: - case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE: - this.popMode(); - this.paren_or_bracket_openedStack.pop(); - break; - default: - this.#reportLexerError("f-string: single '}' is not allowed"); - break; - } - break; - } - } - } - - #handleFORMAT_SPECIFICATION_MODE() { - if (this._modeStack.length > 0 && this.ffgToken.type === PythonLexer.RBRACE) { - switch (this.curToken.type) { - case PythonLexer.COLON: - case PythonLexer.RBRACE: - // insert an empty FSTRING_MIDDLE token instead of the missing format specification - this.#createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, "", this.ffgToken); - break; - } - } - } - - #insertTrailingTokens() { - switch (this.lastPendingTokenTypeFromDefaultChannel) { - case PythonLexer.NEWLINE: - case PythonLexer.DEDENT: - break; // no trailing NEWLINE token is needed - default: - // insert an extra trailing NEWLINE token that serves as the end of the last statement - this.#createAndAddPendingToken(PythonLexer.NEWLINE, Token.DEFAULT_CHANNEL, null, this.ffgToken); // _ffgToken is EOF - } - this.#insertIndentOrDedentToken(0); // Now insert as much trailing DEDENT tokens as needed - } - - #handleEOFtoken() { - if (this.lastPendingTokenTypeFromDefaultChannel > 0) { - // there was a statement in the input (leading NEWLINE tokens are hidden) - this.#insertTrailingTokens(); - } - this.#addPendingToken(this.curToken); - } - - #hideAndAddPendingToken(ctkn) { - ctkn.channel = Token.HIDDEN_CHANNEL; - this.#addPendingToken(ctkn); - } - - #createAndAddPendingToken(type, channel, text, sampleToken) { - const ctkn = sampleToken.clone(); - ctkn.type = type; - ctkn.channel = channel; - ctkn.stop = sampleToken.start - 1; - ctkn.text = text == null ? 
- `<${this.getSymbolicNames()[type]}>` : - text; - - this.#addPendingToken(ctkn); - } - - #addPendingToken(tkn) { - // save the last pending token type because the _pendingTokens linked list can be empty by the nextToken() - this.previousPendingTokenType = tkn.type; - if (tkn.channel === Token.DEFAULT_CHANNEL) { - this.lastPendingTokenTypeFromDefaultChannel = this.previousPendingTokenType; - } - this.pendingTokens.push(tkn) /* .addLast(token) */; - } - - #getIndentationLength(indentText) { // the indentText may contain spaces, tabs or form feeds - const TAB_LENGTH = 8; // the standard number of spaces to replace a tab to spaces - let length = 0; - for (let ch of indentText) { - switch (ch) { - case " ": - this.wasSpaceIndentation = true; - length += 1; - break; - case "\t": - this.wasTabIndentation = true; - length += TAB_LENGTH - (length % TAB_LENGTH); - break; - case "\f": // form feed - length = 0; - break; - } - } - - if (this.wasTabIndentation && this.wasSpaceIndentation) { - if (!this.wasIndentationMixedWithSpacesAndTabs) { - this.wasIndentationMixedWithSpacesAndTabs = true; - length = this.#INVALID_LENGTH; // only for the first inconsistent indent - } - } - return length; - } - - #reportLexerError(errMsg) { - this.getErrorListener().syntaxError(this, this.curToken, this.curToken.line, this.curToken.column, " LEXER" + this.#ERR_TXT + errMsg, null); - } - - #reportError(errMsg) { - this.#reportLexerError(errMsg); - - // the ERRORTOKEN will raise an error in the parser - this.#createAndAddPendingToken(PythonLexer.ERRORTOKEN, Token.DEFAULT_CHANNEL, this.#ERR_TXT + errMsg, this.ffgToken); - } -} diff --git a/python/python3_12/JavaScript/PythonParserBase.js b/python/python3_12/JavaScript/PythonParserBase.js deleted file mode 100644 index 58bc59bf86..0000000000 --- a/python/python3_12/JavaScript/PythonParserBase.js +++ /dev/null @@ -1,15 +0,0 @@ -import antlr4 from "antlr4"; - -export default class PythonParserBase extends antlr4.Parser { - constructor(input) { - super(input); - } - - isEqualToCurrentTokenText(tokenText) { - return this.getCurrentToken().text === tokenText; - } - - isnotEqualToCurrentTokenText(tokenText) { - return !this.isEqualToCurrentTokenText(tokenText); // for compatibility with the Python 'not' logical operator - } -} diff --git a/python/python3_12/Python3/PythonLexerBase.py b/python/python3_12/Python3/PythonLexerBase.py deleted file mode 100644 index d9a95ea764..0000000000 --- a/python/python3_12/Python3/PythonLexerBase.py +++ /dev/null @@ -1,309 +0,0 @@ -# The MIT License (MIT) -# Copyright (c) 2021 Robert Einhorn -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
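All four ports share the same FSTRING_MIDDLE double-brace trick seen in handleFSTRING_MIDDLE_token() above: '{{' and '}}' are first rewritten to '{_' and '}_', the text is then split on a '_' that follows a brace, and the trailing brace of each piece is re-emitted on the hidden channel so the original literal can be reconstructed. A standalone Python sketch of the idea (the helper name split_fstring_middle is illustrative, not part of the grammar):

```python
import re

def split_fstring_middle(text: str) -> list[tuple[str, bool]]:
    # rewrite '{{' -> '{_' and '}}' -> '}_' so each doubled brace gets a marker
    text = text.replace("{{", "{_").replace("}}", "}_")
    pieces: list[tuple[str, bool]] = []
    for s in re.split(r"(?<=[{}])_", text):  # split at '{_' or '}_'
        if s:
            pieces.append((s, False))         # default-channel FSTRING_MIDDLE piece
            if s[-1] in "{}":
                pieces.append((s[-1], True))  # hidden token preserves the '{{' / '}}'
    return pieces

print(split_fstring_middle("a{{b}}c"))
# [('a{', False), ('{', True), ('b}', False), ('}', True), ('c', False)]
```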
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -# Project : Python Indent/Dedent handler for ANTLR4 grammars -# -# Developed by : Robert Einhorn - -from typing import TextIO -from antlr4 import InputStream, Lexer, Token -from antlr4.Token import CommonToken -import sys -import re - -class PythonLexerBase(Lexer): - def __init__(self, input: InputStream, output: TextIO = sys.stdout): - super().__init__(input, output) - - # A stack that keeps track of the indentation lengths - self.__indent_length_stack: list[int] - - # A list where tokens are waiting to be loaded into the token stream - self.__pending_tokens: list[CommonToken] - - # last pending token types - self.__previous_pending_token_type: int - self.__last_pending_token_type_from_default_channel: int - - # The amount of opened parentheses, square brackets or curly braces - self.__opened: int - # The amount of opened parentheses and square brackets in the current lexer mode - self.__paren_or_bracket_opened_stack: list[int] - - self.__was_space_indentation: bool - self.__was_tab_indentation: bool - self.__was_indentation_mixed_with_spaces_and_tabs: bool - - self.__cur_token: CommonToken # current (under processing) token - self.__ffg_token: CommonToken # following (look ahead) token - - self.__INVALID_LENGTH: int = -1 - self.__ERR_TXT: str = " ERROR: " - - self.__init() - - def nextToken(self) -> CommonToken: # reading the input stream until a return EOF - self.__check_next_token() - return self.__pending_tokens.pop(0) # add the queued token to the token stream - - def reset(self) -> None: - self.__init() - super().reset() - - def __init(self) -> None: - self.__indent_length_stack = [] - self.__pending_tokens = [] - self.__previous_pending_token_type = 0 - self.__last_pending_token_type_from_default_channel = 0 - self.__opened = 0 - self.__paren_or_bracket_opened_stack = [] - self.__was_space_indentation = False - self.__was_tab_indentation = False - self.__was_indentation_mixed_with_spaces_and_tabs = False - self.__cur_token = None - self.__ffg_token = None - - def __check_next_token(self) -> None: - if self.__previous_pending_token_type != Token.EOF: - self.__set_current_and_following_tokens() - if len(self.__indent_length_stack) == 0: # We're at the first token - self.__handle_start_of_input() - - match self.__cur_token.type: - case self.LPAR | self.LSQB | self.LBRACE: - self.__opened += 1 - self.__add_pending_token(self.__cur_token) - case self.RPAR | self.RSQB | self.RBRACE: - self.__opened -= 1 - self.__add_pending_token(self.__cur_token) - case self.NEWLINE: - self.__handle_NEWLINE_token() - case self.FSTRING_MIDDLE: - self.__handle_FSTRING_MIDDLE_token() - case self.ERRORTOKEN: - self.__report_lexer_error("token recognition error at: '" + self.__cur_token.text + "'") - self.__add_pending_token(self.__cur_token) - case Token.EOF: - self.__handle_EOF_token() - case other: - self.__add_pending_token(self.__cur_token) - self.__handle_FORMAT_SPECIFICATION_MODE() - - def __set_current_and_following_tokens(self) -> None: - self.__cur_token = super().nextToken() if self.__ffg_token is None else \ - self.__ffg_token - - self.__handle_fstring_lexer_modes() - - self.__ffg_token = self.__cur_token if self.__cur_token.type == Token.EOF else \ - super().nextToken() - - # initialize the _indent_length_stack - # hide the 
leading NEWLINE token(s) - # if exists, find the first statement (not NEWLINE, not EOF token) that comes from the default channel - # insert a leading INDENT token if necessary - def __handle_start_of_input(self) -> None: - # initialize the stack with a default 0 indentation length - self.__indent_length_stack.append(0) # this will never be popped off - while self.__cur_token.type != Token.EOF: - if self.__cur_token.channel == Token.DEFAULT_CHANNEL: - if self.__cur_token.type == self.NEWLINE: - # all the NEWLINE tokens must be ignored before the first statement - self.__hide_and_add_pending_token(self.__cur_token) - else: # We're at the first statement - self.__insert_leading_indent_token() - return # continue the processing of the current token with __check_next_token() - else: - self.__add_pending_token(self.__cur_token) # it can be WS, EXPLICIT_LINE_JOINING or COMMENT token - self.__set_current_and_following_tokens() - # continue the processing of the EOF token with __check_next_token() - - def __insert_leading_indent_token(self) -> None: - if self.__previous_pending_token_type == self.WS: - prev_token: CommonToken = self.__pending_tokens[-1] # WS token - if self.__get_indentation_length(prev_token.text) != 0: # there is an "indentation" before the first statement - err_msg: str = "first statement indented" - self.__report_lexer_error(err_msg) - # insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser - self.__create_and_add_pending_token(self.INDENT, Token.DEFAULT_CHANNEL, self.__ERR_TXT + err_msg, self.__cur_token) - - def __handle_NEWLINE_token(self) -> None: - if self.__opened > 0: # We're in an implicit line joining, ignore the current NEWLINE token - self.__hide_and_add_pending_token(self.__cur_token) - else: - nl_token: CommonToken = self.__cur_token.clone() # save the current NEWLINE token - is_looking_ahead: bool = self.__ffg_token.type == self.WS - if is_looking_ahead: - self.__set_current_and_following_tokens() # set the next two tokens - - match self.__ffg_token.type: - case self.NEWLINE | self.COMMENT: - # We're before a blank line or a comment or type comment or a type ignore comment - self.__hide_and_add_pending_token(nl_token) # ignore the NEWLINE token - if is_looking_ahead: - self.__add_pending_token(self.__cur_token) # WS token - case other: - self.__add_pending_token(nl_token) - if is_looking_ahead: # We're on a whitespace(s) followed by a statement - indentation_length: int = 0 if self.__ffg_token.type == Token.EOF else \ - self.__get_indentation_length(self.__cur_token.text) - - if indentation_length != self.__INVALID_LENGTH: - self.__add_pending_token(self.__cur_token) # WS token - self.__insert_indent_or_dedent_token(indentation_length) # may insert INDENT token or DEDENT token(s) - else: - self.__report_error("inconsistent use of tabs and spaces in indentation") - else: # We're at a newline followed by a statement (there is no whitespace before the statement) - self.__insert_indent_or_dedent_token(0) # may insert DEDENT token(s) - - def __insert_indent_or_dedent_token(self, indent_length: int) -> None: - prev_indent_length: int = self.__indent_length_stack[-1] # peek() - if indent_length > prev_indent_length: - self.__create_and_add_pending_token(self.INDENT, Token.DEFAULT_CHANNEL, None, self.__ffg_token) - self.__indent_length_stack.append(indent_length) - else: - while indent_length < prev_indent_length: # more than 1 DEDENT token may be inserted to the token stream - self.__indent_length_stack.pop() - 
prev_indent_length = self.__indent_length_stack[-1] # peek() - if indent_length <= prev_indent_length: - self.__create_and_add_pending_token(self.DEDENT, Token.DEFAULT_CHANNEL, None, self.__ffg_token) - else: - self.__report_error("inconsistent dedent") - - def __handle_FSTRING_MIDDLE_token(self) -> None: # replace the double braces '{{' or '}}' to single braces and hide the second braces - fs_mid: str = self.__cur_token.text - fs_mid = fs_mid.replace("{{", "{_").replace("}}", "}_") # replace: {{ --> {_ and }} --> }_ - arr_of_str: list[str] = re.split(r"(?<=[{}])_", fs_mid) # split by {_ or }_ - s: str - for s in arr_of_str: - if s: - self.__create_and_add_pending_token(self.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, s, self.__ffg_token) - last_character: str = s[-1:] - if last_character in "{}": - self.__create_and_add_pending_token(self.FSTRING_MIDDLE, Token.HIDDEN_CHANNEL, last_character, self.__ffg_token) - - def __handle_fstring_lexer_modes(self) -> None: - if self._modeStack: - match self.__cur_token.type: - case self.LBRACE: - self.pushMode(Lexer.DEFAULT_MODE) - self.__paren_or_bracket_opened_stack.append(0) - case self.LPAR | self.LSQB: - # https://peps.python.org/pep-0498/#lambdas-inside-expressions - self.__paren_or_bracket_opened_stack[-1] += 1 # increment the last element (peek() + 1) - case self.RPAR | self.RSQB: - self.__paren_or_bracket_opened_stack[-1] -= 1 # decrement the last element (peek() - 1) - case self.COLON: - if self.__paren_or_bracket_opened_stack[-1] == 0: - match self._modeStack[-1]: # check the previous lexer mode (the current is DEFAULT_MODE) - case self.SINGLE_QUOTE_FSTRING_MODE \ - | self.LONG_SINGLE_QUOTE_FSTRING_MODE \ - | self.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE: - - self.mode(self.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE) # continue in format spec. mode - case self.DOUBLE_QUOTE_FSTRING_MODE \ - | self.LONG_DOUBLE_QUOTE_FSTRING_MODE \ - | self.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE: - - self.mode(self.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE) # continue in format spec. 
mode - case self.RBRACE: - match self._mode: - case Lexer.DEFAULT_MODE \ - | self.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE \ - | self.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE: - - self.popMode() - self.__paren_or_bracket_opened_stack.pop() - case other: - self.__report_lexer_error("f-string: single '}' is not allowed") - - def __handle_FORMAT_SPECIFICATION_MODE(self) -> None: - if len(self._modeStack) != 0 \ - and self.__ffg_token.type == self.RBRACE: - - match self.__cur_token.type: - case self.COLON | self.RBRACE: - # insert an empty FSTRING_MIDDLE token instead of the missing format specification - self.__create_and_add_pending_token(self.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, "", self.__ffg_token) - - def __insert_trailing_tokens(self) -> None: - match self.__last_pending_token_type_from_default_channel: - case self.NEWLINE | self.DEDENT: - pass # no trailing NEWLINE token is needed - case other: - # insert an extra trailing NEWLINE token that serves as the end of the last statement - self.__create_and_add_pending_token(self.NEWLINE, Token.DEFAULT_CHANNEL, None, self.__ffg_token) # _ffg_token is EOF - self.__insert_indent_or_dedent_token(0) # Now insert as much trailing DEDENT tokens as needed - - def __handle_EOF_token(self) -> None: - if self.__last_pending_token_type_from_default_channel > 0: - # there was statement in the input (leading NEWLINE tokens are hidden) - self.__insert_trailing_tokens() - self.__add_pending_token(self.__cur_token) - - def __hide_and_add_pending_token(self, ctkn: CommonToken) -> None: - ctkn.channel = Token.HIDDEN_CHANNEL - self.__add_pending_token(ctkn) - - def __create_and_add_pending_token(self, ttype: int, channel: int, text: str, sample_token: CommonToken) -> None: - ctkn: CommonToken = sample_token.clone() - ctkn.type = ttype - ctkn.channel = channel - ctkn.stop = sample_token.start - 1 - ctkn.text = "<" + self.symbolicNames[ttype] + ">" if text is None else \ - text - - self.__add_pending_token(ctkn) - - def __add_pending_token(self, ctkn: CommonToken) -> None: - # save the last pending token type because the _pending_tokens list can be empty by the nextToken() - self.__previous_pending_token_type = ctkn.type - if ctkn.channel == Token.DEFAULT_CHANNEL: - self.__last_pending_token_type_from_default_channel = self.__previous_pending_token_type - self.__pending_tokens.append(ctkn) - - def __get_indentation_length(self, indentText: str) -> int: # the indentText may contain spaces, tabs or form feeds - TAB_LENGTH: int = 8 # the standard number of spaces to replace a tab to spaces - length: int = 0 - ch: str - for ch in indentText: - match ch: - case ' ': - self.__was_space_indentation = True - length += 1 - case '\t': - self.__was_tab_indentation = True - length += TAB_LENGTH - (length % TAB_LENGTH) - case '\f': # form feed - length = 0 - - if self.__was_tab_indentation and self.__was_space_indentation: - if not self.__was_indentation_mixed_with_spaces_and_tabs: - self.__was_indentation_mixed_with_spaces_and_tabs = True - length = self.__INVALID_LENGTH # only for the first inconsistent indent - return length - - def __report_lexer_error(self, err_msg: str) -> None: - self.getErrorListenerDispatch().syntaxError(self, self.__cur_token, self.__cur_token.line, self.__cur_token.column, " LEXER" + self.__ERR_TXT + err_msg, None) - - def __report_error(self, err_msg: str) -> None: - self.__report_lexer_error(err_msg) - - # the ERRORTOKEN will raise an error in the parser - self.__create_and_add_pending_token(self.ERRORTOKEN, Token.DEFAULT_CHANNEL, self.__ERR_TXT + 
err_msg, self.__ffg_token) diff --git a/python/python3_12/Python3/PythonParserBase.py b/python/python3_12/Python3/PythonParserBase.py deleted file mode 100644 index 5392acea73..0000000000 --- a/python/python3_12/Python3/PythonParserBase.py +++ /dev/null @@ -1,13 +0,0 @@ -from antlr4 import InputStream, Parser -from typing import TextIO -import sys - -class PythonParserBase(Parser): - def __init__(self, input: InputStream, output: TextIO = sys.stdout): - super().__init__(input, output) - - def isEqualToCurrentTokenText(self, tokenText: str) -> bool: - return self.getCurrentToken().text == tokenText - - def isnotEqualToCurrentTokenText(self, tokenText: str) -> bool: - return not self.isEqualToCurrentTokenText(tokenText) # for compatibility with the '!' logical operator in other languages diff --git a/python/python3_12/Python3/README.md b/python/python3_12/Python3/README.md deleted file mode 100644 index 8fa39eaa17..0000000000 --- a/python/python3_12/Python3/README.md +++ /dev/null @@ -1,5 +0,0 @@ - -- first run the transformGrammar.py to modify the grammar files for the Python target: -```bash - python transformGrammar.py -``` diff --git a/python/python3_12/Python3/transformGrammar.py b/python/python3_12/Python3/transformGrammar.py deleted file mode 100644 index ad336fcd12..0000000000 --- a/python/python3_12/Python3/transformGrammar.py +++ /dev/null @@ -1,29 +0,0 @@ -import sys, os, re, shutil - -def main(argv): - fix("PythonLexer.g4") - fix("PythonParser.g4") - -def fix(file_path): - print("Altering " + file_path) - if not os.path.exists(file_path): - print(f"Could not find file: {file_path}") - sys.exit(1) - parts = os.path.split(file_path) - file_name = parts[-1] - shutil.move(file_path, file_path + ".bak") - input_file = open(file_path + ".bak",'r') - output_file = open(file_path, 'w') - for x in input_file: - if '!this.' in x: - x = x.replace('!this.', 'not self.') - if 'this.' 
in x: - x = x.replace('this.', 'self.') - output_file.write(x) - output_file.flush() - print("Writing ...") - input_file.close() - output_file.close() - -if __name__ == '__main__': - main(sys.argv) diff --git a/python/python3_12/README.md b/python/python3_12/README.md deleted file mode 100644 index ffe8f60c50..0000000000 --- a/python/python3_12/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# Python 3.12.6 parser - -### About files: - - PythonParser.g4 - is the ANTLR4 parser grammar that based on the official [Python PEG grammar](https://docs.python.org/3.12/reference/grammar.html) - - - PythonLexerBase - handles the Python indentations - -- Example files: [Python 3.12 Standard Lib](https://github.com/python/cpython/tree/3.12/Lib) - -### Related link: -[ANTLR4-parser-for-Python-3.12](https://github.com/RobEin/ANTLR4-parser-for-Python-3.12) \ No newline at end of file diff --git a/python/python3_12/TypeScript/PythonLexerBase.ts b/python/python3_12/TypeScript/PythonLexerBase.ts deleted file mode 100644 index ce72f1782d..0000000000 --- a/python/python3_12/TypeScript/PythonLexerBase.ts +++ /dev/null @@ -1,392 +0,0 @@ -/* -The MIT License (MIT) -Copyright (c) 2021 Robert Einhorn - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
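For context on transformGrammar.py above: the grammar actions and predicates are written with Java-style `this.`, and the Python target needs `self.`, so the script rewrites the two .g4 files textually before code generation. A rough single-function equivalent of that rewrite (an illustrative sketch; the real script also backs the originals up as .bak files first):

```python
def to_python_target(line: str) -> str:
    # '!this.' must be handled before 'this.' so negated predicates
    # become 'not self.' rather than '!self.'
    return line.replace("!this.", "not self.").replace("this.", "self.")

print(to_python_target('{!this.isEqualToCurrentTokenText("_")}?'))
# {not self.isEqualToCurrentTokenText("_")}?
```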
- */ - -/* - * - * Project : Python Indent/Dedent handler for ANTLR4 grammars - * - * Developed by : Robert Einhorn, robert.einhorn.hu@gmail.com - * - */ - -import { CharStream, Token, Lexer } from "antlr4"; -import PythonLexer from "./PythonLexer"; -import * as Collections from "typescript-collections"; - -export default abstract class PythonLexerBase extends Lexer { - // A stack that keeps track of the indentation lengths - private indentLengthStack!: Collections.Stack; - // A list where tokens are waiting to be loaded into the token stream - private pendingTokens!: Array; - - // last pending token types - private previousPendingTokenType!: number; - private lastPendingTokenTypeFromDefaultChannel!: number; - - // The amount of opened parentheses, square brackets or curly braces - private opened!: number; - // The amount of opened parentheses and square brackets in the current lexer mode - private paren_or_bracket_openedStack!: Collections.Stack; - - private wasSpaceIndentation!: boolean; - private wasTabIndentation!: boolean; - private wasIndentationMixedWithSpacesAndTabs!: boolean; - - private curToken: Token | undefined; // current (under processing) token - private ffgToken: Token | undefined; // following (look ahead) token - - private readonly INVALID_LENGTH: number = -1; - private readonly ERR_TXT: string = " ERROR: "; - - protected constructor(input: CharStream) { - super(input); - this.init(); - } - - public nextToken(): Token { // reading the input stream until a return EOF - this.checkNextToken(); - return this.pendingTokens.shift()! /* .pollFirst() */; // add the queued token to the token stream - } - - public reset(): void { - this.init(); - super.reset(); - } - - private init(): void { - this.indentLengthStack = new Collections.Stack(); - this.pendingTokens = []; - this.previousPendingTokenType = 0; - this.lastPendingTokenTypeFromDefaultChannel = 0; - this.opened = 0; - this.paren_or_bracket_openedStack = new Collections.Stack(); - this.wasSpaceIndentation = false; - this.wasTabIndentation = false; - this.wasIndentationMixedWithSpacesAndTabs = false; - this.curToken = undefined; - this.ffgToken = undefined; - } - - private checkNextToken(): void { - if (this.previousPendingTokenType !== PythonLexer.EOF) { - this.setCurrentAndFollowingTokens(); - if (this.indentLengthStack.isEmpty()) { // We're at the first token - this.handleStartOfInput(); - } - - switch (this.curToken!.type) { - case PythonLexer.LPAR: - case PythonLexer.LSQB: - case PythonLexer.LBRACE: - this.opened++; - this.addPendingToken(this.curToken!); - break; - case PythonLexer.RPAR: - case PythonLexer.RSQB: - case PythonLexer.RBRACE: - this.opened--; - this.addPendingToken(this.curToken!); - break; - case PythonLexer.NEWLINE: - this.handleNEWLINEtoken(); - break; - case PythonLexer.FSTRING_MIDDLE: - this.handleFSTRING_MIDDLE_token(); - break; - case PythonLexer.ERRORTOKEN: - this.reportLexerError(`token recognition error at: '${this.curToken!.text}'`); - this.addPendingToken(this.curToken!); - break; - case PythonLexer.EOF: - this.handleEOFtoken(); - break; - default: - this.addPendingToken(this.curToken!); - } - this.handleFORMAT_SPECIFICATION_MODE(); - } - } - - private setCurrentAndFollowingTokens(): void { - this.curToken = this.ffgToken == undefined - ? super.nextToken() - : this.ffgToken; - - this.handleFStringLexerModes(); - - this.ffgToken = this.curToken.type === PythonLexer.EOF - ? 
this.curToken - : super.nextToken(); - } - - // initialize the indentLengthStack - // hide the leading NEWLINE token(s) - // if exists, find the first statement (not NEWLINE, not EOF token) that comes from the default channel - // insert a leading INDENT token if necessary - private handleStartOfInput(): void { - // initialize the stack with a default 0 indentation length - this.indentLengthStack.push(0); // this will never be popped off - while (this.curToken!.type !== PythonLexer.EOF) { - if (this.curToken!.channel === Token.DEFAULT_CHANNEL) { - if (this.curToken!.type === PythonLexer.NEWLINE) { - // all the NEWLINE tokens must be ignored before the first statement - this.hideAndAddPendingToken(this.curToken!); - } else { // We're at the first statement - this.insertLeadingIndentToken(); - return; // continue the processing of the current token with checkNextToken() - } - } else { - this.addPendingToken(this.curToken!); // it can be WS, EXPLICIT_LINE_JOINING or COMMENT token - } - this.setCurrentAndFollowingTokens(); - } // continue the processing of the EOF token with checkNextToken() - } - - private insertLeadingIndentToken(): void { - if (this.previousPendingTokenType === PythonLexer.WS) { - const prevToken: Token = this.pendingTokens[this.pendingTokens.length - 1] /* .peekLast() */; // WS token - if (this.getIndentationLength(prevToken.text) !== 0) { // there is an "indentation" before the first statement - const errMsg: string = "first statement indented"; - this.reportLexerError(errMsg); - // insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser - this.createAndAddPendingToken(PythonLexer.INDENT, Token.DEFAULT_CHANNEL, this.ERR_TXT + errMsg, this.curToken!); - } - } - } - - private handleNEWLINEtoken(): void { - if (this.opened > 0) { // We're in an implicit line joining, ignore the current NEWLINE token - this.hideAndAddPendingToken(this.curToken!); - } else { - const nlToken: Token = this.curToken?.clone()!; // save the current NEWLINE token - const isLookingAhead: boolean = this.ffgToken!.type === PythonLexer.WS; - if (isLookingAhead) { - this.setCurrentAndFollowingTokens(); // set the next two tokens - } - - switch (this.ffgToken!.type) { - case PythonLexer.NEWLINE: // We're before a blank line - case PythonLexer.COMMENT: // We're before a comment - this.hideAndAddPendingToken(nlToken); - if (isLookingAhead) { - this.addPendingToken(this.curToken!); // WS token - } - break; - default: - this.addPendingToken(nlToken); - if (isLookingAhead) { // We're on whitespace(s) followed by a statement - const indentationLength: number = this.ffgToken!.type === PythonLexer.EOF ? 
- 0 : - this.getIndentationLength(this.curToken!.text); - - if (indentationLength !== this.INVALID_LENGTH) { - this.addPendingToken(this.curToken!); // WS token - this.insertIndentOrDedentToken(indentationLength); // may insert INDENT token or DEDENT token(s) - } else { - this.reportError("inconsistent use of tabs and spaces in indentation"); - } - } else { // We're at a newline followed by a statement (there is no whitespace before the statement) - this.insertIndentOrDedentToken(0); // may insert DEDENT token(s) - } - } - } - } - - private insertIndentOrDedentToken(indentLength: number): void { - let prevIndentLength: number = this.indentLengthStack.peek()!; - if (indentLength > prevIndentLength) { - this.createAndAddPendingToken(PythonLexer.INDENT, Token.DEFAULT_CHANNEL, null, this.ffgToken!); - this.indentLengthStack.push(indentLength); - } else { - while (indentLength < prevIndentLength) { // more than 1 DEDENT token may be inserted to the token stream - this.indentLengthStack.pop(); - prevIndentLength = this.indentLengthStack.peek()!; - if (indentLength <= prevIndentLength) { - this.createAndAddPendingToken(PythonLexer.DEDENT, Token.DEFAULT_CHANNEL, null, this.ffgToken!); - } else { - this.reportError("inconsistent dedent"); - } - } - } - } - - private handleFSTRING_MIDDLE_token(): void { // replace the double braces '{{' or '}}' to single braces and hide the second braces - let fsMid: string = this.curToken!.text; - fsMid = fsMid.replace(/\{\{/g, "{_").replace(/\}\}/g, "}_"); // replace: {{ --> {_ and }} --> }_ - const arrOfStr: string[] = fsMid.split(/(?<=[{}])_/); // split by {_ or }_ - for (let s of arrOfStr) { - if (s) { - this.createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, s, this.ffgToken!); - const lastCharacter: string = s.charAt(s.length - 1); - if ("{}".includes(lastCharacter)) { - this.createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.HIDDEN_CHANNEL, lastCharacter, this.ffgToken!); - // this inserted hidden token allows to restore the original f-string literal with the double braces - } - } - } - } - - private handleFStringLexerModes(): void { // https://peps.python.org/pep-0498/#specification - if (this.getModeStack().length > 0) { - switch (this.curToken!.type) { - case PythonLexer.LBRACE: - this.pushMode(Lexer.DEFAULT_MODE); - this.paren_or_bracket_openedStack.push(0); - break; - case PythonLexer.LPAR: - case PythonLexer.LSQB: - // https://peps.python.org/pep-0498/#lambdas-inside-expressions - this.paren_or_bracket_openedStack.push(this.paren_or_bracket_openedStack.pop()! + 1); // increment the last element - break; - case PythonLexer.RPAR: - case PythonLexer.RSQB: - this.paren_or_bracket_openedStack.push(this.paren_or_bracket_openedStack.pop()! - 1); // decrement the last element - break; - case PythonLexer.COLON: // colon can only come from DEFAULT_MODE - if (this.paren_or_bracket_openedStack.peek() == 0) { - switch (this.getModeStack().at(-1) /* peek() */) { // check the previous lexer mode (the current is DEFAULT_MODE) - case PythonLexer.SINGLE_QUOTE_FSTRING_MODE: - case PythonLexer.LONG_SINGLE_QUOTE_FSTRING_MODE: - case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE: - this.setMode(PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode - break; - case PythonLexer.DOUBLE_QUOTE_FSTRING_MODE: - case PythonLexer.LONG_DOUBLE_QUOTE_FSTRING_MODE: - case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE: - this.setMode(PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE); // continue in format spec. 
mode - break; - } - } - break; - case PythonLexer.RBRACE: - switch (this.getMode()) { - case Lexer.DEFAULT_MODE: - case PythonLexer.SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE: - case PythonLexer.DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE: - this.popMode(); - this.paren_or_bracket_openedStack.pop(); - break; - default: - this.reportLexerError("f-string: single '}' is not allowed"); - break; - } - break; - } - } - } - - private handleFORMAT_SPECIFICATION_MODE(): void { - if (this.getModeStack().length > 0 && this.ffgToken!.type === PythonLexer.RBRACE) { - switch (this.curToken!.type) { - case PythonLexer.COLON: - case PythonLexer.RBRACE: - // insert an empty FSTRING_MIDDLE token instead of the missing format specification - this.createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, "", this.ffgToken!); - break; - } - } - } - - private insertTrailingTokens(): void { - switch (this.lastPendingTokenTypeFromDefaultChannel) { - case PythonLexer.NEWLINE: - case PythonLexer.DEDENT: - break; // no trailing NEWLINE token is needed - default: - // insert an extra trailing NEWLINE token that serves as the end of the last statement - this.createAndAddPendingToken(PythonLexer.NEWLINE, Token.DEFAULT_CHANNEL, null, this.ffgToken!); // ffgToken is EOF - } - this.insertIndentOrDedentToken(0); // Now insert as much trailing DEDENT tokens as needed - } - - private handleEOFtoken(): void { - if (this.lastPendingTokenTypeFromDefaultChannel > 0) { - // there was a statement in the input (leading NEWLINE tokens are hidden) - this.insertTrailingTokens(); - } - this.addPendingToken(this.curToken!); - } - - private hideAndAddPendingToken(tkn: Token): void { - tkn.channel = Token.HIDDEN_CHANNEL; - this.addPendingToken(tkn); - } - - private createAndAddPendingToken(type: number, channel: number, text: string | null, sampleToken: Token): void { - const tkn: Token = sampleToken.clone(); - tkn.type = type; - tkn.channel = channel; - tkn.stop = sampleToken.start - 1; - tkn.text = text == null ? 
- `<${this.getSymbolicNames()[type]}>` : - text; - - this.addPendingToken(tkn); - } - - private addPendingToken(tkn: Token): void { - // save the last pending token type because the pendingTokens linked list can be empty by the nextToken() - this.previousPendingTokenType = tkn.type; - if (tkn.channel === Token.DEFAULT_CHANNEL) { - this.lastPendingTokenTypeFromDefaultChannel = this.previousPendingTokenType; - } - this.pendingTokens.push(tkn) /* .addLast(token) */; - } - - private getIndentationLength(indentText: string): number { // the indentText may contain spaces, tabs or form feeds - const TAB_LENGTH: number = 8; // the standard number of spaces to replace a tab to spaces - let length: number = 0; - for (let ch of indentText) { - switch (ch) { - case " ": - this.wasSpaceIndentation = true; - length += 1; - break; - case "\t": - this.wasTabIndentation = true; - length += TAB_LENGTH - (length % TAB_LENGTH); - break; - case "\f": // form feed - length = 0; - break; - } - } - - if (this.wasTabIndentation && this.wasSpaceIndentation) { - if (!this.wasIndentationMixedWithSpacesAndTabs) { - this.wasIndentationMixedWithSpacesAndTabs = true; - length = this.INVALID_LENGTH; // only for the first inconsistent indent - } - } - return length; - } - - private reportLexerError(errMsg: string): void { - this.getErrorListener().syntaxError(this, 0 /* this.curToken */, this.curToken!.line, this.curToken!.column, " LEXER" + this.ERR_TXT + errMsg, undefined); - } - - private reportError(errMsg: string): void { - this.reportLexerError(errMsg); - - // the ERRORTOKEN will raise an error in the parser - this.createAndAddPendingToken(PythonLexer.ERRORTOKEN, Token.DEFAULT_CHANNEL, this.ERR_TXT + errMsg, this.ffgToken!); - } -} diff --git a/python/python3_12/TypeScript/PythonParserBase.ts b/python/python3_12/TypeScript/PythonParserBase.ts deleted file mode 100644 index a21b3117af..0000000000 --- a/python/python3_12/TypeScript/PythonParserBase.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { Parser, TokenStream } from "antlr4"; -//import antlr4 from "antlr4"; - -export default class PythonParserBase extends Parser { - constructor(input: TokenStream) { - super(input); - } - - isEqualToCurrentTokenText(tokenText: string): boolean { - return this.getCurrentToken().text === tokenText; - } - - isnotEqualToCurrentTokenText(tokenText: string): boolean { - return !this.isEqualToCurrentTokenText(tokenText); // for compatibility with the Python 'not' logical operator - } -} diff --git a/python/python3_12/changes.md b/python/python3_12/changes.md deleted file mode 100644 index 06e58418b8..0000000000 --- a/python/python3_12/changes.md +++ /dev/null @@ -1,12 +0,0 @@ -Sept. 05, 2024 -------------- -Type comment tokens are no longer generated. -Type comments will now be tokenized as plain comment tokens. - -Line continuation for string literals (backslash followed by a newline) is no longer resolved. -(backslash+newline is no longer removed from string literals) - --- -Oct. 18, 2024 --- -Fix that `case [a, *_] if a == 0:` throws error `rule soft_kw__not__wildcard failed predicate: {this.isnotEqualToCurrentTokenText("_")}?` \ No newline at end of file diff --git a/python/python3_12/examples/abc.py b/python/python3_12/examples/abc.py deleted file mode 100644 index f8a4e11ce9..0000000000 --- a/python/python3_12/examples/abc.py +++ /dev/null @@ -1,188 +0,0 @@ -# Copyright 2007 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement.
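Worth noting before the example sources: the small PythonParserBase classes deleted above exist for Python's soft keywords (https://docs.python.org/3/reference/lexical_analysis.html#soft-keywords), where `match`, `case` and `_` arrive as ordinary NAME tokens and the grammar disambiguates them with text predicates; the Oct. 18 entry in changes.md fixes exactly such a predicate. A standalone Python sketch of the check those predicates rely on (the class name is illustrative, not part of the grammar):

```python
class SoftKeywordCheck:
    def __init__(self, current_token_text: str):
        # stands in for self.getCurrentToken().text in the generated parser
        self.current_token_text = current_token_text

    def isEqualToCurrentTokenText(self, token_text: str) -> bool:
        return self.current_token_text == token_text

    def isnotEqualToCurrentTokenText(self, token_text: str) -> bool:
        return not self.isEqualToCurrentTokenText(token_text)

# In `case [a, *_] if a == 0:` the '*_' is a wildcard, so the capture-name
# rule is guarded by a predicate like {self.isnotEqualToCurrentTokenText("_")}?
print(SoftKeywordCheck("_").isnotEqualToCurrentTokenText("_"))  # False
```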
- -"""Abstract Base Classes (ABCs) according to PEP 3119.""" - - -def abstractmethod(funcobj): - """A decorator indicating abstract methods. - - Requires that the metaclass is ABCMeta or derived from it. A - class that has a metaclass derived from ABCMeta cannot be - instantiated unless all of its abstract methods are overridden. - The abstract methods can be called using any of the normal - 'super' call mechanisms. abstractmethod() may be used to declare - abstract methods for properties and descriptors. - - Usage: - - class C(metaclass=ABCMeta): - @abstractmethod - def my_abstract_method(self, arg1, arg2, argN): - ... - """ - funcobj.__isabstractmethod__ = True - return funcobj - - -class abstractclassmethod(classmethod): - """A decorator indicating abstract classmethods. - - Deprecated, use 'classmethod' with 'abstractmethod' instead: - - class C(ABC): - @classmethod - @abstractmethod - def my_abstract_classmethod(cls, ...): - ... - - """ - - __isabstractmethod__ = True - - def __init__(self, callable): - callable.__isabstractmethod__ = True - super().__init__(callable) - - -class abstractstaticmethod(staticmethod): - """A decorator indicating abstract staticmethods. - - Deprecated, use 'staticmethod' with 'abstractmethod' instead: - - class C(ABC): - @staticmethod - @abstractmethod - def my_abstract_staticmethod(...): - ... - - """ - - __isabstractmethod__ = True - - def __init__(self, callable): - callable.__isabstractmethod__ = True - super().__init__(callable) - - -class abstractproperty(property): - """A decorator indicating abstract properties. - - Deprecated, use 'property' with 'abstractmethod' instead: - - class C(ABC): - @property - @abstractmethod - def my_abstract_property(self): - ... - - """ - - __isabstractmethod__ = True - - -try: - from _abc import (get_cache_token, _abc_init, _abc_register, - _abc_instancecheck, _abc_subclasscheck, _get_dump, - _reset_registry, _reset_caches) -except ImportError: - from _py_abc import ABCMeta, get_cache_token - ABCMeta.__module__ = 'abc' -else: - class ABCMeta(type): - """Metaclass for defining Abstract Base Classes (ABCs). - - Use this metaclass to create an ABC. An ABC can be subclassed - directly, and then acts as a mix-in class. You can also register - unrelated concrete classes (even built-in classes) and unrelated - ABCs as 'virtual subclasses' -- these and their descendants will - be considered subclasses of the registering ABC by the built-in - issubclass() function, but the registering ABC won't show up in - their MRO (Method Resolution Order) nor will method - implementations defined by the registering ABC be callable (not - even via super()). - """ - def __new__(mcls, name, bases, namespace, /, **kwargs): - cls = super().__new__(mcls, name, bases, namespace, **kwargs) - _abc_init(cls) - return cls - - def register(cls, subclass): - """Register a virtual subclass of an ABC. - - Returns the subclass, to allow usage as a class decorator. - """ - return _abc_register(cls, subclass) - - def __instancecheck__(cls, instance): - """Override for isinstance(instance, cls).""" - return _abc_instancecheck(cls, instance) - - def __subclasscheck__(cls, subclass): - """Override for issubclass(subclass, cls).""" - return _abc_subclasscheck(cls, subclass) - - def _dump_registry(cls, file=None): - """Debug helper to print the ABC registry.""" - print(f"Class: {cls.__module__}.{cls.__qualname__}", file=file) - print(f"Inv. 
counter: {get_cache_token()}", file=file) - (_abc_registry, _abc_cache, _abc_negative_cache, - _abc_negative_cache_version) = _get_dump(cls) - print(f"_abc_registry: {_abc_registry!r}", file=file) - print(f"_abc_cache: {_abc_cache!r}", file=file) - print(f"_abc_negative_cache: {_abc_negative_cache!r}", file=file) - print(f"_abc_negative_cache_version: {_abc_negative_cache_version!r}", - file=file) - - def _abc_registry_clear(cls): - """Clear the registry (for debugging or testing).""" - _reset_registry(cls) - - def _abc_caches_clear(cls): - """Clear the caches (for debugging or testing).""" - _reset_caches(cls) - - -def update_abstractmethods(cls): - """Recalculate the set of abstract methods of an abstract class. - - If a class has had one of its abstract methods implemented after the - class was created, the method will not be considered implemented until - this function is called. Alternatively, if a new abstract method has been - added to the class, it will only be considered an abstract method of the - class after this function is called. - - This function should be called before any use is made of the class, - usually in class decorators that add methods to the subject class. - - Returns cls, to allow usage as a class decorator. - - If cls is not an instance of ABCMeta, does nothing. - """ - if not hasattr(cls, '__abstractmethods__'): - # We check for __abstractmethods__ here because cls might by a C - # implementation or a python implementation (especially during - # testing), and we want to handle both cases. - return cls - - abstracts = set() - # Check the existing abstract methods of the parents, keep only the ones - # that are not implemented. - for scls in cls.__bases__: - for name in getattr(scls, '__abstractmethods__', ()): - value = getattr(cls, name, None) - if getattr(value, "__isabstractmethod__", False): - abstracts.add(name) - # Also add any other newly added abstract methods. - for name, value in cls.__dict__.items(): - if getattr(value, "__isabstractmethod__", False): - abstracts.add(name) - cls.__abstractmethods__ = frozenset(abstracts) - return cls - - -class ABC(metaclass=ABCMeta): - """Helper class that provides a standard way to create an ABC using - inheritance. - """ - __slots__ = () diff --git a/python/python3_12/examples/aifc.py b/python/python3_12/examples/aifc.py deleted file mode 100644 index 5254987e22..0000000000 --- a/python/python3_12/examples/aifc.py +++ /dev/null @@ -1,984 +0,0 @@ -"""Stuff to parse AIFF-C and AIFF files. - -Unless explicitly stated otherwise, the description below is true -both for AIFF-C files and AIFF files. - -An AIFF-C file has the following structure. - - +-----------------+ - | FORM | - +-----------------+ - | | - +----+------------+ - | | AIFC | - | +------------+ - | | | - | | . | - | | . | - | | . | - +----+------------+ - -An AIFF file has the string "AIFF" instead of "AIFC". - -A chunk consists of an identifier (4 bytes) followed by a size (4 bytes, -big endian order), followed by the data. The size field does not include -the size of the 8 byte header. - -The following chunk types are recognized. - - FVER - (AIFF-C only). 
- MARK - <# of markers> (2 bytes) - list of markers: - (2 bytes, must be > 0) - (4 bytes) - ("pstring") - COMM - <# of channels> (2 bytes) - <# of sound frames> (4 bytes) - (2 bytes) - (10 bytes, IEEE 80-bit extended - floating point) - in AIFF-C files only: - (4 bytes) - ("pstring") - SSND - (4 bytes, not used by this program) - (4 bytes, not used by this program) - - -A pstring consists of 1 byte length, a string of characters, and 0 or 1 -byte pad to make the total length even. - -Usage. - -Reading AIFF files: - f = aifc.open(file, 'r') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods read(), seek(), and close(). -In some types of audio files, if the setpos() method is not used, -the seek() method is not necessary. - -This returns an instance of a class with the following public methods: - getnchannels() -- returns number of audio channels (1 for - mono, 2 for stereo) - getsampwidth() -- returns sample width in bytes - getframerate() -- returns sampling frequency - getnframes() -- returns number of audio frames - getcomptype() -- returns compression type ('NONE' for AIFF files) - getcompname() -- returns human-readable version of - compression type ('not compressed' for AIFF files) - getparams() -- returns a namedtuple consisting of all of the - above in the above order - getmarkers() -- get the list of marks in the audio file or None - if there are no marks - getmark(id) -- get mark with the specified id (raises an error - if the mark does not exist) - readframes(n) -- returns at most n frames of audio - rewind() -- rewind to the beginning of the audio stream - setpos(pos) -- seek to the specified position - tell() -- return the current position - close() -- close the instance (make it unusable) -The position returned by tell(), the position given to setpos() and -the position of marks are all compatible and have nothing to do with -the actual position in the file. -The close() method is called automatically when the class instance -is destroyed. - -Writing AIFF files: - f = aifc.open(file, 'w') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods write(), tell(), seek(), and -close(). - -This returns an instance of a class with the following public methods: - aiff() -- create an AIFF file (AIFF-C default) - aifc() -- create an AIFF-C file - setnchannels(n) -- set the number of channels - setsampwidth(n) -- set the sample width - setframerate(n) -- set the frame rate - setnframes(n) -- set the number of frames - setcomptype(type, name) - -- set the compression type and the - human-readable compression type - setparams(tuple) - -- set all parameters at once - setmark(id, pos, name) - -- add specified mark to the list of marks - tell() -- return current position in output file (useful - in combination with setmark()) - writeframesraw(data) - -- write audio frames without pathing up the - file header - writeframes(data) - -- write audio frames and patch up the file header - close() -- patch up the file header and close the - output file -You should set the parameters before the first writeframesraw or -writeframes. The total number of frames does not need to be set, -but when it is set to the correct value, the header does not have to -be patched up. -It is best to first set all parameters, perhaps possibly the -compression type, and then write audio frames using writeframesraw. 
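A minimal write-side sketch of the API described above (illustration only; the file name `silence.aiff` is hypothetical, and the `.aiff` extension selects plain AIFF rather than AIFF-C). All parameters must be set before the first `writeframes()` call. Note that `aifc` itself is deprecated and removed in Python 3.13, which is why this example file is deleted by this patch:

    import aifc

    with aifc.open("silence.aiff", "w") as f:
        f.setnchannels(1)                  # mono
        f.setsampwidth(2)                  # 16-bit samples
        f.setframerate(8000)               # 8 kHz
        f.writeframes(b"\x00\x00" * 8000)  # one second of silence
    # closing (here via the context manager) patches the sizes in the header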
-When all frames have been written, either call writeframes(b'') or -close() to patch up the sizes in the header. -Marks can be added anytime. If there are any marks, you must call -close() after all frames have been written. -The close() method is called automatically when the class instance -is destroyed. - -When a file is opened with the extension '.aiff', an AIFF file is -written, otherwise an AIFF-C file is written. This default can be -changed by calling aiff() or aifc() before the first writeframes or -writeframesraw. -""" - -import struct -import builtins -import warnings - -__all__ = ["Error", "open"] - - -warnings._deprecated(__name__, remove=(3, 13)) - - -class Error(Exception): - pass - -_AIFC_version = 0xA2805140 # Version 1 of AIFF-C - -def _read_long(file): - try: - return struct.unpack('>l', file.read(4))[0] - except struct.error: - raise EOFError from None - -def _read_ulong(file): - try: - return struct.unpack('>L', file.read(4))[0] - except struct.error: - raise EOFError from None - -def _read_short(file): - try: - return struct.unpack('>h', file.read(2))[0] - except struct.error: - raise EOFError from None - -def _read_ushort(file): - try: - return struct.unpack('>H', file.read(2))[0] - except struct.error: - raise EOFError from None - -def _read_string(file): - length = ord(file.read(1)) - if length == 0: - data = b'' - else: - data = file.read(length) - if length & 1 == 0: - dummy = file.read(1) - return data - -_HUGE_VAL = 1.79769313486231e+308 # See - -def _read_float(f): # 10 bytes - expon = _read_short(f) # 2 bytes - sign = 1 - if expon < 0: - sign = -1 - expon = expon + 0x8000 - himant = _read_ulong(f) # 4 bytes - lomant = _read_ulong(f) # 4 bytes - if expon == himant == lomant == 0: - f = 0.0 - elif expon == 0x7FFF: - f = _HUGE_VAL - else: - expon = expon - 16383 - f = (himant * 0x100000000 + lomant) * pow(2.0, expon - 63) - return sign * f - -def _write_short(f, x): - f.write(struct.pack('>h', x)) - -def _write_ushort(f, x): - f.write(struct.pack('>H', x)) - -def _write_long(f, x): - f.write(struct.pack('>l', x)) - -def _write_ulong(f, x): - f.write(struct.pack('>L', x)) - -def _write_string(f, s): - if len(s) > 255: - raise ValueError("string exceeds maximum pstring length") - f.write(struct.pack('B', len(s))) - f.write(s) - if len(s) & 1 == 0: - f.write(b'\x00') - -def _write_float(f, x): - import math - if x < 0: - sign = 0x8000 - x = x * -1 - else: - sign = 0 - if x == 0: - expon = 0 - himant = 0 - lomant = 0 - else: - fmant, expon = math.frexp(x) - if expon > 16384 or fmant >= 1 or fmant != fmant: # Infinity or NaN - expon = sign|0x7FFF - himant = 0 - lomant = 0 - else: # Finite - expon = expon + 16382 - if expon < 0: # denormalized - fmant = math.ldexp(fmant, expon) - expon = 0 - expon = expon | sign - fmant = math.ldexp(fmant, 32) - fsmant = math.floor(fmant) - himant = int(fsmant) - fmant = math.ldexp(fmant - fsmant, 32) - fsmant = math.floor(fmant) - lomant = int(fsmant) - _write_ushort(f, expon) - _write_ulong(f, himant) - _write_ulong(f, lomant) - -with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - from chunk import Chunk -from collections import namedtuple - -_aifc_params = namedtuple('_aifc_params', - 'nchannels sampwidth framerate nframes comptype compname') - -_aifc_params.nchannels.__doc__ = 'Number of audio channels (1 for mono, 2 for stereo)' -_aifc_params.sampwidth.__doc__ = 'Sample width in bytes' -_aifc_params.framerate.__doc__ = 'Sampling frequency' -_aifc_params.nframes.__doc__ = 'Number of audio 
frames' -_aifc_params.comptype.__doc__ = 'Compression type ("NONE" for AIFF files)' -_aifc_params.compname.__doc__ = ("""\ -A human-readable version of the compression type -('not compressed' for AIFF files)""") - - -class Aifc_read: - # Variables used in this class: - # - # These variables are available to the user though appropriate - # methods of this class: - # _file -- the open file with methods read(), close(), and seek() - # set through the __init__() method - # _nchannels -- the number of audio channels - # available through the getnchannels() method - # _nframes -- the number of audio frames - # available through the getnframes() method - # _sampwidth -- the number of bytes per audio sample - # available through the getsampwidth() method - # _framerate -- the sampling frequency - # available through the getframerate() method - # _comptype -- the AIFF-C compression type ('NONE' if AIFF) - # available through the getcomptype() method - # _compname -- the human-readable AIFF-C compression type - # available through the getcomptype() method - # _markers -- the marks in the audio file - # available through the getmarkers() and getmark() - # methods - # _soundpos -- the position in the audio stream - # available through the tell() method, set through the - # setpos() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _decomp -- the decompressor from builtin module cl - # _comm_chunk_read -- 1 iff the COMM chunk has been read - # _aifc -- 1 iff reading an AIFF-C file - # _ssnd_seek_needed -- 1 iff positioned correctly in audio - # file for readframes() - # _ssnd_chunk -- instantiation of a chunk class for the SSND chunk - # _framesize -- size of one frame in the file - - _file = None # Set here since __del__ checks it - - def initfp(self, file): - self._version = 0 - self._convert = None - self._markers = [] - self._soundpos = 0 - self._file = file - chunk = Chunk(file) - if chunk.getname() != b'FORM': - raise Error('file does not start with FORM id') - formdata = chunk.read(4) - if formdata == b'AIFF': - self._aifc = 0 - elif formdata == b'AIFC': - self._aifc = 1 - else: - raise Error('not an AIFF or AIFF-C file') - self._comm_chunk_read = 0 - self._ssnd_chunk = None - while 1: - self._ssnd_seek_needed = 1 - try: - chunk = Chunk(self._file) - except EOFError: - break - chunkname = chunk.getname() - if chunkname == b'COMM': - self._read_comm_chunk(chunk) - self._comm_chunk_read = 1 - elif chunkname == b'SSND': - self._ssnd_chunk = chunk - dummy = chunk.read(8) - self._ssnd_seek_needed = 0 - elif chunkname == b'FVER': - self._version = _read_ulong(chunk) - elif chunkname == b'MARK': - self._readmark(chunk) - chunk.skip() - if not self._comm_chunk_read or not self._ssnd_chunk: - raise Error('COMM chunk and/or SSND chunk missing') - - def __init__(self, f): - if isinstance(f, str): - file_object = builtins.open(f, 'rb') - try: - self.initfp(file_object) - except: - file_object.close() - raise - else: - # assume it is an open file object already - self.initfp(f) - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - # - # User visible methods. 
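The read-side counterpart, using the accessor methods defined below; `getparams()` bundles them into the `_aifc_params` namedtuple (sketch only, with a hypothetical `sample.aiff`):

    import aifc

    with aifc.open("sample.aiff", "r") as f:
        p = f.getparams()                # (nchannels, sampwidth, framerate, nframes, comptype, compname)
        print(p.nchannels, p.framerate)
        data = f.readframes(p.nframes)   # raw (possibly decompressed) sample bytes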
- # - def getfp(self): - return self._file - - def rewind(self): - self._ssnd_seek_needed = 1 - self._soundpos = 0 - - def close(self): - file = self._file - if file is not None: - self._file = None - file.close() - - def tell(self): - return self._soundpos - - def getnchannels(self): - return self._nchannels - - def getnframes(self): - return self._nframes - - def getsampwidth(self): - return self._sampwidth - - def getframerate(self): - return self._framerate - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def getversion(self): -## return self._version - - def getparams(self): - return _aifc_params(self.getnchannels(), self.getsampwidth(), - self.getframerate(), self.getnframes(), - self.getcomptype(), self.getcompname()) - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error('marker {0!r} does not exist'.format(id)) - - def setpos(self, pos): - if pos < 0 or pos > self._nframes: - raise Error('position not in range') - self._soundpos = pos - self._ssnd_seek_needed = 1 - - def readframes(self, nframes): - if self._ssnd_seek_needed: - self._ssnd_chunk.seek(0) - dummy = self._ssnd_chunk.read(8) - pos = self._soundpos * self._framesize - if pos: - self._ssnd_chunk.seek(pos + 8) - self._ssnd_seek_needed = 0 - if nframes == 0: - return b'' - data = self._ssnd_chunk.read(nframes * self._framesize) - if self._convert and data: - data = self._convert(data) - self._soundpos = self._soundpos + len(data) // (self._nchannels - * self._sampwidth) - return data - - # - # Internal methods. - # - - def _alaw2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.alaw2lin(data, 2) - - def _ulaw2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.ulaw2lin(data, 2) - - def _adpcm2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - if not hasattr(self, '_adpcmstate'): - # first time - self._adpcmstate = None - data, self._adpcmstate = audioop.adpcm2lin(data, 2, self._adpcmstate) - return data - - def _sowt2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.byteswap(data, 2) - - def _read_comm_chunk(self, chunk): - self._nchannels = _read_short(chunk) - self._nframes = _read_long(chunk) - self._sampwidth = (_read_short(chunk) + 7) // 8 - self._framerate = int(_read_float(chunk)) - if self._sampwidth <= 0: - raise Error('bad sample width') - if self._nchannels <= 0: - raise Error('bad # of channels') - self._framesize = self._nchannels * self._sampwidth - if self._aifc: - #DEBUG: SGI's soundeditor produces a bad size :-( - kludge = 0 - if chunk.chunksize == 18: - kludge = 1 - warnings.warn('Warning: bad COMM chunk size') - chunk.chunksize = 23 - #DEBUG end - self._comptype = chunk.read(4) - #DEBUG start - if kludge: - length = ord(chunk.file.read(1)) - if length & 1 == 0: - length = length + 1 - chunk.chunksize = chunk.chunksize + length - chunk.file.seek(-1, 1) - #DEBUG end - self._compname = _read_string(chunk) - if self._comptype != b'NONE': - if self._comptype == b'G722': - self._convert = self._adpcm2lin - elif self._comptype in (b'ulaw', 
b'ULAW'): - self._convert = self._ulaw2lin - elif self._comptype in (b'alaw', b'ALAW'): - self._convert = self._alaw2lin - elif self._comptype in (b'sowt', b'SOWT'): - self._convert = self._sowt2lin - else: - raise Error('unsupported compression type') - self._sampwidth = 2 - else: - self._comptype = b'NONE' - self._compname = b'not compressed' - - def _readmark(self, chunk): - nmarkers = _read_short(chunk) - # Some files appear to contain invalid counts. - # Cope with this by testing for EOF. - try: - for i in range(nmarkers): - id = _read_short(chunk) - pos = _read_long(chunk) - name = _read_string(chunk) - if pos or name: - # some files appear to have - # dummy markers consisting of - # a position 0 and name '' - self._markers.append((id, pos, name)) - except EOFError: - w = ('Warning: MARK chunk contains only %s marker%s instead of %s' % - (len(self._markers), '' if len(self._markers) == 1 else 's', - nmarkers)) - warnings.warn(w) - -class Aifc_write: - # Variables used in this class: - # - # These variables are user settable through appropriate methods - # of this class: - # _file -- the open file with methods write(), close(), tell(), seek() - # set through the __init__() method - # _comptype -- the AIFF-C compression type ('NONE' in AIFF) - # set through the setcomptype() or setparams() method - # _compname -- the human-readable AIFF-C compression type - # set through the setcomptype() or setparams() method - # _nchannels -- the number of audio channels - # set through the setnchannels() or setparams() method - # _sampwidth -- the number of bytes per audio sample - # set through the setsampwidth() or setparams() method - # _framerate -- the sampling frequency - # set through the setframerate() or setparams() method - # _nframes -- the number of audio frames written to the header - # set through the setnframes() or setparams() method - # _aifc -- whether we're writing an AIFF-C file or an AIFF file - # set through the aifc() method, reset through the - # aiff() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _comp -- the compressor from builtin module cl - # _nframeswritten -- the number of audio frames actually written - # _datalength -- the size of the audio samples written to the header - # _datawritten -- the size of the audio samples actually written - - _file = None # Set here since __del__ checks it - - def __init__(self, f): - if isinstance(f, str): - file_object = builtins.open(f, 'wb') - try: - self.initfp(file_object) - except: - file_object.close() - raise - - # treat .aiff file extensions as non-compressed audio - if f.endswith('.aiff'): - self._aifc = 0 - else: - # assume it is an open file object already - self.initfp(f) - - def initfp(self, file): - self._file = file - self._version = _AIFC_version - self._comptype = b'NONE' - self._compname = b'not compressed' - self._convert = None - self._nchannels = 0 - self._sampwidth = 0 - self._framerate = 0 - self._nframes = 0 - self._nframeswritten = 0 - self._datawritten = 0 - self._datalength = 0 - self._markers = [] - self._marklength = 0 - self._aifc = 1 # AIFF-C is default - - def __del__(self): - self.close() - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - # - # User visible methods. 
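Each setter below rejects changes once writing has started, so the usual pattern, also used by the demo block at the bottom of this file, is to configure everything up front; `setparams()` copies all six values from a reader in one call. A sketch with hypothetical file names:

    import aifc

    with aifc.open("in.aiff", "r") as src, aifc.open("out.aiff", "w") as dst:
        dst.setparams(src.getparams())   # nchannels, sampwidth, framerate, nframes, comptype, compname
        dst.writeframes(src.readframes(src.getnframes()))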
- # - def aiff(self): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._aifc = 0 - - def aifc(self): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._aifc = 1 - - def setnchannels(self, nchannels): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if nchannels < 1: - raise Error('bad # of channels') - self._nchannels = nchannels - - def getnchannels(self): - if not self._nchannels: - raise Error('number of channels not set') - return self._nchannels - - def setsampwidth(self, sampwidth): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if sampwidth < 1 or sampwidth > 4: - raise Error('bad sample width') - self._sampwidth = sampwidth - - def getsampwidth(self): - if not self._sampwidth: - raise Error('sample width not set') - return self._sampwidth - - def setframerate(self, framerate): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if framerate <= 0: - raise Error('bad frame rate') - self._framerate = framerate - - def getframerate(self): - if not self._framerate: - raise Error('frame rate not set') - return self._framerate - - def setnframes(self, nframes): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._nframes = nframes - - def getnframes(self): - return self._nframeswritten - - def setcomptype(self, comptype, compname): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if comptype not in (b'NONE', b'ulaw', b'ULAW', - b'alaw', b'ALAW', b'G722', b'sowt', b'SOWT'): - raise Error('unsupported compression type') - self._comptype = comptype - self._compname = compname - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def setversion(self, version): -## if self._nframeswritten: -## raise Error, 'cannot change parameters after starting to write' -## self._version = version - - def setparams(self, params): - nchannels, sampwidth, framerate, nframes, comptype, compname = params - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if comptype not in (b'NONE', b'ulaw', b'ULAW', - b'alaw', b'ALAW', b'G722', b'sowt', b'SOWT'): - raise Error('unsupported compression type') - self.setnchannels(nchannels) - self.setsampwidth(sampwidth) - self.setframerate(framerate) - self.setnframes(nframes) - self.setcomptype(comptype, compname) - - def getparams(self): - if not self._nchannels or not self._sampwidth or not self._framerate: - raise Error('not all parameters set') - return _aifc_params(self._nchannels, self._sampwidth, self._framerate, - self._nframes, self._comptype, self._compname) - - def setmark(self, id, pos, name): - if id <= 0: - raise Error('marker ID must be > 0') - if pos < 0: - raise Error('marker position must be >= 0') - if not isinstance(name, bytes): - raise Error('marker name must be bytes') - for i in range(len(self._markers)): - if id == self._markers[i][0]: - self._markers[i] = id, pos, name - return - self._markers.append((id, pos, name)) - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error('marker {0!r} does not exist'.format(id)) - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def tell(self): - return self._nframeswritten - - def 
writeframesraw(self, data): - if not isinstance(data, (bytes, bytearray)): - data = memoryview(data).cast('B') - self._ensure_header_written(len(data)) - nframes = len(data) // (self._sampwidth * self._nchannels) - if self._convert: - data = self._convert(data) - self._file.write(data) - self._nframeswritten = self._nframeswritten + nframes - self._datawritten = self._datawritten + len(data) - - def writeframes(self, data): - self.writeframesraw(data) - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten: - self._patchheader() - - def close(self): - if self._file is None: - return - try: - self._ensure_header_written(0) - if self._datawritten & 1: - # quick pad to even size - self._file.write(b'\x00') - self._datawritten = self._datawritten + 1 - self._writemarkers() - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten or \ - self._marklength: - self._patchheader() - finally: - # Prevent ref cycles - self._convert = None - f = self._file - self._file = None - f.close() - - # - # Internal methods. - # - - def _lin2alaw(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.lin2alaw(data, 2) - - def _lin2ulaw(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.lin2ulaw(data, 2) - - def _lin2adpcm(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - if not hasattr(self, '_adpcmstate'): - self._adpcmstate = None - data, self._adpcmstate = audioop.lin2adpcm(data, 2, self._adpcmstate) - return data - - def _lin2sowt(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.byteswap(data, 2) - - def _ensure_header_written(self, datasize): - if not self._nframeswritten: - if self._comptype in (b'ULAW', b'ulaw', - b'ALAW', b'alaw', b'G722', - b'sowt', b'SOWT'): - if not self._sampwidth: - self._sampwidth = 2 - if self._sampwidth != 2: - raise Error('sample width must be 2 when compressing ' - 'with ulaw/ULAW, alaw/ALAW, sowt/SOWT ' - 'or G7.22 (ADPCM)') - if not self._nchannels: - raise Error('# channels not specified') - if not self._sampwidth: - raise Error('sample width not specified') - if not self._framerate: - raise Error('sampling rate not specified') - self._write_header(datasize) - - def _init_compression(self): - if self._comptype == b'G722': - self._convert = self._lin2adpcm - elif self._comptype in (b'ulaw', b'ULAW'): - self._convert = self._lin2ulaw - elif self._comptype in (b'alaw', b'ALAW'): - self._convert = self._lin2alaw - elif self._comptype in (b'sowt', b'SOWT'): - self._convert = self._lin2sowt - - def _write_header(self, initlength): - if self._aifc and self._comptype != b'NONE': - self._init_compression() - self._file.write(b'FORM') - if not self._nframes: - self._nframes = initlength // (self._nchannels * self._sampwidth) - self._datalength = self._nframes * self._nchannels * self._sampwidth - if self._datalength & 1: - self._datalength = self._datalength + 1 - if self._aifc: - if self._comptype in (b'ulaw', b'ULAW', b'alaw', b'ALAW'): - self._datalength = self._datalength // 2 - if self._datalength & 1: - self._datalength = self._datalength + 1 - elif self._comptype == b'G722': - self._datalength = (self._datalength + 3) // 4 - if self._datalength & 1: - 
self._datalength = self._datalength + 1 - try: - self._form_length_pos = self._file.tell() - except (AttributeError, OSError): - self._form_length_pos = None - commlength = self._write_form_length(self._datalength) - if self._aifc: - self._file.write(b'AIFC') - self._file.write(b'FVER') - _write_ulong(self._file, 4) - _write_ulong(self._file, self._version) - else: - self._file.write(b'AIFF') - self._file.write(b'COMM') - _write_ulong(self._file, commlength) - _write_short(self._file, self._nchannels) - if self._form_length_pos is not None: - self._nframes_pos = self._file.tell() - _write_ulong(self._file, self._nframes) - if self._comptype in (b'ULAW', b'ulaw', b'ALAW', b'alaw', b'G722'): - _write_short(self._file, 8) - else: - _write_short(self._file, self._sampwidth * 8) - _write_float(self._file, self._framerate) - if self._aifc: - self._file.write(self._comptype) - _write_string(self._file, self._compname) - self._file.write(b'SSND') - if self._form_length_pos is not None: - self._ssnd_length_pos = self._file.tell() - _write_ulong(self._file, self._datalength + 8) - _write_ulong(self._file, 0) - _write_ulong(self._file, 0) - - def _write_form_length(self, datalength): - if self._aifc: - commlength = 18 + 5 + len(self._compname) - if commlength & 1: - commlength = commlength + 1 - verslength = 12 - else: - commlength = 18 - verslength = 0 - _write_ulong(self._file, 4 + verslength + self._marklength + \ - 8 + commlength + 16 + datalength) - return commlength - - def _patchheader(self): - curpos = self._file.tell() - if self._datawritten & 1: - datalength = self._datawritten + 1 - self._file.write(b'\x00') - else: - datalength = self._datawritten - if datalength == self._datalength and \ - self._nframes == self._nframeswritten and \ - self._marklength == 0: - self._file.seek(curpos, 0) - return - self._file.seek(self._form_length_pos, 0) - dummy = self._write_form_length(datalength) - self._file.seek(self._nframes_pos, 0) - _write_ulong(self._file, self._nframeswritten) - self._file.seek(self._ssnd_length_pos, 0) - _write_ulong(self._file, datalength + 8) - self._file.seek(curpos, 0) - self._nframes = self._nframeswritten - self._datalength = datalength - - def _writemarkers(self): - if len(self._markers) == 0: - return - self._file.write(b'MARK') - length = 2 - for marker in self._markers: - id, pos, name = marker - length = length + len(name) + 1 + 6 - if len(name) & 1 == 0: - length = length + 1 - _write_ulong(self._file, length) - self._marklength = length + 8 - _write_short(self._file, len(self._markers)) - for marker in self._markers: - id, pos, name = marker - _write_short(self._file, id) - _write_ulong(self._file, pos) - _write_string(self._file, name) - -def open(f, mode=None): - if mode is None: - if hasattr(f, 'mode'): - mode = f.mode - else: - mode = 'rb' - if mode in ('r', 'rb'): - return Aifc_read(f) - elif mode in ('w', 'wb'): - return Aifc_write(f) - else: - raise Error("mode must be 'r', 'rb', 'w', or 'wb'") - - -if __name__ == '__main__': - import sys - if not sys.argv[1:]: - sys.argv.append('/usr/demos/data/audio/bach.aiff') - fn = sys.argv[1] - with open(fn, 'r') as f: - print("Reading", fn) - print("nchannels =", f.getnchannels()) - print("nframes =", f.getnframes()) - print("sampwidth =", f.getsampwidth()) - print("framerate =", f.getframerate()) - print("comptype =", f.getcomptype()) - print("compname =", f.getcompname()) - if sys.argv[2:]: - gn = sys.argv[2] - print("Writing", gn) - with open(gn, 'w') as g: - g.setparams(f.getparams()) - while 1: - data = 
f.readframes(1024) - if not data: - break - g.writeframes(data) - print("Done.") diff --git a/python/python3_12/examples/antigravity.py b/python/python3_12/examples/antigravity.py deleted file mode 100644 index 6dc5207335..0000000000 --- a/python/python3_12/examples/antigravity.py +++ /dev/null @@ -1,17 +0,0 @@ - -import webbrowser -import hashlib - -webbrowser.open("https://xkcd.com/353/") - -def geohash(latitude, longitude, datedow): - '''Compute geohash() using the Munroe algorithm. - - >>> geohash(37.421542, -122.085589, b'2005-05-26-10458.68') - 37.857713 -122.544543 - - ''' - # https://xkcd.com/426/ - h = hashlib.md5(datedow, usedforsecurity=False).hexdigest() - p, q = [('%f' % float.fromhex('0.' + x)) for x in (h[:16], h[16:32])] - print('%d%s %d%s' % (latitude, p[1:], longitude, q[1:])) diff --git a/python/python3_12/examples/argparse.py b/python/python3_12/examples/argparse.py deleted file mode 100644 index 543d9944f9..0000000000 --- a/python/python3_12/examples/argparse.py +++ /dev/null @@ -1,2655 +0,0 @@ -# Author: Steven J. Bethard . -# New maintainer as of 29 August 2019: Raymond Hettinger - -"""Command-line parsing library - -This module is an optparse-inspired command-line parsing library that: - - - handles both optional and positional arguments - - produces highly informative usage messages - - supports parsers that dispatch to sub-parsers - -The following is a simple usage example that sums integers from the -command-line and writes the result to a file:: - - parser = argparse.ArgumentParser( - description='sum the integers at the command line') - parser.add_argument( - 'integers', metavar='int', nargs='+', type=int, - help='an integer to be summed') - parser.add_argument( - '--log', default=sys.stdout, type=argparse.FileType('w'), - help='the file where the sum should be written') - args = parser.parse_args() - args.log.write('%s' % sum(args.integers)) - args.log.close() - -The module contains the following public classes: - - - ArgumentParser -- The main entry point for command-line parsing. As the - example above shows, the add_argument() method is used to populate - the parser with actions for optional and positional arguments. Then - the parse_args() method is invoked to convert the args at the - command-line into an object with attributes. - - - ArgumentError -- The exception raised by ArgumentParser objects when - there are errors with the parser's actions. Errors raised while - parsing the command-line are caught by ArgumentParser and emitted - as command-line messages. - - - FileType -- A factory for defining types of files to be created. As the - example above shows, instances of FileType are typically passed as - the type= argument of add_argument() calls. - - - Action -- The base class for parser actions. Typically actions are - selected by passing strings like 'store_true' or 'append_const' to - the action= argument of add_argument(). However, for greater - customization of ArgumentParser actions, subclasses of Action may - be defined and passed as the action= argument. - - - HelpFormatter, RawDescriptionHelpFormatter, RawTextHelpFormatter, - ArgumentDefaultsHelpFormatter -- Formatter classes which - may be passed as the formatter_class= argument to the - ArgumentParser constructor. HelpFormatter is the default, - RawDescriptionHelpFormatter and RawTextHelpFormatter tell the parser - not to change the formatting for help text, and - ArgumentDefaultsHelpFormatter adds information about argument defaults - to the help. 
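A short sketch of the `formatter_class=` hook described in the last bullet above (the `--retries` option is hypothetical):

    import argparse

    parser = argparse.ArgumentParser(
        description="demo",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--retries", type=int, default=3, help="retry count")
    parser.print_help()   # the --retries help line now ends with "(default: 3)"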
- -All other classes in this module are considered implementation details. -(Also note that HelpFormatter and RawDescriptionHelpFormatter are only -considered public as object names -- the API of the formatter objects is -still considered an implementation detail.) -""" - -__version__ = '1.1' -__all__ = [ - 'ArgumentParser', - 'ArgumentError', - 'ArgumentTypeError', - 'BooleanOptionalAction', - 'FileType', - 'HelpFormatter', - 'ArgumentDefaultsHelpFormatter', - 'RawDescriptionHelpFormatter', - 'RawTextHelpFormatter', - 'MetavarTypeHelpFormatter', - 'Namespace', - 'Action', - 'ONE_OR_MORE', - 'OPTIONAL', - 'PARSER', - 'REMAINDER', - 'SUPPRESS', - 'ZERO_OR_MORE', -] - - -import os as _os -import re as _re -import sys as _sys - -import warnings - -from gettext import gettext as _, ngettext - -SUPPRESS = '==SUPPRESS==' - -OPTIONAL = '?' -ZERO_OR_MORE = '*' -ONE_OR_MORE = '+' -PARSER = 'A...' -REMAINDER = '...' -_UNRECOGNIZED_ARGS_ATTR = '_unrecognized_args' - -# ============================= -# Utility functions and classes -# ============================= - -class _AttributeHolder(object): - """Abstract base class that provides __repr__. - - The __repr__ method returns a string in the format:: - ClassName(attr=name, attr=name, ...) - The attributes are determined either by a class-level attribute, - '_kwarg_names', or by inspecting the instance __dict__. - """ - - def __repr__(self): - type_name = type(self).__name__ - arg_strings = [] - star_args = {} - for arg in self._get_args(): - arg_strings.append(repr(arg)) - for name, value in self._get_kwargs(): - if name.isidentifier(): - arg_strings.append('%s=%r' % (name, value)) - else: - star_args[name] = value - if star_args: - arg_strings.append('**%s' % repr(star_args)) - return '%s(%s)' % (type_name, ', '.join(arg_strings)) - - def _get_kwargs(self): - return list(self.__dict__.items()) - - def _get_args(self): - return [] - - -def _copy_items(items): - if items is None: - return [] - # The copy module is used only in the 'append' and 'append_const' - # actions, and it is needed only when the default value isn't a list. - # Delay its import for speeding up the common case. - if type(items) is list: - return items[:] - import copy - return copy.copy(items) - - -# =============== -# Formatting Help -# =============== - - -class HelpFormatter(object): - """Formatter for generating usage messages and argument help strings. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. 
- """ - - def __init__(self, - prog, - indent_increment=2, - max_help_position=24, - width=None): - - # default setting for width - if width is None: - import shutil - width = shutil.get_terminal_size().columns - width -= 2 - - self._prog = prog - self._indent_increment = indent_increment - self._max_help_position = min(max_help_position, - max(width - 20, indent_increment * 2)) - self._width = width - - self._current_indent = 0 - self._level = 0 - self._action_max_length = 0 - - self._root_section = self._Section(self, None) - self._current_section = self._root_section - - self._whitespace_matcher = _re.compile(r'\s+', _re.ASCII) - self._long_break_matcher = _re.compile(r'\n\n\n+') - - # =============================== - # Section and indentation methods - # =============================== - def _indent(self): - self._current_indent += self._indent_increment - self._level += 1 - - def _dedent(self): - self._current_indent -= self._indent_increment - assert self._current_indent >= 0, 'Indent decreased below 0.' - self._level -= 1 - - class _Section(object): - - def __init__(self, formatter, parent, heading=None): - self.formatter = formatter - self.parent = parent - self.heading = heading - self.items = [] - - def format_help(self): - # format the indented section - if self.parent is not None: - self.formatter._indent() - join = self.formatter._join_parts - item_help = join([func(*args) for func, args in self.items]) - if self.parent is not None: - self.formatter._dedent() - - # return nothing if the section was empty - if not item_help: - return '' - - # add the heading if the section was non-empty - if self.heading is not SUPPRESS and self.heading is not None: - current_indent = self.formatter._current_indent - heading = '%*s%s:\n' % (current_indent, '', self.heading) - else: - heading = '' - - # join the section-initial newline, the heading and the help - return join(['\n', heading, item_help, '\n']) - - def _add_item(self, func, args): - self._current_section.items.append((func, args)) - - # ======================== - # Message building methods - # ======================== - def start_section(self, heading): - self._indent() - section = self._Section(self, self._current_section, heading) - self._add_item(section.format_help, []) - self._current_section = section - - def end_section(self): - self._current_section = self._current_section.parent - self._dedent() - - def add_text(self, text): - if text is not SUPPRESS and text is not None: - self._add_item(self._format_text, [text]) - - def add_usage(self, usage, actions, groups, prefix=None): - if usage is not SUPPRESS: - args = usage, actions, groups, prefix - self._add_item(self._format_usage, args) - - def add_argument(self, action): - if action.help is not SUPPRESS: - - # find all invocations - get_invocation = self._format_action_invocation - invocations = [get_invocation(action)] - for subaction in self._iter_indented_subactions(action): - invocations.append(get_invocation(subaction)) - - # update the maximum item length - invocation_length = max(map(len, invocations)) - action_length = invocation_length + self._current_indent - self._action_max_length = max(self._action_max_length, - action_length) - - # add the item to the list - self._add_item(self._format_action, [action]) - - def add_arguments(self, actions): - for action in actions: - self.add_argument(action) - - # ======================= - # Help-formatting methods - # ======================= - def format_help(self): - help = self._root_section.format_help() - if help: - 
help = self._long_break_matcher.sub('\n\n', help) - help = help.strip('\n') + '\n' - return help - - def _join_parts(self, part_strings): - return ''.join([part - for part in part_strings - if part and part is not SUPPRESS]) - - def _format_usage(self, usage, actions, groups, prefix): - if prefix is None: - prefix = _('usage: ') - - # if usage is specified, use that - if usage is not None: - usage = usage % dict(prog=self._prog) - - # if no optionals or positionals are available, usage is just prog - elif usage is None and not actions: - usage = '%(prog)s' % dict(prog=self._prog) - - # if optionals and positionals are available, calculate usage - elif usage is None: - prog = '%(prog)s' % dict(prog=self._prog) - - # split optionals from positionals - optionals = [] - positionals = [] - for action in actions: - if action.option_strings: - optionals.append(action) - else: - positionals.append(action) - - # build full usage string - format = self._format_actions_usage - action_usage = format(optionals + positionals, groups) - usage = ' '.join([s for s in [prog, action_usage] if s]) - - # wrap the usage parts if it's too long - text_width = self._width - self._current_indent - if len(prefix) + len(usage) > text_width: - - # break usage into wrappable parts - part_regexp = ( - r'\(.*?\)+(?=\s|$)|' - r'\[.*?\]+(?=\s|$)|' - r'\S+' - ) - opt_usage = format(optionals, groups) - pos_usage = format(positionals, groups) - opt_parts = _re.findall(part_regexp, opt_usage) - pos_parts = _re.findall(part_regexp, pos_usage) - assert ' '.join(opt_parts) == opt_usage - assert ' '.join(pos_parts) == pos_usage - - # helper for wrapping lines - def get_lines(parts, indent, prefix=None): - lines = [] - line = [] - indent_length = len(indent) - if prefix is not None: - line_len = len(prefix) - 1 - else: - line_len = indent_length - 1 - for part in parts: - if line_len + 1 + len(part) > text_width and line: - lines.append(indent + ' '.join(line)) - line = [] - line_len = indent_length - 1 - line.append(part) - line_len += len(part) + 1 - if line: - lines.append(indent + ' '.join(line)) - if prefix is not None: - lines[0] = lines[0][indent_length:] - return lines - - # if prog is short, follow it with optionals or positionals - if len(prefix) + len(prog) <= 0.75 * text_width: - indent = ' ' * (len(prefix) + len(prog) + 1) - if opt_parts: - lines = get_lines([prog] + opt_parts, indent, prefix) - lines.extend(get_lines(pos_parts, indent)) - elif pos_parts: - lines = get_lines([prog] + pos_parts, indent, prefix) - else: - lines = [prog] - - # if prog is long, put it on its own line - else: - indent = ' ' * len(prefix) - parts = opt_parts + pos_parts - lines = get_lines(parts, indent) - if len(lines) > 1: - lines = [] - lines.extend(get_lines(opt_parts, indent)) - lines.extend(get_lines(pos_parts, indent)) - lines = [prog] + lines - - # join lines into usage - usage = '\n'.join(lines) - - # prefix with 'usage:' - return '%s%s\n\n' % (prefix, usage) - - def _format_actions_usage(self, actions, groups): - # find group indices and identify actions in groups - group_actions = set() - inserts = {} - for group in groups: - if not group._group_actions: - raise ValueError(f'empty group {group}') - - try: - start = actions.index(group._group_actions[0]) - except ValueError: - continue - else: - group_action_count = len(group._group_actions) - end = start + group_action_count - if actions[start:end] == group._group_actions: - - suppressed_actions_count = 0 - for action in group._group_actions: - group_actions.add(action) - if 
action.help is SUPPRESS: - suppressed_actions_count += 1 - - exposed_actions_count = group_action_count - suppressed_actions_count - - if not group.required: - if start in inserts: - inserts[start] += ' [' - else: - inserts[start] = '[' - if end in inserts: - inserts[end] += ']' - else: - inserts[end] = ']' - elif exposed_actions_count > 1: - if start in inserts: - inserts[start] += ' (' - else: - inserts[start] = '(' - if end in inserts: - inserts[end] += ')' - else: - inserts[end] = ')' - for i in range(start + 1, end): - inserts[i] = '|' - - # collect all actions format strings - parts = [] - for i, action in enumerate(actions): - - # suppressed arguments are marked with None - # remove | separators for suppressed arguments - if action.help is SUPPRESS: - parts.append(None) - if inserts.get(i) == '|': - inserts.pop(i) - elif inserts.get(i + 1) == '|': - inserts.pop(i + 1) - - # produce all arg strings - elif not action.option_strings: - default = self._get_default_metavar_for_positional(action) - part = self._format_args(action, default) - - # if it's in a group, strip the outer [] - if action in group_actions: - if part[0] == '[' and part[-1] == ']': - part = part[1:-1] - - # add the action string to the list - parts.append(part) - - # produce the first way to invoke the option in brackets - else: - option_string = action.option_strings[0] - - # if the Optional doesn't take a value, format is: - # -s or --long - if action.nargs == 0: - part = action.format_usage() - - # if the Optional takes a value, format is: - # -s ARGS or --long ARGS - else: - default = self._get_default_metavar_for_optional(action) - args_string = self._format_args(action, default) - part = '%s %s' % (option_string, args_string) - - # make it look optional if it's not required or in a group - if not action.required and action not in group_actions: - part = '[%s]' % part - - # add the action string to the list - parts.append(part) - - # insert things at the necessary indices - for i in sorted(inserts, reverse=True): - parts[i:i] = [inserts[i]] - - # join all the action items with spaces - text = ' '.join([item for item in parts if item is not None]) - - # clean up separators for mutually exclusive groups - open = r'[\[(]' - close = r'[\])]' - text = _re.sub(r'(%s) ' % open, r'\1', text) - text = _re.sub(r' (%s)' % close, r'\1', text) - text = _re.sub(r'%s *%s' % (open, close), r'', text) - text = text.strip() - - # return the text - return text - - def _format_text(self, text): - if '%(prog)' in text: - text = text % dict(prog=self._prog) - text_width = max(self._width - self._current_indent, 11) - indent = ' ' * self._current_indent - return self._fill_text(text, text_width, indent) + '\n\n' - - def _format_action(self, action): - # determine the required width and the entry label - help_position = min(self._action_max_length + 2, - self._max_help_position) - help_width = max(self._width - help_position, 11) - action_width = help_position - self._current_indent - 2 - action_header = self._format_action_invocation(action) - - # no help; start on same line and add a final newline - if not action.help: - tup = self._current_indent, '', action_header - action_header = '%*s%s\n' % tup - - # short action name; start on the same line and pad two spaces - elif len(action_header) <= action_width: - tup = self._current_indent, '', action_width, action_header - action_header = '%*s%-*s ' % tup - indent_first = 0 - - # long action name; start on the next line - else: - tup = self._current_indent, '', action_header - 
action_header = '%*s%s\n' % tup - indent_first = help_position - - # collect the pieces of the action help - parts = [action_header] - - # if there was help for the action, add lines of help text - if action.help and action.help.strip(): - help_text = self._expand_help(action) - if help_text: - help_lines = self._split_lines(help_text, help_width) - parts.append('%*s%s\n' % (indent_first, '', help_lines[0])) - for line in help_lines[1:]: - parts.append('%*s%s\n' % (help_position, '', line)) - - # or add a newline if the description doesn't end with one - elif not action_header.endswith('\n'): - parts.append('\n') - - # if there are any sub-actions, add their help as well - for subaction in self._iter_indented_subactions(action): - parts.append(self._format_action(subaction)) - - # return a single string - return self._join_parts(parts) - - def _format_action_invocation(self, action): - if not action.option_strings: - default = self._get_default_metavar_for_positional(action) - metavar, = self._metavar_formatter(action, default)(1) - return metavar - - else: - parts = [] - - # if the Optional doesn't take a value, format is: - # -s, --long - if action.nargs == 0: - parts.extend(action.option_strings) - - # if the Optional takes a value, format is: - # -s ARGS, --long ARGS - else: - default = self._get_default_metavar_for_optional(action) - args_string = self._format_args(action, default) - for option_string in action.option_strings: - parts.append('%s %s' % (option_string, args_string)) - - return ', '.join(parts) - - def _metavar_formatter(self, action, default_metavar): - if action.metavar is not None: - result = action.metavar - elif action.choices is not None: - choice_strs = [str(choice) for choice in action.choices] - result = '{%s}' % ','.join(choice_strs) - else: - result = default_metavar - - def format(tuple_size): - if isinstance(result, tuple): - return result - else: - return (result, ) * tuple_size - return format - - def _format_args(self, action, default_metavar): - get_metavar = self._metavar_formatter(action, default_metavar) - if action.nargs is None: - result = '%s' % get_metavar(1) - elif action.nargs == OPTIONAL: - result = '[%s]' % get_metavar(1) - elif action.nargs == ZERO_OR_MORE: - metavar = get_metavar(1) - if len(metavar) == 2: - result = '[%s [%s ...]]' % metavar - else: - result = '[%s ...]' % metavar - elif action.nargs == ONE_OR_MORE: - result = '%s [%s ...]' % get_metavar(2) - elif action.nargs == REMAINDER: - result = '...' - elif action.nargs == PARSER: - result = '%s ...' 
% get_metavar(1) - elif action.nargs == SUPPRESS: - result = '' - else: - try: - formats = ['%s' for _ in range(action.nargs)] - except TypeError: - raise ValueError("invalid nargs value") from None - result = ' '.join(formats) % get_metavar(action.nargs) - return result - - def _expand_help(self, action): - params = dict(vars(action), prog=self._prog) - for name in list(params): - if params[name] is SUPPRESS: - del params[name] - for name in list(params): - if hasattr(params[name], '__name__'): - params[name] = params[name].__name__ - if params.get('choices') is not None: - choices_str = ', '.join([str(c) for c in params['choices']]) - params['choices'] = choices_str - return self._get_help_string(action) % params - - def _iter_indented_subactions(self, action): - try: - get_subactions = action._get_subactions - except AttributeError: - pass - else: - self._indent() - yield from get_subactions() - self._dedent() - - def _split_lines(self, text, width): - text = self._whitespace_matcher.sub(' ', text).strip() - # The textwrap module is used only for formatting help. - # Delay its import for speeding up the common usage of argparse. - import textwrap - return textwrap.wrap(text, width) - - def _fill_text(self, text, width, indent): - text = self._whitespace_matcher.sub(' ', text).strip() - import textwrap - return textwrap.fill(text, width, - initial_indent=indent, - subsequent_indent=indent) - - def _get_help_string(self, action): - return action.help - - def _get_default_metavar_for_optional(self, action): - return action.dest.upper() - - def _get_default_metavar_for_positional(self, action): - return action.dest - - -class RawDescriptionHelpFormatter(HelpFormatter): - """Help message formatter which retains any formatting in descriptions. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def _fill_text(self, text, width, indent): - return ''.join(indent + line for line in text.splitlines(keepends=True)) - - -class RawTextHelpFormatter(RawDescriptionHelpFormatter): - """Help message formatter which retains formatting of all help text. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def _split_lines(self, text, width): - return text.splitlines() - - -class ArgumentDefaultsHelpFormatter(HelpFormatter): - """Help message formatter which adds default values to argument help. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def _get_help_string(self, action): - """ - Add the default value to the option help message. - - ArgumentDefaultsHelpFormatter and BooleanOptionalAction when it isn't - already present. This code will do that, detecting cornercases to - prevent duplicates or cases where it wouldn't make sense to the end - user. - """ - help = action.help - if help is None: - help = '' - - if '%(default)' not in help: - if action.default is not SUPPRESS: - defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] - if action.option_strings or action.nargs in defaulting_nargs: - help += ' (default: %(default)s)' - return help - - - -class MetavarTypeHelpFormatter(HelpFormatter): - """Help message formatter which uses the argument 'type' as the default - metavar value (instead of the argument 'dest') - - Only the name of this class is considered a public API. 
All the methods - provided by the class are considered an implementation detail. - """ - - def _get_default_metavar_for_optional(self, action): - return action.type.__name__ - - def _get_default_metavar_for_positional(self, action): - return action.type.__name__ - - -# ===================== -# Options and Arguments -# ===================== - -def _get_action_name(argument): - if argument is None: - return None - elif argument.option_strings: - return '/'.join(argument.option_strings) - elif argument.metavar not in (None, SUPPRESS): - return argument.metavar - elif argument.dest not in (None, SUPPRESS): - return argument.dest - elif argument.choices: - return '{' + ','.join(argument.choices) + '}' - else: - return None - - -class ArgumentError(Exception): - """An error from creating or using an argument (optional or positional). - - The string value of this exception is the message, augmented with - information about the argument that caused it. - """ - - def __init__(self, argument, message): - self.argument_name = _get_action_name(argument) - self.message = message - - def __str__(self): - if self.argument_name is None: - format = '%(message)s' - else: - format = _('argument %(argument_name)s: %(message)s') - return format % dict(message=self.message, - argument_name=self.argument_name) - - -class ArgumentTypeError(Exception): - """An error from trying to convert a command line string to a type.""" - pass - - -# ============== -# Action classes -# ============== - -class Action(_AttributeHolder): - """Information about how to convert command line strings to Python objects. - - Action objects are used by an ArgumentParser to represent the information - needed to parse a single argument from one or more strings from the - command line. The keyword arguments to the Action constructor are also - all attributes of Action instances. - - Keyword Arguments: - - - option_strings -- A list of command-line option strings which - should be associated with this action. - - - dest -- The name of the attribute to hold the created object(s) - - - nargs -- The number of command-line arguments that should be - consumed. By default, one argument will be consumed and a single - value will be produced. Other values include: - - N (an integer) consumes N arguments (and produces a list) - - '?' consumes zero or one arguments - - '*' consumes zero or more arguments (and produces a list) - - '+' consumes one or more arguments (and produces a list) - Note that the difference between the default and nargs=1 is that - with the default, a single value will be produced, while with - nargs=1, a list containing a single value will be produced. - - - const -- The value to be produced if the option is specified and the - option uses an action that takes no values. - - - default -- The value to be produced if the option is not specified. - - - type -- A callable that accepts a single string argument, and - returns the converted value. The standard Python types str, int, - float, and complex are useful examples of such callables. If None, - str is used. - - - choices -- A container of values that should be allowed. If not None, - after a command-line argument has been converted to the appropriate - type, an exception will be raised if it is not a member of this - collection. - - - required -- True if the action must always be specified at the - command line. This is only meaningful for optional command-line - arguments. - - - help -- The help string describing the argument. 
- - - metavar -- The name to be used for the option's argument with the - help string. If None, the 'dest' value will be used as the name. - """ - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): - self.option_strings = option_strings - self.dest = dest - self.nargs = nargs - self.const = const - self.default = default - self.type = type - self.choices = choices - self.required = required - self.help = help - self.metavar = metavar - - def _get_kwargs(self): - names = [ - 'option_strings', - 'dest', - 'nargs', - 'const', - 'default', - 'type', - 'choices', - 'required', - 'help', - 'metavar', - ] - return [(name, getattr(self, name)) for name in names] - - def format_usage(self): - return self.option_strings[0] - - def __call__(self, parser, namespace, values, option_string=None): - raise NotImplementedError(_('.__call__() not defined')) - - -# FIXME: remove together with `BooleanOptionalAction` deprecated arguments. -_deprecated_default = object() - -class BooleanOptionalAction(Action): - def __init__(self, - option_strings, - dest, - default=None, - type=_deprecated_default, - choices=_deprecated_default, - required=False, - help=None, - metavar=_deprecated_default): - - _option_strings = [] - for option_string in option_strings: - _option_strings.append(option_string) - - if option_string.startswith('--'): - option_string = '--no-' + option_string[2:] - _option_strings.append(option_string) - - # We need `_deprecated` special value to ban explicit arguments that - # match default value. Like: - # parser.add_argument('-f', action=BooleanOptionalAction, type=int) - for field_name in ('type', 'choices', 'metavar'): - if locals()[field_name] is not _deprecated_default: - warnings._deprecated( - field_name, - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", - remove=(3, 14)) - - if type is _deprecated_default: - type = None - if choices is _deprecated_default: - choices = None - if metavar is _deprecated_default: - metavar = None - - super().__init__( - option_strings=_option_strings, - dest=dest, - nargs=0, - default=default, - type=type, - choices=choices, - required=required, - help=help, - metavar=metavar) - - - def __call__(self, parser, namespace, values, option_string=None): - if option_string in self.option_strings: - setattr(namespace, self.dest, not option_string.startswith('--no-')) - - def format_usage(self): - return ' | '.join(self.option_strings) - - -class _StoreAction(Action): - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): - if nargs == 0: - raise ValueError('nargs for store actions must be != 0; if you ' - 'have nothing to store, actions such as store ' - 'true or store const may be more appropriate') - if const is not None and nargs != OPTIONAL: - raise ValueError('nargs must be %r to supply const' % OPTIONAL) - super(_StoreAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=nargs, - const=const, - default=default, - type=type, - choices=choices, - required=required, - help=help, - metavar=metavar) - - def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, values) - - -class _StoreConstAction(Action): - - def __init__(self, - option_strings, - dest, - const=None, - default=None, - required=False, - help=None, - metavar=None): - 
super(_StoreConstAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - const=const, - default=default, - required=required, - help=help) - - def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, self.const) - - -class _StoreTrueAction(_StoreConstAction): - - def __init__(self, - option_strings, - dest, - default=False, - required=False, - help=None): - super(_StoreTrueAction, self).__init__( - option_strings=option_strings, - dest=dest, - const=True, - default=default, - required=required, - help=help) - - -class _StoreFalseAction(_StoreConstAction): - - def __init__(self, - option_strings, - dest, - default=True, - required=False, - help=None): - super(_StoreFalseAction, self).__init__( - option_strings=option_strings, - dest=dest, - const=False, - default=default, - required=required, - help=help) - - -class _AppendAction(Action): - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): - if nargs == 0: - raise ValueError('nargs for append actions must be != 0; if arg ' - 'strings are not supplying the value to append, ' - 'the append const action may be more appropriate') - if const is not None and nargs != OPTIONAL: - raise ValueError('nargs must be %r to supply const' % OPTIONAL) - super(_AppendAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=nargs, - const=const, - default=default, - type=type, - choices=choices, - required=required, - help=help, - metavar=metavar) - - def __call__(self, parser, namespace, values, option_string=None): - items = getattr(namespace, self.dest, None) - items = _copy_items(items) - items.append(values) - setattr(namespace, self.dest, items) - - -class _AppendConstAction(Action): - - def __init__(self, - option_strings, - dest, - const=None, - default=None, - required=False, - help=None, - metavar=None): - super(_AppendConstAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - const=const, - default=default, - required=required, - help=help, - metavar=metavar) - - def __call__(self, parser, namespace, values, option_string=None): - items = getattr(namespace, self.dest, None) - items = _copy_items(items) - items.append(self.const) - setattr(namespace, self.dest, items) - - -class _CountAction(Action): - - def __init__(self, - option_strings, - dest, - default=None, - required=False, - help=None): - super(_CountAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - default=default, - required=required, - help=help) - - def __call__(self, parser, namespace, values, option_string=None): - count = getattr(namespace, self.dest, None) - if count is None: - count = 0 - setattr(namespace, self.dest, count + 1) - - -class _HelpAction(Action): - - def __init__(self, - option_strings, - dest=SUPPRESS, - default=SUPPRESS, - help=None): - super(_HelpAction, self).__init__( - option_strings=option_strings, - dest=dest, - default=default, - nargs=0, - help=help) - - def __call__(self, parser, namespace, values, option_string=None): - parser.print_help() - parser.exit() - - -class _VersionAction(Action): - - def __init__(self, - option_strings, - version=None, - dest=SUPPRESS, - default=SUPPRESS, - help="show program's version number and exit"): - super(_VersionAction, self).__init__( - option_strings=option_strings, - dest=dest, - default=default, - nargs=0, - help=help) - self.version = version - - def 
__call__(self, parser, namespace, values, option_string=None): - version = self.version - if version is None: - version = parser.version - formatter = parser._get_formatter() - formatter.add_text(version) - parser._print_message(formatter.format_help(), _sys.stdout) - parser.exit() - - -class _SubParsersAction(Action): - - class _ChoicesPseudoAction(Action): - - def __init__(self, name, aliases, help): - metavar = dest = name - if aliases: - metavar += ' (%s)' % ', '.join(aliases) - sup = super(_SubParsersAction._ChoicesPseudoAction, self) - sup.__init__(option_strings=[], dest=dest, help=help, - metavar=metavar) - - def __init__(self, - option_strings, - prog, - parser_class, - dest=SUPPRESS, - required=False, - help=None, - metavar=None): - - self._prog_prefix = prog - self._parser_class = parser_class - self._name_parser_map = {} - self._choices_actions = [] - - super(_SubParsersAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=PARSER, - choices=self._name_parser_map, - required=required, - help=help, - metavar=metavar) - - def add_parser(self, name, **kwargs): - # set prog from the existing prefix - if kwargs.get('prog') is None: - kwargs['prog'] = '%s %s' % (self._prog_prefix, name) - - aliases = kwargs.pop('aliases', ()) - - if name in self._name_parser_map: - raise ArgumentError(self, _('conflicting subparser: %s') % name) - for alias in aliases: - if alias in self._name_parser_map: - raise ArgumentError( - self, _('conflicting subparser alias: %s') % alias) - - # create a pseudo-action to hold the choice help - if 'help' in kwargs: - help = kwargs.pop('help') - choice_action = self._ChoicesPseudoAction(name, aliases, help) - self._choices_actions.append(choice_action) - - # create the parser and add it to the map - parser = self._parser_class(**kwargs) - self._name_parser_map[name] = parser - - # make parser available under aliases also - for alias in aliases: - self._name_parser_map[alias] = parser - - return parser - - def _get_subactions(self): - return self._choices_actions - - def __call__(self, parser, namespace, values, option_string=None): - parser_name = values[0] - arg_strings = values[1:] - - # set the parser name if requested - if self.dest is not SUPPRESS: - setattr(namespace, self.dest, parser_name) - - # select the parser - try: - parser = self._name_parser_map[parser_name] - except KeyError: - args = {'parser_name': parser_name, - 'choices': ', '.join(self._name_parser_map)} - msg = _('unknown parser %(parser_name)r (choices: %(choices)s)') % args - raise ArgumentError(self, msg) - - # parse all the remaining options into the namespace - # store any unrecognized options on the object, so that the top - # level parser can decide what to do with them - - # In case this subparser defines new defaults, we parse them - # in a new namespace object and then update the original - # namespace for the relevant parts. 
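To make the comment above concrete, a minimal sketch using only the public argparse API (parser and option names are illustrative): a default defined only on a subparser still lands on the parent namespace.

    import argparse

    parser = argparse.ArgumentParser(prog='tool')
    subparsers = parser.add_subparsers(dest='command')
    run = subparsers.add_parser('run')
    run.add_argument('--level', default='info')  # default lives on the subparser

    args = parser.parse_args(['run'])
    print(args.command, args.level)  # -> run info
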
- subnamespace, arg_strings = parser.parse_known_args(arg_strings, None) - for key, value in vars(subnamespace).items(): - setattr(namespace, key, value) - - if arg_strings: - vars(namespace).setdefault(_UNRECOGNIZED_ARGS_ATTR, []) - getattr(namespace, _UNRECOGNIZED_ARGS_ATTR).extend(arg_strings) - -class _ExtendAction(_AppendAction): - def __call__(self, parser, namespace, values, option_string=None): - items = getattr(namespace, self.dest, None) - items = _copy_items(items) - items.extend(values) - setattr(namespace, self.dest, items) - -# ============== -# Type classes -# ============== - -class FileType(object): - """Factory for creating file object types - - Instances of FileType are typically passed as type= arguments to the - ArgumentParser add_argument() method. - - Keyword Arguments: - - mode -- A string indicating how the file is to be opened. Accepts the - same values as the builtin open() function. - - bufsize -- The file's desired buffer size. Accepts the same values as - the builtin open() function. - - encoding -- The file's encoding. Accepts the same values as the - builtin open() function. - - errors -- A string indicating how encoding and decoding errors are to - be handled. Accepts the same value as the builtin open() function. - """ - - def __init__(self, mode='r', bufsize=-1, encoding=None, errors=None): - self._mode = mode - self._bufsize = bufsize - self._encoding = encoding - self._errors = errors - - def __call__(self, string): - # the special argument "-" means sys.std{in,out} - if string == '-': - if 'r' in self._mode: - return _sys.stdin.buffer if 'b' in self._mode else _sys.stdin - elif any(c in self._mode for c in 'wax'): - return _sys.stdout.buffer if 'b' in self._mode else _sys.stdout - else: - msg = _('argument "-" with mode %r') % self._mode - raise ValueError(msg) - - # all other arguments are used as file names - try: - return open(string, self._mode, self._bufsize, self._encoding, - self._errors) - except OSError as e: - args = {'filename': string, 'error': e} - message = _("can't open '%(filename)s': %(error)s") - raise ArgumentTypeError(message % args) - - def __repr__(self): - args = self._mode, self._bufsize - kwargs = [('encoding', self._encoding), ('errors', self._errors)] - args_str = ', '.join([repr(arg) for arg in args if arg != -1] + - ['%s=%r' % (kw, arg) for kw, arg in kwargs - if arg is not None]) - return '%s(%s)' % (type(self).__name__, args_str) - -# =========================== -# Optional and Positional Parsing -# =========================== - -class Namespace(_AttributeHolder): - """Simple object for storing attributes. - - Implements equality by attribute names and values, and provides a simple - string representation. 
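A short sketch of these semantics (values are illustrative):

    from argparse import Namespace

    a = Namespace(x=1, y='two')
    b = Namespace(x=1, y='two')
    print(a)         # Namespace(x=1, y='two')
    print(a == b)    # True: equality compares attribute names and values
    print('x' in a)  # True: membership tests attribute presence
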
- """ - - def __init__(self, **kwargs): - for name in kwargs: - setattr(self, name, kwargs[name]) - - def __eq__(self, other): - if not isinstance(other, Namespace): - return NotImplemented - return vars(self) == vars(other) - - def __contains__(self, key): - return key in self.__dict__ - - -class _ActionsContainer(object): - - def __init__(self, - description, - prefix_chars, - argument_default, - conflict_handler): - super(_ActionsContainer, self).__init__() - - self.description = description - self.argument_default = argument_default - self.prefix_chars = prefix_chars - self.conflict_handler = conflict_handler - - # set up registries - self._registries = {} - - # register actions - self.register('action', None, _StoreAction) - self.register('action', 'store', _StoreAction) - self.register('action', 'store_const', _StoreConstAction) - self.register('action', 'store_true', _StoreTrueAction) - self.register('action', 'store_false', _StoreFalseAction) - self.register('action', 'append', _AppendAction) - self.register('action', 'append_const', _AppendConstAction) - self.register('action', 'count', _CountAction) - self.register('action', 'help', _HelpAction) - self.register('action', 'version', _VersionAction) - self.register('action', 'parsers', _SubParsersAction) - self.register('action', 'extend', _ExtendAction) - - # raise an exception if the conflict handler is invalid - self._get_handler() - - # action storage - self._actions = [] - self._option_string_actions = {} - - # groups - self._action_groups = [] - self._mutually_exclusive_groups = [] - - # defaults storage - self._defaults = {} - - # determines whether an "option" looks like a negative number - self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$') - - # whether or not there are any optionals that look like negative - # numbers -- uses a list so it can be shared and edited - self._has_negative_number_optionals = [] - - # ==================== - # Registration methods - # ==================== - def register(self, registry_name, value, object): - registry = self._registries.setdefault(registry_name, {}) - registry[value] = object - - def _registry_get(self, registry_name, value, default=None): - return self._registries[registry_name].get(value, default) - - # ================================== - # Namespace default accessor methods - # ================================== - def set_defaults(self, **kwargs): - self._defaults.update(kwargs) - - # if these defaults match any existing arguments, replace - # the previous default on the object with the new one - for action in self._actions: - if action.dest in kwargs: - action.default = kwargs[action.dest] - - def get_default(self, dest): - for action in self._actions: - if action.dest == dest and action.default is not None: - return action.default - return self._defaults.get(dest, None) - - - # ======================= - # Adding argument actions - # ======================= - def add_argument(self, *args, **kwargs): - """ - add_argument(dest, ..., name=value, ...) - add_argument(option_string, option_string, ..., name=value, ...) 
- """ - - # if no positional args are supplied or only one is supplied and - # it doesn't look like an option string, parse a positional - # argument - chars = self.prefix_chars - if not args or len(args) == 1 and args[0][0] not in chars: - if args and 'dest' in kwargs: - raise ValueError('dest supplied twice for positional argument') - kwargs = self._get_positional_kwargs(*args, **kwargs) - - # otherwise, we're adding an optional argument - else: - kwargs = self._get_optional_kwargs(*args, **kwargs) - - # if no default was supplied, use the parser-level default - if 'default' not in kwargs: - dest = kwargs['dest'] - if dest in self._defaults: - kwargs['default'] = self._defaults[dest] - elif self.argument_default is not None: - kwargs['default'] = self.argument_default - - # create the action object, and add it to the parser - action_class = self._pop_action_class(kwargs) - if not callable(action_class): - raise ValueError('unknown action "%s"' % (action_class,)) - action = action_class(**kwargs) - - # raise an error if the action type is not callable - type_func = self._registry_get('type', action.type, action.type) - if not callable(type_func): - raise ValueError('%r is not callable' % (type_func,)) - - if type_func is FileType: - raise ValueError('%r is a FileType class object, instance of it' - ' must be passed' % (type_func,)) - - # raise an error if the metavar does not match the type - if hasattr(self, "_get_formatter"): - try: - self._get_formatter()._format_args(action, None) - except TypeError: - raise ValueError("length of metavar tuple does not match nargs") - - return self._add_action(action) - - def add_argument_group(self, *args, **kwargs): - group = _ArgumentGroup(self, *args, **kwargs) - self._action_groups.append(group) - return group - - def add_mutually_exclusive_group(self, **kwargs): - group = _MutuallyExclusiveGroup(self, **kwargs) - self._mutually_exclusive_groups.append(group) - return group - - def _add_action(self, action): - # resolve any conflicts - self._check_conflict(action) - - # add to actions list - self._actions.append(action) - action.container = self - - # index the action by any option strings it has - for option_string in action.option_strings: - self._option_string_actions[option_string] = action - - # set the flag if any option strings look like negative numbers - for option_string in action.option_strings: - if self._negative_number_matcher.match(option_string): - if not self._has_negative_number_optionals: - self._has_negative_number_optionals.append(True) - - # return the created action - return action - - def _remove_action(self, action): - self._actions.remove(action) - - def _add_container_actions(self, container): - # collect groups by titles - title_group_map = {} - for group in self._action_groups: - if group.title in title_group_map: - msg = _('cannot merge actions - two groups are named %r') - raise ValueError(msg % (group.title)) - title_group_map[group.title] = group - - # map each action to its group - group_map = {} - for group in container._action_groups: - - # if a group with the title exists, use that, otherwise - # create a new group matching the container's group - if group.title not in title_group_map: - title_group_map[group.title] = self.add_argument_group( - title=group.title, - description=group.description, - conflict_handler=group.conflict_handler) - - # map the actions to their new group - for action in group._group_actions: - group_map[action] = title_group_map[group.title] - - # add container's mutually exclusive 
groups - # NOTE: if add_mutually_exclusive_group ever gains title= and - # description= then this code will need to be expanded as above - for group in container._mutually_exclusive_groups: - mutex_group = self.add_mutually_exclusive_group( - required=group.required) - - # map the actions to their new mutex group - for action in group._group_actions: - group_map[action] = mutex_group - - # add all actions to this container or their group - for action in container._actions: - group_map.get(action, self)._add_action(action) - - def _get_positional_kwargs(self, dest, **kwargs): - # make sure required is not specified - if 'required' in kwargs: - msg = _("'required' is an invalid argument for positionals") - raise TypeError(msg) - - # mark positional arguments as required if at least one is - # always required - if kwargs.get('nargs') not in [OPTIONAL, ZERO_OR_MORE]: - kwargs['required'] = True - if kwargs.get('nargs') == ZERO_OR_MORE and 'default' not in kwargs: - kwargs['required'] = True - - # return the keyword arguments with no option strings - return dict(kwargs, dest=dest, option_strings=[]) - - def _get_optional_kwargs(self, *args, **kwargs): - # determine short and long option strings - option_strings = [] - long_option_strings = [] - for option_string in args: - # error on strings that don't start with an appropriate prefix - if not option_string[0] in self.prefix_chars: - args = {'option': option_string, - 'prefix_chars': self.prefix_chars} - msg = _('invalid option string %(option)r: ' - 'must start with a character %(prefix_chars)r') - raise ValueError(msg % args) - - # strings starting with two prefix characters are long options - option_strings.append(option_string) - if len(option_string) > 1 and option_string[1] in self.prefix_chars: - long_option_strings.append(option_string) - - # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x' - dest = kwargs.pop('dest', None) - if dest is None: - if long_option_strings: - dest_option_string = long_option_strings[0] - else: - dest_option_string = option_strings[0] - dest = dest_option_string.lstrip(self.prefix_chars) - if not dest: - msg = _('dest= is required for options like %r') - raise ValueError(msg % option_string) - dest = dest.replace('-', '_') - - # return the updated keyword arguments - return dict(kwargs, dest=dest, option_strings=option_strings) - - def _pop_action_class(self, kwargs, default=None): - action = kwargs.pop('action', default) - return self._registry_get('action', action, action) - - def _get_handler(self): - # determine function from conflict handler string - handler_func_name = '_handle_conflict_%s' % self.conflict_handler - try: - return getattr(self, handler_func_name) - except AttributeError: - msg = _('invalid conflict_resolution value: %r') - raise ValueError(msg % self.conflict_handler) - - def _check_conflict(self, action): - - # find all options that conflict with this option - confl_optionals = [] - for option_string in action.option_strings: - if option_string in self._option_string_actions: - confl_optional = self._option_string_actions[option_string] - confl_optionals.append((option_string, confl_optional)) - - # resolve any conflicts - if confl_optionals: - conflict_handler = self._get_handler() - conflict_handler(action, confl_optionals) - - def _handle_conflict_error(self, action, conflicting_actions): - message = ngettext('conflicting option string: %s', - 'conflicting option strings: %s', - len(conflicting_actions)) - conflict_string = ', '.join([option_string - for option_string, 
action - in conflicting_actions]) - raise ArgumentError(action, message % conflict_string) - - def _handle_conflict_resolve(self, action, conflicting_actions): - - # remove all conflicting options - for option_string, action in conflicting_actions: - - # remove the conflicting option - action.option_strings.remove(option_string) - self._option_string_actions.pop(option_string, None) - - # if the option now has no option string, remove it from the - # container holding it - if not action.option_strings: - action.container._remove_action(action) - - -class _ArgumentGroup(_ActionsContainer): - - def __init__(self, container, title=None, description=None, **kwargs): - # add any missing keyword arguments by checking the container - update = kwargs.setdefault - update('conflict_handler', container.conflict_handler) - update('prefix_chars', container.prefix_chars) - update('argument_default', container.argument_default) - super_init = super(_ArgumentGroup, self).__init__ - super_init(description=description, **kwargs) - - # group attributes - self.title = title - self._group_actions = [] - - # share most attributes with the container - self._registries = container._registries - self._actions = container._actions - self._option_string_actions = container._option_string_actions - self._defaults = container._defaults - self._has_negative_number_optionals = \ - container._has_negative_number_optionals - self._mutually_exclusive_groups = container._mutually_exclusive_groups - - def _add_action(self, action): - action = super(_ArgumentGroup, self)._add_action(action) - self._group_actions.append(action) - return action - - def _remove_action(self, action): - super(_ArgumentGroup, self)._remove_action(action) - self._group_actions.remove(action) - - def add_argument_group(self, *args, **kwargs): - warnings.warn( - "Nesting argument groups is deprecated.", - category=DeprecationWarning, - stacklevel=2 - ) - return super().add_argument_group(*args, **kwargs) - - -class _MutuallyExclusiveGroup(_ArgumentGroup): - - def __init__(self, container, required=False): - super(_MutuallyExclusiveGroup, self).__init__(container) - self.required = required - self._container = container - - def _add_action(self, action): - if action.required: - msg = _('mutually exclusive arguments must be optional') - raise ValueError(msg) - action = self._container._add_action(action) - self._group_actions.append(action) - return action - - def _remove_action(self, action): - self._container._remove_action(action) - self._group_actions.remove(action) - - def add_mutually_exclusive_group(self, *args, **kwargs): - warnings.warn( - "Nesting mutually exclusive groups is deprecated.", - category=DeprecationWarning, - stacklevel=2 - ) - return super().add_mutually_exclusive_group(*args, **kwargs) - - -class ArgumentParser(_AttributeHolder, _ActionsContainer): - """Object for parsing command line strings into Python objects. 
- - Keyword Arguments: - - prog -- The name of the program (default: - ``os.path.basename(sys.argv[0])``) - - usage -- A usage message (default: auto-generated from arguments) - - description -- A description of what the program does - - epilog -- Text following the argument descriptions - - parents -- Parsers whose arguments should be copied into this one - - formatter_class -- HelpFormatter class for printing help messages - - prefix_chars -- Characters that prefix optional arguments - - fromfile_prefix_chars -- Characters that prefix files containing - additional arguments - - argument_default -- The default value for all arguments - - conflict_handler -- String indicating how to handle conflicts - - add_help -- Add a -h/--help option - - allow_abbrev -- Allow long options to be abbreviated unambiguously - - exit_on_error -- Determines whether or not ArgumentParser exits with - error info when an error occurs - """ - - def __init__(self, - prog=None, - usage=None, - description=None, - epilog=None, - parents=[], - formatter_class=HelpFormatter, - prefix_chars='-', - fromfile_prefix_chars=None, - argument_default=None, - conflict_handler='error', - add_help=True, - allow_abbrev=True, - exit_on_error=True): - - superinit = super(ArgumentParser, self).__init__ - superinit(description=description, - prefix_chars=prefix_chars, - argument_default=argument_default, - conflict_handler=conflict_handler) - - # default setting for prog - if prog is None: - prog = _os.path.basename(_sys.argv[0]) - - self.prog = prog - self.usage = usage - self.epilog = epilog - self.formatter_class = formatter_class - self.fromfile_prefix_chars = fromfile_prefix_chars - self.add_help = add_help - self.allow_abbrev = allow_abbrev - self.exit_on_error = exit_on_error - - add_group = self.add_argument_group - self._positionals = add_group(_('positional arguments')) - self._optionals = add_group(_('options')) - self._subparsers = None - - # register types - def identity(string): - return string - self.register('type', None, identity) - - # add help argument if necessary - # (using explicit default to override global argument_default) - default_prefix = '-' if '-' in prefix_chars else prefix_chars[0] - if self.add_help: - self.add_argument( - default_prefix+'h', default_prefix*2+'help', - action='help', default=SUPPRESS, - help=_('show this help message and exit')) - - # add parent arguments and defaults - for parent in parents: - self._add_container_actions(parent) - try: - defaults = parent._defaults - except AttributeError: - pass - else: - self._defaults.update(defaults) - - # ======================= - # Pretty __repr__ methods - # ======================= - def _get_kwargs(self): - names = [ - 'prog', - 'usage', - 'description', - 'formatter_class', - 'conflict_handler', - 'add_help', - ] - return [(name, getattr(self, name)) for name in names] - - # ================================== - # Optional/Positional adding methods - # ================================== - def add_subparsers(self, **kwargs): - if self._subparsers is not None: - self.error(_('cannot have multiple subparser arguments')) - - # add the parser class to the arguments if it's not present - kwargs.setdefault('parser_class', type(self)) - - if 'title' in kwargs or 'description' in kwargs: - title = _(kwargs.pop('title', 'subcommands')) - description = _(kwargs.pop('description', None)) - self._subparsers = self.add_argument_group(title, description) - else: - self._subparsers = self._positionals - - # prog defaults to the usage message of this parser, 
skipping - # optional arguments and with no "usage:" prefix - if kwargs.get('prog') is None: - formatter = self._get_formatter() - positionals = self._get_positional_actions() - groups = self._mutually_exclusive_groups - formatter.add_usage(self.usage, positionals, groups, '') - kwargs['prog'] = formatter.format_help().strip() - - # create the parsers action and add it to the positionals list - parsers_class = self._pop_action_class(kwargs, 'parsers') - action = parsers_class(option_strings=[], **kwargs) - self._subparsers._add_action(action) - - # return the created parsers action - return action - - def _add_action(self, action): - if action.option_strings: - self._optionals._add_action(action) - else: - self._positionals._add_action(action) - return action - - def _get_optional_actions(self): - return [action - for action in self._actions - if action.option_strings] - - def _get_positional_actions(self): - return [action - for action in self._actions - if not action.option_strings] - - # ===================================== - # Command line argument parsing methods - # ===================================== - def parse_args(self, args=None, namespace=None): - args, argv = self.parse_known_args(args, namespace) - if argv: - msg = _('unrecognized arguments: %s') - self.error(msg % ' '.join(argv)) - return args - - def parse_known_args(self, args=None, namespace=None): - if args is None: - # args default to the system args - args = _sys.argv[1:] - else: - # make sure that args are mutable - args = list(args) - - # default Namespace built from parser defaults - if namespace is None: - namespace = Namespace() - - # add any action defaults that aren't present - for action in self._actions: - if action.dest is not SUPPRESS: - if not hasattr(namespace, action.dest): - if action.default is not SUPPRESS: - setattr(namespace, action.dest, action.default) - - # add any parser defaults that aren't present - for dest in self._defaults: - if not hasattr(namespace, dest): - setattr(namespace, dest, self._defaults[dest]) - - # parse the arguments and exit if there are any errors - if self.exit_on_error: - try: - namespace, args = self._parse_known_args(args, namespace) - except ArgumentError as err: - self.error(str(err)) - else: - namespace, args = self._parse_known_args(args, namespace) - - if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): - args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) - delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) - return namespace, args - - def _parse_known_args(self, arg_strings, namespace): - # replace arg strings that are file references - if self.fromfile_prefix_chars is not None: - arg_strings = self._read_args_from_files(arg_strings) - - # map all mutually exclusive arguments to the other arguments - # they can't occur with - action_conflicts = {} - for mutex_group in self._mutually_exclusive_groups: - group_actions = mutex_group._group_actions - for i, mutex_action in enumerate(mutex_group._group_actions): - conflicts = action_conflicts.setdefault(mutex_action, []) - conflicts.extend(group_actions[:i]) - conflicts.extend(group_actions[i + 1:]) - - # find all option indices, and determine the arg_string_pattern - # which has an 'O' if there is an option at an index, - # an 'A' if there is an argument, or a '-' if there is a '--' - option_string_indices = {} - arg_string_pattern_parts = [] - arg_strings_iter = iter(arg_strings) - for i, arg_string in enumerate(arg_strings_iter): - - # all args after -- are non-options - if arg_string == '--': - 
arg_string_pattern_parts.append('-') - for arg_string in arg_strings_iter: - arg_string_pattern_parts.append('A') - - # otherwise, add the arg to the arg strings - # and note the index if it was an option - else: - option_tuple = self._parse_optional(arg_string) - if option_tuple is None: - pattern = 'A' - else: - option_string_indices[i] = option_tuple - pattern = 'O' - arg_string_pattern_parts.append(pattern) - - # join the pieces together to form the pattern - arg_strings_pattern = ''.join(arg_string_pattern_parts) - - # converts arg strings to the appropriate type and then takes the action - seen_actions = set() - seen_non_default_actions = set() - - def take_action(action, argument_strings, option_string=None): - seen_actions.add(action) - argument_values = self._get_values(action, argument_strings) - - # error if this argument is not allowed with other previously - # seen arguments, assuming that actions that use the default - # value don't really count as "present" - if argument_values is not action.default: - seen_non_default_actions.add(action) - for conflict_action in action_conflicts.get(action, []): - if conflict_action in seen_non_default_actions: - msg = _('not allowed with argument %s') - action_name = _get_action_name(conflict_action) - raise ArgumentError(action, msg % action_name) - - # take the action if we didn't receive a SUPPRESS value - # (e.g. from a default) - if argument_values is not SUPPRESS: - action(self, namespace, argument_values, option_string) - - # function to convert arg_strings into an optional action - def consume_optional(start_index): - - # get the optional identified at this index - option_tuple = option_string_indices[start_index] - action, option_string, explicit_arg = option_tuple - - # identify additional optionals in the same arg string - # (e.g. 
-xyz is the same as -x -y -z if no args are required) - match_argument = self._match_argument - action_tuples = [] - while True: - - # if we found no optional action, skip it - if action is None: - extras.append(arg_strings[start_index]) - return start_index + 1 - - # if there is an explicit argument, try to match the - # optional's string arguments to only this - if explicit_arg is not None: - arg_count = match_argument(action, 'A') - - # if the action is a single-dash option and takes no - # arguments, try to parse more single-dash options out - # of the tail of the option string - chars = self.prefix_chars - if ( - arg_count == 0 - and option_string[1] not in chars - and explicit_arg != '' - ): - action_tuples.append((action, [], option_string)) - char = option_string[0] - option_string = char + explicit_arg[0] - new_explicit_arg = explicit_arg[1:] or None - optionals_map = self._option_string_actions - if option_string in optionals_map: - action = optionals_map[option_string] - explicit_arg = new_explicit_arg - else: - msg = _('ignored explicit argument %r') - raise ArgumentError(action, msg % explicit_arg) - - # if the action expects exactly one argument, we've - # successfully matched the option; exit the loop - elif arg_count == 1: - stop = start_index + 1 - args = [explicit_arg] - action_tuples.append((action, args, option_string)) - break - - # error if a double-dash option did not use the - # explicit argument - else: - msg = _('ignored explicit argument %r') - raise ArgumentError(action, msg % explicit_arg) - - # if there is no explicit argument, try to match the - # optional's string arguments with the following strings - # if successful, exit the loop - else: - start = start_index + 1 - selected_patterns = arg_strings_pattern[start:] - arg_count = match_argument(action, selected_patterns) - stop = start + arg_count - args = arg_strings[start:stop] - action_tuples.append((action, args, option_string)) - break - - # add the Optional to the list and return the index at which - # the Optional's string args stopped - assert action_tuples - for action, args, option_string in action_tuples: - take_action(action, args, option_string) - return stop - - # the list of Positionals left to be parsed; this is modified - # by consume_positionals() - positionals = self._get_positional_actions() - - # function to convert arg_strings into positional actions - def consume_positionals(start_index): - # match as many Positionals as possible - match_partial = self._match_arguments_partial - selected_pattern = arg_strings_pattern[start_index:] - arg_counts = match_partial(positionals, selected_pattern) - - # slice off the appropriate arg strings for each Positional - # and add the Positional and its args to the list - for action, arg_count in zip(positionals, arg_counts): - args = arg_strings[start_index: start_index + arg_count] - start_index += arg_count - take_action(action, args) - - # slice off the Positionals that we just parsed and return the - # index at which the Positionals' string args stopped - positionals[:] = positionals[len(arg_counts):] - return start_index - - # consume Positionals and Optionals alternately, until we have - # passed the last option string - extras = [] - start_index = 0 - if option_string_indices: - max_option_string_index = max(option_string_indices) - else: - max_option_string_index = -1 - while start_index <= max_option_string_index: - - # consume any Positionals preceding the next option - next_option_string_index = min([ - index - for index in 
option_string_indices - if index >= start_index]) - if start_index != next_option_string_index: - positionals_end_index = consume_positionals(start_index) - - # only try to parse the next optional if we didn't consume - # the option string during the positionals parsing - if positionals_end_index > start_index: - start_index = positionals_end_index - continue - else: - start_index = positionals_end_index - - # if we consumed all the positionals we could and we're not - # at the index of an option string, there were extra arguments - if start_index not in option_string_indices: - strings = arg_strings[start_index:next_option_string_index] - extras.extend(strings) - start_index = next_option_string_index - - # consume the next optional and any arguments for it - start_index = consume_optional(start_index) - - # consume any positionals following the last Optional - stop_index = consume_positionals(start_index) - - # if we didn't consume all the argument strings, there were extras - extras.extend(arg_strings[stop_index:]) - - # make sure all required actions were present and also convert - # action defaults which were not given as arguments - required_actions = [] - for action in self._actions: - if action not in seen_actions: - if action.required: - required_actions.append(_get_action_name(action)) - else: - # Convert action default now instead of doing it before - # parsing arguments to avoid calling convert functions - # twice (which may fail) if the argument was given, but - # only if it was defined already in the namespace - if (action.default is not None and - isinstance(action.default, str) and - hasattr(namespace, action.dest) and - action.default is getattr(namespace, action.dest)): - setattr(namespace, action.dest, - self._get_value(action, action.default)) - - if required_actions: - self.error(_('the following arguments are required: %s') % - ', '.join(required_actions)) - - # make sure all required groups had one option present - for group in self._mutually_exclusive_groups: - if group.required: - for action in group._group_actions: - if action in seen_non_default_actions: - break - - # if no actions were used, report the error - else: - names = [_get_action_name(action) - for action in group._group_actions - if action.help is not SUPPRESS] - msg = _('one of the arguments %s is required') - self.error(msg % ' '.join(names)) - - # return the updated namespace and the extra arguments - return namespace, extras - - def _read_args_from_files(self, arg_strings): - # expand arguments referencing files - new_arg_strings = [] - for arg_string in arg_strings: - - # for regular arguments, just add them back into the list - if not arg_string or arg_string[0] not in self.fromfile_prefix_chars: - new_arg_strings.append(arg_string) - - # replace arguments referencing files with the file content - else: - try: - with open(arg_string[1:], - encoding=_sys.getfilesystemencoding(), - errors=_sys.getfilesystemencodeerrors()) as args_file: - arg_strings = [] - for arg_line in args_file.read().splitlines(): - for arg in self.convert_arg_line_to_args(arg_line): - arg_strings.append(arg) - arg_strings = self._read_args_from_files(arg_strings) - new_arg_strings.extend(arg_strings) - except OSError as err: - self.error(str(err)) - - # return the modified argument list - return new_arg_strings - - def convert_arg_line_to_args(self, arg_line): - return [arg_line] - - def _match_argument(self, action, arg_strings_pattern): - # match the pattern for this action to the arg strings - nargs_pattern = 
self._get_nargs_pattern(action) - match = _re.match(nargs_pattern, arg_strings_pattern) - - # raise an exception if we weren't able to find a match - if match is None: - nargs_errors = { - None: _('expected one argument'), - OPTIONAL: _('expected at most one argument'), - ONE_OR_MORE: _('expected at least one argument'), - } - msg = nargs_errors.get(action.nargs) - if msg is None: - msg = ngettext('expected %s argument', - 'expected %s arguments', - action.nargs) % action.nargs - raise ArgumentError(action, msg) - - # return the number of arguments matched - return len(match.group(1)) - - def _match_arguments_partial(self, actions, arg_strings_pattern): - # progressively shorten the actions list by slicing off the - # final actions until we find a match - result = [] - for i in range(len(actions), 0, -1): - actions_slice = actions[:i] - pattern = ''.join([self._get_nargs_pattern(action) - for action in actions_slice]) - match = _re.match(pattern, arg_strings_pattern) - if match is not None: - result.extend([len(string) for string in match.groups()]) - break - - # return the list of arg string counts - return result - - def _parse_optional(self, arg_string): - # if it's an empty string, it was meant to be a positional - if not arg_string: - return None - - # if it doesn't start with a prefix, it was meant to be positional - if not arg_string[0] in self.prefix_chars: - return None - - # if the option string is present in the parser, return the action - if arg_string in self._option_string_actions: - action = self._option_string_actions[arg_string] - return action, arg_string, None - - # if it's just a single character, it was meant to be positional - if len(arg_string) == 1: - return None - - # if the option string before the "=" is present, return the action - if '=' in arg_string: - option_string, explicit_arg = arg_string.split('=', 1) - if option_string in self._option_string_actions: - action = self._option_string_actions[option_string] - return action, option_string, explicit_arg - - # search through all possible prefixes of the option string - # and all actions in the parser for possible interpretations - option_tuples = self._get_option_tuples(arg_string) - - # if multiple actions match, the option string was ambiguous - if len(option_tuples) > 1: - options = ', '.join([option_string - for action, option_string, explicit_arg in option_tuples]) - args = {'option': arg_string, 'matches': options} - msg = _('ambiguous option: %(option)s could match %(matches)s') - self.error(msg % args) - - # if exactly one action matched, this segmentation is good, - # so return the parsed action - elif len(option_tuples) == 1: - option_tuple, = option_tuples - return option_tuple - - # if it was not found as an option, but it looks like a negative - # number, it was meant to be positional - # unless there are negative-number-like options - if self._negative_number_matcher.match(arg_string): - if not self._has_negative_number_optionals: - return None - - # if it contains a space, it was meant to be a positional - if ' ' in arg_string: - return None - - # it was meant to be an optional but there is no such option - # in this parser (though it might be a valid option in a subparser) - return None, arg_string, None - - def _get_option_tuples(self, option_string): - result = [] - - # option strings starting with two prefix characters are only - # split at the '=' - chars = self.prefix_chars - if option_string[0] in chars and option_string[1] in chars: - if self.allow_abbrev: - if '=' in option_string: - 
option_prefix, explicit_arg = option_string.split('=', 1) - else: - option_prefix = option_string - explicit_arg = None - for option_string in self._option_string_actions: - if option_string.startswith(option_prefix): - action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg - result.append(tup) - - # single character options can be concatenated with their arguments - # but multiple character options always have to have their argument - # separate - elif option_string[0] in chars and option_string[1] not in chars: - option_prefix = option_string - explicit_arg = None - short_option_prefix = option_string[:2] - short_explicit_arg = option_string[2:] - - for option_string in self._option_string_actions: - if option_string == short_option_prefix: - action = self._option_string_actions[option_string] - tup = action, option_string, short_explicit_arg - result.append(tup) - elif option_string.startswith(option_prefix): - action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg - result.append(tup) - - # shouldn't ever get here - else: - self.error(_('unexpected option string: %s') % option_string) - - # return the collected option tuples - return result - - def _get_nargs_pattern(self, action): - # in all examples below, we have to allow for '--' args - # which are represented as '-' in the pattern - nargs = action.nargs - - # the default (None) is assumed to be a single argument - if nargs is None: - nargs_pattern = '(-*A-*)' - - # allow zero or one arguments - elif nargs == OPTIONAL: - nargs_pattern = '(-*A?-*)' - - # allow zero or more arguments - elif nargs == ZERO_OR_MORE: - nargs_pattern = '(-*[A-]*)' - - # allow one or more arguments - elif nargs == ONE_OR_MORE: - nargs_pattern = '(-*A[A-]*)' - - # allow any number of options or arguments - elif nargs == REMAINDER: - nargs_pattern = '([-AO]*)' - - # allow one argument followed by any number of options or arguments - elif nargs == PARSER: - nargs_pattern = '(-*A[-AO]*)' - - # suppress action, like nargs=0 - elif nargs == SUPPRESS: - nargs_pattern = '(-*-*)' - - # all others should be integers - else: - nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs) - - # if this is an optional action, -- is not allowed - if action.option_strings: - nargs_pattern = nargs_pattern.replace('-*', '') - nargs_pattern = nargs_pattern.replace('-', '') - - # return the pattern - return nargs_pattern - - # ======================== - # Alt command line argument parsing, allowing free intermix - # ======================== - - def parse_intermixed_args(self, args=None, namespace=None): - args, argv = self.parse_known_intermixed_args(args, namespace) - if argv: - msg = _('unrecognized arguments: %s') - self.error(msg % ' '.join(argv)) - return args - - def parse_known_intermixed_args(self, args=None, namespace=None): - # returns a namespace and list of extras - # - # positional can be freely intermixed with optionals. optionals are - # first parsed with all positional arguments deactivated. The 'extras' - # are then parsed. If the parser definition is incompatible with the - # intermixed assumptions (e.g. use of REMAINDER, subparsers) a - # TypeError is raised. - # - # positionals are 'deactivated' by setting nargs and default to - # SUPPRESS. 
This blocks the addition of that positional to the - # namespace - - positionals = self._get_positional_actions() - a = [action for action in positionals - if action.nargs in [PARSER, REMAINDER]] - if a: - raise TypeError('parse_intermixed_args: positional arg' - ' with nargs=%s'%a[0].nargs) - - if [action.dest for group in self._mutually_exclusive_groups - for action in group._group_actions if action in positionals]: - raise TypeError('parse_intermixed_args: positional in' - ' mutuallyExclusiveGroup') - - try: - save_usage = self.usage - try: - if self.usage is None: - # capture the full usage for use in error messages - self.usage = self.format_usage()[7:] - for action in positionals: - # deactivate positionals - action.save_nargs = action.nargs - # action.nargs = 0 - action.nargs = SUPPRESS - action.save_default = action.default - action.default = SUPPRESS - namespace, remaining_args = self.parse_known_args(args, - namespace) - for action in positionals: - # remove the empty positional values from namespace - if (hasattr(namespace, action.dest) - and getattr(namespace, action.dest)==[]): - from warnings import warn - warn('Do not expect %s in %s' % (action.dest, namespace)) - delattr(namespace, action.dest) - finally: - # restore nargs and usage before exiting - for action in positionals: - action.nargs = action.save_nargs - action.default = action.save_default - optionals = self._get_optional_actions() - try: - # parse positionals. optionals aren't normally required, but - # they could be, so make sure they aren't. - for action in optionals: - action.save_required = action.required - action.required = False - for group in self._mutually_exclusive_groups: - group.save_required = group.required - group.required = False - namespace, extras = self.parse_known_args(remaining_args, - namespace) - finally: - # restore parser values before exiting - for action in optionals: - action.required = action.save_required - for group in self._mutually_exclusive_groups: - group.required = group.save_required - finally: - self.usage = save_usage - return namespace, extras - - # ======================== - # Value conversion methods - # ======================== - def _get_values(self, action, arg_strings): - # for everything but PARSER, REMAINDER args, strip out first '--' - if action.nargs not in [PARSER, REMAINDER]: - try: - arg_strings.remove('--') - except ValueError: - pass - - # optional argument produces a default when not present - if not arg_strings and action.nargs == OPTIONAL: - if action.option_strings: - value = action.const - else: - value = action.default - if isinstance(value, str): - value = self._get_value(action, value) - self._check_value(action, value) - - # when nargs='*' on a positional, if there were no command-line - # args, use the default if it is anything other than None - elif (not arg_strings and action.nargs == ZERO_OR_MORE and - not action.option_strings): - if action.default is not None: - value = action.default - self._check_value(action, value) - else: - # since arg_strings is always [] at this point - # there is no need to use self._check_value(action, value) - value = arg_strings - - # single argument or optional argument produces a single value - elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]: - arg_string, = arg_strings - value = self._get_value(action, arg_string) - self._check_value(action, value) - - # REMAINDER arguments convert all values, checking none - elif action.nargs == REMAINDER: - value = [self._get_value(action, v) for v in arg_strings] 
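For context, a brief sketch of the REMAINDER behaviour this branch supports (program and argument names are illustrative): everything after the first matched positional is kept verbatim, options included.

    import argparse

    parser = argparse.ArgumentParser(prog='wrap')
    parser.add_argument('cmd')
    parser.add_argument('rest', nargs=argparse.REMAINDER)
    args = parser.parse_args(['git', 'commit', '-m', 'msg'])
    print(args.cmd, args.rest)  # -> git ['commit', '-m', 'msg']
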
- - # PARSER arguments convert all values, but check only the first - elif action.nargs == PARSER: - value = [self._get_value(action, v) for v in arg_strings] - self._check_value(action, value[0]) - - # SUPPRESS argument does not put anything in the namespace - elif action.nargs == SUPPRESS: - value = SUPPRESS - - # all other types of nargs produce a list - else: - value = [self._get_value(action, v) for v in arg_strings] - for v in value: - self._check_value(action, v) - - # return the converted value - return value - - def _get_value(self, action, arg_string): - type_func = self._registry_get('type', action.type, action.type) - if not callable(type_func): - msg = _('%r is not callable') - raise ArgumentError(action, msg % type_func) - - # convert the value to the appropriate type - try: - result = type_func(arg_string) - - # ArgumentTypeErrors indicate errors - except ArgumentTypeError as err: - msg = str(err) - raise ArgumentError(action, msg) - - # TypeErrors or ValueErrors also indicate errors - except (TypeError, ValueError): - name = getattr(action.type, '__name__', repr(action.type)) - args = {'type': name, 'value': arg_string} - msg = _('invalid %(type)s value: %(value)r') - raise ArgumentError(action, msg % args) - - # return the converted value - return result - - def _check_value(self, action, value): - # converted value must be one of the choices (if specified) - if action.choices is not None and value not in action.choices: - args = {'value': value, - 'choices': ', '.join(map(repr, action.choices))} - msg = _('invalid choice: %(value)r (choose from %(choices)s)') - raise ArgumentError(action, msg % args) - - # ======================= - # Help-formatting methods - # ======================= - def format_usage(self): - formatter = self._get_formatter() - formatter.add_usage(self.usage, self._actions, - self._mutually_exclusive_groups) - return formatter.format_help() - - def format_help(self): - formatter = self._get_formatter() - - # usage - formatter.add_usage(self.usage, self._actions, - self._mutually_exclusive_groups) - - # description - formatter.add_text(self.description) - - # positionals, optionals and user-defined groups - for action_group in self._action_groups: - formatter.start_section(action_group.title) - formatter.add_text(action_group.description) - formatter.add_arguments(action_group._group_actions) - formatter.end_section() - - # epilog - formatter.add_text(self.epilog) - - # determine help from format above - return formatter.format_help() - - def _get_formatter(self): - return self.formatter_class(prog=self.prog) - - # ===================== - # Help-printing methods - # ===================== - def print_usage(self, file=None): - if file is None: - file = _sys.stdout - self._print_message(self.format_usage(), file) - - def print_help(self, file=None): - if file is None: - file = _sys.stdout - self._print_message(self.format_help(), file) - - def _print_message(self, message, file=None): - if message: - file = file or _sys.stderr - try: - file.write(message) - except (AttributeError, OSError): - pass - - # =============== - # Exiting methods - # =============== - def exit(self, status=0, message=None): - if message: - self._print_message(message, _sys.stderr) - _sys.exit(status) - - def error(self, message): - """error(message: string) - - Prints a usage message incorporating the message to stderr and - exits. - - If you override this in a subclass, it should not return -- it - should either exit or raise an exception. 
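One possible override, as the docstring allows, raising instead of exiting; the subclass name is hypothetical:

    import argparse

    class RaisingParser(argparse.ArgumentParser):
        def error(self, message):
            # never returns: raise instead of printing usage and exiting
            raise ValueError(f'{self.prog}: error: {message}')

    parser = RaisingParser(prog='demo')
    parser.add_argument('--n', type=int, required=True)
    try:
        parser.parse_args([])
    except ValueError as exc:
        print(exc)  # demo: error: the following arguments are required: --n
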
- """ - self.print_usage(_sys.stderr) - args = {'prog': self.prog, 'message': message} - self.exit(2, _('%(prog)s: error: %(message)s\n') % args) diff --git a/python/python3_12/examples/ast.py b/python/python3_12/examples/ast.py deleted file mode 100644 index 07044706dc..0000000000 --- a/python/python3_12/examples/ast.py +++ /dev/null @@ -1,1829 +0,0 @@ -""" - ast - ~~~ - - The `ast` module helps Python applications to process trees of the Python - abstract syntax grammar. The abstract syntax itself might change with - each Python release; this module helps to find out programmatically what - the current grammar looks like and allows modifications of it. - - An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as - a flag to the `compile()` builtin function or by using the `parse()` - function from this module. The result will be a tree of objects whose - classes all inherit from `ast.AST`. - - A modified abstract syntax tree can be compiled into a Python code object - using the built-in `compile()` function. - - Additionally various helper functions are provided that make working with - the trees simpler. The main intention of the helper functions and this - module in general is to provide an easy to use interface for libraries - that work tightly with the python syntax (template engines for example). - - - :copyright: Copyright 2008 by Armin Ronacher. - :license: Python License. -""" -import sys -import re -from _ast import * -from contextlib import contextmanager, nullcontext -from enum import IntEnum, auto, _simple_enum - - -def parse(source, filename='', mode='exec', *, - type_comments=False, feature_version=None): - """ - Parse the source into an AST node. - Equivalent to compile(source, filename, mode, PyCF_ONLY_AST). - Pass type_comments=True to get back type comments where the syntax allows. - """ - flags = PyCF_ONLY_AST - if type_comments: - flags |= PyCF_TYPE_COMMENTS - if feature_version is None: - feature_version = -1 - elif isinstance(feature_version, tuple): - major, minor = feature_version # Should be a 2-tuple. - if major != 3: - raise ValueError(f"Unsupported major version: {major}") - feature_version = minor - # Else it should be an int giving the minor version for 3.x. - return compile(source, filename, mode, flags, - _feature_version=feature_version) - - -def literal_eval(node_or_string): - """ - Evaluate an expression node or a string containing only a Python - expression. The string or node provided may only consist of the following - Python literal structures: strings, bytes, numbers, tuples, lists, dicts, - sets, booleans, and None. - - Caution: A complex expression can overflow the C stack and cause a crash. 
- """ - if isinstance(node_or_string, str): - node_or_string = parse(node_or_string.lstrip(" \t"), mode='eval') - if isinstance(node_or_string, Expression): - node_or_string = node_or_string.body - def _raise_malformed_node(node): - msg = "malformed node or string" - if lno := getattr(node, 'lineno', None): - msg += f' on line {lno}' - raise ValueError(msg + f': {node!r}') - def _convert_num(node): - if not isinstance(node, Constant) or type(node.value) not in (int, float, complex): - _raise_malformed_node(node) - return node.value - def _convert_signed_num(node): - if isinstance(node, UnaryOp) and isinstance(node.op, (UAdd, USub)): - operand = _convert_num(node.operand) - if isinstance(node.op, UAdd): - return + operand - else: - return - operand - return _convert_num(node) - def _convert(node): - if isinstance(node, Constant): - return node.value - elif isinstance(node, Tuple): - return tuple(map(_convert, node.elts)) - elif isinstance(node, List): - return list(map(_convert, node.elts)) - elif isinstance(node, Set): - return set(map(_convert, node.elts)) - elif (isinstance(node, Call) and isinstance(node.func, Name) and - node.func.id == 'set' and node.args == node.keywords == []): - return set() - elif isinstance(node, Dict): - if len(node.keys) != len(node.values): - _raise_malformed_node(node) - return dict(zip(map(_convert, node.keys), - map(_convert, node.values))) - elif isinstance(node, BinOp) and isinstance(node.op, (Add, Sub)): - left = _convert_signed_num(node.left) - right = _convert_num(node.right) - if isinstance(left, (int, float)) and isinstance(right, complex): - if isinstance(node.op, Add): - return left + right - else: - return left - right - return _convert_signed_num(node) - return _convert(node_or_string) - - -def dump(node, annotate_fields=True, include_attributes=False, *, indent=None): - """ - Return a formatted dump of the tree in node. This is mainly useful for - debugging purposes. If annotate_fields is true (by default), - the returned string will show the names and the values for fields. - If annotate_fields is false, the result string will be more compact by - omitting unambiguous field names. Attributes such as line - numbers and column offsets are not dumped by default. If this is wanted, - include_attributes can be set to true. If indent is a non-negative - integer or string, then the tree will be pretty-printed with that indent - level. None (the default) selects the single line representation. - """ - def _format(node, level=0): - if indent is not None: - level += 1 - prefix = '\n' + indent * level - sep = ',\n' + indent * level - else: - prefix = '' - sep = ', ' - if isinstance(node, AST): - cls = type(node) - args = [] - allsimple = True - keywords = annotate_fields - for name in node._fields: - try: - value = getattr(node, name) - except AttributeError: - keywords = True - continue - if value is None and getattr(cls, name, ...) is None: - keywords = True - continue - value, simple = _format(value, level) - allsimple = allsimple and simple - if keywords: - args.append('%s=%s' % (name, value)) - else: - args.append(value) - if include_attributes and node._attributes: - for name in node._attributes: - try: - value = getattr(node, name) - except AttributeError: - continue - if value is None and getattr(cls, name, ...) 
is None: - continue - value, simple = _format(value, level) - allsimple = allsimple and simple - args.append('%s=%s' % (name, value)) - if allsimple and len(args) <= 3: - return '%s(%s)' % (node.__class__.__name__, ', '.join(args)), not args - return '%s(%s%s)' % (node.__class__.__name__, prefix, sep.join(args)), False - elif isinstance(node, list): - if not node: - return '[]', True - return '[%s%s]' % (prefix, sep.join(_format(x, level)[0] for x in node)), False - return repr(node), True - - if not isinstance(node, AST): - raise TypeError('expected AST, got %r' % node.__class__.__name__) - if indent is not None and not isinstance(indent, str): - indent = ' ' * indent - return _format(node)[0] - - -def copy_location(new_node, old_node): - """ - Copy source location (`lineno`, `col_offset`, `end_lineno`, and `end_col_offset` - attributes) from *old_node* to *new_node* if possible, and return *new_node*. - """ - for attr in 'lineno', 'col_offset', 'end_lineno', 'end_col_offset': - if attr in old_node._attributes and attr in new_node._attributes: - value = getattr(old_node, attr, None) - # end_lineno and end_col_offset are optional attributes, and they - # should be copied whether the value is None or not. - if value is not None or ( - hasattr(old_node, attr) and attr.startswith("end_") - ): - setattr(new_node, attr, value) - return new_node - - -def fix_missing_locations(node): - """ - When you compile a node tree with compile(), the compiler expects lineno and - col_offset attributes for every node that supports them. This is rather - tedious to fill in for generated nodes, so this helper adds these attributes - recursively where not already set, by setting them to the values of the - parent node. It works recursively starting at *node*. - """ - def _fix(node, lineno, col_offset, end_lineno, end_col_offset): - if 'lineno' in node._attributes: - if not hasattr(node, 'lineno'): - node.lineno = lineno - else: - lineno = node.lineno - if 'end_lineno' in node._attributes: - if getattr(node, 'end_lineno', None) is None: - node.end_lineno = end_lineno - else: - end_lineno = node.end_lineno - if 'col_offset' in node._attributes: - if not hasattr(node, 'col_offset'): - node.col_offset = col_offset - else: - col_offset = node.col_offset - if 'end_col_offset' in node._attributes: - if getattr(node, 'end_col_offset', None) is None: - node.end_col_offset = end_col_offset - else: - end_col_offset = node.end_col_offset - for child in iter_child_nodes(node): - _fix(child, lineno, col_offset, end_lineno, end_col_offset) - _fix(node, 1, 0, 1, 0) - return node - - -def increment_lineno(node, n=1): - """ - Increment the line number and end line number of each node in the tree - starting at *node* by *n*. This is useful to "move code" to a different - location in a file. - """ - for child in walk(node): - # TypeIgnore is a special case where lineno is not an attribute - # but rather a field of the node itself. - if isinstance(child, TypeIgnore): - child.lineno = getattr(child, 'lineno', 0) + n - continue - - if 'lineno' in child._attributes: - child.lineno = getattr(child, 'lineno', 0) + n - if ( - "end_lineno" in child._attributes - and (end_lineno := getattr(child, "end_lineno", 0)) is not None - ): - child.end_lineno = end_lineno + n - return node - - -def iter_fields(node): - """ - Yield a tuple of ``(fieldname, value)`` for each field in ``node._fields`` - that is present on *node*. 
- """ - for field in node._fields: - try: - yield field, getattr(node, field) - except AttributeError: - pass - - -def iter_child_nodes(node): - """ - Yield all direct child nodes of *node*, that is, all fields that are nodes - and all items of fields that are lists of nodes. - """ - for name, field in iter_fields(node): - if isinstance(field, AST): - yield field - elif isinstance(field, list): - for item in field: - if isinstance(item, AST): - yield item - - -def get_docstring(node, clean=True): - """ - Return the docstring for the given node or None if no docstring can - be found. If the node provided does not have docstrings a TypeError - will be raised. - - If *clean* is `True`, all tabs are expanded to spaces and any whitespace - that can be uniformly removed from the second line onwards is removed. - """ - if not isinstance(node, (AsyncFunctionDef, FunctionDef, ClassDef, Module)): - raise TypeError("%r can't have docstrings" % node.__class__.__name__) - if not(node.body and isinstance(node.body[0], Expr)): - return None - node = node.body[0].value - if isinstance(node, Constant) and isinstance(node.value, str): - text = node.value - else: - return None - if clean: - import inspect - text = inspect.cleandoc(text) - return text - - -_line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))") -def _splitlines_no_ff(source, maxlines=None): - """Split a string into lines ignoring form feed and other chars. - - This mimics how the Python parser splits source code. - """ - lines = [] - for lineno, match in enumerate(_line_pattern.finditer(source), 1): - if maxlines is not None and lineno > maxlines: - break - lines.append(match[0]) - return lines - - -def _pad_whitespace(source): - r"""Replace all chars except '\f\t' in a line with spaces.""" - result = '' - for c in source: - if c in '\f\t': - result += c - else: - result += ' ' - return result - - -def get_source_segment(source, node, *, padded=False): - """Get source code segment of the *source* that generated *node*. - - If some location information (`lineno`, `end_lineno`, `col_offset`, - or `end_col_offset`) is missing, return None. - - If *padded* is `True`, the first line of a multi-line statement will - be padded with spaces to match its original position. - """ - try: - if node.end_lineno is None or node.end_col_offset is None: - return None - lineno = node.lineno - 1 - end_lineno = node.end_lineno - 1 - col_offset = node.col_offset - end_col_offset = node.end_col_offset - except AttributeError: - return None - - lines = _splitlines_no_ff(source, maxlines=end_lineno+1) - if end_lineno == lineno: - return lines[lineno].encode()[col_offset:end_col_offset].decode() - - if padded: - padding = _pad_whitespace(lines[lineno].encode()[:col_offset].decode()) - else: - padding = '' - - first = padding + lines[lineno].encode()[col_offset:].decode() - last = lines[end_lineno].encode()[:end_col_offset].decode() - lines = lines[lineno+1:end_lineno] - - lines.insert(0, first) - lines.append(last) - return ''.join(lines) - - -def walk(node): - """ - Recursively yield all descendant nodes in the tree starting at *node* - (including *node* itself), in no specified order. This is useful if you - only want to modify nodes in place and don't care about the context. 
- """ - from collections import deque - todo = deque([node]) - while todo: - node = todo.popleft() - todo.extend(iter_child_nodes(node)) - yield node - - -class NodeVisitor(object): - """ - A node visitor base class that walks the abstract syntax tree and calls a - visitor function for every node found. This function may return a value - which is forwarded by the `visit` method. - - This class is meant to be subclassed, with the subclass adding visitor - methods. - - Per default the visitor functions for the nodes are ``'visit_'`` + - class name of the node. So a `TryFinally` node visit function would - be `visit_TryFinally`. This behavior can be changed by overriding - the `visit` method. If no visitor function exists for a node - (return value `None`) the `generic_visit` visitor is used instead. - - Don't use the `NodeVisitor` if you want to apply changes to nodes during - traversing. For this a special visitor exists (`NodeTransformer`) that - allows modifications. - """ - - def visit(self, node): - """Visit a node.""" - method = 'visit_' + node.__class__.__name__ - visitor = getattr(self, method, self.generic_visit) - return visitor(node) - - def generic_visit(self, node): - """Called if no explicit visitor function exists for a node.""" - for field, value in iter_fields(node): - if isinstance(value, list): - for item in value: - if isinstance(item, AST): - self.visit(item) - elif isinstance(value, AST): - self.visit(value) - - def visit_Constant(self, node): - value = node.value - type_name = _const_node_type_names.get(type(value)) - if type_name is None: - for cls, name in _const_node_type_names.items(): - if isinstance(value, cls): - type_name = name - break - if type_name is not None: - method = 'visit_' + type_name - try: - visitor = getattr(self, method) - except AttributeError: - pass - else: - import warnings - warnings.warn(f"{method} is deprecated; add visit_Constant", - DeprecationWarning, 2) - return visitor(node) - return self.generic_visit(node) - - -class NodeTransformer(NodeVisitor): - """ - A :class:`NodeVisitor` subclass that walks the abstract syntax tree and - allows modification of nodes. - - The `NodeTransformer` will walk the AST and use the return value of the - visitor methods to replace or remove the old node. If the return value of - the visitor method is ``None``, the node will be removed from its location, - otherwise it is replaced with the return value. The return value may be the - original node in which case no replacement takes place. - - Here is an example transformer that rewrites all occurrences of name lookups - (``foo``) to ``data['foo']``:: - - class RewriteName(NodeTransformer): - - def visit_Name(self, node): - return Subscript( - value=Name(id='data', ctx=Load()), - slice=Constant(value=node.id), - ctx=node.ctx - ) - - Keep in mind that if the node you're operating on has child nodes you must - either transform the child nodes yourself or call the :meth:`generic_visit` - method for the node first. - - For nodes that were part of a collection of statements (that applies to all - statement nodes), the visitor may also return a list of nodes rather than - just a single node. 
- - Usually you use the transformer like this:: - - node = YourTransformer().visit(node) - """ - - def generic_visit(self, node): - for field, old_value in iter_fields(node): - if isinstance(old_value, list): - new_values = [] - for value in old_value: - if isinstance(value, AST): - value = self.visit(value) - if value is None: - continue - elif not isinstance(value, AST): - new_values.extend(value) - continue - new_values.append(value) - old_value[:] = new_values - elif isinstance(old_value, AST): - new_node = self.visit(old_value) - if new_node is None: - delattr(node, field) - else: - setattr(node, field, new_node) - return node - - -_DEPRECATED_VALUE_ALIAS_MESSAGE = ( - "{name} is deprecated and will be removed in Python {remove}; use value instead" -) -_DEPRECATED_CLASS_MESSAGE = ( - "{name} is deprecated and will be removed in Python {remove}; " - "use ast.Constant instead" -) - - -# If the ast module is loaded more than once, only add deprecated methods once -if not hasattr(Constant, 'n'): - # The following code is for backward compatibility. - # It will be removed in future. - - def _n_getter(self): - """Deprecated. Use value instead.""" - import warnings - warnings._deprecated( - "Attribute n", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) - ) - return self.value - - def _n_setter(self, value): - import warnings - warnings._deprecated( - "Attribute n", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) - ) - self.value = value - - def _s_getter(self): - """Deprecated. Use value instead.""" - import warnings - warnings._deprecated( - "Attribute s", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) - ) - return self.value - - def _s_setter(self, value): - import warnings - warnings._deprecated( - "Attribute s", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) - ) - self.value = value - - Constant.n = property(_n_getter, _n_setter) - Constant.s = property(_s_getter, _s_setter) - -class _ABC(type): - - def __init__(cls, *args): - cls.__doc__ = """Deprecated AST node class. 
Use ast.Constant instead""" - - def __instancecheck__(cls, inst): - if cls in _const_types: - import warnings - warnings._deprecated( - f"ast.{cls.__qualname__}", - message=_DEPRECATED_CLASS_MESSAGE, - remove=(3, 14) - ) - if not isinstance(inst, Constant): - return False - if cls in _const_types: - try: - value = inst.value - except AttributeError: - return False - else: - return ( - isinstance(value, _const_types[cls]) and - not isinstance(value, _const_types_not.get(cls, ())) - ) - return type.__instancecheck__(cls, inst) - -def _new(cls, *args, **kwargs): - for key in kwargs: - if key not in cls._fields: - # arbitrary keyword arguments are accepted - continue - pos = cls._fields.index(key) - if pos < len(args): - raise TypeError(f"{cls.__name__} got multiple values for argument {key!r}") - if cls in _const_types: - import warnings - warnings._deprecated( - f"ast.{cls.__qualname__}", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) - ) - return Constant(*args, **kwargs) - return Constant.__new__(cls, *args, **kwargs) - -class Num(Constant, metaclass=_ABC): - _fields = ('n',) - __new__ = _new - -class Str(Constant, metaclass=_ABC): - _fields = ('s',) - __new__ = _new - -class Bytes(Constant, metaclass=_ABC): - _fields = ('s',) - __new__ = _new - -class NameConstant(Constant, metaclass=_ABC): - __new__ = _new - -class Ellipsis(Constant, metaclass=_ABC): - _fields = () - - def __new__(cls, *args, **kwargs): - if cls is _ast_Ellipsis: - import warnings - warnings._deprecated( - "ast.Ellipsis", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) - ) - return Constant(..., *args, **kwargs) - return Constant.__new__(cls, *args, **kwargs) - -# Keep another reference to Ellipsis in the global namespace -# so it can be referenced in Ellipsis.__new__ -# (The original "Ellipsis" name is removed from the global namespace later on) -_ast_Ellipsis = Ellipsis - -_const_types = { - Num: (int, float, complex), - Str: (str,), - Bytes: (bytes,), - NameConstant: (type(None), bool), - Ellipsis: (type(...),), -} -_const_types_not = { - Num: (bool,), -} - -_const_node_type_names = { - bool: 'NameConstant', # should be before int - type(None): 'NameConstant', - int: 'Num', - float: 'Num', - complex: 'Num', - str: 'Str', - bytes: 'Bytes', - type(...): 'Ellipsis', -} - -class slice(AST): - """Deprecated AST node class.""" - -class Index(slice): - """Deprecated AST node class. Use the index value directly instead.""" - def __new__(cls, value, **kwargs): - return value - -class ExtSlice(slice): - """Deprecated AST node class. Use ast.Tuple instead.""" - def __new__(cls, dims=(), **kwargs): - return Tuple(list(dims), Load(), **kwargs) - -# If the ast module is loaded more than once, only add deprecated methods once -if not hasattr(Tuple, 'dims'): - # The following code is for backward compatibility. - # It will be removed in future. - - def _dims_getter(self): - """Deprecated. Use elts instead.""" - return self.elts - - def _dims_setter(self, value): - self.elts = value - - Tuple.dims = property(_dims_getter, _dims_setter) - -class Suite(mod): - """Deprecated AST node class. Unused in Python 3.""" - -class AugLoad(expr_context): - """Deprecated AST node class. Unused in Python 3.""" - -class AugStore(expr_context): - """Deprecated AST node class. Unused in Python 3.""" - -class Param(expr_context): - """Deprecated AST node class. Unused in Python 3.""" - - -# Large float and imaginary literals get turned into infinities in the AST. -# We unparse those infinities to INFSTR. 
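# --- Editor's illustration (not part of the patch): a minimal sketch of how
# the deprecated ast.Num/ast.Str shims above answer isinstance() through
# _ABC.__instancecheck__, by checking the wrapped Constant's value type.
# Uses only the documented public ast API; emits DeprecationWarning on 3.12.
import ast
import warnings

node = ast.parse("42", mode="eval").body        # an ast.Constant since 3.8
with warnings.catch_warnings():
    warnings.simplefilter("ignore", DeprecationWarning)
    assert isinstance(node, ast.Num)            # True: value is an int
    assert not isinstance(node, ast.Str)        # False: value is not a str
# ---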
-_INFSTR = "1e" + repr(sys.float_info.max_10_exp + 1) - -@_simple_enum(IntEnum) -class _Precedence: - """Precedence table that originated from python grammar.""" - - NAMED_EXPR = auto() # := - TUPLE = auto() # , - YIELD = auto() # 'yield', 'yield from' - TEST = auto() # 'if'-'else', 'lambda' - OR = auto() # 'or' - AND = auto() # 'and' - NOT = auto() # 'not' - CMP = auto() # '<', '>', '==', '>=', '<=', '!=', - # 'in', 'not in', 'is', 'is not' - EXPR = auto() - BOR = EXPR # '|' - BXOR = auto() # '^' - BAND = auto() # '&' - SHIFT = auto() # '<<', '>>' - ARITH = auto() # '+', '-' - TERM = auto() # '*', '@', '/', '%', '//' - FACTOR = auto() # unary '+', '-', '~' - POWER = auto() # '**' - AWAIT = auto() # 'await' - ATOM = auto() - - def next(self): - try: - return self.__class__(self + 1) - except ValueError: - return self - - -_SINGLE_QUOTES = ("'", '"') -_MULTI_QUOTES = ('"""', "'''") -_ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES) - -class _Unparser(NodeVisitor): - """Methods in this class recursively traverse an AST and - output source code for the abstract syntax; original formatting - is disregarded.""" - - def __init__(self, *, _avoid_backslashes=False): - self._source = [] - self._precedences = {} - self._type_ignores = {} - self._indent = 0 - self._avoid_backslashes = _avoid_backslashes - self._in_try_star = False - - def interleave(self, inter, f, seq): - """Call f on each item in seq, calling inter() in between.""" - seq = iter(seq) - try: - f(next(seq)) - except StopIteration: - pass - else: - for x in seq: - inter() - f(x) - - def items_view(self, traverser, items): - """Traverse and separate the given *items* with a comma and append it to - the buffer. If *items* is a single item sequence, a trailing comma - will be added.""" - if len(items) == 1: - traverser(items[0]) - self.write(",") - else: - self.interleave(lambda: self.write(", "), traverser, items) - - def maybe_newline(self): - """Adds a newline if it isn't the start of generated source""" - if self._source: - self.write("\n") - - def fill(self, text=""): - """Indent a piece of text and append it, according to the current - indentation level""" - self.maybe_newline() - self.write(" " * self._indent + text) - - def write(self, *text): - """Add new source parts""" - self._source.extend(text) - - @contextmanager - def buffered(self, buffer = None): - if buffer is None: - buffer = [] - - original_source = self._source - self._source = buffer - yield buffer - self._source = original_source - - @contextmanager - def block(self, *, extra = None): - """A context manager for preparing the source for blocks. It adds - the character':', increases the indentation on enter and decreases - the indentation on exit. If *extra* is given, it will be directly - appended after the colon character. - """ - self.write(":") - if extra: - self.write(extra) - self._indent += 1 - yield - self._indent -= 1 - - @contextmanager - def delimit(self, start, end): - """A context manager for preparing the source for expressions. 
It adds - *start* to the buffer and enters, after exit it adds *end*.""" - - self.write(start) - yield - self.write(end) - - def delimit_if(self, start, end, condition): - if condition: - return self.delimit(start, end) - else: - return nullcontext() - - def require_parens(self, precedence, node): - """Shortcut to adding precedence related parens""" - return self.delimit_if("(", ")", self.get_precedence(node) > precedence) - - def get_precedence(self, node): - return self._precedences.get(node, _Precedence.TEST) - - def set_precedence(self, precedence, *nodes): - for node in nodes: - self._precedences[node] = precedence - - def get_raw_docstring(self, node): - """If a docstring node is found in the body of the *node* parameter, - return that docstring node, None otherwise. - - Logic mirrored from ``_PyAST_GetDocString``.""" - if not isinstance( - node, (AsyncFunctionDef, FunctionDef, ClassDef, Module) - ) or len(node.body) < 1: - return None - node = node.body[0] - if not isinstance(node, Expr): - return None - node = node.value - if isinstance(node, Constant) and isinstance(node.value, str): - return node - - def get_type_comment(self, node): - comment = self._type_ignores.get(node.lineno) or node.type_comment - if comment is not None: - return f" # type: {comment}" - - def traverse(self, node): - if isinstance(node, list): - for item in node: - self.traverse(item) - else: - super().visit(node) - - # Note: as visit() resets the output text, do NOT rely on - # NodeVisitor.generic_visit to handle any nodes (as it calls back in to - # the subclass visit() method, which resets self._source to an empty list) - def visit(self, node): - """Outputs a source code string that, if converted back to an ast - (using ast.parse) will generate an AST equivalent to *node*""" - self._source = [] - self.traverse(node) - return "".join(self._source) - - def _write_docstring_and_traverse_body(self, node): - if (docstring := self.get_raw_docstring(node)): - self._write_docstring(docstring) - self.traverse(node.body[1:]) - else: - self.traverse(node.body) - - def visit_Module(self, node): - self._type_ignores = { - ignore.lineno: f"ignore{ignore.tag}" - for ignore in node.type_ignores - } - self._write_docstring_and_traverse_body(node) - self._type_ignores.clear() - - def visit_FunctionType(self, node): - with self.delimit("(", ")"): - self.interleave( - lambda: self.write(", "), self.traverse, node.argtypes - ) - - self.write(" -> ") - self.traverse(node.returns) - - def visit_Expr(self, node): - self.fill() - self.set_precedence(_Precedence.YIELD, node.value) - self.traverse(node.value) - - def visit_NamedExpr(self, node): - with self.require_parens(_Precedence.NAMED_EXPR, node): - self.set_precedence(_Precedence.ATOM, node.target, node.value) - self.traverse(node.target) - self.write(" := ") - self.traverse(node.value) - - def visit_Import(self, node): - self.fill("import ") - self.interleave(lambda: self.write(", "), self.traverse, node.names) - - def visit_ImportFrom(self, node): - self.fill("from ") - self.write("." 
* (node.level or 0)) - if node.module: - self.write(node.module) - self.write(" import ") - self.interleave(lambda: self.write(", "), self.traverse, node.names) - - def visit_Assign(self, node): - self.fill() - for target in node.targets: - self.set_precedence(_Precedence.TUPLE, target) - self.traverse(target) - self.write(" = ") - self.traverse(node.value) - if type_comment := self.get_type_comment(node): - self.write(type_comment) - - def visit_AugAssign(self, node): - self.fill() - self.traverse(node.target) - self.write(" " + self.binop[node.op.__class__.__name__] + "= ") - self.traverse(node.value) - - def visit_AnnAssign(self, node): - self.fill() - with self.delimit_if("(", ")", not node.simple and isinstance(node.target, Name)): - self.traverse(node.target) - self.write(": ") - self.traverse(node.annotation) - if node.value: - self.write(" = ") - self.traverse(node.value) - - def visit_Return(self, node): - self.fill("return") - if node.value: - self.write(" ") - self.traverse(node.value) - - def visit_Pass(self, node): - self.fill("pass") - - def visit_Break(self, node): - self.fill("break") - - def visit_Continue(self, node): - self.fill("continue") - - def visit_Delete(self, node): - self.fill("del ") - self.interleave(lambda: self.write(", "), self.traverse, node.targets) - - def visit_Assert(self, node): - self.fill("assert ") - self.traverse(node.test) - if node.msg: - self.write(", ") - self.traverse(node.msg) - - def visit_Global(self, node): - self.fill("global ") - self.interleave(lambda: self.write(", "), self.write, node.names) - - def visit_Nonlocal(self, node): - self.fill("nonlocal ") - self.interleave(lambda: self.write(", "), self.write, node.names) - - def visit_Await(self, node): - with self.require_parens(_Precedence.AWAIT, node): - self.write("await") - if node.value: - self.write(" ") - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - - def visit_Yield(self, node): - with self.require_parens(_Precedence.YIELD, node): - self.write("yield") - if node.value: - self.write(" ") - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - - def visit_YieldFrom(self, node): - with self.require_parens(_Precedence.YIELD, node): - self.write("yield from ") - if not node.value: - raise ValueError("Node can't be used without a value attribute.") - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - - def visit_Raise(self, node): - self.fill("raise") - if not node.exc: - if node.cause: - raise ValueError(f"Node can't use cause without an exception.") - return - self.write(" ") - self.traverse(node.exc) - if node.cause: - self.write(" from ") - self.traverse(node.cause) - - def do_visit_try(self, node): - self.fill("try") - with self.block(): - self.traverse(node.body) - for ex in node.handlers: - self.traverse(ex) - if node.orelse: - self.fill("else") - with self.block(): - self.traverse(node.orelse) - if node.finalbody: - self.fill("finally") - with self.block(): - self.traverse(node.finalbody) - - def visit_Try(self, node): - prev_in_try_star = self._in_try_star - try: - self._in_try_star = False - self.do_visit_try(node) - finally: - self._in_try_star = prev_in_try_star - - def visit_TryStar(self, node): - prev_in_try_star = self._in_try_star - try: - self._in_try_star = True - self.do_visit_try(node) - finally: - self._in_try_star = prev_in_try_star - - def visit_ExceptHandler(self, node): - self.fill("except*" if self._in_try_star else "except") - if node.type: - self.write(" ") - 
self.traverse(node.type) - if node.name: - self.write(" as ") - self.write(node.name) - with self.block(): - self.traverse(node.body) - - def visit_ClassDef(self, node): - self.maybe_newline() - for deco in node.decorator_list: - self.fill("@") - self.traverse(deco) - self.fill("class " + node.name) - if hasattr(node, "type_params"): - self._type_params_helper(node.type_params) - with self.delimit_if("(", ")", condition = node.bases or node.keywords): - comma = False - for e in node.bases: - if comma: - self.write(", ") - else: - comma = True - self.traverse(e) - for e in node.keywords: - if comma: - self.write(", ") - else: - comma = True - self.traverse(e) - - with self.block(): - self._write_docstring_and_traverse_body(node) - - def visit_FunctionDef(self, node): - self._function_helper(node, "def") - - def visit_AsyncFunctionDef(self, node): - self._function_helper(node, "async def") - - def _function_helper(self, node, fill_suffix): - self.maybe_newline() - for deco in node.decorator_list: - self.fill("@") - self.traverse(deco) - def_str = fill_suffix + " " + node.name - self.fill(def_str) - if hasattr(node, "type_params"): - self._type_params_helper(node.type_params) - with self.delimit("(", ")"): - self.traverse(node.args) - if node.returns: - self.write(" -> ") - self.traverse(node.returns) - with self.block(extra=self.get_type_comment(node)): - self._write_docstring_and_traverse_body(node) - - def _type_params_helper(self, type_params): - if type_params is not None and len(type_params) > 0: - with self.delimit("[", "]"): - self.interleave(lambda: self.write(", "), self.traverse, type_params) - - def visit_TypeVar(self, node): - self.write(node.name) - if node.bound: - self.write(": ") - self.traverse(node.bound) - - def visit_TypeVarTuple(self, node): - self.write("*" + node.name) - - def visit_ParamSpec(self, node): - self.write("**" + node.name) - - def visit_TypeAlias(self, node): - self.fill("type ") - self.traverse(node.name) - self._type_params_helper(node.type_params) - self.write(" = ") - self.traverse(node.value) - - def visit_For(self, node): - self._for_helper("for ", node) - - def visit_AsyncFor(self, node): - self._for_helper("async for ", node) - - def _for_helper(self, fill, node): - self.fill(fill) - self.set_precedence(_Precedence.TUPLE, node.target) - self.traverse(node.target) - self.write(" in ") - self.traverse(node.iter) - with self.block(extra=self.get_type_comment(node)): - self.traverse(node.body) - if node.orelse: - self.fill("else") - with self.block(): - self.traverse(node.orelse) - - def visit_If(self, node): - self.fill("if ") - self.traverse(node.test) - with self.block(): - self.traverse(node.body) - # collapse nested ifs into equivalent elifs. 
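# --- Editor's illustration (not part of the patch): the elif collapsing noted
# in the comment above, observed through the public ast.unparse() entry point.
import ast

src = "if a:\n    x = 1\nelse:\n    if b:\n        x = 2"
print(ast.unparse(ast.parse(src)))
# The nested `else: if b:` is emitted as a single `elif b:` branch:
#   if a:
#       x = 1
#   elif b:
#       x = 2
# ---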
- while node.orelse and len(node.orelse) == 1 and isinstance(node.orelse[0], If): - node = node.orelse[0] - self.fill("elif ") - self.traverse(node.test) - with self.block(): - self.traverse(node.body) - # final else - if node.orelse: - self.fill("else") - with self.block(): - self.traverse(node.orelse) - - def visit_While(self, node): - self.fill("while ") - self.traverse(node.test) - with self.block(): - self.traverse(node.body) - if node.orelse: - self.fill("else") - with self.block(): - self.traverse(node.orelse) - - def visit_With(self, node): - self.fill("with ") - self.interleave(lambda: self.write(", "), self.traverse, node.items) - with self.block(extra=self.get_type_comment(node)): - self.traverse(node.body) - - def visit_AsyncWith(self, node): - self.fill("async with ") - self.interleave(lambda: self.write(", "), self.traverse, node.items) - with self.block(extra=self.get_type_comment(node)): - self.traverse(node.body) - - def _str_literal_helper( - self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False - ): - """Helper for writing string literals, minimizing escapes. - Returns the tuple (string literal to write, possible quote types). - """ - def escape_char(c): - # \n and \t are non-printable, but we only escape them if - # escape_special_whitespace is True - if not escape_special_whitespace and c in "\n\t": - return c - # Always escape backslashes and other non-printable characters - if c == "\\" or not c.isprintable(): - return c.encode("unicode_escape").decode("ascii") - return c - - escaped_string = "".join(map(escape_char, string)) - possible_quotes = quote_types - if "\n" in escaped_string: - possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES] - possible_quotes = [q for q in possible_quotes if q not in escaped_string] - if not possible_quotes: - # If there aren't any possible_quotes, fallback to using repr - # on the original string. Try to use a quote from quote_types, - # e.g., so that we use triple quotes for docstrings. 
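# --- Editor's illustration (not part of the patch): the quote preference the
# comment above describes; docstrings are re-emitted with triple quotes.
import ast

print(ast.unparse(ast.parse('def f():\n    """Summary line."""')))
# def f():
#     """Summary line."""
# ---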
- string = repr(string) - quote = next((q for q in quote_types if string[0] in q), string[0]) - return string[1:-1], [quote] - if escaped_string: - # Sort so that we prefer '''"''' over """\"""" - possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1]) - # If we're using triple quotes and we'd need to escape a final - # quote, escape it - if possible_quotes[0][0] == escaped_string[-1]: - assert len(possible_quotes[0]) == 3 - escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1] - return escaped_string, possible_quotes - - def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES): - """Write string literal value with a best effort attempt to avoid backslashes.""" - string, quote_types = self._str_literal_helper(string, quote_types=quote_types) - quote_type = quote_types[0] - self.write(f"{quote_type}{string}{quote_type}") - - def visit_JoinedStr(self, node): - self.write("f") - - fstring_parts = [] - for value in node.values: - with self.buffered() as buffer: - self._write_fstring_inner(value) - fstring_parts.append( - ("".join(buffer), isinstance(value, Constant)) - ) - - new_fstring_parts = [] - quote_types = list(_ALL_QUOTES) - fallback_to_repr = False - for value, is_constant in fstring_parts: - if is_constant: - value, new_quote_types = self._str_literal_helper( - value, - quote_types=quote_types, - escape_special_whitespace=True, - ) - if set(new_quote_types).isdisjoint(quote_types): - fallback_to_repr = True - break - quote_types = new_quote_types - elif "\n" in value: - quote_types = [q for q in quote_types if q in _MULTI_QUOTES] - assert quote_types - new_fstring_parts.append(value) - - if fallback_to_repr: - # If we weren't able to find a quote type that works for all parts - # of the JoinedStr, fallback to using repr and triple single quotes. 
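# --- Editor's illustration (not part of the patch): a sketch of the f-string
# handling in this region; conversions and format specs survive a
# parse/unparse round trip.
import ast

print(ast.unparse(ast.parse("f'{x!r}:{y:>10}'")))   # f'{x!r}:{y:>10}'
# ---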
- quote_types = ["'''"] - new_fstring_parts.clear() - for value, is_constant in fstring_parts: - if is_constant: - value = repr('"' + value) # force repr to use single quotes - expected_prefix = "'\"" - assert value.startswith(expected_prefix), repr(value) - value = value[len(expected_prefix):-1] - new_fstring_parts.append(value) - - value = "".join(new_fstring_parts) - quote_type = quote_types[0] - self.write(f"{quote_type}{value}{quote_type}") - - def _write_fstring_inner(self, node): - if isinstance(node, JoinedStr): - # for both the f-string itself, and format_spec - for value in node.values: - self._write_fstring_inner(value) - elif isinstance(node, Constant) and isinstance(node.value, str): - value = node.value.replace("{", "{{").replace("}", "}}") - self.write(value) - elif isinstance(node, FormattedValue): - self.visit_FormattedValue(node) - else: - raise ValueError(f"Unexpected node inside JoinedStr, {node!r}") - - def visit_FormattedValue(self, node): - def unparse_inner(inner): - unparser = type(self)() - unparser.set_precedence(_Precedence.TEST.next(), inner) - return unparser.visit(inner) - - with self.delimit("{", "}"): - expr = unparse_inner(node.value) - if expr.startswith("{"): - # Separate pair of opening brackets as "{ {" - self.write(" ") - self.write(expr) - if node.conversion != -1: - self.write(f"!{chr(node.conversion)}") - if node.format_spec: - self.write(":") - self._write_fstring_inner(node.format_spec) - - def visit_Name(self, node): - self.write(node.id) - - def _write_docstring(self, node): - self.fill() - if node.kind == "u": - self.write("u") - self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES) - - def _write_constant(self, value): - if isinstance(value, (float, complex)): - # Substitute overflowing decimal literal for AST infinities, - # and inf - inf for NaNs. 
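# --- Editor's illustration (not part of the patch): the overflow substitution
# handled just below. A literal too large for a float parses to Constant(inf),
# and infinities/NaNs are written back as arithmetic on _INFSTR ("1e309" for
# IEEE-754 doubles) so the output still compiles.
import ast

print(ast.unparse(ast.parse("1e1000")))             # 1e309
print(ast.unparse(ast.Constant(float("nan"))))      # (1e309-1e309)
# ---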
- self.write( - repr(value) - .replace("inf", _INFSTR) - .replace("nan", f"({_INFSTR}-{_INFSTR})") - ) - elif self._avoid_backslashes and isinstance(value, str): - self._write_str_avoiding_backslashes(value) - else: - self.write(repr(value)) - - def visit_Constant(self, node): - value = node.value - if isinstance(value, tuple): - with self.delimit("(", ")"): - self.items_view(self._write_constant, value) - elif value is ...: - self.write("...") - else: - if node.kind == "u": - self.write("u") - self._write_constant(node.value) - - def visit_List(self, node): - with self.delimit("[", "]"): - self.interleave(lambda: self.write(", "), self.traverse, node.elts) - - def visit_ListComp(self, node): - with self.delimit("[", "]"): - self.traverse(node.elt) - for gen in node.generators: - self.traverse(gen) - - def visit_GeneratorExp(self, node): - with self.delimit("(", ")"): - self.traverse(node.elt) - for gen in node.generators: - self.traverse(gen) - - def visit_SetComp(self, node): - with self.delimit("{", "}"): - self.traverse(node.elt) - for gen in node.generators: - self.traverse(gen) - - def visit_DictComp(self, node): - with self.delimit("{", "}"): - self.traverse(node.key) - self.write(": ") - self.traverse(node.value) - for gen in node.generators: - self.traverse(gen) - - def visit_comprehension(self, node): - if node.is_async: - self.write(" async for ") - else: - self.write(" for ") - self.set_precedence(_Precedence.TUPLE, node.target) - self.traverse(node.target) - self.write(" in ") - self.set_precedence(_Precedence.TEST.next(), node.iter, *node.ifs) - self.traverse(node.iter) - for if_clause in node.ifs: - self.write(" if ") - self.traverse(if_clause) - - def visit_IfExp(self, node): - with self.require_parens(_Precedence.TEST, node): - self.set_precedence(_Precedence.TEST.next(), node.body, node.test) - self.traverse(node.body) - self.write(" if ") - self.traverse(node.test) - self.write(" else ") - self.set_precedence(_Precedence.TEST, node.orelse) - self.traverse(node.orelse) - - def visit_Set(self, node): - if node.elts: - with self.delimit("{", "}"): - self.interleave(lambda: self.write(", "), self.traverse, node.elts) - else: - # `{}` would be interpreted as a dictionary literal, and - # `set` might be shadowed. 
Thus: - self.write('{*()}') - - def visit_Dict(self, node): - def write_key_value_pair(k, v): - self.traverse(k) - self.write(": ") - self.traverse(v) - - def write_item(item): - k, v = item - if k is None: - # for dictionary unpacking operator in dicts {**{'y': 2}} - # see PEP 448 for details - self.write("**") - self.set_precedence(_Precedence.EXPR, v) - self.traverse(v) - else: - write_key_value_pair(k, v) - - with self.delimit("{", "}"): - self.interleave( - lambda: self.write(", "), write_item, zip(node.keys, node.values) - ) - - def visit_Tuple(self, node): - with self.delimit_if( - "(", - ")", - len(node.elts) == 0 or self.get_precedence(node) > _Precedence.TUPLE - ): - self.items_view(self.traverse, node.elts) - - unop = {"Invert": "~", "Not": "not", "UAdd": "+", "USub": "-"} - unop_precedence = { - "not": _Precedence.NOT, - "~": _Precedence.FACTOR, - "+": _Precedence.FACTOR, - "-": _Precedence.FACTOR, - } - - def visit_UnaryOp(self, node): - operator = self.unop[node.op.__class__.__name__] - operator_precedence = self.unop_precedence[operator] - with self.require_parens(operator_precedence, node): - self.write(operator) - # factor prefixes (+, -, ~) shouldn't be separated - # from the value they belong, (e.g: +1 instead of + 1) - if operator_precedence is not _Precedence.FACTOR: - self.write(" ") - self.set_precedence(operator_precedence, node.operand) - self.traverse(node.operand) - - binop = { - "Add": "+", - "Sub": "-", - "Mult": "*", - "MatMult": "@", - "Div": "/", - "Mod": "%", - "LShift": "<<", - "RShift": ">>", - "BitOr": "|", - "BitXor": "^", - "BitAnd": "&", - "FloorDiv": "//", - "Pow": "**", - } - - binop_precedence = { - "+": _Precedence.ARITH, - "-": _Precedence.ARITH, - "*": _Precedence.TERM, - "@": _Precedence.TERM, - "/": _Precedence.TERM, - "%": _Precedence.TERM, - "<<": _Precedence.SHIFT, - ">>": _Precedence.SHIFT, - "|": _Precedence.BOR, - "^": _Precedence.BXOR, - "&": _Precedence.BAND, - "//": _Precedence.TERM, - "**": _Precedence.POWER, - } - - binop_rassoc = frozenset(("**",)) - def visit_BinOp(self, node): - operator = self.binop[node.op.__class__.__name__] - operator_precedence = self.binop_precedence[operator] - with self.require_parens(operator_precedence, node): - if operator in self.binop_rassoc: - left_precedence = operator_precedence.next() - right_precedence = operator_precedence - else: - left_precedence = operator_precedence - right_precedence = operator_precedence.next() - - self.set_precedence(left_precedence, node.left) - self.traverse(node.left) - self.write(f" {operator} ") - self.set_precedence(right_precedence, node.right) - self.traverse(node.right) - - cmpops = { - "Eq": "==", - "NotEq": "!=", - "Lt": "<", - "LtE": "<=", - "Gt": ">", - "GtE": ">=", - "Is": "is", - "IsNot": "is not", - "In": "in", - "NotIn": "not in", - } - - def visit_Compare(self, node): - with self.require_parens(_Precedence.CMP, node): - self.set_precedence(_Precedence.CMP.next(), node.left, *node.comparators) - self.traverse(node.left) - for o, e in zip(node.ops, node.comparators): - self.write(" " + self.cmpops[o.__class__.__name__] + " ") - self.traverse(e) - - boolops = {"And": "and", "Or": "or"} - boolop_precedence = {"and": _Precedence.AND, "or": _Precedence.OR} - - def visit_BoolOp(self, node): - operator = self.boolops[node.op.__class__.__name__] - operator_precedence = self.boolop_precedence[operator] - - def increasing_level_traverse(node): - nonlocal operator_precedence - operator_precedence = operator_precedence.next() - 
self.set_precedence(operator_precedence, node) - self.traverse(node) - - with self.require_parens(operator_precedence, node): - s = f" {operator} " - self.interleave(lambda: self.write(s), increasing_level_traverse, node.values) - - def visit_Attribute(self, node): - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - # Special case: 3.__abs__() is a syntax error, so if node.value - # is an integer literal then we need to either parenthesize - # it or add an extra space to get 3 .__abs__(). - if isinstance(node.value, Constant) and isinstance(node.value.value, int): - self.write(" ") - self.write(".") - self.write(node.attr) - - def visit_Call(self, node): - self.set_precedence(_Precedence.ATOM, node.func) - self.traverse(node.func) - with self.delimit("(", ")"): - comma = False - for e in node.args: - if comma: - self.write(", ") - else: - comma = True - self.traverse(e) - for e in node.keywords: - if comma: - self.write(", ") - else: - comma = True - self.traverse(e) - - def visit_Subscript(self, node): - def is_non_empty_tuple(slice_value): - return ( - isinstance(slice_value, Tuple) - and slice_value.elts - ) - - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - with self.delimit("[", "]"): - if is_non_empty_tuple(node.slice): - # parentheses can be omitted if the tuple isn't empty - self.items_view(self.traverse, node.slice.elts) - else: - self.traverse(node.slice) - - def visit_Starred(self, node): - self.write("*") - self.set_precedence(_Precedence.EXPR, node.value) - self.traverse(node.value) - - def visit_Ellipsis(self, node): - self.write("...") - - def visit_Slice(self, node): - if node.lower: - self.traverse(node.lower) - self.write(":") - if node.upper: - self.traverse(node.upper) - if node.step: - self.write(":") - self.traverse(node.step) - - def visit_Match(self, node): - self.fill("match ") - self.traverse(node.subject) - with self.block(): - for case in node.cases: - self.traverse(case) - - def visit_arg(self, node): - self.write(node.arg) - if node.annotation: - self.write(": ") - self.traverse(node.annotation) - - def visit_arguments(self, node): - first = True - # normal arguments - all_args = node.posonlyargs + node.args - defaults = [None] * (len(all_args) - len(node.defaults)) + node.defaults - for index, elements in enumerate(zip(all_args, defaults), 1): - a, d = elements - if first: - first = False - else: - self.write(", ") - self.traverse(a) - if d: - self.write("=") - self.traverse(d) - if index == len(node.posonlyargs): - self.write(", /") - - # varargs, or bare '*' if no varargs but keyword-only arguments present - if node.vararg or node.kwonlyargs: - if first: - first = False - else: - self.write(", ") - self.write("*") - if node.vararg: - self.write(node.vararg.arg) - if node.vararg.annotation: - self.write(": ") - self.traverse(node.vararg.annotation) - - # keyword-only arguments - if node.kwonlyargs: - for a, d in zip(node.kwonlyargs, node.kw_defaults): - self.write(", ") - self.traverse(a) - if d: - self.write("=") - self.traverse(d) - - # kwargs - if node.kwarg: - if first: - first = False - else: - self.write(", ") - self.write("**" + node.kwarg.arg) - if node.kwarg.annotation: - self.write(": ") - self.traverse(node.kwarg.annotation) - - def visit_keyword(self, node): - if node.arg is None: - self.write("**") - else: - self.write(node.arg) - self.write("=") - self.traverse(node.value) - - def visit_Lambda(self, node): - with self.require_parens(_Precedence.TEST, node): - self.write("lambda") - 
with self.buffered() as buffer: - self.traverse(node.args) - if buffer: - self.write(" ", *buffer) - self.write(": ") - self.set_precedence(_Precedence.TEST, node.body) - self.traverse(node.body) - - def visit_alias(self, node): - self.write(node.name) - if node.asname: - self.write(" as " + node.asname) - - def visit_withitem(self, node): - self.traverse(node.context_expr) - if node.optional_vars: - self.write(" as ") - self.traverse(node.optional_vars) - - def visit_match_case(self, node): - self.fill("case ") - self.traverse(node.pattern) - if node.guard: - self.write(" if ") - self.traverse(node.guard) - with self.block(): - self.traverse(node.body) - - def visit_MatchValue(self, node): - self.traverse(node.value) - - def visit_MatchSingleton(self, node): - self._write_constant(node.value) - - def visit_MatchSequence(self, node): - with self.delimit("[", "]"): - self.interleave( - lambda: self.write(", "), self.traverse, node.patterns - ) - - def visit_MatchStar(self, node): - name = node.name - if name is None: - name = "_" - self.write(f"*{name}") - - def visit_MatchMapping(self, node): - def write_key_pattern_pair(pair): - k, p = pair - self.traverse(k) - self.write(": ") - self.traverse(p) - - with self.delimit("{", "}"): - keys = node.keys - self.interleave( - lambda: self.write(", "), - write_key_pattern_pair, - zip(keys, node.patterns, strict=True), - ) - rest = node.rest - if rest is not None: - if keys: - self.write(", ") - self.write(f"**{rest}") - - def visit_MatchClass(self, node): - self.set_precedence(_Precedence.ATOM, node.cls) - self.traverse(node.cls) - with self.delimit("(", ")"): - patterns = node.patterns - self.interleave( - lambda: self.write(", "), self.traverse, patterns - ) - attrs = node.kwd_attrs - if attrs: - def write_attr_pattern(pair): - attr, pattern = pair - self.write(f"{attr}=") - self.traverse(pattern) - - if patterns: - self.write(", ") - self.interleave( - lambda: self.write(", "), - write_attr_pattern, - zip(attrs, node.kwd_patterns, strict=True), - ) - - def visit_MatchAs(self, node): - name = node.name - pattern = node.pattern - if name is None: - self.write("_") - elif pattern is None: - self.write(node.name) - else: - with self.require_parens(_Precedence.TEST, node): - self.set_precedence(_Precedence.BOR, node.pattern) - self.traverse(node.pattern) - self.write(f" as {node.name}") - - def visit_MatchOr(self, node): - with self.require_parens(_Precedence.BOR, node): - self.set_precedence(_Precedence.BOR.next(), *node.patterns) - self.interleave(lambda: self.write(" | "), self.traverse, node.patterns) - -def unparse(ast_obj): - unparser = _Unparser() - return unparser.visit(ast_obj) - - -_deprecated_globals = { - name: globals().pop(name) - for name in ('Num', 'Str', 'Bytes', 'NameConstant', 'Ellipsis') -} - -def __getattr__(name): - if name in _deprecated_globals: - globals()[name] = value = _deprecated_globals[name] - import warnings - warnings._deprecated( - f"ast.{name}", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) - ) - return value - raise AttributeError(f"module 'ast' has no attribute '{name}'") - - -def main(): - import argparse - - parser = argparse.ArgumentParser(prog='python -m ast') - parser.add_argument('infile', type=argparse.FileType(mode='rb'), nargs='?', - default='-', - help='the file to parse; defaults to stdin') - parser.add_argument('-m', '--mode', default='exec', - choices=('exec', 'single', 'eval', 'func_type'), - help='specify what kind of code must be parsed') - parser.add_argument('--no-type-comments', 
default=True, action='store_false', - help="don't add information about type comments") - parser.add_argument('-a', '--include-attributes', action='store_true', - help='include attributes such as line numbers and ' - 'column offsets') - parser.add_argument('-i', '--indent', type=int, default=3, - help='indentation of nodes (number of spaces)') - args = parser.parse_args() - - with args.infile as infile: - source = infile.read() - tree = parse(source, args.infile.name, args.mode, type_comments=args.no_type_comments) - print(dump(tree, include_attributes=args.include_attributes, indent=args.indent)) - -if __name__ == '__main__': - main() diff --git a/python/python3_12/examples/base64.py b/python/python3_12/examples/base64.py deleted file mode 100644 index e233647ee7..0000000000 --- a/python/python3_12/examples/base64.py +++ /dev/null @@ -1,586 +0,0 @@ -#! /usr/bin/env python3 - -"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings""" - -# Modified 04-Oct-1995 by Jack Jansen to use binascii module -# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support -# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere - -import re -import struct -import binascii - - -__all__ = [ - # Legacy interface exports traditional RFC 2045 Base64 encodings - 'encode', 'decode', 'encodebytes', 'decodebytes', - # Generalized interface for other encodings - 'b64encode', 'b64decode', 'b32encode', 'b32decode', - 'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode', - # Base85 and Ascii85 encodings - 'b85encode', 'b85decode', 'a85encode', 'a85decode', - # Standard Base64 encoding - 'standard_b64encode', 'standard_b64decode', - # Some common Base64 alternatives. As referenced by RFC 3458, see thread - # starting at: - # - # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html - 'urlsafe_b64encode', 'urlsafe_b64decode', - ] - - -bytes_types = (bytes, bytearray) # Types acceptable as binary data - -def _bytes_from_decode_data(s): - if isinstance(s, str): - try: - return s.encode('ascii') - except UnicodeEncodeError: - raise ValueError('string argument should contain only ASCII characters') - if isinstance(s, bytes_types): - return s - try: - return memoryview(s).tobytes() - except TypeError: - raise TypeError("argument should be a bytes-like object or ASCII " - "string, not %r" % s.__class__.__name__) from None - - -# Base64 encoding/decoding uses binascii - -def b64encode(s, altchars=None): - """Encode the bytes-like object s using Base64 and return a bytes object. - - Optional altchars should be a byte string of length 2 which specifies an - alternative alphabet for the '+' and '/' characters. This allows an - application to e.g. generate url or filesystem safe Base64 strings. - """ - encoded = binascii.b2a_base64(s, newline=False) - if altchars is not None: - assert len(altchars) == 2, repr(altchars) - return encoded.translate(bytes.maketrans(b'+/', altchars)) - return encoded - - -def b64decode(s, altchars=None, validate=False): - """Decode the Base64 encoded bytes-like object or ASCII string s. - - Optional altchars must be a bytes-like object or ASCII string of length 2 - which specifies the alternative alphabet used instead of the '+' and '/' - characters. - - The result is returned as a bytes object. A binascii.Error is raised if - s is incorrectly padded. - - If validate is False (the default), characters that are neither in the - normal base-64 alphabet nor the alternative alphabet are discarded prior - to the padding check. 
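# --- Editor's illustration (not part of the patch): the validate flag
# described above, exercised through the public base64 API.
import base64
import binascii

noisy = b"aGVsbG8=\n"                     # newline is outside the alphabet
assert base64.b64decode(noisy) == b"hello"          # discarded by default
try:
    base64.b64decode(noisy, validate=True)          # strict mode
except binascii.Error:
    pass                                            # non-alphabet byte rejected
# ---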
If validate is True, these non-alphabet characters - in the input result in a binascii.Error. - For more information about the strict base64 check, see: - - https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64 - """ - s = _bytes_from_decode_data(s) - if altchars is not None: - altchars = _bytes_from_decode_data(altchars) - assert len(altchars) == 2, repr(altchars) - s = s.translate(bytes.maketrans(altchars, b'+/')) - return binascii.a2b_base64(s, strict_mode=validate) - - -def standard_b64encode(s): - """Encode bytes-like object s using the standard Base64 alphabet. - - The result is returned as a bytes object. - """ - return b64encode(s) - -def standard_b64decode(s): - """Decode bytes encoded with the standard Base64 alphabet. - - Argument s is a bytes-like object or ASCII string to decode. The result - is returned as a bytes object. A binascii.Error is raised if the input - is incorrectly padded. Characters that are not in the standard alphabet - are discarded prior to the padding check. - """ - return b64decode(s) - - -_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_') -_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/') - -def urlsafe_b64encode(s): - """Encode bytes using the URL- and filesystem-safe Base64 alphabet. - - Argument s is a bytes-like object to encode. The result is returned as a - bytes object. The alphabet uses '-' instead of '+' and '_' instead of - '/'. - """ - return b64encode(s).translate(_urlsafe_encode_translation) - -def urlsafe_b64decode(s): - """Decode bytes using the URL- and filesystem-safe Base64 alphabet. - - Argument s is a bytes-like object or ASCII string to decode. The result - is returned as a bytes object. A binascii.Error is raised if the input - is incorrectly padded. Characters that are not in the URL-safe base-64 - alphabet, and are not a plus '+' or slash '/', are discarded prior to the - padding check. - - The alphabet uses '-' instead of '+' and '_' instead of '/'. - """ - s = _bytes_from_decode_data(s) - s = s.translate(_urlsafe_decode_translation) - return b64decode(s) - - - -# Base32 encoding/decoding must be done in Python -_B32_ENCODE_DOCSTRING = ''' -Encode the bytes-like objects using {encoding} and return a bytes object. -''' -_B32_DECODE_DOCSTRING = ''' -Decode the {encoding} encoded bytes-like object or ASCII string s. - -Optional casefold is a flag specifying whether a lowercase alphabet is -acceptable as input. For security purposes, the default is False. -{extra_args} -The result is returned as a bytes object. A binascii.Error is raised if -the input is incorrectly padded or if there are non-alphabet -characters present in the input. -''' -_B32_DECODE_MAP01_DOCSTRING = ''' -RFC 3548 allows for optional mapping of the digit 0 (zero) to the -letter O (oh), and for optional mapping of the digit 1 (one) to -either the letter I (eye) or letter L (el). The optional argument -map01 when not None, specifies which letter the digit 1 should be -mapped to (when map01 is not None, the digit 0 is always mapped to -the letter O). For security purposes the default is None, so that -0 and 1 are not allowed in the input. 
-''' -_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567' -_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV' -_b32tab2 = {} -_b32rev = {} - -def _b32encode(alphabet, s): - global _b32tab2 - # Delay the initialization of the table to not waste memory - # if the function is never called - if alphabet not in _b32tab2: - b32tab = [bytes((i,)) for i in alphabet] - _b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab] - b32tab = None - - if not isinstance(s, bytes_types): - s = memoryview(s).tobytes() - leftover = len(s) % 5 - # Pad the last quantum with zero bits if necessary - if leftover: - s = s + b'\0' * (5 - leftover) # Don't use += ! - encoded = bytearray() - from_bytes = int.from_bytes - b32tab2 = _b32tab2[alphabet] - for i in range(0, len(s), 5): - c = from_bytes(s[i: i + 5]) # big endian - encoded += (b32tab2[c >> 30] + # bits 1 - 10 - b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20 - b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30 - b32tab2[c & 0x3ff] # bits 31 - 40 - ) - # Adjust for any leftover partial quanta - if leftover == 1: - encoded[-6:] = b'======' - elif leftover == 2: - encoded[-4:] = b'====' - elif leftover == 3: - encoded[-3:] = b'===' - elif leftover == 4: - encoded[-1:] = b'=' - return bytes(encoded) - -def _b32decode(alphabet, s, casefold=False, map01=None): - global _b32rev - # Delay the initialization of the table to not waste memory - # if the function is never called - if alphabet not in _b32rev: - _b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)} - s = _bytes_from_decode_data(s) - if len(s) % 8: - raise binascii.Error('Incorrect padding') - # Handle section 2.4 zero and one mapping. The flag map01 will be either - # False, or the character to map the digit 1 (one) to. It should be - # either L (el) or I (eye). - if map01 is not None: - map01 = _bytes_from_decode_data(map01) - assert len(map01) == 1, repr(map01) - s = s.translate(bytes.maketrans(b'01', b'O' + map01)) - if casefold: - s = s.upper() - # Strip off pad characters from the right. We need to count the pad - # characters because this will tell us how many null bytes to remove from - # the end of the decoded string. 
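# --- Editor's illustration (not part of the patch): the casefold/map01
# options and the padding check implemented in _b32decode, via the public API.
import base64
import binascii

assert base64.b32encode(b"w") == b"O4======"
assert base64.b32decode(b"o4======", casefold=True) == b"w"
assert base64.b32decode(b"04======", map01=b"L") == b"w"   # 0 -> O, 1 -> L
try:
    base64.b32decode(b"O4=====")            # length not a multiple of 8
except binascii.Error:
    pass
# ---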
- l = len(s) - s = s.rstrip(b'=') - padchars = l - len(s) - # Now decode the full quanta - decoded = bytearray() - b32rev = _b32rev[alphabet] - for i in range(0, len(s), 8): - quanta = s[i: i + 8] - acc = 0 - try: - for c in quanta: - acc = (acc << 5) + b32rev[c] - except KeyError: - raise binascii.Error('Non-base32 digit found') from None - decoded += acc.to_bytes(5) # big endian - # Process the last, partial quanta - if l % 8 or padchars not in {0, 1, 3, 4, 6}: - raise binascii.Error('Incorrect padding') - if padchars and decoded: - acc <<= 5 * padchars - last = acc.to_bytes(5) # big endian - leftover = (43 - 5 * padchars) // 8 # 1: 4, 3: 3, 4: 2, 6: 1 - decoded[-5:] = last[:leftover] - return bytes(decoded) - - -def b32encode(s): - return _b32encode(_b32alphabet, s) -b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32') - -def b32decode(s, casefold=False, map01=None): - return _b32decode(_b32alphabet, s, casefold, map01) -b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32', - extra_args=_B32_DECODE_MAP01_DOCSTRING) - -def b32hexencode(s): - return _b32encode(_b32hexalphabet, s) -b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex') - -def b32hexdecode(s, casefold=False): - # base32hex does not have the 01 mapping - return _b32decode(_b32hexalphabet, s, casefold) -b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex', - extra_args='') - - -# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns -# lowercase. The RFC also recommends against accepting input case -# insensitively. -def b16encode(s): - """Encode the bytes-like object s using Base16 and return a bytes object. - """ - return binascii.hexlify(s).upper() - - -def b16decode(s, casefold=False): - """Decode the Base16 encoded bytes-like object or ASCII string s. - - Optional casefold is a flag specifying whether a lowercase alphabet is - acceptable as input. For security purposes, the default is False. - - The result is returned as a bytes object. A binascii.Error is raised if - s is incorrectly padded or if there are non-alphabet characters present - in the input. - """ - s = _bytes_from_decode_data(s) - if casefold: - s = s.upper() - if re.search(b'[^0-9A-F]', s): - raise binascii.Error('Non-base16 digit found') - return binascii.unhexlify(s) - -# -# Ascii85 encoding/decoding -# - -_a85chars = None -_a85chars2 = None -_A85START = b"<~" -_A85END = b"~>" - -def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False): - # Helper function for a85encode and b85encode - if not isinstance(b, bytes_types): - b = memoryview(b).tobytes() - - padding = (-len(b)) % 4 - if padding: - b = b + b'\0' * padding - words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b) - - chunks = [b'z' if foldnuls and not word else - b'y' if foldspaces and word == 0x20202020 else - (chars2[word // 614125] + - chars2[word // 85 % 7225] + - chars[word % 85]) - for word in words] - - if padding and not pad: - if chunks[-1] == b'z': - chunks[-1] = chars[0] * 5 - chunks[-1] = chunks[-1][:-padding] - - return b''.join(chunks) - -def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): - """Encode bytes-like object b using Ascii85 and return a bytes object. - - foldspaces is an optional flag that uses the special short sequence 'y' - instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This - feature is not supported by the "standard" Adobe encoding. 
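A short sketch of the casefold and map01 options documented above, using the RFC 4648 test vector for b'foobar' (the values shown are illustrative):

import base64

assert base64.b32encode(b'foobar') == b'MZXW6YTBOI======'
# casefold=True accepts a lowercase alphabet:
assert base64.b32decode(b'mzxw6ytboi======', casefold=True) == b'foobar'
# map01=b'L' maps '0' -> 'O' and '1' -> 'L' before decoding:
assert base64.b32decode(b'MZXW6YTB0I======', map01=b'L') == b'foobar'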
- - wrapcol controls whether the output should have newline (b'\\n') characters - added to it. If this is non-zero, each output line will be at most this - many characters long. - - pad controls whether the input is padded to a multiple of 4 before - encoding. Note that the btoa implementation always pads. - - adobe controls whether the encoded byte sequence is framed with <~ and ~>, - which is used by the Adobe implementation. - """ - global _a85chars, _a85chars2 - # Delay the initialization of tables to not waste memory - # if the function is never called - if _a85chars2 is None: - _a85chars = [bytes((i,)) for i in range(33, 118)] - _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars] - - result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces) - - if adobe: - result = _A85START + result - if wrapcol: - wrapcol = max(2 if adobe else 1, wrapcol) - chunks = [result[i: i + wrapcol] - for i in range(0, len(result), wrapcol)] - if adobe: - if len(chunks[-1]) + 2 > wrapcol: - chunks.append(b'') - result = b'\n'.join(chunks) - if adobe: - result += _A85END - - return result - -def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): - """Decode the Ascii85 encoded bytes-like object or ASCII string b. - - foldspaces is a flag that specifies whether the 'y' short sequence should be - accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is - not supported by the "standard" Adobe encoding. - - adobe controls whether the input sequence is in Adobe Ascii85 format (i.e. - is framed with <~ and ~>). - - ignorechars should be a byte string containing characters to ignore from the - input. This should only contain whitespace characters, and by default - contains all whitespace characters in ASCII. - - The result is returned as a bytes object. - """ - b = _bytes_from_decode_data(b) - if adobe: - if not b.endswith(_A85END): - raise ValueError( - "Ascii85 encoded byte sequences must end " - "with {!r}".format(_A85END) - ) - if b.startswith(_A85START): - b = b[2:-2] # Strip off start/end markers - else: - b = b[:-2] - # - # We have to go through this stepwise, so as to ignore spaces and handle - # special short sequences - # - packI = struct.Struct('!I').pack - decoded = [] - decoded_append = decoded.append - curr = [] - curr_append = curr.append - curr_clear = curr.clear - for x in b + b'u' * 4: - if b'!'[0] <= x <= b'u'[0]: - curr_append(x) - if len(curr) == 5: - acc = 0 - for x in curr: - acc = 85 * acc + (x - 33) - try: - decoded_append(packI(acc)) - except struct.error: - raise ValueError('Ascii85 overflow') from None - curr_clear() - elif x == b'z'[0]: - if curr: - raise ValueError('z inside Ascii85 5-tuple') - decoded_append(b'\0\0\0\0') - elif foldspaces and x == b'y'[0]: - if curr: - raise ValueError('y inside Ascii85 5-tuple') - decoded_append(b'\x20\x20\x20\x20') - elif x in ignorechars: - # Skip whitespace - continue - else: - raise ValueError('Non-Ascii85 digit found: %c' % x) - - result = b''.join(decoded) - padding = 4 - len(curr) - if padding: - # Throw away the extra padding - result = result[:-padding] - return result - -# The following code is originally taken (with permission) from Mercurial - -_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" - b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~") -_b85chars = None -_b85chars2 = None -_b85dec = None - -def b85encode(b, pad=False): - """Encode bytes-like object b in base85 format and return a bytes object. 
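A sketch of the Adobe framing and the btoa-style 'y' shorthand that these docstrings describe (the sample data is arbitrary):

import base64

data = b'Man is distinguished'
framed = base64.a85encode(data, adobe=True)
assert framed.startswith(b'<~') and framed.endswith(b'~>')
assert base64.a85decode(framed, adobe=True) == data
# foldspaces replaces a word of four spaces with the single byte b'y':
assert base64.a85encode(b'    ', foldspaces=True) == b'y'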
- - If pad is true, the input is padded with b'\\0' so its length is a multiple of - 4 bytes before encoding. - """ - global _b85chars, _b85chars2 - # Delay the initialization of tables to not waste memory - # if the function is never called - if _b85chars2 is None: - _b85chars = [bytes((i,)) for i in _b85alphabet] - _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars] - return _85encode(b, _b85chars, _b85chars2, pad) - -def b85decode(b): - """Decode the base85-encoded bytes-like object or ASCII string b - - The result is returned as a bytes object. - """ - global _b85dec - # Delay the initialization of tables to not waste memory - # if the function is never called - if _b85dec is None: - _b85dec = [None] * 256 - for i, c in enumerate(_b85alphabet): - _b85dec[c] = i - - b = _bytes_from_decode_data(b) - padding = (-len(b)) % 5 - b = b + b'~' * padding - out = [] - packI = struct.Struct('!I').pack - for i in range(0, len(b), 5): - chunk = b[i:i + 5] - acc = 0 - try: - for c in chunk: - acc = acc * 85 + _b85dec[c] - except TypeError: - for j, c in enumerate(chunk): - if _b85dec[c] is None: - raise ValueError('bad base85 character at position %d' - % (i + j)) from None - raise - try: - out.append(packI(acc)) - except struct.error: - raise ValueError('base85 overflow in hunk starting at byte %d' - % i) from None - - result = b''.join(out) - if padding: - result = result[:-padding] - return result - -# Legacy interface. This code could be cleaned up since I don't believe -# binascii has any line length limitations. It just doesn't seem worth it -# though. The files should be opened in binary mode. - -MAXLINESIZE = 76 # Excluding the CRLF -MAXBINSIZE = (MAXLINESIZE//4)*3 - -def encode(input, output): - """Encode a file; input and output are binary files.""" - while s := input.read(MAXBINSIZE): - while len(s) < MAXBINSIZE and (ns := input.read(MAXBINSIZE-len(s))): - s += ns - line = binascii.b2a_base64(s) - output.write(line) - - -def decode(input, output): - """Decode a file; input and output are binary files.""" - while line := input.readline(): - s = binascii.a2b_base64(line) - output.write(s) - -def _input_type_check(s): - try: - m = memoryview(s) - except TypeError as err: - msg = "expected bytes-like object, not %s" % s.__class__.__name__ - raise TypeError(msg) from err - if m.format not in ('c', 'b', 'B'): - msg = ("expected single byte elements, not %r from %s" % - (m.format, s.__class__.__name__)) - raise TypeError(msg) - if m.ndim != 1: - msg = ("expected 1-D data, not %d-D data from %s" % - (m.ndim, s.__class__.__name__)) - raise TypeError(msg) - - -def encodebytes(s): - """Encode a bytestring into a bytes object containing multiple lines - of base-64 data.""" - _input_type_check(s) - pieces = [] - for i in range(0, len(s), MAXBINSIZE): - chunk = s[i : i + MAXBINSIZE] - pieces.append(binascii.b2a_base64(chunk)) - return b"".join(pieces) - - -def decodebytes(s): - """Decode a bytestring of base-64 data into a bytes object.""" - _input_type_check(s) - return binascii.a2b_base64(s) - - -# Usable as a script... 
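Tying the pieces above together, a small sketch of b85 plus the legacy file interface; the in-memory streams stand in for real binary files:

import base64, io

assert base64.b85decode(base64.b85encode(b'example')) == b'example'

# encode() reads MAXBINSIZE-byte chunks so every Base64 output line
# fits within MAXLINESIZE (76) characters plus the trailing newline.
src, dst = io.BytesIO(b'\x00' * 120), io.BytesIO()
base64.encode(src, dst)
assert all(len(line) <= 76 for line in dst.getvalue().split(b'\n') if line)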
-def main():
-    """Small main program"""
-    import sys, getopt
-    usage = f"""usage: {sys.argv[0]} [-h|-d|-e|-u] [file|-]
-        -h: print this help message and exit
-        -d, -u: decode
-        -e: encode (default)"""
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], 'hdeu')
-    except getopt.error as msg:
-        sys.stdout = sys.stderr
-        print(msg)
-        print(usage)
-        sys.exit(2)
-    func = encode
-    for o, a in opts:
-        if o == '-e': func = encode
-        if o == '-d': func = decode
-        if o == '-u': func = decode
-        if o == '-h': print(usage); return
-    if args and args[0] != '-':
-        with open(args[0], 'rb') as f:
-            func(f, sys.stdout.buffer)
-    else:
-        func(sys.stdin.buffer, sys.stdout.buffer)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/python/python3_12/examples/bdb.py b/python/python3_12/examples/bdb.py
deleted file mode 100644
index 0f3eec653b..0000000000
--- a/python/python3_12/examples/bdb.py
+++ /dev/null
@@ -1,893 +0,0 @@
-"""Debugger basics"""
-
-import fnmatch
-import sys
-import os
-from inspect import CO_GENERATOR, CO_COROUTINE, CO_ASYNC_GENERATOR
-
-__all__ = ["BdbQuit", "Bdb", "Breakpoint"]
-
-GENERATOR_AND_COROUTINE_FLAGS = CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR
-
-
-class BdbQuit(Exception):
-    """Exception to give up completely."""
-
-
-class Bdb:
-    """Generic Python debugger base class.
-
-    This class takes care of details of the trace facility;
-    a derived class should implement user interaction.
-    The standard debugger class (pdb.Pdb) is an example.
-
-    The optional skip argument must be an iterable of glob-style
-    module name patterns.  The debugger will not step into frames
-    that originate in a module that matches one of these patterns.
-    Whether a frame is considered to originate in a certain module
-    is determined by the __name__ in the frame globals.
-    """
-
-    def __init__(self, skip=None):
-        self.skip = set(skip) if skip else None
-        self.breaks = {}
-        self.fncache = {}
-        self.frame_returning = None
-
-        self._load_breaks()
-
-    def canonic(self, filename):
-        """Return canonical form of filename.
-
-        For real filenames, the canonical form is a case-normalized (on
-        case insensitive filesystems) absolute path.  'Filenames' with
-        angle brackets, such as "<stdin>", generated in interactive
-        mode, are returned unchanged.
-        """
-        if filename == "<" + filename[1:-1] + ">":
-            return filename
-        canonic = self.fncache.get(filename)
-        if not canonic:
-            canonic = os.path.abspath(filename)
-            canonic = os.path.normcase(canonic)
-            self.fncache[filename] = canonic
-        return canonic
-
-    def reset(self):
-        """Set values of attributes as ready to start debugging."""
-        import linecache
-        linecache.checkcache()
-        self.botframe = None
-        self._set_stopinfo(None, None)
-
-    def trace_dispatch(self, frame, event, arg):
-        """Dispatch a trace function for debugged frames based on the event.
-
-        This function is installed as the trace function for debugged
-        frames. Its return value is the new trace function, which is
-        usually itself. The default implementation decides how to
-        dispatch a frame, depending on the type of event (passed in as a
-        string) that is about to be executed.
-
-        The event can be one of the following:
-            line: A new line of code is going to be executed.
-            call: A function is about to be called or another code block
-                  is entered.
-            return: A function or other code block is about to return.
-            exception: An exception has occurred.
-            c_call: A C function is about to be called.
-            c_return: A C function has returned.
-            c_exception: A C function has raised an exception.
- - For the Python events, specialized functions (see the dispatch_*() - methods) are called. For the C events, no action is taken. - - The arg parameter depends on the previous event. - """ - if self.quitting: - return # None - if event == 'line': - return self.dispatch_line(frame) - if event == 'call': - return self.dispatch_call(frame, arg) - if event == 'return': - return self.dispatch_return(frame, arg) - if event == 'exception': - return self.dispatch_exception(frame, arg) - if event == 'c_call': - return self.trace_dispatch - if event == 'c_exception': - return self.trace_dispatch - if event == 'c_return': - return self.trace_dispatch - print('bdb.Bdb.dispatch: unknown debugging event:', repr(event)) - return self.trace_dispatch - - def dispatch_line(self, frame): - """Invoke user function and return trace function for line event. - - If the debugger stops on the current line, invoke - self.user_line(). Raise BdbQuit if self.quitting is set. - Return self.trace_dispatch to continue tracing in this scope. - """ - if self.stop_here(frame) or self.break_here(frame): - self.user_line(frame) - if self.quitting: raise BdbQuit - return self.trace_dispatch - - def dispatch_call(self, frame, arg): - """Invoke user function and return trace function for call event. - - If the debugger stops on this function call, invoke - self.user_call(). Raise BdbQuit if self.quitting is set. - Return self.trace_dispatch to continue tracing in this scope. - """ - # XXX 'arg' is no longer used - if self.botframe is None: - # First call of dispatch since reset() - self.botframe = frame.f_back # (CT) Note that this may also be None! - return self.trace_dispatch - if not (self.stop_here(frame) or self.break_anywhere(frame)): - # No need to trace this function - return # None - # Ignore call events in generator except when stepping. - if self.stopframe and frame.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS: - return self.trace_dispatch - self.user_call(frame, arg) - if self.quitting: raise BdbQuit - return self.trace_dispatch - - def dispatch_return(self, frame, arg): - """Invoke user function and return trace function for return event. - - If the debugger stops on this function return, invoke - self.user_return(). Raise BdbQuit if self.quitting is set. - Return self.trace_dispatch to continue tracing in this scope. - """ - if self.stop_here(frame) or frame == self.returnframe: - # Ignore return events in generator except when stepping. - if self.stopframe and frame.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS: - return self.trace_dispatch - try: - self.frame_returning = frame - self.user_return(frame, arg) - finally: - self.frame_returning = None - if self.quitting: raise BdbQuit - # The user issued a 'next' or 'until' command. - if self.stopframe is frame and self.stoplineno != -1: - self._set_stopinfo(None, None) - return self.trace_dispatch - - def dispatch_exception(self, frame, arg): - """Invoke user function and return trace function for exception event. - - If the debugger stops on this exception, invoke - self.user_exception(). Raise BdbQuit if self.quitting is set. - Return self.trace_dispatch to continue tracing in this scope. - """ - if self.stop_here(frame): - # When stepping with next/until/return in a generator frame, skip - # the internal StopIteration exception (with no traceback) - # triggered by a subiterator run with the 'yield from' statement. 
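The dispatch chain above is easiest to see with a tiny subclass; in this sketch (LineTracer is a hypothetical name) every line stops, because reset() leaves stopframe unset and stop_here() is then always true:

import bdb

class LineTracer(bdb.Bdb):
    def user_line(self, frame):
        # dispatch_line() routes here whenever stop_here() is true.
        print('line', frame.f_lineno, 'in', frame.f_code.co_name)

LineTracer().run('x = 1\ny = x + 1\n')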
- if not (frame.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS - and arg[0] is StopIteration and arg[2] is None): - self.user_exception(frame, arg) - if self.quitting: raise BdbQuit - # Stop at the StopIteration or GeneratorExit exception when the user - # has set stopframe in a generator by issuing a return command, or a - # next/until command at the last statement in the generator before the - # exception. - elif (self.stopframe and frame is not self.stopframe - and self.stopframe.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS - and arg[0] in (StopIteration, GeneratorExit)): - self.user_exception(frame, arg) - if self.quitting: raise BdbQuit - - return self.trace_dispatch - - # Normally derived classes don't override the following - # methods, but they may if they want to redefine the - # definition of stopping and breakpoints. - - def is_skipped_module(self, module_name): - "Return True if module_name matches any skip pattern." - if module_name is None: # some modules do not have names - return False - for pattern in self.skip: - if fnmatch.fnmatch(module_name, pattern): - return True - return False - - def stop_here(self, frame): - "Return True if frame is below the starting frame in the stack." - # (CT) stopframe may now also be None, see dispatch_call. - # (CT) the former test for None is therefore removed from here. - if self.skip and \ - self.is_skipped_module(frame.f_globals.get('__name__')): - return False - if frame is self.stopframe: - if self.stoplineno == -1: - return False - return frame.f_lineno >= self.stoplineno - if not self.stopframe: - return True - return False - - def break_here(self, frame): - """Return True if there is an effective breakpoint for this line. - - Check for line or function breakpoint and if in effect. - Delete temporary breakpoints if effective() says to. - """ - filename = self.canonic(frame.f_code.co_filename) - if filename not in self.breaks: - return False - lineno = frame.f_lineno - if lineno not in self.breaks[filename]: - # The line itself has no breakpoint, but maybe the line is the - # first line of a function with breakpoint set by function name. - lineno = frame.f_code.co_firstlineno - if lineno not in self.breaks[filename]: - return False - - # flag says ok to delete temp. bp - (bp, flag) = effective(filename, lineno, frame) - if bp: - self.currentbp = bp.number - if (flag and bp.temporary): - self.do_clear(str(bp.number)) - return True - else: - return False - - def do_clear(self, arg): - """Remove temporary breakpoint. - - Must implement in derived classes or get NotImplementedError. - """ - raise NotImplementedError("subclass of bdb must implement do_clear()") - - def break_anywhere(self, frame): - """Return True if there is any breakpoint for frame's filename. - """ - return self.canonic(frame.f_code.co_filename) in self.breaks - - # Derived classes should override the user_* methods - # to gain control. - - def user_call(self, frame, argument_list): - """Called if we might stop in a function.""" - pass - - def user_line(self, frame): - """Called when we stop or break at a line.""" - pass - - def user_return(self, frame, return_value): - """Called when a return trap is set here.""" - pass - - def user_exception(self, frame, exc_info): - """Called when we stop on an exception.""" - pass - - def _set_stopinfo(self, stopframe, returnframe, stoplineno=0): - """Set the attributes for stopping. - - If stoplineno is greater than or equal to 0, then stop at line - greater than or equal to the stopline. 
If stoplineno is -1, then - don't stop at all. - """ - self.stopframe = stopframe - self.returnframe = returnframe - self.quitting = False - # stoplineno >= 0 means: stop at line >= the stoplineno - # stoplineno -1 means: don't stop at all - self.stoplineno = stoplineno - - # Derived classes and clients can call the following methods - # to affect the stepping state. - - def set_until(self, frame, lineno=None): - """Stop when the line with the lineno greater than the current one is - reached or when returning from current frame.""" - # the name "until" is borrowed from gdb - if lineno is None: - lineno = frame.f_lineno + 1 - self._set_stopinfo(frame, frame, lineno) - - def set_step(self): - """Stop after one line of code.""" - # Issue #13183: pdb skips frames after hitting a breakpoint and running - # step commands. - # Restore the trace function in the caller (that may not have been set - # for performance reasons) when returning from the current frame. - if self.frame_returning: - caller_frame = self.frame_returning.f_back - if caller_frame and not caller_frame.f_trace: - caller_frame.f_trace = self.trace_dispatch - self._set_stopinfo(None, None) - - def set_next(self, frame): - """Stop on the next line in or below the given frame.""" - self._set_stopinfo(frame, None) - - def set_return(self, frame): - """Stop when returning from the given frame.""" - if frame.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS: - self._set_stopinfo(frame, None, -1) - else: - self._set_stopinfo(frame.f_back, frame) - - def set_trace(self, frame=None): - """Start debugging from frame. - - If frame is not specified, debugging starts from caller's frame. - """ - if frame is None: - frame = sys._getframe().f_back - self.reset() - while frame: - frame.f_trace = self.trace_dispatch - self.botframe = frame - frame = frame.f_back - self.set_step() - sys.settrace(self.trace_dispatch) - - def set_continue(self): - """Stop only at breakpoints or when finished. - - If there are no breakpoints, set the system trace function to None. - """ - # Don't stop except at breakpoints or when finished - self._set_stopinfo(self.botframe, None, -1) - if not self.breaks: - # no breakpoints; run without debugger overhead - sys.settrace(None) - frame = sys._getframe().f_back - while frame and frame is not self.botframe: - del frame.f_trace - frame = frame.f_back - - def set_quit(self): - """Set quitting attribute to True. - - Raises BdbQuit exception in the next call to a dispatch_*() method. - """ - self.stopframe = self.botframe - self.returnframe = None - self.quitting = True - sys.settrace(None) - - # Derived classes and clients can call the following methods - # to manipulate breakpoints. These methods return an - # error message if something went wrong, None if all is well. - # Set_break prints out the breakpoint line and file:lineno. - # Call self.get_*break*() to see the breakpoints or better - # for bp in Breakpoint.bpbynumber: if bp: bp.bpprint(). - - def _add_to_breaks(self, filename, lineno): - """Add breakpoint to breaks, if not already there.""" - bp_linenos = self.breaks.setdefault(filename, []) - if lineno not in bp_linenos: - bp_linenos.append(lineno) - - def set_break(self, filename, lineno, temporary=False, cond=None, - funcname=None): - """Set a new breakpoint for filename:lineno. - - If lineno doesn't exist for the filename, return an error message. - The filename should be in canonical form. 
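A sketch of how the stepping methods drive _set_stopinfo(); StepCounter is a hypothetical subclass name:

import bdb

class StepCounter(bdb.Bdb):
    steps = 0
    def user_line(self, frame):
        self.steps += 1
        if self.steps >= 3:
            self.set_quit()   # dispatch_line() raises BdbQuit right after this
        else:
            self.set_step()   # keep stopping, one line at a time

d = StepCounter()
d.run('a = 1\nb = 2\nc = 3\nd_ = 4')
assert d.steps == 3           # BdbQuit aborted tracing before line 3 ran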
- """ - filename = self.canonic(filename) - import linecache # Import as late as possible - line = linecache.getline(filename, lineno) - if not line: - return 'Line %s:%d does not exist' % (filename, lineno) - self._add_to_breaks(filename, lineno) - bp = Breakpoint(filename, lineno, temporary, cond, funcname) - return None - - def _load_breaks(self): - """Apply all breakpoints (set in other instances) to this one. - - Populates this instance's breaks list from the Breakpoint class's - list, which can have breakpoints set by another Bdb instance. This - is necessary for interactive sessions to keep the breakpoints - active across multiple calls to run(). - """ - for (filename, lineno) in Breakpoint.bplist.keys(): - self._add_to_breaks(filename, lineno) - - def _prune_breaks(self, filename, lineno): - """Prune breakpoints for filename:lineno. - - A list of breakpoints is maintained in the Bdb instance and in - the Breakpoint class. If a breakpoint in the Bdb instance no - longer exists in the Breakpoint class, then it's removed from the - Bdb instance. - """ - if (filename, lineno) not in Breakpoint.bplist: - self.breaks[filename].remove(lineno) - if not self.breaks[filename]: - del self.breaks[filename] - - def clear_break(self, filename, lineno): - """Delete breakpoints for filename:lineno. - - If no breakpoints were set, return an error message. - """ - filename = self.canonic(filename) - if filename not in self.breaks: - return 'There are no breakpoints in %s' % filename - if lineno not in self.breaks[filename]: - return 'There is no breakpoint at %s:%d' % (filename, lineno) - # If there's only one bp in the list for that file,line - # pair, then remove the breaks entry - for bp in Breakpoint.bplist[filename, lineno][:]: - bp.deleteMe() - self._prune_breaks(filename, lineno) - return None - - def clear_bpbynumber(self, arg): - """Delete a breakpoint by its index in Breakpoint.bpbynumber. - - If arg is invalid, return an error message. - """ - try: - bp = self.get_bpbynumber(arg) - except ValueError as err: - return str(err) - bp.deleteMe() - self._prune_breaks(bp.file, bp.line) - return None - - def clear_all_file_breaks(self, filename): - """Delete all breakpoints in filename. - - If none were set, return an error message. - """ - filename = self.canonic(filename) - if filename not in self.breaks: - return 'There are no breakpoints in %s' % filename - for line in self.breaks[filename]: - blist = Breakpoint.bplist[filename, line] - for bp in blist: - bp.deleteMe() - del self.breaks[filename] - return None - - def clear_all_breaks(self): - """Delete all existing breakpoints. - - If none were set, return an error message. - """ - if not self.breaks: - return 'There are no breakpoints' - for bp in Breakpoint.bpbynumber: - if bp: - bp.deleteMe() - self.breaks = {} - return None - - def get_bpbynumber(self, arg): - """Return a breakpoint by its index in Breakpoint.bybpnumber. - - For invalid arg values or if the breakpoint doesn't exist, - raise a ValueError. 
- """ - if not arg: - raise ValueError('Breakpoint number expected') - try: - number = int(arg) - except ValueError: - raise ValueError('Non-numeric breakpoint number %s' % arg) from None - try: - bp = Breakpoint.bpbynumber[number] - except IndexError: - raise ValueError('Breakpoint number %d out of range' % number) from None - if bp is None: - raise ValueError('Breakpoint %d already deleted' % number) - return bp - - def get_break(self, filename, lineno): - """Return True if there is a breakpoint for filename:lineno.""" - filename = self.canonic(filename) - return filename in self.breaks and \ - lineno in self.breaks[filename] - - def get_breaks(self, filename, lineno): - """Return all breakpoints for filename:lineno. - - If no breakpoints are set, return an empty list. - """ - filename = self.canonic(filename) - return filename in self.breaks and \ - lineno in self.breaks[filename] and \ - Breakpoint.bplist[filename, lineno] or [] - - def get_file_breaks(self, filename): - """Return all lines with breakpoints for filename. - - If no breakpoints are set, return an empty list. - """ - filename = self.canonic(filename) - if filename in self.breaks: - return self.breaks[filename] - else: - return [] - - def get_all_breaks(self): - """Return all breakpoints that are set.""" - return self.breaks - - # Derived classes and clients can call the following method - # to get a data structure representing a stack trace. - - def get_stack(self, f, t): - """Return a list of (frame, lineno) in a stack trace and a size. - - List starts with original calling frame, if there is one. - Size may be number of frames above or below f. - """ - stack = [] - if t and t.tb_frame is f: - t = t.tb_next - while f is not None: - stack.append((f, f.f_lineno)) - if f is self.botframe: - break - f = f.f_back - stack.reverse() - i = max(0, len(stack) - 1) - while t is not None: - stack.append((t.tb_frame, t.tb_lineno)) - t = t.tb_next - if f is None: - i = max(0, len(stack) - 1) - return stack, i - - def format_stack_entry(self, frame_lineno, lprefix=': '): - """Return a string with information about a stack entry. - - The stack entry frame_lineno is a (frame, lineno) tuple. The - return string contains the canonical filename, the function name - or '', the input arguments, the return value, and the - line of code (if it exists). - - """ - import linecache, reprlib - frame, lineno = frame_lineno - filename = self.canonic(frame.f_code.co_filename) - s = '%s(%r)' % (filename, lineno) - if frame.f_code.co_name: - s += frame.f_code.co_name - else: - s += "" - s += '()' - if '__return__' in frame.f_locals: - rv = frame.f_locals['__return__'] - s += '->' - s += reprlib.repr(rv) - if lineno is not None: - line = linecache.getline(filename, lineno, frame.f_globals) - if line: - s += lprefix + line.strip() - else: - s += f'{lprefix}Warning: lineno is None' - return s - - # The following methods can be called by clients to use - # a debugger to debug a statement or an expression. - # Both can be given as a string, or a code object. - - def run(self, cmd, globals=None, locals=None): - """Debug a statement executed via the exec() function. - - globals defaults to __main__.dict; locals defaults to globals. 
- """ - if globals is None: - import __main__ - globals = __main__.__dict__ - if locals is None: - locals = globals - self.reset() - if isinstance(cmd, str): - cmd = compile(cmd, "", "exec") - sys.settrace(self.trace_dispatch) - try: - exec(cmd, globals, locals) - except BdbQuit: - pass - finally: - self.quitting = True - sys.settrace(None) - - def runeval(self, expr, globals=None, locals=None): - """Debug an expression executed via the eval() function. - - globals defaults to __main__.dict; locals defaults to globals. - """ - if globals is None: - import __main__ - globals = __main__.__dict__ - if locals is None: - locals = globals - self.reset() - sys.settrace(self.trace_dispatch) - try: - return eval(expr, globals, locals) - except BdbQuit: - pass - finally: - self.quitting = True - sys.settrace(None) - - def runctx(self, cmd, globals, locals): - """For backwards-compatibility. Defers to run().""" - # B/W compatibility - self.run(cmd, globals, locals) - - # This method is more useful to debug a single function call. - - def runcall(self, func, /, *args, **kwds): - """Debug a single function call. - - Return the result of the function call. - """ - self.reset() - sys.settrace(self.trace_dispatch) - res = None - try: - res = func(*args, **kwds) - except BdbQuit: - pass - finally: - self.quitting = True - sys.settrace(None) - return res - - -def set_trace(): - """Start debugging with a Bdb instance from the caller's frame.""" - Bdb().set_trace() - - -class Breakpoint: - """Breakpoint class. - - Implements temporary breakpoints, ignore counts, disabling and - (re)-enabling, and conditionals. - - Breakpoints are indexed by number through bpbynumber and by - the (file, line) tuple using bplist. The former points to a - single instance of class Breakpoint. The latter points to a - list of such instances since there may be more than one - breakpoint per line. - - When creating a breakpoint, its associated filename should be - in canonical form. If funcname is defined, a breakpoint hit will be - counted when the first line of that function is executed. A - conditional breakpoint always counts a hit. - """ - - # XXX Keeping state in the class is a mistake -- this means - # you cannot have more than one active Bdb instance. - - next = 1 # Next bp to be assigned - bplist = {} # indexed by (file, lineno) tuple - bpbynumber = [None] # Each entry is None or an instance of Bpt - # index 0 is unused, except for marking an - # effective break .... see effective() - - def __init__(self, file, line, temporary=False, cond=None, funcname=None): - self.funcname = funcname - # Needed if funcname is not None. - self.func_first_executable_line = None - self.file = file # This better be in canonical form! - self.line = line - self.temporary = temporary - self.cond = cond - self.enabled = True - self.ignore = 0 - self.hits = 0 - self.number = Breakpoint.next - Breakpoint.next += 1 - # Build the two lists - self.bpbynumber.append(self) - if (file, line) in self.bplist: - self.bplist[file, line].append(self) - else: - self.bplist[file, line] = [self] - - @staticmethod - def clearBreakpoints(): - Breakpoint.next = 1 - Breakpoint.bplist = {} - Breakpoint.bpbynumber = [None] - - def deleteMe(self): - """Delete the breakpoint from the list associated to a file:line. - - If it is the last breakpoint in that position, it also deletes - the entry for the file:line. 
- """ - - index = (self.file, self.line) - self.bpbynumber[self.number] = None # No longer in list - self.bplist[index].remove(self) - if not self.bplist[index]: - # No more bp for this f:l combo - del self.bplist[index] - - def enable(self): - """Mark the breakpoint as enabled.""" - self.enabled = True - - def disable(self): - """Mark the breakpoint as disabled.""" - self.enabled = False - - def bpprint(self, out=None): - """Print the output of bpformat(). - - The optional out argument directs where the output is sent - and defaults to standard output. - """ - if out is None: - out = sys.stdout - print(self.bpformat(), file=out) - - def bpformat(self): - """Return a string with information about the breakpoint. - - The information includes the breakpoint number, temporary - status, file:line position, break condition, number of times to - ignore, and number of times hit. - - """ - if self.temporary: - disp = 'del ' - else: - disp = 'keep ' - if self.enabled: - disp = disp + 'yes ' - else: - disp = disp + 'no ' - ret = '%-4dbreakpoint %s at %s:%d' % (self.number, disp, - self.file, self.line) - if self.cond: - ret += '\n\tstop only if %s' % (self.cond,) - if self.ignore: - ret += '\n\tignore next %d hits' % (self.ignore,) - if self.hits: - if self.hits > 1: - ss = 's' - else: - ss = '' - ret += '\n\tbreakpoint already hit %d time%s' % (self.hits, ss) - return ret - - def __str__(self): - "Return a condensed description of the breakpoint." - return 'breakpoint %s at %s:%s' % (self.number, self.file, self.line) - -# -----------end of Breakpoint class---------- - - -def checkfuncname(b, frame): - """Return True if break should happen here. - - Whether a break should happen depends on the way that b (the breakpoint) - was set. If it was set via line number, check if b.line is the same as - the one in the frame. If it was set via function name, check if this is - the right function and if it is on the first executable line. - """ - if not b.funcname: - # Breakpoint was set via line number. - if b.line != frame.f_lineno: - # Breakpoint was set at a line with a def statement and the function - # defined is called: don't break. - return False - return True - - # Breakpoint set via function name. - if frame.f_code.co_name != b.funcname: - # It's not a function call, but rather execution of def statement. - return False - - # We are in the right frame. - if not b.func_first_executable_line: - # The function is entered for the 1st time. - b.func_first_executable_line = frame.f_lineno - - if b.func_first_executable_line != frame.f_lineno: - # But we are not at the first line number: don't break. - return False - return True - - -def effective(file, line, frame): - """Return (active breakpoint, delete temporary flag) or (None, None) as - breakpoint to act upon. - - The "active breakpoint" is the first entry in bplist[line, file] (which - must exist) that is enabled, for which checkfuncname is True, and that - has neither a False condition nor a positive ignore count. The flag, - meaning that a temporary breakpoint should be deleted, is False only - when the condiion cannot be evaluated (in which case, ignore count is - ignored). - - If no such entry exists, then (None, None) is returned. 
- """ - possibles = Breakpoint.bplist[file, line] - for b in possibles: - if not b.enabled: - continue - if not checkfuncname(b, frame): - continue - # Count every hit when bp is enabled - b.hits += 1 - if not b.cond: - # If unconditional, and ignoring go on to next, else break - if b.ignore > 0: - b.ignore -= 1 - continue - else: - # breakpoint and marker that it's ok to delete if temporary - return (b, True) - else: - # Conditional bp. - # Ignore count applies only to those bpt hits where the - # condition evaluates to true. - try: - val = eval(b.cond, frame.f_globals, frame.f_locals) - if val: - if b.ignore > 0: - b.ignore -= 1 - # continue - else: - return (b, True) - # else: - # continue - except: - # if eval fails, most conservative thing is to stop on - # breakpoint regardless of ignore count. Don't delete - # temporary, as another hint to user. - return (b, False) - return (None, None) - - -# -------------------- testing -------------------- - -class Tdb(Bdb): - def user_call(self, frame, args): - name = frame.f_code.co_name - if not name: name = '???' - print('+++ call', name, args) - def user_line(self, frame): - import linecache - name = frame.f_code.co_name - if not name: name = '???' - fn = self.canonic(frame.f_code.co_filename) - line = linecache.getline(fn, frame.f_lineno, frame.f_globals) - print('+++', fn, frame.f_lineno, name, ':', line.strip()) - def user_return(self, frame, retval): - print('+++ return', retval) - def user_exception(self, frame, exc_stuff): - print('+++ exception', exc_stuff) - self.set_continue() - -def foo(n): - print('foo(', n, ')') - x = bar(n*10) - print('bar returned', x) - -def bar(a): - print('bar(', a, ')') - return a/2 - -def test(): - t = Tdb() - t.run('import bdb; bdb.foo(10)') diff --git a/python/python3_12/examples/bisect.py b/python/python3_12/examples/bisect.py deleted file mode 100644 index ca6ca72408..0000000000 --- a/python/python3_12/examples/bisect.py +++ /dev/null @@ -1,118 +0,0 @@ -"""Bisection algorithms.""" - - -def insort_right(a, x, lo=0, hi=None, *, key=None): - """Insert item x in list a, and keep it sorted assuming a is sorted. - - If x is already in a, insert it to the right of the rightmost x. - - Optional args lo (default 0) and hi (default len(a)) bound the - slice of a to be searched. - - A custom key function can be supplied to customize the sort order. - """ - if key is None: - lo = bisect_right(a, x, lo, hi) - else: - lo = bisect_right(a, key(x), lo, hi, key=key) - a.insert(lo, x) - - -def bisect_right(a, x, lo=0, hi=None, *, key=None): - """Return the index where to insert item x in list a, assuming a is sorted. - - The return value i is such that all e in a[:i] have e <= x, and all e in - a[i:] have e > x. So if x already appears in the list, a.insert(i, x) will - insert just after the rightmost x already there. - - Optional args lo (default 0) and hi (default len(a)) bound the - slice of a to be searched. - - A custom key function can be supplied to customize the sort order. - """ - - if lo < 0: - raise ValueError('lo must be non-negative') - if hi is None: - hi = len(a) - # Note, the comparison uses "<" to match the - # __lt__() logic in list.sort() and in heapq. - if key is None: - while lo < hi: - mid = (lo + hi) // 2 - if x < a[mid]: - hi = mid - else: - lo = mid + 1 - else: - while lo < hi: - mid = (lo + hi) // 2 - if x < key(a[mid]): - hi = mid - else: - lo = mid + 1 - return lo - - -def insort_left(a, x, lo=0, hi=None, *, key=None): - """Insert item x in list a, and keep it sorted assuming a is sorted. 
-
-    If x is already in a, insert it to the left of the leftmost x.
-
-    Optional args lo (default 0) and hi (default len(a)) bound the
-    slice of a to be searched.
-
-    A custom key function can be supplied to customize the sort order.
-    """
-
-    if key is None:
-        lo = bisect_left(a, x, lo, hi)
-    else:
-        lo = bisect_left(a, key(x), lo, hi, key=key)
-    a.insert(lo, x)
-
-def bisect_left(a, x, lo=0, hi=None, *, key=None):
-    """Return the index where to insert item x in list a, assuming a is sorted.
-
-    The return value i is such that all e in a[:i] have e < x, and all e in
-    a[i:] have e >= x.  So if x already appears in the list, a.insert(i, x) will
-    insert just before the leftmost x already there.
-
-    Optional args lo (default 0) and hi (default len(a)) bound the
-    slice of a to be searched.
-
-    A custom key function can be supplied to customize the sort order.
-    """
-
-    if lo < 0:
-        raise ValueError('lo must be non-negative')
-    if hi is None:
-        hi = len(a)
-    # Note, the comparison uses "<" to match the
-    # __lt__() logic in list.sort() and in heapq.
-    if key is None:
-        while lo < hi:
-            mid = (lo + hi) // 2
-            if a[mid] < x:
-                lo = mid + 1
-            else:
-                hi = mid
-    else:
-        while lo < hi:
-            mid = (lo + hi) // 2
-            if key(a[mid]) < x:
-                lo = mid + 1
-            else:
-                hi = mid
-    return lo
-
-
-# Overwrite above definitions with a fast C implementation
-try:
-    from _bisect import *
-except ImportError:
-    pass
-
-# Create aliases
-bisect = bisect_right
-insort = insort_right
diff --git a/python/python3_12/examples/bz2.py b/python/python3_12/examples/bz2.py
deleted file mode 100644
index fabe4f73c8..0000000000
--- a/python/python3_12/examples/bz2.py
+++ /dev/null
@@ -1,344 +0,0 @@
-"""Interface to the libbzip2 compression library.
-
-This module provides a file interface, classes for incremental
-(de)compression, and functions for one-shot (de)compression.
-"""
-
-__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
-           "open", "compress", "decompress"]
-
-__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"
-
-from builtins import open as _builtin_open
-import io
-import os
-import _compression
-
-from _bz2 import BZ2Compressor, BZ2Decompressor
-
-
-_MODE_CLOSED = 0
-_MODE_READ = 1
-# Value 2 no longer used
-_MODE_WRITE = 3
-
-
-class BZ2File(_compression.BaseStream):
-
-    """A file object providing transparent bzip2 (de)compression.
-
-    A BZ2File can act as a wrapper for an existing file object, or refer
-    directly to a named file on disk.
-
-    Note that BZ2File provides a *binary* file interface - data read is
-    returned as bytes, and data to be written should be given as bytes.
-    """
-
-    def __init__(self, filename, mode="r", *, compresslevel=9):
-        """Open a bzip2-compressed file.
-
-        If filename is a str, bytes, or PathLike object, it gives the
-        name of the file to be opened. Otherwise, it should be a file
-        object, which will be used to read or write the compressed data.
-
-        mode can be 'r' for reading (default), 'w' for (over)writing,
-        'x' for creating exclusively, or 'a' for appending. These can
-        equivalently be given as 'rb', 'wb', 'xb', and 'ab'.
-
-        If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
-        and 9 specifying the level of compression: 1 produces the least
-        compression, and 9 (default) produces the most compression.
-
-        If mode is 'r', the input file may be the concatenation of
-        multiple compressed streams.
- """ - self._fp = None - self._closefp = False - self._mode = _MODE_CLOSED - - if not (1 <= compresslevel <= 9): - raise ValueError("compresslevel must be between 1 and 9") - - if mode in ("", "r", "rb"): - mode = "rb" - mode_code = _MODE_READ - elif mode in ("w", "wb"): - mode = "wb" - mode_code = _MODE_WRITE - self._compressor = BZ2Compressor(compresslevel) - elif mode in ("x", "xb"): - mode = "xb" - mode_code = _MODE_WRITE - self._compressor = BZ2Compressor(compresslevel) - elif mode in ("a", "ab"): - mode = "ab" - mode_code = _MODE_WRITE - self._compressor = BZ2Compressor(compresslevel) - else: - raise ValueError("Invalid mode: %r" % (mode,)) - - if isinstance(filename, (str, bytes, os.PathLike)): - self._fp = _builtin_open(filename, mode) - self._closefp = True - self._mode = mode_code - elif hasattr(filename, "read") or hasattr(filename, "write"): - self._fp = filename - self._mode = mode_code - else: - raise TypeError("filename must be a str, bytes, file or PathLike object") - - if self._mode == _MODE_READ: - raw = _compression.DecompressReader(self._fp, - BZ2Decompressor, trailing_error=OSError) - self._buffer = io.BufferedReader(raw) - else: - self._pos = 0 - - def close(self): - """Flush and close the file. - - May be called more than once without error. Once the file is - closed, any other operation on it will raise a ValueError. - """ - if self._mode == _MODE_CLOSED: - return - try: - if self._mode == _MODE_READ: - self._buffer.close() - elif self._mode == _MODE_WRITE: - self._fp.write(self._compressor.flush()) - self._compressor = None - finally: - try: - if self._closefp: - self._fp.close() - finally: - self._fp = None - self._closefp = False - self._mode = _MODE_CLOSED - self._buffer = None - - @property - def closed(self): - """True if this file is closed.""" - return self._mode == _MODE_CLOSED - - def fileno(self): - """Return the file descriptor for the underlying file.""" - self._check_not_closed() - return self._fp.fileno() - - def seekable(self): - """Return whether the file supports seeking.""" - return self.readable() and self._buffer.seekable() - - def readable(self): - """Return whether the file was opened for reading.""" - self._check_not_closed() - return self._mode == _MODE_READ - - def writable(self): - """Return whether the file was opened for writing.""" - self._check_not_closed() - return self._mode == _MODE_WRITE - - def peek(self, n=0): - """Return buffered data without advancing the file position. - - Always returns at least one byte of data, unless at EOF. - The exact number of bytes returned is unspecified. - """ - self._check_can_read() - # Relies on the undocumented fact that BufferedReader.peek() - # always returns at least one byte (except at EOF), independent - # of the value of n - return self._buffer.peek(n) - - def read(self, size=-1): - """Read up to size uncompressed bytes from the file. - - If size is negative or omitted, read until EOF is reached. - Returns b'' if the file is already at EOF. - """ - self._check_can_read() - return self._buffer.read(size) - - def read1(self, size=-1): - """Read up to size uncompressed bytes, while trying to avoid - making multiple reads from the underlying stream. Reads up to a - buffer's worth of data if size is negative. - - Returns b'' if the file is at EOF. - """ - self._check_can_read() - if size < 0: - size = io.DEFAULT_BUFFER_SIZE - return self._buffer.read1(size) - - def readinto(self, b): - """Read bytes into b. - - Returns the number of bytes read (0 for EOF). 
- """ - self._check_can_read() - return self._buffer.readinto(b) - - def readline(self, size=-1): - """Read a line of uncompressed bytes from the file. - - The terminating newline (if present) is retained. If size is - non-negative, no more than size bytes will be read (in which - case the line may be incomplete). Returns b'' if already at EOF. - """ - if not isinstance(size, int): - if not hasattr(size, "__index__"): - raise TypeError("Integer argument expected") - size = size.__index__() - self._check_can_read() - return self._buffer.readline(size) - - def readlines(self, size=-1): - """Read a list of lines of uncompressed bytes from the file. - - size can be specified to control the number of lines read: no - further lines will be read once the total size of the lines read - so far equals or exceeds size. - """ - if not isinstance(size, int): - if not hasattr(size, "__index__"): - raise TypeError("Integer argument expected") - size = size.__index__() - self._check_can_read() - return self._buffer.readlines(size) - - def write(self, data): - """Write a byte string to the file. - - Returns the number of uncompressed bytes written, which is - always the length of data in bytes. Note that due to buffering, - the file on disk may not reflect the data written until close() - is called. - """ - self._check_can_write() - if isinstance(data, (bytes, bytearray)): - length = len(data) - else: - # accept any data that supports the buffer protocol - data = memoryview(data) - length = data.nbytes - - compressed = self._compressor.compress(data) - self._fp.write(compressed) - self._pos += length - return length - - def writelines(self, seq): - """Write a sequence of byte strings to the file. - - Returns the number of uncompressed bytes written. - seq can be any iterable yielding byte strings. - - Line separators are not added between the written byte strings. - """ - return _compression.BaseStream.writelines(self, seq) - - def seek(self, offset, whence=io.SEEK_SET): - """Change the file position. - - The new position is specified by offset, relative to the - position indicated by whence. Values for whence are: - - 0: start of stream (default); offset must not be negative - 1: current stream position - 2: end of stream; offset must not be positive - - Returns the new file position. - - Note that seeking is emulated, so depending on the parameters, - this operation may be extremely slow. - """ - self._check_can_seek() - return self._buffer.seek(offset, whence) - - def tell(self): - """Return the current file position.""" - self._check_not_closed() - if self._mode == _MODE_READ: - return self._buffer.tell() - return self._pos - - -def open(filename, mode="rb", compresslevel=9, - encoding=None, errors=None, newline=None): - """Open a bzip2-compressed file in binary or text mode. - - The filename argument can be an actual filename (a str, bytes, or - PathLike object), or an existing file object to read from or write - to. - - The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or - "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode. - The default mode is "rb", and the default compresslevel is 9. - - For binary mode, this function is equivalent to the BZ2File - constructor: BZ2File(filename, mode, compresslevel). In this case, - the encoding, errors and newline arguments must not be provided. - - For text mode, a BZ2File object is created, and wrapped in an - io.TextIOWrapper instance with the specified encoding, error - handling behavior, and line ending(s). 
- - """ - if "t" in mode: - if "b" in mode: - raise ValueError("Invalid mode: %r" % (mode,)) - else: - if encoding is not None: - raise ValueError("Argument 'encoding' not supported in binary mode") - if errors is not None: - raise ValueError("Argument 'errors' not supported in binary mode") - if newline is not None: - raise ValueError("Argument 'newline' not supported in binary mode") - - bz_mode = mode.replace("t", "") - binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel) - - if "t" in mode: - encoding = io.text_encoding(encoding) - return io.TextIOWrapper(binary_file, encoding, errors, newline) - else: - return binary_file - - -def compress(data, compresslevel=9): - """Compress a block of data. - - compresslevel, if given, must be a number between 1 and 9. - - For incremental compression, use a BZ2Compressor object instead. - """ - comp = BZ2Compressor(compresslevel) - return comp.compress(data) + comp.flush() - - -def decompress(data): - """Decompress a block of data. - - For incremental decompression, use a BZ2Decompressor object instead. - """ - results = [] - while data: - decomp = BZ2Decompressor() - try: - res = decomp.decompress(data) - except OSError: - if results: - break # Leftover data is not a valid bzip2 stream; ignore it. - else: - raise # Error on the first iteration; bail out. - results.append(res) - if not decomp.eof: - raise ValueError("Compressed data ended before the " - "end-of-stream marker was reached") - data = decomp.unused_data - return b"".join(results) diff --git a/python/python3_12/examples/calendar.py b/python/python3_12/examples/calendar.py deleted file mode 100644 index baab52a157..0000000000 --- a/python/python3_12/examples/calendar.py +++ /dev/null @@ -1,798 +0,0 @@ -"""Calendar printing functions - -Note when comparing these calendars to the ones printed by cal(1): By -default, these calendars have Monday as the first day of the week, and -Sunday as the last (the European convention). 
Use setfirstweekday() to -set the first day of the week (0=Monday, 6=Sunday).""" - -import sys -import datetime -from enum import IntEnum, global_enum -import locale as _locale -from itertools import repeat -import warnings - -__all__ = ["IllegalMonthError", "IllegalWeekdayError", "setfirstweekday", - "firstweekday", "isleap", "leapdays", "weekday", "monthrange", - "monthcalendar", "prmonth", "month", "prcal", "calendar", - "timegm", "month_name", "month_abbr", "day_name", "day_abbr", - "Calendar", "TextCalendar", "HTMLCalendar", "LocaleTextCalendar", - "LocaleHTMLCalendar", "weekheader", - "Day", "Month", "JANUARY", "FEBRUARY", "MARCH", - "APRIL", "MAY", "JUNE", "JULY", - "AUGUST", "SEPTEMBER", "OCTOBER", "NOVEMBER", "DECEMBER", - "MONDAY", "TUESDAY", "WEDNESDAY", "THURSDAY", "FRIDAY", - "SATURDAY", "SUNDAY"] - -# Exception raised for bad input (with string parameter for details) -error = ValueError - -# Exceptions raised for bad input -class IllegalMonthError(ValueError): - def __init__(self, month): - self.month = month - def __str__(self): - return "bad month number %r; must be 1-12" % self.month - - -class IllegalWeekdayError(ValueError): - def __init__(self, weekday): - self.weekday = weekday - def __str__(self): - return "bad weekday number %r; must be 0 (Monday) to 6 (Sunday)" % self.weekday - - -def __getattr__(name): - if name in ('January', 'February'): - warnings.warn(f"The '{name}' attribute is deprecated, use '{name.upper()}' instead", - DeprecationWarning, stacklevel=2) - if name == 'January': - return 1 - else: - return 2 - - raise AttributeError(f"module '{__name__}' has no attribute '{name}'") - - -# Constants for months -@global_enum -class Month(IntEnum): - JANUARY = 1 - FEBRUARY = 2 - MARCH = 3 - APRIL = 4 - MAY = 5 - JUNE = 6 - JULY = 7 - AUGUST = 8 - SEPTEMBER = 9 - OCTOBER = 10 - NOVEMBER = 11 - DECEMBER = 12 - - -# Constants for days -@global_enum -class Day(IntEnum): - MONDAY = 0 - TUESDAY = 1 - WEDNESDAY = 2 - THURSDAY = 3 - FRIDAY = 4 - SATURDAY = 5 - SUNDAY = 6 - - -# Number of days per month (except for February in leap years) -mdays = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] - -# This module used to have hard-coded lists of day and month names, as -# English strings. The classes following emulate a read-only version of -# that, but supply localized names. Note that the values are computed -# fresh on each call, in case the user changes locale between calls. - -class _localized_month: - - _months = [datetime.date(2001, i+1, 1).strftime for i in range(12)] - _months.insert(0, lambda x: "") - - def __init__(self, format): - self.format = format - - def __getitem__(self, i): - funcs = self._months[i] - if isinstance(i, slice): - return [f(self.format) for f in funcs] - else: - return funcs(self.format) - - def __len__(self): - return 13 - - -class _localized_day: - - # January 1, 2001, was a Monday. - _days = [datetime.date(2001, 1, i+1).strftime for i in range(7)] - - def __init__(self, format): - self.format = format - - def __getitem__(self, i): - funcs = self._days[i] - if isinstance(i, slice): - return [f(self.format) for f in funcs] - else: - return funcs(self.format) - - def __len__(self): - return 7 - - -# Full and abbreviated names of weekdays -day_name = _localized_day('%A') -day_abbr = _localized_day('%a') - -# Full and abbreviated names of months (1-based arrays!!!) 
-month_name = _localized_month('%B') -month_abbr = _localized_month('%b') - - -def isleap(year): - """Return True for leap years, False for non-leap years.""" - return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0) - - -def leapdays(y1, y2): - """Return number of leap years in range [y1, y2). - Assume y1 <= y2.""" - y1 -= 1 - y2 -= 1 - return (y2//4 - y1//4) - (y2//100 - y1//100) + (y2//400 - y1//400) - - -def weekday(year, month, day): - """Return weekday (0-6 ~ Mon-Sun) for year, month (1-12), day (1-31).""" - if not datetime.MINYEAR <= year <= datetime.MAXYEAR: - year = 2000 + year % 400 - return Day(datetime.date(year, month, day).weekday()) - - -def monthrange(year, month): - """Return weekday (0-6 ~ Mon-Sun) and number of days (28-31) for - year, month.""" - if not 1 <= month <= 12: - raise IllegalMonthError(month) - day1 = weekday(year, month, 1) - ndays = mdays[month] + (month == FEBRUARY and isleap(year)) - return day1, ndays - - -def _monthlen(year, month): - return mdays[month] + (month == FEBRUARY and isleap(year)) - - -def _prevmonth(year, month): - if month == 1: - return year-1, 12 - else: - return year, month-1 - - -def _nextmonth(year, month): - if month == 12: - return year+1, 1 - else: - return year, month+1 - - -class Calendar(object): - """ - Base calendar class. This class doesn't do any formatting. It simply - provides data to subclasses. - """ - - def __init__(self, firstweekday=0): - self.firstweekday = firstweekday # 0 = Monday, 6 = Sunday - - def getfirstweekday(self): - return self._firstweekday % 7 - - def setfirstweekday(self, firstweekday): - self._firstweekday = firstweekday - - firstweekday = property(getfirstweekday, setfirstweekday) - - def iterweekdays(self): - """ - Return an iterator for one week of weekday numbers starting with the - configured first one. - """ - for i in range(self.firstweekday, self.firstweekday + 7): - yield i%7 - - def itermonthdates(self, year, month): - """ - Return an iterator for one month. The iterator will yield datetime.date - values and will always iterate through complete weeks, so it will yield - dates outside the specified month. - """ - for y, m, d in self.itermonthdays3(year, month): - yield datetime.date(y, m, d) - - def itermonthdays(self, year, month): - """ - Like itermonthdates(), but will yield day numbers. For days outside - the specified month the day number is 0. - """ - day1, ndays = monthrange(year, month) - days_before = (day1 - self.firstweekday) % 7 - yield from repeat(0, days_before) - yield from range(1, ndays + 1) - days_after = (self.firstweekday - day1 - ndays) % 7 - yield from repeat(0, days_after) - - def itermonthdays2(self, year, month): - """ - Like itermonthdates(), but will yield (day number, weekday number) - tuples. For days outside the specified month the day number is 0. - """ - for i, d in enumerate(self.itermonthdays(year, month), self.firstweekday): - yield d, i % 7 - - def itermonthdays3(self, year, month): - """ - Like itermonthdates(), but will yield (year, month, day) tuples. Can be - used for dates outside of datetime.date range. 
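A sketch of the data layer before any formatting is applied (the dates are arbitrary):

import calendar

cal = calendar.Calendar(firstweekday=6)              # weeks start on Sunday
assert list(cal.iterweekdays()) == [6, 0, 1, 2, 3, 4, 5]
# monthrange() returns (weekday of day 1, number of days):
assert calendar.monthrange(2024, 2) == (3, 29)       # leap year, starts Thursday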
- """ - day1, ndays = monthrange(year, month) - days_before = (day1 - self.firstweekday) % 7 - days_after = (self.firstweekday - day1 - ndays) % 7 - y, m = _prevmonth(year, month) - end = _monthlen(y, m) + 1 - for d in range(end-days_before, end): - yield y, m, d - for d in range(1, ndays + 1): - yield year, month, d - y, m = _nextmonth(year, month) - for d in range(1, days_after + 1): - yield y, m, d - - def itermonthdays4(self, year, month): - """ - Like itermonthdates(), but will yield (year, month, day, day_of_week) tuples. - Can be used for dates outside of datetime.date range. - """ - for i, (y, m, d) in enumerate(self.itermonthdays3(year, month)): - yield y, m, d, (self.firstweekday + i) % 7 - - def monthdatescalendar(self, year, month): - """ - Return a matrix (list of lists) representing a month's calendar. - Each row represents a week; week entries are datetime.date values. - """ - dates = list(self.itermonthdates(year, month)) - return [ dates[i:i+7] for i in range(0, len(dates), 7) ] - - def monthdays2calendar(self, year, month): - """ - Return a matrix representing a month's calendar. - Each row represents a week; week entries are - (day number, weekday number) tuples. Day numbers outside this month - are zero. - """ - days = list(self.itermonthdays2(year, month)) - return [ days[i:i+7] for i in range(0, len(days), 7) ] - - def monthdayscalendar(self, year, month): - """ - Return a matrix representing a month's calendar. - Each row represents a week; days outside this month are zero. - """ - days = list(self.itermonthdays(year, month)) - return [ days[i:i+7] for i in range(0, len(days), 7) ] - - def yeardatescalendar(self, year, width=3): - """ - Return the data for the specified year ready for formatting. The return - value is a list of month rows. Each month row contains up to width months. - Each month contains between 4 and 6 weeks and each week contains 1-7 - days. Days are datetime.date objects. - """ - months = [self.monthdatescalendar(year, m) for m in Month] - return [months[i:i+width] for i in range(0, len(months), width) ] - - def yeardays2calendar(self, year, width=3): - """ - Return the data for the specified year ready for formatting (similar to - yeardatescalendar()). Entries in the week lists are - (day number, weekday number) tuples. Day numbers outside this month are - zero. - """ - months = [self.monthdays2calendar(year, m) for m in Month] - return [months[i:i+width] for i in range(0, len(months), width) ] - - def yeardayscalendar(self, year, width=3): - """ - Return the data for the specified year ready for formatting (similar to - yeardatescalendar()). Entries in the week lists are day numbers. - Day numbers outside this month are zero. - """ - months = [self.monthdayscalendar(year, m) for m in Month] - return [months[i:i+width] for i in range(0, len(months), width) ] - - -class TextCalendar(Calendar): - """ - Subclass of Calendar that outputs a calendar as a simple plain text - similar to the UNIX program cal. - """ - - def prweek(self, theweek, width): - """ - Print a single week (no newline). - """ - print(self.formatweek(theweek, width), end='') - - def formatday(self, day, weekday, width): - """ - Returns a formatted day. - """ - if day == 0: - s = '' - else: - s = '%2i' % day # right-align single-digit days - return s.center(width) - - def formatweek(self, theweek, width): - """ - Returns a single week in a string (no newline). 
- """ - return ' '.join(self.formatday(d, wd, width) for (d, wd) in theweek) - - def formatweekday(self, day, width): - """ - Returns a formatted week day name. - """ - if width >= 9: - names = day_name - else: - names = day_abbr - return names[day][:width].center(width) - - def formatweekheader(self, width): - """ - Return a header for a week. - """ - return ' '.join(self.formatweekday(i, width) for i in self.iterweekdays()) - - def formatmonthname(self, theyear, themonth, width, withyear=True): - """ - Return a formatted month name. - """ - s = month_name[themonth] - if withyear: - s = "%s %r" % (s, theyear) - return s.center(width) - - def prmonth(self, theyear, themonth, w=0, l=0): - """ - Print a month's calendar. - """ - print(self.formatmonth(theyear, themonth, w, l), end='') - - def formatmonth(self, theyear, themonth, w=0, l=0): - """ - Return a month's calendar string (multi-line). - """ - w = max(2, w) - l = max(1, l) - s = self.formatmonthname(theyear, themonth, 7 * (w + 1) - 1) - s = s.rstrip() - s += '\n' * l - s += self.formatweekheader(w).rstrip() - s += '\n' * l - for week in self.monthdays2calendar(theyear, themonth): - s += self.formatweek(week, w).rstrip() - s += '\n' * l - return s - - def formatyear(self, theyear, w=2, l=1, c=6, m=3): - """ - Returns a year's calendar as a multi-line string. - """ - w = max(2, w) - l = max(1, l) - c = max(2, c) - colwidth = (w + 1) * 7 - 1 - v = [] - a = v.append - a(repr(theyear).center(colwidth*m+c*(m-1)).rstrip()) - a('\n'*l) - header = self.formatweekheader(w) - for (i, row) in enumerate(self.yeardays2calendar(theyear, m)): - # months in this row - months = range(m*i+1, min(m*(i+1)+1, 13)) - a('\n'*l) - names = (self.formatmonthname(theyear, k, colwidth, False) - for k in months) - a(formatstring(names, colwidth, c).rstrip()) - a('\n'*l) - headers = (header for k in months) - a(formatstring(headers, colwidth, c).rstrip()) - a('\n'*l) - # max number of weeks for this row - height = max(len(cal) for cal in row) - for j in range(height): - weeks = [] - for cal in row: - if j >= len(cal): - weeks.append('') - else: - weeks.append(self.formatweek(cal[j], w)) - a(formatstring(weeks, colwidth, c).rstrip()) - a('\n' * l) - return ''.join(v) - - def pryear(self, theyear, w=0, l=0, c=6, m=3): - """Print a year's calendar.""" - print(self.formatyear(theyear, w, l, c, m), end='') - - -class HTMLCalendar(Calendar): - """ - This calendar returns complete HTML pages. - """ - - # CSS classes for the day s - cssclasses = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"] - - # CSS classes for the day s - cssclasses_weekday_head = cssclasses - - # CSS class for the days before and after current month - cssclass_noday = "noday" - - # CSS class for the month's head - cssclass_month_head = "month" - - # CSS class for the month - cssclass_month = "month" - - # CSS class for the year's table head - cssclass_year_head = "year" - - # CSS class for the whole year table - cssclass_year = "year" - - def formatday(self, day, weekday): - """ - Return a day as a table cell. - """ - if day == 0: - # day outside month - return ' ' % self.cssclass_noday - else: - return '%d' % (self.cssclasses[weekday], day) - - def formatweek(self, theweek): - """ - Return a complete week as a table row. - """ - s = ''.join(self.formatday(d, wd) for (d, wd) in theweek) - return '%s' % s - - def formatweekday(self, day): - """ - Return a weekday name as a table header. 
- """ - return '%s' % ( - self.cssclasses_weekday_head[day], day_abbr[day]) - - def formatweekheader(self): - """ - Return a header for a week as a table row. - """ - s = ''.join(self.formatweekday(i) for i in self.iterweekdays()) - return '%s' % s - - def formatmonthname(self, theyear, themonth, withyear=True): - """ - Return a month name as a table row. - """ - if withyear: - s = '%s %s' % (month_name[themonth], theyear) - else: - s = '%s' % month_name[themonth] - return '%s' % ( - self.cssclass_month_head, s) - - def formatmonth(self, theyear, themonth, withyear=True): - """ - Return a formatted month as a table. - """ - v = [] - a = v.append - a('' % ( - self.cssclass_month)) - a('\n') - a(self.formatmonthname(theyear, themonth, withyear=withyear)) - a('\n') - a(self.formatweekheader()) - a('\n') - for week in self.monthdays2calendar(theyear, themonth): - a(self.formatweek(week)) - a('\n') - a('
</table>') - a('\n') - return ''.join(v) - - def formatyear(self, theyear, width=3): - """ - Return a formatted year as a table of tables. - """ - v = [] - a = v.append - width = max(width, 1) - a('<table border="0" cellpadding="0" cellspacing="0" class="%s">' % - self.cssclass_year) - a('\n') - a('<tr><th colspan="%d" class="%s">%s</th></tr>' % ( - width, self.cssclass_year_head, theyear)) - for i in range(JANUARY, JANUARY+12, width): - # months in this row - months = range(i, min(i+width, 13)) - a('<tr>') - for m in months: - a('<td>') - a(self.formatmonth(theyear, m, withyear=False)) - a('</td>') - a('</tr>') - a('</table>
') - return ''.join(v) - - def formatyearpage(self, theyear, width=3, css='calendar.css', encoding=None): - """ - Return a formatted year as a complete HTML page. - """ - if encoding is None: - encoding = sys.getdefaultencoding() - v = [] - a = v.append - a('\n' % encoding) - a('\n') - a('\n') - a('\n') - a('\n' % encoding) - if css is not None: - a('\n' % css) - a('Calendar for %d\n' % theyear) - a('\n') - a('\n') - a(self.formatyear(theyear, width)) - a('\n') - a('\n') - return ''.join(v).encode(encoding, "xmlcharrefreplace") - - -class different_locale: - def __init__(self, locale): - self.locale = locale - self.oldlocale = None - - def __enter__(self): - self.oldlocale = _locale.setlocale(_locale.LC_TIME, None) - _locale.setlocale(_locale.LC_TIME, self.locale) - - def __exit__(self, *args): - if self.oldlocale is None: - return - _locale.setlocale(_locale.LC_TIME, self.oldlocale) - - -def _get_default_locale(): - locale = _locale.setlocale(_locale.LC_TIME, None) - if locale == "C": - with different_locale(""): - # The LC_TIME locale does not seem to be configured: - # get the user preferred locale. - locale = _locale.setlocale(_locale.LC_TIME, None) - return locale - - -class LocaleTextCalendar(TextCalendar): - """ - This class can be passed a locale name in the constructor and will return - month and weekday names in the specified locale. - """ - - def __init__(self, firstweekday=0, locale=None): - TextCalendar.__init__(self, firstweekday) - if locale is None: - locale = _get_default_locale() - self.locale = locale - - def formatweekday(self, day, width): - with different_locale(self.locale): - return super().formatweekday(day, width) - - def formatmonthname(self, theyear, themonth, width, withyear=True): - with different_locale(self.locale): - return super().formatmonthname(theyear, themonth, width, withyear) - - -class LocaleHTMLCalendar(HTMLCalendar): - """ - This class can be passed a locale name in the constructor and will return - month and weekday names in the specified locale. 
- """ - def __init__(self, firstweekday=0, locale=None): - HTMLCalendar.__init__(self, firstweekday) - if locale is None: - locale = _get_default_locale() - self.locale = locale - - def formatweekday(self, day): - with different_locale(self.locale): - return super().formatweekday(day) - - def formatmonthname(self, theyear, themonth, withyear=True): - with different_locale(self.locale): - return super().formatmonthname(theyear, themonth, withyear) - -# Support for old module level interface -c = TextCalendar() - -firstweekday = c.getfirstweekday - -def setfirstweekday(firstweekday): - if not MONDAY <= firstweekday <= SUNDAY: - raise IllegalWeekdayError(firstweekday) - c.firstweekday = firstweekday - -monthcalendar = c.monthdayscalendar -prweek = c.prweek -week = c.formatweek -weekheader = c.formatweekheader -prmonth = c.prmonth -month = c.formatmonth -calendar = c.formatyear -prcal = c.pryear - - -# Spacing of month columns for multi-column year calendar -_colwidth = 7*3 - 1 # Amount printed by prweek() -_spacing = 6 # Number of spaces between columns - - -def format(cols, colwidth=_colwidth, spacing=_spacing): - """Prints multi-column formatting for year calendars""" - print(formatstring(cols, colwidth, spacing)) - - -def formatstring(cols, colwidth=_colwidth, spacing=_spacing): - """Returns a string formatted from n strings, centered within n columns.""" - spacing *= ' ' - return spacing.join(c.center(colwidth) for c in cols) - - -EPOCH = 1970 -_EPOCH_ORD = datetime.date(EPOCH, 1, 1).toordinal() - - -def timegm(tuple): - """Unrelated but handy function to calculate Unix timestamp from GMT.""" - year, month, day, hour, minute, second = tuple[:6] - days = datetime.date(year, month, 1).toordinal() - _EPOCH_ORD + day - 1 - hours = days*24 + hour - minutes = hours*60 + minute - seconds = minutes*60 + second - return seconds - - -def main(args): - import argparse - parser = argparse.ArgumentParser() - textgroup = parser.add_argument_group('text only arguments') - htmlgroup = parser.add_argument_group('html only arguments') - textgroup.add_argument( - "-w", "--width", - type=int, default=2, - help="width of date column (default 2)" - ) - textgroup.add_argument( - "-l", "--lines", - type=int, default=1, - help="number of lines for each week (default 1)" - ) - textgroup.add_argument( - "-s", "--spacing", - type=int, default=6, - help="spacing between months (default 6)" - ) - textgroup.add_argument( - "-m", "--months", - type=int, default=3, - help="months per row (default 3)" - ) - htmlgroup.add_argument( - "-c", "--css", - default="calendar.css", - help="CSS to use for page" - ) - parser.add_argument( - "-L", "--locale", - default=None, - help="locale to use for month and weekday names" - ) - parser.add_argument( - "-e", "--encoding", - default=None, - help="encoding to use for output" - ) - parser.add_argument( - "-t", "--type", - default="text", - choices=("text", "html"), - help="output type (text or html)" - ) - parser.add_argument( - "year", - nargs='?', type=int, - help="year number (1-9999)" - ) - parser.add_argument( - "month", - nargs='?', type=int, - help="month number (1-12, text only)" - ) - - options = parser.parse_args(args[1:]) - - if options.locale and not options.encoding: - parser.error("if --locale is specified --encoding is required") - sys.exit(1) - - locale = options.locale, options.encoding - - if options.type == "html": - if options.locale: - cal = LocaleHTMLCalendar(locale=locale) - else: - cal = HTMLCalendar() - encoding = options.encoding - if encoding is None: - 
encoding = sys.getdefaultencoding() - optdict = dict(encoding=encoding, css=options.css) - write = sys.stdout.buffer.write - if options.year is None: - write(cal.formatyearpage(datetime.date.today().year, **optdict)) - elif options.month is None: - write(cal.formatyearpage(options.year, **optdict)) - else: - parser.error("incorrect number of arguments") - sys.exit(1) - else: - if options.locale: - cal = LocaleTextCalendar(locale=locale) - else: - cal = TextCalendar() - optdict = dict(w=options.width, l=options.lines) - if options.month is None: - optdict["c"] = options.spacing - optdict["m"] = options.months - if options.year is None: - result = cal.formatyear(datetime.date.today().year, **optdict) - elif options.month is None: - result = cal.formatyear(options.year, **optdict) - else: - result = cal.formatmonth(options.year, options.month, **optdict) - write = sys.stdout.write - if options.encoding: - result = result.encode(options.encoding) - write = sys.stdout.buffer.write - write(result) - - -if __name__ == "__main__": - main(sys.argv) diff --git a/python/python3_12/examples/cgi.py b/python/python3_12/examples/cgi.py deleted file mode 100644 index 8787567be7..0000000000 --- a/python/python3_12/examples/cgi.py +++ /dev/null @@ -1,1012 +0,0 @@ -#! /usr/local/bin/python - -# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is -# intentionally NOT "/usr/bin/env python". On many systems -# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI -# scripts, and /usr/local/bin is the default directory where Python is -# installed, so /usr/bin/env would be unable to find python. Granted, -# binary installations by Linux vendors often install Python in -# /usr/bin. So let those vendors patch cgi.py to match their choice -# of installation. - -"""Support module for CGI (Common Gateway Interface) scripts. - -This module defines a number of utilities for use by CGI scripts -written in Python. - -The global variable maxlen can be set to an integer indicating the maximum size -of a POST request. POST requests larger than this size will result in a -ValueError being raised during parsing. The default value of this variable is 0, -meaning the request size is unlimited. -""" - -# History -# ------- -# -# Michael McLay started this module. Steve Majewski changed the -# interface to SvFormContentDict and FormContentDict. The multipart -# parsing was inspired by code submitted by Andreas Paepcke. Guido van -# Rossum rewrote, reformatted and documented the module and is currently -# responsible for its maintenance. -# - -__version__ = "2.6" - - -# Imports -# ======= - -from io import StringIO, BytesIO, TextIOWrapper -from collections.abc import Mapping -import sys -import os -import urllib.parse -from email.parser import FeedParser -from email.message import Message -import html -import locale -import tempfile -import warnings - -__all__ = ["MiniFieldStorage", "FieldStorage", "parse", "parse_multipart", - "parse_header", "test", "print_exception", "print_environ", - "print_form", "print_directory", "print_arguments", - "print_environ_usage"] - - -warnings._deprecated(__name__, remove=(3,13)) - -# Logging support -# =============== - -logfile = "" # Filename to log to, if not empty -logfp = None # File object to log to, if not None - -def initlog(*allargs): - """Write a log message, if there is a log file. 
- - Even though this function is called initlog(), you should always - use log(); log is a variable that is set either to initlog - (initially), to dolog (once the log file has been opened), or to - nolog (when logging is disabled). - - The first argument is a format string; the remaining arguments (if - any) are arguments to the % operator, so e.g. - log("%s: %s", "a", "b") - will write "a: b" to the log file, followed by a newline. - - If the global logfp is not None, it should be a file object to - which log data is written. - - If the global logfp is None, the global logfile may be a string - giving a filename to open, in append mode. This file should be - world writable!!! If the file can't be opened, logging is - silently disabled (since there is no safe place where we could - send an error message). - - """ - global log, logfile, logfp - warnings.warn("cgi.log() is deprecated as of 3.10. Use logging instead", - DeprecationWarning, stacklevel=2) - if logfile and not logfp: - try: - logfp = open(logfile, "a", encoding="locale") - except OSError: - pass - if not logfp: - log = nolog - else: - log = dolog - log(*allargs) - -def dolog(fmt, *args): - """Write a log message to the log file. See initlog() for docs.""" - logfp.write(fmt%args + "\n") - -def nolog(*allargs): - """Dummy function, assigned to log when logging is disabled.""" - pass - -def closelog(): - """Close the log file.""" - global log, logfile, logfp - logfile = '' - if logfp: - logfp.close() - logfp = None - log = initlog - -log = initlog # The current logging function - - -# Parsing functions -# ================= - -# Maximum input we will accept when REQUEST_METHOD is POST -# 0 ==> unlimited input -maxlen = 0 - -def parse(fp=None, environ=os.environ, keep_blank_values=0, - strict_parsing=0, separator='&'): - """Parse a query in the environment or from a file (default stdin) - - Arguments, all optional: - - fp : file pointer; default: sys.stdin.buffer - - environ : environment dictionary; default: os.environ - - keep_blank_values: flag indicating whether blank values in - percent-encoded forms should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - - separator: str. The symbol to use for separating the query arguments. - Defaults to &. 
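parse() delegates the query-string decoding to urllib.parse.parse_qs; the flags described above can be sketched without a CGI environment:

from urllib.parse import parse_qs

# blank values are dropped unless keep_blank_values is true
assert parse_qs('a=1&a=2&b=') == {'a': ['1', '2']}
assert parse_qs('a=1&b=', keep_blank_values=True) == {'a': ['1'], 'b': ['']}
# strict_parsing escalates malformed pairs to ValueError
try:
    parse_qs('junk', strict_parsing=True)
except ValueError:
    pass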
- """ - if fp is None: - fp = sys.stdin - - # field keys and values (except for files) are returned as strings - # an encoding is required to decode the bytes read from self.fp - if hasattr(fp,'encoding'): - encoding = fp.encoding - else: - encoding = 'latin-1' - - # fp.read() must return bytes - if isinstance(fp, TextIOWrapper): - fp = fp.buffer - - if not 'REQUEST_METHOD' in environ: - environ['REQUEST_METHOD'] = 'GET' # For testing stand-alone - if environ['REQUEST_METHOD'] == 'POST': - ctype, pdict = parse_header(environ['CONTENT_TYPE']) - if ctype == 'multipart/form-data': - return parse_multipart(fp, pdict, separator=separator) - elif ctype == 'application/x-www-form-urlencoded': - clength = int(environ['CONTENT_LENGTH']) - if maxlen and clength > maxlen: - raise ValueError('Maximum content length exceeded') - qs = fp.read(clength).decode(encoding) - else: - qs = '' # Unknown content-type - if 'QUERY_STRING' in environ: - if qs: qs = qs + '&' - qs = qs + environ['QUERY_STRING'] - elif sys.argv[1:]: - if qs: qs = qs + '&' - qs = qs + sys.argv[1] - environ['QUERY_STRING'] = qs # XXX Shouldn't, really - elif 'QUERY_STRING' in environ: - qs = environ['QUERY_STRING'] - else: - if sys.argv[1:]: - qs = sys.argv[1] - else: - qs = "" - environ['QUERY_STRING'] = qs # XXX Shouldn't, really - return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing, - encoding=encoding, separator=separator) - - -def parse_multipart(fp, pdict, encoding="utf-8", errors="replace", separator='&'): - """Parse multipart input. - - Arguments: - fp : input file - pdict: dictionary containing other parameters of content-type header - encoding, errors: request encoding and error handler, passed to - FieldStorage - - Returns a dictionary just like parse_qs(): keys are the field names, each - value is a list of values for that field. For non-file fields, the value - is a list of strings. - """ - # RFC 2046, Section 5.1 : The "multipart" boundary delimiters are always - # represented as 7bit US-ASCII. - boundary = pdict['boundary'].decode('ascii') - ctype = "multipart/form-data; boundary={}".format(boundary) - headers = Message() - headers.set_type(ctype) - try: - headers['Content-Length'] = pdict['CONTENT-LENGTH'] - except KeyError: - pass - fs = FieldStorage(fp, headers=headers, encoding=encoding, errors=errors, - environ={'REQUEST_METHOD': 'POST'}, separator=separator) - return {k: fs.getlist(k) for k in fs} - -def _parseparam(s): - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) - if end < 0: - end = len(s) - f = s[:end] - yield f.strip() - s = s[end:] - -def parse_header(line): - """Parse a Content-type like header. - - Return the main content-type and a dictionary of options. 
- - """ - parts = _parseparam(';' + line) - key = parts.__next__() - pdict = {} - for p in parts: - i = p.find('=') - if i >= 0: - name = p[:i].strip().lower() - value = p[i+1:].strip() - if len(value) >= 2 and value[0] == value[-1] == '"': - value = value[1:-1] - value = value.replace('\\\\', '\\').replace('\\"', '"') - pdict[name] = value - return key, pdict - - -# Classes for field storage -# ========================= - -class MiniFieldStorage: - - """Like FieldStorage, for use when no file uploads are possible.""" - - # Dummy attributes - filename = None - list = None - type = None - file = None - type_options = {} - disposition = None - disposition_options = {} - headers = {} - - def __init__(self, name, value): - """Constructor from field name and value.""" - self.name = name - self.value = value - # self.file = StringIO(value) - - def __repr__(self): - """Return printable representation.""" - return "MiniFieldStorage(%r, %r)" % (self.name, self.value) - - -class FieldStorage: - - """Store a sequence of fields, reading multipart/form-data. - - This class provides naming, typing, files stored on disk, and - more. At the top level, it is accessible like a dictionary, whose - keys are the field names. (Note: None can occur as a field name.) - The items are either a Python list (if there's multiple values) or - another FieldStorage or MiniFieldStorage object. If it's a single - object, it has the following attributes: - - name: the field name, if specified; otherwise None - - filename: the filename, if specified; otherwise None; this is the - client side filename, *not* the file name on which it is - stored (that's a temporary file you don't deal with) - - value: the value as a *string*; for file uploads, this - transparently reads the file every time you request the value - and returns *bytes* - - file: the file(-like) object from which you can read the data *as - bytes* ; None if the data is stored a simple string - - type: the content-type, or None if not specified - - type_options: dictionary of options specified on the content-type - line - - disposition: content-disposition, or None if not specified - - disposition_options: dictionary of corresponding options - - headers: a dictionary(-like) object (sometimes email.message.Message or a - subclass thereof) containing *all* headers - - The class is subclassable, mostly for the purpose of overriding - the make_file() method, which is called internally to come up with - a file open for reading and writing. This makes it possible to - override the default choice of storing all files in a temporary - directory and unlinking them as soon as they have been opened. - - """ - def __init__(self, fp=None, headers=None, outerboundary=b'', - environ=os.environ, keep_blank_values=0, strict_parsing=0, - limit=None, encoding='utf-8', errors='replace', - max_num_fields=None, separator='&'): - """Constructor. Read multipart/* until last part. - - Arguments, all optional: - - fp : file pointer; default: sys.stdin.buffer - (not used when the request method is GET) - Can be : - 1. a TextIOWrapper object - 2. an object whose read() and readline() methods return bytes - - headers : header dictionary-like object; default: - taken from environ as per CGI spec - - outerboundary : terminating multipart boundary - (for internal use only) - - environ : environment dictionary; default: os.environ - - keep_blank_values: flag indicating whether blank values in - percent-encoded forms should be treated as blank strings. 
- A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - - limit : used internally to read parts of multipart/form-data forms, - to exit from the reading loop when reached. It is the difference - between the form content-length and the number of bytes already - read - - encoding, errors : the encoding and error handler used to decode the - binary stream to strings. Must be the same as the charset defined - for the page sending the form (content-type : meta http-equiv or - header) - - max_num_fields: int. If set, then __init__ throws a ValueError - if there are more than n fields read by parse_qsl(). - - """ - method = 'GET' - self.keep_blank_values = keep_blank_values - self.strict_parsing = strict_parsing - self.max_num_fields = max_num_fields - self.separator = separator - if 'REQUEST_METHOD' in environ: - method = environ['REQUEST_METHOD'].upper() - self.qs_on_post = None - if method == 'GET' or method == 'HEAD': - if 'QUERY_STRING' in environ: - qs = environ['QUERY_STRING'] - elif sys.argv[1:]: - qs = sys.argv[1] - else: - qs = "" - qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape') - fp = BytesIO(qs) - if headers is None: - headers = {'content-type': - "application/x-www-form-urlencoded"} - if headers is None: - headers = {} - if method == 'POST': - # Set default content-type for POST to what's traditional - headers['content-type'] = "application/x-www-form-urlencoded" - if 'CONTENT_TYPE' in environ: - headers['content-type'] = environ['CONTENT_TYPE'] - if 'QUERY_STRING' in environ: - self.qs_on_post = environ['QUERY_STRING'] - if 'CONTENT_LENGTH' in environ: - headers['content-length'] = environ['CONTENT_LENGTH'] - else: - if not (isinstance(headers, (Mapping, Message))): - raise TypeError("headers must be mapping or an instance of " - "email.message.Message") - self.headers = headers - if fp is None: - self.fp = sys.stdin.buffer - # self.fp.read() must return bytes - elif isinstance(fp, TextIOWrapper): - self.fp = fp.buffer - else: - if not (hasattr(fp, 'read') and hasattr(fp, 'readline')): - raise TypeError("fp must be file pointer") - self.fp = fp - - self.encoding = encoding - self.errors = errors - - if not isinstance(outerboundary, bytes): - raise TypeError('outerboundary must be bytes, not %s' - % type(outerboundary).__name__) - self.outerboundary = outerboundary - - self.bytes_read = 0 - self.limit = limit - - # Process content-disposition header - cdisp, pdict = "", {} - if 'content-disposition' in self.headers: - cdisp, pdict = parse_header(self.headers['content-disposition']) - self.disposition = cdisp - self.disposition_options = pdict - self.name = None - if 'name' in pdict: - self.name = pdict['name'] - self.filename = None - if 'filename' in pdict: - self.filename = pdict['filename'] - self._binary_file = self.filename is not None - - # Process content-type header - # - # Honor any existing content-type header. But if there is no - # content-type header, use some sensible defaults. Assume - # outerboundary is "" at the outer level, but something non-false - # inside a multi-part. The default for an inner part is text/plain, - # but for an outer part it should be urlencoded. 
This should catch - # bogus clients which erroneously forget to include a content-type - # header. - # - # See below for what we do if there does exist a content-type header, - # but it happens to be something we don't understand. - if 'content-type' in self.headers: - ctype, pdict = parse_header(self.headers['content-type']) - elif self.outerboundary or method != 'POST': - ctype, pdict = "text/plain", {} - else: - ctype, pdict = 'application/x-www-form-urlencoded', {} - self.type = ctype - self.type_options = pdict - if 'boundary' in pdict: - self.innerboundary = pdict['boundary'].encode(self.encoding, - self.errors) - else: - self.innerboundary = b"" - - clen = -1 - if 'content-length' in self.headers: - try: - clen = int(self.headers['content-length']) - except ValueError: - pass - if maxlen and clen > maxlen: - raise ValueError('Maximum content length exceeded') - self.length = clen - if self.limit is None and clen >= 0: - self.limit = clen - - self.list = self.file = None - self.done = 0 - if ctype == 'application/x-www-form-urlencoded': - self.read_urlencoded() - elif ctype[:10] == 'multipart/': - self.read_multi(environ, keep_blank_values, strict_parsing) - else: - self.read_single() - - def __del__(self): - try: - self.file.close() - except AttributeError: - pass - - def __enter__(self): - return self - - def __exit__(self, *args): - self.file.close() - - def __repr__(self): - """Return a printable representation.""" - return "FieldStorage(%r, %r, %r)" % ( - self.name, self.filename, self.value) - - def __iter__(self): - return iter(self.keys()) - - def __getattr__(self, name): - if name != 'value': - raise AttributeError(name) - if self.file: - self.file.seek(0) - value = self.file.read() - self.file.seek(0) - elif self.list is not None: - value = self.list - else: - value = None - return value - - def __getitem__(self, key): - """Dictionary style indexing.""" - if self.list is None: - raise TypeError("not indexable") - found = [] - for item in self.list: - if item.name == key: found.append(item) - if not found: - raise KeyError(key) - if len(found) == 1: - return found[0] - else: - return found - - def getvalue(self, key, default=None): - """Dictionary style get() method, including 'value' lookup.""" - if key in self: - value = self[key] - if isinstance(value, list): - return [x.value for x in value] - else: - return value.value - else: - return default - - def getfirst(self, key, default=None): - """ Return the first value received.""" - if key in self: - value = self[key] - if isinstance(value, list): - return value[0].value - else: - return value.value - else: - return default - - def getlist(self, key): - """ Return list of received values.""" - if key in self: - value = self[key] - if isinstance(value, list): - return [x.value for x in value] - else: - return [value.value] - else: - return [] - - def keys(self): - """Dictionary style keys() method.""" - if self.list is None: - raise TypeError("not indexable") - return list(set(item.name for item in self.list)) - - def __contains__(self, key): - """Dictionary style __contains__ method.""" - if self.list is None: - raise TypeError("not indexable") - return any(item.name == key for item in self.list) - - def __len__(self): - """Dictionary style len(x) support.""" - return len(self.keys()) - - def __bool__(self): - if self.list is None: - raise TypeError("Cannot be converted to bool.") - return bool(self.list) - - def read_urlencoded(self): - """Internal: read data in query string format.""" - qs = self.fp.read(self.length) - if 
not isinstance(qs, bytes): - raise ValueError("%s should return bytes, got %s" \ - % (self.fp, type(qs).__name__)) - qs = qs.decode(self.encoding, self.errors) - if self.qs_on_post: - qs += '&' + self.qs_on_post - query = urllib.parse.parse_qsl( - qs, self.keep_blank_values, self.strict_parsing, - encoding=self.encoding, errors=self.errors, - max_num_fields=self.max_num_fields, separator=self.separator) - self.list = [MiniFieldStorage(key, value) for key, value in query] - self.skip_lines() - - FieldStorageClass = None - - def read_multi(self, environ, keep_blank_values, strict_parsing): - """Internal: read a part that is itself multipart.""" - ib = self.innerboundary - if not valid_boundary(ib): - raise ValueError('Invalid boundary in multipart form: %r' % (ib,)) - self.list = [] - if self.qs_on_post: - query = urllib.parse.parse_qsl( - self.qs_on_post, self.keep_blank_values, self.strict_parsing, - encoding=self.encoding, errors=self.errors, - max_num_fields=self.max_num_fields, separator=self.separator) - self.list.extend(MiniFieldStorage(key, value) for key, value in query) - - klass = self.FieldStorageClass or self.__class__ - first_line = self.fp.readline() # bytes - if not isinstance(first_line, bytes): - raise ValueError("%s should return bytes, got %s" \ - % (self.fp, type(first_line).__name__)) - self.bytes_read += len(first_line) - - # Ensure that we consume the file until we've hit our inner boundary - while (first_line.strip() != (b"--" + self.innerboundary) and - first_line): - first_line = self.fp.readline() - self.bytes_read += len(first_line) - - # Propagate max_num_fields into the sub class appropriately - max_num_fields = self.max_num_fields - if max_num_fields is not None: - max_num_fields -= len(self.list) - - while True: - parser = FeedParser() - hdr_text = b"" - while True: - data = self.fp.readline() - hdr_text += data - if not data.strip(): - break - if not hdr_text: - break - # parser takes strings, not bytes - self.bytes_read += len(hdr_text) - parser.feed(hdr_text.decode(self.encoding, self.errors)) - headers = parser.close() - - # Some clients add Content-Length for part headers, ignore them - if 'content-length' in headers: - del headers['content-length'] - - limit = None if self.limit is None \ - else self.limit - self.bytes_read - part = klass(self.fp, headers, ib, environ, keep_blank_values, - strict_parsing, limit, - self.encoding, self.errors, max_num_fields, self.separator) - - if max_num_fields is not None: - max_num_fields -= 1 - if part.list: - max_num_fields -= len(part.list) - if max_num_fields < 0: - raise ValueError('Max number of fields exceeded') - - self.bytes_read += part.bytes_read - self.list.append(part) - if part.done or self.bytes_read >= self.length > 0: - break - self.skip_lines() - - def read_single(self): - """Internal: read an atomic part.""" - if self.length >= 0: - self.read_binary() - self.skip_lines() - else: - self.read_lines() - self.file.seek(0) - - bufsize = 8*1024 # I/O buffering size for copy to file - - def read_binary(self): - """Internal: read binary data.""" - self.file = self.make_file() - todo = self.length - if todo >= 0: - while todo > 0: - data = self.fp.read(min(todo, self.bufsize)) # bytes - if not isinstance(data, bytes): - raise ValueError("%s should return bytes, got %s" - % (self.fp, type(data).__name__)) - self.bytes_read += len(data) - if not data: - self.done = -1 - break - self.file.write(data) - todo = todo - len(data) - - def read_lines(self): - """Internal: read lines until EOF or outerboundary.""" - 
if self._binary_file: - self.file = self.__file = BytesIO() # store data as bytes for files - else: - self.file = self.__file = StringIO() # as strings for other fields - if self.outerboundary: - self.read_lines_to_outerboundary() - else: - self.read_lines_to_eof() - - def __write(self, line): - """line is always bytes, not string""" - if self.__file is not None: - if self.__file.tell() + len(line) > 1000: - self.file = self.make_file() - data = self.__file.getvalue() - self.file.write(data) - self.__file = None - if self._binary_file: - # keep bytes - self.file.write(line) - else: - # decode to string - self.file.write(line.decode(self.encoding, self.errors)) - - def read_lines_to_eof(self): - """Internal: read lines until EOF.""" - while 1: - line = self.fp.readline(1<<16) # bytes - self.bytes_read += len(line) - if not line: - self.done = -1 - break - self.__write(line) - - def read_lines_to_outerboundary(self): - """Internal: read lines until outerboundary. - Data is read as bytes: boundaries and line ends must be converted - to bytes for comparisons. - """ - next_boundary = b"--" + self.outerboundary - last_boundary = next_boundary + b"--" - delim = b"" - last_line_lfend = True - _read = 0 - while 1: - - if self.limit is not None and 0 <= self.limit <= _read: - break - line = self.fp.readline(1<<16) # bytes - self.bytes_read += len(line) - _read += len(line) - if not line: - self.done = -1 - break - if delim == b"\r": - line = delim + line - delim = b"" - if line.startswith(b"--") and last_line_lfend: - strippedline = line.rstrip() - if strippedline == next_boundary: - break - if strippedline == last_boundary: - self.done = 1 - break - odelim = delim - if line.endswith(b"\r\n"): - delim = b"\r\n" - line = line[:-2] - last_line_lfend = True - elif line.endswith(b"\n"): - delim = b"\n" - line = line[:-1] - last_line_lfend = True - elif line.endswith(b"\r"): - # We may interrupt \r\n sequences if they span the 2**16 - # byte boundary - delim = b"\r" - line = line[:-1] - last_line_lfend = False - else: - delim = b"" - last_line_lfend = False - self.__write(odelim + line) - - def skip_lines(self): - """Internal: skip lines until outer boundary if defined.""" - if not self.outerboundary or self.done: - return - next_boundary = b"--" + self.outerboundary - last_boundary = next_boundary + b"--" - last_line_lfend = True - while True: - line = self.fp.readline(1<<16) - self.bytes_read += len(line) - if not line: - self.done = -1 - break - if line.endswith(b"--") and last_line_lfend: - strippedline = line.strip() - if strippedline == next_boundary: - break - if strippedline == last_boundary: - self.done = 1 - break - last_line_lfend = line.endswith(b'\n') - - def make_file(self): - """Overridable: return a readable & writable file. - - The file will be used as follows: - - data is written to it - - seek(0) - - data is read from it - - The file is opened in binary mode for files, in text mode - for other fields - - This version opens a temporary file for reading and writing, - and immediately deletes (unlinks) it. The trick (on Unix!) is - that the file can still be used, but it can't be opened by - another process, and it will automatically be deleted when it - is closed or when the current process terminates. - - If you want a more permanent file, you derive a class which - overrides this method. 
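A sketch of that extension point (class name hypothetical): keep uploads on disk by overriding make_file() with delete=False temporaries:

import tempfile

class PersistentUploads(FieldStorage):
    def make_file(self):
        if self._binary_file:
            # delete=False keeps the temporary file after it is closed
            return tempfile.NamedTemporaryFile("wb+", delete=False)
        return tempfile.NamedTemporaryFile("w+", encoding=self.encoding,
                                           newline='\n', delete=False)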
If you want a visible temporary file - that is nevertheless automatically deleted when the script - terminates, try defining a __del__ method in a derived class - which unlinks the temporary files you have created. - - """ - if self._binary_file: - return tempfile.TemporaryFile("wb+") - else: - return tempfile.TemporaryFile("w+", - encoding=self.encoding, newline = '\n') - - -# Test/debug code -# =============== - -def test(environ=os.environ): - """Robust test CGI script, usable as main program. - - Write minimal HTTP headers and dump all information provided to - the script in HTML form. - - """ - print("Content-type: text/html") - print() - sys.stderr = sys.stdout - try: - form = FieldStorage() # Replace with other classes to test those - print_directory() - print_arguments() - print_form(form) - print_environ(environ) - print_environ_usage() - def f(): - exec("testing print_exception() -- italics?") - def g(f=f): - f() - print("

<h3>What follows is a test, not an actual exception:</h3>
") - g() - except: - print_exception() - - print("

<h3>Second try with a small maxlen...</h3>
") - - global maxlen - maxlen = 50 - try: - form = FieldStorage() # Replace with other classes to test those - print_directory() - print_arguments() - print_form(form) - print_environ(environ) - except: - print_exception() - -def print_exception(type=None, value=None, tb=None, limit=None): - if type is None: - type, value, tb = sys.exc_info() - import traceback - print() - print("

<h3>Traceback (most recent call last):</h3>
") - list = traceback.format_tb(tb, limit) + \ - traceback.format_exception_only(type, value) - print("
<pre>%s<strong>%s</strong></pre>
" % ( - html.escape("".join(list[:-1])), - html.escape(list[-1]), - )) - del tb - -def print_environ(environ=os.environ): - """Dump the shell environment as HTML.""" - keys = sorted(environ.keys()) - print() - print("

<h3>Shell Environment:</h3>
") - print("
") - for key in keys: - print("
", html.escape(key), "
", html.escape(environ[key])) - print("
") - print() - -def print_form(form): - """Dump the contents of a form as HTML.""" - keys = sorted(form.keys()) - print() - print("

<h3>Form Contents:</h3>
") - if not keys: - print("

<p>No form fields.") - print("<dl>

") - for key in keys: - print("
" + html.escape(key) + ":", end=' ') - value = form[key] - print("" + html.escape(repr(type(value))) + "") - print("
" + html.escape(repr(value))) - print("
") - print() - -def print_directory(): - """Dump the current directory as HTML.""" - print() - print("

<h3>Current Working Directory:</h3>
") - try: - pwd = os.getcwd() - except OSError as msg: - print("OSError:", html.escape(str(msg))) - else: - print(html.escape(pwd)) - print() - -def print_arguments(): - print() - print("

<h3>Command Line Arguments:</h3>
") - print() - print(sys.argv) - print() - -def print_environ_usage(): - """Dump a list of environment variables used by CGI as HTML.""" - print(""" -

-<h3>These environment variables could have been set:</h3>
-<ul>
-<li>AUTH_TYPE
-<li>CONTENT_LENGTH
-<li>CONTENT_TYPE
-<li>DATE_GMT
-<li>DATE_LOCAL
-<li>DOCUMENT_NAME
-<li>DOCUMENT_ROOT
-<li>DOCUMENT_URI
-<li>GATEWAY_INTERFACE
-<li>LAST_MODIFIED
-<li>PATH
-<li>PATH_INFO
-<li>PATH_TRANSLATED
-<li>QUERY_STRING
-<li>REMOTE_ADDR
-<li>REMOTE_HOST
-<li>REMOTE_IDENT
-<li>REMOTE_USER
-<li>REQUEST_METHOD
-<li>SCRIPT_NAME
-<li>SERVER_NAME
-<li>SERVER_PORT
-<li>SERVER_PROTOCOL
-<li>SERVER_ROOT
-<li>SERVER_SOFTWARE
-</ul>
-In addition, HTTP headers sent by the server may be passed in the
-environment as well. Here are some common variable names:
-<ul>
-<li>HTTP_ACCEPT
-<li>HTTP_CONNECTION
-<li>HTTP_HOST
-<li>HTTP_PRAGMA
-<li>HTTP_REFERER
-<li>HTTP_USER_AGENT
-</ul>
-""") - - -# Utilities -# ========= - -def valid_boundary(s): - import re - if isinstance(s, bytes): - _vb_pattern = b"^[ -~]{0,200}[!-~]$" - else: - _vb_pattern = "^[ -~]{0,200}[!-~]$" - return re.match(_vb_pattern, s) - -# Invoke mainline -# =============== - -# Call test() when this file is run as a script (not imported as a module) -if __name__ == '__main__': - test() diff --git a/python/python3_12/examples/cgitb.py b/python/python3_12/examples/cgitb.py deleted file mode 100644 index f6b97f25c5..0000000000 --- a/python/python3_12/examples/cgitb.py +++ /dev/null @@ -1,332 +0,0 @@ -"""More comprehensive traceback formatting for Python scripts. - -To enable this module, do: - - import cgitb; cgitb.enable() - -at the top of your script. The optional arguments to enable() are: - - display - if true, tracebacks are displayed in the web browser - logdir - if set, tracebacks are written to files in this directory - context - number of lines of source code to show for each stack frame - format - 'text' or 'html' controls the output format - -By default, tracebacks are displayed but not saved, the context is 5 lines -and the output format is 'html' (for backwards compatibility with the -original use of this module) - -Alternatively, if you have caught an exception and want cgitb to display it -for you, call cgitb.handler(). The optional argument to handler() is a -3-item tuple (etype, evalue, etb) just like the value of sys.exc_info(). -The default handler displays output as HTML. - -""" -import inspect -import keyword -import linecache -import os -import pydoc -import sys -import tempfile -import time -import tokenize -import traceback -import warnings -from html import escape as html_escape - -warnings._deprecated(__name__, remove=(3, 13)) - - -def reset(): - """Return a string that resets the CGI and browser to a known state.""" - return ''' - --> --> - - ''' - -__UNDEF__ = [] # a special sentinel object -def small(text): - if text: - return '' + text + '' - else: - return '' - -def strong(text): - if text: - return '' + text + '' - else: - return '' - -def grey(text): - if text: - return '' + text + '' - else: - return '' - -def lookup(name, frame, locals): - """Find the value for a given name in the given environment.""" - if name in locals: - return 'local', locals[name] - if name in frame.f_globals: - return 'global', frame.f_globals[name] - if '__builtins__' in frame.f_globals: - builtins = frame.f_globals['__builtins__'] - if isinstance(builtins, dict): - if name in builtins: - return 'builtin', builtins[name] - else: - if hasattr(builtins, name): - return 'builtin', getattr(builtins, name) - return None, __UNDEF__ - -def scanvars(reader, frame, locals): - """Scan one logical line of Python and look up values of variables used.""" - vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__ - for ttype, token, start, end, line in tokenize.generate_tokens(reader): - if ttype == tokenize.NEWLINE: break - if ttype == tokenize.NAME and token not in keyword.kwlist: - if lasttoken == '.': - if parent is not __UNDEF__: - value = getattr(parent, token, __UNDEF__) - vars.append((prefix + token, prefix, value)) - else: - where, value = lookup(token, frame, locals) - vars.append((token, where, value)) - elif token == '.': - prefix += lasttoken + '.' 
- parent = value - else: - parent, prefix = None, '' - lasttoken = token - return vars - -def html(einfo, context=5): - """Return a nice HTML document describing a given traceback.""" - etype, evalue, etb = einfo - if isinstance(etype, type): - etype = etype.__name__ - pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable - date = time.ctime(time.time()) - head = f''' - - - - - -
<body bgcolor="#f0f0f8">
<table width="100%" cellspacing=0 cellpadding=2 border=0 summary="heading">
<tr bgcolor="#6622aa">
<td valign=bottom>&nbsp;<br>
<font color="#ffffff" face="helvetica, arial">&nbsp;<br>
<big><big><strong>{html_escape(str(etype))}</strong></big></big></font></td>
<td align=right valign=bottom>
<font color="#ffffff" face="helvetica, arial">{pyver}<br>{date}</font></td>
</tr></table>
<p>A problem occurred in a Python script.  Here is the sequence of
function calls leading up to the error, in the order they occurred.</p>
''' - - indent = '' + small(' ' * 5) + ' ' - frames = [] - records = inspect.getinnerframes(etb, context) - for frame, file, lnum, func, lines, index in records: - if file: - file = os.path.abspath(file) - link = '%s' % (file, pydoc.html.escape(file)) - else: - file = link = '?' - args, varargs, varkw, locals = inspect.getargvalues(frame) - call = '' - if func != '?': - call = 'in ' + strong(pydoc.html.escape(func)) - if func != "": - call += inspect.formatargvalues(args, varargs, varkw, locals, - formatvalue=lambda value: '=' + pydoc.html.repr(value)) - - highlight = {} - def reader(lnum=[lnum]): - highlight[lnum[0]] = 1 - try: return linecache.getline(file, lnum[0]) - finally: lnum[0] += 1 - vars = scanvars(reader, frame, locals) - - rows = ['%s%s %s' % - (' ', link, call)] - if index is not None: - i = lnum - index - for line in lines: - num = small(' ' * (5-len(str(i))) + str(i)) + ' ' - if i in highlight: - line = '=>%s%s' % (num, pydoc.html.preformat(line)) - rows.append('%s' % line) - else: - line = '  %s%s' % (num, pydoc.html.preformat(line)) - rows.append('%s' % grey(line)) - i += 1 - - done, dump = {}, [] - for name, where, value in vars: - if name in done: continue - done[name] = 1 - if value is not __UNDEF__: - if where in ('global', 'builtin'): - name = ('%s ' % where) + strong(name) - elif where == 'local': - name = strong(name) - else: - name = where + strong(name.split('.')[-1]) - dump.append('%s = %s' % (name, pydoc.html.repr(value))) - else: - dump.append(name + ' undefined') - - rows.append('%s' % small(grey(', '.join(dump)))) - frames.append(''' - -%s
</table>''' % '\n'.join(rows)) - - exception = ['

<p>%s: %s' % (strong(pydoc.html.escape(str(etype))), - pydoc.html.escape(str(evalue)))] - for name in dir(evalue): - if name[:1] == '_': continue - value = pydoc.html.repr(getattr(evalue, name)) - exception.append('\n
%s%s =\n%s' % (indent, name, value)) - - return head + ''.join(frames) + ''.join(exception) + ''' - - - -''' % pydoc.html.escape( - ''.join(traceback.format_exception(etype, evalue, etb))) - -def text(einfo, context=5): - """Return a plain text document describing a given traceback.""" - etype, evalue, etb = einfo - if isinstance(etype, type): - etype = etype.__name__ - pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable - date = time.ctime(time.time()) - head = "%s\n%s\n%s\n" % (str(etype), pyver, date) + ''' -A problem occurred in a Python script. Here is the sequence of -function calls leading up to the error, in the order they occurred. -''' - - frames = [] - records = inspect.getinnerframes(etb, context) - for frame, file, lnum, func, lines, index in records: - file = file and os.path.abspath(file) or '?' - args, varargs, varkw, locals = inspect.getargvalues(frame) - call = '' - if func != '?': - call = 'in ' + func - if func != "": - call += inspect.formatargvalues(args, varargs, varkw, locals, - formatvalue=lambda value: '=' + pydoc.text.repr(value)) - - highlight = {} - def reader(lnum=[lnum]): - highlight[lnum[0]] = 1 - try: return linecache.getline(file, lnum[0]) - finally: lnum[0] += 1 - vars = scanvars(reader, frame, locals) - - rows = [' %s %s' % (file, call)] - if index is not None: - i = lnum - index - for line in lines: - num = '%5d ' % i - rows.append(num+line.rstrip()) - i += 1 - - done, dump = {}, [] - for name, where, value in vars: - if name in done: continue - done[name] = 1 - if value is not __UNDEF__: - if where == 'global': name = 'global ' + name - elif where != 'local': name = where + name.split('.')[-1] - dump.append('%s = %s' % (name, pydoc.text.repr(value))) - else: - dump.append(name + ' undefined') - - rows.append('\n'.join(dump)) - frames.append('\n%s\n' % '\n'.join(rows)) - - exception = ['%s: %s' % (str(etype), str(evalue))] - for name in dir(evalue): - value = pydoc.text.repr(getattr(evalue, name)) - exception.append('\n%s%s = %s' % (" "*4, name, value)) - - return head + ''.join(frames) + ''.join(exception) + ''' - -The above is a description of an error in a Python program. Here is -the original traceback: - -%s -''' % ''.join(traceback.format_exception(etype, evalue, etb)) - -class Hook: - """A hook to replace sys.excepthook that shows tracebacks in HTML.""" - - def __init__(self, display=1, logdir=None, context=5, file=None, - format="html"): - self.display = display # send tracebacks to browser if true - self.logdir = logdir # log tracebacks to files if not None - self.context = context # number of source code lines per frame - self.file = file or sys.stdout # place to send the output - self.format = format - - def __call__(self, etype, evalue, etb): - self.handle((etype, evalue, etb)) - - def handle(self, info=None): - info = info or sys.exc_info() - if self.format == "html": - self.file.write(reset()) - - formatter = (self.format=="html") and html or text - plain = False - try: - doc = formatter(info, self.context) - except: # just in case something goes wrong - doc = ''.join(traceback.format_exception(*info)) - plain = True - - if self.display: - if plain: - doc = pydoc.html.escape(doc) - self.file.write('

<pre>' + doc + '</pre>
\n') - else: - self.file.write(doc + '\n') - else: - self.file.write('

<p>A problem occurred in a Python script.\n') - - if self.logdir is not None: - suffix = ['.txt', '.html'][self.format=="html"] - (fd, path) = tempfile.mkstemp(suffix=suffix, dir=self.logdir) - - try: - with os.fdopen(fd, 'w') as file: - file.write(doc) - msg = '%s contains the description of this error.' % path - except: - msg = 'Tried to save traceback to %s, but failed.' % path - - if self.format == 'html': - self.file.write('

<p>%s</p>

\n' % msg) - else: - self.file.write(msg + '\n') - try: - self.file.flush() - except: pass - -handler = Hook().handle -def enable(display=1, logdir=None, context=5, format="html"): - """Install an exception handler that formats tracebacks as HTML. - - The optional argument 'display' can be set to 0 to suppress sending the - traceback to the browser, and 'logdir' can be set to a directory to cause - tracebacks to be written to files there.""" - sys.excepthook = Hook(display=display, logdir=logdir, - context=context, format=format) diff --git a/python/python3_12/examples/chunk.py b/python/python3_12/examples/chunk.py deleted file mode 100644 index 618781efd1..0000000000 --- a/python/python3_12/examples/chunk.py +++ /dev/null @@ -1,173 +0,0 @@ -"""Simple class to read IFF chunks. - -An IFF chunk (used in formats such as AIFF, TIFF, RMFF (RealMedia File -Format)) has the following structure: - -+----------------+ -| ID (4 bytes) | -+----------------+ -| size (4 bytes) | -+----------------+ -| data | -| ... | -+----------------+ - -The ID is a 4-byte string which identifies the type of chunk. - -The size field (a 32-bit value, encoded using big-endian byte order) -gives the size of the whole chunk, including the 8-byte header. - -Usually an IFF-type file consists of one or more chunks. The proposed -usage of the Chunk class defined here is to instantiate an instance at -the start of each chunk and read from the instance until it reaches -the end, after which a new instance can be instantiated. At the end -of the file, creating a new instance will fail with an EOFError -exception. - -Usage: -while True: - try: - chunk = Chunk(file) - except EOFError: - break - chunktype = chunk.getname() - while True: - data = chunk.read(nbytes) - if not data: - pass - # do something with data - -The interface is file-like. The implemented methods are: -read, close, seek, tell, isatty. -Extra methods are: skip() (called by close, skips to the end of the chunk), -getname() (returns the name (ID) of the chunk) - -The __init__ method has one required argument, a file-like object -(including a chunk instance), and one optional argument, a flag which -specifies whether or not chunks are aligned on 2-byte boundaries. The -default is 1, i.e. aligned. -""" - -import warnings - -warnings._deprecated(__name__, remove=(3, 13)) - -class Chunk: - def __init__(self, file, align=True, bigendian=True, inclheader=False): - import struct - self.closed = False - self.align = align # whether to align to word (2-byte) boundaries - if bigendian: - strflag = '>' - else: - strflag = '<' - self.file = file - self.chunkname = file.read(4) - if len(self.chunkname) < 4: - raise EOFError - try: - self.chunksize = struct.unpack_from(strflag+'L', file.read(4))[0] - except struct.error: - raise EOFError from None - if inclheader: - self.chunksize = self.chunksize - 8 # subtract header - self.size_read = 0 - try: - self.offset = self.file.tell() - except (AttributeError, OSError): - self.seekable = False - else: - self.seekable = True - - def getname(self): - """Return the name (ID) of the current chunk.""" - return self.chunkname - - def getsize(self): - """Return the size of the current chunk.""" - return self.chunksize - - def close(self): - if not self.closed: - try: - self.skip() - finally: - self.closed = True - - def isatty(self): - if self.closed: - raise ValueError("I/O operation on closed file") - return False - - def seek(self, pos, whence=0): - """Seek to specified position into the chunk. - Default position is 0 (start of chunk). 
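A sketch of reading one chunk from an in-memory stream, matching the header layout described in the module docstring:

import io
import struct

# 4-byte ID, 4-byte big-endian size, then the data itself
raw = io.BytesIO(b'FORM' + struct.pack('>L', 4) + b'AIFF')
ch = Chunk(raw)
assert ch.getname() == b'FORM' and ch.getsize() == 4
assert ch.read() == b'AIFF'
ch.close()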
- If the file is not seekable, this will result in an error. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if not self.seekable: - raise OSError("cannot seek") - if whence == 1: - pos = pos + self.size_read - elif whence == 2: - pos = pos + self.chunksize - if pos < 0 or pos > self.chunksize: - raise RuntimeError - self.file.seek(self.offset + pos, 0) - self.size_read = pos - - def tell(self): - if self.closed: - raise ValueError("I/O operation on closed file") - return self.size_read - - def read(self, size=-1): - """Read at most size bytes from the chunk. - If size is omitted or negative, read until the end - of the chunk. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if self.size_read >= self.chunksize: - return b'' - if size < 0: - size = self.chunksize - self.size_read - if size > self.chunksize - self.size_read: - size = self.chunksize - self.size_read - data = self.file.read(size) - self.size_read = self.size_read + len(data) - if self.size_read == self.chunksize and \ - self.align and \ - (self.chunksize & 1): - dummy = self.file.read(1) - self.size_read = self.size_read + len(dummy) - return data - - def skip(self): - """Skip the rest of the chunk. - If you are not interested in the contents of the chunk, - this method should be called so that the file points to - the start of the next chunk. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if self.seekable: - try: - n = self.chunksize - self.size_read - # maybe fix alignment - if self.align and (self.chunksize & 1): - n = n + 1 - self.file.seek(n, 1) - self.size_read = self.size_read + n - return - except OSError: - pass - while self.size_read < self.chunksize: - n = min(8192, self.chunksize - self.size_read) - dummy = self.read(n) - if not dummy: - raise EOFError diff --git a/python/python3_12/tests/test_empty_file.py b/python/python3_12/tests/test_empty_file.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/python/python3_12/tests/test_error_first_statement_indented.py b/python/python3_12/tests/test_error_first_statement_indented.py deleted file mode 100644 index 39431ac786..0000000000 --- a/python/python3_12/tests/test_error_first_statement_indented.py +++ /dev/null @@ -1,10 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_error_first_statement_indented.py -# -# EXPECTATIONS: -# - inserted leading INDENT token -# - hidden NEWLINE tokens (channel=1) before the first statement -# - lexer error message: "line 10:3 LEXER ERROR: first statement indented" - - - i = 1 # first statement begins with space diff --git a/python/python3_12/tests/test_error_inconsistent_dedent.py b/python/python3_12/tests/test_error_inconsistent_dedent.py deleted file mode 100644 index 660f59ff65..0000000000 --- a/python/python3_12/tests/test_error_inconsistent_dedent.py +++ /dev/null @@ -1,10 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_error_inconsistent_dedent.py -# -# EXPECTATIONS: -# - inserted ERROR_TOKEN instead of the DEDENT token -# - lexer error message: "line 10:0 LEXER ERROR: inconsistent dedent" - -if True: - i = 0 - j = 0 # inconsistent dedent diff --git a/python/python3_12/tests/test_error_not_indented.py b/python/python3_12/tests/test_error_not_indented.py deleted file mode 100644 index fb6b451f94..0000000000 --- a/python/python3_12/tests/test_error_not_indented.py +++ /dev/null @@ -1,8 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_error_not_indented.py -# -# EXPECTATION: -# - 
parser error message: "line 8:0 missing INDENT at 'i'" - -if True: -i = 1 # no indentation diff --git a/python/python3_12/tests/test_error_tab_and_space_in_indentation.py b/python/python3_12/tests/test_error_tab_and_space_in_indentation.py deleted file mode 100644 index 7d77a9bc0e..0000000000 --- a/python/python3_12/tests/test_error_tab_and_space_in_indentation.py +++ /dev/null @@ -1,11 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_error_tab_and_space_in_indentation.py -# -# EXPECTATIONS: -# - inserted ERROR_TOKEN instead of the WS token -# - lexer error message: "line 11:0 LEXER ERROR: inconsistent use of tabs and spaces in indentation" - -if True: - i = 0 # indented by spaces -if True: - j = 0 # indented by a tab diff --git a/python/python3_12/tests/test_error_unexpected_indent.py b/python/python3_12/tests/test_error_unexpected_indent.py deleted file mode 100644 index 9fca02bf5d..0000000000 --- a/python/python3_12/tests/test_error_unexpected_indent.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_error_unexpected_indent.py -# -# EXPECTATION: -# - parser error message: "line 9:7 mismatched input '' ..." - -if True: - i = 0 - j = 1 # invalid indentation diff --git a/python/python3_12/tests/test_explicit_line_joining.py b/python/python3_12/tests/test_explicit_line_joining.py deleted file mode 100644 index 55be1bd964..0000000000 --- a/python/python3_12/tests/test_explicit_line_joining.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_explicit_line_joining.py -# -# EXPECTATIONS: -# - hidden (channel=1) EXPLICIT_LINE_JOINING token -# - no error message - -i = 1 \ - + 2 diff --git a/python/python3_12/tests/test_formfeed_as_separator.py b/python/python3_12/tests/test_formfeed_as_separator.py deleted file mode 100644 index 31c9da82cd..0000000000 --- a/python/python3_12/tests/test_formfeed_as_separator.py +++ /dev/null @@ -1,6 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_formfeed_as_separator.py -# -# EXPECTATION: no error message - -import io # formfeed character as whitespace diff --git a/python/python3_12/tests/test_formfeed_at_start_of_line.py b/python/python3_12/tests/test_formfeed_at_start_of_line.py deleted file mode 100644 index 0fd599a62d..0000000000 --- a/python/python3_12/tests/test_formfeed_at_start_of_line.py +++ /dev/null @@ -1,6 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_formfeed_at_start_of_line.py -# -# EXPECTATION: no error message - - i = 1 # line starts with formfeed diff --git a/python/python3_12/tests/test_formfeed_in_indent.py b/python/python3_12/tests/test_formfeed_in_indent.py deleted file mode 100644 index ff12eb6bf0..0000000000 --- a/python/python3_12/tests/test_formfeed_in_indent.py +++ /dev/null @@ -1,8 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_formfeed_in_indent.py -# -# EXPECTATION: no error message - -if True: - i = 1 # the indentation length starts after the last formfeed - j = 1 diff --git a/python/python3_12/tests/test_hidden_NEWLINE_before_blank_line.py b/python/python3_12/tests/test_hidden_NEWLINE_before_blank_line.py deleted file mode 100644 index 2fbded528c..0000000000 --- a/python/python3_12/tests/test_hidden_NEWLINE_before_blank_line.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_hidden_NEWLINE_before_blank_line.py -# -# EXPECTATIONS: -# - hidden NEWLINE token (channel=1) before the blank line -# - no error message -i = 1 - -j = 1 diff --git
a/python/python3_12/tests/test_hidden_NEWLINE_before_comment.py b/python/python3_12/tests/test_hidden_NEWLINE_before_comment.py deleted file mode 100644 index 9db3798954..0000000000 --- a/python/python3_12/tests/test_hidden_NEWLINE_before_comment.py +++ /dev/null @@ -1,11 +0,0 @@ -def inc(value): -# this is a comment (or type comment) - return value + 1 - -# COMMAND LINE: -# grun Python file_input -tokens test_hidden_NEWLINE_before_comment.py -# -# EXPECTATIONS: -# - hidden NEWLINE tokens (channel=1) before a COMMENT token -# - hidden NEWLINE token (channel=1) before the blank line -# - no error message diff --git a/python/python3_12/tests/test_hidden_leading_NEWLINEs.py b/python/python3_12/tests/test_hidden_leading_NEWLINEs.py deleted file mode 100644 index 27ec70615d..0000000000 --- a/python/python3_12/tests/test_hidden_leading_NEWLINEs.py +++ /dev/null @@ -1,7 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_hidden_leading_NEWLINEs.py -# -# EXPECTATIONS: -# - hidden NEWLINE tokens (channel=1) before the first statement -# - no error message -i = 1 diff --git a/python/python3_12/tests/test_implicit_line_joining.py b/python/python3_12/tests/test_implicit_line_joining.py deleted file mode 100644 index 2ce500dc49..0000000000 --- a/python/python3_12/tests/test_implicit_line_joining.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_implicit_line_joining.py -# -# EXPECTATIONS: -# - hidden NEWLINE token (channel=1) after the opening parenthesis -# - no error message - -print(1 - + 2) diff --git a/python/python3_12/tests/test_insert_trailing_NEWLINE_1.py b/python/python3_12/tests/test_insert_trailing_NEWLINE_1.py deleted file mode 100644 index ae3e442512..0000000000 --- a/python/python3_12/tests/test_insert_trailing_NEWLINE_1.py +++ /dev/null @@ -1,8 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_insert_trailing_NEWLINE_1.py -# -# EXPECTATIONS: -# - inserted trailing NEWLINE token -# - no error message - -i = 1 # there is no newline at the end of this code \ No newline at end of file diff --git a/python/python3_12/tests/test_insert_trailing_NEWLINE_2.py b/python/python3_12/tests/test_insert_trailing_NEWLINE_2.py deleted file mode 100644 index 487d9cdce2..0000000000 --- a/python/python3_12/tests/test_insert_trailing_NEWLINE_2.py +++ /dev/null @@ -1,10 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_insert_trailing_NEWLINE_2.py -# -# EXPECTATIONS: -# - inserted trailing NEWLINE token -# - inserted trailing DEDENT token -# - no error message - -if True: - j = 0 # there is no newline at the end of this code \ No newline at end of file diff --git a/python/python3_12/tests/test_match_case.py b/python/python3_12/tests/test_match_case.py deleted file mode 100644 index 9452769b06..0000000000 --- a/python/python3_12/tests/test_match_case.py +++ /dev/null @@ -1,16 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_match_case.py -# -# EXPECTATIONS: -# - [@63,234:234='*',<'*'>,12:13] -# - [@64,235:235='_',,12:14] -# - no error message - -a, *b = [1, 2, 3, 4] -match b: - case [2]: - print("0") - case [f, *_] if f==2: - print("1") - case _: - print("2") diff --git a/python/python3_12/tests/test_no_trailing_NEWLINE.py b/python/python3_12/tests/test_no_trailing_NEWLINE.py deleted file mode 100644 index 9962d0e8e8..0000000000 --- a/python/python3_12/tests/test_no_trailing_NEWLINE.py +++ /dev/null @@ -1,4 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_no_trailing_NEWLINE.py -# -# EXPECTATION: no 
trailing NEWLINE token, no error message \ No newline at end of file diff --git a/python/python3_12/tests/test_trailing_inconsistent_dedent.py b/python/python3_12/tests/test_trailing_inconsistent_dedent.py deleted file mode 100644 index c517187583..0000000000 --- a/python/python3_12/tests/test_trailing_inconsistent_dedent.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_trailing_inconsistent_dedent.py -# -# EXPECTATION: -# - no error message - -if True: - i = 0 # the last line (next line) is an inconsistent dedent - \ No newline at end of file diff --git a/python/python3_12/tests/test_trailing_indent.py b/python/python3_12/tests/test_trailing_indent.py deleted file mode 100644 index 715fd033cc..0000000000 --- a/python/python3_12/tests/test_trailing_indent.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_trailing_indent.py -# -# EXPECTATION: -# - no error message - -if True: - j = 0 # the last line (next line) is an indent - \ No newline at end of file diff --git a/python/python3_12/tests/test_trailing_unexpected_indent.py b/python/python3_12/tests/test_trailing_unexpected_indent.py deleted file mode 100644 index 57affec3df..0000000000 --- a/python/python3_12/tests/test_trailing_unexpected_indent.py +++ /dev/null @@ -1,9 +0,0 @@ -# COMMAND LINE: -# grun Python file_input -tokens test_trailing_unexpected_indent.py -# -# EXPECTATION: -# - no error message - -if True: - j = 0 # the last line (next line) is an unexpected indent - \ No newline at end of file diff --git a/python/python3_13/CSharp/AssemblyInfo.cs b/python/python3_13/CSharp/AssemblyInfo.cs new file mode 100644 index 0000000000..d2e34cef12 --- /dev/null +++ b/python/python3_13/CSharp/AssemblyInfo.cs @@ -0,0 +1,2 @@ +[assembly: CLSCompliant(true)] + diff --git a/python/python3_13/CSharp/PythonLexerBase.cs b/python/python3_13/CSharp/PythonLexerBase.cs new file mode 100644 index 0000000000..512ba6ece2 --- /dev/null +++ b/python/python3_13/CSharp/PythonLexerBase.cs @@ -0,0 +1,797 @@ +/* +The MIT License (MIT) +Copyright (c) 2021 Robert Einhorn + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ */ + +/* + * Project : Python Indent/Dedent handler for ANTLR4 grammars + * + * Developed by : Robert Einhorn + */ + +#nullable enable +using Antlr4.Runtime; +using System.Collections.Generic; +using System.IO; +using System.Text; +using System.Text.RegularExpressions; + +public abstract class PythonLexerBase : Lexer +{ + // A stack that keeps track of the indentation lengths + private Stack<int> indentLengthStack = new(); + // A list where tokens are waiting to be loaded into the token stream + private LinkedList<IToken> pendingTokens = new(); + + // last pending token type + private int previousPendingTokenType; + private int lastPendingTokenTypeFromDefaultChannel; + + // The amount of opened parentheses, square brackets, or curly braces + private int opened; + // The amount of opened parentheses and square brackets in the current lexer mode + private Stack<int> paren_or_bracket_openedStack = new(); + // A stack that stores expression(s) between braces in fstring + private Stack<string> braceExpressionStack = new(); + private string prevBraceExpression = ""; + + // Instead of this._mode (_mode is not implemented in each ANTLR4 runtime) + private int curLexerMode; + // Instead of this._modeStack (_modeStack is not implemented in each ANTLR4 runtime) + private Stack<int> lexerModeStack = new(); + + private bool wasSpaceIndentation; + private bool wasTabIndentation; + private bool wasIndentationMixedWithSpacesAndTabs; + + private IToken curToken = null!; // current (under processing) token + private IToken ffgToken = null!; // following (look ahead) token + + private const int INVALID_LENGTH = -1; + private const string ERR_TXT = " ERROR: "; + + protected PythonLexerBase(ICharStream input) : base(input) + { + } + + protected PythonLexerBase(ICharStream input, TextWriter output, TextWriter errorOutput) : base(input, output, errorOutput) + { + } + + public override IToken NextToken() // reading the input stream until a return EOF + { + this.CheckNextToken(); + IToken firstPendingToken = this.pendingTokens.First!.Value; + this.pendingTokens.RemoveFirst(); + return firstPendingToken; // add the queued token to the token stream + } + + public override void Reset() + { + this.Init(); + base.Reset(); + } + + private void Init() + { + this.indentLengthStack = new(); + this.pendingTokens = new(); + this.previousPendingTokenType = 0; + this.lastPendingTokenTypeFromDefaultChannel = 0; + this.opened = 0; + this.paren_or_bracket_openedStack = new(); + this.braceExpressionStack = new(); + this.prevBraceExpression = ""; + this.curLexerMode = 0; + this.lexerModeStack = new(); + this.wasSpaceIndentation = false; + this.wasTabIndentation = false; + this.wasIndentationMixedWithSpacesAndTabs = false; + this.curToken = null!; + this.ffgToken = null!; + } + + private void CheckNextToken() + { + if (this.previousPendingTokenType == TokenConstants.EOF) + return; + + if (this.indentLengthStack.Count == 0) // We're at the first token + { + this.InsertENCODINGtoken(); + this.SetCurrentAndFollowingTokens(); + this.HandleStartOfInput(); + } + else + { + this.SetCurrentAndFollowingTokens(); + } + + + switch (this.curToken.Type) + { + case PythonLexer.NEWLINE: + this.HandleNEWLINEtoken(); + break; + case PythonLexer.LPAR: + case PythonLexer.LSQB: + case PythonLexer.LBRACE: + this.opened++; + this.AddPendingToken(this.curToken); + break; + case PythonLexer.RPAR: + case PythonLexer.RSQB: + case PythonLexer.RBRACE: + this.opened--; + this.AddPendingToken(this.curToken); + break; + case PythonLexer.FSTRING_MIDDLE: + this.HandleFSTRING_MIDDLEtokenWithDoubleBrace(); // does not affect the opened field + 
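+                    // curToken may now be the hidden LBRACE/RBRACE token that HandleFSTRING_MIDDLEtokenWithDoubleBrace created for an escaped "{{" or "}}"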
this.AddPendingToken(this.curToken); + break; + case PythonLexer.COLONEQUAL: + this.HandleCOLONEQUALtokenInFString(); + break; + case PythonLexer.ERRORTOKEN: + this.ReportLexerError("token recognition error at: '" + this.curToken.Text + "'"); + this.AddPendingToken(this.curToken); + break; + case TokenConstants.EOF: + this.HandleEOFtoken(); + break; + default: + this.AddPendingToken(this.curToken); + break; + } + this.HandleFORMAT_SPECIFICATION_MODE(); + } + + private void SetCurrentAndFollowingTokens() + { + this.curToken = this.ffgToken == null ? + base.NextToken() : + this.ffgToken; + + this.CheckCurToken(); // ffgToken cannot be used in this method and its sub methods (ffgToken is not yet set)! + + this.ffgToken = this.curToken.Type == TokenConstants.EOF ? + this.curToken : + base.NextToken(); + } + + private void InsertENCODINGtoken() // https://peps.python.org/pep-0263/ + { + var lineBuilder = new StringBuilder(); + var encodingName = ""; + var lineCount = 0; + var ws_commentPattern = new Regex("^[ \t\f]*(#.*)?$"); + var intStream = this.InputStream; + var size = intStream.Size; + + intStream.Seek(0); + for (int i = 0; i < size; i++) + { + char c = (char)intStream.LA(i + 1); + lineBuilder.Append(c); + + if (c == '\n' || i == size - 1) + { + string line = lineBuilder.ToString().Replace("\r", "").Replace("\n", ""); + if (ws_commentPattern.IsMatch(line)) // WS* + COMMENT? found + { + encodingName = GetEncodingName(line); + if (encodingName != "") + { + break; // encoding found + } + } + else + { + break; // statement or backslash found (line is not empty, not whitespace(s), not comment) + } + + lineCount++; + if (lineCount >= 2) + { + break; // check only the first two lines + } + lineBuilder.Clear(); + } + } + + if (encodingName == "") + { + encodingName = "utf-8"; // default Python source code encoding + } + + var encodingToken = new CommonToken(PythonLexer.ENCODING, encodingName); + encodingToken.Channel = TokenConstants.HiddenChannel; + encodingToken.StartIndex = 0; + encodingToken.StopIndex = 0; + encodingToken.Line = 0; + encodingToken.Column = -1; + AddPendingToken(encodingToken); + } + + private static string GetEncodingName(string commentText) // https://peps.python.org/pep-0263/#defining-the-encoding + { + var encodingCommentPattern = new Regex("^[ \\t\\f]*#.*?coding[:=][ \\t]*([-_.a-zA-Z0-9]+)"); + var match = encodingCommentPattern.Match(commentText); + return match.Success ? 
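+               // Groups[1] holds the encoding name captured by the PEP 263 magic-comment pattern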
match.Groups[1].Value : string.Empty; + } + + // initialize the indentLengthStack + // hide the leading NEWLINE token(s) + // if exists, find the first statement (not NEWLINE, not EOF token) that comes from the default channel + // insert a leading INDENT token if necessary + private void HandleStartOfInput() + { + // initialize the stack with a default 0 indentation length + this.indentLengthStack.Push(0); // this will never be popped off + while (this.curToken.Type != TokenConstants.EOF) + { + if (this.curToken.Channel == TokenConstants.DefaultChannel) + { + if (this.curToken.Type == PythonLexer.NEWLINE) + { + // all the NEWLINE tokens must be ignored before the first statement + this.HideAndAddPendingToken(this.curToken); + } + else + { // We're at the first statement + this.InsertLeadingIndentToken(); + return; // continue the processing of the current token with CheckNextToken() + } + } + else + { + this.AddPendingToken(this.curToken); // it can be WS, EXPLICIT_LINE_JOINING, or COMMENT token + } + this.SetCurrentAndFollowingTokens(); + } // continue the processing of the EOF token with CheckNextToken() + } + + private void InsertLeadingIndentToken() + { + if (this.previousPendingTokenType == PythonLexer.WS) + { + IToken prevToken = this.pendingTokens.Last!.Value; + if (this.GetIndentationLength(prevToken.Text) != 0) // there is an "indentation" before the first statement + { + const string errMsg = "first statement indented"; + this.ReportLexerError(errMsg); + // insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser + this.CreateAndAddPendingToken(PythonLexer.INDENT, TokenConstants.DefaultChannel, PythonLexerBase.ERR_TXT + errMsg, this.curToken); + } + } + } + + private void HandleNEWLINEtoken() + { + if (this.lexerModeStack.Count > 0) + { + this.AddPendingToken(this.curToken); + } + else if (this.opened > 0) + { + // We're in an implicit line joining, ignore the current NEWLINE token + this.HideAndAddPendingToken(this.curToken); + } + else + { + IToken nlToken = new CommonToken(this.curToken); // save the current NEWLINE token + bool isLookingAhead = this.ffgToken.Type == PythonLexer.WS; + if (isLookingAhead) + { + this.SetCurrentAndFollowingTokens(); // set the next two tokens + } + + switch (this.ffgToken.Type) + { + case PythonLexer.NEWLINE: // We're before a blank line + case PythonLexer.COMMENT: // We're before a comment + this.HideAndAddPendingToken(nlToken); + if (isLookingAhead) + { + this.AddPendingToken(this.curToken); // WS token + } + break; + default: + this.AddPendingToken(nlToken); + if (isLookingAhead) + { // We're on a whitespace(s) followed by a statement + int indentationLength = this.ffgToken.Type == TokenConstants.EOF ? 
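+                            // whitespace directly before EOF counts as zero indentation, so only the trailing DEDENTs get inserted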
+ 0 : + this.GetIndentationLength(this.curToken.Text); + + if (indentationLength != PythonLexerBase.INVALID_LENGTH) + { + this.AddPendingToken(this.curToken); // WS token + this.InsertIndentOrDedentToken(indentationLength); // may insert INDENT token or DEDENT token(s) + } + else + { + this.ReportError("inconsistent use of tabs and spaces in indentation"); + } + } + else + { + // We're at a newline followed by a statement (there is no whitespace before the statement) + this.InsertIndentOrDedentToken(0); // may insert DEDENT token(s) + } + break; + } + } + } + + private void InsertIndentOrDedentToken(int indentLength) + { + int prevIndentLength = this.indentLengthStack.Peek(); + if (indentLength > prevIndentLength) + { + this.CreateAndAddPendingToken(PythonLexer.INDENT, TokenConstants.DefaultChannel, null, this.ffgToken); + this.indentLengthStack.Push(indentLength); + } + else + { + while (indentLength < prevIndentLength) + { // more than 1 DEDENT token may be inserted into the token stream + this.indentLengthStack.Pop(); + prevIndentLength = this.indentLengthStack.Peek(); + if (indentLength <= prevIndentLength) + { + this.CreateAndAddPendingToken(PythonLexer.DEDENT, TokenConstants.DefaultChannel, null, this.ffgToken); + } + else + { + this.ReportError("inconsistent dedent"); + } + } + } + } + + private void CheckCurToken() + { + switch (this.curToken.Type) + { + case PythonLexer.FSTRING_START: + this.SetLexerModeByFSTRING_STARTtoken(); + return; + case PythonLexer.FSTRING_MIDDLE: + this.HandleFSTRING_MIDDLEtokenWithQuoteAndLBrace(); // affects the opened field + if (this.curToken.Type == PythonLexer.FSTRING_MIDDLE) + return; // No curToken exchange happened + break; + case PythonLexer.FSTRING_END: + this.PopLexerMode(); + return; + default: + if (this.lexerModeStack.Count == 0) + return; // Not in fstring mode + break; + } + + switch (this.curToken.Type) + { + case PythonLexer.NEWLINE: + // append the current brace expression with the current newline + this.AppendToBraceExpression(this.curToken.Text); + var ctkn = new CommonToken(this.curToken); + ctkn.Channel = TokenConstants.HiddenChannel; + this.curToken = ctkn; + break; + case PythonLexer.LBRACE: + // the outermost brace expression cannot be a dictionary comprehension or a set comprehension + this.braceExpressionStack.Push("{"); + this.paren_or_bracket_openedStack.Push(0); + this.PushLexerMode(Lexer.DEFAULT_MODE); + break; + case PythonLexer.LPAR: + case PythonLexer.LSQB: + // append the current brace expression with a "(" or a "[" + this.AppendToBraceExpression(this.curToken.Text); + // https://peps.python.org/pep-0498/#lambdas-inside-expressions + this.IncrementBraceStack(); + break; + case PythonLexer.RPAR: + case PythonLexer.RSQB: + // append the current brace expression with a ")" or a "]" + this.AppendToBraceExpression(this.curToken.Text); + this.DecrementBraceStack(); + break; + case PythonLexer.COLON: + case PythonLexer.COLONEQUAL: + // append the current brace expression with a ":" or a ":=" + this.AppendToBraceExpression(this.curToken.Text); + this.SetLexerModeByCOLONorCOLONEQUALtoken(); + break; + case PythonLexer.RBRACE: + this.SetLexerModeAfterRBRACEtoken(); + break; + default: + // append the current brace expression with the current token text + this.AppendToBraceExpression(this.curToken.Text); + break; + } + } + + private void AppendToBraceExpression(string text) + { + this.braceExpressionStack.Push(this.braceExpressionStack.Pop() + text); + } + + private void IncrementBraceStack() + { // increment the last element 
(peek() + 1) + this.paren_or_bracket_openedStack.Push(this.paren_or_bracket_openedStack.Pop() + 1); + } + + private void DecrementBraceStack() + { // decrement the last element (peek() - 1) + this.paren_or_bracket_openedStack.Push(this.paren_or_bracket_openedStack.Pop() - 1); + } + + private void SetLexerModeAfterRBRACEtoken() + { + switch (this.curLexerMode) + { + case Lexer.DEFAULT_MODE: + this.PopLexerMode(); + this.PopByBRACE(); + break; + case PythonLexer.SQ1__FORMAT_SPECIFICATION_MODE: + case PythonLexer.SQ1R_FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ1__FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ1R_FORMAT_SPECIFICATION_MODE: + case PythonLexer.SQ3__FORMAT_SPECIFICATION_MODE: + case PythonLexer.SQ3R_FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ3__FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ3R_FORMAT_SPECIFICATION_MODE: + this.PopLexerMode(); + this.PopLexerMode(); + this.PopByBRACE(); + break; + default: + this.ReportLexerError("f-string: single '}' is not allowed"); + break; + } + } + + private void SetLexerModeByFSTRING_STARTtoken() + { + string text = this.curToken.Text.ToLower(); + var modeMap = new Dictionary<string, int> + { + { "f'", PythonLexer.SQ1__FSTRING_MODE }, + { "rf'", PythonLexer.SQ1R_FSTRING_MODE }, + { "fr'", PythonLexer.SQ1R_FSTRING_MODE }, + { "f\"", PythonLexer.DQ1__FSTRING_MODE }, + { "rf\"", PythonLexer.DQ1R_FSTRING_MODE }, + { "fr\"", PythonLexer.DQ1R_FSTRING_MODE }, + { "f'''", PythonLexer.SQ3__FSTRING_MODE }, + { "rf'''", PythonLexer.SQ3R_FSTRING_MODE }, + { "fr'''", PythonLexer.SQ3R_FSTRING_MODE }, + { "f\"\"\"", PythonLexer.DQ3__FSTRING_MODE }, + { "rf\"\"\"", PythonLexer.DQ3R_FSTRING_MODE }, + { "fr\"\"\"", PythonLexer.DQ3R_FSTRING_MODE } + }; + + if (modeMap.TryGetValue(text, out int mode)) + { + this.PushLexerMode(mode); + } + } + + private void SetLexerModeByCOLONorCOLONEQUALtoken() + { + if (this.paren_or_bracket_openedStack.Peek() == 0) + { + // COLONEQUAL token will be replaced with a COLON token in CheckNextToken() + switch (this.lexerModeStack.Peek()) + { // check the previous lexer mode (the current is DEFAULT_MODE) + case PythonLexer.SQ1__FSTRING_MODE: + case PythonLexer.SQ1__FORMAT_SPECIFICATION_MODE: + this.PushLexerMode(PythonLexer.SQ1__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.SQ1R_FSTRING_MODE: + case PythonLexer.SQ1R_FORMAT_SPECIFICATION_MODE: + this.PushLexerMode(PythonLexer.SQ1R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.DQ1__FSTRING_MODE: + case PythonLexer.DQ1__FORMAT_SPECIFICATION_MODE: + this.PushLexerMode(PythonLexer.DQ1__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.DQ1R_FSTRING_MODE: + case PythonLexer.DQ1R_FORMAT_SPECIFICATION_MODE: + this.PushLexerMode(PythonLexer.DQ1R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.SQ3__FSTRING_MODE: + case PythonLexer.SQ3__FORMAT_SPECIFICATION_MODE: + this.PushLexerMode(PythonLexer.SQ3__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.SQ3R_FSTRING_MODE: + case PythonLexer.SQ3R_FORMAT_SPECIFICATION_MODE: + this.PushLexerMode(PythonLexer.SQ3R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.DQ3__FSTRING_MODE: + case PythonLexer.DQ3__FORMAT_SPECIFICATION_MODE: + this.PushLexerMode(PythonLexer.DQ3__FORMAT_SPECIFICATION_MODE); // continue in format spec. 
mode + break; + case PythonLexer.DQ3R_FSTRING_MODE: + case PythonLexer.DQ3R_FORMAT_SPECIFICATION_MODE: + this.PushLexerMode(PythonLexer.DQ3R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + } + } + } + + private void PopByBRACE() + { + this.paren_or_bracket_openedStack.Pop(); + this.prevBraceExpression = this.braceExpressionStack.Pop() + "}"; + if (this.braceExpressionStack.Count > 0) + { + // append the current brace expression with the previous brace expression + this.braceExpressionStack.Push(this.braceExpressionStack.Pop() + this.prevBraceExpression); + } + + } + + private void HandleFSTRING_MIDDLEtokenWithDoubleBrace() + { + // replace the trailing double brace with a single brace and insert a hidden brace token + switch (this.GetLastTwoCharsOfTheCurTokenText()) + { + case "{{": + this.TrimLastCharAddPendingTokenSetCurToken(PythonLexer.LBRACE, "{", TokenConstants.HiddenChannel); + break; + case "}}": + this.TrimLastCharAddPendingTokenSetCurToken(PythonLexer.RBRACE, "}", TokenConstants.HiddenChannel); + break; + } + } + + private void HandleFSTRING_MIDDLEtokenWithQuoteAndLBrace() + { + // replace the trailing quote + left_brace with a quote and insert an LBRACE token + // replace the trailing backslash + left_brace with a backslash and insert an LBRACE token + switch (this.GetLastTwoCharsOfTheCurTokenText()) + { + case "\"{": + case "'{": + case "\\{": + this.TrimLastCharAddPendingTokenSetCurToken(PythonLexer.LBRACE, "{", TokenConstants.DefaultChannel); + break; + } + } + + private string GetLastTwoCharsOfTheCurTokenText() + { + string curTokenText = this.curToken.Text; + return curTokenText.Length >= 2 ? curTokenText.Substring(curTokenText.Length - 2) : curTokenText; + } + + private void TrimLastCharAddPendingTokenSetCurToken(int type, string text, int channel) + { + // trim the last char and add the modified curToken to the pendingTokens stack + string curTokenText = this.curToken.Text; + string tokenTextWithoutLastChar = curTokenText.Substring(0, curTokenText.Length - 1); + var ctkn = new CommonToken(this.curToken); + ctkn.Text = tokenTextWithoutLastChar; + ctkn.StopIndex = ctkn.StopIndex - 1; + this.AddPendingToken(ctkn); + + this.CreateNewCurToken(type, text, channel); // set curToken + } + + private void HandleCOLONEQUALtokenInFString() + { + if (this.lexerModeStack.Count > 0 && + this.paren_or_bracket_openedStack.Peek() == 0) + { + // In fstring a colonequal (walrus operator) can only be used in parentheses + // Not in parentheses, replace COLONEQUAL token with COLON as format specifier + // and insert the equal symbol to the following FSTRING_MIDDLE token + var ctkn = new CommonToken(this.curToken); + ctkn.Type = PythonLexer.COLON; + ctkn.Text = ":"; + ctkn.StopIndex = ctkn.StartIndex; + this.curToken = ctkn; + if (this.ffgToken.Type == PythonLexer.FSTRING_MIDDLE) + { + ctkn = new CommonToken(this.ffgToken); + ctkn.Text = "=" + ctkn.Text; + ctkn.StartIndex -= 1; + ctkn.Column -= 1; + this.ffgToken = ctkn; + } + else + { + this.AddPendingToken(this.curToken); + this.CreateNewCurToken(PythonLexer.FSTRING_MIDDLE, "=", TokenConstants.DefaultChannel); + } + } + this.AddPendingToken(this.curToken); + } + + private void CreateNewCurToken(int type, string text, int channel) + { + var ctkn = new CommonToken(this.curToken); + ctkn.Type = type; + ctkn.Text = text; + ctkn.Channel = channel; + ctkn.Column += 1; + ctkn.StartIndex += 1; + ctkn.StopIndex = ctkn.StartIndex; + this.curToken = ctkn; + } + + private void PushLexerMode(int mode) + { + this.PushMode(mode); + 
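+        // also track the mode change locally, since _mode/_modeStack are not exposed uniformly across ANTLR4 runtimes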
this.lexerModeStack.Push(this.curLexerMode); + this.curLexerMode = mode; + } + + private void PopLexerMode() + { + this.PopMode(); + this.curLexerMode = this.lexerModeStack.Pop(); + } + + private void HandleFORMAT_SPECIFICATION_MODE() + { + if (this.lexerModeStack.Count > 0 + && this.ffgToken.Type == PythonLexer.RBRACE) + { + // insert an empty FSTRING_MIDDLE token instead of the missing format specification + switch (this.curToken.Type) + { + case PythonLexer.COLON: + this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.DefaultChannel, "", this.ffgToken); + break; + case PythonLexer.RBRACE: + // only if the previous brace expression is not a dictionary comprehension or set comprehension + if (!IsDictionaryComprehensionOrSetComprehension(this.prevBraceExpression)) + { + this.CreateAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, TokenConstants.DefaultChannel, "", this.ffgToken); + } + break; + } + } + } + + private static bool IsDictionaryComprehensionOrSetComprehension(string code) + { + var inputStream = CharStreams.fromString(code); + var lexer = new PythonLexer(inputStream); + var tokenStream = new CommonTokenStream(lexer); + var parser = new PythonParser(tokenStream); + + // Disable error listeners to suppress console output + lexer.RemoveErrorListeners(); + parser.RemoveErrorListeners(); + + parser.dictcomp(); // Try parsing as dictionary comprehension + if (parser.NumberOfSyntaxErrors == 0) + return true; + + parser = new PythonParser(tokenStream); + tokenStream.Seek(0); + parser.RemoveErrorListeners(); + parser.setcomp(); // Try parsing as set comprehension + return parser.NumberOfSyntaxErrors == 0; + } + + private void InsertTrailingTokens() + { + switch (this.lastPendingTokenTypeFromDefaultChannel) + { + case PythonLexer.NEWLINE: + case PythonLexer.DEDENT: + break; // no trailing NEWLINE token is needed + default: + // insert an extra trailing NEWLINE token that serves as the end of the last statement + this.CreateAndAddPendingToken(PythonLexer.NEWLINE, TokenConstants.DefaultChannel, null, this.ffgToken); // ffgToken is EOF + break; + } + this.InsertIndentOrDedentToken(0); // Now insert as many trailing DEDENT tokens as needed + } + + private void HandleEOFtoken() + { + if (this.lastPendingTokenTypeFromDefaultChannel > 0) + { // there was a statement in the input stream (leading NEWLINE tokens are hidden) + this.InsertTrailingTokens(); + } + this.AddPendingToken(this.curToken); + } + + private void HideAndAddPendingToken(IToken tkn) + { + var ctkn = new CommonToken(tkn); + ctkn.Channel = TokenConstants.HiddenChannel; + this.AddPendingToken(ctkn); + } + + private void CreateAndAddPendingToken(int ttype, int channel, string? text, IToken sampleToken) + { + var ctkn = new CommonToken(sampleToken); + ctkn.Type = ttype; + ctkn.Channel = channel; + ctkn.StopIndex = sampleToken.StartIndex - 1; + ctkn.Text = text ?? 
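+               // when no explicit text is given, synthesize a readable placeholder such as "<INDENT>" or "<DEDENT>"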
"<" + this.Vocabulary.GetSymbolicName(ttype) + ">"; + + this.AddPendingToken(ctkn); + } + + private void AddPendingToken(IToken tkn) + { + // save the last pending token type because the pendingTokens list can be empty by the nextToken() + this.previousPendingTokenType = tkn.Type; + if (tkn.Channel == TokenConstants.DefaultChannel) + { + this.lastPendingTokenTypeFromDefaultChannel = this.previousPendingTokenType; + } + this.pendingTokens.AddLast(tkn); + } + + private int GetIndentationLength(string indentText) // the indentText may contain spaces, tabs or form feeds + { + const int TAB_LENGTH = 8; // the standard number of spaces to replace a tab with spaces + int length = 0; + foreach (char ch in indentText) + { + switch (ch) + { + case ' ': + this.wasSpaceIndentation = true; + length += 1; + break; + case '\t': + this.wasTabIndentation = true; + length += TAB_LENGTH - (length % TAB_LENGTH); + break; + case '\f': // form feed + length = 0; + break; + } + } + + if (this.wasTabIndentation && this.wasSpaceIndentation) + { + if (!this.wasIndentationMixedWithSpacesAndTabs) + { + this.wasIndentationMixedWithSpacesAndTabs = true; + length = PythonLexerBase.INVALID_LENGTH; // only for the first inconsistent indent + } + } + return length; + } + + private void ReportLexerError(string errMsg) + { + this.ErrorListenerDispatch.SyntaxError(this.ErrorOutput, this, this.curToken.Type, this.curToken.Line, this.curToken.Column, " LEXER" + PythonLexerBase.ERR_TXT + errMsg, null); + } + + private void ReportError(string errMsg) + { + this.ReportLexerError(errMsg); + + // the ERRORTOKEN will raise an error in the parser + this.CreateAndAddPendingToken(PythonLexer.ERRORTOKEN, TokenConstants.DefaultChannel, PythonLexerBase.ERR_TXT + errMsg, this.ffgToken); + } +} diff --git a/python/python3_13/Java/PythonLexerBase.java b/python/python3_13/Java/PythonLexerBase.java new file mode 100644 index 0000000000..ab5eb88751 --- /dev/null +++ b/python/python3_13/Java/PythonLexerBase.java @@ -0,0 +1,684 @@ +/* +The MIT License (MIT) +Copyright (c) 2021 Robert Einhorn + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ */ + +/* + * + * Project : Python Indent/Dedent handler for ANTLR4 grammars + * + * Developed by : Robert Einhorn, robert.einhorn.hu@gmail.com + * + */ + +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.antlr.v4.runtime.*; + +public abstract class PythonLexerBase extends Lexer { + // A stack that keeps track of the indentation lengths + private Deque<Integer> indentLengthStack; + // A list where tokens are waiting to be loaded into the token stream + private Deque<Token> pendingTokens; + + // last pending token type + private int previousPendingTokenType; + private int lastPendingTokenTypeFromDefaultChannel; + + // The amount of opened parentheses, square brackets or curly braces + private int opened; + // The amount of opened parentheses and square brackets in the current lexer mode + private Deque<Integer> paren_or_bracket_openedStack; + // A stack that stores expression(s) between braces in fstring + private Deque<String> braceExpressionStack; + private String prevBraceExpression; + + // Instead of this._mode (_mode is not implemented in each ANTLR4 runtime) + private int curLexerMode; + // Instead of this._modeStack (_modeStack is not implemented in each ANTLR4 runtime) + private Deque<Integer> lexerModeStack; + + private boolean wasSpaceIndentation; + private boolean wasTabIndentation; + private boolean wasIndentationMixedWithSpacesAndTabs; + + private Token curToken; // current (under processing) token + private Token ffgToken; // following (look ahead) token + + private final int INVALID_LENGTH = -1; + private final String ERR_TXT = " ERROR: "; + + protected PythonLexerBase(CharStream input) { + super(input); + this.init(); + } + + @Override + public Token nextToken() { // reading the input stream until a return EOF + this.checkNextToken(); + return this.pendingTokens.pollFirst(); // add the queued token to the token stream + } + + @Override + public void reset() { + this.init(); + super.reset(); + } + + private void init() { + this.indentLengthStack = new ArrayDeque<>(); + this.pendingTokens = new ArrayDeque<>(); + this.previousPendingTokenType = 0; + this.lastPendingTokenTypeFromDefaultChannel = 0; + this.opened = 0; + this.paren_or_bracket_openedStack = new ArrayDeque<>(); + this.braceExpressionStack = new ArrayDeque<>(); + this.prevBraceExpression = ""; + this.curLexerMode = 0; + this.lexerModeStack = new ArrayDeque<>(); + this.wasSpaceIndentation = false; + this.wasTabIndentation = false; + this.wasIndentationMixedWithSpacesAndTabs = false; + this.curToken = null; + this.ffgToken = null; + } + + private void checkNextToken() { + if (this.previousPendingTokenType == Token.EOF) + return; + + if (this.indentLengthStack.isEmpty()) { // We're at the first token + this.insertENCODINGtoken(); + this.setCurrentAndFollowingTokens(); + this.handleStartOfInput(); + } else { + this.setCurrentAndFollowingTokens(); + } + + switch (this.curToken.getType()) { + case PythonLexer.NEWLINE: + this.handleNEWLINEtoken(); + break; + case PythonLexer.LPAR: + case PythonLexer.LSQB: + case PythonLexer.LBRACE: + this.opened++; + this.addPendingToken(this.curToken); + break; + case PythonLexer.RPAR: + case PythonLexer.RSQB: + case PythonLexer.RBRACE: + this.opened--; + this.addPendingToken(this.curToken); + break; + case PythonLexer.FSTRING_MIDDLE: + this.handleFSTRING_MIDDLEtokenWithDoubleBrace(); // does not affect the opened field + this.addPendingToken(this.curToken); + break; + case PythonLexer.COLONEQUAL: + this.handleCOLONEQUALtokenInFString(); + break; + case PythonLexer.ERRORTOKEN: + 
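+                // report the unrecognized input but still pass the ERRORTOKEN through, so the parser signals an error as well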
this.reportLexerError("token recognition error at: '" + this.curToken.getText() + "'"); + this.addPendingToken(this.curToken); + break; + case Token.EOF: + this.handleEOFtoken(); + break; + default: + this.addPendingToken(this.curToken); + } + this.handleFORMAT_SPECIFICATION_MODE(); + } + + private void setCurrentAndFollowingTokens() { + this.curToken = this.ffgToken == null ? + super.nextToken() : + this.ffgToken; + + this.checkCurToken(); // ffgToken cannot be used in this method and its sub methods (ffgToken is not yet set)! + + this.ffgToken = this.curToken.getType() == Token.EOF ? + this.curToken : + super.nextToken(); + } + + private void insertENCODINGtoken() { // https://peps.python.org/pep-0263/ + StringBuilder lineBuilder = new StringBuilder(); + String encodingName = ""; + int lineCount = 0; + final Pattern ws_commentPattern = Pattern.compile("^[ \\t\\f]*(#.*)?$"); + final CharStream charStream = this.getInputStream(); + final int size = charStream.size(); + + charStream.seek(0); + for (int i = 0; i < size; i++) { + char c = (char) charStream.LA(i + 1); + lineBuilder.append(c); + + if (c == '\n' || i == size - 1) { + String line = lineBuilder.toString().replace("\r", "").replace("\n", ""); + if (ws_commentPattern.matcher(line).find()) { // WS* + COMMENT? found + encodingName = getEncodingName(line); + if (!encodingName.isEmpty()) { + break; // encoding found + } + } else { + break; // statement or backslash found (line is not empty, not whitespace(s), not comment) + } + + lineCount++; + if (lineCount >= 2) { + break; // check only the first two lines + } + lineBuilder = new StringBuilder(); + } + } + + if (encodingName.isEmpty()) { + encodingName = "utf-8"; // default Python source code encoding + } + + final CommonToken encodingToken = new CommonToken(PythonLexer.ENCODING, encodingName); + encodingToken.setChannel(Token.HIDDEN_CHANNEL); + this.addPendingToken(encodingToken); + } + + private String getEncodingName(final String commentText) { // https://peps.python.org/pep-0263/#defining-the-encoding + final Pattern encodingCommentPattern = Pattern.compile("^[ \\t\\f]*#.*?coding[:=][ \\t]*([-_.a-zA-Z0-9]+)"); + final Matcher matcher = encodingCommentPattern.matcher(commentText); + return matcher.find() ? 
matcher.group(1) : ""; + } + + // initialize the indentLengthStack + // hide the leading NEWLINE token(s) + // if exists, find the first statement (not NEWLINE, not EOF token) that comes from the default channel + // insert a leading INDENT token if necessary + private void handleStartOfInput() { + // initialize the stack with a default 0 indentation length + this.indentLengthStack.push(0); // this will never be popped off + while (this.curToken.getType() != Token.EOF) { + if (this.curToken.getChannel() == Token.DEFAULT_CHANNEL) { + if (this.curToken.getType() == PythonLexer.NEWLINE) { + // all the NEWLINE tokens must be ignored before the first statement + this.hideAndAddPendingToken(this.curToken); + } else { // We're at the first statement + this.insertLeadingIndentToken(); + return; // continue the processing of the current token with checkNextToken() + } + } else { + this.addPendingToken(this.curToken); // it can be WS, EXPLICIT_LINE_JOINING or COMMENT token + } + this.setCurrentAndFollowingTokens(); + } + // continue the processing of the EOF token with checkNextToken() + } + + private void insertLeadingIndentToken() { + if (this.previousPendingTokenType == PythonLexer.WS) { + Token prevToken = this.pendingTokens.peekLast(); // WS token + if (this.getIndentationLength(prevToken.getText()) != 0) { // there is an "indentation" before the first statement + final String errMsg = "first statement indented"; + this.reportLexerError(errMsg); + // insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser + this.createAndAddPendingToken(PythonLexer.INDENT, Token.DEFAULT_CHANNEL, this.ERR_TXT + errMsg, this.curToken); + } + } + } + + private void handleNEWLINEtoken() { + if (!this.lexerModeStack.isEmpty()) { // for multi line fstring literals + this.addPendingToken(this.curToken); + } else if (this.opened > 0) { // We're in an implicit line joining, ignore the current NEWLINE token + this.hideAndAddPendingToken(this.curToken); + } else { + final Token nlToken = new CommonToken(this.curToken); // save the current NEWLINE token + final boolean isLookingAhead = this.ffgToken.getType() == PythonLexer.WS; + if (isLookingAhead) { + this.setCurrentAndFollowingTokens(); // set the next two tokens + } + + switch (this.ffgToken.getType()) { + case PythonLexer.NEWLINE: // We're before a blank line + case PythonLexer.COMMENT: // We're before a comment + this.hideAndAddPendingToken(nlToken); + if (isLookingAhead) { + this.addPendingToken(this.curToken); // WS token + } + break; + default: + this.addPendingToken(nlToken); + if (isLookingAhead) { // We're on a whitespace(s) followed by a statement + final int indentationLength = this.ffgToken.getType() == Token.EOF ? 
+ 0 : + this.getIndentationLength(this.curToken.getText()); + + if (indentationLength != this.INVALID_LENGTH) { + this.addPendingToken(this.curToken); // WS token + this.insertIndentOrDedentToken(indentationLength); // may insert INDENT token or DEDENT token(s) + } else { + this.reportError("inconsistent use of tabs and spaces in indentation"); + } + } else { // We're at a newline followed by a statement (there is no whitespace before the statement) + this.insertIndentOrDedentToken(0); // may insert DEDENT token(s) + } + } + } + } + + private void insertIndentOrDedentToken(final int indentLength) { + int prevIndentLength = this.indentLengthStack.peek(); + if (indentLength > prevIndentLength) { + this.createAndAddPendingToken(PythonLexer.INDENT, Token.DEFAULT_CHANNEL, null, this.ffgToken); + this.indentLengthStack.push(indentLength); + } else { + while (indentLength < prevIndentLength) { // more than 1 DEDENT token may be inserted into the token stream + this.indentLengthStack.pop(); + prevIndentLength = this.indentLengthStack.peek(); + if (indentLength <= prevIndentLength) { + this.createAndAddPendingToken(PythonLexer.DEDENT, Token.DEFAULT_CHANNEL, null, this.ffgToken); + } else { + this.reportError("inconsistent dedent"); + } + } + } + } + + private void checkCurToken() { + switch (this.curToken.getType()) { + case PythonLexer.FSTRING_START: + this.setLexerModeByFSTRING_STARTtoken(); + return; + case PythonLexer.FSTRING_MIDDLE: + this.handleFSTRING_MIDDLEtokenWithQuoteAndLBrace(); // affects the opened field + if (this.curToken.getType() == PythonLexer.FSTRING_MIDDLE) + return; // No curToken exchange happened + break; + case PythonLexer.FSTRING_END: + this.popLexerMode(); + return; + default: + if (this.lexerModeStack.isEmpty()) + return; // Not in fstring mode + } + + switch (this.curToken.getType()) { // the following tokens can only come from default mode (after an LBRACE in fstring) + case PythonLexer.NEWLINE: + // append the current brace expression with the current newline + this.appendToBraceExpression(this.curToken.getText()); + final CommonToken ctkn = new CommonToken(this.curToken); + ctkn.setChannel(Token.HIDDEN_CHANNEL); + this.curToken = ctkn; + break; + case PythonLexer.LBRACE: + // the outermost brace expression cannot be a dictionary comprehension or a set comprehension + this.braceExpressionStack.push("{"); + this.paren_or_bracket_openedStack.push(0); + this.pushLexerMode(Lexer.DEFAULT_MODE); + break; + case PythonLexer.LPAR: + case PythonLexer.LSQB: + // append the current brace expression with a "(" or a "[" + this.appendToBraceExpression(this.curToken.getText()); + // https://peps.python.org/pep-0498/#lambdas-inside-expressions + this.incrementBraceStack(); + break; + case PythonLexer.RPAR: + case PythonLexer.RSQB: + // append the current brace expression with a ")" or a "]" + this.appendToBraceExpression(this.curToken.getText()); + this.decrementBraceStack(); + break; + case PythonLexer.COLON: + case PythonLexer.COLONEQUAL: + // append the current brace expression with a ":" or a ":=" + this.appendToBraceExpression(this.curToken.getText()); + this.setLexerModeByCOLONorCOLONEQUALtoken(); + break; + case PythonLexer.RBRACE: + this.setLexerModeAfterRBRACEtoken(); + break; + default: + // append the current brace expression with the current token text + this.appendToBraceExpression(this.curToken.getText()); + } + } + + private void appendToBraceExpression(String text) { + this.braceExpressionStack.push(this.braceExpressionStack.pop() + text); + } + + private void 
incrementBraceStack() { // increment the last element (peek() + 1) + this.paren_or_bracket_openedStack.push(this.paren_or_bracket_openedStack.pop() + 1); + } + + private void decrementBraceStack() { // decrement the last element (peek() - 1) + this.paren_or_bracket_openedStack.push(this.paren_or_bracket_openedStack.pop() - 1); + } + + private void setLexerModeAfterRBRACEtoken() { + switch (this.curLexerMode) { + case Lexer.DEFAULT_MODE: + this.popLexerMode(); + this.popByBRACE(); + break; + case PythonLexer.SQ1__FORMAT_SPECIFICATION_MODE: + case PythonLexer.SQ1R_FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ1__FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ1R_FORMAT_SPECIFICATION_MODE: + case PythonLexer.SQ3__FORMAT_SPECIFICATION_MODE: + case PythonLexer.SQ3R_FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ3__FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ3R_FORMAT_SPECIFICATION_MODE: + this.popLexerMode(); + this.popLexerMode(); + this.popByBRACE(); + break; + default: + this.reportLexerError("f-string: single '}' is not allowed"); + } + } + + private void setLexerModeByFSTRING_STARTtoken() { + final String text = this.curToken.getText().toLowerCase(); + Map<String, Integer> modeMap = new HashMap<>(); + modeMap.put("f'", PythonLexer.SQ1__FSTRING_MODE); + modeMap.put("rf'", PythonLexer.SQ1R_FSTRING_MODE); + modeMap.put("fr'", PythonLexer.SQ1R_FSTRING_MODE); + modeMap.put("f\"", PythonLexer.DQ1__FSTRING_MODE); + modeMap.put("rf\"", PythonLexer.DQ1R_FSTRING_MODE); + modeMap.put("fr\"", PythonLexer.DQ1R_FSTRING_MODE); + modeMap.put("f'''", PythonLexer.SQ3__FSTRING_MODE); + modeMap.put("rf'''", PythonLexer.SQ3R_FSTRING_MODE); + modeMap.put("fr'''", PythonLexer.SQ3R_FSTRING_MODE); + modeMap.put("f\"\"\"", PythonLexer.DQ3__FSTRING_MODE); + modeMap.put("rf\"\"\"", PythonLexer.DQ3R_FSTRING_MODE); + modeMap.put("fr\"\"\"", PythonLexer.DQ3R_FSTRING_MODE); + + Integer mode = modeMap.get(text); + if (mode != null) { + this.pushLexerMode(mode); + } + } + + private void setLexerModeByCOLONorCOLONEQUALtoken() { + if (this.paren_or_bracket_openedStack.peek() == 0) { + // COLONEQUAL token will be replaced with a COLON token in checkNextToken() + switch (this.lexerModeStack.peek()) { // check the previous lexer mode (the current is DEFAULT_MODE) + case PythonLexer.SQ1__FSTRING_MODE: + case PythonLexer.SQ1__FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.SQ1__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.SQ1R_FSTRING_MODE: + case PythonLexer.SQ1R_FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.SQ1R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.DQ1__FSTRING_MODE: + case PythonLexer.DQ1__FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.DQ1__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.DQ1R_FSTRING_MODE: + case PythonLexer.DQ1R_FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.DQ1R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.SQ3__FSTRING_MODE: + case PythonLexer.SQ3__FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.SQ3__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.SQ3R_FSTRING_MODE: + case PythonLexer.SQ3R_FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.SQ3R_FORMAT_SPECIFICATION_MODE); // continue in format spec. 
mode + break; + case PythonLexer.DQ3__FSTRING_MODE: + case PythonLexer.DQ3__FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.DQ3__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.DQ3R_FSTRING_MODE: + case PythonLexer.DQ3R_FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.DQ3R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + } + } + } + + private void popByBRACE() { + this.paren_or_bracket_openedStack.pop(); + this.prevBraceExpression = this.braceExpressionStack.pop() + "}"; + if (!this.braceExpressionStack.isEmpty()) { + // append the current brace expression with the previous brace expression + this.braceExpressionStack.push(this.braceExpressionStack.pop() + this.prevBraceExpression); + } + + } + + private void handleFSTRING_MIDDLEtokenWithDoubleBrace() { + // replace the trailing double brace with a single brace and insert a hidden brace token + switch (this.getLastTwoCharsOfTheCurTokenText()) { + case "{{": + this.trimLastCharAddPendingTokenSetCurToken(PythonLexer.LBRACE, "{", Token.HIDDEN_CHANNEL); + break; + case "}}": + this.trimLastCharAddPendingTokenSetCurToken(PythonLexer.RBRACE, "}", Token.HIDDEN_CHANNEL); + break; + } + } + + private void handleFSTRING_MIDDLEtokenWithQuoteAndLBrace() { + // replace the trailing quote + left_brace with a quote and insert an LBRACE token + // replace the trailing backslash + left_brace with a backslash and insert an LBRACE token + switch (this.getLastTwoCharsOfTheCurTokenText()) { + case "\"{": + case "'{": + case "\\{": + this.trimLastCharAddPendingTokenSetCurToken(PythonLexer.LBRACE, "{", Token.DEFAULT_CHANNEL); + break; + } + } + + private String getLastTwoCharsOfTheCurTokenText() { + final String curTokenText = this.curToken.getText(); + return curTokenText.length() >= 2 ? 
curTokenText.substring(curTokenText.length() - 2) : curTokenText; + } + + private void trimLastCharAddPendingTokenSetCurToken(final int type, final String text, final int channel) { + // trim the last char and add the modified curToken to the pendingTokens stack + final String curTokenText = this.curToken.getText(); + final String tokenTextWithoutLastChar = curTokenText.substring(0, curTokenText.length() - 1); + final CommonToken ctkn = new CommonToken(this.curToken); + ctkn.setText(tokenTextWithoutLastChar); + ctkn.setStopIndex(ctkn.getStopIndex() - 1); + this.addPendingToken(ctkn); + + this.createNewCurToken(type, text, channel); // set curToken + } + + private void handleCOLONEQUALtokenInFString() { + if (!this.lexerModeStack.isEmpty() && + this.paren_or_bracket_openedStack.peek() == 0) { + + // In fstring a colonequal (walrus operator) can only be used in parentheses + // Not in parentheses, replace COLONEQUAL token with COLON as format specifier + // and insert the equal symbol to the following FSTRING_MIDDLE token + CommonToken ctkn = new CommonToken(this.curToken); + ctkn.setType(PythonLexer.COLON); + ctkn.setText(":"); + ctkn.setStopIndex(ctkn.getStartIndex()); + this.curToken = ctkn; + if (this.ffgToken.getType() == PythonLexer.FSTRING_MIDDLE) { + ctkn = new CommonToken(this.ffgToken); + ctkn.setText("=" + ctkn.getText()); + ctkn.setStartIndex(ctkn.getStartIndex() - 1); + ctkn.setCharPositionInLine(ctkn.getCharPositionInLine() - 1); + this.ffgToken = ctkn; + } else { + this.addPendingToken(this.curToken); + this.createNewCurToken(PythonLexer.FSTRING_MIDDLE, "=", Token.DEFAULT_CHANNEL); + } + } + this.addPendingToken(this.curToken); + } + + private void createNewCurToken(final int type, final String text, final int channel) { + final CommonToken ctkn = new CommonToken(this.curToken); + ctkn.setType(type); + ctkn.setText(text); + ctkn.setChannel(channel); + ctkn.setCharPositionInLine(ctkn.getCharPositionInLine() + 1); + ctkn.setStartIndex(ctkn.getStartIndex() + 1); + ctkn.setStopIndex(ctkn.getStartIndex()); + this.curToken = ctkn; + } + + private void pushLexerMode(final int mode) { + this.pushMode(mode); + this.lexerModeStack.push(this.curLexerMode); + this.curLexerMode = mode; + } + + private void popLexerMode() { + this.popMode(); + this.curLexerMode = this.lexerModeStack.pop(); + } + + private void handleFORMAT_SPECIFICATION_MODE() { + if (!this.lexerModeStack.isEmpty() && + this.ffgToken.getType() == PythonLexer.RBRACE) { + + // insert an empty FSTRING_MIDDLE token instead of the missing format specification + switch (this.curToken.getType()) { + case PythonLexer.COLON: + this.createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, "", this.ffgToken); + break; + case PythonLexer.RBRACE: + // only if the previous brace expression is not a dictionary comprehension or set comprehension + if (!isDictionaryComprehensionOrSetComprehension(this.prevBraceExpression)) { + this.createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, "", this.ffgToken); + } + break; + } + } + } + + private boolean isDictionaryComprehensionOrSetComprehension(final String code) { + final CharStream inputStream = CharStreams.fromString(code); + final PythonLexer lexer = new PythonLexer(inputStream); + final CommonTokenStream tokenStream = new CommonTokenStream(lexer); + PythonParser parser = new PythonParser(tokenStream); + + // Disable error listeners to suppress console output + lexer.removeErrorListeners(); + parser.removeErrorListeners(); + + parser.dictcomp(); // 
Try parsing as dictionary comprehension + if (parser.getNumberOfSyntaxErrors() == 0) + return true; + + parser = new PythonParser(tokenStream); + tokenStream.seek(0); + parser.removeErrorListeners(); + parser.setcomp(); // Try parsing as set comprehension + return parser.getNumberOfSyntaxErrors() == 0; + } + + private void insertTrailingTokens() { + switch (this.lastPendingTokenTypeFromDefaultChannel) { + case PythonLexer.NEWLINE: + case PythonLexer.DEDENT: + break; // no trailing NEWLINE token is needed + default: // insert an extra trailing NEWLINE token that serves as the end of the last statement + this.createAndAddPendingToken(PythonLexer.NEWLINE, Token.DEFAULT_CHANNEL, null, this.ffgToken); // ffgToken is EOF + } + this.insertIndentOrDedentToken(0); // Now insert as many trailing DEDENT tokens as needed + } + + private void handleEOFtoken() { + if (this.lastPendingTokenTypeFromDefaultChannel > 0) { + // there was a statement in the input (leading NEWLINE tokens are hidden) + this.insertTrailingTokens(); + } + this.addPendingToken(this.curToken); + } + + private void hideAndAddPendingToken(final Token tkn) { + final CommonToken ctkn = new CommonToken(tkn); + ctkn.setChannel(Token.HIDDEN_CHANNEL); + this.addPendingToken(ctkn); + } + + private void createAndAddPendingToken(final int ttype, final int channel, final String text, final Token sampleToken) { + final CommonToken ctkn = new CommonToken(sampleToken); + ctkn.setType(ttype); + ctkn.setChannel(channel); + ctkn.setStopIndex(sampleToken.getStartIndex() - 1); + ctkn.setText(text == null ? + "<" + this.getVocabulary().getDisplayName(ttype) + ">" : + text); + + this.addPendingToken(ctkn); + } + + private void addPendingToken(final Token tkn) { + // save the last pending token type because the pendingTokens list can be empty by the nextToken() + this.previousPendingTokenType = tkn.getType(); + if (tkn.getChannel() == Token.DEFAULT_CHANNEL) { + this.lastPendingTokenTypeFromDefaultChannel = this.previousPendingTokenType; + } + this.pendingTokens.addLast(tkn); + } + + private int getIndentationLength(final String indentText) { // the indentText may contain spaces, tabs or form feeds + final int TAB_LENGTH = 8; // the standard number of spaces to replace a tab with spaces + int length = 0; + for (char ch : indentText.toCharArray()) { + switch (ch) { + case ' ': + this.wasSpaceIndentation = true; + length += 1; + break; + case '\t': + this.wasTabIndentation = true; + length += TAB_LENGTH - (length % TAB_LENGTH); + break; + case '\f': // form feed + length = 0; + break; + } + } + + if (this.wasTabIndentation && this.wasSpaceIndentation) { + if (!(this.wasIndentationMixedWithSpacesAndTabs)) { + this.wasIndentationMixedWithSpacesAndTabs = true; + length = this.INVALID_LENGTH; // only for the first inconsistent indent + } + } + return length; + } + + private void reportLexerError(final String errMsg) { + this.getErrorListenerDispatch().syntaxError(this, this.curToken, this.curToken.getLine(), this.curToken.getCharPositionInLine(), " LEXER" + this.ERR_TXT + errMsg, null); + } + + private void reportError(final String errMsg) { + this.reportLexerError(errMsg); + + // the ERRORTOKEN will raise an error in the parser + this.createAndAddPendingToken(PythonLexer.ERRORTOKEN, Token.DEFAULT_CHANNEL, this.ERR_TXT + errMsg, this.ffgToken); + } +} diff --git a/python/python3_13/JavaScript/PythonLexerBase.js b/python/python3_13/JavaScript/PythonLexerBase.js new file mode 100644 index 0000000000..5c08004f40 --- /dev/null +++ 
b/python/python3_13/JavaScript/PythonLexerBase.js @@ -0,0 +1,676 @@ +/* +The MIT License (MIT) +Copyright (c) 2021 Robert Einhorn + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + */ + +/* + * + * Project : Python Indent/Dedent handler for ANTLR4 grammars + * + * Developed by : Robert Einhorn, robert.einhorn.hu@gmail.com + * + */ + +import { CharStreams, CommonTokenStream, Token, CommonToken, Lexer } from "antlr4"; +import PythonLexer from "./PythonLexer.js"; +import PythonParser from "./PythonParser.js"; + +export default class PythonLexerBase extends Lexer { + constructor(input) { + super(input); + + // A stack that keeps track of the indentation lengths + this.indentLengthStack; + // A list where tokens are waiting to be loaded into the token stream + this.pendingTokens; + + // last pending token types + this.previousPendingTokenType; + this.lastPendingTokenTypeFromDefaultChannel; + + // The amount of opened parentheses, square brackets or curly braces + this.opened; + // The amount of opened parentheses and square brackets in the current lexer mode + this.paren_or_bracket_openedStack; + // A stack that stores expression(s) between braces in fstring + this.braceExpressionStack; + this.prevBraceExpression; + + // Instead of this._mode (_mode is not implemented in each ANTLR4 runtime) + this.curLexerMode; + // Instead of this._modeStack (_modeStack is not implemented in each ANTLR4 runtime) + this.lexerModeStack; + + this.wasSpaceIndentation; + this.wasTabIndentation; + this.wasIndentationMixedWithSpacesAndTabs; + + this.curToken; // current (under processing) token + this.ffgToken; // following (look ahead) token + + this.#init(); + } + + get #INVALID_LENGTH() { return -1; } + get #ERR_TXT() { return " ERROR: "; } + + nextToken() { // reading the input stream until a return EOF + this.#checkNextToken(); + return this.pendingTokens.shift() /* stack pollFirst() */; // add the queued token to the token stream + } + + reset() { + this.#init(); + super.reset(); + } + + #init() { + this.indentLengthStack = []; + this.pendingTokens = []; + this.previousPendingTokenType = 0; + this.lastPendingTokenTypeFromDefaultChannel = 0; + this.opened = 0; + this.paren_or_bracket_openedStack = []; + this.braceExpressionStack = []; + this.prevBraceExpression = ""; + this.curLexerMode = 0; + this.lexerModeStack = []; + this.wasSpaceIndentation = false; + this.wasTabIndentation = false; + this.wasIndentationMixedWithSpacesAndTabs = false; + this.curToken = null; + this.ffgToken = null; + } + + #checkNextToken() { + if 
(this.previousPendingTokenType === Token.EOF) + return; + + if (this.indentLengthStack.length === 0) { // We're at the first token + this.#insertENCODINGtoken(); + this.#setCurrentAndFollowingTokens(); + this.#handleStartOfInput(); + } else { + this.#setCurrentAndFollowingTokens(); + } + + switch (this.curToken.type) { + case PythonLexer.NEWLINE: + this.#handleNEWLINEtoken(); + break; + case PythonLexer.LPAR: + case PythonLexer.LSQB: + case PythonLexer.LBRACE: + this.opened++; + this.#addPendingToken(this.curToken); + break; + case PythonLexer.RPAR: + case PythonLexer.RSQB: + case PythonLexer.RBRACE: + this.opened--; + this.#addPendingToken(this.curToken); + break; + case PythonLexer.FSTRING_MIDDLE: + this.#handleFSTRING_MIDDLEtokenWithDoubleBrace(); // does not affect the opened field + this.#addPendingToken(this.curToken); + break; + case PythonLexer.COLONEQUAL: + this.#handleCOLONEQUALtokenInFString(); + break; + case PythonLexer.ERRORTOKEN: + this.#reportLexerError(`token recognition error at: '${this.curToken.text}'`); + this.#addPendingToken(this.curToken); + break; + case Token.EOF: + this.#handleEOFtoken(); + break; + default: + this.#addPendingToken(this.curToken); + } + this.#handleFORMAT_SPECIFICATION_MODE(); + } + + #setCurrentAndFollowingTokens() { + this.curToken = this.ffgToken == undefined ? + super.nextToken() : + this.ffgToken; + + this.#checkCurToken(); // ffgToken cannot be used in this method and its sub methods (ffgToken is not yet set)! + + this.ffgToken = this.curToken.type === Token.EOF ? + this.curToken : + super.nextToken(); + } + + #insertENCODINGtoken() { + let lineBuilder = []; + let encodingName = ""; + let lineCount = 0; + const ws_commentPattern = /^[ \t\f]*(#.*)?$/; + const inputStream = this.inputStream; + const size = inputStream.size; + + inputStream.seek(0); + for (let i = 0; i < size; i++) { + let c = String.fromCharCode(inputStream.LA(i + 1)); + lineBuilder.push(c); + + if (c == '\n' || i == size - 1) { + let line = lineBuilder.join("").replace("\r", "").replace("\n", ""); + if (ws_commentPattern.test(line)) { // WS* + COMMENT? found + encodingName = this.#getEncodingName(line); + if (encodingName !== "") { + break; // encoding found + } + } else { + break; // statement or backslash found (line is not empty, not whitespace, not comment) + } + + lineCount++; + if (lineCount >= 2) { + break; // check only the first two lines + } + lineBuilder = []; + } + } + + if (encodingName === "") { + encodingName = "utf-8"; // default Python source code encoding + } + + const encodingToken = new CommonToken([null, null], PythonLexer.ENCODING, Token.HIDDEN_CHANNEL, 0, 0); + encodingToken.text = encodingName; + encodingToken.line = 0; + encodingToken.column = -1; + this.#addPendingToken(encodingToken); + } + + #getEncodingName(commentText) { // https://peps.python.org/pep-0263/#defining-the-encoding + const encodingCommentPattern = /^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)/; + const match = commentText.match(encodingCommentPattern); + return match ? 
match[1] : ""; + } + + // initialize the _indentLengthStack + // hide the leading NEWLINE token(s) + // if exists, find the first statement (not NEWLINE, not EOF token) that comes from the default channel + // insert a leading INDENT token if necessary + #handleStartOfInput() { + // initialize the stack with a default 0 indentation length + this.indentLengthStack.push(0); // this will never be popped off + while (this.curToken.type !== Token.EOF) { + if (this.curToken.channel === Token.DEFAULT_CHANNEL) { + if (this.curToken.type === PythonLexer.NEWLINE) { + // all the NEWLINE tokens must be ignored before the first statement + this.#hideAndAddPendingToken(this.curToken); + } else { // We're at the first statement + this.#insertLeadingIndentToken(); + return; // continue the processing of the current token with #checkNextToken() + } + } else { + this.#addPendingToken(this.curToken); // it can be WS, EXPLICIT_LINE_JOINING or COMMENT token + } + this.#setCurrentAndFollowingTokens(); + } // continue the processing of the EOF token with #checkNextToken() + } + + #insertLeadingIndentToken() { + if (this.previousPendingTokenType === PythonLexer.WS) { + const prevToken = this.pendingTokens.at(- 1); /* stack peekLast() */ // WS token + if (this.#getIndentationLength(prevToken.text) !== 0) { // there is an "indentation" before the first statement + const errMsg = "first statement indented"; + this.#reportLexerError(errMsg); + // insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser + this.#createAndAddPendingToken(PythonLexer.INDENT, Token.DEFAULT_CHANNEL, this.#ERR_TXT + errMsg, this.curToken); + } + } + } + + #handleNEWLINEtoken() { + if (this.lexerModeStack.length > 0) { + this.#addPendingToken(this.curToken); + } else if (this.opened > 0) { // We're in an implicit line joining, ignore the current NEWLINE token + this.#hideAndAddPendingToken(this.curToken); + } else { + const nlToken = this.curToken.clone(); // save the current NEWLINE token + const isLookingAhead = this.ffgToken.type === PythonLexer.WS; + if (isLookingAhead) { + this.#setCurrentAndFollowingTokens(); // set the next two tokens + } + + switch (this.ffgToken.type) { + case PythonLexer.NEWLINE: // We're before a blank line + case PythonLexer.COMMENT: // We're before a comment + this.#hideAndAddPendingToken(nlToken); + if (isLookingAhead) { + this.#addPendingToken(this.curToken); // WS token + } + break; + default: + this.#addPendingToken(nlToken); + if (isLookingAhead) { // We're on a whitespace(s) followed by a statement + const indentationLength = this.ffgToken.type === Token.EOF ? 
+ 0 : + this.#getIndentationLength(this.curToken.text); + + if (indentationLength !== this.#INVALID_LENGTH) { + this.#addPendingToken(this.curToken); // WS token + this.#insertIndentOrDedentToken(indentationLength); // may insert INDENT token or DEDENT token(s) + } else { + this.#reportError("inconsistent use of tabs and spaces in indentation"); + } + } else { // We're at a newline followed by a statement (there is no whitespace before the statement) + this.#insertIndentOrDedentToken(0); // may insert DEDENT token(s) + } + } + } + } + + #insertIndentOrDedentToken(curIndentLength) { + let prevIndentLength = this.indentLengthStack.at(-1) /* peek() */; + if (curIndentLength > prevIndentLength) { + this.#createAndAddPendingToken(PythonLexer.INDENT, Token.DEFAULT_CHANNEL, null, this.ffgToken); + this.indentLengthStack.push(curIndentLength); + } else { + while (curIndentLength < prevIndentLength) { // more than 1 DEDENT token may be inserted to the token stream + this.indentLengthStack.pop(); + prevIndentLength = this.indentLengthStack.at(-1) /* peek() */; + if (curIndentLength <= prevIndentLength) { + this.#createAndAddPendingToken(PythonLexer.DEDENT, Token.DEFAULT_CHANNEL, null, this.ffgToken); + } else { + this.#reportError("inconsistent dedent"); + } + } + } + } + + #checkCurToken() { + switch (this.curToken.type) { + case PythonLexer.FSTRING_START: + this.#setLexerModeByFSTRING_STARTtoken(); + return; + case PythonLexer.FSTRING_MIDDLE: + this.#handleFSTRING_MIDDLEtokenWithQuoteAndLBrace(); // affect the opened field + if (this.curToken.type === PythonLexer.FSTRING_MIDDLE) { + return; // No curToken exchange happened + } + break; + case PythonLexer.FSTRING_END: + this.#popLexerMode(); + return; + default: + if (this.lexerModeStack.length === 0) { + return; // Not in fstring mode + } + } + + switch (this.curToken.type) { // the following tokens can only come from default mode (after an LBRACE in fstring) + case PythonLexer.NEWLINE: + // append the current brace expression with the current newline + this.#appendToBraceExpression(this.curToken.text) + this.curToken.channel = Token.HIDDEN_CHANNEL; + break; + case PythonLexer.LBRACE: + // the outermost brace expression cannot be a dictionary comprehension or a set comprehension + this.braceExpressionStack.push("{"); + this.paren_or_bracket_openedStack.push(0); + this.#pushLexerMode(Lexer.DEFAULT_MODE); + break; + case PythonLexer.LPAR: + case PythonLexer.LSQB: + // append the current brace expression with a "(" or a "[" + this.#appendToBraceExpression(this.curToken.text) + // https://peps.python.org/pep-0498/#lambdas-inside-expressions + this.#incrementBraceStack(); + break; + case PythonLexer.RPAR: + case PythonLexer.RSQB: + // append the current brace expression with a ")" or a "]" + this.#appendToBraceExpression(this.curToken.text) + this.#decrementBraceStack(); + break; + case PythonLexer.COLON: + case PythonLexer.COLONEQUAL: + // append the current brace expression with a ":" or a ":=" + this.#appendToBraceExpression(this.curToken.text) + this.#setLexerModeByCOLONorCOLONEQUALtoken(); + break; + case PythonLexer.RBRACE: + this.#setLexerModeAfterRBRACEtoken(); + break; + default: + // append the current brace expression with the current token text + this.#appendToBraceExpression(this.curToken.text) + } + } + + #appendToBraceExpression(text) { + this.braceExpressionStack[this.braceExpressionStack.length - 1] += text; + } + + #incrementBraceStack() { // increment the last element (peek() + 1) + 
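// e.g. in f"{x[1:2]}" the '[' raises the counter to 1, so the following ':' is kept as + // slice syntax; a switch to a format specification mode only happens while the counter is 0 + 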
this.paren_or_bracket_openedStack[this.paren_or_bracket_openedStack.length - 1]++; + } + + #decrementBraceStack() { // decrement the last element (peek() - 1) + this.paren_or_bracket_openedStack[this.paren_or_bracket_openedStack.length - 1]--; + } + + #setLexerModeAfterRBRACEtoken() { + switch (this.curLexerMode) { + case Lexer.DEFAULT_MODE: + this.#popLexerMode(); + this.#popByBRACE(); + break; + case PythonLexer.SQ1__FORMAT_SPECIFICATION_MODE: + case PythonLexer.SQ1R_FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ1__FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ1R_FORMAT_SPECIFICATION_MODE: + case PythonLexer.SQ3__FORMAT_SPECIFICATION_MODE: + case PythonLexer.SQ3R_FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ3__FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ3R_FORMAT_SPECIFICATION_MODE: + this.#popLexerMode(); + this.#popLexerMode(); + this.#popByBRACE(); + break; + default: + this.#reportLexerError("f-string: single '}' is not allowed"); + } + } + + #setLexerModeByFSTRING_STARTtoken() { + const text = this.curToken.text.toLowerCase(); + const modeMap = { + "f'": PythonLexer.SQ1__FSTRING_MODE, + "rf'": PythonLexer.SQ1R_FSTRING_MODE, + "fr'": PythonLexer.SQ1R_FSTRING_MODE, + 'f"': PythonLexer.DQ1__FSTRING_MODE, + 'rf"': PythonLexer.DQ1R_FSTRING_MODE, + 'fr"': PythonLexer.DQ1R_FSTRING_MODE, + "f'''": PythonLexer.SQ3__FSTRING_MODE, + "rf'''": PythonLexer.SQ3R_FSTRING_MODE, + "fr'''": PythonLexer.SQ3R_FSTRING_MODE, + 'f"""': PythonLexer.DQ3__FSTRING_MODE, + 'rf"""': PythonLexer.DQ3R_FSTRING_MODE, + 'fr"""': PythonLexer.DQ3R_FSTRING_MODE, + }; + const mode = modeMap[text]; + if (mode !== undefined) { + this.#pushLexerMode(mode); + } + } + + #setLexerModeByCOLONorCOLONEQUALtoken() { + if (this.paren_or_bracket_openedStack[this.paren_or_bracket_openedStack.length - 1] === 0) { // stack peek == 0 + const previousMode = this.lexerModeStack[this.lexerModeStack.length - 1]; // stack peek + switch (previousMode) { // check the previous lexer mode (the current is DEFAULT_MODE) + case PythonLexer.SQ1__FSTRING_MODE: + case PythonLexer.SQ1__FORMAT_SPECIFICATION_MODE: + this.#pushLexerMode(PythonLexer.SQ1__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.SQ1R_FSTRING_MODE: + case PythonLexer.SQ1R_FORMAT_SPECIFICATION_MODE: + this.#pushLexerMode(PythonLexer.SQ1R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.DQ1__FSTRING_MODE: + case PythonLexer.DQ1__FORMAT_SPECIFICATION_MODE: + this.#pushLexerMode(PythonLexer.DQ1__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.DQ1R_FSTRING_MODE: + case PythonLexer.DQ1R_FORMAT_SPECIFICATION_MODE: + this.#pushLexerMode(PythonLexer.DQ1R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.SQ3__FSTRING_MODE: + case PythonLexer.SQ3__FORMAT_SPECIFICATION_MODE: + this.#pushLexerMode(PythonLexer.SQ3__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.SQ3R_FSTRING_MODE: + case PythonLexer.SQ3R_FORMAT_SPECIFICATION_MODE: + this.#pushLexerMode(PythonLexer.SQ3R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.DQ3__FSTRING_MODE: + case PythonLexer.DQ3__FORMAT_SPECIFICATION_MODE: + this.#pushLexerMode(PythonLexer.DQ3__FORMAT_SPECIFICATION_MODE); // continue in format spec. 
mode + break; + case PythonLexer.DQ3R_FSTRING_MODE: + case PythonLexer.DQ3R_FORMAT_SPECIFICATION_MODE: + this.#pushLexerMode(PythonLexer.DQ3R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + } + } + } + + #popByBRACE() { + this.paren_or_bracket_openedStack.pop(); + this.prevBraceExpression = this.braceExpressionStack.pop() + "}"; + if (this.braceExpressionStack.length > 0) { + // append the current brace expression with the previous brace expression + this.braceExpressionStack[this.braceExpressionStack.length - 1] += this.prevBraceExpression; + } + } + + #handleFSTRING_MIDDLEtokenWithDoubleBrace() { + // replace the trailing double brace with a single brace and insert a hidden brace token + switch (this.#getLastTwoCharsOfTheCurTokenText()) { + case "{{": + this.#trimLastCharAddPendingTokenSetCurToken(PythonLexer.LBRACE, "{", Token.HIDDEN_CHANNEL); + break; + case "}}": + this.#trimLastCharAddPendingTokenSetCurToken(PythonLexer.RBRACE, "}", Token.HIDDEN_CHANNEL); + break; + } + } + + #handleFSTRING_MIDDLEtokenWithQuoteAndLBrace() { + // replace the trailing quote + left_brace with a quote and insert an LBRACE token + // replace the trailing backslash + left_brace with a backslash and insert an LBRACE token + switch (this.#getLastTwoCharsOfTheCurTokenText()) { + case "\"{": + case "'{": + case "\\{": + this.#trimLastCharAddPendingTokenSetCurToken(PythonLexer.LBRACE, "{", Token.DEFAULT_CHANNEL); + break; + } + } + + #getLastTwoCharsOfTheCurTokenText() { + return this.curToken.text.slice(-2); + } + + #trimLastCharAddPendingTokenSetCurToken(type, text, channel) { + // trim the last char and add the modified curToken to the pendingTokens stack + const tokenTextWithoutLastChar = this.curToken.text.slice(0, -1); + this.curToken.text = tokenTextWithoutLastChar; + this.curToken.stop -= 1; + this.#addPendingToken(this.curToken); + + this.#createNewCurToken(type, text, channel); // set curToken + } + + #handleCOLONEQUALtokenInFString() { + if (this.lexerModeStack.length > 0 && + this.paren_or_bracket_openedStack[this.paren_or_bracket_openedStack.length - 1] === 0) { // stack peek == 0 + + // In fstring a colonequal (walrus operator) can only be used in parentheses + // Not in parentheses, replace COLONEQUAL token with COLON as format specifier + // and insert the equal symbol to the following FSTRING_MIDDLE token + this.curToken.type = PythonLexer.COLON; + this.curToken.text = ":"; + this.curToken.stop = this.curToken.start; + + if (this.ffgToken.type === PythonLexer.FSTRING_MIDDLE) { + this.ffgToken.text = "=" + this.ffgToken.text; + this.ffgToken.start -= 1; + this.ffgToken.column -= 1; + } else { + this.#addPendingToken(this.curToken); + this.#createNewCurToken(PythonLexer.FSTRING_MIDDLE, "=", Token.DEFAULT_CHANNEL); + } + } + this.#addPendingToken(this.curToken); + } + + #createNewCurToken(type, text, channel) { + const ctkn = this.curToken.clone(); + ctkn.type = type; + ctkn.text = text; + ctkn.channel = channel; + ctkn.column += 1; + ctkn.start += 1; + ctkn.stop = ctkn.start; + this.curToken = ctkn; + } + + #pushLexerMode(mode) { + this.pushMode(mode); + this.lexerModeStack.push(this.curLexerMode); + this.curLexerMode = mode; + } + + #popLexerMode() { + this.popMode(); + this.curLexerMode = this.lexerModeStack.pop(); + } + + #handleFORMAT_SPECIFICATION_MODE() { + if (this.lexerModeStack.length > 0 && + this.ffgToken.type === PythonLexer.RBRACE) { + + // insert an empty FSTRING_MIDDLE token instead of the missing format specification + switch (this.curToken.type) { + case 
PythonLexer.COLON: + this.#createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, "", this.ffgToken); + break; + case PythonLexer.RBRACE: + // only if the previous brace expression is not a dictionary comprehension or set comprehension + if (!this.#isDictionaryComprehensionOrSetComprehension(this.prevBraceExpression)) { + this.#createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, "", this.ffgToken); + } + break; + } + } + } + + #isDictionaryComprehensionOrSetComprehension(code) { + const inputStream = CharStreams.fromString(code); + const lexer = new PythonLexer(inputStream); + const tokenStream = new CommonTokenStream(lexer); + let parser = new PythonParser(tokenStream); + + // Disable error listeners to suppress console output + lexer.removeErrorListeners(); + parser.removeErrorListeners(); + + parser.dictcomp(); // Try parsing as dictionary comprehension + if (parser.syntaxErrorsCount === 0) + return true; + + parser = new PythonParser(tokenStream); + tokenStream.seek(0); + parser.removeErrorListeners(); + parser.setcomp(); // Try parsing as set comprehension + return parser.syntaxErrorsCount === 0; + } + + #insertTrailingTokens() { + switch (this.lastPendingTokenTypeFromDefaultChannel) { + case PythonLexer.NEWLINE: + case PythonLexer.DEDENT: + break; // no trailing NEWLINE token is needed + default: + // insert an extra trailing NEWLINE token that serves as the end of the last statement + this.#createAndAddPendingToken(PythonLexer.NEWLINE, Token.DEFAULT_CHANNEL, null, this.ffgToken); // ffgToken is EOF + } + this.#insertIndentOrDedentToken(0); // Now insert as many trailing DEDENT tokens as needed + } + + #handleEOFtoken() { + if (this.lastPendingTokenTypeFromDefaultChannel > 0) { + // there was a statement in the input (leading NEWLINE tokens are hidden) + this.#insertTrailingTokens(); + } + this.#addPendingToken(this.curToken); + } + + #hideAndAddPendingToken(ctkn) { + ctkn.channel = Token.HIDDEN_CHANNEL; + this.#addPendingToken(ctkn); + } + + #createAndAddPendingToken(type, channel, text, sampleToken) { + const ctkn = sampleToken.clone(); + ctkn.type = type; + ctkn.channel = channel; + ctkn.stop = sampleToken.start - 1; + ctkn.text = text == null ? 
`<${this.getSymbolicNames()[type]}>` : + text; + + this.#addPendingToken(ctkn); + } + + #addPendingToken(tkn) { + // save the last pending token type because the pendingTokens list may already have been emptied by nextToken() + this.previousPendingTokenType = tkn.type; + if (tkn.channel === Token.DEFAULT_CHANNEL) { + this.lastPendingTokenTypeFromDefaultChannel = this.previousPendingTokenType; + } + this.pendingTokens.push(tkn) /* .addLast(token) */; + } + + #getIndentationLength(indentText) { // the indentText may contain spaces, tabs or form feeds + const TAB_LENGTH = 8; // the standard number of spaces to replace a tab with spaces + let length = 0; + for (let ch of indentText) { + switch (ch) { + case " ": + this.wasSpaceIndentation = true; + length += 1; + break; + case "\t": + this.wasTabIndentation = true; + length += TAB_LENGTH - (length % TAB_LENGTH); + break; + case "\f": // form feed + length = 0; + break; + } + } + + if (this.wasTabIndentation && this.wasSpaceIndentation) { + if (!this.wasIndentationMixedWithSpacesAndTabs) { + this.wasIndentationMixedWithSpacesAndTabs = true; + length = this.#INVALID_LENGTH; // only for the first inconsistent indent + } + } + return length; + } + + #reportLexerError(errMsg) { + this.getErrorListener().syntaxError(this, this.curToken, this.curToken.line, this.curToken.column, " LEXER" + this.#ERR_TXT + errMsg, null); + } + + #reportError(errMsg) { + this.#reportLexerError(errMsg); + + // the ERRORTOKEN will raise an error in the parser + this.#createAndAddPendingToken(PythonLexer.ERRORTOKEN, Token.DEFAULT_CHANNEL, this.#ERR_TXT + errMsg, this.ffgToken); + } +} diff --git a/python/python3_13/Python3/PythonLexerBase.py b/python/python3_13/Python3/PythonLexerBase.py new file mode 100644 index 0000000000..d3272163a9 --- /dev/null +++ b/python/python3_13/Python3/PythonLexerBase.py @@ -0,0 +1,557 @@ +# The MIT License (MIT) +# Copyright (c) 2021 Robert Einhorn +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +# Project : Python Indent/Dedent handler for ANTLR4 grammars +# +# Developed by : Robert Einhorn + +from typing import TextIO, Optional, List, Deque +from antlr4 import InputStream, Lexer, Token +from antlr4.Token import CommonToken +import sys +import re + +class PythonLexerBase(Lexer): + def __init__(self, input: InputStream, output: TextIO = sys.stdout): + super().__init__(input, output) + + # A stack that keeps track of the indentation lengths + self.__indent_length_stack: List[int] + + # A list where tokens are waiting to be loaded into the token stream + self.__pending_tokens: Deque[CommonToken] + + # last pending token type + self.__previous_pending_token_type: int + self.__last_pending_token_type_from_default_channel: int + + # The amount of opened parentheses, square brackets or curly braces + self.__opened: int + # The amount of opened parentheses and square brackets in the current lexer mode + self.__paren_or_bracket_opened_stack: List[int] + # A stack that stores expression(s) between braces in fstring + self.__brace_expression_stack: List[str] + self.__prev_brace_expression: str + + # Instead of self._mode (self._mode is not implemented in each ANTLR4 runtime) + self.__cur_lexer_mode: int + # Instead of self._modeStack (self._modeStack is not implemented in each ANTLR4 runtime) + self.__lexer_mode_stack: List[int] + + self.__was_space_indentation: bool + self.__was_tab_indentation: bool + self.__was_indentation_mixed_with_spaces_and_tabs: bool + + self.__cur_token: CommonToken # current (under processing) token + self.__ffg_token: CommonToken # following (look ahead) token + + self.__INVALID_LENGTH: int = -1 + self.__ERR_TXT: str = " ERROR: " + + self.__init() + + def nextToken(self) -> CommonToken: # reading the input stream until a return EOF + self.__check_next_token() + return self.__pending_tokens.popleft() # add the queued token to the token stream + + def reset(self) -> None: + self.__init() + super().reset() + + def __init(self) -> None: + self.__indent_length_stack = [] + self.__pending_tokens = Deque() + self.__previous_pending_token_type = 0 + self.__last_pending_token_type_from_default_channel = 0 + self.__opened = 0 + self.__paren_or_bracket_opened_stack = [] + self.__brace_expression_stack = [] + self.__prev_brace_expression = "" + self.__cur_lexer_mode = 0 + self.__lexer_mode_stack = [] + self.__was_space_indentation = False + self.__was_tab_indentation = False + self.__was_indentation_mixed_with_spaces_and_tabs = False + self.__cur_token = None + self.__ffg_token = None + + def __check_next_token(self) -> None: + if self.__previous_pending_token_type == Token.EOF: + return + + if not self.__indent_length_stack: # We're at the first token + self.__insert_ENCODING_token() + self.__set_current_and_following_tokens() + self.__handle_start_of_input() + else: + self.__set_current_and_following_tokens() + + match self.__cur_token.type: + case self.NEWLINE: + self.__handle_NEWLINE_token() + case self.LPAR | self.LSQB | self.LBRACE: + self.__opened += 1 + self.__add_pending_token(self.__cur_token) + case self.RPAR | self.RSQB | self.RBRACE: + self.__opened -= 1 + self.__add_pending_token(self.__cur_token) + case self.FSTRING_MIDDLE: + self.__handle_FSTRING_MIDDLE_token_with_double_brace() # does not affect the opened field + self.__add_pending_token(self.__cur_token) + case self.COLONEQUAL: + self.__handle_COLONEQUAL_token_in_fstring() + case self.ERRORTOKEN: + self.__report_lexer_error("token recognition error at: '" + self.__cur_token.text + "'") + 
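# the ERRORTOKEN stays on the default channel, so the parser will also raise a syntax error at this position + 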
self.__add_pending_token(self.__cur_token) + case Token.EOF: + self.__handle_EOF_token() + case _: + self.__add_pending_token(self.__cur_token) + self.__handle_FORMAT_SPECIFICATION_MODE() + + def __set_current_and_following_tokens(self) -> None: + self.__cur_token = super().nextToken() if self.__ffg_token is None else \ + self.__ffg_token + + self.__check_cur_token() # ffgToken cannot be used in this method and its sub methods (ffgToken is not yet set)! + + self.__ffg_token = self.__cur_token if self.__cur_token.type == Token.EOF else \ + super().nextToken() + + def __insert_ENCODING_token(self) -> None: # https://peps.python.org/pep-0263/ + line_builder: list[str] = [] + encoding_name: str = "" + line_count: int = 0 + ws_comment_pattern: re.Pattern = re.compile(r"^[ \t\f]*(#.*)?$") + input_stream: InputStream = self.inputStream + size: int = input_stream.size + + input_stream.seek(0) + for i in range(size): + c: str = chr(input_stream.LA(i + 1)) + line_builder.append(c) + + if c == '\n' or i == size - 1: + line: str = ''.join(line_builder).replace("\r", "").replace("\n", "") + if ws_comment_pattern.match(line): # WS* + COMMENT? found + encoding_name = self.__get_encoding_name(line) + if encoding_name: + break # encoding found + else: + break # statement or backslash found (first line is not empty, not whitespace(s), not comment) + + line_count += 1 + if line_count >= 2: + break # check only the first two lines + line_builder = [] + + if not encoding_name: + encoding_name = "utf-8" # default Python source code encoding + + encoding_token: CommonToken = CommonToken((None, None), self.ENCODING, CommonToken.HIDDEN_CHANNEL, 0, 0) + encoding_token.text = encoding_name + encoding_token.line = 0 + encoding_token.column = -1 + self.__add_pending_token(encoding_token) + + def __get_encoding_name(self, comment_text: str) -> str: # https://peps.python.org/pep-0263/#defining-the-encoding + encoding_comment_pattern: str = r"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)" + match: Optional[re.Match] = re.search(encoding_comment_pattern, comment_text) + return match.group(1) if match else "" + + # initialize the _indent_length_stack + # hide the leading NEWLINE token(s) + # if exists, find the first statement (not NEWLINE, not EOF token) that comes from the default channel + # insert a leading INDENT token if necessary + def __handle_start_of_input(self) -> None: + # initialize the stack with a default 0 indentation length + self.__indent_length_stack.append(0) # this will never be popped off + while self.__cur_token.type != Token.EOF: + if self.__cur_token.channel == Token.DEFAULT_CHANNEL: + if self.__cur_token.type == self.NEWLINE: + # all the NEWLINE tokens must be ignored before the first statement + self.__hide_and_add_pending_token(self.__cur_token) + else: # We're at the first statement + self.__insert_leading_indent_token() + return # continue the processing of the current token with __check_next_token() + else: + self.__add_pending_token(self.__cur_token) # it can be WS, EXPLICIT_LINE_JOINING or COMMENT token + self.__set_current_and_following_tokens() + # continue the processing of the EOF token with __check_next_token() + + def __insert_leading_indent_token(self) -> None: + if self.__previous_pending_token_type == self.WS: + prev_token: CommonToken = self.__pending_tokens[-1] # WS token + if self.__get_indentation_length(prev_token.text) != 0: # there is an "indentation" before the first statement + err_msg: str = "first statement indented" + self.__report_lexer_error(err_msg) + # insert an 
INDENT token before the first statement to raise an 'unexpected indent' error later by the parser + self.__create_and_add_pending_token(self.INDENT, Token.DEFAULT_CHANNEL, self.__ERR_TXT + err_msg, self.__cur_token) + + def __handle_NEWLINE_token(self) -> None: + if self.__lexer_mode_stack: # not is_empty + self.__add_pending_token(self.__cur_token) + elif self.__opened > 0: # We're in an implicit line joining, ignore the current NEWLINE token + self.__hide_and_add_pending_token(self.__cur_token) + else: + nl_token: CommonToken = self.__cur_token.clone() # save the current NEWLINE token + is_looking_ahead: bool = self.__ffg_token.type == self.WS + if is_looking_ahead: + self.__set_current_and_following_tokens() # set the next two tokens + + match self.__ffg_token.type: + case self.NEWLINE | self.COMMENT: + # We're before a blank line or a comment or type comment or a type ignore comment + self.__hide_and_add_pending_token(nl_token) # ignore the NEWLINE token + if is_looking_ahead: + self.__add_pending_token(self.__cur_token) # WS token + case _: + self.__add_pending_token(nl_token) + if is_looking_ahead: # We're on a whitespace(s) followed by a statement + indentation_length: int = 0 if self.__ffg_token.type == Token.EOF else \ + self.__get_indentation_length(self.__cur_token.text) + + if indentation_length != self.__INVALID_LENGTH: + self.__add_pending_token(self.__cur_token) # WS token + self.__insert_INDENT_or_DEDENT_token(indentation_length) # may insert INDENT token or DEDENT token(s) + else: + self.__report_error("inconsistent use of tabs and spaces in indentation") + else: # We're at a newline followed by a statement (there is no whitespace before the statement) + self.__insert_INDENT_or_DEDENT_token(0) # may insert DEDENT token(s) + + def __insert_INDENT_or_DEDENT_token(self, indent_length: int) -> None: + prev_indent_length: int = self.__indent_length_stack[-1] # stack peek + if indent_length > prev_indent_length: + self.__create_and_add_pending_token(self.INDENT, Token.DEFAULT_CHANNEL, None, self.__ffg_token) + self.__indent_length_stack.append(indent_length) # stack push + else: + while indent_length < prev_indent_length: # more than 1 DEDENT token may be inserted to the token stream + self.__indent_length_stack.pop() + prev_indent_length = self.__indent_length_stack[-1] # stack peek + if indent_length <= prev_indent_length: + self.__create_and_add_pending_token(self.DEDENT, Token.DEFAULT_CHANNEL, None, self.__ffg_token) + else: + self.__report_error("inconsistent dedent") + + def __check_cur_token(self) -> None: + match self.__cur_token.type: + case self.FSTRING_START: + self.__set_lexer_mode_by_FSTRING_START_token() + return + case self.FSTRING_MIDDLE: + self.__handle_FSTRING_MIDDLE_token_with_quote_and_lbrace() # affect the opened field + if self.__cur_token.type == self.FSTRING_MIDDLE: + return # No __cur_token exchange happened + case self.FSTRING_END: + self.__pop_lexer_mode() + return + case _: + if not self.__lexer_mode_stack: + return # Not in fstring mode + + match self.__cur_token.type: # the following tokens can only come from default mode (after an LBRACE in fstring) + case self.NEWLINE: + # append the current brace expression with the current newline + self.__append_to_brace_expression(self.__cur_token.text) + self.__cur_token.channel = Token.HIDDEN_CHANNEL + case self.LBRACE: + # the outermost brace expression cannot be a dictionary comprehension or a set comprehension + self.__brace_expression_stack.append("{") + self.__paren_or_bracket_opened_stack.append(0) # 
stack push + self.__push_lexer_mode(Lexer.DEFAULT_MODE) + case self.LPAR | self.LSQB: + # append the current brace expression with a "(" or a "[" + self.__append_to_brace_expression(self.__cur_token.text) + # https://peps.python.org/pep-0498/#lambdas-inside-expressions + self.__increment_brace_stack() + case self.RPAR | self.RSQB: + # append the current brace expression with a ")" or a "]" + self.__append_to_brace_expression(self.__cur_token.text) + self.__decrement_brace_stack() + case self.COLON | self.COLONEQUAL: + # append the current brace expression with a ":" or a ":=" + self.__append_to_brace_expression(self.__cur_token.text) + self.__set_lexer_mode_by_COLON_or_COLONEQUAL_token() + case self.RBRACE: + self.__set_lexer_mode_after_RBRACE_token() + case _: + # append the current brace expression with the current token text + self.__append_to_brace_expression(self.__cur_token.text) + + def __append_to_brace_expression(self, text: str) -> None: + self.__brace_expression_stack[-1] += text + + def __increment_brace_stack(self) -> None: # increment the last element (peek() + 1) + self.__paren_or_bracket_opened_stack[-1] += 1 + + def __decrement_brace_stack(self) -> None: # decrement the last element (peek() - 1) + self.__paren_or_bracket_opened_stack[-1] -= 1 + + def __set_lexer_mode_after_RBRACE_token(self) -> None: + match self.__cur_lexer_mode: + case Lexer.DEFAULT_MODE: + self.__pop_lexer_mode() # only once + self.__pop_by_RBRACE() + + case self.SQ1__FORMAT_SPECIFICATION_MODE \ + | self.SQ1R_FORMAT_SPECIFICATION_MODE \ + | self.DQ1__FORMAT_SPECIFICATION_MODE \ + | self.DQ1R_FORMAT_SPECIFICATION_MODE \ + | self.SQ3__FORMAT_SPECIFICATION_MODE \ + | self.SQ3R_FORMAT_SPECIFICATION_MODE \ + | self.DQ3__FORMAT_SPECIFICATION_MODE \ + | self.DQ3R_FORMAT_SPECIFICATION_MODE: + + self.__pop_lexer_mode() + self.__pop_lexer_mode() + self.__pop_by_RBRACE() + case _: + self.__report_lexer_error("f-string: single '}' is not allowed") + + def __set_lexer_mode_by_FSTRING_START_token(self) -> None: + text = self.__cur_token.text.lower() + mode_map = { + "f'": self.SQ1__FSTRING_MODE, + "rf'": self.SQ1R_FSTRING_MODE, + "fr'": self.SQ1R_FSTRING_MODE, + 'f"': self.DQ1__FSTRING_MODE, + 'rf"': self.DQ1R_FSTRING_MODE, + 'fr"': self.DQ1R_FSTRING_MODE, + "f'''": self.SQ3__FSTRING_MODE, + "rf'''": self.SQ3R_FSTRING_MODE, + "fr'''": self.SQ3R_FSTRING_MODE, + 'f"""': self.DQ3__FSTRING_MODE, + 'rf"""': self.DQ3R_FSTRING_MODE, + 'fr"""': self.DQ3R_FSTRING_MODE, + } + mode = mode_map.get(text) + if mode is not None: + self.__push_lexer_mode(mode) + + def __set_lexer_mode_by_COLON_or_COLONEQUAL_token(self) -> None: + if self.__paren_or_bracket_opened_stack[-1] == 0: # stack peek == 0 + # COLONEQUAL token will be replaced with a COLON token in checkNextToken() + match self.__lexer_mode_stack[-1]: # check the previous lexer mode (the current is DEFAULT_MODE) + case self.SQ1__FSTRING_MODE \ + | self.SQ1__FORMAT_SPECIFICATION_MODE: + + self.__push_lexer_mode(self.SQ1__FORMAT_SPECIFICATION_MODE) # continue in format spec. mode + case self.SQ1R_FSTRING_MODE \ + | self.SQ1R_FORMAT_SPECIFICATION_MODE: + + self.__push_lexer_mode(self.SQ1R_FORMAT_SPECIFICATION_MODE) # continue in format spec. mode + case self.DQ1__FSTRING_MODE \ + | self.DQ1__FORMAT_SPECIFICATION_MODE: + + self.__push_lexer_mode(self.DQ1__FORMAT_SPECIFICATION_MODE) # continue in format spec. mode + case self.DQ1R_FSTRING_MODE \ + | self.DQ1R_FORMAT_SPECIFICATION_MODE: + + self.__push_lexer_mode(self.DQ1R_FORMAT_SPECIFICATION_MODE) # continue in format spec. 
mode + case self.SQ3__FSTRING_MODE \ + | self.SQ3__FORMAT_SPECIFICATION_MODE: + + self.__push_lexer_mode(self.SQ3__FORMAT_SPECIFICATION_MODE) # continue in format spec. mode + case self.SQ3R_FSTRING_MODE \ + | self.SQ3R_FORMAT_SPECIFICATION_MODE: + + self.__push_lexer_mode(self.SQ3R_FORMAT_SPECIFICATION_MODE) # continue in format spec. mode + case self.DQ3__FSTRING_MODE \ + | self.DQ3__FORMAT_SPECIFICATION_MODE: + + self.__push_lexer_mode(self.DQ3__FORMAT_SPECIFICATION_MODE) # continue in format spec. mode + case self.DQ3R_FSTRING_MODE \ + | self.DQ3R_FORMAT_SPECIFICATION_MODE: + + self.__push_lexer_mode(self.DQ3R_FORMAT_SPECIFICATION_MODE) # continue in format spec. mode + + def __pop_by_RBRACE(self) -> None: + self.__paren_or_bracket_opened_stack.pop() + self.__prev_brace_expression = self.__brace_expression_stack.pop() + "}" + if self.__brace_expression_stack: + # append the current brace expression with the previous brace expression + self.__brace_expression_stack[-1] += self.__prev_brace_expression + + def __handle_FSTRING_MIDDLE_token_with_double_brace(self) -> None: + # replace the trailing double brace with a single brace and insert a hidden brace token + match self.__get_last_two_chars_of_the_cur_token_text(): + case "{{": + self.__trim_last_char_add_pending_token_set_cur_token(self.LBRACE, "{", Token.HIDDEN_CHANNEL) + case "}}": + self.__trim_last_char_add_pending_token_set_cur_token(self.RBRACE, "}", Token.HIDDEN_CHANNEL) + + def __handle_FSTRING_MIDDLE_token_with_quote_and_lbrace(self) -> None: + # replace the trailing quote + left_brace with a quote and insert an LBRACE token + # replace the trailing backslash + left_brace with a backslash and insert an LBRACE token + match self.__get_last_two_chars_of_the_cur_token_text(): + case "\"{" | "'{" | "\\{": + self.__trim_last_char_add_pending_token_set_cur_token(self.LBRACE, "{", Token.DEFAULT_CHANNEL) + + def __get_last_two_chars_of_the_cur_token_text(self) -> str: + cur_token_text: str = self.__cur_token.text + return cur_token_text[-2:] if len(cur_token_text) >= 2 else cur_token_text + + def __trim_last_char_add_pending_token_set_cur_token(self, type: int, text: str, channel: int) -> None: + # trim the last char and add the modified curToken to the __pending_tokens stack + token_text_without_last_char: str = self.__cur_token.text[:-1] + self.__cur_token.text = token_text_without_last_char + self.__cur_token.stop -= 1 + self.__add_pending_token(self.__cur_token) + + self.__create_new_cur_token(type, text, channel) # set __cur_token + + def __handle_COLONEQUAL_token_in_fstring(self) -> None: + if self.__lexer_mode_stack \ + and self.__paren_or_bracket_opened_stack[-1] == 0: # stack peek == 0 + + # In an f-string, a colonequal (walrus operator) can only be used inside parentheses + # Outside parentheses, replace the COLONEQUAL token with a COLON token as format specifier + # and insert the equal symbol into the following FSTRING_MIDDLE token + self.__cur_token.type = self.COLON + self.__cur_token.text = ":" + self.__cur_token.stop = self.__cur_token.start + if self.__ffg_token.type == self.FSTRING_MIDDLE: + self.__ffg_token.text = "=" + self.__ffg_token.text + self.__ffg_token.start -= 1 + self.__ffg_token.column -= 1 + else: + self.__add_pending_token(self.__cur_token) + self.__create_new_cur_token(self.FSTRING_MIDDLE, "=", Token.DEFAULT_CHANNEL) + self.__add_pending_token(self.__cur_token) + + def __create_new_cur_token(self, type: int, text: str, channel: int) -> None: + ctkn: CommonToken = self.__cur_token.clone() + ctkn.type = type + ctkn.text = text + 
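# the clone becomes a brand-new single-character token (stop == start below) and + # replaces __cur_token; it is emitted right after the token that was just queued + 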
ctkn.channel = channel + ctkn.column += 1 + ctkn.start += 1 + ctkn.stop = ctkn.start + self.__cur_token = ctkn + + def __push_lexer_mode(self, mode: int) -> None: + self.pushMode(mode) + self.__lexer_mode_stack.append(self.__cur_lexer_mode) # stack push + self.__cur_lexer_mode = mode + + def __pop_lexer_mode(self) -> None: + self.popMode() + self.__cur_lexer_mode = self.__lexer_mode_stack.pop() + + def __handle_FORMAT_SPECIFICATION_MODE(self) -> None: + if self.__lexer_mode_stack \ + and self.__ffg_token.type == self.RBRACE: + + # insert an empty FSTRING_MIDDLE token instead of the missing format specification + match self.__cur_token.type: + case self.COLON: + self.__create_and_add_pending_token(self.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, "", self.__ffg_token) + case self.RBRACE: + # only if the previous brace expression is not a dictionary comprehension or set comprehension + if not self.__is_dictionary_comprehension_or_set_comprehension(self.__prev_brace_expression): + self.__create_and_add_pending_token(self.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, "", self.__ffg_token) + + def __is_dictionary_comprehension_or_set_comprehension(self, code: str) -> bool: + from antlr4 import InputStream, CommonTokenStream + from PythonLexer import PythonLexer + from PythonParser import PythonParser + + input_stream: InputStream = InputStream(code) + lexer: PythonLexer = PythonLexer(input_stream) + token_stream: CommonTokenStream = CommonTokenStream(lexer) + parser: PythonParser = PythonParser(token_stream) + + # Disable error listeners to suppress console output + lexer.removeErrorListeners() + parser.removeErrorListeners() + + parser.dictcomp() # Try parsing as dictionary comprehension + if parser.getNumberOfSyntaxErrors() == 0: + return True + + parser = PythonParser(token_stream) + token_stream.seek(0) + parser.removeErrorListeners() + parser.setcomp() # Try parsing as set comprehension + return parser.getNumberOfSyntaxErrors() == 0 + + def __insert_trailing_tokens(self) -> None: + match self.__last_pending_token_type_from_default_channel: + case self.NEWLINE | self.DEDENT: + pass # no trailing NEWLINE token is needed + case _: # insert an extra trailing NEWLINE token that serves as the end of the last statement + self.__create_and_add_pending_token(self.NEWLINE, Token.DEFAULT_CHANNEL, None, self.__ffg_token) # __ffg_token is EOF + self.__insert_INDENT_or_DEDENT_token(0) # Now insert as many trailing DEDENT tokens as needed + + def __handle_EOF_token(self) -> None: + if self.__last_pending_token_type_from_default_channel > 0: + # there was a statement in the input (leading NEWLINE tokens are hidden) + self.__insert_trailing_tokens() + self.__add_pending_token(self.__cur_token) + + def __hide_and_add_pending_token(self, ctkn: CommonToken) -> None: + ctkn.channel = Token.HIDDEN_CHANNEL + self.__add_pending_token(ctkn) + + def __create_and_add_pending_token(self, ttype: int, channel: int, text: Optional[str], sample_token: CommonToken) -> None: + ctkn: CommonToken = sample_token.clone() + ctkn.type = ttype + ctkn.channel = channel + ctkn.stop = sample_token.start - 1 + 
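# stop < start marks this as a zero-width synthetic token, positioned just before sample_token + ctkn.text = "<" + self.symbolicNames[ttype] + ">" if text is None else \ + text + + self.__add_pending_token(ctkn) + + def __add_pending_token(self, ctkn: CommonToken) -> None: + # save the last pending token type because the __pending_tokens deque may already have been emptied by nextToken() + self.__previous_pending_token_type = ctkn.type + if ctkn.channel == Token.DEFAULT_CHANNEL: + self.__last_pending_token_type_from_default_channel = 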
self.__previous_pending_token_type + self.__pending_tokens.append(ctkn) + + def __get_indentation_length(self, indentText: str) -> int: # the indentText may contain spaces, tabs or form feeds + TAB_LENGTH: int = 8 # the standard number of spaces to replace a tab with spaces + length: int = 0 + ch: str + for ch in indentText: + match ch: + case ' ': + self.__was_space_indentation = True + length += 1 + case '\t': + self.__was_tab_indentation = True + length += TAB_LENGTH - (length % TAB_LENGTH) + case '\f': # form feed + length = 0 + + if self.__was_tab_indentation and self.__was_space_indentation: + if not self.__was_indentation_mixed_with_spaces_and_tabs: + self.__was_indentation_mixed_with_spaces_and_tabs = True + length = self.__INVALID_LENGTH # only for the first inconsistent indent + return length + + def __report_lexer_error(self, err_msg: str) -> None: + self.getErrorListenerDispatch().syntaxError(self, self.__cur_token, self.__cur_token.line, self.__cur_token.column, " LEXER" + self.__ERR_TXT + err_msg, None) + + def __report_error(self, err_msg: str) -> None: + self.__report_lexer_error(err_msg) + + # the ERRORTOKEN will raise an error in the parser + self.__create_and_add_pending_token(self.ERRORTOKEN, Token.DEFAULT_CHANNEL, self.__ERR_TXT + err_msg, self.__ffg_token) diff --git a/python/python3_12/Python3_12_6_official_grammar.peg b/python/python3_13/Python3_13_2_official_grammar.peg similarity index 95% rename from python/python3_12/Python3_12_6_official_grammar.peg rename to python/python3_13/Python3_13_2_official_grammar.peg index 8714b514d1..e774b1e92b 100644 --- a/python/python3_12/Python3_12_6_official_grammar.peg +++ b/python/python3_13/Python3_13_2_official_grammar.peg @@ -55,6 +55,9 @@ # Fail if e can be parsed, without consuming any input. # ~ # Commit to the current alternative, even if it fails to parse. +# &&e +# Eager parse e. The parser will not backtrack and will immediately +# fail with SyntaxError if e cannot be parsed. # # STARTING RULES @@ -215,7 +218,7 @@ function_def: function_def_raw: | 'def' NAME [type_params] '(' [params] ')' ['->' expression ] ':' [func_type_comment] block - | ASYNC 'def' NAME [type_params] '(' [params] ')' ['->' expression ] ':' [func_type_comment] block + | 'async' 'def' NAME [type_params] '(' [params] ')' ['->' expression ] ':' [func_type_comment] block # Function parameters # ------------------- @@ -303,16 +306,16 @@ while_stmt: for_stmt: | 'for' star_targets 'in' ~ star_expressions ':' [TYPE_COMMENT] block [else_block] - | ASYNC 'for' star_targets 'in' ~ star_expressions ':' [TYPE_COMMENT] block [else_block] + | 'async' 'for' star_targets 'in' ~ star_expressions ':' [TYPE_COMMENT] block [else_block] # With statement # -------------- with_stmt: - | 'with' '(' ','.with_item+ ','? ')' ':' block + | 'with' '(' ','.with_item+ ','? ')' ':' [TYPE_COMMENT] block | 'with' ','.with_item+ ':' [TYPE_COMMENT] block - | ASYNC 'with' '(' ','.with_item+ ','? ')' ':' block - | ASYNC 'with' ','.with_item+ ':' [TYPE_COMMENT] block + | 'async' 'with' '(' ','.with_item+ ','? 
')' ':' block + | 'async' 'with' ','.with_item+ ':' [TYPE_COMMENT] block with_item: | expression 'as' star_target &(',' | ')' | ':') @@ -492,16 +495,20 @@ type_alias: # Type parameter declaration # -------------------------- -type_params: '[' type_param_seq ']' +type_params: + | invalid_type_params + | '[' type_param_seq ']' type_param_seq: ','.type_param+ [','] type_param: - | NAME [type_param_bound] - | '*' NAME - | '**' NAME + | NAME [type_param_bound] [type_param_default] + | '*' NAME [type_param_starred_default] + | '**' NAME [type_param_default] type_param_bound: ':' expression +type_param_default: '=' expression +type_param_starred_default: '=' star_expression # EXPRESSIONS # ----------- @@ -637,7 +644,7 @@ power: # Primary elements are things like "obj.something.something", "obj[something]", "obj(something)", "obj" ... await_primary: - | AWAIT primary + | 'await' primary | primary primary: @@ -724,7 +731,7 @@ fstring_middle: | fstring_replacement_field | FSTRING_MIDDLE fstring_replacement_field: - | '{' (yield_expr | star_expressions) '='? [fstring_conversion] [fstring_full_format_spec] '}' + | '{' annotated_rhs '='? [fstring_conversion] [fstring_full_format_spec] '}' fstring_conversion: | "!" NAME fstring_full_format_spec: @@ -767,7 +774,7 @@ for_if_clauses: | for_if_clause+ for_if_clause: - | ASYNC 'for' star_targets 'in' ~ disjunction ('if' disjunction )* + | 'async' 'for' star_targets 'in' ~ disjunction ('if' disjunction )* | 'for' star_targets 'in' ~ disjunction ('if' disjunction )* listcomp: diff --git a/python/python3_12/PythonLexer.g4 b/python/python3_13/PythonLexer.g4 similarity index 70% rename from python/python3_12/PythonLexer.g4 rename to python/python3_13/PythonLexer.g4 index ead8b3c89e..98b99d4aef 100644 --- a/python/python3_12/PythonLexer.g4 +++ b/python/python3_13/PythonLexer.g4 @@ -22,65 +22,28 @@ THE SOFTWARE. 
/* * Project : an ANTLR4 lexer grammar for Python 3 - * https://github.com/RobEin/ANTLR4-parser-for-Python-3.12 + * https://github.com/RobEin/ANTLR4-parser-for-Python-3.13 * Developed by : Robert Einhorn, robert.einhorn.hu@gmail.com */ +// https://docs.python.org/3.13/reference/lexical_analysis.html + lexer grammar PythonLexer; options { superClass=PythonLexerBase; } tokens { - INDENT, DEDENT // https://docs.python.org/3.12/reference/lexical_analysis.html#indentation + ENCODING // https://docs.python.org/3.13/reference/lexical_analysis.html#encoding-declarations + , INDENT, DEDENT // https://docs.python.org/3.13/reference/lexical_analysis.html#indentation , TYPE_COMMENT // not supported, only for compatibility with the PythonParser.g4 grammar , FSTRING_START, FSTRING_MIDDLE, FSTRING_END // https://peps.python.org/pep-0701/#specification } - -// https://docs.python.org/3.12/reference/lexical_analysis.html - /* * default lexer mode */ -// https://docs.python.org/3.12/reference/lexical_analysis.html#keywords -FALSE : 'False'; -AWAIT : 'await'; -ELSE : 'else'; -IMPORT : 'import'; -PASS : 'pass'; -NONE : 'None'; -BREAK : 'break'; -EXCEPT : 'except'; -IN : 'in'; -RAISE : 'raise'; -TRUE : 'True'; -CLASS : 'class'; -FINALLY : 'finally'; -IS : 'is'; -RETURN : 'return'; -AND : 'and'; -CONTINUE : 'continue'; -FOR : 'for'; -LAMBDA : 'lambda'; -TRY : 'try'; -AS : 'as'; -DEF : 'def'; -FROM : 'from'; -NONLOCAL : 'nonlocal'; -WHILE : 'while'; -ASSERT : 'assert'; -DEL : 'del'; -GLOBAL : 'global'; -NOT : 'not'; -WITH : 'with'; -ASYNC : 'async'; -ELIF : 'elif'; -IF : 'if'; -OR : 'or'; -YIELD : 'yield'; - -// https://docs.python.org/3.12/library/token.html#module-token +// https://docs.python.org/3.13/library/token.html#module-token LPAR : '('; // OPEN_PAREN LSQB : '['; // OPEN_BRACK LBRACE : '{'; // OPEN_BRACE @@ -130,136 +93,239 @@ ELLIPSIS : '...'; COLONEQUAL : ':='; EXCLAMATION : '!'; -// https://docs.python.org/3.12/reference/lexical_analysis.html#identifiers -NAME - : ID_START ID_CONTINUE* - ; +// https://docs.python.org/3.13/reference/lexical_analysis.html#keywords +FALSE : 'False'; +AWAIT : 'await'; +ELSE : 'else'; +IMPORT : 'import'; +PASS : 'pass'; +NONE : 'None'; +BREAK : 'break'; +EXCEPT : 'except'; +IN : 'in'; +RAISE : 'raise'; +TRUE : 'True'; +CLASS : 'class'; +FINALLY : 'finally'; +IS : 'is'; +RETURN : 'return'; +AND : 'and'; +CONTINUE : 'continue'; +FOR : 'for'; +LAMBDA : 'lambda'; +TRY : 'try'; +AS : 'as'; +DEF : 'def'; +FROM : 'from'; +NONLOCAL : 'nonlocal'; +WHILE : 'while'; +ASSERT : 'assert'; +DEL : 'del'; +GLOBAL : 'global'; +NOT : 'not'; +WITH : 'with'; +ASYNC : 'async'; +ELIF : 'elif'; +IF : 'if'; +OR : 'or'; +YIELD : 'yield'; + +// *** Soft Keywords: https://docs.python.org/3.13/reference/lexical_analysis.html#soft-keywords +NAME_OR_TYPE : 'type'; // identifier or type keyword, the parser grammar will decide what it means +NAME_OR_MATCH : 'match'; // identifier or match keyword, the parser grammar will decide what it means +NAME_OR_CASE : 'case'; // identifier or case keyword, the parser grammar will decide what it means +NAME_OR_WILDCARD : '_'; // identifier or wildcard symbol, the parser grammar will decide what it means + +// https://docs.python.org/3.13/reference/lexical_analysis.html#identifiers +NAME : ID_START ID_CONTINUE*; -// https://docs.python.org/3.12/reference/lexical_analysis.html#numeric-literals +// https://docs.python.org/3.13/reference/lexical_analysis.html#numeric-literals NUMBER : INTEGER | FLOAT_NUMBER | IMAG_NUMBER ; -// 
https://docs.python.org/3.12/reference/lexical_analysis.html#string-and-bytes-literals +// https://docs.python.org/3.13/reference/lexical_analysis.html#string-and-bytes-literals STRING : STRING_LITERAL | BYTES_LITERAL ; -// https://docs.python.org/3.12/reference/lexical_analysis.html#physical-lines +// https://docs.python.org/3.13/reference/lexical_analysis.html#physical-lines NEWLINE : '\r'? '\n'; // Unix, Windows -// https://docs.python.org/3.12/reference/lexical_analysis.html#comments -COMMENT : '#' ~[\r\n]* -> channel(HIDDEN); +// https://docs.python.org/3.13/reference/lexical_analysis.html#comments +COMMENT : '#' ~[\r\n]* -> channel(HIDDEN); -// https://docs.python.org/3.12/reference/lexical_analysis.html#whitespace-between-tokens -WS : [ \t\f]+ -> channel(HIDDEN); +// https://docs.python.org/3.13/reference/lexical_analysis.html#whitespace-between-tokens +WS : [ \t\f]+ -> channel(HIDDEN); -// https://docs.python.org/3.12/reference/lexical_analysis.html#explicit-line-joining -EXPLICIT_LINE_JOINING : '\\' NEWLINE -> channel(HIDDEN); +// https://docs.python.org/3.13/reference/lexical_analysis.html#explicit-line-joining +EXPLICIT_LINE_JOINING : BACKSLASH_NEWLINE -> channel(HIDDEN); -// https://docs.python.org/3.12/reference/lexical_analysis.html#formatted-string-literals -SINGLE_QUOTE_FSTRING_START : F_STRING_PREFIX ['] -> type(FSTRING_START), pushMode(SINGLE_QUOTE_FSTRING_MODE); -DOUBLE_QUOTE_FSTRING_START : F_STRING_PREFIX ["] -> type(FSTRING_START), pushMode(DOUBLE_QUOTE_FSTRING_MODE); -LONG_SINGLE_QUOTE_FSTRING_START : F_STRING_PREFIX ['][']['] -> type(FSTRING_START), pushMode(LONG_SINGLE_QUOTE_FSTRING_MODE); -LONG_DOUBLE_QUOTE_FSTRING_START : F_STRING_PREFIX ["]["]["] -> type(FSTRING_START), pushMode(LONG_DOUBLE_QUOTE_FSTRING_MODE); -ERRORTOKEN : . ; // catch the unrecognized characters and redirect these errors to the parser +// ************************* +// abbreviations for FSTRING +// ************************* +// SQ1__FSTRING = short single quoted formatted string: f'abc' +// DQ1__FSTRING = short double quoted formatted string: f"abc" +// SQ1R_FSTRING = short single quoted raw formatted string: rf'abc' +// DQ1R_FSTRING = short double quoted raw formatted string: rf"abc" +// +// SQ3__FSTRING = long single quoted formatted string: f'''abc''' +// DQ3__FSTRING = long double quoted formatted string: f"""abc""" +// SQ3R_FSTRING = long single quoted raw formatted string: rf'''abc''' +// DQ3R_FSTRING = long double quoted raw formatted string: rf"""abc""" + +// https://docs.python.org/3.13/reference/lexical_analysis.html#formatted-string-literals +FSTRING_START : FSTRING_PREFIX (['] + | ["] + | ['][']['] + | ["]["]["]) + ; // pushMode(????_FSTRING_MODE) will be called in PythonLexerBase class + +// catch the unrecognized characters +ERRORTOKEN : . 
; // PythonLexerBase class will report an error about this (the ERRORTOKEN will also cause an error in the parser) /* * other lexer modes */ -mode SINGLE_QUOTE_FSTRING_MODE; - SINGLE_QUOTE_FSTRING_END : ['] -> type(FSTRING_END), popMode; - SINGLE_QUOTE_FSTRING_MIDDLE : SINGLE_QUOTE_FSTRING_LITERAL -> type(FSTRING_MIDDLE); - SINGLE_QUOTE_FSTRING_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE or SINGLE_QUOTE_FORMAT_SPECIFICATION_RBRACE +mode SQ1__FSTRING_MODE; + SQ1__FSTRING_END : ['] -> type(FSTRING_END); // popMode will be called in PythonLexerBase class + SQ1__FSTRING_MIDDLE : SQ1__FSTRING_ITEM -> type(FSTRING_MIDDLE); + SQ1__FSTRING_LBRACE : '{' -> type(LBRACE); // pushMode(SQ1__FORMAT_SPECIFICATION_MODE) will be called in PythonLexerBase class + +mode SQ1R_FSTRING_MODE; + SQ1R_FSTRING_END : ['] -> type(FSTRING_END); // popMode will be called in PythonLexerBase class + SQ1R_FSTRING_MIDDLE : SQ1R_FSTRING_ITEM -> type(FSTRING_MIDDLE); + SQ1R_FSTRING_LBRACE : '{' -> type(LBRACE); // pushMode(SQ1R_FORMAT_SPECIFICATION_MODE) will be called in PythonLexerBase class -mode DOUBLE_QUOTE_FSTRING_MODE; - DOUBLE_QUOTE_FSTRING_END : ["] -> type(FSTRING_END), popMode; - DOUBLE_QUOTE_FSTRING_MIDDLE : DOUBLE_QUOTE_FSTRING_LITERAL -> type(FSTRING_MIDDLE); - DOUBLE_QUOTE_FSTRING_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE or DOUBLE_QUOTE_FORMAT_SPECIFICATION_RBRACE +mode DQ1__FSTRING_MODE; + DQ1__FSTRING_END : ["] -> type(FSTRING_END); // popMode will be called in PythonLexerBase class + DQ1__FSTRING_MIDDLE : DQ1__FSTRING_ITEM -> type(FSTRING_MIDDLE); + DQ1__FSTRING_LBRACE : '{' -> type(LBRACE); // pushMode(DQ1__FORMAT_SPECIFICATION_MODE) will be called in PythonLexerBase class -mode LONG_SINGLE_QUOTE_FSTRING_MODE; - LONG_SINGLE_QUOTE_FSTRING_END : ['][']['] -> type(FSTRING_END), popMode; - LONG_SINGLE_QUOTE_FSTRING_MIDDLE : SINGLE_QUOTE_FSTRING_LITERAL -> type(FSTRING_MIDDLE); - LONG_SINGLE_QUOTE_FSTRING_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE or SINGLE_QUOTE_FORMAT_SPECIFICATION_RBRACE +mode DQ1R_FSTRING_MODE; + DQ1R_FSTRING_END : ["] -> type(FSTRING_END); // popMode will be called in PythonLexerBase class + DQ1R_FSTRING_MIDDLE : DQ1R_FSTRING_ITEM -> type(FSTRING_MIDDLE); + DQ1R_FSTRING_LBRACE : '{' -> type(LBRACE); // pushMode(DQ1R_FORMAT_SPECIFICATION_MODE) will be called in PythonLexerBase class -mode LONG_DOUBLE_QUOTE_FSTRING_MODE; - LONG_DOUBLE_QUOTE_FSTRING_END : ["]["]["] -> type(FSTRING_END), popMode; - LONG_DOUBLE_QUOTE_FSTRING_MIDDLE : DOUBLE_QUOTE_FSTRING_LITERAL -> type(FSTRING_MIDDLE); - LONG_DOUBLE_QUOTE_FSTRING_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE or DOUBLE_QUOTE_FORMAT_SPECIFICATION_RBRACE +mode SQ3__FSTRING_MODE; + SQ3__FSTRING_END : ['][']['] -> type(FSTRING_END); // popMode will be called in PythonLexerBase class + SQ3__FSTRING_MIDDLE : SQ3__FSTRING_ITEM -> type(FSTRING_MIDDLE); + SQ3__FSTRING_LBRACE : '{' -> type(LBRACE); // pushMode(SQ3__FORMAT_SPECIFICATION_MODE) will be called in PythonLexerBase class -mode SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE; // only used after a format specifier colon - SINGLE_QUOTE_FORMAT_SPECIFICATION_FSTRING_MIDDLE : FORMAT_SPEC_CHAR_NO_SINGLE_QUOTE+ -> type(FSTRING_MIDDLE); - SINGLE_QUOTE_FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class - SINGLE_QUOTE_FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to ..._QUOTE_FSTRING_MODE by PythonLexerBase class +mode SQ3R_FSTRING_MODE; + SQ3R_FSTRING_END : 
['][']['] -> type(FSTRING_END); // popMode will be called in PythonLexerBase class + SQ3R_FSTRING_MIDDLE : SQ3R_FSTRING_ITEM -> type(FSTRING_MIDDLE); + SQ3R_FSTRING_LBRACE : '{' -> type(LBRACE); // pushMode(SQ3R_FORMAT_SPECIFICATION_MODE) will be called in PythonLexerBase class -mode DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE; // only used after a format specifier colon - DOUBLE_QUOTE_FORMAT_SPECIFICATION_FSTRING_MIDDLE : FORMAT_SPEC_CHAR_NO_DOUBLE_QUOTE+ -> type(FSTRING_MIDDLE); - DOUBLE_QUOTE_FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class - DOUBLE_QUOTE_FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to ..._QUOTE_FSTRING_MODE by PythonLexerBase class +mode DQ3__FSTRING_MODE; + DQ3__FSTRING_END : ["]["]["] -> type(FSTRING_END); // popMode will be called in PythonLexerBase class + DQ3__FSTRING_MIDDLE : DQ3__FSTRING_ITEM -> type(FSTRING_MIDDLE); + DQ3__FSTRING_LBRACE : '{' -> type(LBRACE); // pushMode(DQ3__FORMAT_SPECIFICATION_MODE) will be called in PythonLexerBase class +mode DQ3R_FSTRING_MODE; + DQ3R_FSTRING_END : ["]["]["] -> type(FSTRING_END); // popMode will be called in PythonLexerBase class + DQ3R_FSTRING_MIDDLE : DQ3R_FSTRING_ITEM -> type(FSTRING_MIDDLE); + DQ3R_FSTRING_LBRACE : '{' -> type(LBRACE); // pushMode(DQ3R_FORMAT_SPECIFICATION_MODE) will be called in PythonLexerBase class + + +mode SQ1__FORMAT_SPECIFICATION_MODE; // it is only used after a format specifier colon + SQ1__FORMAT_SPECIFICATION_FSTRING_MIDDLE : SQ1__FSTRING_PART+ -> type(FSTRING_MIDDLE); + SQ1__FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class + SQ1__FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to SQ1__FSTRING_MODE by PythonLexerBase class + +mode SQ1R_FORMAT_SPECIFICATION_MODE; // it is only used after a format specifier colon + SQ1R_FORMAT_SPECIFICATION_FSTRING_MIDDLE : SQ1R_FSTRING_PART+ -> type(FSTRING_MIDDLE); + SQ1R_FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class + SQ1R_FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to SQ1R_FSTRING_MODE by PythonLexerBase class + +mode DQ1__FORMAT_SPECIFICATION_MODE; // it is only used after a format specifier colon + DQ1__FORMAT_SPECIFICATION_FSTRING_MIDDLE : DQ1__FSTRING_PART+ -> type(FSTRING_MIDDLE); + DQ1__FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class + DQ1__FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to DQ1__FSTRING_MODE by PythonLexerBase class + +mode DQ1R_FORMAT_SPECIFICATION_MODE; // it is only used after a format specifier colon + DQ1R_FORMAT_SPECIFICATION_FSTRING_MIDDLE : DQ1R_FSTRING_PART+ -> type(FSTRING_MIDDLE); + DQ1R_FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class + DQ1R_FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to DQ1R_FSTRING_MODE by PythonLexerBase class + +mode SQ3__FORMAT_SPECIFICATION_MODE; // it is only used after a format specifier colon + SQ3__FORMAT_SPECIFICATION_FSTRING_MIDDLE : SQ3__FSTRING_PART+ -> type(FSTRING_MIDDLE); + SQ3__FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class + SQ3__FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to SQ3__FSTRING_MODE by PythonLexerBase class + +mode SQ3R_FORMAT_SPECIFICATION_MODE; // it is only used after a format specifier colon + 
SQ3R_FORMAT_SPECIFICATION_FSTRING_MIDDLE : SQ3R_FSTRING_PART+ -> type(FSTRING_MIDDLE); + SQ3R_FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class + SQ3R_FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to SQ3R_FSTRING_MODE by PythonLexerBase class + +mode DQ3__FORMAT_SPECIFICATION_MODE; // it is only used after a format specifier colon + DQ3__FORMAT_SPECIFICATION_FSTRING_MIDDLE : DQ3__FSTRING_PART+ -> type(FSTRING_MIDDLE); + DQ3__FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class + DQ3__FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to DQ3__FSTRING_MODE by PythonLexerBase class + +mode DQ3R_FORMAT_SPECIFICATION_MODE; // it is only used after a format specifier colon + DQ3R_FORMAT_SPECIFICATION_FSTRING_MIDDLE : DQ3R_FSTRING_PART+ -> type(FSTRING_MIDDLE); + DQ3R_FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class + DQ3R_FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to DQ3R_FSTRING_MODE by PythonLexerBase class /* * fragments */ -fragment IGNORE: 'ignore'; - -// https://docs.python.org/3.12/reference/lexical_analysis.html#literals - -// https://docs.python.org/3.12/reference/lexical_analysis.html#string-and-bytes-literals +// https://docs.python.org/3.13/reference/lexical_analysis.html#literals +// +// https://docs.python.org/3.13/reference/lexical_analysis.html#string-and-bytes-literals fragment STRING_LITERAL : STRING_PREFIX? (SHORT_STRING | LONG_STRING); -fragment STRING_PREFIX : 'r' | 'u' | 'R' | 'U'; +fragment STRING_PREFIX options { caseInsensitive=true; } : 'r' | 'u'; // 'r' | 'u' | 'R' | 'U' fragment SHORT_STRING - : '\'' SHORT_STRING_ITEM_FOR_SINGLE_QUOTE* '\'' - | '"' SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE* '"' + : ['] SHORT_STRING_ITEM_FOR_SINGLE_QUOTE* ['] + | ["] SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE* ["] ; fragment LONG_STRING - : '\'\'\'' LONG_STRING_ITEM*? '\'\'\'' - | '"""' LONG_STRING_ITEM*? '"""' + : ['][']['] LONG__STRING_ITEM*? ['][']['] // nongreedy + | ["]["]["] LONG__STRING_ITEM*? ["]["]["] // nongreedy ; +// https://docs.python.org/3/faq/design.html#why-can-t-raw-strings-r-strings-end-with-a-backslash fragment SHORT_STRING_ITEM_FOR_SINGLE_QUOTE : SHORT_STRING_CHAR_NO_SINGLE_QUOTE | STRING_ESCAPE_SEQ; fragment SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE : SHORT_STRING_CHAR_NO_DOUBLE_QUOTE | STRING_ESCAPE_SEQ; -fragment LONG_STRING_ITEM : LONG_STRING_CHAR | STRING_ESCAPE_SEQ; +fragment LONG__STRING_ITEM : LONG__STRING_CHAR | STRING_ESCAPE_SEQ; fragment SHORT_STRING_CHAR_NO_SINGLE_QUOTE : ~[\\\r\n']; // fragment SHORT_STRING_CHAR_NO_DOUBLE_QUOTE : ~[\\\r\n"]; // +fragment LONG__STRING_CHAR : ~[\\]; // -fragment LONG_STRING_CHAR : ~'\\'; // - -fragment STRING_ESCAPE_SEQ // https://docs.python.org/3/reference/lexical_analysis.html#escape-sequences - : '\\' '\r' '\n' // for the two-character Windows line break: \ escape sequence (string literal line continuation) - | '\\' . 
// "\" - ; +// https://docs.python.org/3/reference/lexical_analysis.html#escape-sequences +fragment STRING_ESCAPE_SEQ : ESCAPE_SEQ_NEWLINE | '\\' .; // "\" +// https://docs.python.org/3.13/reference/lexical_analysis.html#string-and-bytes-literals fragment BYTES_LITERAL : BYTES_PREFIX (SHORT_BYTES | LONG_BYTES); -fragment BYTES_PREFIX : 'b' | 'B' | 'br' | 'Br' | 'bR' | 'BR' | 'rb' | 'rB' | 'Rb' | 'RB'; +fragment BYTES_PREFIX options { caseInsensitive=true; } : 'b' | 'br' | 'rb'; // 'b' | 'B' | 'br' | 'Br' | 'bR' | 'BR' | 'rb' | 'rB' | 'Rb' | 'RB' fragment SHORT_BYTES - : '\'' SHORT_BYTES_ITEM_FOR_SINGLE_QUOTE* '\'' - | '"' SHORT_BYTES_ITEM_FOR_DOUBLE_QUOTE* '"' + : ['] SHORT_BYTES_ITEM_FOR_SINGLE_QUOTE* ['] + | ["] SHORT_BYTES_ITEM_FOR_DOUBLE_QUOTE* ["] ; fragment LONG_BYTES - : '\'\'\'' LONG_BYTES_ITEM*? '\'\'\'' - | '"""' LONG_BYTES_ITEM*? '"""' + : ['][']['] LONG_BYTES_ITEM*? ['][']['] // nongreede + | ["]["]["] LONG_BYTES_ITEM*? ["]["]["] // nongreede ; -fragment SHORT_BYTES_ITEM_FOR_SINGLE_QUOTE : SHORT_BYTES_CHAR_NO_SINGLE_QUOTE | BYTES_ESCAPE_SEQ; -fragment SHORT_BYTES_ITEM_FOR_DOUBLE_QUOTE : SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE | BYTES_ESCAPE_SEQ; +fragment SHORT_BYTES_ITEM_FOR_SINGLE_QUOTE : SHORT_SINGLE_QUOTED_BYTES_CHAR | BYTES_ESCAPE_SEQ; +fragment SHORT_BYTES_ITEM_FOR_DOUBLE_QUOTE : SHORT_DOUBLE_QUOTED_BYTES_CHAR | BYTES_ESCAPE_SEQ; -fragment LONG_BYTES_ITEM : LONG_BYTES_CHAR | BYTES_ESCAPE_SEQ; +fragment LONG_BYTES_ITEM : LONG_BYTES_CHAR | BYTES_ESCAPE_SEQ; -fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE // +fragment SHORT_SINGLE_QUOTED_BYTES_CHAR // : [\u0000-\u0009] | [\u000B-\u000C] | [\u000E-\u0026] @@ -267,7 +333,7 @@ fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE // +fragment SHORT_DOUBLE_QUOTED_BYTES_CHAR // : [\u0000-\u0009] | [\u000B-\u000C] | [\u000E-\u0021] @@ -278,17 +344,57 @@ fragment SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE // fragment BYTES_ESCAPE_SEQ : '\\' [\u0000-\u007F]; // "\" -// https://docs.python.org/3.12/library/string.html#format-specification-mini-language -fragment SINGLE_QUOTE_FSTRING_LITERAL : (FORMAT_SPEC_CHAR_NO_SINGLE_QUOTE | DOUBLE_BRACES)+; -fragment DOUBLE_QUOTE_FSTRING_LITERAL : (FORMAT_SPEC_CHAR_NO_DOUBLE_QUOTE | DOUBLE_BRACES)+; +// https://docs.python.org/3.13/reference/lexical_analysis.html#formatted-string-literals +// https://docs.python.org/3.13/library/string.html#format-specification-mini-language +// 'f' | 'F' | 'fr' | 'Fr' | 'fR' | 'FR' | 'rf' | 'rF' | 'Rf' | 'RF' +fragment FSTRING_PREFIX options { caseInsensitive=true; } : 'f' | 'fr' | 'rf'; + +fragment SQ1__FSTRING_ITEM : (SQ1__FSTRING_PART+ TERMINATING_FSTRING_MIDDLE?) | TERMINATING_FSTRING_MIDDLE; +fragment DQ1__FSTRING_ITEM : (DQ1__FSTRING_PART+ TERMINATING_FSTRING_MIDDLE?) | TERMINATING_FSTRING_MIDDLE; +fragment SQ3__FSTRING_ITEM : (SQ3__FSTRING_PART+ TERMINATING_SQ3__FSTRING_MIDDLE?) | TERMINATING_SQ3__FSTRING_MIDDLE; +fragment DQ3__FSTRING_ITEM : (DQ3__FSTRING_PART+ TERMINATING_DQ3__FSTRING_MIDDLE?) | TERMINATING_DQ3__FSTRING_MIDDLE; + +fragment SQ1R_FSTRING_ITEM : (SQ1R_FSTRING_PART+ TERMINATING_FSTRING_MIDDLE_RAW?) | TERMINATING_FSTRING_MIDDLE_RAW; +fragment DQ1R_FSTRING_ITEM : (DQ1R_FSTRING_PART+ TERMINATING_FSTRING_MIDDLE_RAW?) | TERMINATING_FSTRING_MIDDLE_RAW; +fragment SQ3R_FSTRING_ITEM : (SQ3R_FSTRING_PART+ TERMINATING_SQ3R_FSTRING_MIDDLE?) | TERMINATING_SQ3R_FSTRING_MIDDLE; +fragment DQ3R_FSTRING_ITEM : (DQ3R_FSTRING_PART+ TERMINATING_DQ3R_FSTRING_MIDDLE?) 
| TERMINATING_DQ3R_FSTRING_MIDDLE; + +fragment SQ1__FSTRING_PART : SQ1_FSTRING_CHAR | FSTRING_ESCAPE_SEQ; +fragment DQ1__FSTRING_PART : DQ1_FSTRING_CHAR | FSTRING_ESCAPE_SEQ; +fragment SQ3__FSTRING_PART : ONE_OR_TWO_SQUOTE? (SQ3_FSTRING_CHAR | FSTRING_ESCAPE_SEQ); +fragment DQ3__FSTRING_PART : ONE_OR_TWO_DQUOTE? (DQ3_FSTRING_CHAR | FSTRING_ESCAPE_SEQ); + +fragment SQ1R_FSTRING_PART : SQ1_FSTRING_CHAR | FSTRING_ESCAPE_SEQ_RAW; +fragment DQ1R_FSTRING_PART : DQ1_FSTRING_CHAR | FSTRING_ESCAPE_SEQ_RAW; +fragment SQ3R_FSTRING_PART : ONE_OR_TWO_SQUOTE? (SQ3_FSTRING_CHAR | FSTRING_ESCAPE_SEQ_RAW); +fragment DQ3R_FSTRING_PART : ONE_OR_TWO_DQUOTE? (DQ3_FSTRING_CHAR | FSTRING_ESCAPE_SEQ_RAW); + +fragment SQ1_FSTRING_CHAR : ~[\\{}'\r\n]; // +fragment DQ1_FSTRING_CHAR : ~[\\{}"\r\n]; // +fragment SQ3_FSTRING_CHAR : ~[\\{}']; // +fragment DQ3_FSTRING_CHAR : ~[\\{}"]; // + +fragment TERMINATING_SQ3__FSTRING_MIDDLE : ONE_OR_TWO_SQUOTE '{' | ONE_OR_TWO_SQUOTE? TERMINATING_FSTRING_MIDDLE; +fragment TERMINATING_DQ3__FSTRING_MIDDLE : ONE_OR_TWO_DQUOTE '{' | ONE_OR_TWO_DQUOTE? TERMINATING_FSTRING_MIDDLE; +fragment TERMINATING_SQ3R_FSTRING_MIDDLE : ONE_OR_TWO_SQUOTE '{' | ONE_OR_TWO_SQUOTE? TERMINATING_FSTRING_MIDDLE_RAW; +fragment TERMINATING_DQ3R_FSTRING_MIDDLE : ONE_OR_TWO_DQUOTE '{' | ONE_OR_TWO_DQUOTE? TERMINATING_FSTRING_MIDDLE_RAW; -// https://docs.python.org/3.12/reference/lexical_analysis.html#formatted-string-literals -fragment F_STRING_PREFIX : 'f' | 'F' | 'fr' | 'Fr' | 'fR' | 'FR' | 'rf' | 'rF' | 'Rf' | 'RF'; -fragment FORMAT_SPEC_CHAR_NO_SINGLE_QUOTE : ~[{}']; -fragment FORMAT_SPEC_CHAR_NO_DOUBLE_QUOTE : ~[{}"]; -fragment DOUBLE_BRACES : '{{' | '}}'; +fragment TERMINATING_FSTRING_MIDDLE : '\\'? DOUBLE_BRACE | '\\{' | ESCAPE_SEQ_NAMED_CHAR; +fragment TERMINATING_FSTRING_MIDDLE_RAW : '\\'? DOUBLE_BRACE | '\\{' ; // https://docs.python.org/3/faq/design.html#why-can-t-raw-strings-r-strings-end-with-a-backslash -// https://docs.python.org/3.12/reference/lexical_analysis.html#integer-literals +fragment FSTRING_ESCAPE_SEQ : ESCAPE_SEQ_NEWLINE | '\\' ~[{}N]; // f"\\}" causes a lexer error +fragment FSTRING_ESCAPE_SEQ_RAW : ESCAPE_SEQ_NEWLINE | '\\' ~[{}]; // fr"\}" causes a lexer error + +fragment ONE_OR_TWO_SQUOTE : ['][']?; +fragment ONE_OR_TWO_DQUOTE : ["]["]?; +fragment DOUBLE_BRACE : '{{' | '}}'; // will be replaced with a single brace in PythonLexerBase class + +fragment ESCAPE_SEQ_NAMED_CHAR : '\\N{' .*? '}'; // an escape sequence for a character identified by its name in the Unicode database +fragment ESCAPE_SEQ_NEWLINE : BACKSLASH_NEWLINE; // a kind of line continuation for string literals (the backslash and the newline will be ignored) + +fragment BACKSLASH_NEWLINE : '\\' NEWLINE; + +// https://docs.python.org/3.13/reference/lexical_analysis.html#integer-literals fragment INTEGER : DEC_INTEGER | BIN_INTEGER | OCT_INTEGER | HEX_INTEGER; fragment DEC_INTEGER : NON_ZERO_DIGIT ('_'? DIGIT)* | '0'+ ('_'? '0')*; fragment BIN_INTEGER : '0' ('b' | 'B') ('_'? BIN_DIGIT)+; @@ -300,7 +406,7 @@ fragment BIN_DIGIT : '0' | '1'; fragment OCT_DIGIT : [0-7]; fragment HEX_DIGIT : DIGIT | [a-f] | [A-F]; -// https://docs.python.org/3.12/reference/lexical_analysis.html#floating-point-literals +// https://docs.python.org/3.13/reference/lexical_analysis.html#floating-point-literals fragment FLOAT_NUMBER : POINT_FLOAT | EXPONENT_FLOAT; fragment POINT_FLOAT : DIGIT_PART? FRACTION | DIGIT_PART '.'; fragment EXPONENT_FLOAT : (DIGIT_PART | POINT_FLOAT) EXPONENT; @@ -308,12 +414,12 @@ fragment DIGIT_PART : DIGIT ('_'? 
DIGIT)*; fragment FRACTION : '.' DIGIT_PART; fragment EXPONENT : ('e' | 'E') ('+' | '-')? DIGIT_PART; -// https://docs.python.org/3.12/reference/lexical_analysis.html#imaginary-literals +// https://docs.python.org/3.13/reference/lexical_analysis.html#imaginary-literals fragment IMAG_NUMBER : (FLOAT_NUMBER | DIGIT_PART) ('j' | 'J'); -// https://github.com/RobEin/ANTLR4-parser-for-Python-3.12/tree/main/valid_chars_in_py_identifiers -fragment ID_CONTINUE: - ID_START +// https://github.com/RobEin/ANTLR4-parser-for-Python-3.13/tree/main/valid_chars_in_py_identifiers +fragment ID_CONTINUE + : ID_START | '\u{0030}' .. '\u{0039}' | '\u{00B7}' | '\u{0300}' .. '\u{036F}' @@ -490,6 +596,7 @@ fragment ID_CONTINUE: | '\u{1CF4}' | '\u{1CF7}' .. '\u{1CF9}' | '\u{1DC0}' .. '\u{1DFF}' + | '\u{200C}' .. '\u{200D}' | '\u{203F}' .. '\u{2040}' | '\u{2054}' | '\u{20D0}' .. '\u{20DC}' @@ -500,6 +607,7 @@ fragment ID_CONTINUE: | '\u{2DE0}' .. '\u{2DFF}' | '\u{302A}' .. '\u{302F}' | '\u{3099}' .. '\u{309A}' + | '\u{30FB}' | '\u{A620}' .. '\u{A629}' | '\u{A66F}' | '\u{A674}' .. '\u{A67D}' @@ -544,6 +652,7 @@ fragment ID_CONTINUE: | '\u{FE4D}' .. '\u{FE4F}' | '\u{FF10}' .. '\u{FF19}' | '\u{FF3F}' + | '\u{FF65}' | '\u{FF9E}' .. '\u{FF9F}' | '\u{101FD}' | '\u{102E0}' @@ -688,11 +797,10 @@ fragment ID_CONTINUE: | '\u{1E950}' .. '\u{1E959}' | '\u{1FBF0}' .. '\u{1FBF9}' | '\u{E0100}' .. '\u{E01EF}' -; + ; -// https://github.com/RobEin/ANTLR4-parser-for-Python-3.12/tree/main/valid_chars_in_py_identifiers -fragment ID_START: - '\u{0041}' .. '\u{005A}' +fragment ID_START + : '\u{0041}' .. '\u{005A}' | '\u{005F}' | '\u{0061}' .. '\u{007A}' | '\u{00AA}' @@ -1356,7 +1464,8 @@ fragment ID_START: | '\u{2B740}' .. '\u{2B81D}' | '\u{2B820}' .. '\u{2CEA1}' | '\u{2CEB0}' .. '\u{2EBE0}' + | '\u{2EBF0}' .. '\u{2EE5D}' | '\u{2F800}' .. '\u{2FA1D}' | '\u{30000}' .. '\u{3134A}' | '\u{31350}' .. '\u{323AF}' -; \ No newline at end of file + ; diff --git a/python/python3_12/PythonParser.g4 b/python/python3_13/PythonParser.g4 similarity index 85% rename from python/python3_12/PythonParser.g4 rename to python/python3_13/PythonParser.g4 index 479ad9230c..35e43d46b2 100644 --- a/python/python3_12/PythonParser.g4 +++ b/python/python3_13/PythonParser.g4 @@ -22,21 +22,20 @@ THE SOFTWARE. /* * Project : an ANTLR4 parser grammar by the official PEG grammar - * https://github.com/RobEin/ANTLR4-parser-for-Python-3.12 + * https://github.com/RobEin/ANTLR4-parser-for-Python-3.13 * Developed by : Robert Einhorn * */ - /* - * Contributors : - * [Willie Shen](https://github.com/Willie169) : Fix that `case [a, *_] if a == 0:` throws error `rule soft_kw__not__wildcard failed predicate: {this.isnotEqualToCurrentTokenText("_")}?` + /* + * Contributors : [Willie Shen](https://github.com/Willie169) */ -parser grammar PythonParser; // Python 3.12.6 https://docs.python.org/3.12/reference/grammar.html#full-grammar-specification -options { - tokenVocab=PythonLexer; - superClass=PythonParserBase; -} +// Python 3.13.1 https://docs.python.org/3.13/reference/grammar.html#full-grammar-specification + +parser grammar PythonParser; + +options { tokenVocab=PythonLexer; } // STARTING RULES // ============== @@ -45,7 +44,6 @@ file_input: statements? EOF; interactive: statement_newline; eval: expressions NEWLINE* EOF; func_type: '(' type_expressions? 
')' '->' expression NEWLINE* EOF; -fstring_input: star_expressions; // GENERAL STATEMENTS // ================== @@ -97,7 +95,7 @@ compound_stmt // NOTE: annotated_rhs may start with 'yield'; yield_expr must start with 'yield' assignment - : NAME ':' expression ('=' annotated_rhs )? + : name ':' expression ('=' annotated_rhs )? | ('(' single_target ')' | single_subscript_attribute_target) ':' expression ('=' annotated_rhs )? | (star_targets '=' )+ (yield_expr | star_expressions) TYPE_COMMENT? @@ -127,9 +125,9 @@ raise_stmt : 'raise' (expression ('from' expression )?)? ; -global_stmt: 'global' NAME (',' NAME)*; +global_stmt: 'global' name (',' name)*; -nonlocal_stmt: 'nonlocal' NAME (',' NAME)*; +nonlocal_stmt: 'nonlocal' name (',' name)*; del_stmt : 'del' del_targets; @@ -157,14 +155,14 @@ import_from_targets import_from_as_names : import_from_as_name (',' import_from_as_name)*; import_from_as_name - : NAME ('as' NAME )?; + : name ('as' name )?; dotted_as_names : dotted_as_name (',' dotted_as_name)*; dotted_as_name - : dotted_name ('as' NAME )?; + : dotted_name ('as' name )?; dotted_name - : dotted_name '.' NAME - | NAME; + : dotted_name '.' name + | name; // COMPOUND STATEMENTS // =================== @@ -186,7 +184,7 @@ class_def | class_def_raw; class_def_raw - : 'class' NAME type_params? ('(' arguments? ')' )? ':' block; + : 'class' name type_params? ('(' arguments? ')' )? ':' block; // Function definitions // -------------------- @@ -196,8 +194,8 @@ function_def | function_def_raw; function_def_raw - : 'def' NAME type_params? '(' params? ')' ('->' expression )? ':' func_type_comment? block - | ASYNC 'def' NAME type_params? '(' params? ')' ('->' expression )? ':' func_type_comment? block; + : 'def' name type_params? '(' params? ')' ('->' expression )? ':' func_type_comment? block + | 'async' 'def' name type_params? '(' params? ')' ('->' expression )? ':' func_type_comment? block; // Function parameters // ------------------- @@ -256,8 +254,8 @@ param_with_default param_maybe_default : param default_assignment? ','? TYPE_COMMENT? ; -param: NAME annotation?; -param_star_annotation: NAME star_annotation; +param: name annotation?; +param_star_annotation: name star_annotation; annotation: ':' expression; star_annotation: ':' star_expression; default_assignment: '=' expression; @@ -284,16 +282,16 @@ while_stmt // ------------- for_stmt - : ASYNC? 'for' star_targets 'in' star_expressions ':' TYPE_COMMENT? block else_block? + : 'async'? 'for' star_targets 'in' star_expressions ':' TYPE_COMMENT? block else_block? ; // With statement // -------------- with_stmt - : ASYNC? 'with' ( '(' with_item (',' with_item)* ','? ')' ':' - | with_item (',' with_item)* ':' TYPE_COMMENT? - ) block + : 'with' '(' with_item (',' with_item)* ','? ')' ':' TYPE_COMMENT? block + | 'async' 'with' '(' with_item (',' with_item)* ','? ')' ':' block + | 'async'? 'with' with_item (',' with_item)* ':' TYPE_COMMENT? block ; with_item @@ -313,10 +311,10 @@ try_stmt // ---------------- except_block - : 'except' (expression ('as' NAME )?)? ':' block + : 'except' (expression ('as' name )?)? ':' block ; except_star_block - : 'except' '*' expression ('as' NAME )? ':' block; + : 'except' '*' expression ('as' name )? ':' block; finally_block : 'finally' ':' block; @@ -324,14 +322,14 @@ finally_block // --------------- match_stmt - : soft_kw_match subject_expr ':' NEWLINE INDENT case_block+ DEDENT; + : 'match' subject_expr ':' NEWLINE INDENT case_block+ DEDENT; subject_expr : star_named_expression ',' star_named_expressions? 
| named_expression; case_block - : soft_kw_case patterns guard? ':' block; + : 'case' patterns guard? ':' block; guard: 'if' named_expression; @@ -399,19 +397,19 @@ capture_pattern : pattern_capture_target; pattern_capture_target - : soft_kw__not__wildcard; + : name_except_underscore; wildcard_pattern - : soft_kw_wildcard; + : '_'; value_pattern : attr; attr - : NAME ('.' NAME)+ + : name ('.' name)+ ; name_or_attr - : NAME ('.' NAME)* + : name ('.' name)* ; group_pattern @@ -432,7 +430,8 @@ maybe_star_pattern | pattern; star_pattern - : '*' NAME; + : '*' name + ; mapping_pattern : LBRACE RBRACE @@ -462,13 +461,13 @@ keyword_patterns : keyword_pattern (',' keyword_pattern)*; keyword_pattern - : NAME '=' pattern; + : name '=' pattern; // Type statement // --------------- type_alias - : soft_kw_type NAME type_params? '=' expression; + : 'type' name type_params? '=' expression; // Type parameter declaration // -------------------------- @@ -478,13 +477,15 @@ type_params: '[' type_param_seq ']'; type_param_seq: type_param (',' type_param)* ','?; type_param - : NAME type_param_bound? - | '*' NAME - | '**' NAME + : name type_param_bound? type_param_default? + | '*' name type_param_starred_default? + | '**' name type_param_default? ; type_param_bound: ':' expression; +type_param_default: '=' expression; +type_param_starred_default: '=' star_expression; // EXPRESSIONS // ----------- @@ -519,7 +520,7 @@ star_named_expression | named_expression; assignment_expression - : NAME ':=' expression; + : name ':=' expression; named_expression : assignment_expression @@ -620,11 +621,11 @@ power // Primary elements are things like "obj.something.something", "obj[something]", "obj(something)", "obj" ... await_primary - : AWAIT primary + : 'await' primary | primary; primary - : primary ('.' NAME | genexp | '(' arguments? ')' | '[' slices ']') + : primary ('.' name | genexp | '(' arguments? ')' | '[' slices ']') | atom ; @@ -639,7 +640,7 @@ slice | named_expression; atom - : NAME + : name | 'True' | 'False' | 'None' @@ -698,7 +699,7 @@ lambda_param_with_default lambda_param_maybe_default : lambda_param default_assignment? ','? ; -lambda_param: NAME; +lambda_param: name; // LITERALS // ======== @@ -707,9 +708,9 @@ fstring_middle : fstring_replacement_field | FSTRING_MIDDLE; fstring_replacement_field - : LBRACE (yield_expr | star_expressions) '='? fstring_conversion? fstring_full_format_spec? RBRACE; + : LBRACE annotated_rhs '='? fstring_conversion? fstring_full_format_spec? RBRACE; fstring_conversion - : '!' NAME; + : '!' name; fstring_full_format_spec : ':' fstring_format_spec*; fstring_format_spec @@ -750,7 +751,7 @@ for_if_clauses : for_if_clause+; for_if_clause - : ASYNC? 'for' star_targets 'in' disjunction ('if' disjunction )* + : 'async'? 'for' star_targets 'in' disjunction ('if' disjunction )* ; listcomp @@ -784,11 +785,11 @@ starred_expression : '*' expression; kwarg_or_starred - : NAME '=' expression + : name '=' expression | starred_expression; kwarg_or_double_starred - : NAME '=' expression + : name '=' expression | '**' expression; // ASSIGNMENT TARGETS @@ -802,7 +803,7 @@ star_targets : star_target (',' star_target )* ','? ; -star_targets_list_seq: star_target (',' star_target)+ ','?; +star_targets_list_seq: star_target (',' star_target)* ','?; star_targets_tuple_seq : star_target (',' | (',' star_target )+ ','?) @@ -813,27 +814,27 @@ star_target | target_with_star_atom; target_with_star_atom - : t_primary ('.' NAME | '[' slices ']') + : t_primary ('.' 
name | '[' slices ']') | star_atom ; star_atom - : NAME + : name | '(' target_with_star_atom ')' | '(' star_targets_tuple_seq? ')' | '[' star_targets_list_seq? ']'; single_target : single_subscript_attribute_target - | NAME + | name | '(' single_target ')'; single_subscript_attribute_target - : t_primary ('.' NAME | '[' slices ']') + : t_primary ('.' name | '[' slices ']') ; t_primary - : t_primary ('.' NAME | '[' slices ']' | genexp | '(' arguments? ')') + : t_primary ('.' name | '[' slices ']' | genexp | '(' arguments? ')') | atom ; @@ -847,12 +848,12 @@ t_primary del_targets: del_target (',' del_target)* ','?; del_target - : t_primary ('.' NAME | '[' slices ']') + : t_primary ('.' name | '[' slices ']') | del_t_atom ; del_t_atom - : NAME + : name | '(' del_target ')' | '(' del_targets? ')' | '[' del_targets? ']'; @@ -874,11 +875,15 @@ func_type_comment : NEWLINE TYPE_COMMENT // Must be followed by indented block | TYPE_COMMENT; -// *** Soft Keywords: https://docs.python.org/3.12/reference/lexical_analysis.html#soft-keywords -soft_kw_type: {this.isEqualToCurrentTokenText("type")}? NAME; -soft_kw_match: {this.isEqualToCurrentTokenText("match")}? NAME; -soft_kw_case: {this.isEqualToCurrentTokenText("case")}? NAME; -soft_kw_wildcard: {this.isEqualToCurrentTokenText("_")}? NAME; -soft_kw__not__wildcard: {this.isnotEqualToCurrentTokenText("_")}? NAME; +// *** related to soft keywords: https://docs.python.org/3.13/reference/lexical_analysis.html#soft-keywords +name_except_underscore + : NAME // ***** The NAME token can be used only in this rule ***** + | NAME_OR_TYPE + | NAME_OR_MATCH + | NAME_OR_CASE + ; + +// ***** Always use name rule instead of NAME token in this grammar ***** +name: NAME_OR_WILDCARD | name_except_underscore; // ========================= END OF THE GRAMMAR =========================== diff --git a/python/python3_13/README.md b/python/python3_13/README.md new file mode 100644 index 0000000000..3f02d91e4f --- /dev/null +++ b/python/python3_13/README.md @@ -0,0 +1,37 @@ +# Python 3.13.2 parser + +### About files: +- PythonParser.g4 is the ANTLR4 parser grammar that is based on the official [Python PEG grammar](https://docs.python.org/3.13/reference/grammar.html) + +- PythonLexerBase class + - handles Python indentation + - creates the ENCODING token + - tokenizes fstring literals + - and manages many other things (see the sketch below) + +- Example files from: [Python 3.13 Standard Lib](https://github.com/python/cpython/tree/3.13/Lib)
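The indentation handling is easiest to see in a token dump. Below is a minimal, hypothetical TypeScript driver sketch; it assumes the generated `PythonLexer` and the `antlr4` runtime API that the `PythonLexerBase.ts` in this patch already uses (`CharStreams.fromString`, `CommonTokenStream`, `getSymbolicNames`), and the expected output is illustrative:

```typescript
import { CharStreams, CommonTokenStream, Token } from "antlr4";
import PythonLexer from "./PythonLexer";

// The lexer base class synthesizes INDENT after the "if" header line and a
// trailing NEWLINE + DEDENT at the end of input, Python-tokenizer style.
const source = "if x:\n    pass";
const lexer = new PythonLexer(CharStreams.fromString(source));
const stream = new CommonTokenStream(lexer);
stream.fill(); // pull every token through PythonLexerBase.nextToken()

for (const t of stream.tokens) {
    if (t.channel === Token.DEFAULT_CHANNEL && t.type !== Token.EOF) {
        console.log(lexer.getSymbolicNames()[t.type], JSON.stringify(t.text));
    }
}
// Expected shape: IF NAME COLON NEWLINE INDENT PASS NEWLINE DEDENT
```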

+ +### Recent changes: +- parser grammar update for Python 3.13.2 +- added ENCODING token +- complete rewrite of fstring tokenizer in lexer grammar and PythonLexerBase class + - now correctly tokenizes the following in fstrings: + - escape sequences + - walrus operator + - dictionary comprehension + - set comprehension +- soft keyword changes: + - no embedded code (semantic predicates) in parser grammar for soft keywords + - no need for PythonParserBase class + - no need for transformGrammar.py + - **BREAKING CHANGES**: + - dedicated tokens for soft keywords instead of NAME token (see the sketch below): + - NAME_OR_TYPE + - NAME_OR_MATCH + - NAME_OR_CASE + - NAME_OR_WILDCARD + +#### [Previous changes](https://github.com/antlr/grammars-v4/tree/master/python/python3_13)
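To make the breaking change concrete, here is a hypothetical driver sketch (same assumptions as the generated TypeScript target in this patch): a soft keyword is lexed to the same dedicated token type whether it is used as a keyword or as an ordinary identifier, and only the parser's `name` rule disambiguates.

```typescript
import { CharStreams, CommonTokenStream, Token } from "antlr4";
import PythonLexer from "./PythonLexer";

// "match" as an identifier and "match" opening a match statement are both
// lexed as NAME_OR_MATCH, never as NAME.
for (const source of ["match = 1\n", "match p:\n    case 0:\n        pass\n"]) {
    const stream = new CommonTokenStream(new PythonLexer(CharStreams.fromString(source)));
    stream.fill();
    const first = stream.tokens.find(t => t.channel === Token.DEFAULT_CHANNEL)!;
    console.log(first.type === PythonLexer.NAME_OR_MATCH); // true, true
}
```

Any tooling that used to filter on `PythonLexer.NAME` therefore has to widen its check to the four `NAME_OR_*` token types, or match on the parser's `name` rule instead.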

+ +### Related link: +[ANTLR4-parser-for-Python-3.13](https://github.com/RobEin/ANTLR4-parser-for-Python-3.13) \ No newline at end of file diff --git a/python/python3_13/TypeScript/PythonLexerBase.ts b/python/python3_13/TypeScript/PythonLexerBase.ts new file mode 100644 index 0000000000..5ba9b2062e --- /dev/null +++ b/python/python3_13/TypeScript/PythonLexerBase.ts @@ -0,0 +1,677 @@ +/* +The MIT License (MIT) +Copyright (c) 2021 Robert Einhorn + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + */ + +/* + * + * Project : Python Indent/Dedent handler for ANTLR4 grammars + * + * Developed by : Robert Einhorn, robert.einhorn.hu@gmail.com + * + */ + +import { CharStream, CharStreams, CommonTokenStream, Token, CommonToken, Lexer, TokenStream } from "antlr4"; +import PythonLexer from "./PythonLexer"; +import PythonParser from "./PythonParser"; +import * as Collections from "typescript-collections"; + +export default abstract class PythonLexerBase extends Lexer { + // A stack that keeps track of the indentation lengths + private indentLengthStack!: Collections.Stack<number>; + // A list where tokens are waiting to be loaded into the token stream + private pendingTokens!: Array<Token>; + + // last pending token types + private previousPendingTokenType!: number; + private lastPendingTokenTypeFromDefaultChannel!: number; + + // The number of opened parentheses, square brackets or curly braces + private opened!: number; + // The number of opened parentheses and square brackets in the current lexer mode + private paren_or_bracket_openedStack!: Array<number>; + // A stack that stores expression(s) between braces in fstring + private braceExpressionStack!: Array<string>; + private prevBraceExpression!: string; + + // Instead of this._mode (_mode is not implemented in each ANTLR4 runtime) + private curLexerMode!: number; + // Instead of this._modeStack (_modeStack is not implemented in each ANTLR4 runtime) + private lexerModeStack!: Array<number>; + + private wasSpaceIndentation!: boolean; + private wasTabIndentation!: boolean; + private wasIndentationMixedWithSpacesAndTabs!: boolean; + + private curToken: Token | undefined; // current (under processing) token + private ffgToken: Token | undefined; // following (look ahead) token + + private readonly INVALID_LENGTH: number = -1; + private readonly ERR_TXT: string = " ERROR: "; + + protected constructor(input: CharStream) { + super(input); + this.init(); + } + + public nextToken(): Token { // read the input stream until an EOF token is returned + this.checkNextToken(); + return this.pendingTokens.shift()!
/* .pollFirst() */; // add the queued token to the token stream + } + + public reset(): void { + this.init(); + super.reset(); + } + + private init(): void { + this.indentLengthStack = new Collections.Stack<number>(); + this.pendingTokens = []; + this.previousPendingTokenType = 0; + this.lastPendingTokenTypeFromDefaultChannel = 0; + this.opened = 0; + this.paren_or_bracket_openedStack = []; + this.braceExpressionStack = []; + this.prevBraceExpression = ""; + this.curLexerMode = 0; + this.lexerModeStack = []; + this.wasSpaceIndentation = false; + this.wasTabIndentation = false; + this.wasIndentationMixedWithSpacesAndTabs = false; + this.curToken = undefined; + this.ffgToken = undefined; + } + + private checkNextToken(): void { + if (this.previousPendingTokenType == PythonLexer.EOF) + return; + + if (this.indentLengthStack.isEmpty()) { // We're at the first token + this.insertENCODINGtoken(); + this.setCurrentAndFollowingTokens(); + this.handleStartOfInput(); + } else { + this.setCurrentAndFollowingTokens(); + } + + switch (this.curToken!.type) { + case PythonLexer.NEWLINE: + this.handleNEWLINEtoken(); + break; + case PythonLexer.LPAR: + case PythonLexer.LSQB: + case PythonLexer.LBRACE: + this.opened++; + this.addPendingToken(this.curToken!); + break; + case PythonLexer.RPAR: + case PythonLexer.RSQB: + case PythonLexer.RBRACE: + this.opened--; + this.addPendingToken(this.curToken!); + break; + case PythonLexer.FSTRING_MIDDLE: + this.handleFSTRING_MIDDLEtokenWithDoubleBrace(); // does not affect the opened field + this.addPendingToken(this.curToken!); + break; + case PythonLexer.COLONEQUAL: + this.handleCOLONEQUALtokenInFString(); + break; + case PythonLexer.ERRORTOKEN: + this.reportLexerError(`token recognition error at: '${this.curToken!.text}'`); + this.addPendingToken(this.curToken!); + break; + case PythonLexer.EOF: + this.handleEOFtoken(); + break; + default: + this.addPendingToken(this.curToken!); + } + this.handleFORMAT_SPECIFICATION_MODE(); + } + + private setCurrentAndFollowingTokens(): void { + this.curToken = this.ffgToken == undefined + ? super.nextToken() + : this.ffgToken; + + this.checkCurToken(); // ffgToken cannot be used in this method and its sub methods (ffgToken is not yet set)! + + this.ffgToken = this.curToken.type === PythonLexer.EOF + ? this.curToken + : super.nextToken(); + } + + private insertENCODINGtoken(): void { // https://peps.python.org/pep-0263/ + let lineBuilder: string = ''; + let encodingName: string = ''; + let lineCount: number = 0; + const ws_commentPattern: RegExp = /^[ \t\f]*(#.*)?$/; + const charStream: CharStream = this._input; + const size: number = charStream.size; + + charStream.seek(0); + for (let i = 0; i < size; i++) { + const c: string = String.fromCharCode(charStream.LA(i + 1)); + lineBuilder += c; + + if (c === '\n' || i === size - 1) { + const line: string = lineBuilder.replace(/\r/g, '').replace(/\n/g, ''); + if (ws_commentPattern.test(line)) { // WS* + COMMENT? 
found + encodingName = this.getEncodingName(line); + if (encodingName !== '') { + break; // encoding found + } + } else { + break; // statement or backslash found (line is not empty, not whitespace(s), not comment) + } + + lineCount++; + if (lineCount >= 2) { + break; // check only the first two lines + } + lineBuilder = ''; + } + } + + if (encodingName === '') { + encodingName = 'utf-8'; // default Python source code encoding + } + + const encodingToken = new CommonToken([this, this._input], PythonLexer.ENCODING, Token.HIDDEN_CHANNEL, 0, 0); + encodingToken.text = encodingName; + encodingToken.line = 0; + encodingToken.column = -1; + this.addPendingToken(encodingToken); + } + + private getEncodingName(commentText: string): string { // https://peps.python.org/pep-0263/#defining-the-encoding + const encodingCommentPattern: RegExp = /^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)/; + const match: RegExpMatchArray | null = commentText.match(encodingCommentPattern); + return match ? match[1] : ''; + } + + // initialize the indentLengthStack + // hide the leading NEWLINE token(s) + // if exists, find the first statement (not NEWLINE, not EOF token) that comes from the default channel + // insert a leading INDENT token if necessary + private handleStartOfInput(): void { + // initialize the stack with a default 0 indentation length + this.indentLengthStack.push(0); // this will never be popped off + while (this.curToken!.type !== PythonLexer.EOF) { + if (this.curToken!.channel === Token.DEFAULT_CHANNEL) { + if (this.curToken!.type === PythonLexer.NEWLINE) { + // all the NEWLINE tokens must be ignored before the first statement + this.hideAndAddPendingToken(this.curToken!); + } else { // We're at the first statement + this.insertLeadingIndentToken(); + return; // continue the processing of the current token with checkNextToken() + } + } else { + this.addPendingToken(this.curToken!); // it can be WS, EXPLICIT_LINE_JOINING or COMMENT token + } + this.setCurrentAndFollowingTokens(); + } // continue the processing of the EOF token with checkNextToken() + } + + private insertLeadingIndentToken(): void { + if (this.previousPendingTokenType === PythonLexer.WS) { + const prevToken: Token = this.pendingTokens.at(-1)!; /* .peekLast() */ // WS token + if (this.getIndentationLength(prevToken.text) !== 0) { // there is an "indentation" before the first statement + const errMsg: string = "first statement indented"; + this.reportLexerError(errMsg); + // insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser + this.createAndAddPendingToken(PythonLexer.INDENT, Token.DEFAULT_CHANNEL, this.ERR_TXT + errMsg, this.curToken!); + } + } + } + + private handleNEWLINEtoken(): void { + if (this.lexerModeStack.length > 0) { + this.addPendingToken(this.curToken!); + } else if (this.opened > 0) { // We're in an implicit line joining, ignore the current NEWLINE token + this.hideAndAddPendingToken(this.curToken!); + } else { + const nlToken: Token = this.curToken?.clone()!; // save the current NEWLINE token + const isLookingAhead: boolean = this.ffgToken!.type === PythonLexer.WS; + if (isLookingAhead) { + this.setCurrentAndFollowingTokens(); // set the next two tokens + } + + switch (this.ffgToken!.type) { + case PythonLexer.NEWLINE: // We're before a blank line + case PythonLexer.COMMENT: // We're before a comment + this.hideAndAddPendingToken(nlToken); + if (isLookingAhead) { + this.addPendingToken(this.curToken!); // WS token + } + break; + default: + 
this.addPendingToken(nlToken); + if (isLookingAhead) { // We're on whitespace(s) followed by a statement + const indentationLength: number = this.ffgToken!.type === PythonLexer.EOF ? + 0 : + this.getIndentationLength(this.curToken!.text); + + if (indentationLength !== this.INVALID_LENGTH) { + this.addPendingToken(this.curToken!); // WS token + this.insertIndentOrDedentToken(indentationLength); // may insert INDENT token or DEDENT token(s) + } else { + this.reportError("inconsistent use of tabs and spaces in indentation"); + } + } else { // We're at a newline followed by a statement (there is no whitespace before the statement) + this.insertIndentOrDedentToken(0); // may insert DEDENT token(s) + } + } + } + } + + private insertIndentOrDedentToken(indentLength: number): void { + let prevIndentLength: number = this.indentLengthStack.peek()!; + if (indentLength > prevIndentLength) { + this.createAndAddPendingToken(PythonLexer.INDENT, Token.DEFAULT_CHANNEL, null, this.ffgToken!); + this.indentLengthStack.push(indentLength); + } else { + while (indentLength < prevIndentLength) { // more than 1 DEDENT token may be inserted to the token stream + this.indentLengthStack.pop(); + prevIndentLength = this.indentLengthStack.peek()!; + if (indentLength <= prevIndentLength) { + this.createAndAddPendingToken(PythonLexer.DEDENT, Token.DEFAULT_CHANNEL, null, this.ffgToken!); + } else { + this.reportError("inconsistent dedent"); + } + } + } + } + + private checkCurToken(): void { + switch (this.curToken!.type) { + case PythonLexer.FSTRING_START: + this.setLexerModeByFSTRING_STARTtoken(); + return; + case PythonLexer.FSTRING_MIDDLE: + this.handleFSTRING_MIDDLEtokenWithQuoteAndLBrace(); // affect the opened field + if (this.curToken!.type === PythonLexer.FSTRING_MIDDLE) { + return; + } + break; + case PythonLexer.FSTRING_END: + this.popLexerMode(); + return; + default: + if (this.lexerModeStack.length === 0) { + return; + } + } + + switch (this.curToken!.type) { // the following tokens can only come from default mode (after an LBRACE in fstring) + case PythonLexer.NEWLINE: + // append the current brace expression with the current newline + this.appendToBraceExpression(this.curToken!.text); + this.curToken!.channel = Token.HIDDEN_CHANNEL; + break; + case PythonLexer.LBRACE: + // the outermost brace expression cannot be a dictionary comprehension or a set comprehension + this.braceExpressionStack.push("{"); + this.paren_or_bracket_openedStack.push(0); + this.pushLexerMode(Lexer.DEFAULT_MODE); + break; + case PythonLexer.LPAR: + case PythonLexer.LSQB: + // append the current brace expression with a "(" or a "[" + this.appendToBraceExpression(this.curToken!.text); + // https://peps.python.org/pep-0498/#lambdas-inside-expressions + this.incrementBraceStack(); + break; + case PythonLexer.RPAR: + case PythonLexer.RSQB: + // append the current brace expression with a ")" or a "]" + this.appendToBraceExpression(this.curToken!.text); + this.decrementBraceStack(); + break; + case PythonLexer.COLON: + case PythonLexer.COLONEQUAL: + // append the current brace expression with a ":" or a ":=" + this.appendToBraceExpression(this.curToken!.text); + this.setLexerModeByCOLONorCOLONEQUALtoken(); + break; + case PythonLexer.RBRACE: + this.setLexerModeAfterRBRACEtoken(); + break; + default: + // append the current brace expression with the current token text + this.appendToBraceExpression(this.curToken!.text); + } + } + + private appendToBraceExpression(text: string): void { + 
this.braceExpressionStack[this.braceExpressionStack.length - 1] += text; + } + + private incrementBraceStack(): void { // increment the last element (peek() + 1) + this.paren_or_bracket_openedStack[this.paren_or_bracket_openedStack.length - 1]++; + } + + private decrementBraceStack(): void { // decrement the last element (peek() - 1) + this.paren_or_bracket_openedStack[this.paren_or_bracket_openedStack.length - 1]--; + } + + private setLexerModeAfterRBRACEtoken(): void { + switch (this.curLexerMode) { + case Lexer.DEFAULT_MODE: + this.popLexerMode(); + this.popByBRACE(); + break; + case PythonLexer.SQ1__FORMAT_SPECIFICATION_MODE: + case PythonLexer.SQ1R_FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ1__FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ1R_FORMAT_SPECIFICATION_MODE: + case PythonLexer.SQ3__FORMAT_SPECIFICATION_MODE: + case PythonLexer.SQ3R_FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ3__FORMAT_SPECIFICATION_MODE: + case PythonLexer.DQ3R_FORMAT_SPECIFICATION_MODE: + this.popLexerMode(); + this.popLexerMode(); + this.popByBRACE(); + break; + default: + this.reportLexerError("f-string: single '}' is not allowed"); + } + } + + private setLexerModeByFSTRING_STARTtoken(): void { + const text = this.curToken!.text.toLowerCase(); + const modeMap: { [key: string]: number } = { + "f'": PythonLexer.SQ1__FSTRING_MODE, + "rf'": PythonLexer.SQ1R_FSTRING_MODE, + "fr'": PythonLexer.SQ1R_FSTRING_MODE, + 'f"': PythonLexer.DQ1__FSTRING_MODE, + 'rf"': PythonLexer.DQ1R_FSTRING_MODE, + 'fr"': PythonLexer.DQ1R_FSTRING_MODE, + "f'''": PythonLexer.SQ3__FSTRING_MODE, + "rf'''": PythonLexer.SQ3R_FSTRING_MODE, + "fr'''": PythonLexer.SQ3R_FSTRING_MODE, + 'f"""': PythonLexer.DQ3__FSTRING_MODE, + 'rf"""': PythonLexer.DQ3R_FSTRING_MODE, + 'fr"""': PythonLexer.DQ3R_FSTRING_MODE, + }; + const mode = modeMap[text]; + if (mode !== undefined) { + this.pushLexerMode(mode); + } + } + + private setLexerModeByCOLONorCOLONEQUALtoken(): void { + if (this.paren_or_bracket_openedStack[this.paren_or_bracket_openedStack.length - 1] === 0) { // stack peek == 0 + const previousMode = this.lexerModeStack[this.lexerModeStack.length - 1]; // stack peek + switch (previousMode) { // check the previous lexer mode (the current is DEFAULT_MODE) + case PythonLexer.SQ1__FSTRING_MODE: + case PythonLexer.SQ1__FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.SQ1__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.SQ1R_FSTRING_MODE: + case PythonLexer.SQ1R_FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.SQ1R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.DQ1__FSTRING_MODE: + case PythonLexer.DQ1__FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.DQ1__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.DQ1R_FSTRING_MODE: + case PythonLexer.DQ1R_FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.DQ1R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.SQ3__FSTRING_MODE: + case PythonLexer.SQ3__FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.SQ3__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.SQ3R_FSTRING_MODE: + case PythonLexer.SQ3R_FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.SQ3R_FORMAT_SPECIFICATION_MODE); // continue in format spec. 
mode + break; + case PythonLexer.DQ3__FSTRING_MODE: + case PythonLexer.DQ3__FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.DQ3__FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + case PythonLexer.DQ3R_FSTRING_MODE: + case PythonLexer.DQ3R_FORMAT_SPECIFICATION_MODE: + this.pushLexerMode(PythonLexer.DQ3R_FORMAT_SPECIFICATION_MODE); // continue in format spec. mode + break; + } + } + } + + private popByBRACE(): void { + this.paren_or_bracket_openedStack.pop(); + this.prevBraceExpression = this.braceExpressionStack.pop() + "}"; + if (this.braceExpressionStack.length > 0) { + // append the current brace expression with the previous brace expression + this.braceExpressionStack[this.braceExpressionStack.length - 1] += this.prevBraceExpression; + } + } + + private handleFSTRING_MIDDLEtokenWithDoubleBrace(): void { + // Replace the trailing double brace with a single brace and insert a hidden brace token + switch (this.getLastTwoCharsOfTheCurTokenText()) { + case "{{": + this.trimLastCharAddPendingTokenSetCurToken(PythonLexer.LBRACE, "{", Token.HIDDEN_CHANNEL); + break; + case "}}": + this.trimLastCharAddPendingTokenSetCurToken(PythonLexer.RBRACE, "}", Token.HIDDEN_CHANNEL); + break; + } + } + + private handleFSTRING_MIDDLEtokenWithQuoteAndLBrace(): void { + // Replace the trailing quote + left_brace with a quote and insert an LBRACE token + // Replace the trailing backslash + left_brace with a backslash and insert an LBRACE token + switch (this.getLastTwoCharsOfTheCurTokenText()) { + case "\"{": + case "'{": + case "\\{": + this.trimLastCharAddPendingTokenSetCurToken(PythonLexer.LBRACE, "{", Token.DEFAULT_CHANNEL); + break; + } + } + + private getLastTwoCharsOfTheCurTokenText(): string { + return this.curToken!.text.slice(-2); + } + + private trimLastCharAddPendingTokenSetCurToken(type: number, text: string, channel: number): void { + // Trim the last char and add the modified curToken to the pendingTokens stack + const tokenTextWithoutLastChar = this.curToken!.text.slice(0, -1); + this.curToken!.text = tokenTextWithoutLastChar; + this.curToken!.stop -= 1; + this.addPendingToken(this.curToken!); + + this.createNewCurToken(type, text, channel); // Set curToken + } + + private handleCOLONEQUALtokenInFString(): void { + if ( + this.lexerModeStack.length > 0 && + this.paren_or_bracket_openedStack[this.paren_or_bracket_openedStack.length - 1] === 0 // stack peek == 0 + ) { + // In fstring, a colonequal (walrus operator) can only be used in parentheses + // Not in parentheses, replace COLONEQUAL token with COLON as format specifier + // and insert the equal symbol to the following FSTRING_MIDDLE token + this.curToken!.type = PythonLexer.COLON; + this.curToken!.text = ":"; + this.curToken!.stop = this.curToken!.start; + + if (this.ffgToken!.type === PythonLexer.FSTRING_MIDDLE) { + this.ffgToken!.text = "=" + this.ffgToken!.text; + this.ffgToken!.start -= 1; + this.ffgToken!.column -= 1; + } else { + this.addPendingToken(this.curToken!); + this.createNewCurToken(PythonLexer.FSTRING_MIDDLE, "=", Token.DEFAULT_CHANNEL); + } + } + this.addPendingToken(this.curToken!); + } + + private createNewCurToken(type: number, text: string, channel: number): void { + const ctkn = this.curToken!.clone(); + ctkn.type = type; + ctkn.text = text; + ctkn.channel = channel; + ctkn.column += 1; + ctkn.start += 1; + ctkn.stop = ctkn.start; + this.curToken = ctkn; + } + + private pushLexerMode(mode: number): void { + this.pushMode(mode); + this.lexerModeStack.push(this.curLexerMode); + 
this.curLexerMode = mode; + } + + private popLexerMode(): void { + this.popMode(); + this.curLexerMode = this.lexerModeStack.pop()!; + } + + private handleFORMAT_SPECIFICATION_MODE() { + if (this.lexerModeStack.length > 0 && + this.ffgToken!.type === PythonLexer.RBRACE) { + + // insert an empty FSTRING_MIDDLE token instead of the missing format specification + switch (this.curToken!.type) { + case PythonLexer.COLON: + this.createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, "", this.ffgToken!); + break; + case PythonLexer.RBRACE: + // only if the previous brace expression is not a dictionary comprehension or set comprehension + if (!this.isDictionaryComprehensionOrSetComprehension(this.prevBraceExpression)) { + this.createAndAddPendingToken(PythonLexer.FSTRING_MIDDLE, Token.DEFAULT_CHANNEL, "", this.ffgToken!); + } + break; + } + } + } + + private isDictionaryComprehensionOrSetComprehension(code: string): boolean { + const inputStream: CharStream = CharStreams.fromString(code); + const lexer = new PythonLexer(inputStream); + const tokenStream = new CommonTokenStream(lexer); + let parser = new PythonParser(tokenStream); + + // Disable error listeners to suppress console output + lexer.removeErrorListeners(); + parser.removeErrorListeners(); + + parser.dictcomp(); // Try parsing as dictionary comprehension + if (parser.syntaxErrorsCount === 0) + return true; + + parser = new PythonParser(tokenStream); + (tokenStream as any).seek(0); // seek method is not declared in CommonTokenStream.d.ts + parser.removeErrorListeners(); + parser.setcomp(); // Try parsing as set comprehension + return parser.syntaxErrorsCount === 0; + } + + private insertTrailingTokens(): void { + switch (this.lastPendingTokenTypeFromDefaultChannel) { + case PythonLexer.NEWLINE: + case PythonLexer.DEDENT: + break; // no trailing NEWLINE token is needed + default: + // insert an extra trailing NEWLINE token that serves as the end of the last statement + this.createAndAddPendingToken(PythonLexer.NEWLINE, Token.DEFAULT_CHANNEL, null, this.ffgToken!); // ffgToken is EOF + } + this.insertIndentOrDedentToken(0); // Now insert as much trailing DEDENT tokens as needed + } + + private handleEOFtoken(): void { + if (this.lastPendingTokenTypeFromDefaultChannel > 0) { + // there was a statement in the input (leading NEWLINE tokens are hidden) + this.insertTrailingTokens(); + } + this.addPendingToken(this.curToken!); + } + + private hideAndAddPendingToken(tkn: Token): void { + tkn.channel = Token.HIDDEN_CHANNEL; + this.addPendingToken(tkn); + } + + private createAndAddPendingToken(type: number, channel: number, text: string | null, sampleToken: Token): void { + const tkn: Token = sampleToken.clone(); + tkn.type = type; + tkn.channel = channel; + tkn.stop = sampleToken.start - 1; + tkn.text = text == null ? 
+ `<${this.getSymbolicNames()[type]}>` : + text; + + this.addPendingToken(tkn); + } + + private addPendingToken(tkn: Token): void { + // save the last pending token type because the pendingTokens list can be empty by the nextToken() + this.previousPendingTokenType = tkn.type; + if (tkn.channel === Token.DEFAULT_CHANNEL) { + this.lastPendingTokenTypeFromDefaultChannel = this.previousPendingTokenType; + } + this.pendingTokens.push(tkn) /* .addLast(token) */; + } + + private getIndentationLength(indentText: string): number { // the indentText may contain spaces, tabs or form feeds + const TAB_LENGTH: number = 8; // the standard number of spaces to replace a tab to spaces + let length: number = 0; + for (let ch of indentText) { + switch (ch) { + case " ": + this.wasSpaceIndentation = true; + length += 1; + break; + case "\t": + this.wasTabIndentation = true; + length += TAB_LENGTH - (length % TAB_LENGTH); + break; + case "\f": // form feed + length = 0; + break; + } + } + + if (this.wasTabIndentation && this.wasSpaceIndentation) { + if (!this.wasIndentationMixedWithSpacesAndTabs) { + this.wasIndentationMixedWithSpacesAndTabs = true; + length = this.INVALID_LENGTH; // only for the first inconsistent indent + } + } + return length; + } + + private reportLexerError(errMsg: string): void { + this.getErrorListener().syntaxError(this, 0 /* this.curToken */, this.curToken!.line, this.curToken!.column, " LEXER" + this.ERR_TXT + errMsg, undefined); + } + + private reportError(errMsg: string): void { + this.reportLexerError(errMsg); + + // the ERRORTOKEN will raise an error in the parser + this.createAndAddPendingToken(PythonLexer.ERRORTOKEN, Token.DEFAULT_CHANNEL, this.ERR_TXT + errMsg, this.ffgToken!); + } +} diff --git a/python/python3_13/changes.md b/python/python3_13/changes.md new file mode 100644 index 0000000000..7934d3757b --- /dev/null +++ b/python/python3_13/changes.md @@ -0,0 +1,30 @@ +# Jan. 07, 2025 +- parser grammar update for Python 3.13.1

+- added ENCODING token
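As a sketch of what this means for consumers (hypothetical driver code, assuming the generated TypeScript lexer from this patch): the PEP 263 encoding declaration, searched for in the first two lines, is emitted as a hidden-channel ENCODING token in front of all other tokens, with `utf-8` as the fallback text.

```typescript
import { CharStreams, CommonTokenStream, Token } from "antlr4";
import PythonLexer from "./PythonLexer";

const source = "# -*- coding: latin-1 -*-\nx = 1\n";
const stream = new CommonTokenStream(new PythonLexer(CharStreams.fromString(source)));
stream.fill();

const first = stream.tokens[0]; // ENCODING is always the first pending token
console.log(first.type === PythonLexer.ENCODING);    // true
console.log(first.channel === Token.HIDDEN_CHANNEL); // true
console.log(first.text);                             // "latin-1" ("utf-8" without a declaration)
```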

+- complete rewrite of fstring tokenizer in lexer grammar and PythonLexerBase class + - now correctly tokenizes the following in fstrings (see the sketch below): + - escape sequences + - walrus operator + - dictionary comprehension + - set comprehension
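For example, the walrus/format-spec distinction now follows CPython: outside parentheses, a `:=` inside a replacement field is split into a COLON that opens the format specification plus an FSTRING_MIDDLE starting with `=`. A hypothetical sketch, under the same assumptions as the TypeScript target in this patch:

```typescript
import { CharStreams, CommonTokenStream } from "antlr4";
import PythonLexer from "./PythonLexer";

function tokenNames(source: string): string[] {
    const lexer = new PythonLexer(CharStreams.fromString(source));
    const stream = new CommonTokenStream(lexer);
    stream.fill();
    return stream.tokens.map(t => lexer.getSymbolicNames()[t.type] ?? "EOF");
}

console.log(tokenNames('f"{(x := 1)}"')); // parenthesized: a real COLONEQUAL (walrus)
console.log(tokenNames('f"{x:=1}"'));     // unparenthesized: COLON + FSTRING_MIDDLE "=1"
```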

+- soft keywords changes: + - no embedded code (semantic predicates) in parser grammar for soft keywords + - no need for PythonParserBase class + - no need for transformGrammar.py + - BREAKING CHANGES: + - dedicated tokens for soft keywords instead of NAME token: + - NAME_OR_TYPE + - NAME_OR_MATCH + - NAME_OR_CASE + - NAME_OR_WILDCARD + +# Oct. 18, 2024 +- Fix that `case [a, *_] if a == 0:` throws error rule soft_kw__not__wildcard failed predicate: + `{this.isnotEqualToCurrentTokenText("_")}?` + +# Sept. 05, 2024 +- Type comment tokens are no longer generated. + Type comments will now be tokenized as plain comment tokens. +

+- Line continuation for string literals (backslash followed by a newline) is no longer resolved. + (backslash+newline is no longer removed from string literals) diff --git a/python/python3_12/desc.xml b/python/python3_13/desc.xml similarity index 100% rename from python/python3_12/desc.xml rename to python/python3_13/desc.xml diff --git a/python/python3_12/examples/__future__.py b/python/python3_13/examples/__future__.py similarity index 100% rename from python/python3_12/examples/__future__.py rename to python/python3_13/examples/__future__.py diff --git a/python/python3_12/examples/__hello__.py b/python/python3_13/examples/__hello__.py similarity index 100% rename from python/python3_12/examples/__hello__.py rename to python/python3_13/examples/__hello__.py diff --git a/python/python3_12/examples/_aix_support.py b/python/python3_13/examples/_aix_support.py similarity index 100% rename from python/python3_12/examples/_aix_support.py rename to python/python3_13/examples/_aix_support.py diff --git a/python/python3_13/examples/_android_support.py b/python/python3_13/examples/_android_support.py new file mode 100644 index 0000000000..7572745c85 --- /dev/null +++ b/python/python3_13/examples/_android_support.py @@ -0,0 +1,181 @@ +import io +import sys +from threading import RLock +from time import sleep, time + +# The maximum length of a log message in bytes, including the level marker and +# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD at +# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log.h;l=71. +# Messages longer than this will be be truncated by logcat. This limit has already +# been reduced at least once in the history of Android (from 4076 to 4068 between +# API level 23 and 26), so leave some headroom. +MAX_BYTES_PER_WRITE = 4000 + +# UTF-8 uses a maximum of 4 bytes per character, so limiting text writes to this +# size ensures that we can always avoid exceeding MAX_BYTES_PER_WRITE. +# However, if the actual number of bytes per character is smaller than that, +# then we may still join multiple consecutive text writes into binary +# writes containing a larger number of characters. +MAX_CHARS_PER_WRITE = MAX_BYTES_PER_WRITE // 4 + + +# When embedded in an app on current versions of Android, there's no easy way to +# monitor the C-level stdout and stderr. The testbed comes with a .c file to +# redirect them to the system log using a pipe, but that wouldn't be convenient +# or appropriate for all apps. So we redirect at the Python level instead. +def init_streams(android_log_write, stdout_prio, stderr_prio): + if sys.executable: + return # Not embedded in an app. + + global logcat + logcat = Logcat(android_log_write) + + sys.stdout = TextLogStream( + stdout_prio, "python.stdout", sys.stdout.fileno()) + sys.stderr = TextLogStream( + stderr_prio, "python.stderr", sys.stderr.fileno()) + + +class TextLogStream(io.TextIOWrapper): + def __init__(self, prio, tag, fileno=None, **kwargs): + # The default is surrogateescape for stdout and backslashreplace for + # stderr, but in the context of an Android log, readability is more + # important than reversibility. 
+        kwargs.setdefault("encoding", "UTF-8")
+        kwargs.setdefault("errors", "backslashreplace")
+
+        super().__init__(BinaryLogStream(prio, tag, fileno), **kwargs)
+        self._lock = RLock()
+        self._pending_bytes = []
+        self._pending_bytes_count = 0
+
+    def __repr__(self):
+        return f"<TextLogStream {self.buffer.tag!r}>"
+
+    def write(self, s):
+        if not isinstance(s, str):
+            raise TypeError(
+                f"write() argument must be str, not {type(s).__name__}")
+
+        # In case `s` is a str subclass that writes itself to stdout or stderr
+        # when we call its methods, convert it to an actual str.
+        s = str.__str__(s)
+
+        # We want to emit one log message per line wherever possible, so split
+        # the string into lines first. Note that "".splitlines() == [], so
+        # nothing will be logged for an empty string.
+        with self._lock:
+            for line in s.splitlines(keepends=True):
+                while line:
+                    chunk = line[:MAX_CHARS_PER_WRITE]
+                    line = line[MAX_CHARS_PER_WRITE:]
+                    self._write_chunk(chunk)
+
+        return len(s)
+
+    # The size and behavior of TextIOWrapper's buffer is not part of its public
+    # API, so we handle buffering ourselves to avoid truncation.
+    def _write_chunk(self, s):
+        b = s.encode(self.encoding, self.errors)
+        if self._pending_bytes_count + len(b) > MAX_BYTES_PER_WRITE:
+            self.flush()
+
+        self._pending_bytes.append(b)
+        self._pending_bytes_count += len(b)
+        if (
+            self.write_through
+            or b.endswith(b"\n")
+            or self._pending_bytes_count > MAX_BYTES_PER_WRITE
+        ):
+            self.flush()
+
+    def flush(self):
+        with self._lock:
+            self.buffer.write(b"".join(self._pending_bytes))
+            self._pending_bytes.clear()
+            self._pending_bytes_count = 0
+
+    # Since this is a line-based logging system, line buffering cannot be turned
+    # off, i.e. a newline always causes a flush.
+    @property
+    def line_buffering(self):
+        return True
+
+
+class BinaryLogStream(io.RawIOBase):
+    def __init__(self, prio, tag, fileno=None):
+        self.prio = prio
+        self.tag = tag
+        self._fileno = fileno
+
+    def __repr__(self):
+        return f"<BinaryLogStream {self.tag!r}>"
+
+    def writable(self):
+        return True
+
+    def write(self, b):
+        if type(b) is not bytes:
+            try:
+                b = bytes(memoryview(b))
+            except TypeError:
+                raise TypeError(
+                    f"write() argument must be bytes-like, not {type(b).__name__}"
+                ) from None
+
+        # Writing an empty string to the stream should have no effect.
+        if b:
+            logcat.write(self.prio, self.tag, b)
+        return len(b)
+
+    # This is needed by the test suite --timeout option, which uses faulthandler.
+    def fileno(self):
+        if self._fileno is None:
+            raise io.UnsupportedOperation("fileno")
+        return self._fileno
+
+
+# When a large volume of data is written to logcat at once, e.g. when a test
+# module fails in --verbose3 mode, there's a risk of overflowing logcat's own
+# buffer and losing messages. We avoid this by imposing a rate limit using the
+# token bucket algorithm, based on a conservative estimate of how fast `adb
+# logcat` can consume data.
+MAX_BYTES_PER_SECOND = 1024 * 1024
+
+# The logcat buffer size of a device can be determined by running `logcat -g`.
+# We set the token bucket size to half of the buffer size of our current minimum
+# API level, because other things on the system will be producing messages as
+# well.
+BUCKET_SIZE = 128 * 1024 + +# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39 +PER_MESSAGE_OVERHEAD = 28 + + +class Logcat: + def __init__(self, android_log_write): + self.android_log_write = android_log_write + self._lock = RLock() + self._bucket_level = 0 + self._prev_write_time = time() + + def write(self, prio, tag, message): + # Encode null bytes using "modified UTF-8" to avoid them truncating the + # message. + message = message.replace(b"\x00", b"\xc0\x80") + + with self._lock: + now = time() + self._bucket_level += ( + (now - self._prev_write_time) * MAX_BYTES_PER_SECOND) + + # If the bucket level is still below zero, the clock must have gone + # backwards, so reset it to zero and continue. + self._bucket_level = max(0, min(self._bucket_level, BUCKET_SIZE)) + self._prev_write_time = now + + self._bucket_level -= PER_MESSAGE_OVERHEAD + len(tag) + len(message) + if self._bucket_level < 0: + sleep(-self._bucket_level / MAX_BYTES_PER_SECOND) + + self.android_log_write(prio, tag, message) diff --git a/python/python3_12/examples/_collections_abc.py b/python/python3_13/examples/_collections_abc.py similarity index 99% rename from python/python3_12/examples/_collections_abc.py rename to python/python3_13/examples/_collections_abc.py index 601107d2d8..aebe9c8b64 100644 --- a/python/python3_12/examples/_collections_abc.py +++ b/python/python3_13/examples/_collections_abc.py @@ -85,6 +85,10 @@ def _f(): pass dict_items = type({}.items()) ## misc ## mappingproxy = type(type.__dict__) +def _get_framelocalsproxy(): + return type(sys._getframe().f_locals) +framelocalsproxy = _get_framelocalsproxy() +del _get_framelocalsproxy generator = type((lambda: (yield))()) ## coroutine ## async def _coro(): pass @@ -836,6 +840,7 @@ def __eq__(self, other): __reversed__ = None Mapping.register(mappingproxy) +Mapping.register(framelocalsproxy) class MappingView(Sized): @@ -973,7 +978,7 @@ def clear(self): def update(self, other=(), /, **kwds): ''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F. 
- If E present and has a .keys() method, does: for k in E: D[k] = E[k] + If E present and has a .keys() method, does: for k in E.keys(): D[k] = E[k] If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v In either case, this is followed by: for k, v in F.items(): D[k] = v ''' diff --git a/python/python3_13/examples/_colorize.py b/python/python3_13/examples/_colorize.py new file mode 100644 index 0000000000..845fb57a90 --- /dev/null +++ b/python/python3_13/examples/_colorize.py @@ -0,0 +1,64 @@ +import io +import os +import sys + +COLORIZE = True + + +class ANSIColors: + BOLD_GREEN = "\x1b[1;32m" + BOLD_MAGENTA = "\x1b[1;35m" + BOLD_RED = "\x1b[1;31m" + GREEN = "\x1b[32m" + GREY = "\x1b[90m" + MAGENTA = "\x1b[35m" + RED = "\x1b[31m" + RESET = "\x1b[0m" + YELLOW = "\x1b[33m" + + +NoColors = ANSIColors() + +for attr in dir(NoColors): + if not attr.startswith("__"): + setattr(NoColors, attr, "") + + +def get_colors(colorize: bool = False) -> ANSIColors: + if colorize or can_colorize(): + return ANSIColors() + else: + return NoColors + + +def can_colorize() -> bool: + if sys.platform == "win32": + try: + import nt + + if not nt._supports_virtual_terminal(): + return False + except (ImportError, AttributeError): + return False + if not sys.flags.ignore_environment: + if os.environ.get("PYTHON_COLORS") == "0": + return False + if os.environ.get("PYTHON_COLORS") == "1": + return True + if "NO_COLOR" in os.environ: + return False + if not COLORIZE: + return False + if not sys.flags.ignore_environment: + if "FORCE_COLOR" in os.environ: + return True + if os.environ.get("TERM") == "dumb": + return False + + if not hasattr(sys.stderr, "fileno"): + return False + + try: + return os.isatty(sys.stderr.fileno()) + except io.UnsupportedOperation: + return sys.stderr.isatty() diff --git a/python/python3_12/examples/_compat_pickle.py b/python/python3_13/examples/_compat_pickle.py similarity index 99% rename from python/python3_12/examples/_compat_pickle.py rename to python/python3_13/examples/_compat_pickle.py index 65a94b6b1b..439f8c02f4 100644 --- a/python/python3_12/examples/_compat_pickle.py +++ b/python/python3_13/examples/_compat_pickle.py @@ -22,7 +22,6 @@ 'tkMessageBox': 'tkinter.messagebox', 'ScrolledText': 'tkinter.scrolledtext', 'Tkconstants': 'tkinter.constants', - 'Tix': 'tkinter.tix', 'ttk': 'tkinter.ttk', 'Tkinter': 'tkinter', 'markupbase': '_markupbase', diff --git a/python/python3_12/examples/_compression.py b/python/python3_13/examples/_compression.py similarity index 100% rename from python/python3_12/examples/_compression.py rename to python/python3_13/examples/_compression.py diff --git a/python/python3_13/examples/_ios_support.py b/python/python3_13/examples/_ios_support.py new file mode 100644 index 0000000000..20467a7c2b --- /dev/null +++ b/python/python3_13/examples/_ios_support.py @@ -0,0 +1,71 @@ +import sys +try: + from ctypes import cdll, c_void_p, c_char_p, util +except ImportError: + # ctypes is an optional module. If it's not present, we're limited in what + # we can tell about the system, but we don't want to prevent the module + # from working. + print("ctypes isn't available; iOS system calls will not be available", file=sys.stderr) + objc = None +else: + # ctypes is available. 
Load the ObjC library, and wrap the objc_getClass, + # sel_registerName methods + lib = util.find_library("objc") + if lib is None: + # Failed to load the objc library + raise ImportError("ObjC runtime library couldn't be loaded") + + objc = cdll.LoadLibrary(lib) + objc.objc_getClass.restype = c_void_p + objc.objc_getClass.argtypes = [c_char_p] + objc.sel_registerName.restype = c_void_p + objc.sel_registerName.argtypes = [c_char_p] + + +def get_platform_ios(): + # Determine if this is a simulator using the multiarch value + is_simulator = sys.implementation._multiarch.endswith("simulator") + + # We can't use ctypes; abort + if not objc: + return None + + # Most of the methods return ObjC objects + objc.objc_msgSend.restype = c_void_p + # All the methods used have no arguments. + objc.objc_msgSend.argtypes = [c_void_p, c_void_p] + + # Equivalent of: + # device = [UIDevice currentDevice] + UIDevice = objc.objc_getClass(b"UIDevice") + SEL_currentDevice = objc.sel_registerName(b"currentDevice") + device = objc.objc_msgSend(UIDevice, SEL_currentDevice) + + # Equivalent of: + # device_systemVersion = [device systemVersion] + SEL_systemVersion = objc.sel_registerName(b"systemVersion") + device_systemVersion = objc.objc_msgSend(device, SEL_systemVersion) + + # Equivalent of: + # device_systemName = [device systemName] + SEL_systemName = objc.sel_registerName(b"systemName") + device_systemName = objc.objc_msgSend(device, SEL_systemName) + + # Equivalent of: + # device_model = [device model] + SEL_model = objc.sel_registerName(b"model") + device_model = objc.objc_msgSend(device, SEL_model) + + # UTF8String returns a const char*; + SEL_UTF8String = objc.sel_registerName(b"UTF8String") + objc.objc_msgSend.restype = c_char_p + + # Equivalent of: + # system = [device_systemName UTF8String] + # release = [device_systemVersion UTF8String] + # model = [device_model UTF8String] + system = objc.objc_msgSend(device_systemName, SEL_UTF8String).decode() + release = objc.objc_msgSend(device_systemVersion, SEL_UTF8String).decode() + model = objc.objc_msgSend(device_model, SEL_UTF8String).decode() + + return system, release, model, is_simulator diff --git a/python/python3_12/examples/_markupbase.py b/python/python3_13/examples/_markupbase.py similarity index 100% rename from python/python3_12/examples/_markupbase.py rename to python/python3_13/examples/_markupbase.py diff --git a/python/python3_13/examples/_opcode_metadata.py b/python/python3_13/examples/_opcode_metadata.py new file mode 100644 index 0000000000..b3d7b8103e --- /dev/null +++ b/python/python3_13/examples/_opcode_metadata.py @@ -0,0 +1,343 @@ +# This file is generated by Tools/cases_generator/py_metadata_generator.py +# from: +# Python/bytecodes.c +# Do not edit! 
+_specializations = { + "RESUME": [ + "RESUME_CHECK", + ], + "TO_BOOL": [ + "TO_BOOL_ALWAYS_TRUE", + "TO_BOOL_BOOL", + "TO_BOOL_INT", + "TO_BOOL_LIST", + "TO_BOOL_NONE", + "TO_BOOL_STR", + ], + "BINARY_OP": [ + "BINARY_OP_MULTIPLY_INT", + "BINARY_OP_ADD_INT", + "BINARY_OP_SUBTRACT_INT", + "BINARY_OP_MULTIPLY_FLOAT", + "BINARY_OP_ADD_FLOAT", + "BINARY_OP_SUBTRACT_FLOAT", + "BINARY_OP_ADD_UNICODE", + "BINARY_OP_INPLACE_ADD_UNICODE", + ], + "BINARY_SUBSCR": [ + "BINARY_SUBSCR_DICT", + "BINARY_SUBSCR_GETITEM", + "BINARY_SUBSCR_LIST_INT", + "BINARY_SUBSCR_STR_INT", + "BINARY_SUBSCR_TUPLE_INT", + ], + "STORE_SUBSCR": [ + "STORE_SUBSCR_DICT", + "STORE_SUBSCR_LIST_INT", + ], + "SEND": [ + "SEND_GEN", + ], + "UNPACK_SEQUENCE": [ + "UNPACK_SEQUENCE_TWO_TUPLE", + "UNPACK_SEQUENCE_TUPLE", + "UNPACK_SEQUENCE_LIST", + ], + "STORE_ATTR": [ + "STORE_ATTR_INSTANCE_VALUE", + "STORE_ATTR_SLOT", + "STORE_ATTR_WITH_HINT", + ], + "LOAD_GLOBAL": [ + "LOAD_GLOBAL_MODULE", + "LOAD_GLOBAL_BUILTIN", + ], + "LOAD_SUPER_ATTR": [ + "LOAD_SUPER_ATTR_ATTR", + "LOAD_SUPER_ATTR_METHOD", + ], + "LOAD_ATTR": [ + "LOAD_ATTR_INSTANCE_VALUE", + "LOAD_ATTR_MODULE", + "LOAD_ATTR_WITH_HINT", + "LOAD_ATTR_SLOT", + "LOAD_ATTR_CLASS", + "LOAD_ATTR_PROPERTY", + "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN", + "LOAD_ATTR_METHOD_WITH_VALUES", + "LOAD_ATTR_METHOD_NO_DICT", + "LOAD_ATTR_METHOD_LAZY_DICT", + "LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", + "LOAD_ATTR_NONDESCRIPTOR_NO_DICT", + ], + "COMPARE_OP": [ + "COMPARE_OP_FLOAT", + "COMPARE_OP_INT", + "COMPARE_OP_STR", + ], + "CONTAINS_OP": [ + "CONTAINS_OP_SET", + "CONTAINS_OP_DICT", + ], + "FOR_ITER": [ + "FOR_ITER_LIST", + "FOR_ITER_TUPLE", + "FOR_ITER_RANGE", + "FOR_ITER_GEN", + ], + "CALL": [ + "CALL_BOUND_METHOD_EXACT_ARGS", + "CALL_PY_EXACT_ARGS", + "CALL_TYPE_1", + "CALL_STR_1", + "CALL_TUPLE_1", + "CALL_BUILTIN_CLASS", + "CALL_BUILTIN_O", + "CALL_BUILTIN_FAST", + "CALL_BUILTIN_FAST_WITH_KEYWORDS", + "CALL_LEN", + "CALL_ISINSTANCE", + "CALL_LIST_APPEND", + "CALL_METHOD_DESCRIPTOR_O", + "CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS", + "CALL_METHOD_DESCRIPTOR_NOARGS", + "CALL_METHOD_DESCRIPTOR_FAST", + "CALL_ALLOC_AND_ENTER_INIT", + "CALL_PY_GENERAL", + "CALL_BOUND_METHOD_GENERAL", + "CALL_NON_PY_GENERAL", + ], +} + +_specialized_opmap = { + 'BINARY_OP_ADD_FLOAT': 150, + 'BINARY_OP_ADD_INT': 151, + 'BINARY_OP_ADD_UNICODE': 152, + 'BINARY_OP_INPLACE_ADD_UNICODE': 3, + 'BINARY_OP_MULTIPLY_FLOAT': 153, + 'BINARY_OP_MULTIPLY_INT': 154, + 'BINARY_OP_SUBTRACT_FLOAT': 155, + 'BINARY_OP_SUBTRACT_INT': 156, + 'BINARY_SUBSCR_DICT': 157, + 'BINARY_SUBSCR_GETITEM': 158, + 'BINARY_SUBSCR_LIST_INT': 159, + 'BINARY_SUBSCR_STR_INT': 160, + 'BINARY_SUBSCR_TUPLE_INT': 161, + 'CALL_ALLOC_AND_ENTER_INIT': 162, + 'CALL_BOUND_METHOD_EXACT_ARGS': 163, + 'CALL_BOUND_METHOD_GENERAL': 164, + 'CALL_BUILTIN_CLASS': 165, + 'CALL_BUILTIN_FAST': 166, + 'CALL_BUILTIN_FAST_WITH_KEYWORDS': 167, + 'CALL_BUILTIN_O': 168, + 'CALL_ISINSTANCE': 169, + 'CALL_LEN': 170, + 'CALL_LIST_APPEND': 171, + 'CALL_METHOD_DESCRIPTOR_FAST': 172, + 'CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS': 173, + 'CALL_METHOD_DESCRIPTOR_NOARGS': 174, + 'CALL_METHOD_DESCRIPTOR_O': 175, + 'CALL_NON_PY_GENERAL': 176, + 'CALL_PY_EXACT_ARGS': 177, + 'CALL_PY_GENERAL': 178, + 'CALL_STR_1': 179, + 'CALL_TUPLE_1': 180, + 'CALL_TYPE_1': 181, + 'COMPARE_OP_FLOAT': 182, + 'COMPARE_OP_INT': 183, + 'COMPARE_OP_STR': 184, + 'CONTAINS_OP_DICT': 185, + 'CONTAINS_OP_SET': 186, + 'FOR_ITER_GEN': 187, + 'FOR_ITER_LIST': 188, + 'FOR_ITER_RANGE': 189, + 'FOR_ITER_TUPLE': 190, + 
'LOAD_ATTR_CLASS': 191, + 'LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN': 192, + 'LOAD_ATTR_INSTANCE_VALUE': 193, + 'LOAD_ATTR_METHOD_LAZY_DICT': 194, + 'LOAD_ATTR_METHOD_NO_DICT': 195, + 'LOAD_ATTR_METHOD_WITH_VALUES': 196, + 'LOAD_ATTR_MODULE': 197, + 'LOAD_ATTR_NONDESCRIPTOR_NO_DICT': 198, + 'LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES': 199, + 'LOAD_ATTR_PROPERTY': 200, + 'LOAD_ATTR_SLOT': 201, + 'LOAD_ATTR_WITH_HINT': 202, + 'LOAD_GLOBAL_BUILTIN': 203, + 'LOAD_GLOBAL_MODULE': 204, + 'LOAD_SUPER_ATTR_ATTR': 205, + 'LOAD_SUPER_ATTR_METHOD': 206, + 'RESUME_CHECK': 207, + 'SEND_GEN': 208, + 'STORE_ATTR_INSTANCE_VALUE': 209, + 'STORE_ATTR_SLOT': 210, + 'STORE_ATTR_WITH_HINT': 211, + 'STORE_SUBSCR_DICT': 212, + 'STORE_SUBSCR_LIST_INT': 213, + 'TO_BOOL_ALWAYS_TRUE': 214, + 'TO_BOOL_BOOL': 215, + 'TO_BOOL_INT': 216, + 'TO_BOOL_LIST': 217, + 'TO_BOOL_NONE': 218, + 'TO_BOOL_STR': 219, + 'UNPACK_SEQUENCE_LIST': 220, + 'UNPACK_SEQUENCE_TUPLE': 221, + 'UNPACK_SEQUENCE_TWO_TUPLE': 222, +} + +opmap = { + 'CACHE': 0, + 'RESERVED': 17, + 'RESUME': 149, + 'INSTRUMENTED_LINE': 254, + 'BEFORE_ASYNC_WITH': 1, + 'BEFORE_WITH': 2, + 'BINARY_SLICE': 4, + 'BINARY_SUBSCR': 5, + 'CHECK_EG_MATCH': 6, + 'CHECK_EXC_MATCH': 7, + 'CLEANUP_THROW': 8, + 'DELETE_SUBSCR': 9, + 'END_ASYNC_FOR': 10, + 'END_FOR': 11, + 'END_SEND': 12, + 'EXIT_INIT_CHECK': 13, + 'FORMAT_SIMPLE': 14, + 'FORMAT_WITH_SPEC': 15, + 'GET_AITER': 16, + 'GET_ANEXT': 18, + 'GET_ITER': 19, + 'GET_LEN': 20, + 'GET_YIELD_FROM_ITER': 21, + 'INTERPRETER_EXIT': 22, + 'LOAD_ASSERTION_ERROR': 23, + 'LOAD_BUILD_CLASS': 24, + 'LOAD_LOCALS': 25, + 'MAKE_FUNCTION': 26, + 'MATCH_KEYS': 27, + 'MATCH_MAPPING': 28, + 'MATCH_SEQUENCE': 29, + 'NOP': 30, + 'POP_EXCEPT': 31, + 'POP_TOP': 32, + 'PUSH_EXC_INFO': 33, + 'PUSH_NULL': 34, + 'RETURN_GENERATOR': 35, + 'RETURN_VALUE': 36, + 'SETUP_ANNOTATIONS': 37, + 'STORE_SLICE': 38, + 'STORE_SUBSCR': 39, + 'TO_BOOL': 40, + 'UNARY_INVERT': 41, + 'UNARY_NEGATIVE': 42, + 'UNARY_NOT': 43, + 'WITH_EXCEPT_START': 44, + 'BINARY_OP': 45, + 'BUILD_CONST_KEY_MAP': 46, + 'BUILD_LIST': 47, + 'BUILD_MAP': 48, + 'BUILD_SET': 49, + 'BUILD_SLICE': 50, + 'BUILD_STRING': 51, + 'BUILD_TUPLE': 52, + 'CALL': 53, + 'CALL_FUNCTION_EX': 54, + 'CALL_INTRINSIC_1': 55, + 'CALL_INTRINSIC_2': 56, + 'CALL_KW': 57, + 'COMPARE_OP': 58, + 'CONTAINS_OP': 59, + 'CONVERT_VALUE': 60, + 'COPY': 61, + 'COPY_FREE_VARS': 62, + 'DELETE_ATTR': 63, + 'DELETE_DEREF': 64, + 'DELETE_FAST': 65, + 'DELETE_GLOBAL': 66, + 'DELETE_NAME': 67, + 'DICT_MERGE': 68, + 'DICT_UPDATE': 69, + 'ENTER_EXECUTOR': 70, + 'EXTENDED_ARG': 71, + 'FOR_ITER': 72, + 'GET_AWAITABLE': 73, + 'IMPORT_FROM': 74, + 'IMPORT_NAME': 75, + 'IS_OP': 76, + 'JUMP_BACKWARD': 77, + 'JUMP_BACKWARD_NO_INTERRUPT': 78, + 'JUMP_FORWARD': 79, + 'LIST_APPEND': 80, + 'LIST_EXTEND': 81, + 'LOAD_ATTR': 82, + 'LOAD_CONST': 83, + 'LOAD_DEREF': 84, + 'LOAD_FAST': 85, + 'LOAD_FAST_AND_CLEAR': 86, + 'LOAD_FAST_CHECK': 87, + 'LOAD_FAST_LOAD_FAST': 88, + 'LOAD_FROM_DICT_OR_DEREF': 89, + 'LOAD_FROM_DICT_OR_GLOBALS': 90, + 'LOAD_GLOBAL': 91, + 'LOAD_NAME': 92, + 'LOAD_SUPER_ATTR': 93, + 'MAKE_CELL': 94, + 'MAP_ADD': 95, + 'MATCH_CLASS': 96, + 'POP_JUMP_IF_FALSE': 97, + 'POP_JUMP_IF_NONE': 98, + 'POP_JUMP_IF_NOT_NONE': 99, + 'POP_JUMP_IF_TRUE': 100, + 'RAISE_VARARGS': 101, + 'RERAISE': 102, + 'RETURN_CONST': 103, + 'SEND': 104, + 'SET_ADD': 105, + 'SET_FUNCTION_ATTRIBUTE': 106, + 'SET_UPDATE': 107, + 'STORE_ATTR': 108, + 'STORE_DEREF': 109, + 'STORE_FAST': 110, + 'STORE_FAST_LOAD_FAST': 111, + 'STORE_FAST_STORE_FAST': 112, + 'STORE_GLOBAL': 113, + 
'STORE_NAME': 114, + 'SWAP': 115, + 'UNPACK_EX': 116, + 'UNPACK_SEQUENCE': 117, + 'YIELD_VALUE': 118, + 'INSTRUMENTED_RESUME': 236, + 'INSTRUMENTED_END_FOR': 237, + 'INSTRUMENTED_END_SEND': 238, + 'INSTRUMENTED_RETURN_VALUE': 239, + 'INSTRUMENTED_RETURN_CONST': 240, + 'INSTRUMENTED_YIELD_VALUE': 241, + 'INSTRUMENTED_LOAD_SUPER_ATTR': 242, + 'INSTRUMENTED_FOR_ITER': 243, + 'INSTRUMENTED_CALL': 244, + 'INSTRUMENTED_CALL_KW': 245, + 'INSTRUMENTED_CALL_FUNCTION_EX': 246, + 'INSTRUMENTED_INSTRUCTION': 247, + 'INSTRUMENTED_JUMP_FORWARD': 248, + 'INSTRUMENTED_JUMP_BACKWARD': 249, + 'INSTRUMENTED_POP_JUMP_IF_TRUE': 250, + 'INSTRUMENTED_POP_JUMP_IF_FALSE': 251, + 'INSTRUMENTED_POP_JUMP_IF_NONE': 252, + 'INSTRUMENTED_POP_JUMP_IF_NOT_NONE': 253, + 'JUMP': 256, + 'JUMP_NO_INTERRUPT': 257, + 'LOAD_CLOSURE': 258, + 'LOAD_METHOD': 259, + 'LOAD_SUPER_METHOD': 260, + 'LOAD_ZERO_SUPER_ATTR': 261, + 'LOAD_ZERO_SUPER_METHOD': 262, + 'POP_BLOCK': 263, + 'SETUP_CLEANUP': 264, + 'SETUP_FINALLY': 265, + 'SETUP_WITH': 266, + 'STORE_FAST_MAYBE_NULL': 267, +} + +HAVE_ARGUMENT = 44 +MIN_INSTRUMENTED_OPCODE = 236 diff --git a/python/python3_12/examples/_osx_support.py b/python/python3_13/examples/_osx_support.py similarity index 98% rename from python/python3_12/examples/_osx_support.py rename to python/python3_13/examples/_osx_support.py index aa66c8b9f4..0cb064fcd7 100644 --- a/python/python3_12/examples/_osx_support.py +++ b/python/python3_13/examples/_osx_support.py @@ -507,6 +507,11 @@ def get_platform_osx(_config_vars, osname, release, machine): # MACOSX_DEPLOYMENT_TARGET. macver = _config_vars.get('MACOSX_DEPLOYMENT_TARGET', '') + if macver and '.' not in macver: + # Ensure that the version includes at least a major + # and minor version, even if MACOSX_DEPLOYMENT_TARGET + # is set to a single-label version like "14". + macver += '.0' macrelease = _get_system_version() or macver macver = macver or macrelease diff --git a/python/python3_12/examples/_py_abc.py b/python/python3_13/examples/_py_abc.py similarity index 100% rename from python/python3_12/examples/_py_abc.py rename to python/python3_13/examples/_py_abc.py diff --git a/python/python3_12/examples/_pydatetime.py b/python/python3_13/examples/_pydatetime.py similarity index 98% rename from python/python3_12/examples/_pydatetime.py rename to python/python3_13/examples/_pydatetime.py index a6d43399f9..34ccb2da13 100644 --- a/python/python3_12/examples/_pydatetime.py +++ b/python/python3_13/examples/_pydatetime.py @@ -556,10 +556,6 @@ def _check_tzinfo_arg(tz): if tz is not None and not isinstance(tz, tzinfo): raise TypeError("tzinfo argument must be None or of a tzinfo subclass") -def _cmperror(x, y): - raise TypeError("can't compare '%s' to '%s'" % ( - type(x).__name__, type(y).__name__)) - def _divide_and_round(a, b): """divide a by b and round result to the nearest integer @@ -970,6 +966,8 @@ def __new__(cls, year, month=None, day=None): @classmethod def fromtimestamp(cls, t): "Construct a date from a POSIX timestamp (like time.time())." + if t is None: + raise TypeError("'NoneType' object cannot be interpreted as an integer") y, m, d, hh, mm, ss, weekday, jday, dst = _time.localtime(t) return cls(y, m, d) @@ -1015,13 +1013,9 @@ def fromisocalendar(cls, year, week, day): def __repr__(self): """Convert to formal string, for repr(). 
- >>> dt = datetime(2010, 1, 1) - >>> repr(dt) - 'datetime.datetime(2010, 1, 1, 0, 0)' - - >>> dt = datetime(2010, 1, 1, tzinfo=timezone.utc) - >>> repr(dt) - 'datetime.datetime(2010, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)' + >>> d = date(2010, 1, 1) + >>> repr(d) + 'datetime.date(2010, 1, 1)' """ return "%s.%s(%d, %d, %d)" % (_get_class_module(self), self.__class__.__qualname__, @@ -1112,35 +1106,38 @@ def replace(self, year=None, month=None, day=None): day = self._day return type(self)(year, month, day) + __replace__ = replace + # Comparisons of date objects with other. def __eq__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) == 0 return NotImplemented def __le__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) <= 0 return NotImplemented def __lt__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) < 0 return NotImplemented def __ge__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) >= 0 return NotImplemented def __gt__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) > 0 return NotImplemented def _cmp(self, other): assert isinstance(other, date) + assert not isinstance(other, datetime) y, m, d = self._year, self._month, self._day y2, m2, d2 = other._year, other._month, other._day return _cmp((y, m, d), (y2, m2, d2)) @@ -1637,6 +1634,8 @@ def replace(self, hour=None, minute=None, second=None, microsecond=None, fold = self._fold return type(self)(hour, minute, second, microsecond, tzinfo, fold=fold) + __replace__ = replace + # Pickle support. def _getstate(self, protocol=3): @@ -1684,7 +1683,7 @@ class datetime(date): The year, month and day arguments are required. tzinfo may be None, or an instance of a tzinfo subclass. The remaining arguments may be ints. """ - __slots__ = date.__slots__ + time.__slots__ + __slots__ = time.__slots__ def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0, microsecond=0, tzinfo=None, *, fold=0): @@ -1809,7 +1808,7 @@ def fromtimestamp(cls, timestamp, tz=None): def utcfromtimestamp(cls, t): """Construct a naive UTC datetime from a POSIX timestamp.""" import warnings - warnings.warn("datetime.utcfromtimestamp() is deprecated and scheduled " + warnings.warn("datetime.datetime.utcfromtimestamp() is deprecated and scheduled " "for removal in a future version. Use timezone-aware " "objects to represent datetimes in UTC: " "datetime.datetime.fromtimestamp(t, datetime.UTC).", @@ -1827,8 +1826,8 @@ def now(cls, tz=None): def utcnow(cls): "Construct a UTC datetime from time.time()." import warnings - warnings.warn("datetime.utcnow() is deprecated and scheduled for " - "removal in a future version. Instead, Use timezone-aware " + warnings.warn("datetime.datetime.utcnow() is deprecated and scheduled for " + "removal in a future version. 
Use timezone-aware " "objects to represent datetimes in UTC: " "datetime.datetime.now(datetime.UTC).", DeprecationWarning, @@ -1983,6 +1982,8 @@ def replace(self, year=None, month=None, day=None, hour=None, return type(self)(year, month, day, hour, minute, second, microsecond, tzinfo, fold=fold) + __replace__ = replace + def _local_timezone(self): if self.tzinfo is None: ts = self._mktime() @@ -2135,42 +2136,32 @@ def dst(self): def __eq__(self, other): if isinstance(other, datetime): return self._cmp(other, allow_mixed=True) == 0 - elif not isinstance(other, date): - return NotImplemented else: - return False + return NotImplemented def __le__(self, other): if isinstance(other, datetime): return self._cmp(other) <= 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def __lt__(self, other): if isinstance(other, datetime): return self._cmp(other) < 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def __ge__(self, other): if isinstance(other, datetime): return self._cmp(other) >= 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def __gt__(self, other): if isinstance(other, datetime): return self._cmp(other) > 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def _cmp(self, other, allow_mixed=False): assert isinstance(other, datetime) @@ -2345,6 +2336,9 @@ def __new__(cls, offset, name=_Omitted): "timedelta(hours=24).") return cls._create(offset, name) + def __init_subclass__(cls): + raise TypeError("type 'datetime.timezone' is not an acceptable base type") + @classmethod def _create(cls, offset, name=None): self = tzinfo.__new__(cls) diff --git a/python/python3_12/examples/_pydecimal.py b/python/python3_13/examples/_pydecimal.py similarity index 98% rename from python/python3_12/examples/_pydecimal.py rename to python/python3_13/examples/_pydecimal.py index 2692f2fcba..75df3db262 100644 --- a/python/python3_12/examples/_pydecimal.py +++ b/python/python3_13/examples/_pydecimal.py @@ -13,104 +13,7 @@ # bug) and will be backported. At this point the spec is stabilizing # and the updates are becoming fewer, smaller, and less significant. -""" -This is an implementation of decimal floating point arithmetic based on -the General Decimal Arithmetic Specification: - - http://speleotrove.com/decimal/decarith.html - -and IEEE standard 854-1987: - - http://en.wikipedia.org/wiki/IEEE_854-1987 - -Decimal floating point has finite precision with arbitrarily large bounds. - -The purpose of this module is to support arithmetic using familiar -"schoolhouse" rules and to avoid some of the tricky representation -issues associated with binary floating point. The package is especially -useful for financial applications or for contexts where users have -expectations that are at odds with binary floating point (for instance, -in binary floating point, 1.00 % 0.1 gives 0.09999999999999995 instead -of 0.0; Decimal('1.00') % Decimal('0.1') returns the expected -Decimal('0.00')). 
- -Here are some examples of using the decimal module: - ->>> from decimal import * ->>> setcontext(ExtendedContext) ->>> Decimal(0) -Decimal('0') ->>> Decimal('1') -Decimal('1') ->>> Decimal('-.0123') -Decimal('-0.0123') ->>> Decimal(123456) -Decimal('123456') ->>> Decimal('123.45e12345678') -Decimal('1.2345E+12345680') ->>> Decimal('1.33') + Decimal('1.27') -Decimal('2.60') ->>> Decimal('12.34') + Decimal('3.87') - Decimal('18.41') -Decimal('-2.20') ->>> dig = Decimal(1) ->>> print(dig / Decimal(3)) -0.333333333 ->>> getcontext().prec = 18 ->>> print(dig / Decimal(3)) -0.333333333333333333 ->>> print(dig.sqrt()) -1 ->>> print(Decimal(3).sqrt()) -1.73205080756887729 ->>> print(Decimal(3) ** 123) -4.85192780976896427E+58 ->>> inf = Decimal(1) / Decimal(0) ->>> print(inf) -Infinity ->>> neginf = Decimal(-1) / Decimal(0) ->>> print(neginf) --Infinity ->>> print(neginf + inf) -NaN ->>> print(neginf * inf) --Infinity ->>> print(dig / 0) -Infinity ->>> getcontext().traps[DivisionByZero] = 1 ->>> print(dig / 0) -Traceback (most recent call last): - ... - ... - ... -decimal.DivisionByZero: x / 0 ->>> c = Context() ->>> c.traps[InvalidOperation] = 0 ->>> print(c.flags[InvalidOperation]) -0 ->>> c.divide(Decimal(0), Decimal(0)) -Decimal('NaN') ->>> c.traps[InvalidOperation] = 1 ->>> print(c.flags[InvalidOperation]) -1 ->>> c.flags[InvalidOperation] = 0 ->>> print(c.flags[InvalidOperation]) -0 ->>> print(c.divide(Decimal(0), Decimal(0))) -Traceback (most recent call last): - ... - ... - ... -decimal.InvalidOperation: 0 / 0 ->>> print(c.flags[InvalidOperation]) -1 ->>> c.flags[InvalidOperation] = 0 ->>> c.traps[InvalidOperation] = 0 ->>> print(c.divide(Decimal(0), Decimal(0))) -NaN ->>> print(c.flags[InvalidOperation]) -1 ->>> -""" +"""Python decimal arithmetic module""" __all__ = [ # Two major classes @@ -521,7 +424,7 @@ def sin(x): # numbers.py for more detail. class Decimal(object): - """Floating point class for decimal arithmetic.""" + """Floating-point class for decimal arithmetic.""" __slots__ = ('_exp','_int','_sign', '_is_special') # Generally, the value of the Decimal instance is given by @@ -2228,10 +2131,16 @@ def _power_exact(self, other, p): else: return None - if xc >= 10**p: + # An exact power of 10 is representable, but can convert to a + # string of any length. But an exact power of 10 shouldn't be + # possible at this point. + assert xc > 1, self + assert xc % 10 != 0, self + strxc = str(xc) + if len(strxc) > p: return None xe = -e-xe - return _dec_from_triple(0, str(xc), xe) + return _dec_from_triple(0, strxc, xe) # now y is positive; find m and n such that y = m/n if ye >= 0: @@ -2281,13 +2190,18 @@ def _power_exact(self, other, p): return None xc = xc**m xe *= m - if xc > 10**p: + # An exact power of 10 is representable, but can convert to a string + # of any length. But an exact power of 10 shouldn't be possible at + # this point. 
+ assert xc > 1, self + assert xc % 10 != 0, self + str_xc = str(xc) + if len(str_xc) > p: return None # by this point the result *is* exactly representable # adjust the exponent to get as close as possible to the ideal # exponent, if necessary - str_xc = str(xc) if other._isinteger() and other._sign == 0: ideal_exponent = self._exp*int(other) zeros = min(xe-ideal_exponent, p-len(str_xc)) diff --git a/python/python3_12/examples/_pyio.py b/python/python3_13/examples/_pyio.py similarity index 99% rename from python/python3_12/examples/_pyio.py rename to python/python3_13/examples/_pyio.py index 7f247ff47c..a3fede6992 100644 --- a/python/python3_12/examples/_pyio.py +++ b/python/python3_13/examples/_pyio.py @@ -33,11 +33,8 @@ # Rebind for compatibility BlockingIOError = BlockingIOError -# Does io.IOBase finalizer log the exception if the close() method fails? -# The exception is ignored silently by default in release build. -_IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) # Does open() check its 'errors' argument? -_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE +_CHECK_ERRORS = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) def text_encoding(encoding, stacklevel=2): @@ -416,18 +413,9 @@ def __del__(self): if closed: return - if _IOBASE_EMITS_UNRAISABLE: - self.close() - else: - # The try/except block is in case this is called at program - # exit time, when it's possible that globals have already been - # deleted, and then the close() call might fail. Since - # there's nothing we can do about such failures and they annoy - # the end users, we suppress the traceback. - try: - self.close() - except: - pass + # If close() fails, the caller logs the exception with + # sys.unraisablehook. close() must be called at the end at __del__(). + self.close() ### Inquiries ### @@ -1209,7 +1197,8 @@ def _readinto(self, buf, read1): return written def tell(self): - return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos + # GH-95782: Keep return value non-negative + return max(_BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos, 0) def seek(self, pos, whence=0): if whence not in valid_seek_flags: @@ -1507,6 +1496,11 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if isinstance(file, float): raise TypeError('integer argument expected, got float') if isinstance(file, int): + if isinstance(file, bool): + import warnings + warnings.warn("bool is used as a file descriptor", + RuntimeWarning, stacklevel=2) + file = int(file) fd = file if fd < 0: raise ValueError('negative file descriptor') @@ -2210,8 +2204,9 @@ def write(self, s): self.buffer.write(b) if self._line_buffering and (haslf or "\r" in s): self.flush() - self._set_decoded_chars('') - self._snapshot = None + if self._snapshot is not None: + self._set_decoded_chars('') + self._snapshot = None if self._decoder: self._decoder.reset() return length @@ -2525,8 +2520,9 @@ def read(self, size=None): # Read everything. result = (self._get_decoded_chars() + decoder.decode(self.buffer.read(), final=True)) - self._set_decoded_chars('') - self._snapshot = None + if self._snapshot is not None: + self._set_decoded_chars('') + self._snapshot = None return result else: # Keep reading chunks until we have size characters to return. 
diff --git a/python/python3_12/examples/_pylong.py b/python/python3_13/examples/_pylong.py similarity index 58% rename from python/python3_12/examples/_pylong.py rename to python/python3_13/examples/_pylong.py index 936346e187..4970eb3fa6 100644 --- a/python/python3_12/examples/_pylong.py +++ b/python/python3_13/examples/_pylong.py @@ -14,7 +14,91 @@ import re import decimal - +try: + import _decimal +except ImportError: + _decimal = None + +# A number of functions have this form, where `w` is a desired number of +# digits in base `base`: +# +# def inner(...w...): +# if w <= LIMIT: +# return something +# lo = w >> 1 +# hi = w - lo +# something involving base**lo, inner(...lo...), j, and inner(...hi...) +# figure out largest w needed +# result = inner(w) +# +# They all had some on-the-fly scheme to cache `base**lo` results for reuse. +# Power is costly. +# +# This routine aims to compute all amd only the needed powers in advance, as +# efficiently as reasonably possible. This isn't trivial, and all the +# on-the-fly methods did needless work in many cases. The driving code above +# changes to: +# +# figure out largest w needed +# mycache = compute_powers(w, base, LIMIT) +# result = inner(w) +# +# and `mycache[lo]` replaces `base**lo` in the inner function. +# +# While this does give minor speedups (a few percent at best), the primary +# intent is to simplify the functions using this, by eliminating the need for +# them to craft their own ad-hoc caching schemes. +def compute_powers(w, base, more_than, show=False): + seen = set() + need = set() + ws = {w} + while ws: + w = ws.pop() # any element is fine to use next + if w in seen or w <= more_than: + continue + seen.add(w) + lo = w >> 1 + # only _need_ lo here; some other path may, or may not, need hi + need.add(lo) + ws.add(lo) + if w & 1: + ws.add(lo + 1) + + d = {} + if not need: + return d + it = iter(sorted(need)) + first = next(it) + if show: + print("pow at", first) + d[first] = base ** first + for this in it: + if this - 1 in d: + if show: + print("* base at", this) + d[this] = d[this - 1] * base # cheap + else: + lo = this >> 1 + hi = this - lo + assert lo in d + if show: + print("square at", this) + # Multiplying a bigint by itself (same object!) is about twice + # as fast in CPython. + sq = d[lo] * d[lo] + if hi != lo: + assert hi == lo + 1 + if show: + print(" and * base") + sq *= base + d[this] = sq + return d + +_unbounded_dec_context = decimal.getcontext().copy() +_unbounded_dec_context.prec = decimal.MAX_PREC +_unbounded_dec_context.Emax = decimal.MAX_EMAX +_unbounded_dec_context.Emin = decimal.MIN_EMIN +_unbounded_dec_context.traps[decimal.Inexact] = 1 # sanity check def int_to_decimal(n): """Asymptotically fast conversion of an 'int' to Decimal.""" @@ -29,61 +113,76 @@ def int_to_decimal(n): # "clever" recursive way. If we want a string representation, we # apply str to _that_. - D = decimal.Decimal - D2 = D(2) - - BITLIM = 128 - - mem = {} - - def w2pow(w): - """Return D(2)**w and store the result. Also possibly save some - intermediate results. In context, these are likely to be reused - across various levels of the conversion to Decimal.""" - if (result := mem.get(w)) is None: - if w <= BITLIM: - result = D2**w - elif w - 1 in mem: - result = (t := mem[w - 1]) + t - else: - w2 = w >> 1 - # If w happens to be odd, w-w2 is one larger then w2 - # now. Recurse on the smaller first (w2), so that it's - # in the cache and the larger (w-w2) can be handled by - # the cheaper `w-1 in mem` branch instead. 
- result = w2pow(w2) * w2pow(w - w2) - mem[w] = result - return result + from decimal import Decimal as D + BITLIM = 200 + # Don't bother caching the "lo" mask in this; the time to compute it is + # tiny compared to the multiply. def inner(n, w): if w <= BITLIM: return D(n) w2 = w >> 1 hi = n >> w2 - lo = n - (hi << w2) - return inner(lo, w2) + inner(hi, w - w2) * w2pow(w2) - - with decimal.localcontext() as ctx: - ctx.prec = decimal.MAX_PREC - ctx.Emax = decimal.MAX_EMAX - ctx.Emin = decimal.MIN_EMIN - ctx.traps[decimal.Inexact] = 1 + lo = n & ((1 << w2) - 1) + return inner(lo, w2) + inner(hi, w - w2) * w2pow[w2] + with decimal.localcontext(_unbounded_dec_context): + nbits = n.bit_length() + w2pow = compute_powers(nbits, D(2), BITLIM) if n < 0: negate = True n = -n else: negate = False - result = inner(n, n.bit_length()) + result = inner(n, nbits) if negate: result = -result return result - def int_to_decimal_string(n): """Asymptotically fast conversion of an 'int' to a decimal string.""" - return str(int_to_decimal(n)) - + w = n.bit_length() + if w > 450_000 and _decimal is not None: + # It is only usable with the C decimal implementation. + # _pydecimal.py calls str() on very large integers, which in its + # turn calls int_to_decimal_string(), causing very deep recursion. + return str(int_to_decimal(n)) + + # Fallback algorithm for the case when the C decimal module isn't + # available. This algorithm is asymptotically worse than the algorithm + # using the decimal module, but better than the quadratic time + # implementation in longobject.c. + + DIGLIM = 1000 + def inner(n, w): + if w <= DIGLIM: + return str(n) + w2 = w >> 1 + hi, lo = divmod(n, pow10[w2]) + return inner(hi, w - w2) + inner(lo, w2).zfill(w2) + + # The estimation of the number of decimal digits. + # There is no harm in small error. If we guess too large, there may + # be leading 0's that need to be stripped. If we guess too small, we + # may need to call str() recursively for the remaining highest digits, + # which can still potentially be a large integer. This is manifested + # only if the number has way more than 10**15 digits, that exceeds + # the 52-bit physical address limit in both Intel64 and AMD64. + w = int(w * 0.3010299956639812 + 1) # log10(2) + pow10 = compute_powers(w, 5, DIGLIM) + for k, v in pow10.items(): + pow10[k] = v << k # 5**k << k == 5**k * 2**k == 10**k + if n < 0: + n = -n + sign = '-' + else: + sign = '' + s = inner(n, w) + if s[0] == '0' and n: + # If our guess of w is too large, there may be leading 0's that + # need to be stripped. + s = s.lstrip('0') + return sign + s def _str_to_int_inner(s): """Asymptotically fast conversion of a 'str' to an 'int'.""" @@ -100,35 +199,15 @@ def _str_to_int_inner(s): DIGLIM = 2048 - mem = {} - - def w5pow(w): - """Return 5**w and store the result. - Also possibly save some intermediate results. In context, these - are likely to be reused across various levels of the conversion - to 'int'. - """ - if (result := mem.get(w)) is None: - if w <= DIGLIM: - result = 5**w - elif w - 1 in mem: - result = mem[w - 1] * 5 - else: - w2 = w >> 1 - # If w happens to be odd, w-w2 is one larger then w2 - # now. Recurse on the smaller first (w2), so that it's - # in the cache and the larger (w-w2) can be handled by - # the cheaper `w-1 in mem` branch instead. 
- result = w5pow(w2) * w5pow(w - w2) - mem[w] = result - return result - def inner(a, b): if b - a <= DIGLIM: return int(s[a:b]) mid = (a + b + 1) >> 1 - return inner(mid, b) + ((inner(a, mid) * w5pow(b - mid)) << (b - mid)) + return (inner(mid, b) + + ((inner(a, mid) * w5pow[b - mid]) + << (b - mid))) + w5pow = compute_powers(len(s), 5, DIGLIM) return inner(0, len(s)) @@ -142,7 +221,6 @@ def int_from_string(s): s = s.rstrip().replace('_', '') return _str_to_int_inner(s) - def str_to_int(s): """Asymptotically fast version of decimal string to 'int' conversion.""" # FIXME: this doesn't support the full syntax that int() supports. diff --git a/python/python3_12/examples/_sitebuiltins.py b/python/python3_13/examples/_sitebuiltins.py similarity index 100% rename from python/python3_12/examples/_sitebuiltins.py rename to python/python3_13/examples/_sitebuiltins.py diff --git a/python/python3_12/examples/_strptime.py b/python/python3_13/examples/_strptime.py similarity index 75% rename from python/python3_12/examples/_strptime.py rename to python/python3_13/examples/_strptime.py index 77ccdc9e1d..4c68a6a88e 100644 --- a/python/python3_12/examples/_strptime.py +++ b/python/python3_13/examples/_strptime.py @@ -10,10 +10,12 @@ strptime -- Calculates the time struct represented by the passed-in string """ +import os import time import locale import calendar from re import compile as re_compile +from re import sub as re_sub from re import IGNORECASE from re import escape as re_escape from datetime import (date as datetime_date, @@ -27,6 +29,18 @@ def _getlang(): # Figure out what the current language is set to. return locale.getlocale(locale.LC_TIME) +def _findall(haystack, needle): + # Find all positions of needle in haystack. + if not needle: + return + i = 0 + while True: + i = haystack.find(needle, i) + if i < 0: + break + yield i + i += len(needle) + class LocaleTime(object): """Stores and handles locale-specific information related to time. @@ -101,7 +115,8 @@ def __calc_am_pm(self): am_pm = [] for hour in (1, 22): time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0)) - am_pm.append(time.strftime("%p", time_tuple).lower()) + # br_FR has AM/PM info (' ',' '). + am_pm.append(time.strftime("%p", time_tuple).lower().strip()) self.am_pm = am_pm def __calc_date_time(self): @@ -113,42 +128,130 @@ def __calc_date_time(self): # values within the format string is very important; it eliminates # possible ambiguity for what something represents. time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) - date_time = [None, None, None] - date_time[0] = time.strftime("%c", time_tuple).lower() - date_time[1] = time.strftime("%x", time_tuple).lower() - date_time[2] = time.strftime("%X", time_tuple).lower() - replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), - (self.f_month[3], '%B'), (self.a_weekday[2], '%a'), - (self.a_month[3], '%b'), (self.am_pm[1], '%p'), - ('1999', '%Y'), ('99', '%y'), ('22', '%H'), - ('44', '%M'), ('55', '%S'), ('76', '%j'), - ('17', '%d'), ('03', '%m'), ('3', '%m'), - # '3' needed for when no leading zero. 
-                             ('2', '%w'), ('10', '%I')]
-        replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone
-                                                for tz in tz_values])
-        for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')):
-            current_format = date_time[offset]
-            for old, new in replacement_pairs:
+        time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0))
+        replacement_pairs = [
+            ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
+            ('44', '%M'), ('55', '%S'), ('76', '%j'),
+            ('17', '%d'), ('03', '%m'), ('3', '%m'),
+            # '3' needed for when no leading zero.
+            ('2', '%w'), ('10', '%I'),
+            # Non-ASCII digits
+            ('\u0661\u0669\u0669\u0669', '%Y'),
+            ('\u0669\u0669', '%Oy'),
+            ('\u0662\u0662', '%OH'),
+            ('\u0664\u0664', '%OM'),
+            ('\u0665\u0665', '%OS'),
+            ('\u0661\u0667', '%Od'),
+            ('\u0660\u0663', '%Om'),
+            ('\u0663', '%Om'),
+            ('\u0662', '%Ow'),
+            ('\u0661\u0660', '%OI'),
+        ]
+        date_time = []
+        for directive in ('%c', '%x', '%X'):
+            current_format = time.strftime(directive, time_tuple).lower()
+            current_format = current_format.replace('%', '%%')
+            # The month and the day of the week formats are treated specially
+            # because of a possible ambiguity in some locales where the full
+            # and abbreviated names are equal or names of different types
+            # are equal. See doc of __find_month_format for more details.
+            lst, fmt = self.__find_weekday_format(directive)
+            if lst:
+                current_format = current_format.replace(lst[2], fmt, 1)
+            lst, fmt = self.__find_month_format(directive)
+            if lst:
+                current_format = current_format.replace(lst[3], fmt, 1)
+            if self.am_pm[1]:
                 # Must deal with possible lack of locale info
                 # manifesting itself as the empty string (e.g., Swedish's
                 # lack of AM/PM info) or a platform returning a tuple of empty
                 # strings (e.g., MacOS 9 having timezone as ('','')).
-                if old:
-                    current_format = current_format.replace(old, new)
+                current_format = current_format.replace(self.am_pm[1], '%p')
+            for tz_values in self.timezone:
+                for tz in tz_values:
+                    if tz:
+                        current_format = current_format.replace(tz, "%Z")
+            # Transform all non-ASCII digits to digits in range U+0660 to U+0669.
+            current_format = re_sub(r'\d(?
             'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
             'f': r"(?P<f>[0-9]{1,6})",
             'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
-            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
+            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])",
             'G': r"(?P<G>\d\d\d\d)",
             'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
             'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
@@ -210,11 +313,15 @@ def __init__(self, locale_time=None):
             'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone
                                  for tz in tz_names), 'Z'),
-            '%': '%'})
-        base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
-        base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
-        base.__setitem__('x', self.pattern(self.locale_time.LC_date))
+            '%': '%'}
+        for d in 'dmyHIMS':
+            mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d
+        mapping['Ow'] = r'(?P<w>\d)'
+        mapping['W'] = mapping['U'].replace('U', 'W')
+        base.__init__(mapping)
         base.__setitem__('X', self.pattern(self.locale_time.LC_time))
+        base.__setitem__('x', self.pattern(self.locale_time.LC_date))
+        base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))

     def __seqToRE(self, to_convert, directive):
         """Convert a list to a regex string for matching a directive.
@@ -242,21 +349,36 @@ def pattern(self, format):
         regex syntax are escaped.
         """
-        processed_format = ''
         # The sub() call escapes all characters that might be misconstrued
         # as regex syntax. Cannot use re.escape since we have to deal with
         # format directives (%m, etc.).
- regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])") - format = regex_chars.sub(r"\\\1", format) - whitespace_replacement = re_compile(r'\s+') - format = whitespace_replacement.sub(r'\\s+', format) - while '%' in format: - directive_index = format.index('%')+1 - processed_format = "%s%s%s" % (processed_format, - format[:directive_index-1], - self[format[directive_index]]) - format = format[directive_index+1:] - return "%s%s" % (processed_format, format) + format = re_sub(r"([\\.^$*+?\(\){}\[\]|])", r"\\\1", format) + format = re_sub(r'\s+', r'\\s+', format) + format = re_sub(r"'", "['\u02bc]", format) # needed for br_FR + year_in_format = False + day_of_month_in_format = False + def repl(m): + format_char = m[1] + match format_char: + case 'Y' | 'y' | 'G': + nonlocal year_in_format + year_in_format = True + case 'd': + nonlocal day_of_month_in_format + day_of_month_in_format = True + return self[format_char] + format = re_sub(r'%(O?.)', repl, format) + if day_of_month_in_format and not year_in_format: + import warnings + warnings.warn("""\ +Parsing dates involving a day of month without a year specified is ambiguious +and fails to parse leap day. The default behavior will change in Python 3.15 +to either always raise an exception or to use a different default year (TBD). +To avoid trouble, add a specific year to the input & format. +See https://github.com/python/cpython/issues/70647.""", + DeprecationWarning, + skip_file_prefixes=(os.path.dirname(__file__),)) + return format def compile(self, format): """Return a compiled re object for the format string.""" @@ -342,8 +464,6 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): tz = -1 gmtoff = None gmtoff_fraction = 0 - # Default to -1 to signify that values not known; not critical to have, - # though iso_week = week_of_year = None week_of_year_start = None # weekday and julian defaulted to None so as to signal need to calculate @@ -470,17 +590,17 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # Deal with the cases where ambiguities arise # don't assume default values for ISO week/year - if year is None and iso_year is not None: - if iso_week is None or weekday is None: - raise ValueError("ISO year directive '%G' must be used with " - "the ISO week directive '%V' and a weekday " - "directive ('%A', '%a', '%w', or '%u').") + if iso_year is not None: if julian is not None: raise ValueError("Day of the year directive '%j' is not " "compatible with ISO year directive '%G'. " "Use '%Y' instead.") - elif week_of_year is None and iso_week is not None: - if weekday is None: + elif iso_week is None or weekday is None: + raise ValueError("ISO year directive '%G' must be used with " + "the ISO week directive '%V' and a weekday " + "directive ('%A', '%a', '%w', or '%u').") + elif iso_week is not None: + if year is None or weekday is None: raise ValueError("ISO week directive '%V' must be used with " "the ISO year directive '%G' and a weekday " "directive ('%A', '%a', '%w', or '%u').") @@ -490,11 +610,12 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): "instead.") leap_year_fix = False - if year is None and month == 2 and day == 29: - year = 1904 # 1904 is first leap year of 20th century - leap_year_fix = True - elif year is None: - year = 1900 + if year is None: + if month == 2 and day == 29: + year = 1904 # 1904 is first leap year of 20th century + leap_year_fix = True + else: + year = 1900 # If we know the week of the year and what day of that week, we can figure # out the Julian day of the year. 
diff --git a/python/python3_12/examples/_threading_local.py b/python/python3_13/examples/_threading_local.py
similarity index 100%
rename from python/python3_12/examples/_threading_local.py
rename to python/python3_13/examples/_threading_local.py
diff --git a/python/python3_12/examples/_weakrefset.py b/python/python3_13/examples/_weakrefset.py
similarity index 100%
rename from python/python3_12/examples/_weakrefset.py
rename to python/python3_13/examples/_weakrefset.py
diff --git a/python/python3_13/pom.xml b/python/python3_13/pom.xml
new file mode 100644
index 0000000000..08dd34f0cc
--- /dev/null
+++ b/python/python3_13/pom.xml
@@ -0,0 +1,56 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>Python3</artifactId>
+    <packaging>jar</packaging>
+    <name>Python3 grammar</name>
+    <parent>
+        <groupId>org.antlr.grammars</groupId>
+        <artifactId>pythonparent</artifactId>
+        <version>1.0-SNAPSHOT</version>
+    </parent>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.antlr</groupId>
+                <artifactId>antlr4-maven-plugin</artifactId>
+                <version>${antlr.version}</version>
+                <configuration>
+                    <sourceDirectory>${basedir}</sourceDirectory>
+                    <includes>
+                        <include>PythonLexer.g4</include>
+                        <include>PythonParser.g4</include>
+                    </includes>
+                    <visitor>true</visitor>
+                    <listener>true</listener>
+                </configuration>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>antlr4</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>com.khubla.antlr</groupId>
+                <artifactId>antlr4test-maven-plugin</artifactId>
+                <version>${antlr4test-maven-plugin.version}</version>
+                <configuration>
+                    <verbose>false</verbose>
+                    <showTree>false</showTree>
+                    <entryPoint>file_input</entryPoint>
+                    <grammarName>Python</grammarName>
+                    <packageName></packageName>
+                    <exampleFiles>examples/</exampleFiles>
+                </configuration>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>test</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
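For reference, a minimal driver sketch (not part of the patch) showing how the relocated grammar is typically exercised from the ANTLR Python runtime, mirroring what the `antlr4test-maven-plugin` configuration above does: `file_input` is the entry rule, and the inputs live under `examples/`. The `antlr4` calls are the standard runtime API; `PythonLexer`/`PythonParser` are the modules ANTLR generates from the `.g4` files in this patch, and the `PythonLexerBase` from this patch must also be importable.

```python
# Minimal driver sketch for the python3_13 grammar (assumes the ANTLR-generated
# PythonLexer/PythonParser and this patch's PythonLexerBase are on the path).
import sys

from antlr4 import CommonTokenStream, FileStream
from PythonLexer import PythonLexer
from PythonParser import PythonParser


def parse_file(path: str) -> bool:
    lexer = PythonLexer(FileStream(path, encoding="utf-8"))
    parser = PythonParser(CommonTokenStream(lexer))
    tree = parser.file_input()  # start rule; matches <entryPoint> in pom.xml
    print(tree.toStringTree(recog=parser))
    return parser.getNumberOfSyntaxErrors() == 0


if __name__ == "__main__":
    ok = parse_file(sys.argv[1] if len(sys.argv) > 1 else "examples/__hello__.py")
    sys.exit(0 if ok else 1)
```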