
Commit

update to Python 3.13.2
RobEin committed Feb 10, 2025
1 parent 14fc51d commit 556d232
Showing 111 changed files with 4,984 additions and 12,976 deletions.
2 changes: 2 additions & 0 deletions python/python2_7_18/CSharp/AssemblyInfo.cs
@@ -0,0 +1,2 @@
[assembly: CLSCompliant(true)]

24 changes: 11 additions & 13 deletions python/python2_7_18/CSharp/PythonLexerBase.cs
@@ -34,10 +34,10 @@ THE SOFTWARE.
public abstract class PythonLexerBase : Lexer
{
// A stack that keeps track of the indentation lengths
private Stack<int> indentLengthStack;
private Stack<int> indentLengthStack = new();
// A list where tokens are waiting to be loaded into the token stream
private LinkedList<IToken> pendingTokens;
private LinkedList<IToken> pendingTokens = new();

// last pending token types
private int previousPendingTokenType;
private int lastPendingTokenTypeFromDefaultChannel;
@@ -49,26 +49,24 @@ public abstract class PythonLexerBase : Lexer
private bool wasTabIndentation;
private bool wasIndentationMixedWithSpacesAndTabs;

private IToken curToken; // current (under processing) token
private IToken ffgToken; // following (look ahead) token
private IToken curToken = null!; // current (under processing) token
private IToken ffgToken = null!; // following (look ahead) token

private const int INVALID_LENGTH = -1;
private const string ERR_TXT = " ERROR: ";

protected PythonLexerBase(ICharStream input) : base(input)
{
this.Init();
}

protected PythonLexerBase(ICharStream input, TextWriter output, TextWriter errorOutput) : base(input, output, errorOutput)
{
this.Init();
}

public override IToken NextToken() // reads the input stream until an EOF token is returned
{
this.CheckNextToken();
IToken firstPendingToken = this.pendingTokens.First.Value;
IToken firstPendingToken = this.pendingTokens.First!.Value;
this.pendingTokens.RemoveFirst();
return firstPendingToken; // add the queued token to the token stream
}
@@ -78,11 +76,11 @@ public override void Reset()
this.Init();
base.Reset();
}

private void Init()
{
this.indentLengthStack = new Stack<int>();
this.pendingTokens = new LinkedList<IToken>();
this.indentLengthStack = new();
this.pendingTokens = new();
this.previousPendingTokenType = 0;
this.lastPendingTokenTypeFromDefaultChannel = 0;
this.opened = 0;
@@ -180,7 +178,7 @@ private void InsertLeadingIndentToken()
{
if (this.previousPendingTokenType == PythonLexer.WS)
{
var prevToken = this.pendingTokens.Last.Value;
var prevToken = this.pendingTokens.Last!.Value;
if (this.GetIndentationLength(prevToken.Text) != 0) // there is an "indentation" before the first statement
{
const string errMsg = "first statement indented";
@@ -302,7 +300,7 @@ private void HideAndAddPendingToken(IToken tkn)
this.AddPendingToken(ctkn);
}

private void CreateAndAddPendingToken(int ttype, int channel, string text, IToken sampleToken)
private void CreateAndAddPendingToken(int ttype, int channel, string? text, IToken sampleToken)
{
CommonToken ctkn = new CommonToken(sampleToken);
ctkn.Type = ttype;
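A minimal sketch (not from the commit itself) of how the lexer built on this base class is typically driven: the comments above describe a pending-token queue that `NextToken()` drains, with `INDENT`/`DEDENT` synthesized from the indentation length stack. This assumes the Antlr4.Runtime package and a `PythonLexer` generated from the grammar below; the `TokenDump` class name is illustrative.

```csharp
using System;
using Antlr4.Runtime;

public static class TokenDump
{
    public static void Main()
    {
        // One indented block: PythonLexerBase should queue an INDENT before "pass"
        // and a DEDENT before EOF, taken from its indentation length stack.
        const string source = "if x:\n    pass\n";

        var lexer = new PythonLexer(new AntlrInputStream(source));
        var tokens = new CommonTokenStream(lexer);
        tokens.Fill(); // drives NextToken() until EOF, draining the pending-token queue

        foreach (IToken t in tokens.GetTokens())
        {
            string text = t.Text?.Replace("\n", "\\n") ?? "";
            Console.WriteLine($"{lexer.Vocabulary.GetSymbolicName(t.Type),-12} {text}");
        }
    }
}
```

For this two-line input the dump should show an `INDENT` before `pass` and a matching `DEDENT` before `EOF`, neither of which corresponds to any characters in the source.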
5 changes: 0 additions & 5 deletions python/python2_7_18/Python3/README.md

This file was deleted.

29 changes: 0 additions & 29 deletions python/python2_7_18/Python3/transformGrammar.py

This file was deleted.

115 changes: 62 additions & 53 deletions python/python2_7_18/PythonLexer.g4
@@ -28,46 +28,17 @@ THE SOFTWARE.
*/

lexer grammar PythonLexer;

options { superClass=PythonLexerBase; }
tokens { INDENT, DEDENT } // https://docs.python.org/2.7/reference/lexical_analysis.html#indentation

tokens {
INDENT, DEDENT // https://docs.python.org/2.7/reference/lexical_analysis.html#indentation
}

/*
* lexer rules // https://docs.python.org/2.7/library/tokenize.html
*/

// https://docs.python.org/2.7/reference/lexical_analysis.html#keywords
AND : 'and';
AS : 'as';
ASSERT : 'assert';
BREAK : 'break';
CLASS : 'class';
CONTINUE : 'continue';
DEF : 'def';
DEL : 'del';
ELIF : 'elif';
ELSE : 'else';
EXCEPT : 'except';
EXEC : 'exec';
FINALLY : 'finally';
FOR : 'for';
FROM : 'from';
GLOBAL : 'global';
IF : 'if';
IMPORT : 'import';
IN : 'in';
IS : 'is';
LAMBDA : 'lambda';
NOT : 'not';
OR : 'or';
PASS : 'pass';
PRINT : 'print';
RAISE : 'raise';
RETURN : 'return';
TRY : 'try';
WHILE : 'while';
WITH : 'with';
YIELD : 'yield';

// https://docs.python.org/2.7/library/token.html#token.OP
LPAR : '('; // OPEN_PAREN
LSQB : '['; // OPEN_BRACK
@@ -115,6 +86,38 @@ DOUBLESLASH : '//';
DOUBLESLASHEQUAL : '//=';
AT : '@';

// https://docs.python.org/2.7/reference/lexical_analysis.html#keywords
AND : 'and';
AS : 'as';
ASSERT : 'assert';
BREAK : 'break';
CLASS : 'class';
CONTINUE : 'continue';
DEF : 'def';
DEL : 'del';
ELIF : 'elif';
ELSE : 'else';
EXCEPT : 'except';
EXEC : 'exec';
FINALLY : 'finally';
FOR : 'for';
FROM : 'from';
GLOBAL : 'global';
IF : 'if';
IMPORT : 'import';
IN : 'in';
IS : 'is';
LAMBDA : 'lambda';
NOT : 'not';
OR : 'or';
PASS : 'pass';
PRINT : 'print';
RAISE : 'raise';
RETURN : 'return';
TRY : 'try';
WHILE : 'while';
WITH : 'with';
YIELD : 'yield';

// https://docs.python.org/2.7/reference/lexical_analysis.html#identifiers
NAME : IDENTIFIER;
@@ -134,15 +137,16 @@ STRING : STRING_LITERAL;
NEWLINE : '\r'? '\n'; // Unix, Windows

// https://docs.python.org/2.7/reference/lexical_analysis.html#comments
COMMENT : '#' ~[\r\n]* -> channel(HIDDEN);
COMMENT : '#' ~[\r\n]* -> channel(HIDDEN);

// https://docs.python.org/2.7/reference/lexical_analysis.html#whitespace-between-tokens
WS : [ \t\f]+ -> channel(HIDDEN);
WS : [ \t\f]+ -> channel(HIDDEN);

// https://docs.python.org/2.7/reference/lexical_analysis.html#explicit-line-joining
EXPLICIT_LINE_JOINING : '\\' NEWLINE -> channel(HIDDEN);
EXPLICIT_LINE_JOINING : BACKSLASH_NEWLINE -> channel(HIDDEN);

ERRORTOKEN : . ; // catch unrecognized characters and redirect these errors to the parser
// catch the unrecognized character(s)
ERRORTOKEN : . ; // PythonLexerBase class will report an error about this (the ERRORTOKEN will also cause an error in the parser)


/*
@@ -153,30 +157,35 @@ ERRORTOKEN : . ; // catch unrecognized characters and redirect these errors to the parser

// https://docs.python.org/2.7/reference/lexical_analysis.html#string-literals
fragment STRING_LITERAL : STRING_PREFIX? (SHORT_STRING | LONG_STRING);
fragment STRING_PREFIX : 'r' | 'u' | 'ur' | 'R' | 'U' | 'UR' | 'Ur' | 'uR' | 'b' | 'B' | 'br' | 'Br' | 'bR' | 'BR';

// 'r' | 'u' | 'ur' | 'R' | 'U' | 'UR' | 'Ur' | 'uR' | 'b' | 'B' | 'br' | 'Br' | 'bR' | 'BR';
fragment STRING_PREFIX options { caseInsensitive=true; } : 'r' | 'u' | 'ur' | 'b' | 'br';

fragment SHORT_STRING
: '\'' SHORT_STRING_ITEM_FOR_SINGLE_QUOTE* '\''
| '"' SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE* '"'
;
: ['] SHORT_STRING_ITEM_FOR_SINGLE_QUOTE* [']
| ["] SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE* ["]
;

fragment LONG_STRING
: '\'\'\'' LONG_STRING_ITEM*? '\'\'\''
| '"""' LONG_STRING_ITEM*? '"""'
;
: ['][']['] LONG__STRING_ITEM*? ['][']['] // non-greedy
| ["]["]["] LONG__STRING_ITEM*? ["]["]["] // non-greedy
;

fragment SHORT_STRING_ITEM_FOR_SINGLE_QUOTE : SHORT_STRING_CHAR_NO_SINGLE_QUOTE | ESCAPE_SEQ;
fragment SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE : SHORT_STRING_CHAR_NO_DOUBLE_QUOTE | ESCAPE_SEQ;

fragment LONG_STRING_ITEM : LONG_STRING_CHAR | ESCAPE_SEQ;
fragment LONG__STRING_ITEM : LONG_STRING_CHAR | ESCAPE_SEQ;

fragment SHORT_STRING_CHAR_NO_SINGLE_QUOTE : ~[\\\r\n']; // <any source character except "\" or newline or single quote>
fragment SHORT_STRING_CHAR_NO_DOUBLE_QUOTE : ~[\\\r\n"]; // <any source character except "\" or newline or double quote>
fragment LONG_STRING_CHAR : ~'\\'; // <any source character except "\">
fragment ESCAPE_SEQ // https://docs.python.org/2.7/reference/lexical_analysis.html#string-literals
: '\\' '\r' '\n' // for the two-character Windows line break: \<newline> escape sequence (string literal line continuation)
| '\\' [\u0000-\u007F] // "\" <any ASCII character>
;
fragment SHORT_STRING_CHAR_NO_SINGLE_QUOTE : ~[\\\r\n']; // <any source character except "\" or newline or single quote>
fragment SHORT_STRING_CHAR_NO_DOUBLE_QUOTE : ~[\\\r\n"]; // <any source character except "\" or newline or double quote>
fragment LONG_STRING_CHAR : ~'\\'; // <any source character except "\">
// https://docs.python.org/2.7/reference/lexical_analysis.html#string-literals
fragment ESCAPE_SEQ : ESCAPE_SEQ_NEWLINE | '\\' [\u0000-\u007F]; // "\" <any ASCII character>
fragment ESCAPE_SEQ_NEWLINE : BACKSLASH_NEWLINE; // it is a kind of line continuation for string literals (backslash and newline will be ignored)
fragment BACKSLASH_NEWLINE : '\\' NEWLINE;
// https://docs.python.org/2.7/reference/lexical_analysis.html#integer-and-long-integer-literals
fragment LONG_INTEGER : INTEGER ('l' | 'L');
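A small sketch (not from the commit itself) of how the rules above surface at run time: `COMMENT`, `WS` and `EXPLICIT_LINE_JOINING` go to the hidden channel, while the catch-all `ERRORTOKEN` picks up anything unrecognized, which PythonLexerBase then reports. This assumes a generated `PythonLexer` and the Antlr4.Runtime package; the `ChannelDemo` class name is illustrative.

```csharp
using System;
using System.Linq;
using Antlr4.Runtime;

public static class ChannelDemo
{
    public static void Main()
    {
        // "# note" becomes a hidden COMMENT token; '$' matches no rule and falls
        // through to ERRORTOKEN.
        var lexer = new PythonLexer(new AntlrInputStream("x = 1  # note\ny = $\n"));
        var tokens = new CommonTokenStream(lexer);
        tokens.Fill();

        foreach (IToken t in tokens.GetTokens())
        {
            string channel = t.Channel == 0 ? "DEFAULT" : "HIDDEN"; // channel 1 is ANTLR's HIDDEN channel
            Console.WriteLine($"{channel,-8} {lexer.Vocabulary.GetSymbolicName(t.Type),-12} {t.Text}");
        }

        bool hasError = tokens.GetTokens().Any(t => t.Type == PythonLexer.ERRORTOKEN);
        Console.WriteLine("contains ERRORTOKEN: " + hasError);
    }
}
```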
5 changes: 4 additions & 1 deletion python/python2_7_18/PythonParser.g4
@@ -26,8 +26,11 @@ THE SOFTWARE.
* Developed by : Robert Einhorn
*/

parser grammar PythonParser; // https://docs.python.org/2.7/reference/grammar.html
// https://docs.python.org/2.7/reference/grammar.html
parser grammar PythonParser;

options { tokenVocab=PythonLexer; }

// ANTLR4 grammar for Python

// Start symbols for the grammar:
11 changes: 6 additions & 5 deletions python/python2_7_18/README.md
@@ -1,13 +1,14 @@
# Python 2.7.18 parser

### About files:
- PythonParser.g4
- PythonParser.g4
is the ANTLR4 parser grammar that is based on the last official [Python 2 grammar](https://docs.python.org/2.7/reference/grammar.html)

- PythonLexerBase
handles the Python indentations

- Example files: [Python 2.7.18 Standard Lib](https://www.python.org/downloads/release/python-2718/)
- PythonLexerBase:
  - handles the Python indentation
  - and manages many other things

- Example files from: [Python 2.7.18 Standard Lib](https://www.python.org/downloads/release/python-2718/)

### Related link:
[ANTLR4-parser-for-Python-2.7.18](https://github.com/RobEin/ANTLR4-parser-for-Python-2.7.18)
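A minimal end-to-end sketch (not from the commit itself) of how the pieces listed above fit together in the usual ANTLR way. It assumes the Antlr4.Runtime package, the generated `PythonLexer`/`PythonParser` classes, and a `file_input` start rule mirroring the official Python 2.7 grammar the parser is based on; the `ParseDemo` class name is illustrative.

```csharp
using System;
using Antlr4.Runtime;

public static class ParseDemo
{
    public static void Main()
    {
        var lexer = new PythonLexer(new AntlrInputStream("print 'hello'\n"));
        var tokens = new CommonTokenStream(lexer);  // receives INDENT/DEDENT from PythonLexerBase
        var parser = new PythonParser(tokens);

        // file_input is assumed here because the parser grammar mirrors the official
        // Python 2.7 grammar; use whichever start rule the generated parser exposes.
        var tree = parser.file_input();

        Console.WriteLine(parser.NumberOfSyntaxErrors == 0
            ? tree.ToStringTree(parser)
            : $"{parser.NumberOfSyntaxErrors} syntax error(s)");
    }
}
```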
3 changes: 3 additions & 0 deletions python/python2_7_18/changes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Sept. 05, 2024
- Line continuation for string literals (backslash followed by a newline) is no longer resolved.
(backslash+newline is no longer removed from string literals)
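A short sketch (not from the commit itself) of what the note above means in practice, assuming a generated `PythonLexer` and the Antlr4.Runtime package (the `LineContinuationDemo` class name is illustrative): the backslash+newline pair now stays inside the `STRING` token's text instead of being stripped.

```csharp
using System;
using Antlr4.Runtime;

public static class LineContinuationDemo
{
    public static void Main()
    {
        // Python 2 source:  s = 'ab\<newline>cd'
        const string source = "s = 'ab\\\ncd'\n";

        var lexer = new PythonLexer(new AntlrInputStream(source));
        var tokens = new CommonTokenStream(lexer);
        tokens.Fill();

        foreach (IToken t in tokens.GetTokens())
        {
            if (t.Type == PythonLexer.STRING)
            {
                // With this change the token text still contains the backslash and the
                // newline; previously the pair was removed from the string literal.
                Console.WriteLine(t.Text);
            }
        }
    }
}
```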
4 changes: 0 additions & 4 deletions python/python2_7_18/changes.txt

This file was deleted.

10 changes: 0 additions & 10 deletions python/python2_7_18/tests/test_error_first_statement_indented.py

This file was deleted.

10 changes: 0 additions & 10 deletions python/python2_7_18/tests/test_error_inconsistent_dedent.py

This file was deleted.

8 changes: 0 additions & 8 deletions python/python2_7_18/tests/test_error_not_indented.py

This file was deleted.


9 changes: 0 additions & 9 deletions python/python2_7_18/tests/test_error_unexpected_indent.py

This file was deleted.

9 changes: 0 additions & 9 deletions python/python2_7_18/tests/test_explicit_line_joining.py

This file was deleted.

6 changes: 0 additions & 6 deletions python/python2_7_18/tests/test_formfeed_as_separator.py

This file was deleted.

6 changes: 0 additions & 6 deletions python/python2_7_18/tests/test_formfeed_at_start_of_line.py

This file was deleted.

