update to Python 3.13.2 #4409

Closed
wants to merge 1 commit into from
2 changes: 2 additions & 0 deletions python/python2_7_18/CSharp/AssemblyInfo.cs
@@ -0,0 +1,2 @@
[assembly: CLSCompliant(true)]

24 changes: 11 additions & 13 deletions python/python2_7_18/CSharp/PythonLexerBase.cs
@@ -34,10 +34,10 @@ THE SOFTWARE.
public abstract class PythonLexerBase : Lexer
{
// A stack that keeps track of the indentation lengths
private Stack<int> indentLengthStack;
private Stack<int> indentLengthStack = new();
// A list where tokens are waiting to be loaded into the token stream
private LinkedList<IToken> pendingTokens;
private LinkedList<IToken> pendingTokens = new();

// last pending token types
private int previousPendingTokenType;
private int lastPendingTokenTypeFromDefaultChannel;
@@ -49,26 +49,24 @@ public abstract class PythonLexerBase : Lexer
private bool wasTabIndentation;
private bool wasIndentationMixedWithSpacesAndTabs;

private IToken curToken; // current (under processing) token
private IToken ffgToken; // following (look ahead) token
private IToken curToken = null!; // current (under processing) token
private IToken ffgToken = null!; // following (look ahead) token

private const int INVALID_LENGTH = -1;
private const string ERR_TXT = " ERROR: ";

protected PythonLexerBase(ICharStream input) : base(input)
{
this.Init();
}

protected PythonLexerBase(ICharStream input, TextWriter output, TextWriter errorOutput) : base(input, output, errorOutput)
{
this.Init();
}

public override IToken NextToken() // reads the input stream until an EOF token is returned
{
this.CheckNextToken();
IToken firstPendingToken = this.pendingTokens.First.Value;
IToken firstPendingToken = this.pendingTokens.First!.Value;
this.pendingTokens.RemoveFirst();
return firstPendingToken; // add the queued token to the token stream
}
@@ -78,11 +76,11 @@ public override void Reset()
this.Init();
base.Reset();
}

private void Init()
{
this.indentLengthStack = new Stack<int>();
this.pendingTokens = new LinkedList<IToken>();
this.indentLengthStack = new();
this.pendingTokens = new();
this.previousPendingTokenType = 0;
this.lastPendingTokenTypeFromDefaultChannel = 0;
this.opened = 0;
@@ -180,7 +178,7 @@ private void InsertLeadingIndentToken()
{
if (this.previousPendingTokenType == PythonLexer.WS)
{
var prevToken = this.pendingTokens.Last.Value;
var prevToken = this.pendingTokens.Last!.Value;
if (this.GetIndentationLength(prevToken.Text) != 0) // there is an "indentation" before the first statement
{
const string errMsg = "first statement indented";
@@ -302,7 +300,7 @@ private void HideAndAddPendingToken(IToken tkn)
this.AddPendingToken(ctkn);
}

private void CreateAndAddPendingToken(int ttype, int channel, string text, IToken sampleToken)
private void CreateAndAddPendingToken(int ttype, int channel, string? text, IToken sampleToken)
{
CommonToken ctkn = new CommonToken(sampleToken);
ctkn.Type = ttype;
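
As a reading aid for the indentation handling that PythonLexerBase implements (the indentLengthStack and the pendingTokens queue above), here is a minimal Python 2 input annotated with the tokens the lexer base is expected to synthesize. This is a hypothetical sketch of the usual INDENT/DEDENT behaviour, not output captured from this pull request.

x = True
if x:        # ... COLON NEWLINE
    print 1  # an INDENT token is queued in pendingTokens before this line's first default-channel token
    print 2  # same indentation length as the top of indentLengthStack: no extra token
print 3      # the indentation shrinks, so a DEDENT token is queued before this line's first token
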
5 changes: 0 additions & 5 deletions python/python2_7_18/Python3/README.md

This file was deleted.

29 changes: 0 additions & 29 deletions python/python2_7_18/Python3/transformGrammar.py

This file was deleted.

115 changes: 62 additions & 53 deletions python/python2_7_18/PythonLexer.g4
@@ -28,46 +28,17 @@ THE SOFTWARE.
*/

lexer grammar PythonLexer;

options { superClass=PythonLexerBase; }
tokens { INDENT, DEDENT } // https://docs.python.org/2.7/reference/lexical_analysis.html#indentation

tokens {
INDENT, DEDENT // https://docs.python.org/2.7/reference/lexical_analysis.html#indentation
}

/*
* lexer rules // https://docs.python.org/2.7/library/tokenize.html
*/

// https://docs.python.org/2.7/reference/lexical_analysis.html#keywords
AND : 'and';
AS : 'as';
ASSERT : 'assert';
BREAK : 'break';
CLASS : 'class';
CONTINUE : 'continue';
DEF : 'def';
DEL : 'del';
ELIF : 'elif';
ELSE : 'else';
EXCEPT : 'except';
EXEC : 'exec';
FINALLY : 'finally';
FOR : 'for';
FROM : 'from';
GLOBAL : 'global';
IF : 'if';
IMPORT : 'import';
IN : 'in';
IS : 'is';
LAMBDA : 'lambda';
NOT : 'not';
OR : 'or';
PASS : 'pass';
PRINT : 'print';
RAISE : 'raise';
RETURN : 'return';
TRY : 'try';
WHILE : 'while';
WITH : 'with';
YIELD : 'yield';

// https://docs.python.org/2.7/library/token.html#token.OP
LPAR : '('; // OPEN_PAREN
LSQB : '['; // OPEN_BRACK
@@ -115,6 +86,38 @@ DOUBLESLASH : '//';
DOUBLESLASHEQUAL : '//=';
AT : '@';

// https://docs.python.org/2.7/reference/lexical_analysis.html#keywords
AND : 'and';
AS : 'as';
ASSERT : 'assert';
BREAK : 'break';
CLASS : 'class';
CONTINUE : 'continue';
DEF : 'def';
DEL : 'del';
ELIF : 'elif';
ELSE : 'else';
EXCEPT : 'except';
EXEC : 'exec';
FINALLY : 'finally';
FOR : 'for';
FROM : 'from';
GLOBAL : 'global';
IF : 'if';
IMPORT : 'import';
IN : 'in';
IS : 'is';
LAMBDA : 'lambda';
NOT : 'not';
OR : 'or';
PASS : 'pass';
PRINT : 'print';
RAISE : 'raise';
RETURN : 'return';
TRY : 'try';
WHILE : 'while';
WITH : 'with';
YIELD : 'yield';

// https://docs.python.org/2.7/reference/lexical_analysis.html#identifiers
NAME : IDENTIFIER;
@@ -134,15 +137,16 @@ STRING : STRING_LITERAL;
NEWLINE : '\r'? '\n'; // Unix, Windows

// https://docs.python.org/2.7/reference/lexical_analysis.html#comments
COMMENT : '#' ~[\r\n]* -> channel(HIDDEN);
COMMENT : '#' ~[\r\n]* -> channel(HIDDEN);

// https://docs.python.org/2.7/reference/lexical_analysis.html#whitespace-between-tokens
WS : [ \t\f]+ -> channel(HIDDEN);
WS : [ \t\f]+ -> channel(HIDDEN);

// https://docs.python.org/2.7/reference/lexical_analysis.html#explicit-line-joining
EXPLICIT_LINE_JOINING : '\\' NEWLINE -> channel(HIDDEN);
EXPLICIT_LINE_JOINING : BACKSLASH_NEWLINE -> channel(HIDDEN);

ERRORTOKEN : . ; // catch unrecognized characters and redirect these errors to the parser
// catch the unrecognized character(s)
ERRORTOKEN : . ; // the PythonLexerBase class will report an error for this token (the ERRORTOKEN will also cause an error in the parser)


/*
@@ -153,30 +157,35 @@

// https://docs.python.org/2.7/reference/lexical_analysis.html#string-literals
fragment STRING_LITERAL : STRING_PREFIX? (SHORT_STRING | LONG_STRING);
fragment STRING_PREFIX : 'r' | 'u' | 'ur' | 'R' | 'U' | 'UR' | 'Ur' | 'uR' | 'b' | 'B' | 'br' | 'Br' | 'bR' | 'BR';

// 'r' | 'u' | 'ur' | 'R' | 'U' | 'UR' | 'Ur' | 'uR' | 'b' | 'B' | 'br' | 'Br' | 'bR' | 'BR';
fragment STRING_PREFIX options { caseInsensitive=true; } : 'r' | 'u' | 'ur' | 'b' | 'br';

fragment SHORT_STRING
: '\'' SHORT_STRING_ITEM_FOR_SINGLE_QUOTE* '\''
| '"' SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE* '"'
;
: ['] SHORT_STRING_ITEM_FOR_SINGLE_QUOTE* [']
| ["] SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE* ["]
;

fragment LONG_STRING
: '\'\'\'' LONG_STRING_ITEM*? '\'\'\''
| '"""' LONG_STRING_ITEM*? '"""'
;
: ['][']['] LONG__STRING_ITEM*? ['][']['] // non-greedy
| ["]["]["] LONG__STRING_ITEM*? ["]["]["] // non-greedy
;

fragment SHORT_STRING_ITEM_FOR_SINGLE_QUOTE : SHORT_STRING_CHAR_NO_SINGLE_QUOTE | ESCAPE_SEQ;
fragment SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE : SHORT_STRING_CHAR_NO_DOUBLE_QUOTE | ESCAPE_SEQ;

fragment LONG_STRING_ITEM : LONG_STRING_CHAR | ESCAPE_SEQ;
fragment LONG__STRING_ITEM : LONG_STRING_CHAR | ESCAPE_SEQ;

fragment SHORT_STRING_CHAR_NO_SINGLE_QUOTE : ~[\\\r\n']; // <any source character except "\" or newline or single quote>
fragment SHORT_STRING_CHAR_NO_DOUBLE_QUOTE : ~[\\\r\n"]; // <any source character except "\" or newline or double quote>
fragment LONG_STRING_CHAR : ~'\\'; // <any source character except "\">
fragment ESCAPE_SEQ // https://docs.python.org/2.7/reference/lexical_analysis.html#string-literals
: '\\' '\r' '\n' // for the two-character Windows line break: \<newline> escape sequence (string literal line continuation)
| '\\' [\u0000-\u007F] // "\" <any ASCII character>
;
fragment SHORT_STRING_CHAR_NO_SINGLE_QUOTE : ~[\\\r\n']; // <any source character except "\" or newline or single quote>
fragment SHORT_STRING_CHAR_NO_DOUBLE_QUOTE : ~[\\\r\n"]; // <any source character except "\" or newline or double quote>
fragment LONG_STRING_CHAR : ~'\\'; // <any source character except "\">

// https://docs.python.org/2.7/reference/lexical_analysis.html#string-literals
fragment ESCAPE_SEQ : ESCAPE_SEQ_NEWLINE | '\\' [\u0000-\u007F]; // "\" <any ASCII character>

fragment ESCAPE_SEQ_NEWLINE : BACKSLASH_NEWLINE; // a kind of line continuation inside string literals (Python ignores the backslash and the newline when evaluating the literal)

fragment BACKSLASH_NEWLINE : '\\' NEWLINE;

// https://docs.python.org/2.7/reference/lexical_analysis.html#integer-and-long-integer-literals
fragment LONG_INTEGER : INTEGER ('l' | 'L');
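
To illustrate the EXPLICIT_LINE_JOINING rule above with a concrete input (a hypothetical Python 2 snippet, not taken from this diff): a backslash immediately followed by a newline outside any string literal joins two physical lines into one logical line, and the grammar hides that pair from the parser.

# Hypothetical example; the annotations describe the expected lexing,
# not output captured from this change.
total = 1 + \
        2      # the backslash+newline above is matched by EXPLICIT_LINE_JOINING
               # and sent to the HIDDEN channel, so the parser sees one logical line
print total    # prints 3
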
5 changes: 4 additions & 1 deletion python/python2_7_18/PythonParser.g4
@@ -26,8 +26,11 @@ THE SOFTWARE.
* Developed by : Robert Einhorn
*/

parser grammar PythonParser; // https://docs.python.org/2.7/reference/grammar.html
// https://docs.python.org/2.7/reference/grammar.html
parser grammar PythonParser;

options { tokenVocab=PythonLexer; }

// ANTLR4 grammar for Python

// Start symbols for the grammar:
11 changes: 6 additions & 5 deletions python/python2_7_18/README.md
@@ -1,13 +1,14 @@
# Python 2.7.18 parser

### About files:
- PythonParser.g4
- PythonParser.g4
is the ANTLR4 parser grammar that is based on the last official [Python 2 grammar](https://docs.python.org/2.7/reference/grammar.html)

- PythonLexerBase
handles the Python indentations

- Example files: [Python 2.7.18 Standard Lib](https://www.python.org/downloads/release/python-2718/)
- PythonLexerBase:
- handles the Python indentations
- and manages many other things

- Example files from: [Python 2.7.18 Standard Lib](https://www.python.org/downloads/release/python-2718/)

### Related link:
[ANTLR4-parser-for-Python-2.7.18](https://github.com/RobEin/ANTLR4-parser-for-Python-2.7.18)
3 changes: 3 additions & 0 deletions python/python2_7_18/changes.md
@@ -0,0 +1,3 @@
# Sept. 05, 2024
- Line continuation for string literals (backslash followed by a newline) is no longer resolved.
(backslash+newline is no longer removed from string literals)
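
A hypothetical sketch of what this change means for the STRING token text (the example and the described before/after behaviour are illustrative, based on the note above, not captured lexer output):

# For this source (note the backslash at the end of the first line):
s = "first \
second"
# Before this change the lexer stripped the backslash+newline, so the STRING
# token text was "first second"; after it, the backslash and the newline are
# kept verbatim in the token text. CPython still resolves the escape when
# evaluating the literal, so at runtime s == "first second" in both cases.
print s
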
4 changes: 0 additions & 4 deletions python/python2_7_18/changes.txt

This file was deleted.

Empty file.
10 changes: 0 additions & 10 deletions python/python2_7_18/tests/test_error_first_statement_indented.py

This file was deleted.

10 changes: 0 additions & 10 deletions python/python2_7_18/tests/test_error_inconsistent_dedent.py

This file was deleted.

8 changes: 0 additions & 8 deletions python/python2_7_18/tests/test_error_not_indented.py

This file was deleted.

This file was deleted.

9 changes: 0 additions & 9 deletions python/python2_7_18/tests/test_error_unexpected_indent.py

This file was deleted.

9 changes: 0 additions & 9 deletions python/python2_7_18/tests/test_explicit_line_joining.py

This file was deleted.

6 changes: 0 additions & 6 deletions python/python2_7_18/tests/test_formfeed_as_separator.py

This file was deleted.

6 changes: 0 additions & 6 deletions python/python2_7_18/tests/test_formfeed_at_start_of_line.py

This file was deleted.
