From b4f673a4434840afd3258a36cba5f5a696734f97 Mon Sep 17 00:00:00 2001 From: Maciej Kaczkowski Date: Mon, 21 Oct 2024 19:21:18 +0200 Subject: [PATCH] FEAT: lexing and parsing hash literals; 190/210 @ interpreter pdf; 13/284 @ compiler pdf --- monkey/README.md | 2 +- monkey/interpreter/ast/ast.go | 22 ++++++ monkey/interpreter/lexer/lexer.go | 6 ++ monkey/interpreter/lexer/lexer_test.go | 6 ++ monkey/interpreter/parser/parser.go | 42 ++++++++++-- monkey/interpreter/parser/parser_test.go | 85 ++++++++++++++++++++++++ monkey/interpreter/token/token.go | 1 + 7 files changed, 159 insertions(+), 5 deletions(-) diff --git a/monkey/README.md b/monkey/README.md index 8308033..740bab0 100644 --- a/monkey/README.md +++ b/monkey/README.md @@ -13,7 +13,7 @@ Repo for basic tutorial-based Golang study ## adding new type checklist * [ ] add new token type (in `token/token.go`) to convert stream of characters into stream of tokens * [ ] define token type - * [ ] add branch in `NextToken()` function, calling new function + * [ ] add branch in `NextToken()` (in `lexer/lexer.go`) function, calling new function * [ ] add function to actually convert characters into tokens of given type * [ ] add parsing logic to convert stream of tokens into AST (Abstract Syntax Tree) * [ ] define node (in `ast/ast.go`) diff --git a/monkey/interpreter/ast/ast.go b/monkey/interpreter/ast/ast.go index 33b70f8..7b92696 100644 --- a/monkey/interpreter/ast/ast.go +++ b/monkey/interpreter/ast/ast.go @@ -321,3 +321,25 @@ func (ce *CallExpression) String() string { return out.String() } + +type HashLiteral struct { + Token token.Token // '{' token + Pairs map[Expression]Expression +} + +func (hl *HashLiteral) expressionNode() {} +func (hl *HashLiteral) TokenLiteral() string { return hl.Token.Literal } +func (hl *HashLiteral) String() string { + var out bytes.Buffer + + pairs := []string{} + for key, value := range hl.Pairs { + pairs = append(pairs, key.String()+":"+value.String()) + } + + out.WriteString("{") + out.WriteString(strings.Join(pairs, ", ")) + out.WriteString("}") + + return out.String() +} diff --git a/monkey/interpreter/lexer/lexer.go b/monkey/interpreter/lexer/lexer.go index 51cbf08..964f8fd 100644 --- a/monkey/interpreter/lexer/lexer.go +++ b/monkey/interpreter/lexer/lexer.go @@ -68,6 +68,12 @@ func (l *Lexer) NextToken() token.Token { tok = newToken(token.LBRACKET, l.ch) case ']': tok = newToken(token.RBRACKET, l.ch) + // remember that this character: ' is different + // from this character: " in Go + // ' means a single character (rune) + // " means a string + case ':': + tok = newToken(token.COLON, l.ch) case 0: tok.Literal = "" tok.Type = token.EOF diff --git a/monkey/interpreter/lexer/lexer_test.go b/monkey/interpreter/lexer/lexer_test.go index 0548c91..d7e20f1 100644 --- a/monkey/interpreter/lexer/lexer_test.go +++ b/monkey/interpreter/lexer/lexer_test.go @@ -65,6 +65,7 @@ if (5 < 10) { "foobar" "foo bar" [1, 2]; +{"foo": "bar"} ` tests := []struct { @@ -152,6 +153,11 @@ if (5 < 10) { {token.INT, "2"}, {token.RBRACKET, "]"}, {token.SEMICOLON, ";"}, + {token.LBRACE, "{"}, + {token.STRING, "foo"}, + {token.COLON, ":"}, + {token.STRING, "bar"}, + {token.RBRACE, "}"}, {token.EOF, ""}, } diff --git a/monkey/interpreter/parser/parser.go b/monkey/interpreter/parser/parser.go index fd3b678..5ca1ee9 100644 --- a/monkey/interpreter/parser/parser.go +++ b/monkey/interpreter/parser/parser.go @@ -66,9 +66,12 @@ func New(l *lexer.Lexer) *Parser { p.registerPrefix(token.FALSE, p.parseBoolean) p.registerPrefix(token.LPAREN, p.parseGroupedExpression) p.registerPrefix(token.IF, p.parseIfExpression) - p.registerPrefix(token.FUNCTION, p.parseFuncionLiteral) + p.registerPrefix(token.FUNCTION, p.parseFunctionLiteral) p.registerPrefix(token.STRING, p.parseStringLiteral) + // arrays and hashes are treated similarly - opening '[' or '{' + // is parsed as "prefix" to expression and then the rest is parsed p.registerPrefix(token.LBRACKET, p.parseArrayLiteral) + p.registerPrefix(token.LBRACE, p.parseHashLiteral) // Deal with infix expressions p.infixParseFns = make(map[token.TokenType]infixParseFn) @@ -343,14 +346,14 @@ func (p *Parser) parseStringLiteral() ast.Expression { // string is expression, return &ast.StringLiteral{Token: p.curToken, Value: p.curToken.Literal} } -func (p *Parser) parseFuncionLiteral() ast.Expression { +func (p *Parser) parseFunctionLiteral() ast.Expression { lit := &ast.FunctionLiteral{Token: p.curToken} if !p.expectPeek(token.LPAREN) { return nil } - lit.Parameters = p.parseFuncionParameters() + lit.Parameters = p.parseFunctionParameters() if !p.expectPeek(token.LBRACE) { return nil @@ -385,7 +388,7 @@ func (p *Parser) parseExpressionList(end token.TokenType) []ast.Expression { return list } -func (p *Parser) parseFuncionParameters() []*ast.Identifier { +func (p *Parser) parseFunctionParameters() []*ast.Identifier { identifiers := []*ast.Identifier{} if p.peekTokenIs(token.RPAREN) { @@ -413,6 +416,37 @@ func (p *Parser) parseFuncionParameters() []*ast.Identifier { return identifiers } +func (p *Parser) parseHashLiteral() ast.Expression { + hash := &ast.HashLiteral{Token: p.curToken} + hash.Pairs = make(map[ast.Expression]ast.Expression) + + for !p.peekTokenIs(token.RBRACE) { + p.nextToken() // first element from pair - "key" + key := p.parseExpression(LOWEST) + + if !p.expectPeek(token.COLON) { + return nil + } + + p.nextToken() // second element from pair - "value" + value := p.parseExpression(LOWEST) + + hash.Pairs[key] = value + + // next token should be either a comma or a closing brace + // or the hash is malformed + if !p.peekTokenIs(token.RBRACE) && !p.expectPeek(token.COMMA) { + return nil + } + } + + if !p.expectPeek(token.RBRACE) { + return nil + } + + return hash +} + func (p *Parser) parseCallExpression(function ast.Expression) ast.Expression { exp := &ast.CallExpression{Token: p.curToken, Function: function} exp.Arguments = p.parseExpressionList(token.RPAREN) diff --git a/monkey/interpreter/parser/parser_test.go b/monkey/interpreter/parser/parser_test.go index 0f5ab2c..2b4074c 100644 --- a/monkey/interpreter/parser/parser_test.go +++ b/monkey/interpreter/parser/parser_test.go @@ -749,6 +749,91 @@ func TestStringLiteralExpression(t *testing.T) { } } +func TestParsingHashLiteralsStringKeys(t *testing.T) { + input := `{"one": 1, "two": 2, "three": 3}` + l := lexer.New(input) + p := New(l) + program := p.ParseProgram() + checkParserErrors(t, p) + stmt := program.Statements[0].(*ast.ExpressionStatement) + hash, ok := stmt.Expression.(*ast.HashLiteral) + if !ok { + t.Fatalf("exp is not ast.HashLiteral. got=%T", stmt.Expression) + } + if len(hash.Pairs) != 3 { + t.Errorf("hash.Pairs has wrong length. got=%d", len(hash.Pairs)) + } + expected := map[string]int64{ + "one": 1, + "two": 2, + "three": 3, + } + for key, value := range hash.Pairs { + literal, ok := key.(*ast.StringLiteral) + if !ok { + t.Errorf("key is not ast.StringLiteral. got=%T", key) + } + expectedValue := expected[literal.String()] + testIntegerLiteral(t, value, expectedValue) + } +} + +func TestParsingEmptyHashLiteral(t *testing.T) { + input := "{}" + l := lexer.New(input) + p := New(l) + program := p.ParseProgram() + checkParserErrors(t, p) + stmt := program.Statements[0].(*ast.ExpressionStatement) + hash, ok := stmt.Expression.(*ast.HashLiteral) + if !ok { + t.Fatalf("exp is not ast.HashLiteral. got=%T", stmt.Expression) + } + if len(hash.Pairs) != 0 { + t.Errorf("hash.Pairs has wrong length. got=%d", len(hash.Pairs)) + } +} + +func TestParsingHashLiteralsWithExpressions(t *testing.T) { + input := `{"one": 0 + 1, "two": 10 - 8, "three": 15 / 5}` + l := lexer.New(input) + p := New(l) + program := p.ParseProgram() + checkParserErrors(t, p) + stmt := program.Statements[0].(*ast.ExpressionStatement) + hash, ok := stmt.Expression.(*ast.HashLiteral) + if !ok { + t.Fatalf("exp is not ast.HashLiteral. got=%T", stmt.Expression) + } + if len(hash.Pairs) != 3 { + t.Errorf("hash.Pairs has wrong length. got=%d", len(hash.Pairs)) + } + tests := map[string]func(ast.Expression){ + "one": func(e ast.Expression) { + testInfixExpression(t, e, 0, "+", 1) + }, + "two": func(e ast.Expression) { + testInfixExpression(t, e, 10, "-", 8) + }, + "three": func(e ast.Expression) { + testInfixExpression(t, e, 15, "/", 5) + }, + } + for key, value := range hash.Pairs { + literal, ok := key.(*ast.StringLiteral) + if !ok { + t.Errorf("key is not ast.StringLiteral. got=%T", key) + continue + } + testFunc, ok := tests[literal.String()] + if !ok { + t.Errorf("No test function for key %q found", literal.String()) + continue + } + testFunc(value) + } +} + func testLetStatement(t *testing.T, s ast.Statement, name string) bool { if s.TokenLiteral() != "let" { t.Errorf("s.TokenLiteral not 'let'. got=%q", s.TokenLiteral()) diff --git a/monkey/interpreter/token/token.go b/monkey/interpreter/token/token.go index e496acb..f1529fe 100644 --- a/monkey/interpreter/token/token.go +++ b/monkey/interpreter/token/token.go @@ -33,6 +33,7 @@ const ( // Delimiters COMMA = "," SEMICOLON = ";" + COLON = ":" LPAREN = "(" RPAREN = ")"