generated from ianlewis/repo-template
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathparser.go
More file actions
387 lines (316 loc) · 9.37 KB
/
parser.go
File metadata and controls
387 lines (316 loc) · 9.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package lexparse
import (
"context"
"errors"
"fmt"
"io"
"strings"
)
// Node is the structure for a single node in the parse tree.
type Node[V comparable] struct {
Parent *Node[V]
Children []*Node[V]
Value V
// Start is the start position in the input where the value was found.
Start Position
}
func (n *Node[V]) String() string {
return fmtNode(n, nil)
}
func fmtNode[V comparable](node *Node[V], lastRank []bool) string {
var bldr strings.Builder
for i := range len(lastRank) - 1 {
if lastRank[i] {
bldr.WriteString(" ")
} else {
bldr.WriteString("│ ")
}
}
if len(lastRank) > 0 {
if lastRank[len(lastRank)-1] {
bldr.WriteString("└── ")
} else {
bldr.WriteString("├── ")
}
}
fmt.Fprintf(&bldr, "%v (%v)\n", node.Value, node.Start)
for i, child := range node.Children {
newLastRank := make([]bool, len(lastRank)+1)
copy(newLastRank, lastRank)
newLastRank[len(lastRank)] = i == len(node.Children)-1
bldr.WriteString(fmtNode(child, newLastRank))
}
return bldr.String()
}
// ParseState is the state of the current parsing state machine. It defines the
// logic to process the current state and returns the next state.
type ParseState[V comparable] interface {
// Run executes the logic at the current state, returning an error if one is
// encountered. Implementations are expected to add new [Node] objects to
// the AST using [Parser.Push] or [Parser.Node). As necessary, new parser
// state should be pushed onto the stack as needed using [Parser.PushState].
Run(ctx *ParserContext[V]) error
}
type parseFnState[V comparable] struct {
f func(*ParserContext[V]) error
}
// Run implements ParseState.Run.
func (s *parseFnState[V]) Run(ctx *ParserContext[V]) error {
if s.f == nil {
return nil
}
return s.f(ctx)
}
// ParseStateFn creates a State from the given Run function.
func ParseStateFn[V comparable](f func(*ParserContext[V]) error) ParseState[V] {
return &parseFnState[V]{f}
}
type stack[V comparable] []ParseState[V]
func (s *stack[V]) push(v ParseState[V]) {
*s = append(*s, v)
}
func (s *stack[V]) pop() ParseState[V] {
if len(*s) == 0 {
return nil
}
v := (*s)[len(*s)-1]
*s = (*s)[:len(*s)-1]
return v
}
// TokenSource is an interface that defines a source of tokens for the parser.
type TokenSource interface {
// NextToken returns the next token from the source. When tokens are
// exhausted, it returns a Token with Type set to TokenTypeEOF.
NextToken(ctx context.Context) *Token
}
// ParserContext provides context for the current parsing operation,
// including access to the parser state and methods to manipulate the parse
// tree.
type ParserContext[V comparable] struct {
//nolint:containedctx // Embedding context required for interface compliance.
context.Context
p *Parser[V]
}
// PushState pushes a number of new expected future states onto the state stack
// in reverse order.
func (ctx *ParserContext[V]) PushState(states ...ParseState[V]) {
ctx.p.pushState(states...)
}
// SetRoot sets the root of the parse tree to the given node. The current node
// is also set to the root node. This is useful for resetting the parser to a
// new root node.
func (ctx *ParserContext[V]) SetRoot(root *Node[V]) {
ctx.p.setRoot(root)
}
// Root returns the root of the parse tree.
func (ctx *ParserContext[V]) Root() *Node[V] {
return ctx.p.root
}
// Peek returns the next token from the lexer without consuming it.
func (ctx *ParserContext[V]) Peek() *Token {
return ctx.p.peek(ctx)
}
// Next returns the next token from the lexer. This is the new current token
// position.
func (ctx *ParserContext[V]) Next() *Token {
return ctx.p.nextToken(ctx)
}
// Pos returns the current node position in the tree.
func (ctx *ParserContext[V]) Pos() *Node[V] {
return ctx.p.node
}
// Push creates a new node, adds it as a child to the current node, updates
// the current node to the new node, and returns the new node.
func (ctx *ParserContext[V]) Push(v V) *Node[V] {
return ctx.p.push(v)
}
// Node creates a new node at the current token position and adds it as a
// child to the current node. The current node is not updated.
func (ctx *ParserContext[V]) Node(v V) *Node[V] {
return ctx.p.addNodeHere(v)
}
// NewNode creates a new node at the current token position and returns it
// without adding it to the tree.
func (ctx *ParserContext[V]) NewNode(v V) *Node[V] {
return ctx.p.newNode(v)
}
// Climb updates the current node position to the current node's parent
// returning the previous current node. It is a no-op that returns the root
// node if called on the root node. Updates the end position of the parent node
// to the end position of the current node.
func (ctx *ParserContext[V]) Climb() *Node[V] {
return ctx.p.climb()
}
// Replace replaces the current node with a new node with the given value. The
// old node is removed from the tree and its value is returned. Can be used to
// replace the root node.
//
//nolint:ireturn // returning the generic interface is needed to return the previous value.
func (ctx *ParserContext[V]) Replace(v V) V {
return ctx.p.replace(v)
}
// NewParser creates a new Parser that reads from the tokens channel. The
// parser is initialized with a root node with an empty value.
func NewParser[V comparable](tokens TokenSource, startingState ParseState[V]) *Parser[V] {
root := &Node[V]{
Start: Position{
Offset: 0,
Line: 1,
Column: 1,
},
}
p := &Parser[V]{
stateStack: &stack[V]{},
tokens: tokens,
}
p.root = root
p.node = root
p.pushState(startingState)
return p
}
// Parser reads the tokens produced by a Lexer and builds a parse tree. It is
// implemented as a stack of states ([ParseState]) in which each state implements
// it's own processing.
//
// Parser maintains a current position in the parse tree which can be utilized
// by parser states.
type Parser[V comparable] struct {
// tokens is a the source of tokens for the parser.
tokens TokenSource
// stateStack is a stack of expected future states of the parser.
stateStack *stack[V]
// root is the root node of the parse tree.
root *Node[V]
// node is the current node under processing.
node *Node[V]
// token is the current token in the stream.
token *Token
// next is the next token in the stream.
next *Token
}
// Parse builds a parse tree by repeatedly pulling [ParseState] objects from
// the stack and running them, starting with the initial state. Parsing can be
// canceled by ctx.
//
// The caller can request that the parser stop by canceling ctx.
func (p *Parser[V]) Parse(ctx context.Context) (*Node[V], error) {
parserCtx := &ParserContext[V]{
Context: ctx,
p: p,
}
for {
state := p.stateStack.pop()
if state == nil {
break
}
select {
case <-ctx.Done():
if err := ctx.Err(); err != nil {
//nolint:wrapcheck // no additional error context for error.
return p.root, err
}
return p.root, nil
default:
}
var err error
if err = state.Run(parserCtx); err != nil {
if errors.Is(err, io.EOF) {
break
}
//nolint:wrapcheck // no additional error context for error.
return p.root, err
}
}
return p.root, nil
}
func (p *Parser[V]) pushState(states ...ParseState[V]) {
for i := len(states) - 1; i >= 0; i-- {
p.stateStack.push(states[i])
}
}
func (p *Parser[V]) setRoot(root *Node[V]) {
p.root = root
p.node = root
}
func (p *Parser[V]) peek(ctx context.Context) *Token {
if p.next != nil {
return p.next
}
p.next = p.tokens.NextToken(ctx)
return p.next
}
func (p *Parser[V]) nextToken(ctx context.Context) *Token {
l := p.peek(ctx)
p.next = nil
p.token = l
return p.token
}
func (p *Parser[V]) push(v V) *Node[V] {
p.node = p.addNodeHere(v)
return p.node
}
func (p *Parser[V]) addNodeHere(v V) *Node[V] {
n := p.newNode(v)
p.node.Children = append(p.node.Children, n)
n.Parent = p.node
return n
}
func (p *Parser[V]) newNode(v V) *Node[V] {
var start Position
if p.token != nil {
start = p.token.Start
}
return &Node[V]{
Value: v,
Start: start,
}
}
func (p *Parser[V]) climb() *Node[V] {
n := p.node
if p.node.Parent != nil {
p.node = p.node.Parent
}
return n
}
//nolint:ireturn // returning the generic interface is needed to return the previous value.
func (p *Parser[V]) replace(v V) V {
node := p.newNode(v)
// Replace the parent.
node.Parent = p.node.Parent
if node.Parent != nil {
for i := range node.Parent.Children {
if node.Parent.Children[i] == p.node {
node.Parent.Children[i] = node
break
}
}
}
// Replace children. Preserve nil, non-nil slice.
if p.node.Children != nil {
node.Children = make([]*Node[V], len(p.node.Children))
for i := range p.node.Children {
node.Children[i] = p.node.Children[i]
node.Children[i].Parent = node
}
}
// If we are currently at the root, replace the root reference as well.
if p.node == p.root {
p.root = node
}
oldVal := p.node.Value
p.node = node
return oldVal
}