Appendix A. Earley Parser
This is the Earley parsing algorithm described in SPPF-Style Parsing from Earley Recognisers by Elizabeth Scott. This algorithm forms the basis of the parser in CoffeeGrinder.
The input is a grammar Γ = (N, T, S, P) and a string a₁a₂…aₙ
1 |EARLEYPARSER {
|E₀,…,Eₙ, R, Q′, V=∅
||
for all (S ::= α) ∈ P {
5 |if α ∈ ΣN add (S ::= ·α,0, null) to E₀
|if α = a₁α′ add (S ::= ·α,0, null) to Q′
|}
||
for 0 ≤ i ≤ n {
10 |H=∅, R=Eᵢ, Q=Q′
|Q′=∅
||
while R ≠ ∅ {
|remove an element, Λ say, from R
15 |if Λ = (B ::= α·Cβ, h, w) {
|for all (C ::= δ) ∈ P {
|if δ ∈ ΣN and (C ::= ·δ, i, null) ∉ Eᵢ {
|add (C ::= ·δ, i, null) to Eᵢ and R
|}
20 |if δ = aᵢ₊₁δ′ {
|add (C ::= ·δ, i, null) to Q
|}
|}
|if ((C, v) ∈ H) {
25 |let y = MAKE_NODE(B ::= αC·β, h, i, w, v, V)
|if β ∈ ΣN and (B ::= αC·β, h, y) ∉ Eᵢ {
|add (B ::= αC·β, h, y) to Eᵢ and R
|}
|if β = aᵢ₊₁β′ {
30 |add (B ::= αC·β, h, y) to Q
|}
|}
|}
|35 |
if Λ = (D ::= α·, h, w) {
|if w = null {
|if there is no node v ∈ V labelled (D, i, i) create one
|set w=v
|if w does not have family (ϵ) add one
40 |}
|if h = i {
|add (D, w) to H
|}
|for all (A ::= τ·Dδ, k, z) in Eₕ {
45 |let y = MAKE_NODE(A ::= τD·δ, k, i, z, w, V)
|if δ ∈ ΣN and (A ::= τD·δ, k, y) ∉ Eᵢ {
|add (A ::= τD·δ, k, y) to Eᵢ and R
|}
|if δ = aᵢ₊₁δ′ {
50 |add (A ::= τD·δ, k, y) to Q
|}
|}
|}
|}
55 ||
V=∅
|create an SPPF node v labelled (aᵢ₊₁, i, i+1)
||
while Q ≠ ∅ {
60 |remove an element, Λ = (B ::= α·ai+1β, h, w) say, from Q
|let y = MAKE_NODE(B ::= αai+1·β, h, i+1, w, v, V)
|if β ∈ ΣN {
|add (B ::= αaᵢ₊₁·β, h, y) to Eᵢ₊₁
|}
65 |if β = aᵢ₊₂β′ {
|add (B ::= αaᵢ₊₁·β, h, y) to Q′
|}
|}
|}
70 ||
if (S ::= τ·, 0, w) ∈ Eₙ return w
|else return failure
|}
|75 |
MAKE_NODE(B ::= αx·β, j, i, w, v,V) {
|if β=ϵ {
|let s =B
|} else {
|let s = (B::=αx·β)
80 |}
||
if α=ϵ and β≠ϵ {
|let y=v
|} else {
85 |if there is no node y ∈ V labelled (s, j, i) create one and add it to V
|if w=null and y does not have a family of children (v) add one
|if w≠null and y does not have a family of children (w, v) add one
|}
|return y
90 |}