Grammar
Grammar : Rule+;
Rule : IDENTIFIER ':' OrderedChoice ';';
OrderedChoice : Sequence ('|' Sequence)*;
Sequence : Repetition (/\s+/ Repetition)*;
Repetition : Assertion /[*+?]/?;
Assertion : /[&!]/? Atomic;
Atomic : STRING | REGEX | IDENTIFIER | '(' OrderedChoice ')';
Metadata
/// <summary>
/// Discriminator for the kinds of parsing-expression nodes that make up a
/// parsed grammar. The member names mirror the rules of the PEG grammar
/// (OrderedChoice, Sequence, Repetition, Assertion) plus the two leaf kinds.
/// </summary>
public enum ExpressionType
{
/// <summary>A list of alternatives (the <c>|</c> operator of the grammar).</summary>
OrderedChoice,
/// <summary>A list of expressions written one after another.</summary>
Sequence,
/// <summary>An expression carrying a <c>*</c>, <c>+</c> or <c>?</c> quantifier.</summary>
Repetition,
/// <summary>An expression carrying an <c>&amp;</c> or <c>!</c> predicate.</summary>
Assertion,
/// <summary>A leaf holding literal text (a STRING or REGEX atom of the grammar).</summary>
Literal,
/// <summary>A leaf holding an identifier (the IDENTIFIER atom of the grammar).</summary>
Identifier
}
/// <summary>
/// Common contract of every parsing-expression node: each node reports
/// which <see cref="ExpressionType"/> it represents.
/// </summary>
public interface IParsingExpression
{
/// <summary>Gets the kind of expression this node represents.</summary>
public ExpressionType Type { get; }
}
/// <summary>
/// Parsing-expression node holding an ordered list of alternatives.
/// </summary>
public record OrderedChoice : IParsingExpression
{
    /// <summary>Creates a choice over the given alternatives, in order.</summary>
    /// <param name="expressions">The alternatives; stored as passed.</param>
    public OrderedChoice(params IParsingExpression[] expressions)
    {
        Expressions = expressions;
    }

    /// <summary>Gets the alternatives in the order they were supplied.</summary>
    public IParsingExpression[] Expressions { get; }

    /// <summary>Always <see cref="ExpressionType.OrderedChoice"/>.</summary>
    public ExpressionType Type => ExpressionType.OrderedChoice;

    /// <summary>
    /// Renders the node as <c>{OrderedChoice, a | b | ...}</c>, using each
    /// alternative's own string form.
    /// </summary>
    public override string ToString()
    {
        var rendered = Expressions.Select(alternative => alternative.ToString());
        var joined = string.Join(" | ", rendered);
        return string.Format("{{{0}, {1}}}", Type, joined);
    }
}
/// <summary>
/// Parsing-expression node holding expressions that follow one another.
/// </summary>
public record Sequence : IParsingExpression
{
    /// <summary>Creates a sequence of the given expressions, in order.</summary>
    /// <param name="expressions">The sequence items; stored as passed.</param>
    public Sequence(params IParsingExpression[] expressions)
    {
        Expressions = expressions;
    }

    /// <summary>Gets the sequence items in the order they were supplied.</summary>
    public IParsingExpression[] Expressions { get; }

    /// <summary>Always <see cref="ExpressionType.Sequence"/>.</summary>
    public ExpressionType Type => ExpressionType.Sequence;

    /// <summary>
    /// Renders the node as <c>{Sequence, a b ...}</c>, with the items
    /// separated by single spaces.
    /// </summary>
    public override string ToString()
    {
        var parts = Expressions.Select(item => item.ToString());
        return "{" + Type + ", " + string.Join(" ", parts) + "}";
    }
}
/// <summary>
/// Parsing-expression node that applies a quantifier, expressed as a
/// minimum/maximum pair, to an inner expression.
/// </summary>
public record Repetition : IParsingExpression
{
    /// <summary>Creates a repetition of <paramref name="expression"/>.</summary>
    /// <param name="expression">The expression being repeated.</param>
    /// <param name="min">Lower bound of the repetition count.</param>
    /// <param name="max">
    /// Upper bound of the repetition count; <c>ToString</c> treats
    /// <c>(0, -1)</c> as the <c>*</c> quantifier.
    /// </param>
    public Repetition(IParsingExpression expression, int min, int max)
    {
        Expression = expression;
        Min = min;
        Max = max;
    }

    /// <summary>Gets the repeated expression.</summary>
    public IParsingExpression Expression { get; }

    /// <summary>Gets the upper bound supplied at construction.</summary>
    public int Max { get; }

    /// <summary>Gets the lower bound supplied at construction.</summary>
    public int Min { get; }

    /// <summary>Always <see cref="ExpressionType.Repetition"/>.</summary>
    public ExpressionType Type => ExpressionType.Repetition;

    /// <summary>
    /// Renders the node as <c>{Repetition, expr?}</c> for bounds (0, 1),
    /// <c>{Repetition, expr*}</c> for bounds (0, -1), and
    /// <c>{Repetition, expr+}</c> for every other pair of bounds.
    /// </summary>
    public override string ToString()
    {
        string quantifier;
        if (Min == 0 && Max == 1)
        {
            quantifier = "?";
        }
        else if (Min == 0 && Max == -1)
        {
            quantifier = "*";
        }
        else
        {
            // Any other bounds fall through to '+', exactly like the default arm.
            quantifier = "+";
        }

        return $"{{{Type}, {Expression}{quantifier}}}";
    }
}
/// <summary>
/// Parsing-expression node that wraps an inner expression in a positive
/// (<c>&amp;</c>) or negative (<c>!</c>) predicate.
/// </summary>
public record Assertion : IParsingExpression
{
    /// <summary>Creates a predicate over <paramref name="expression"/>.</summary>
    /// <param name="expression">The expression the predicate applies to.</param>
    /// <param name="isPositive">
    /// <c>true</c> for the <c>&amp;</c> form, <c>false</c> for the <c>!</c> form.
    /// </param>
    public Assertion(IParsingExpression expression, bool isPositive)
    {
        Expression = expression;
        IsPositive = isPositive;
    }

    /// <summary>Gets the expression the predicate applies to.</summary>
    public IParsingExpression Expression { get; }

    /// <summary>Gets whether this is the <c>&amp;</c> (true) or <c>!</c> (false) form.</summary>
    public bool IsPositive { get; }

    /// <summary>Always <see cref="ExpressionType.Assertion"/>.</summary>
    public ExpressionType Type => ExpressionType.Assertion;

    /// <summary>
    /// Renders the node as <c>{Assertion, &amp;expr}</c> or
    /// <c>{Assertion, !expr}</c>.
    /// </summary>
    public override string ToString()
    {
        char prefix = IsPositive ? '&' : '!';
        return string.Format("{{{0}, {1}{2}}}", Type, prefix, Expression);
    }
}
/// <summary>
/// Leaf parsing-expression node holding literal text, flagged as either a
/// plain string or a regular-expression pattern.
/// </summary>
public record Literal : IParsingExpression
{
    /// <summary>Creates a literal from the given text.</summary>
    /// <param name="text">The literal's characters.</param>
    /// <param name="isRegex">
    /// <c>true</c> when <paramref name="text"/> is a regular-expression
    /// pattern; defaults to <c>false</c>.
    /// </param>
    public Literal(ReadOnlyMemory<char> text, bool isRegex = false)
    {
        Text = text;
        IsRegex = isRegex;
    }

    /// <summary>Always <see cref="ExpressionType.Literal"/>.</summary>
    public ExpressionType Type => ExpressionType.Literal;

    /// <summary>Gets whether the text is a regular-expression pattern.</summary>
    public bool IsRegex { get; }

    /// <summary>Gets the literal's characters.</summary>
    public ReadOnlyMemory<char> Text { get; }

    /// <summary>
    /// Renders the node as <c>{Literal, 'text'}</c>; the regex flag is not
    /// part of the output.
    /// </summary>
    public override string ToString()
    {
        return string.Concat("{", Type.ToString(), ", '", Text.ToString(), "'}");
    }
}
/// <summary>
/// Leaf parsing-expression node holding an identifier (the IDENTIFIER
/// alternative of the grammar's Atomic rule).
/// </summary>
public record Identifier : IParsingExpression
{
    /// <summary>Creates an identifier node from the given name.</summary>
    /// <param name="text">The identifier's characters.</param>
    public Identifier(ReadOnlyMemory<char> text) =>
        Text = text;

    /// <summary>
    /// Always <see cref="ExpressionType.Identifier"/>. Reporting
    /// <see cref="ExpressionType.Literal"/> here would make identifiers
    /// indistinguishable from literals to any code that dispatches on
    /// <see cref="IParsingExpression.Type"/>.
    /// </summary>
    public ExpressionType Type => ExpressionType.Identifier;

    /// <summary>Gets the identifier's characters.</summary>
    public ReadOnlyMemory<char> Text { get; }

    /// <summary>Renders the node as <c>{Identifier, name}</c>.</summary>
    public override string ToString() =>
        $"{{{Type}, {Text}}}";
}
Implementation
using Atomize;
using static Atomize.Parse;
/// <summary>
/// Parser for the PEG grammar notation, built from Atomize combinators.
/// Each private rule corresponds to one production of the grammar:
/// <code>
/// Grammar       : Rule+;
/// Rule          : IDENTIFIER ':' OrderedChoice ';';
/// OrderedChoice : Sequence ('|' Sequence)*;
/// Sequence      : Repetition (/\s+/ Repetition)*;
/// Repetition    : Assertion /[*+?]/?;
/// Assertion     : /[&amp;!]/? Atomic;
/// Atomic        : STRING | REGEX | IDENTIFIER | '(' OrderedChoice ')';
/// </code>
/// The rules are static fields and are declared bottom-up, because static
/// field initializers run in textual order and each rule uses the one
/// declared before it.
/// </summary>
public static class PEGGrammar
{
    /// <summary>
    /// Atomic : STRING | REGEX | IDENTIFIER | '(' OrderedChoice ')'.
    /// Strings and regular expressions become <see cref="Literal"/> nodes
    /// with their first and last character sliced off (presumably the
    /// surrounding delimiters - confirm against Atomize's token parsers).
    /// </summary>
    private static readonly
        Parser<IParsingExpression> AtomicRule =
            Choice(
                from str in Multiple.QuotedString
                select (IParsingExpression)new Literal(str[1..^1]),
                from pat in Multiple.RegularExpression
                select (IParsingExpression)new Literal(pat[1..^1], true),
                from idn in Multiple.Identifier
                select (IParsingExpression)new Identifier(idn),
                // OrderedChoiceRule is declared further down and is still null
                // while this initializer runs, so it is reached through a
                // lambda that is only evaluated later.
                Grouped(Ref(() => OrderedChoiceRule!))
            );

    /// <summary>
    /// Assertion : /[&amp;!]/? Atomic.
    /// The predicate operator is a prefix, so it is parsed before the atomic
    /// expression. When no operator is present the atomic expression is
    /// returned unwrapped.
    /// </summary>
    private static readonly
        Parser<IParsingExpression> AssertionRule =
            from asserts in Optional(Choice('&', '!'))
            from expr in AtomicRule
            // NOTE(review): relies on Optional yielding '\0' (default(char))
            // when the operator is absent - confirm against Atomize.
            select asserts == '\0'
                ? expr
                : new Assertion(expr, asserts == '&');

    /// <summary>
    /// Repetition : Assertion /[*+?]/?.
    /// Maps '*' to bounds (0, -1), '+' to (1, -1) and '?' to (0, 1); with no
    /// quantifier the inner expression is returned unwrapped.
    /// </summary>
    private static readonly
        Parser<IParsingExpression> RepetitionRule =
            from expr in AssertionRule
            from op in Optional(Choice('*', '+', '?'))
            select op switch
            {
                '*' => new Repetition(expr, 0, -1),
                '+' => new Repetition(expr, 1, -1),
                '?' => new Repetition(expr, 0, 1),
                _ => expr
            };

    /// <summary>
    /// Sequence : Repetition (/\s+/ Repetition)*.
    /// A single item is returned as-is rather than wrapped in a one-element
    /// <see cref="Sequence"/>.
    /// </summary>
    private static readonly
        Parser<IParsingExpression> SequenceRule =
            from exprs in SeparatedBy(RepetitionRule, Multiple.Whitespace)
            select exprs.Count == 1
                ? exprs[0]
                : new Sequence(exprs.ToArray());

    /// <summary>
    /// OrderedChoice : Sequence ('|' Sequence)*.
    /// A single alternative is returned as-is rather than wrapped in a
    /// one-element <see cref="OrderedChoice"/>.
    /// </summary>
    private static readonly
        Parser<IParsingExpression> OrderedChoiceRule =
            from exprs in SeparatedBy(SequenceRule, Whitespaced(Atom('|')))
            select exprs.Count == 1
                ? exprs[0]
                : new OrderedChoice(exprs.ToArray());

    /// <summary>
    /// Rule : IDENTIFIER ':' OrderedChoice ';'.
    /// Produces the rule name (copied into a string) paired with its
    /// expression tree.
    /// </summary>
    private static readonly
        Parser<KeyValuePair<string, IParsingExpression>> RuleRule =
            from name in (
                from mem in Multiple.Identifier
                select new string(mem.Span)
            )
            from _ in Whitespaced(Atom(':'))
            from expr in OrderedChoiceRule
            from __ in Whitespaced(Atom(';'))
            select new KeyValuePair<string, IParsingExpression>(name, expr);

    /// <summary>
    /// Grammar : Rule+ followed by end of input.
    /// Yields a dictionary from rule name to expression tree. The
    /// <see cref="Dictionary{TKey,TValue}"/> constructor used here throws
    /// <see cref="System.ArgumentException"/> if two rules share a name.
    /// </summary>
    public static readonly
        Parser<Dictionary<string, IParsingExpression>> GrammarRule =
            from rules in Minimum(1, RuleRule)
            from _ in (Parser<char>)EndOfText
            select new Dictionary<string, IParsingExpression>(rules);

    /// <summary>
    /// Wraps <paramref name="parser"/> in parentheses, with optional
    /// whitespace just inside the opening and closing parenthesis.
    /// </summary>
    /// <param name="parser">Parser for the parenthesized content.</param>
    /// <returns>A parser producing the content's result.</returns>
    private static Parser<T> Grouped<T>(Parser<T> parser) =>
        Island(
            Bind(Atom('('), _ => Optional(Multiple.Whitespace)),
            parser,
            Bind(Optional(Multiple.Whitespace), _ => Atom(')'))
        );

    /// <summary>
    /// Wraps <paramref name="parser"/> so that optional whitespace is
    /// accepted on both sides of it.
    /// </summary>
    /// <param name="parser">Parser for the content between the whitespace.</param>
    /// <returns>A parser producing the content's result.</returns>
    private static Parser<T> Whitespaced<T>(Parser<T> parser) =>
        Island(
            Optional(Multiple.Whitespace),
            parser,
            Optional(Multiple.Whitespace)
        );
}