Recursive Descent Parser Code
Following is the code of this example:
using System; using System.Collections.Generic; using System.Linq; using System.Text; namespace SimpleParser { public abstract class Symbol { public ListConstituentSymbols { get; set; } public override string ToString() { string s = ConstituentSymbols.Select(ct => ct.ToString()).StringConcatenate(); return s; } public Symbol(params Object[] symbols) { List ls = new List (); foreach (var item in symbols) { if (item is Symbol) ls.Add((Symbol)item); else if (item is IEnumerable ) foreach (var item2 in (IEnumerable )item) ls.Add(item2); else // If this error is thrown, the parser is coded incorrectly. throw new ParserException("Internal error"); } ConstituentSymbols = ls; } public Symbol() { } } public class Formula : Symbol { public static Formula Produce(IEnumerable symbols) { // formula = expression Expression e = Expression.Produce(symbols); return e == null ? null : new Formula(e); } public Formula(params Object[] symbols) : base(symbols) { } } public class Expression : Symbol { public static Expression Produce(IEnumerable symbols) { // expression = *whitespace nospace-expression *whitespace int whiteSpaceBefore = symbols.TakeWhile(s => s is WhiteSpace).Count(); int whiteSpaceAfter = symbols.Reverse().TakeWhile(s => s is WhiteSpace).Count(); IEnumerable noSpaceSymbolList = symbols .Skip(whiteSpaceBefore) .SkipLast(whiteSpaceAfter) .ToList(); NospaceExpression n = NospaceExpression.Produce(noSpaceSymbolList); if (n != null) return new Expression( Enumerable.Repeat(new WhiteSpace(), whiteSpaceBefore), n, Enumerable.Repeat(new WhiteSpace(), whiteSpaceAfter)); return null; } public Expression(params Object[] symbols) : base(symbols) { } } public class NospaceExpression : Symbol { public static Dictionary OperatorPrecedence = new Dictionary () { { "^", 3 }, { "*", 2 }, { "/", 2 }, { "+", 1 }, { "-", 1 }, }; public static NospaceExpression Produce(IEnumerable symbols) { // nospace-expression = open-parenthesis expression close-parenthesis // / numerical-constant // / prefix-operator expression // / expression infix-operator expression if (!symbols.Any()) return null; if (symbols.First() is OpenParenthesis && symbols.Last() is CloseParenthesis) { Expression e = Expression.Produce(symbols.Skip(1).SkipLast(1)); if (e != null) return new NospaceExpression(new OpenParenthesis(), e, new CloseParenthesis()); } // expression, infix-operator, expression var z = symbols.Rollup(0, (t, d) => { if (t is OpenParenthesis) return d + 1; if (t is CloseParenthesis) return d - 1; return d; }); var symbolsWithIndex = symbols.Select((s, i) => new { Symbol = s, Index = i, }); var z2 = symbolsWithIndex.Zip(z, (v1, v2) => new { SymbolWithIndex = v1, Depth = v2, }); var operatorList = z2 .Where(x => x.Depth == 0 && x.SymbolWithIndex.Index != 0 && InfixOperator.Produce(x.SymbolWithIndex.Symbol) != null) .ToList(); if (operatorList.Any()) { int minPrecedence = operatorList .Select(o2 => OperatorPrecedence[o2.SymbolWithIndex.Symbol.ToString()]).Min(); var op = operatorList .Last(o2 => OperatorPrecedence[o2.SymbolWithIndex.Symbol.ToString()] == minPrecedence); if (op != null) { var expressionTokenList1 = symbols.TakeWhile(t => t != op.SymbolWithIndex.Symbol); Expression e1 = Expression.Produce(expressionTokenList1); if (e1 == null) throw new ParserException("Invalid expression"); var expressionTokenList2 = symbols .SkipWhile(t => t != op.SymbolWithIndex.Symbol).Skip(1); Expression e2 = Expression.Produce(expressionTokenList2); if (e2 == null) throw new ParserException("Invalid expression"); InfixOperator io = new InfixOperator(op.SymbolWithIndex.Symbol); return new NospaceExpression(e1, io, e2); } } NumericalConstant n = NumericalConstant.Produce(symbols); if (n != null) return new NospaceExpression(n); PrefixOperator p = PrefixOperator.Produce(symbols.FirstOrDefault()); if (p != null) { Expression e = Expression.Produce(symbols.Skip(1)); if (e != null) return new NospaceExpression(p, e); } return null; } public NospaceExpression(params Object[] symbols) : base(symbols) { } } public class NumericalConstant : Symbol { public static NumericalConstant Produce(IEnumerable symbols) { // numerical-constant = [neg-sign] significand-part SignificandPart s = SignificandPart.Produce(symbols); if (s != null) return new NumericalConstant(s); NegSign n = NegSign.Produce(symbols.First()); if (n != null) { SignificandPart s2 = SignificandPart.Produce(symbols.Skip(1)); if (s2 != null) return new NumericalConstant(n, s2); } return null; } public NumericalConstant(params Object[] symbols) : base(symbols) { } } public class SignificandPart : Symbol { public static SignificandPart Produce(IEnumerable symbols) { // significand-part = whole-number-part [fractional-part] / fractional-part FractionalPart f; f = FractionalPart.Produce(symbols); if (f != null) return new SignificandPart(f); IEnumerable s = null; WholeNumberPart w = WholeNumberPart.Produce(symbols, out s); if (w != null) { if (!s.Any()) return new SignificandPart(w); f = FractionalPart.Produce(s); if (f != null) return new SignificandPart(w, f); } return null; } public SignificandPart(params Object[] symbols) : base(symbols) { } } public class WholeNumberPart : Symbol { public static WholeNumberPart Produce(IEnumerable symbols, out IEnumerable symbolsToProcess) { // whole-number-part = digit-sequence IEnumerable s = null; DigitSequence d = DigitSequence.Produce(symbols, out s); if (d != null) { symbolsToProcess = s; return new WholeNumberPart(d); } symbolsToProcess = null; return null; } public WholeNumberPart(params Object[] symbols) : base(symbols) { } } public class FractionalPart : Symbol { public static FractionalPart Produce(IEnumerable symbols) { // fractional-part = full-stop digit-sequence if (!symbols.Any()) return null; if (symbols.First() is FullStop) { IEnumerable s = null; DigitSequence d = DigitSequence.Produce(symbols.Skip(1), out s); if (d == null || s.Any()) return null; return new FractionalPart(new FullStop(), d); } return null; } public FractionalPart(params Object[] symbols) : base(symbols) { } } public class DigitSequence : Symbol { public static DigitSequence Produce(IEnumerable symbols, out IEnumerable symbolsToProcess) { // digit-sequence = 1*decimal-digit IEnumerable digits = symbols.TakeWhile(s => s is DecimalDigit); if (digits.Any()) { symbolsToProcess = symbols.Skip(digits.Count()); return new DigitSequence(digits); } symbolsToProcess = null; return null; } public DigitSequence(params Object[] symbols) : base(symbols) { } } public class NegSign : Symbol { public static NegSign Produce(Symbol symbol) { // neg-sign = minus if (symbol is Minus) return new NegSign(symbol); return null; } public NegSign(params Object[] symbols) : base(symbols) { } } public class PrefixOperator : Symbol { public static PrefixOperator Produce(Symbol symbol) { // prefix-operator = plus / minus if (symbol is Plus || symbol is Minus) return new PrefixOperator(symbol); return null; } public PrefixOperator(params Object[] symbols) : base(symbols) { } } public class InfixOperator : Symbol { public static InfixOperator Produce(Symbol symbol) { // infix-operator = caret / asterisk / forward-slash / plus / minus if (symbol is Plus || symbol is Minus || symbol is Asterisk || symbol is ForwardSlash || symbol is Caret) return new InfixOperator(symbol); return null; } public InfixOperator(params Object[] symbols) : base(symbols) { } } public class DecimalDigit : Symbol { private string CharacterValue; public override string ToString() { return CharacterValue; } public DecimalDigit(char c) { CharacterValue = c.ToString(); } } public class WhiteSpace : Symbol { public override string ToString() { return " "; } public WhiteSpace() { } } public class Plus : Symbol { public override string ToString() { return "+"; } public Plus() { } } public class Minus : Symbol { public override string ToString() { return "-"; } public Minus() { } } public class Asterisk : Symbol { public override string ToString() { return "*"; } public Asterisk() { } } public class ForwardSlash : Symbol { public override string ToString() { return "/"; } public ForwardSlash() { } } public class Caret : Symbol { public override string ToString() { return "^"; } public Caret() { } } public class FullStop : Symbol { public override string ToString() { return "."; } public FullStop() { } } public class OpenParenthesis : Symbol { public override string ToString() { return "("; } public OpenParenthesis() { } } public class CloseParenthesis : Symbol { public override string ToString() { return ")"; } public CloseParenthesis() { } } public class SimpleFormulaParser { public static Symbol ParseFormula(string s) { IEnumerable symbols = s.Select(c => { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return new DecimalDigit(c); case ' ': return new WhiteSpace(); case '+': return new Plus(); case '-': return new Minus(); case '*': return new Asterisk(); case '/': return new ForwardSlash(); case '^': return new Caret(); case '.': return new FullStop(); case '(': return new OpenParenthesis(); case ')': return new CloseParenthesis(); default: return (Symbol)null; } }); #if false if (symbols.Any()) { Console.WriteLine("Terminal Symbols"); Console.WriteLine("================"); foreach (var terminal in symbols) Console.WriteLine("{0} >{1}<", terminal.GetType().Name.ToString(), terminal.ToString()); Console.WriteLine(); } #endif Formula formula = Formula.Produce(symbols); if (formula == null) throw new ParserException("Invalid formula"); return formula; } public static void DumpSymbolRecursive(StringBuilder sb, Symbol symbol, int depth) { sb.Append(string.Format("{0}{1} >{2}<", "".PadRight(depth * 2), symbol.GetType().Name.ToString(), symbol.ToString())).Append(Environment.NewLine); if (symbol.ConstituentSymbols != null) foreach (var childSymbol in symbol.ConstituentSymbols) DumpSymbolRecursive(sb, childSymbol, depth + 1); } } class Program { static void Main(string[] args) { string[] sampleValidFormulas = new[] { "1+((2+3)*4)^5", "1+2-3*4/5^6", "(1+2)/3", " (1+3) ", "-123", "1+2*(-3)", "1+2*( - 3)", "12.34", ".34", "-123+456", " ( 123 + 456 ) ", "-.34", "-12.34", "-(123+456)", }; string[] sampleInvalidFormulas = new[] { "-(123+)", "-(*123)", "*123", "*123a", "1.", "--1", }; StringBuilder sb = new StringBuilder(); foreach (var formula in sampleValidFormulas) { Symbol f = SimpleFormulaParser.ParseFormula(formula); SimpleFormulaParser.DumpSymbolRecursive(sb, f, 0); sb.Append("==================================" + Environment.NewLine); } foreach (var formula in sampleInvalidFormulas) { bool exceptionThrown = false; try { Symbol f = SimpleFormulaParser.ParseFormula(formula); } catch (ParserException e) { exceptionThrown = true; sb.Append(String.Format("Parsing >{0}< Exception: {1}", formula, e.Message) + Environment.NewLine); } if (!exceptionThrown) sb.Append(String.Format("Parsing >{0}< Should have thrown exception, but did not", formula) + Environment.NewLine); } Console.WriteLine(sb.ToString()); } } public class ParserException : Exception { public ParserException(string message) : base(message) { } } public static class MyExtensions { public static IEnumerable SkipLast (this IEnumerable source, int count) { Queue saveList = new Queue (); int saved = 0; foreach (T item in source) { if (saved < count) { saveList.Enqueue(item); ++saved; continue; } saveList.Enqueue(item); yield return saveList.Dequeue(); } yield break; } public static string StringConcatenate(this IEnumerable source) { StringBuilder sb = new StringBuilder(); foreach (string s in source) sb.Append(s); return sb.ToString(); } public static string StringConcatenate ( this IEnumerable source, Func projectionFunc) { return source.Aggregate(new StringBuilder(), (s, i) => s.Append(projectionFunc(i)), s => s.ToString()); } public static IEnumerable Rollup ( this IEnumerable source, TResult seed, Func projection) { TResult nextSeed = seed; foreach (TSource src in source) { TResult projectedValue = projection(src, nextSeed); nextSeed = projectedValue; yield return projectedValue; } } } }