001/*
002 * Copyright (c) 2018 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 *
004 * This Source Code Form is subject to the terms of the Mozilla Public
005 * License, v. 2.0. If a copy of the MPL was not distributed with this
006 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
007 */
008
009package heretical.parser.common;
010
011import java.util.ArrayList;
012import java.util.List;
013
014import heretical.parser.common.util.IntegerVar;
015import org.parboiled.BaseParser;
016import org.parboiled.Rule;
017import org.parboiled.annotations.DontLabel;
018import org.parboiled.annotations.SuppressNode;
019import org.parboiled.support.StringVar;
020
021/**
022 *
023 */
024public class BaseSyntaxGrammar<Node> extends BaseParser<Node>
025  {
026  protected final Rule AT = Terminal( "@" );
027  protected final Rule AND_CHAR = Terminal( "&", AnyOf( "=&" ) );
028  protected final Rule ANDAND = Terminal( "&&" );
029  protected final Rule BANG = Terminal( "!", Ch( '=' ) );
030  protected final Rule COLON = Terminal( ":" );
031  protected final Rule COMMA = Terminal( "," );
032  protected final Rule SLASH = Terminal( "/" );
033  protected final Rule DOT = Terminal( "." );
034  protected final Rule EQU = Terminal( "=", Ch( '=' ) );
035  protected final Rule EQUAL = Terminal( "==" );
036  protected final Rule GE = Terminal( ">=" );
037  protected final Rule GT = Terminal( ">", AnyOf( "=>" ) );
038  protected final Rule HAT = Terminal( "^", Ch( '=' ) );
039  protected final Rule LBRK = Terminal( "[" );
040  protected final Rule LE = Terminal( "<=" );
041  protected final Rule LPAR = Terminal( "(" );
042  protected final Rule LPOINT = Terminal( "<" );
043  protected final Rule LT = Terminal( "<", AnyOf( "=<" ) );
044  protected final Rule LWING = Terminal( "{" );
045  protected final Rule MINUS = Terminal( "-", AnyOf( "=-" ) );
046  protected final Rule NOTEQUAL = Terminal( "!=" );
047  protected final Rule PIPE = Terminal( "|", AnyOf( "=|" ) );
048  protected final Rule OROR = Terminal( "||" );
049  protected final Rule PLUS = Terminal( "+", AnyOf( "=+" ) );
050  protected final Rule QUERY = Terminal( "?" );
051  protected final Rule RBRK = Terminal( "]" );
052  protected final Rule RPAR = Terminal( ")" );
053  protected final Rule RPOINT = Terminal( ">" );
054  protected final Rule RWING = Terminal( "}" );
055  protected final Rule SEMI = Terminal( ";" );
056  protected final Rule TILDA = Terminal( "~" );
057
058  @SuppressNode
059  public Rule[] IgnoreCase( String[] literals )
060    {
061    Rule[] result = new Rule[ literals.length ];
062
063    for( int i = 0; i < literals.length; i++ )
064      result[ i ] = IgnoreCase( literals[ i ] );
065
066    return result;
067    }
068
069  @SuppressNode
070  public Rule Digit()
071    {
072    return CharRange( '0', '9' );
073    }
074
075  @SuppressNode
076  public Rule TimeDigit()
077    {
078    return FirstOf( CharRange( '0', '9' ), AnyOf( "+-:" ) );
079    }
080
081  @SuppressNode
082  public Rule Letter()
083    {
084    return FirstOf( CharRange( 'a', 'z' ), CharRange( 'A', 'Z' ) );
085    }
086
087  @SuppressNode
088  protected Rule HexDigit()
089    {
090    return FirstOf( CharRange( 'a', 'f' ), CharRange( 'A', 'F' ), CharRange( '0', '9' ) );
091    }
092
093  @SuppressNode
094  protected Rule Spacing()
095    {
096    return ZeroOrMore( FirstOf(
097
098      // whitespace
099      OneOrMore( AnyOf( " \t\r\n\f" ).label( "Whitespace" ) ),
100
101      // traditional comment
102      Sequence( "/*", ZeroOrMore( TestNot( "*/" ), ANY ), "*/" ),
103
104      // end of line comment
105      Sequence(
106        "//",
107        ZeroOrMore( TestNot( AnyOf( "\r\n" ) ), ANY ),
108        FirstOf( "\r\n", '\r', '\n', EOI )
109      )
110    ) );
111    }
112
113  protected Rule ListItem( StringVar term )
114    {
115    return Sequence(
116      OneOrMore(
117        Sequence(
118          TestNot( AnyOf( " \t\r\n\f)]}," ) ),
119          ANY
120        )
121      ).suppressSubnodes(),
122      term.set( match() ),
123      Optional( TestNot( " \t\r\n\f" ), "," )
124    );
125    }
126
127  protected Rule NotListItem( StringVar term )
128    {
129    return Sequence(
130      OneOrMore(
131        Sequence(
132          TestNot( AnyOf( " \t\r\n\f)]}," ) ),
133          ANY
134        )
135      ).suppressSubnodes(),
136      term.set( match() ),
137      Spacing()
138    );
139    }
140
141  protected Rule Term( StringVar term )
142    {
143    return Sequence(
144      OneOrMore(
145        Sequence(
146          TestNot( AnyOf( ": \t\r\n\f)]}" ) ),
147          ANY
148        )
149      ).suppressSubnodes(),
150      term.set( match() ),
151      Spacing()
152    );
153    }
154
155  @SuppressNode
156  @DontLabel
157  Rule Terminal( String string )
158    {
159    return Sequence( IgnoreCase( string ), Spacing() ).label( '\'' + string + '\'' );
160    }
161
162  @SuppressNode
163  @DontLabel
164  protected Rule Terminal( String string, Rule mustNotFollow )
165    {
166    return Sequence( IgnoreCase( string ), TestNot( mustNotFollow ), Spacing() ).label( '\'' + string + '\'' );
167    }
168
169  @SuppressNode
170  @DontLabel
171  protected Rule Keyword( String lhs, IntegerVar ordinal, Rule rhs, StringVar unit )
172    {
173    return
174      Sequence(
175        IgnoreCase( lhs ),
176        Spacing(),
177        Optional( Number(), ordinal.set( match() ), Spacing() ),
178        rhs,
179        unit.set( match().trim() )
180      );
181    }
182
183  @SuppressNode
184  @DontLabel
185  protected Rule Keyword( IntegerVar ordinal, Rule lhs, StringVar unit, String rhs )
186    {
187    return
188      Sequence(
189        Optional( Number(), ordinal.set( match() ) ),
190        Spacing(),
191        lhs,
192        unit.set( match().trim() ),
193        Spacing(),
194        IgnoreCase( rhs )
195      );
196    }
197
198  @SuppressNode
199  @DontLabel
200  protected Rule Keyword( String lhs, Rule rhs, StringVar unit )
201    {
202    return
203      Sequence(
204        IgnoreCase( lhs ),
205        Spacing(),
206        rhs,
207        unit.set( match().trim() )
208      );
209    }
210
211  @SuppressNode
212  @DontLabel
213  protected Rule Keyword( String keyword )
214    {
215    return Terminal( keyword, LetterOrDigit() );
216    }
217
218  @SuppressNode
219  @DontLabel
220  protected Rule LetterOrDigit()
221    {
222    return FirstOf( CharRange( 'a', 'z' ), CharRange( 'A', 'Z' ), CharRange( '0', '9' ), '_' );
223    }
224
225  @SuppressNode
226  @DontLabel
227  protected Rule Number()
228    {
229    return OneOrMore( FirstOf( Digit(), ',' ) ); // lax on the commas
230    }
231
232  @SuppressNode
233//  @DontLabel
234  protected Rule DoubleNumber()
235    {
236    return Sequence(
237      Optional( Ch( '-' ) ),
238      Number(),
239      Optional( Ch( '.' ), OneOrMore( Digit() ) )
240    );
241    }
242
243  @SuppressNode
244  @DontLabel
245  protected Rule DoubleNumberList()
246    {
247    // comma separates items
248    return OneOrMore(
249      Sequence(
250        Optional( Ch( '-' ) ),
251        OneOrMore( Digit() ),
252        Optional( Ch( '.' ), OneOrMore( Digit() ) ),
253        Optional( Ch( ',' ) )
254      )
255    );
256    }
257
258  protected Rule StringLiteral( StringVar term )
259    {
260    return Sequence(
261      '"',
262      ZeroOrMore(
263        FirstOf(
264          Escape(),
265          Sequence( TestNot( AnyOf( "\r\n\"\\" ) ), ANY )
266        )
267      ).suppressSubnodes(),
268      term.set( match() ),
269      '"',
270      Spacing()
271    );
272    }
273
274  protected Rule Escape()
275    {
276    return Sequence( '\\', FirstOf( AnyOf( "btnfr\"\'\\" ), OctalEscape(), UnicodeEscape() ) );
277    }
278
279  protected Rule OctalEscape()
280    {
281    return FirstOf(
282      Sequence( CharRange( '0', '3' ), CharRange( '0', '7' ), CharRange( '0', '7' ) ),
283      Sequence( CharRange( '0', '7' ), CharRange( '0', '7' ) ),
284      CharRange( '0', '7' )
285    );
286    }
287
288  protected Rule UnicodeEscape()
289    {
290    return Sequence( OneOrMore( 'u' ), HexDigit(), HexDigit(), HexDigit(), HexDigit() );
291    }
292
293  protected Rule FirstOfKeyword( Object[] terms )
294    {
295    List<Rule> rules = new ArrayList<>();
296
297    for( Object level : terms )
298      rules.add( Keyword( level.toString() ) );
299
300    return FirstOf( rules.toArray( new Rule[ rules.size() ] ) );
301    }
302  }