/*
 * Copyright (c) 2018 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

package heretical.parser.common;

import java.util.ArrayList;
import java.util.List;

import heretical.parser.common.util.IntegerVar;
import org.parboiled.BaseParser;
import org.parboiled.Rule;
import org.parboiled.annotations.DontLabel;
import org.parboiled.annotations.SuppressNode;
import org.parboiled.support.StringVar;

/**
 * Base parboiled grammar that supplies shared building blocks for concrete grammars:
 * punctuation/operator terminals, character-class rules, spacing and comment skipping,
 * list/term capture rules, keyword helpers, numeric rules and string-literal rules.
 *
 * @param <Node> the value type handed to {@link BaseParser}
 */
public class BaseSyntaxGrammar<Node> extends BaseParser<Node>
  {
  // Punctuation and operator terminals. Each one matches its literal followed by optional
  // spacing. The two-argument form adds a negative lookahead so that a shorter operator does
  // not match the prefix of a longer one (for example AND_CHAR matches "&" only when it is
  // not followed by '=' or '&').
  protected final Rule AT = Terminal( "@" );
  protected final Rule AND_CHAR = Terminal( "&", AnyOf( "=&" ) );
  protected final Rule ANDAND = Terminal( "&&" );
  protected final Rule BANG = Terminal( "!", Ch( '=' ) );
  protected final Rule COLON = Terminal( ":" );
  protected final Rule COMMA = Terminal( "," );
  protected final Rule SLASH = Terminal( "/" );
  protected final Rule DOT = Terminal( "." );
  protected final Rule EQU = Terminal( "=", Ch( '=' ) );
  protected final Rule EQUAL = Terminal( "==" );
  protected final Rule GE = Terminal( ">=" );
  protected final Rule GT = Terminal( ">", AnyOf( "=>" ) );
  protected final Rule HAT = Terminal( "^", Ch( '=' ) );
  protected final Rule LBRK = Terminal( "[" );
  protected final Rule LE = Terminal( "<=" );
  protected final Rule LPAR = Terminal( "(" );
  protected final Rule LPOINT = Terminal( "<" );
  protected final Rule LT = Terminal( "<", AnyOf( "=<" ) );
  protected final Rule LWING = Terminal( "{" );
  protected final Rule MINUS = Terminal( "-", AnyOf( "=-" ) );
  protected final Rule NOTEQUAL = Terminal( "!=" );
  protected final Rule PIPE = Terminal( "|", AnyOf( "=|" ) );
  protected final Rule OROR = Terminal( "||" );
  protected final Rule PLUS = Terminal( "+", AnyOf( "=+" ) );
  protected final Rule QUERY = Terminal( "?" );
  protected final Rule RBRK = Terminal( "]" );
  protected final Rule RPAR = Terminal( ")" );
  protected final Rule RPOINT = Terminal( ">" );
  protected final Rule RWING = Terminal( "}" );
  protected final Rule SEMI = Terminal( ";" );
  protected final Rule TILDA = Terminal( "~" );

  /**
   * Creates one case-insensitive rule per literal.
   *
   * @param literals the strings to match without regard to case
   * @return an array holding, at each index, the case-insensitive rule for the literal at that index
   */
  @SuppressNode
  public Rule[] IgnoreCase( String[] literals )
    {
    Rule[] result = new Rule[ literals.length ];

    for( int i = 0; i < literals.length; i++ )
      {
      result[ i ] = IgnoreCase( literals[ i ] );
      }

    return result;
    }

  /**
   * Matches a single decimal digit, '0' through '9'.
   */
  @SuppressNode
  public Rule Digit()
    {
    return CharRange( '0', '9' );
    }

  /**
   * Matches a single decimal digit or one of the characters '+', '-' or ':'.
   */
  @SuppressNode
  public Rule TimeDigit()
    {
    return FirstOf( CharRange( '0', '9' ), AnyOf( "+-:" ) );
    }

  /**
   * Matches a single ASCII letter, lower or upper case.
   */
  @SuppressNode
  public Rule Letter()
    {
    return FirstOf( CharRange( 'a', 'z' ), CharRange( 'A', 'Z' ) );
    }

  /**
   * Matches a single hexadecimal digit: 'a'-'f', 'A'-'F' or '0'-'9'.
   */
  @SuppressNode
  protected Rule HexDigit()
    {
    return FirstOf( CharRange( 'a', 'f' ), CharRange( 'A', 'F' ), CharRange( '0', '9' ) );
    }

  /**
   * Matches zero or more runs of whitespace, traditional (slash-star) comments and
   * end-of-line (double-slash) comments.
   */
  @SuppressNode
  protected Rule Spacing()
    {
    return ZeroOrMore( FirstOf(

      // whitespace
      OneOrMore( AnyOf( " \t\r\n\f" ).label( "Whitespace" ) ),

      // traditional comment
      Sequence( "/*", ZeroOrMore( TestNot( "*/" ), ANY ), "*/" ),

      // end of line comment
      Sequence(
        "//",
        ZeroOrMore( TestNot( AnyOf( "\r\n" ) ), ANY ),
        FirstOf( "\r\n", '\r', '\n', EOI )
      )
    ) );
    }

  /**
   * Matches one or more characters that are not whitespace, a closing bracket
   * (')', ']' or the closing brace) or a comma, stores the matched text in {@code term},
   * then optionally consumes one trailing comma.
   *
   * @param term receives the matched item text
   */
  protected Rule ListItem( StringVar term )
    {
    // NOTE(review): the string passed to TestNot below appears to be matched as a literal
    // character sequence rather than as a set of characters -- confirm whether AnyOf was intended.
    return Sequence(
      OneOrMore(
        Sequence(
          TestNot( AnyOf( " \t\r\n\f)]}," ) ),
          ANY
        )
      ).suppressSubnodes(),
      term.set( match() ),
      Optional( TestNot( " \t\r\n\f" ), "," )
    );
    }

  /**
   * Matches one or more characters that are not whitespace, a closing bracket
   * (')', ']' or the closing brace) or a comma, stores the matched text in {@code term},
   * then consumes any following spacing.
   *
   * @param term receives the matched text
   */
  protected Rule NotListItem( StringVar term )
    {
    return Sequence(
      OneOrMore(
        Sequence(
          TestNot( AnyOf( " \t\r\n\f)]}," ) ),
          ANY
        )
      ).suppressSubnodes(),
      term.set( match() ),
      Spacing()
    );
    }

  /**
   * Matches one or more characters that are not a colon, whitespace or a closing bracket
   * (')', ']' or the closing brace), stores the matched text in {@code term}, then consumes
   * any following spacing. Unlike {@link #NotListItem(StringVar)}, commas are accepted and
   * colons are not.
   *
   * @param term receives the matched text
   */
  protected Rule Term( StringVar term )
    {
    return Sequence(
      OneOrMore(
        Sequence(
          TestNot( AnyOf( ": \t\r\n\f)]}" ) ),
          ANY
        )
      ).suppressSubnodes(),
      term.set( match() ),
      Spacing()
    );
    }

  /**
   * Matches {@code string} case-insensitively followed by optional spacing. The rule is
   * labelled with the string wrapped in single quotes.
   *
   * @param string the literal to match
   */
  // NOTE(review): this overload is package-private while the two-argument overload is
  // protected -- confirm whether the narrower visibility is intentional.
  @SuppressNode
  @DontLabel
  Rule Terminal( String string )
    {
    return Sequence( IgnoreCase( string ), Spacing() ).label( '\'' + string + '\'' );
    }

  /**
   * Matches {@code string} case-insensitively, provided it is not immediately followed by
   * {@code mustNotFollow}, and then consumes optional spacing. The rule is labelled with the
   * string wrapped in single quotes.
   *
   * @param string        the literal to match
   * @param mustNotFollow a rule that must fail directly after the literal (negative lookahead)
   */
  @SuppressNode
  @DontLabel
  protected Rule Terminal( String string, Rule mustNotFollow )
    {
    return Sequence( IgnoreCase( string ), TestNot( mustNotFollow ), Spacing() ).label( '\'' + string + '\'' );
    }

  /**
   * Matches {@code lhs} case-insensitively, spacing, an optional {@link #Number()} followed by
   * spacing, and then {@code rhs}.
   *
   * @param lhs     the leading literal
   * @param ordinal receives the text matched by the optional number, when present
   *                (presumably converted to an integer by {@code IntegerVar} -- verify)
   * @param rhs     the rule that must follow
   * @param unit    receives the trimmed text matched by {@code rhs}
   */
  @SuppressNode
  @DontLabel
  protected Rule Keyword( String lhs, IntegerVar ordinal, Rule rhs, StringVar unit )
    {
    return
      Sequence(
        IgnoreCase( lhs ),
        Spacing(),
        Optional( Number(), ordinal.set( match() ), Spacing() ),
        rhs,
        unit.set( match().trim() )
      );
    }

  /**
   * Matches an optional {@link #Number()}, spacing, {@code lhs}, spacing, and then
   * {@code rhs} case-insensitively.
   *
   * @param ordinal receives the text matched by the optional number, when present
   *                (presumably converted to an integer by {@code IntegerVar} -- verify)
   * @param lhs     the rule matched after the optional number
   * @param unit    receives the trimmed text matched by {@code lhs}
   * @param rhs     the trailing literal
   */
  @SuppressNode
  @DontLabel
  protected Rule Keyword( IntegerVar ordinal, Rule lhs, StringVar unit, String rhs )
    {
    return
      Sequence(
        Optional( Number(), ordinal.set( match() ) ),
        Spacing(),
        lhs,
        unit.set( match().trim() ),
        Spacing(),
        IgnoreCase( rhs )
      );
    }

  /**
   * Matches {@code lhs} case-insensitively, spacing, and then {@code rhs}.
   *
   * @param lhs  the leading literal
   * @param rhs  the rule that must follow
   * @param unit receives the trimmed text matched by {@code rhs}
   */
  @SuppressNode
  @DontLabel
  protected Rule Keyword( String lhs, Rule rhs, StringVar unit )
    {
    return
      Sequence(
        IgnoreCase( lhs ),
        Spacing(),
        rhs,
        unit.set( match().trim() )
      );
    }

  /**
   * Matches {@code keyword} case-insensitively as a whole word: the keyword must not be
   * immediately followed by a letter, digit or underscore.
   *
   * @param keyword the keyword literal
   */
  @SuppressNode
  @DontLabel
  protected Rule Keyword( String keyword )
    {
    return Terminal( keyword, LetterOrDigit() );
    }

  /**
   * Matches a single ASCII letter, decimal digit or underscore.
   */
  @SuppressNode
  @DontLabel
  protected Rule LetterOrDigit()
    {
    return FirstOf( CharRange( 'a', 'z' ), CharRange( 'A', 'Z' ), CharRange( '0', '9' ), '_' );
    }

  /**
   * Matches one or more characters that are each a decimal digit or a comma, in any mix.
   */
  @SuppressNode
  @DontLabel
  protected Rule Number()
    {
    return OneOrMore( FirstOf( Digit(), ',' ) ); // lax on the commas
    }

  /**
   * Matches an optional leading '-', a {@link #Number()}, and an optional fraction made of
   * '.' followed by one or more digits.
   */
  @SuppressNode
//  @DontLabel
  protected Rule DoubleNumber()
    {
    return Sequence(
      Optional( Ch( '-' ) ),
      Number(),
      Optional( Ch( '.' ), OneOrMore( Digit() ) )
    );
    }

  /**
   * Matches one or more decimal numbers (optional '-', digits, optional fraction), each
   * optionally followed by a comma.
   */
  @SuppressNode
  @DontLabel
  protected Rule DoubleNumberList()
    {
    // comma separates items
    return OneOrMore(
      Sequence(
        Optional( Ch( '-' ) ),
        OneOrMore( Digit() ),
        Optional( Ch( '.' ), OneOrMore( Digit() ) ),
        Optional( Ch( ',' ) )
      )
    );
    }

  /**
   * Matches a double-quoted string literal followed by optional spacing. The content may
   * contain escapes (see {@link #Escape()}) and any character other than carriage return,
   * line feed, double quote or backslash.
   *
   * @param term receives the text between the quotes, exactly as it appears in the input
   */
  protected Rule StringLiteral( StringVar term )
    {
    return Sequence(
      '"',
      ZeroOrMore(
        FirstOf(
          Escape(),
          Sequence( TestNot( AnyOf( "\r\n\"\\" ) ), ANY )
        )
      ).suppressSubnodes(),
      term.set( match() ),
      '"',
      Spacing()
    );
    }

  /**
   * Matches a backslash followed by a single-character escape (one of b, t, n, f, r,
   * double quote, single quote or backslash), an octal escape or a unicode escape.
   */
  protected Rule Escape()
    {
    return Sequence( '\\', FirstOf( AnyOf( "btnfr\"\'\\" ), OctalEscape(), UnicodeEscape() ) );
    }

  /**
   * Matches the digits of an octal escape: three digits whose first is '0'-'3', otherwise
   * two octal digits, otherwise a single octal digit. The leading backslash is not part of
   * this rule.
   */
  protected Rule OctalEscape()
    {
    return FirstOf(
      Sequence( CharRange( '0', '3' ), CharRange( '0', '7' ), CharRange( '0', '7' ) ),
      Sequence( CharRange( '0', '7' ), CharRange( '0', '7' ) ),
      CharRange( '0', '7' )
    );
    }

  /**
   * Matches the tail of a unicode escape: one or more 'u' characters followed by exactly
   * four hexadecimal digits. The leading backslash is not part of this rule.
   */
  protected Rule UnicodeEscape()
    {
    return Sequence( OneOrMore( 'u' ), HexDigit(), HexDigit(), HexDigit(), HexDigit() );
    }

  /**
   * Builds an ordered choice over whole-word keyword rules, one per element of
   * {@code terms}, in the order given.
   *
   * @param terms the values whose {@code toString()} text is used as the keyword literal
   * @return a rule that matches the first keyword, in order, that matches the input
   */
  protected Rule FirstOfKeyword( Object[] terms )
    {
    // presize: exactly one rule is produced per term
    List<Rule> rules = new ArrayList<>( terms.length );

    for( Object term : terms )
      {
      rules.add( Keyword( term.toString() ) );
      }

    return FirstOf( rules.toArray( new Rule[ 0 ] ) );
    }
  }