name:
src/main/g4/F7.g4
-rw-r--r--
8575
1/***************************************************************************************************
2 F7 This is the main grammar file for F7 code. The goal is to get **near feature-parity** with most
3 major spreadsheet applications. In cases of conflicting syntax, we choose the simpler of the two,
4 or the most compatible, which is mostly a judgment call. In cases where features are grand-fathered
5 in from older versions of the major spreadsheet applications we may choose to ignore those features
6 altogether.
7
8 Comments are collapsed into the main block of each section below. If we write comments for each
9 rule (and believe me, we could) it would end up being difficult to read. We therefore condense them
10 down to general comments in the main block.
11 ***************************************************************************************************
12 */
13grammar F7;
14
15/***************************************************************************************************
16 PARSER RULES These are rules that the parser will use. Some of them are explicitly named (def: ...
17 ; ... ;) and some are named with the # character at the end of the individual rule.
18
19 start - The starting block is the main entry point for parsing, but not for compilation or the
20 logic of F7 code. It basically serves as a way to capture the code and the EOF, so we can jump
21 straight to the block.
22
23 block - All F7 code starts with a single expression.
24
25 expression - An expression is a section of code.
26
27 atom - An atom is a single variable that itself requires no execution or reduction, but may contain
28 other atoms and expressions that do require execution and reduction.
29
30 identifier - An identifier is a set of characters and numbers between 1 and N in length, starting
31 with an alphabetical character. It may contain periods and underscores as long as they are not at
32 the beginning of the string.
33
34 comparisonOperator - Shorthand for all comparison operators.
35 ***************************************************************************************************
36 */
37start: block EOF; // Entry rule: one block followed by end of input.
38block: expression; // A block is exactly one expression.
39expression: // Left-recursive operator rule: alternatives listed earlier bind tighter than later ones.
40 Minus expression # unaryMinusExpression
41 | Plus expression # unaryPlusExpression
42 | expression Percent+ # unaryPercentExpression
43 // TODO:HACK - <assoc=left> vs <assoc=right> differ in Excel and Sheets, respectively. Weird.
44 | <assoc = left> left = expression op = Power right = expression # powerExpression
45 | left = expression op = (Multiply | Divide) right = expression # multiplicationExpression
46 | left = expression op = (Plus | Minus) right = expression # additiveExpression
47 | left = expression op = Ampersand right = expression # concatExpression // must outrank comparison: "a"&"b"="ab" groups as ("a"&"b")="ab", per Excel's operator precedence
48 | left = expression op = comparisonOperator right = expression # relationalExpression // loosest-binding binary operator
49 | atom # atomExpression
50 | atom (separator = Colon atom)+ # rangeExpression;
51atom: // The operand forms an expression can be built from.
52 range # cellAtom // cell or range reference
53 | String # stringAtom
54 | Error # errorAtom
55 | Int # numberAtom // Int and Number share the numberAtom label
56 | Number # numberAtom
57 | LeftParen expression RightParen # parentheticalAtom
58 | name = identifier LeftParen arguments RightParen # formulaAtom // call syntax: name(arguments)
59 | LeftBrace (
60 expression (separator = (Comma | SemiColon) expression)*
61 )? RightBrace # listAtom // brace-delimited list; may be empty; items separated by Comma or SemiColon
62 | identifier # namedAtom; // bare identifier
63range: // Exactly one of the eight reference shapes listed here.
64 biRange
65 | uniRange
66 | columnWiseBiRange
67 | columnWiseWithRowOffsetFirstBiRange
68 | columnWiseWithRowOffsetLastBiRange
69 | rowWiseBiRange
70 | rowWiseWithColumnOffsetFirstBiRange
71 | rowWiseWithColumnOffsetLastBiRange;
72biRange: (grid=gridName Bang)? // column+row on both endpoints, e.g. A1:B2; NOTE(review): label absoRow is not named absoFirstRow like its siblings - kept as-is for generated-code compatibility
73 absoFirstColumn=Dollar? firstColumn=NCharacters absoRow=Dollar? firstRow=Int Colon
74 absoLastColumn=Dollar? lastColumn=NCharacters absoLastRow=Dollar? lastRow=Int;
75uniRange: (grid=gridName Bang)? // a single cell, e.g. A1 or $A$1, optionally grid-qualified
76 absoFirstColumn=Dollar? firstColumn=NCharacters absoFirstRow=Dollar? firstRow=Int;
77columnWiseBiRange: (grid=gridName Bang)? // columns only on both endpoints, e.g. A:C
78 absoFirstColumn=Dollar? firstColumn=NCharacters Colon absoLastColumn=Dollar? lastColumn=NCharacters;
79columnWiseWithRowOffsetFirstBiRange: (grid=gridName Bang)? // row given only on the first endpoint, e.g. A2:C
80 absoFirstColumn=Dollar? firstColumn=NCharacters absoFirstRow=Dollar? firstRow=Int Colon
81 absoLastColumn=Dollar? lastColumn=NCharacters;
82columnWiseWithRowOffsetLastBiRange: (grid=gridName Bang)? // row given only on the last endpoint, e.g. A:C5
83 absoFirstColumn=Dollar? firstColumn=NCharacters Colon
84 absoLastColumn=Dollar? lastColumn=NCharacters absoLastRow=Dollar? lastRow=Int;
85rowWiseBiRange: (grid=gridName Bang)? // rows only on both endpoints, e.g. 1:5
86 absoFirstRow=Dollar? firstRow=Int Colon absoLastRow=Dollar? lastRow=Int;
87rowWiseWithColumnOffsetFirstBiRange: (grid=gridName Bang)? // column given only on the first endpoint, e.g. A1:5
88 absoFirstColumn=Dollar? firstColumn=NCharacters absoFirstRow=Dollar? firstRow=Int Colon absoLastRow=Dollar? lastRow=Int;
89rowWiseWithColumnOffsetLastBiRange: (grid=gridName Bang)? absoFirstRow=Dollar? firstRow=Int Colon // column given only on the last endpoint, e.g. 1:C5
90 absLastColumn=Dollar? lastColumn=NCharacters absoLastRow=Dollar? lastRow=Int; // NOTE(review): absLastColumn lacks the "o" used by absoLastColumn elsewhere - kept as-is for generated-code compatibility
91arguments: (expression (Comma expression)*)?; // zero or more comma-separated expressions
92gridName: SingleQuoteString | identifier; // grid qualifier that precedes Bang in the range rules
93identifier: NCharacters (Dot | Underscore | NCharacters | Int)*; // starts with letters; then any mix of Dot, Underscore, letters and digit runs
94comparisonOperator: // Any single relational operator token.
95 LessThanOrEqualTO // capital "TO" matches the lexer rule name as declared in this grammar
96 | GreaterThanOrEqualTo
97 | LessThan
98 | GreaterThan
99 | Equal
100 | NotEqual;
101
102/***************************************************************************************************
103 LEXER RULES These are rules that the lexer will use. They SHOULD BE NON-CONFLICTING, and as
104 small/big as they need to be.
105 ***************************************************************************************************
106 */
107NCharacters: CHARACTER+; // one or more letters a-z / A-Z
108Dot: '.';
109Int: DIGIT+; // declared before Number, so a bare digit run that both rules match at equal length becomes Int
110Number: // digits, optional fraction, optional exponent
111 DIGIT+ (Dot DIGIT+)? (E ('+' | '-')? DIGIT+ (Dot DIGIT+)?)?; // NOTE(review): the exponent part also admits a fraction (e.g. 1e2.5) - confirm this is intended
112SingleQuoteString: SINGLE_QUOTE_STRING;
113String: STRING_LITERAL;
114Error: ERROR_LITERAL;
115LeftParen: LPAREN; // The punctuation/operator tokens from here through Underscore each wrap exactly one fragment.
116RightParen: RPAREN;
117LeftBrace: LBRACE;
118RightBrace: RBRACE;
119Comma: COMMA;
120Colon: COLON;
121SemiColon: SEMICOLON;
122Bang: BANG;
123Plus: PLUS;
124Minus: MINUS;
125Percent: PERCENT;
126Power: POW;
127Divide: DIV;
128Multiply: MULT;
129GreaterThan: GT;
130GreaterThanOrEqualTo: GTEQ;
131LessThan: LT;
132LessThanOrEqualTO: LTEQ; // NOTE(review): ends in "TO" rather than "To"; the comparisonOperator rule references this exact spelling
133Equal: EQ;
134NotEqual: NEQ;
135Ampersand: AMPERSAND;
136Dollar: DOLLAR;
137Underscore: UNDERSCORE;
138
139/***************************************************************************************************
140 LEXER FRAGMENTS Rules that the lexer will use, but we don't need to NAME.
141
142 Fragments A-Z let us do case-insensitive matching for literally named things like errors.
143
144
145 Error literals look weird because we're using the case-insensitive alphabet fragments.
146 ***************************************************************************************************
147 */
148fragment A: ('a' | 'A'); // Each letter fragment matches its letter in either case.
149fragment B: ('b' | 'B');
150fragment C: ('c' | 'C');
151fragment D: ('d' | 'D');
152fragment E: ('e' | 'E');
153fragment F: ('f' | 'F');
154fragment G: ('g' | 'G');
155fragment H: ('h' | 'H');
156fragment I: ('i' | 'I');
157fragment J: ('j' | 'J');
158fragment K: ('k' | 'K');
159fragment L: ('l' | 'L');
160fragment M: ('m' | 'M');
161fragment N: ('n' | 'N');
162fragment O: ('o' | 'O');
163fragment P: ('p' | 'P');
164fragment Q: ('q' | 'Q');
165fragment R: ('r' | 'R');
166fragment S: ('s' | 'S');
167fragment T: ('t' | 'T');
168fragment U: ('u' | 'U');
169fragment V: ('v' | 'V');
170fragment W: ('w' | 'W');
171fragment X: ('x' | 'X');
172fragment Y: ('y' | 'Y');
173fragment Z: ('z' | 'Z');
174fragment IDENTIFIER: CHARACTER+ (CHARACTER | '_')*; // NOTE(review): appears unused within this file - confirm before removing
175fragment DIGIT: [0-9]; // one decimal digit
176fragment CHARACTER: ('a' ..'z' | 'A' ..'Z'); // one ASCII letter, either case
177fragment ERROR_LITERAL: // Error values, spelled with the case-insensitive letter fragments.
178 HASH N U L L BANG // #NULL!
179 | HASH D I V DIV '0' BANG // #DIV/0!
180 | HASH V A L U E BANG // #VALUE!
181 | HASH R E F BANG // #REF!
182 | HASH N A M E QUESTION // #NAME?
183 | HASH N U M BANG // #NUM!
184 | HASH N DIV A // #N/A
185 | HASH E R R O R BANG; // #ERROR!
186fragment SINGLE_QUOTE_STRING: ( // single-quoted text
187 '\'' SINGLE_STRING_CHARACTER* '\''
188 );
189fragment SINGLE_STRING_CHARACTER: ~['\\\r\n]; // any character except single quote, backslash, CR, LF (so no escape sequences)
190fragment STRING_LITERAL: ('"' DOUBLE_STRING_CHARACTER* '"'); // double-quoted text
191fragment DOUBLE_STRING_CHARACTER: ~["\\\r\n]; // any character except double quote, backslash, CR, LF (so no escape sequences)
192fragment EQ: '='; // Literal spellings for the operator and punctuation tokens, from here through UNDERSCORE.
193fragment NEQ: '<>';
194fragment GT: '>';
195fragment LT: '<';
196fragment GTEQ: '>=';
197fragment LTEQ: '<=';
198fragment PLUS: '+';
199fragment MINUS: '-';
200fragment MULT: '*';
201fragment DIV: '/';
202fragment PERCENT: '%';
203fragment HASH: '#'; // used only inside ERROR_LITERAL; there is no standalone Hash token
204fragment POW: '^';
205fragment AMPERSAND: '&';
206fragment LPAREN: '(';
207fragment RPAREN: ')';
208fragment COMMA: ',';
209fragment SEMICOLON: ';';
210fragment LBRACE: '{';
211fragment RBRACE: '}';
212fragment BANG: '!';
213fragment QUESTION: '?'; // used only inside ERROR_LITERAL (#NAME?)
214fragment DOLLAR: '$';
215fragment COLON: ':';
216fragment UNDERSCORE: '_';
217
218/***************************************************************************************************
219 UTIL/COMMON/ETC.
220 ***************************************************************************************************
221 */
222// Discard runs of spaces, tabs and line breaks that appear between tokens.
223// Whitespace inside string literals is unaffected because it is consumed by the string rules.
224WS: [ \t\r\n]+ -> skip;