compiler: Optimize renaming in SSA a little more.
[cen64.git] / compiler / parser.c
1 /*
2 * cen64/compiler/parser.c: CEN64's language parser
3 *
4 * CEN64: Cycle-Accurate Nintendo 64 Emulator
5 * Copyright (C) 2012-2015, Tyler J. Stachecki
6 *
7 * This file is subject to the terms and conditions defined in
8 * 'LICENSE', which is part of this source code package.
9 */
10
11 #include "common.h"
12 #include "compiler/alloc.h"
13 #include "compiler/cunit.h"
14 #include "compiler/lexer.h"
15 #include "compiler/parser.h"
16 #include <stddef.h>
17
18 static const char *expected_lparen = "Expected '('";
19 static const char *expected_rparen = "Expected ')'";
20
21 static const char *unexpected_tokens[] = {
22 NULL, /* NUL */
23 NULL, /* kEndToken */
24
25 NULL, /* kAnalysisErrorToken */
26 NULL, /* kIOErrorToken */
27 NULL, /* kLexErrorToken */
28 NULL, /* kParseErrorToken */
29 NULL, /* kLastErrorToken */
30
31 "Unexpected 'i8'", /* kI8 */
32 "Unexpected 'u8'", /* kI8 */
33 "Unexpected 'i16'", /* kI16 */
34 "Unexpected 'u16'", /* kU16 */
35 "Unexpected 'i32'", /* kI32 */
36 "Unexpected 'u32'", /* kU32 */
37 "Unexpected 'i64'", /* kI64 */
38 "Unexpected 'u64'", /* kU64 */
39 "Unexpected 'v128'", /* kV128 */
40
41 "Unexpected 'as'", /* kAs */
42 "Unexpected 'assign'", /* kAssign */
43 "Unexpected '&'", /* kBitwiseAnd */
44 "Unexpected '|'", /* kBitwiseOr */
45 "Unexpected '^'", /* kBitwiseXor */
46 "Unexpected 'call'", /* kCall */
47 "Unexpected ':'", /* kColon */
48 "Unexpected ','", /* kComma */
49 "Unexpected 'const'", /* kConstVal */
50 "Unexpected 'defun'", /* kDefun */
51 "Unexpected 'deobj'", /* kDeobj */
52 "Unexpected 'devar'", /* kDevar */
53 "Unexpected '/'", /* kDivide */
54 "Unexpected 'else'", /* kElse */
55 "Unexpected 'elseif'", /* kElseIf */
56 "Unexpected '='", /* kEqual */
57 "Unexpected string", /* kFilePath */
58 "Unexpected '>'", /* kGreater */
59 "Unexpected '>='", /* kGreaterOrEqual */
60 "Unexpected identifier", /* kIdentifier */
61 "Unexpected 'if'", /* kIf */
62 "Unexpected 'include'", /* kInclude */
63 "Unexpected '('", /* kLeftParen */
64 "Unexpected '<<'", /* kLeftShift */
65 "Unexpected '<'", /* kLess */
66 "Unexpected '<='", /* kLessOrEqual */
67 "Unexpected '&&'", /* kLogicalAnd */
68 "Unexpected '||'", /* kLogicalOr */
69 "Unexpected '-'", /* kMinus */
70 "Unexpected 'namespace'", /* kNamespace */
71 "Unexpected 'none'", /* kNone */
72 "Unexpected '!='", /* kNotEqual */
73 "Unexpected number", /* kNumber */
74 "Unexpected '+'", /* kPlus */
75 "Unepxpected 'return'", /* kReturn */
76 "Unexpected ')'", /* kRightParen */
77 "Unexpected '>>'", /* kRightShift */
78 "Unexpected '*'", /* kTimes */
79 "Unexpected 'to'", /* kTo */
80 "Unexpected 'while'" /* kWhile */
81 };
82
83 static void add_node_to_bb(struct cen64_compiler_node *bb,
84 struct cen64_compiler_node *node);
85
86 static struct cen64_compiler_node *conditionalize_node(
87 struct cen64_parser *parser, struct cen64_compiler_node *node);
88
89 static enum cen64_lexer_token parse_function_body(struct cen64_parser *parser,
90 struct cen64_compilation_unit *cunit, const char *namespace,
91 enum cen64_lexer_token token, struct cen64_compiler_node *bb,
92 struct cen64_compiler_node **last_bb);
93
94 static enum cen64_lexer_token parse_function_call(struct cen64_parser *parser,
95 const char *cur_namespace, struct cen64_compiler_node **call);
96
97 static enum cen64_lexer_token parse_object_typename(struct cen64_parser *parser,
98 enum cen64_lexer_token token, const char *cur_namespace,
99 struct cen64_scoped_nametype **snt);
100
101 static enum cen64_lexer_token parse_member_or_parameter(
102 struct cen64_parser *parser, enum cen64_lexer_token token,
103 const char *namespace, enum cen64_compiler_node_type node_type,
104 struct cen64_compiler_node **node);
105
106 static enum cen64_lexer_token parse_primary_expression(
107 struct cen64_parser *parser, enum cen64_lexer_token token,
108 const char *namespace, struct cen64_compiler_node **value);
109
110 static enum cen64_lexer_token parse_top_declaration(
111 struct cen64_parser *parser, struct cen64_compilation_unit *cunit,
112 enum cen64_lexer_token token, const char *namespace,
113 struct cen64_arena_alloc *arena_alloc);
114
115 static enum cen64_lexer_token parse_variable_access(struct cen64_parser *parser,
116 enum cen64_lexer_token token, const char **name);
117
118 void add_node_to_bb(struct cen64_compiler_node *bb,
119 struct cen64_compiler_node *node) {
120 if (bb->node.basic_block.tail != NULL) {
121 bb->node.basic_block.tail->next = node;
122 node->prev = bb->node.basic_block.tail;
123 bb->node.basic_block.tail = node;
124 } else {
125 bb->node.basic_block.head = bb->node.basic_block.tail = node;
126 }
127 }
128
129 struct cen64_compiler_node *conditionalize_node(
130 struct cen64_parser *parser, struct cen64_compiler_node *expr) {
131 unsigned line_number = parser->lexer.line_count;
132 struct cen64_compiler_node *outer, *zero;
133
134 zero = cen64_node_alloc(parser->node_alloc, kInteger, line_number);
135 zero->node.integer.value = 0;
136
137 outer = cen64_node_alloc(parser->node_alloc, kCondition, line_number);
138 outer->node.binary_operation.condition = kConditionNotEqual;
139 outer->node.binary_operation.left = expr;
140 outer->node.binary_operation.right = zero;
141
142 return outer;
143 }
144
145 enum cen64_lexer_token parse_function_body(struct cen64_parser *parser,
146 struct cen64_compilation_unit *cunit, const char *namespace,
147 enum cen64_lexer_token token, struct cen64_compiler_node *bb,
148 struct cen64_compiler_node **last_bb) {
149 struct cen64_compiler_node *node, *end, *expr, *inner_bb;
150 enum cen64_lexer_token old_token;
151 const char *identifier;
152 unsigned line_number;
153
154 while (token != kRightParen) {
155 line_number = parser->lexer.line_count;
156
157 switch (token) {
158 case kAssign:
159 node = cen64_node_alloc(parser->node_alloc,
160 kStore, line_number);
161
162 if ((token = cen64_lexer_lex(&parser->lexer)) <= kLastErrorToken)
163 return token;
164
165 if ((token = parse_primary_expression(parser, token,
166 namespace, &node->node.store.value)) != kTo) {
167 if (token <= kLastErrorToken)
168 return token;
169
170 parser->error_message = "Expected a 'to' after 'assign(expr'";
171 return kParseErrorToken;
172 }
173
174 token = cen64_lexer_lex(&parser->lexer);
175
176 if (token != kIdentifier && token != kColon) {
177 if (token <= kLastErrorToken)
178 return token;
179
180 parser->error_message =
181 "Expected a variable after 'assign(expr to'";
182
183 return kParseErrorToken;
184 }
185
186 token = parse_variable_access(parser, token, &node->node.store.name);
187 add_node_to_bb(bb, node);
188 break;
189
190 case kCall:
191 token = parse_function_call(parser, namespace, &node);
192 add_node_to_bb(bb, node);
193 break;
194
195 case kDevar:
196 if ((token = cen64_lexer_lex(&parser->lexer)) != kIdentifier) {
197 if (token <= kLastErrorToken)
198 return token;
199
200 parser->error_message = "Expected an identifier after '('";
201 return kParseErrorToken;
202 }
203
204 identifier = cen64_dynsize_alloc(parser->dynsize_alloc,
205 parser->lexer.result.identifier.ptr,
206 parser->lexer.result.identifier.length);
207
208 if ((token = cen64_lexer_lex(&parser->lexer)) != kAs) {
209 if (token <= kLastErrorToken)
210 return token;
211
212 parser->error_message = "Expected 'as' after '(var'";
213 return kParseErrorToken;
214 }
215
216 node = cen64_node_alloc(parser->node_alloc, kVariable, line_number);
217 node->node.variable.name = identifier;
218
219 if ((token = cen64_lexer_lex(&parser->lexer)) <= kLastErrorToken)
220 return token;
221
222 switch (token) {
223 case kI8:
224 case kU8:
225 case kI16:
226 case kU16:
227 case kI32:
228 case kU32:
229 case kI64:
230 case kU64:
231 node->node.variable.type = token;
232 token = cen64_lexer_lex(&parser->lexer);
233 break;
234
235 case kColon:
236 case kIdentifier:
237 token = parse_object_typename(parser, token, namespace,
238 &node->node.variable.object_type);
239
240 node->node.variable.type = kObjectType;
241 break;
242
243 default:
244 parser->error_message = "Expected a type after '(var as'";
245 return kParseErrorToken;
246 }
247
248 add_node_to_bb(bb, node);
249 break;
250
251 case kIf:
252 /* Allocate the BB following the last if (...) expression */
253 end = cen64_node_alloc(parser->node_alloc,
254 kBasicBlock, cunit->bb_count++);
255
256 do {
257 struct cen64_compiler_node *branch, *next;
258
259 old_token = token;
260
261 if ((token = cen64_lexer_lex(&parser->lexer)) != kLeftParen) {
262 if (token <= kLastErrorToken)
263 return token;
264
265 parser->error_message = "Expected a '(' after 'if/elseif'";
266 return kParseErrorToken;
267 }
268
269 /* Parse the condition for the if/else expression */
270 /* Parse into *this* BB if we're in the else block */
271 inner_bb = bb;
272
273 if (old_token != kElse) {
274 if ((token = parse_primary_expression(parser, token,
275 namespace, &expr)) != kLeftParen) {
276 if (token <= kLastErrorToken)
277 return token;
278
279 parser->error_message = expected_lparen;
280 return kParseErrorToken;
281 }
282
283 /* Create the BB that is used to skip over the cond. block */
284 next = cen64_node_alloc(parser->node_alloc,
285 kBasicBlock, cunit->bb_count++);
286
287 /* ... as well as the BB which actually represents the taken path */
288 inner_bb = cen64_node_alloc(parser->node_alloc,
289 kBasicBlock, cunit->bb_count++);
290
291 /* Since this is a conditional branch, we need a condition node */
292 if (expr->type != kCondition)
293 expr = conditionalize_node(parser, expr);
294
295 node = cen64_node_alloc(parser->node_alloc, kBranch, line_number);
296 node->node.branch.target_bb = inner_bb->line_number_or_id;
297
298 node->node.branch.condition = expr;
299 node->node.branch.type = kBranchConditional;
300 node->node.branch.scope_end_bb = end->line_number_or_id;
301 add_node_to_bb(bb, node);
302 }
303
304 /* Parse the subgraph into a new, inner basic block */
305 token = parse_function_body(parser, cunit, namespace,
306 cen64_lexer_lex(&parser->lexer), inner_bb, last_bb);
307
308 if (token != kRightParen) {
309 if (token < kLastErrorToken)
310 return token;
311
312 parser->error_message = expected_rparen;
313 return kParseErrorToken;
314 }
315
316 (*last_bb)->next = next;
317 bb->next = inner_bb;
318
319 /* Insert a branch to the last block in the if/elseif/else chain */
320 branch = cen64_node_alloc(parser->node_alloc, kBranch, line_number);
321 branch->node.branch.target_bb = end->line_number_or_id;
322 branch->node.branch.type = kBranchAlways;
323 add_node_to_bb(*last_bb, branch);
324
325 /* Only really needs to be done for non-else blocks... */
326 node->node.branch.nt_target_bb = next->line_number_or_id;
327 bb = next;
328 } while (((token = cen64_lexer_lex(&parser->lexer)) == kElseIf ||
329 (token == kElse)) && old_token != kElse);
330
331 /* If we had an else, reference the last block that was parsed; */
332 /* if we didn't, then we need to branch out to the end block */
333 if (old_token == kElse) {
334 if (token == kElse) {
335 parser->error_message = "if (...) chain has two else clauses";
336 return kParseErrorToken;
337 }
338
339 bb = (*last_bb);
340 } else {
341 node = cen64_node_alloc(parser->node_alloc, kBranch, line_number);
342 node->node.branch.target_bb = end->line_number_or_id;
343 node->node.branch.type = kBranchAlways;
344
345 bb->node.basic_block.head = bb->node.basic_block.tail = node;
346 }
347
348 /* Insert end into the chain of blocks for the function, */
349 /* point the current basic block to the end/last block */
350 bb->next = end;
351 bb = end;
352
353 node = NULL;
354 break;
355
356 case kReturn:
357 node = cen64_node_alloc(parser->node_alloc,
358 kFunctionReturn, line_number);
359
360 add_node_to_bb(bb, node);
361
362 /* kReturn gets converted to a kBranch, so create a new basic */
363 /* block here so that we ensure basic blocks end in branches */
364 node = cen64_node_alloc(parser->node_alloc,
365 kBasicBlock, cunit->bb_count++);
366
367 bb->next = node;
368 bb = node;
369
370 token = cen64_lexer_lex(&parser->lexer);
371 break;
372
373 case kWhile:
374 if ((token = cen64_lexer_lex(&parser->lexer)) != kLeftParen) {
375 if (token <= kLastErrorToken)
376 return token;
377
378 parser->error_message = "Expected a '(' after 'while'";
379 return kParseErrorToken;
380 }
381
382 /* Allocate a BB to hold the invariant if needed */
383 if (bb->node.basic_block.head != NULL) {
384 node = cen64_node_alloc(parser->node_alloc, kBranch, line_number);
385 node->node.branch.target_bb = cunit->bb_count;
386 node->node.branch.type = kBranchAlways;
387 add_node_to_bb(bb, node);
388
389 node = cen64_node_alloc(parser->node_alloc,
390 kBasicBlock, cunit->bb_count++);
391
392 bb = (bb->next = node);
393 }
394
395 /* Parse the invariant and insert it into the graph */
396 if ((token = parse_primary_expression(parser, token,
397 namespace, &expr)) != kLeftParen) {
398 if (token <= kLastErrorToken)
399 return token;
400
401 parser->error_message = expected_lparen;
402 return kParseErrorToken;
403 }
404
405 if (expr->type != kCondition)
406 expr = conditionalize_node(parser, expr);
407
408 end = cen64_node_alloc(parser->node_alloc,
409 kBasicBlock, cunit->bb_count++);
410
411 inner_bb = cen64_node_alloc(parser->node_alloc,
412 kBasicBlock, cunit->bb_count++);
413
414 bb->next = inner_bb;
415
416 node = cen64_node_alloc(parser->node_alloc, kBranch, line_number);
417 bb->node.basic_block.head = bb->node.basic_block.tail = node;
418
419 node->node.branch.condition = expr;
420 node->node.branch.type = kBranchConditional;
421 node->node.branch.target_bb = inner_bb->line_number_or_id;
422 node->node.branch.nt_target_bb = end->line_number_or_id;
423 node->node.branch.scope_end_bb = end->line_number_or_id;
424
425 /* Parse the body of the while loop */
426 token = parse_function_body(parser, cunit, namespace,
427 cen64_lexer_lex(&parser->lexer), inner_bb, last_bb);
428
429 if (token != kRightParen) {
430 if (token < kLastErrorToken)
431 return token;
432
433 parser->error_message = expected_rparen;
434 return kParseErrorToken;
435 }
436
437 (*last_bb)->next = end;
438
439 /* Add a branch at the end of the loop body back to the invariant */
440 node = cen64_node_alloc(parser->node_alloc, kBranch, line_number);
441 node->node.branch.target_bb = bb->line_number_or_id;
442 node->node.branch.type = kBranchAlways;
443
444 token = cen64_lexer_lex(&parser->lexer);
445 add_node_to_bb(*last_bb, node);
446 bb = end;
447 break;
448
449 default:
450 parser->error_message = unexpected_tokens[token];
451 return kParseErrorToken;
452 }
453
454 if (token <= kLastErrorToken)
455 return token;
456 }
457
458 *last_bb = bb;
459 return token;
460 }
461
462 enum cen64_lexer_token parse_function_call(struct cen64_parser *parser,
463 const char *cur_namespace, struct cen64_compiler_node **call) {
464 struct cen64_compiler_node **value, *node;
465 enum cen64_lexer_token token;
466
467 *call = cen64_node_alloc(parser->node_alloc,
468 kFunctionCall, parser->lexer.line_count);
469
470 if ((token = cen64_lexer_lex(&parser->lexer)) <= kLastErrorToken)
471 return token;
472
473 /* Object typenames and functions are parsed similarly... */
474 if ((token = parse_object_typename(parser, token, cur_namespace,
475 &(*call)->node.function_call.scoped_name)) <= kLastErrorToken) {
476 return token;
477 }
478
479 /* Hijack the error message coming from parse_object_typename */
480 if (token == kParseErrorToken) {
481 parser->error_message = "Expected a function name after ':'";
482 return token;
483 } else if (token != kLeftParen) {
484 if (token <= kLastErrorToken)
485 return token;
486
487 parser->error_message = expected_lparen;
488 return kParseErrorToken;
489 }
490
491 if ((token = cen64_lexer_lex(&parser->lexer)) <= kLastErrorToken)
492 return token;
493
494 for (value = &(*call)->node.function_call.params; token != kRightParen; ) {
495 if ((token = parse_primary_expression(parser,
496 token, cur_namespace, value)) <= kLastErrorToken) {
497 return token;
498 }
499
500 if (token == kComma) {
501 token = cen64_lexer_lex(&parser->lexer);
502 } else if (token != kRightParen) {
503 parser->error_message = unexpected_tokens[token];
504 return kParseErrorToken;
505 }
506
507 value = &(*value)->next;
508 }
509
510 for (node = (*call)->node.function_call.params;
511 node->next != NULL; node = node->next) {
512 node->next->prev = node;
513 }
514
515 return cen64_lexer_lex(&parser->lexer);
516 }
517
518 enum cen64_lexer_token parse_object_typename(struct cen64_parser *parser,
519 enum cen64_lexer_token token, const char *cur_namespace,
520 struct cen64_scoped_nametype **snt) {
521 *snt = cen64_scoped_nametype_alloc(parser->dynsize_alloc);
522
523 (*snt)->namespace = (*snt)->nametype = (cur_namespace != NULL
524 ? cur_namespace
525 : "");
526
527 if (token == kIdentifier) {
528 (*snt)->nametype = cen64_dynsize_alloc(parser->dynsize_alloc,
529 parser->lexer.result.identifier.ptr,
530 parser->lexer.result.identifier.length);
531
532 if ((token = cen64_lexer_lex(&parser->lexer)) <= kLastErrorToken)
533 return token;
534 }
535
536 if (token == kColon) {
537 if ((token = cen64_lexer_lex(&parser->lexer)) != kIdentifier) {
538 if (token <= kLastErrorToken)
539 return token;
540
541 parser->error_message = "Expected an object type after ':'";
542 return kParseErrorToken;
543 }
544
545 (*snt)->namespace = (*snt)->nametype;
546 (*snt)->nametype = cen64_dynsize_alloc(parser->dynsize_alloc,
547 parser->lexer.result.identifier.ptr,
548 parser->lexer.result.identifier.length);
549
550 token = cen64_lexer_lex(&parser->lexer);
551 }
552
553 return token;
554 }
555
556 enum cen64_lexer_token parse_member_or_parameter(
557 struct cen64_parser *parser, enum cen64_lexer_token token,
558 const char *namespace, enum cen64_compiler_node_type node_type,
559 struct cen64_compiler_node **node) {
560 struct cen64_scoped_nametype *object_type;
561 enum cen64_lexer_token type;
562 const char *identifier;
563 unsigned qualifiers;
564
565 *node = cen64_node_alloc(parser->node_alloc,
566 node_type, parser->lexer.line_count);
567
568 for (qualifiers = CEN64_COMPILER_QUALIFIER_NONE, type = kLexErrorToken,
569 object_type = NULL, identifier = NULL; ; ) {
570 switch (token) {
571 case kConstVal:
572 if (qualifiers & CEN64_COMPILER_QUALIFIER_CONST) {
573 parser->error_message = "The same qualifier appears twice";
574 return kParseErrorToken;
575 }
576
577 if (type != kLexErrorToken) {
578 parser->error_message = "Qualifier must proceed type";
579 return kParseErrorToken;
580 }
581
582 qualifiers |= CEN64_COMPILER_QUALIFIER_CONST;
583 break;
584
585 case kI8:
586 case kU8:
587 case kI16:
588 case kU16:
589 case kI32:
590 case kU32:
591 case kI64:
592 case kU64:
593 case kV128:
594 if (type != kLexErrorToken) {
595 parser->error_message = "Definition with more than one type appears";
596 return kParseErrorToken;
597 }
598
599 type = token;
600 break;
601
602 case kColon:
603 case kIdentifier:
604 if (type == kLexErrorToken) {
605 token = parse_object_typename(parser, token, namespace, &object_type);
606
607 if (token <= kLastErrorToken)
608 return token;
609
610 type = kObjectType;
611 continue;
612 } else if (token == kIdentifier) {
613 identifier = cen64_dynsize_alloc(parser->dynsize_alloc,
614 parser->lexer.result.identifier.ptr,
615 parser->lexer.result.identifier.length);
616 } else {
617 parser->error_message = unexpected_tokens[token];
618 return kParseErrorToken;
619 }
620
621 break;
622
623 default:
624 parser->error_message = token == kComma || token == kRightParen
625 ? (const char *) "Definition is missing an identifier"
626 : (const char *) "Unexpected token appears in definition";
627 }
628
629 if (token == kIdentifier)
630 break;
631
632 if ((token = cen64_lexer_lex(&parser->lexer)) <= kLastErrorToken)
633 return token;
634 }
635
636 if ((*node)->type == kObjectMember) {
637 (*node)->node.object_member.type = type;
638 (*node)->node.object_member.qualifiers = qualifiers;
639 (*node)->node.object_member.name = identifier;
640 (*node)->node.object_member.object_type = object_type;
641 } else {
642 (*node)->node.variable.type = type;
643 (*node)->node.variable.qualifiers = qualifiers;
644 (*node)->node.variable.name = identifier;
645 (*node)->node.variable.object_type = object_type;
646 }
647
648 return token;
649 }
650
651 enum cen64_lexer_token parse_primary_expression(struct cen64_parser *parser,
652 enum cen64_lexer_token token, const char *namespace,
653 struct cen64_compiler_node **value) {
654 enum cen64_compiler_node_type type;
655
656 /* Do not explicitly require '(get var)' or '(num)' here */
657 /* We should also accept 'var' and 'num' in this context */
658 if (token != kLeftParen) {
659 switch (token) {
660 case kCall:
661 return parse_function_call(parser, namespace, value);
662
663 case kColon:
664 case kIdentifier:
665 *value = cen64_node_alloc(parser->node_alloc,
666 kLoad, parser->lexer.line_count);
667
668 return parse_variable_access(parser, token,
669 &((*value)->node.store.name));
670
671 case kNumber:
672 *value = cen64_node_alloc(parser->node_alloc,
673 kInteger, parser->lexer.line_count);
674
675 (*value)->node.integer.value = (long int) parser->lexer.result.number;
676 break;
677
678 default:
679 break;
680 }
681 }
682
683 /* Use recursion to expand the inner expression if we have a left paren */
684 else {
685 enum cen64_binary_operation operation = kLastBinaryOperation;
686 enum cen64_condition condition = kLastCondition;
687
688 struct cen64_compiler_node *inner_value = NULL;
689 struct cen64_compiler_node *outer_value;
690
691 if ((token = cen64_lexer_lex(&parser->lexer)) <= kLastErrorToken)
692 return token;
693
694 if ((token = parse_primary_expression(parser,
695 token, namespace, &inner_value)) <= kLastErrorToken) {
696 return token;
697 }
698
699 /* Attempt to parse the binary expression following us */
700 switch (token) {
701 case kBitwiseAnd:
702 type = kBinaryOperation;
703 operation = kAndOp;
704 break;
705
706 case kBitwiseOr:
707 type = kBinaryOperation;
708 operation = kOrOp;
709 break;
710
711 case kBitwiseXor:
712 type = kBinaryOperation;
713 operation = kXorOp;
714 break;
715
716 case kDivide:
717 type = kBinaryOperation;
718 operation = kDivOp;
719 break;
720
721 case kEqual:
722 type = kCondition;
723 condition = kConditionEqual;
724 break;
725
726 case kGreater:
727 type = kCondition;
728 condition = kConditionGreater;
729 break;
730
731 case kGreaterOrEqual:
732 type = kCondition;
733 condition = kConditionGreaterOrEqual;
734 break;
735
736 case kLeftShift:
737 type = kBinaryOperation;
738 operation = kLeftShiftOp;
739 break;
740
741 case kLess:
742 type = kCondition;
743 condition = kConditionLess;
744 break;
745
746 case kLessOrEqual:
747 type = kCondition;
748 condition = kConditionLessOrEqual;
749 break;
750
751 case kLogicalAnd:
752 type = kCondition;
753 condition = kConditionAnd;
754 break;
755
756 case kLogicalOr:
757 type = kCondition;
758 condition = kConditionOr;
759 break;
760
761 case kMinus:
762 type = kBinaryOperation;
763 operation = kSubOp;
764 break;
765
766 case kNotEqual:
767 type = kCondition;
768 condition = kConditionNotEqual;
769 break;
770
771 case kPlus:
772 type = kBinaryOperation;
773 operation = kAddOp;
774 break;
775
776 case kRightParen:
777 *value = inner_value;
778 return cen64_lexer_lex(&parser->lexer);
779
780 case kRightShift:
781 type = kBinaryOperation;
782 operation = kRightShiftOp;
783 break;
784
785 case kTimes:
786 type = kBinaryOperation;
787 operation = kMulOp;
788 break;
789
790 default:
791 parser->error_message = unexpected_tokens[token];
792 return kParseErrorToken;
793 }
794
795 outer_value = cen64_node_alloc(parser->node_alloc,
796 type, parser->lexer.line_count);
797
798 outer_value->node.binary_operation.condition = condition;
799 outer_value->node.binary_operation.operation = operation;
800 outer_value->node.binary_operation.left = inner_value;
801 inner_value = NULL;
802
803 if ((token = cen64_lexer_lex(&parser->lexer)) <= kLastErrorToken)
804 return token;
805
806 if ((token = parse_primary_expression(parser,
807 token, namespace, &inner_value)) != kRightParen) {
808 if (token <= kLastErrorToken)
809 return token;
810
811 parser->error_message = expected_rparen;
812 return kParseErrorToken;
813 }
814
815 outer_value->node.binary_operation.right = inner_value;
816 *value = outer_value;
817
818 /* If this is a logical and or a logical or, then we need to make sure */
819 /* both both children are comparisons nodes as well (or convert them) */
820 if ((*value)->type == kCondition && (
821 (*value)->node.binary_operation.condition == kConditionAnd ||
822 (*value)->node.binary_operation.condition == kConditionOr)) {
823 struct cen64_compiler_node *left = (*value)->node.binary_operation.left;
824 struct cen64_compiler_node *right = (*value)->node.binary_operation.right;
825
826 if (left->type != kCondition) {
827 (*value)->node.binary_operation.left =
828 conditionalize_node(parser, left);
829 }
830
831 if (right->type != kCondition) {
832 (*value)->node.binary_operation.right =
833 conditionalize_node(parser, right);
834 }
835 }
836 }
837
838 return cen64_lexer_lex(&parser->lexer);
839 }
840
841 enum cen64_lexer_token parse_top_declaration(
842 struct cen64_parser *parser, struct cen64_compilation_unit *cunit,
843 enum cen64_lexer_token token, const char *namespace,
844 struct cen64_arena_alloc *arena_alloc) {
845 struct cen64_compiler_node *node, *temp_node, *members, **params;
846 const char *identifier, *object_type;
847 struct cen64_parser *nested_parser;
848 unsigned line_number;
849
850 if (token > kLastErrorToken && token != kLeftParen) {
851 parser->error_message = expected_lparen;
852 return kParseErrorToken;
853 }
854
855 while ((token = cen64_lexer_lex(&parser->lexer)) != 0) {
856 line_number = parser->lexer.line_count;
857
858 switch (token) {
859 case kDefun:
860 cunit->function_count++;
861
862 node = cen64_node_alloc(parser->node_alloc, kFunction, line_number);
863
864 if ((token = cen64_lexer_lex(&parser->lexer)) <= kLastErrorToken)
865 return token;
866
867 switch (token) {
868 case kI8:
869 case kU8:
870 case kI16:
871 case kU16:
872 case kI32:
873 case kU32:
874 case kI64:
875 case kU64:
876 case kV128:
877 case kNone:
878 node->node.function.return_type = token;
879 token = cen64_lexer_lex(&parser->lexer);
880 break;
881
882 case kColon:
883 case kIdentifier:
884 node->node.function.return_type = kObjectType;
885 token = parse_object_typename(parser, token, namespace,
886 &node->node.function.object_type);
887
888 break;
889
890 default:
891 parser->error_message = "Expected a return type name after 'defun'";
892 return kParseErrorToken;
893 }
894
895 if (token != kIdentifier) {
896 if (token <= kLastErrorToken)
897 return token;
898
899 parser->error_message = "Expected a function name after 'defun type'";
900 return kParseErrorToken;
901 }
902
903 identifier = cen64_dynsize_alloc(parser->dynsize_alloc,
904 parser->lexer.result.identifier.ptr,
905 parser->lexer.result.identifier.length);
906
907 cunit->program_node->tail->next = node;
908 node->prev = cunit->program_node->tail;
909 cunit->program_node->tail = node;
910 node->node.function.namespace = namespace == NULL ? "" : namespace;
911 node->node.function.name = identifier;
912 node->node.function.params_and_entry = NULL;
913
914 /* Parse parameters to function */
915 if ((token = cen64_lexer_lex(&parser->lexer)) != kLeftParen) {
916 if (token <= kLastErrorToken)
917 return token;
918
919 parser->error_message = "Expected a parameter list after 'defun ...'";
920 return kParseErrorToken;
921 }
922
923 identifier = NULL;
924 params = &node->node.function.params_and_entry;
925
926 /* Parse function parameters */
927 while ((token = cen64_lexer_lex(&parser->lexer)) != kRightParen) {
928 if (token <= kLastErrorToken)
929 return token;
930
931 if (*params != NULL) {
932 if (token != kComma) {
933 parser->error_message = "Expected ',' after function parameter";
934 return kParseErrorToken;
935 }
936
937 if ((token = cen64_lexer_lex(&parser->lexer)) <= kLastErrorToken)
938 return token;
939 }
940
941 if ((token = parse_member_or_parameter(parser, token, namespace,
942 kVariable, params)) <= kLastErrorToken) {
943 return token;
944 }
945
946 params = &(*params)->next;
947 }
948
949 /* Create the entry node and parse the function body */
950 (*params) = cen64_node_alloc(parser->node_alloc,
951 kFunctionEntryExit, line_number);
952
953 node = cen64_node_alloc(parser->node_alloc,
954 kBasicBlock, cunit->bb_count++);
955
956 (*params)->node.function_entry_exit.entry = node;
957
958 if ((token = parse_function_body(parser, cunit, namespace,
959 cen64_lexer_lex(&parser->lexer), node, &node))
960 <= kLastErrorToken) {
961 return token;
962 }
963
964 /* Create the exit node and insert a branch to the exit node */
965 temp_node = cen64_node_alloc(parser->node_alloc, kBranch, line_number);
966 temp_node->node.branch.target_bb = cunit->bb_count++;
967 temp_node->node.branch.type = kBranchAlways;
968 add_node_to_bb(node, temp_node);
969
970 node->next = cen64_node_alloc(parser->node_alloc, kBasicBlock, ~0U);
971 node->next->line_number_or_id = temp_node->node.branch.target_bb;
972 (*params)->node.function_entry_exit.exit = node->next;
973
974 /* Replace all instances of kFunctionReturn with kBranch */
975 for (node = (*params)->node.function_entry_exit.entry;
976 node != NULL; node = node->next) {
977 for (temp_node = node->node.basic_block.head;
978 temp_node != NULL; temp_node = temp_node->next) {
979 if (temp_node->type == kFunctionReturn) {
980 temp_node->type = kBranch;
981 temp_node->node.branch.type = kBranchAlways;
982 temp_node->node.branch.target_bb = (*params)->node.
983 function_entry_exit.exit->line_number_or_id;
984 }
985 }
986 }
987
988 break;
989
990 case kDeobj:
991 cunit->object_count++;
992
993 if ((token = cen64_lexer_lex(&parser->lexer)) != kIdentifier) {
994 if (token <= kLastErrorToken)
995 return token;
996
997 parser->error_message = "Expected a type name after 'deobj'";
998 return kParseErrorToken;
999 }
1000
1001 node = cen64_node_alloc(parser->node_alloc, kObject, line_number);
1002
1003 object_type = cen64_dynsize_alloc(parser->dynsize_alloc,
1004 parser->lexer.result.identifier.ptr,
1005 parser->lexer.result.identifier.length);
1006
1007 cunit->program_node->tail->next = node;
1008 node->prev = cunit->program_node->tail;
1009 cunit->program_node->tail = node;
1010 node->node.object.namespace = namespace == NULL ? "" : namespace;
1011 node->node.object.type = object_type;
1012 node->node.object.member_count = 0;
1013 node->node.object.members = NULL;
1014
1015 identifier = NULL;
1016 members = NULL;
1017
1018 /* Parse object body */
1019 while ((token = cen64_lexer_lex(&parser->lexer)) != kRightParen) {
1020 if (token <= kLastErrorToken)
1021 return token;
1022
1023 if (members != NULL) {
1024 if (token != kComma) {
1025 parser->error_message = "Expected ',' after member definition";
1026 return kParseErrorToken;
1027 }
1028
1029 if ((token = cen64_lexer_lex(&parser->lexer)) <= kLastErrorToken)
1030 return token;
1031 }
1032
1033 if ((token = parse_member_or_parameter(parser, token, namespace,
1034 kObjectMember, &temp_node)) <= kLastErrorToken) {
1035 return token;
1036 }
1037
1038 if (members == NULL)
1039 node->node.object.members = temp_node;
1040
1041 else {
1042 members->next = temp_node;
1043 temp_node->prev = members;
1044 }
1045
1046 node->node.object.member_count++;
1047 members = temp_node;
1048 }
1049
1050 if (node->node.object.member_count == 0) {
1051 parser->error_message = "Object must contain at least one member";
1052 return kParseErrorToken;
1053 }
1054
1055 break;
1056
1057 case kInclude:
1058 if ((token = cen64_lexer_lex(&parser->lexer)) <= kLastErrorToken)
1059 return token;
1060
1061 switch (token) {
1062 case kFilePath:
1063 case kIdentifier:
1064 identifier = cen64_dynsize_alloc(parser->dynsize_alloc,
1065 parser->lexer.result.identifier.ptr,
1066 parser->lexer.result.identifier.length);
1067
1068 /* Create a temporary nested parser, parse the included file */
1069 nested_parser = cen64_arena_alloc(
1070 arena_alloc, sizeof(*nested_parser));
1071
1072 if (cen64_parser_create(nested_parser, identifier,
1073 parser->dynsize_alloc, parser->node_alloc)) {
1074 parser->error_message = "Failed to include file";
1075 return kParseErrorToken;
1076 }
1077
1078 parser->nested_parser = parser;
1079
1080 if ((token = cen64_parser_parse(
1081 nested_parser, arena_alloc, cunit))) {
1082 return kParseErrorToken;
1083 }
1084
1085 parser->nested_parser = NULL;
1086
1087 /* XXX: We have no mechanism to free individual arena */
1088 /* objects right now, but in the future, free this... */
1089 cen64_parser_destroy(nested_parser);
1090 cen64_arena_free(arena_alloc, nested_parser, sizeof(*nested_parser));
1091 break;
1092
1093 default:
1094 break;
1095 }
1096
1097 if ((token = cen64_lexer_lex(&parser->lexer)) != kRightParen) {
1098 if (token < kLastErrorToken)
1099 return token;
1100
1101 parser->error_message = expected_rparen;
1102 return kParseErrorToken;
1103 }
1104
1105 break;
1106
1107 case kNamespace:
1108 if (namespace != NULL) {
1109 parser->error_message = "Unsupported use of nested namespace";
1110 return kParseErrorToken;
1111 }
1112
1113 if ((token = cen64_lexer_lex(&parser->lexer)) != kIdentifier) {
1114 if (token <= kLastErrorToken)
1115 return token;
1116
1117 parser->error_message = "Expected an identifier after 'namespace'";
1118 return kParseErrorToken;
1119 }
1120
1121 identifier = cen64_dynsize_alloc(parser->dynsize_alloc,
1122 parser->lexer.result.identifier.ptr,
1123 parser->lexer.result.identifier.length);
1124
1125 while ((token = cen64_lexer_lex(&parser->lexer)) != kRightParen) {
1126 if (token <= kLastErrorToken)
1127 return token;
1128
1129 if ((token = parse_top_declaration(parser, cunit,
1130 token, identifier, arena_alloc)) <= kLastErrorToken) {
1131 return token;
1132 }
1133 }
1134
1135 return kRightParen;
1136
1137 case kRightParen:
1138 return kRightParen;
1139
1140 /* Propagate error messages */
1141 case kEndToken:
1142 case kIOErrorToken:
1143 case kLastErrorToken:
1144 case kLexErrorToken:
1145 case kParseErrorToken:
1146 return token;
1147
1148 default:
1149 parser->error_message = unexpected_tokens[token];
1150 return kParseErrorToken;
1151 }
1152
1153 if (token == kRightParen)
1154 return kRightParen;
1155 }
1156
1157 return token;
1158 }
1159
1160 enum cen64_lexer_token parse_variable_access(struct cen64_parser *parser,
1161 enum cen64_lexer_token token, const char **name) {
1162 struct cen64_scoped_nametype *snt;
1163
1164 if ((token = parse_object_typename(parser, token,
1165 NULL, &snt)) <= kLastErrorToken) {
1166 return token;
1167 }
1168
1169 /* Hijack the error message coming from parse_object_typename */
1170 if (token == kParseErrorToken) {
1171 parser->error_message = "Expected a variable/object name after ':'";
1172 return token;
1173 }
1174
1175 if (*snt->namespace != '\0') {
1176 parser->error_message = "Access to variable in another namespace appears";
1177 return kParseErrorToken;
1178 }
1179
1180 *name = snt->nametype;
1181 return token;
1182 }
1183
1184 int cen64_parser_create(struct cen64_parser *parser, const char *path,
1185 struct cen64_dynsize_alloc *dynsize_alloc,
1186 struct cen64_node_alloc *node_alloc) {
1187 int status;
1188
1189 if ((status = cen64_lexer_create(&parser->lexer, path)))
1190 return status;
1191
1192 parser->dynsize_alloc = dynsize_alloc;
1193 parser->node_alloc = node_alloc;
1194 parser->nested_parser = NULL;
1195
1196 return 0;
1197 }
1198
1199 int cen64_parser_destroy(struct cen64_parser *parser) {
1200 if (parser->nested_parser)
1201 cen64_parser_destroy(parser->nested_parser);
1202
1203 return cen64_lexer_destroy(&parser->lexer);
1204 }
1205
1206 enum cen64_lexer_token cen64_parser_parse(struct cen64_parser *parser,
1207 struct cen64_arena_alloc *arena_alloc,
1208 struct cen64_compilation_unit *cunit) {
1209 enum cen64_lexer_token token;
1210
1211 while ((token = cen64_lexer_lex(&parser->lexer)) != kEndToken) {
1212 if ((token = parse_top_declaration(parser, cunit,
1213 token, NULL, arena_alloc)) <= kLastErrorToken) {
1214 switch (token) {
1215 case kEndToken:
1216 parser->error_message = "Unexpected end of file";
1217 break;
1218
1219 case kIOErrorToken:
1220 parser->error_message = "I/O error while compiling";
1221 break;
1222
1223 case kLexErrorToken:
1224 parser->error_message = parser->lexer.result.error_message;
1225 break;
1226
1227 case kParseErrorToken:
1228 break;
1229
1230 default:
1231 case kLastErrorToken:
1232 parser->error_message = "Internal compiler error";
1233 break;
1234 }
1235
1236 return token;
1237 }
1238 }
1239
1240 return kSuccess;
1241 }
1242