milly/src/parser.c

501 lines
15 KiB
C

#include "parser.h"
#include "decl.h"
#include "type.h"
#include "expr.h"
#include "pattern.h"
#include <stdnoreturn.h>
#include <stdarg.h>
#include <stdlib.h>
/* Forward declarations: these functions are referenced by code that
 * appears earlier in this file than their definitions (the type, pattern
 * and declaration parsers call each other recursively).
 */
static struct type *parse_type(struct parser *p);
static struct pattern *parse_pattern(struct parser *p);
static struct pattern_list *parse_pattern_list(struct parser *p, enum token_type delim);
/* Prepare parser `p` to read its input from the stream `in`.
 *
 * The embedded lexer is initialized first; one token is then fetched
 * right away so that the lookahead stored in `p->cur` is filled in
 * before any parsing function inspects it.
 */
void init_parser(struct parser *p, FILE *in) {
    init_lexer(&p->lex, in);

    /* Prime the one-token lookahead */
    lex_next(&p->lex, &p->cur, &p->loc);
}
/* Get current lookahead token type */
static inline enum token_type cur_tok(const struct parser *p) {
return p->cur.type;
}
/* Return the lexeme (source text) attached to the current lookahead token.
 * The pointer is handed out as stored in the token; no copy is made here.
 */
static inline char *cur_lexeme(struct parser *p) {
    char *text = p->cur.lexeme;
    return text;
}
/* Advance the parser by one token: the lexer overwrites the lookahead
 * token `p->cur` and the location `p->loc` with the next token's data.
 */
static inline void consume(struct parser *p) {
    lex_next(&p->lex,
             &p->cur,
             &p->loc);
}
static noreturn void report_error(struct parser *p, const char *fmt, ...) {
/* Report error at the beginning of the current lookahead position */
if (fprintf(stderr, "Error at %ld:%ld ", p->loc.line, p->loc.col) < 0) {
exit(EXIT_FAILURE);
}
va_list args;
va_start(args, fmt);
vfprintf(stderr, fmt, args);
va_end(args);
exit(EXIT_FAILURE);
}
/* Consume next token if it is of the required type, otherwise fail with error */
static void expect(struct parser *p, enum token_type t) {
if (cur_tok(p) != t) {
report_error(p, "Expected `%s`, but found `%s`.\n", token_descr(t), token_descr(cur_tok(p)));
}
consume(p);
}
/* Types */
static struct type *parse_paren_type(struct parser *p) {
struct type *res;
struct type *t = parse_type(p);
if (cur_tok(p) == tok_comma) {
consume(p);
struct type_list_builder list = { NULL };
type_list_append(&list, t);
t = parse_type(p);
type_list_append(&list, t);
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
expect(p, tok_comma);
t = parse_type(p);
type_list_append(&list, t);
}
res = make_tuple_type(list.head);
}
else {
res = t;
}
expect(p, tok_right_paren);
return res;
}
/* sets *is_ident to true if the result is a single type name */
static struct type *try_parse_atomic_type_piece(struct parser *p, bool *is_ident) {
struct type *res = NULL;
*is_ident = false;
switch (cur_tok(p)) {
case tok_left_paren:
consume(p);
res = parse_paren_type(p);
break;
case tok_param_ident:
res = make_var_type(cur_lexeme(p));
consume(p);
break;
case tok_ident:
*is_ident = true;
res = make_type_name(NULL, cur_lexeme(p));
consume(p);
break;
default:
res = NULL;
}
return res;
}
/* Parse an atomic type: a sequence of one or more adjacent type pieces.
 *
 * A single piece is returned unchanged. When several pieces follow one
 * another, the last one must be a plain type name; all the pieces before
 * it become the parameter list of that name (postfix type application).
 *
 * Errors (no type at all, or a final piece that is not a plain name)
 * are reported through report_error(), which terminates the program.
 */
static struct type *parse_atomic_type(struct parser *p) {
    bool last_is_ident;
    struct type *last = try_parse_atomic_type_piece(p, &last_is_ident);
    if (last == NULL) {
        report_error(p, "Expected type.\n");
    }

    bool next_is_ident;
    struct type *next = try_parse_atomic_type_piece(p, &next_is_ident);
    if (next == NULL) {
        /* Only one piece: nothing to apply */
        return last;
    }

    /* Invariant: `last` is the most recently accepted piece and `next`
     * is the piece that follows it. Each time a follower exists, `last`
     * is demoted to a parameter and the follower takes its place.
     */
    struct type_list_builder params = { NULL };
    do {
        type_list_append(&params, last);
        last_is_ident = next_is_ident;
        last = next;
        next = try_parse_atomic_type_piece(p, &next_is_ident);
    } while (next != NULL);

    /* The final piece names the type being applied */
    if (!last_is_ident) {
        report_error(p, "Invalid type, expected a simple type name here.\n");
    }

    /* Only the name of the final piece is needed: read it, release the
     * temporary type object, and build the parameterized type name.
     * TODO: avoid allocating the temporary object in the first place.
     */
    char *name = last->type_name.name;
    free(last);
    return make_type_name(params.head, name);
}
/* Parse a type: an atomic type optionally followed by `->` and another
 * type. Because the right-hand side is parsed by recursion, the arrow
 * associates to the right.
 */
static struct type *parse_type(struct parser *p) {
    struct type *dom = parse_atomic_type(p);

    if (cur_tok(p) != tok_arrow) {
        return dom;
    }

    consume(p);
    struct type *cod = parse_type(p);
    return make_func_type(dom, cod);
}
/* Definitions */
static struct var_list *parse_def_var_list(struct parser *p) {
struct var_list_builder params = { NULL };
while (cur_tok(p) == tok_param_ident) {
var_list_append(&params, cur_lexeme(p));
consume(p);
}
return params.head;
}
static void parse_datatype_constructor(struct parser *p, struct constructor_list_builder *b) {
if (cur_tok(p) != tok_ident) {
report_error(p, "Invalid datatype constructor `%s`, expected an identifier.\n", token_descr(cur_tok(p)));
}
char *ctor_name = cur_lexeme(p);
consume(p);
struct type *ty = NULL;
if (cur_tok(p) == tok_of) {
consume(p);
ty = parse_type(p);
}
constructor_list_append(b, ctor_name, ty);
}
/* Parse a datatype declaration; the introducing keyword has already
 * been consumed by the caller.
 *
 * Shape: [param-idents] name `{` ctor [`,` ctor]* `}`
 * The constructor list may be empty. Errors are reported through
 * report_error()/expect(), which terminate the program.
 */
static struct decl *parse_datatype_decl(struct parser *p) {
    struct var_list *type_params = parse_def_var_list(p);

    if (cur_tok(p) != tok_ident) {
        report_error(p, "Invalid datatype name `%s`, expected an identifier.\n", token_descr(cur_tok(p)));
    }
    char *name = cur_lexeme(p);
    consume(p);

    expect(p, tok_left_brace);

    /* Comma-separated constructors: every one but the first must be
     * preceded by a comma.
     */
    struct constructor_list_builder ctors = { NULL };
    bool is_first = true;
    while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
        if (!is_first) {
            expect(p, tok_comma);
        }
        parse_datatype_constructor(p, &ctors);
        is_first = false;
    }

    expect(p, tok_right_brace);
    return make_datatype_decl(type_params, name, ctors.head);
}
/* Parse a type alias declaration; the introducing keyword has already
 * been consumed by the caller.
 *
 * Shape: [param-idents] name `=` type
 * Errors are reported through report_error()/expect(), which terminate
 * the program.
 */
static struct decl *parse_alias_decl(struct parser *p) {
    struct var_list *type_params = parse_def_var_list(p);

    bool has_name = (cur_tok(p) == tok_ident);
    if (!has_name) {
        report_error(p, "Invalid type name, expected an identifier.\n");
    }
    char *alias = cur_lexeme(p);
    consume(p);

    expect(p, tok_equal);
    struct type *aliased = parse_type(p);

    return make_alias_decl(type_params, alias, aliased);
}
/* Parse a typecheck declaration; the introducing keyword has already
 * been consumed by the caller.
 *
 * Shape: ident `:` type
 * Errors are reported through report_error()/expect(), which terminate
 * the program.
 */
static struct decl *parse_typecheck_decl(struct parser *p) {
    bool has_name = (cur_tok(p) == tok_ident);
    if (!has_name) {
        report_error(p, "Expected identifier.\n");
    }
    char *subject = cur_lexeme(p);
    consume(p);

    expect(p, tok_colon);
    struct type *annotation = parse_type(p);

    return make_typecheck_decl(subject, annotation);
}
struct decl *parse_value_or_func_decl(struct parser *p) {
if (cur_tok(p) != tok_ident) {
report_error(p, "Invalid definition, expected an identifier.\n");
}
char *ident = cur_lexeme(p);
consume(p);
if (cur_tok(p) == tok_equal) {
/* It's a value definition */
consume(p);
struct expr *body = parse_expr(p);
return make_value_decl(ident, body);
}
else {
/* It's a function definition */
struct func_decl_list_builder func = { NULL };
struct pattern_list *args = parse_pattern_list(p, tok_equal);
expect(p, tok_equal);
struct expr *body = parse_expr(p);
func_decl_list_append(&func, ident, args, body);
while (cur_tok(p) == tok_pipe) {
consume(p);
if (cur_tok(p) != tok_ident) {
report_error(p, "Invalid function case definition, expected an identifier.\n");
}
ident = cur_lexeme(p);
consume(p);
args = parse_pattern_list(p, tok_equal);
if (args == NULL) {
report_error(p, "Expected at least one argument in function definition.\n");
}
expect(p, tok_equal);
body = parse_expr(p);
func_decl_list_append(&func, ident, args, body);
}
return make_func_decl(func.head);
}
}
struct decl *parse_decl(struct parser *p) {
switch (cur_tok(p)) {
case tok_datatype:
consume(p);
return parse_datatype_decl(p);
case tok_alias:
consume(p);
return parse_alias_decl(p);
case tok_typecheck:
consume(p);
return parse_typecheck_decl(p);
case tok_def:
consume(p);
return parse_value_or_func_decl(p);
default:
report_error(p, "Declaration expected, invalid token `%s`\n", token_descr(cur_tok(p)));
}
}
/* Expressions */
static struct expr *parse_list_literal(struct parser *p) {
struct expr_list_builder elems = { NULL };
struct expr *e;
if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
e = parse_expr(p);
expr_list_append(&elems, e);
}
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
expect(p, tok_comma);
e = parse_expr(p);
expr_list_append(&elems, e);
}
expect(p, tok_right_square);
return make_list_lit(elems.head);
}
/* Parse the inside of a parenthesized expression; the opening `(` has
 * already been consumed by the caller.
 *
 * - `e )`              yields e itself.
 * - `e1, e2 [, en]* )` yields a tuple literal built with make_tuple_lit().
 *
 * The closing `)` is consumed. Malformed input is reported through
 * expect()/report_error(), which terminate the program.
 */
static struct expr *parse_paren_expr(struct parser *p) {
    struct expr *e = parse_expr(p);
    if (cur_tok(p) == tok_comma) {
        consume(p);
        struct expr_list_builder elems = { NULL };
        expr_list_append(&elems, e);
        /* The first comma guarantees a second element */
        e = parse_expr(p);
        expr_list_append(&elems, e);
        /* Further elements are each introduced by a comma; the loop ends
         * at the closing `)` (or at end of input, which the final
         * expect() below reports as an error).
         */
        while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
            expect(p, tok_comma);
            e = parse_expr(p);
            expr_list_append(&elems, e);
        }
        e = make_tuple_lit(elems.head);
    }
    expect(p, tok_right_paren);
    return e;
}
/* Try to parse one atomic expression: an identifier, an integer, boolean
 * or string literal, a list literal `[...]`, or a parenthesized
 * expression `(...)`.
 *
 * Returns NULL, without consuming anything, when the lookahead cannot
 * start an atomic expression.
 */
static struct expr *try_parse_atomic_expr(struct parser *p) {
    struct expr *atom = NULL;

    switch (cur_tok(p)) {
    /* Bracketed forms: skip the opener and delegate */
    case tok_left_square:
        consume(p);
        return parse_list_literal(p);
    case tok_left_paren:
        consume(p);
        return parse_paren_expr(p);

    /* Single-token forms: build the node, then skip the token below */
    case tok_ident:
        atom = make_ident_expr(cur_lexeme(p));
        break;
    case tok_int:
        atom = make_int_lit(cur_lexeme(p));
        break;
    case tok_true:
        atom = make_bool_lit(true);
        break;
    case tok_false:
        atom = make_bool_lit(false);
        break;
    case tok_string:
        atom = make_string_lit(cur_lexeme(p));
        break;

    default:
        return NULL;
    }

    consume(p);
    return atom;
}
static struct expr *parse_fun_app(struct parser *p) {
struct expr *fun = try_parse_atomic_expr(p);
if (!fun) {
report_error(p, "Expected expression.\n");
}
struct expr_list_builder args = { NULL };
struct expr *arg;
while ((arg = try_parse_atomic_expr(p))) {
expr_list_append(&args, arg);
}
return make_func_app(fun, args.head);
}
static struct expr *parse_match_expr(struct parser *p) {
struct expr *cond = parse_expr(p);
expect(p, tok_left_brace);
struct pattern *pattern;
struct expr *body;
struct case_list_builder cases = { NULL };
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
expect(p, tok_case);
pattern = parse_pattern(p);
expect(p, tok_arrow);
body = parse_expr(p);
case_list_append(&cases, pattern, body);
}
expect(p, tok_right_brace);
return make_match_expr(cond, cases.head);
}
static struct expr *parse_let_expr(struct parser *p) {
struct decl_list_builder decls = { NULL };
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_in) {
decl_list_append(&decls, parse_decl(p));
}
expect(p, tok_in);
struct expr *body = parse_expr(p);
return make_let_expr(decls.head, body);
}
struct expr *parse_expr(struct parser *p) {
switch (cur_tok(p)) {
case tok_match:
consume(p);
return parse_match_expr(p);
case tok_let:
consume(p);
return parse_let_expr(p);
default:
return parse_fun_app(p);
}
}
/* Patterns */
static struct pattern *parse_list_pattern(struct parser *p) {
struct pattern_list_builder elems = { NULL };
if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
pattern_list_append(&elems, parse_pattern(p));
}
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
expect(p, tok_comma);
pattern_list_append(&elems, parse_pattern(p));
}
expect(p, tok_right_square);
return make_list_pattern(elems.head);
}
/* Parse the inside of a tuple pattern; the opening `(` has already been
 * consumed by the caller.
 *
 * Shape: [pattern [`,` pattern]*] `)`
 * The element list may be empty. The closing `)` is consumed. Errors are
 * reported through expect()/report_error(), which terminate the program.
 */
static struct pattern *parse_tuple_pattern(struct parser *p) {
    struct pattern_list_builder elems = { NULL };
    if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
        pattern_list_append(&elems, parse_pattern(p));
    }
    /* Every element after the first is introduced by a comma */
    while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
        expect(p, tok_comma);
        pattern_list_append(&elems, parse_pattern(p));
    }
    /* The tuple was opened with `(`, so it must be closed with `)` */
    expect(p, tok_right_paren);
    return make_tuple_pattern(elems.head);
}
/* Parse patterns until the `delim` token is found or end of file is encountered */
static struct pattern_list *parse_pattern_list(struct parser *p, enum token_type delim) {
struct pattern_list_builder patterns = { NULL };
while (cur_tok(p) != tok_eof && cur_tok(p) != delim) {
pattern_list_append(&patterns, parse_pattern(p));
}
return patterns.head;
}
/* Parse the inside of a constructor pattern; the opening angle bracket
 * has already been consumed by the caller.
 *
 * Shape: ident [pattern]* closing-angle-bracket
 * Errors are reported through report_error()/expect(), which terminate
 * the program.
 */
static struct pattern *parse_constructor_pattern(struct parser *p) {
    /* Guard: report_error() does not return */
    if (cur_tok(p) != tok_ident) {
        report_error(p, "Expected constructor name in constructor pattern.\n");
    }

    char *ctor = cur_lexeme(p);
    consume(p);

    struct pattern_list *fields = parse_pattern_list(p, tok_right_angle_bracket);
    expect(p, tok_right_angle_bracket);

    return make_constructor_pattern(ctor, fields);
}
/* Parse one pattern: a variable, an integer, boolean or string literal,
 * a tuple pattern `(...)`, a list pattern `[...]`, or a constructor
 * pattern enclosed in angle brackets.
 *
 * Any other lookahead is reported through report_error(), which
 * terminates the program.
 */
static struct pattern *parse_pattern(struct parser *p) {
    struct pattern *leaf = NULL;

    switch (cur_tok(p)) {
    /* Bracketed forms: skip the opener and delegate */
    case tok_left_paren:
        consume(p);
        return parse_tuple_pattern(p);
    case tok_left_square:
        consume(p);
        return parse_list_pattern(p);
    case tok_left_angle_bracket:
        consume(p);
        return parse_constructor_pattern(p);

    /* Single-token forms: build the node, then skip the token below */
    case tok_ident:
        leaf = make_var_pattern(cur_lexeme(p));
        break;
    case tok_int:
        leaf = make_int_pattern(cur_lexeme(p));
        break;
    case tok_true:
        leaf = make_bool_pattern(true);
        break;
    case tok_false:
        leaf = make_bool_pattern(false);
        break;
    case tok_string:
        leaf = make_string_pattern(cur_lexeme(p));
        break;

    default:
        /* Does not return */
        report_error(p, "Invalid pattern starting with %s.\n", token_descr(cur_tok(p)));
    }

    consume(p);
    return leaf;
}
struct decl_list *parse_program(struct parser *p) {
struct decl_list_builder decls = { NULL };
while (cur_tok(p) != tok_eof) {
decl_list_append(&decls, parse_decl(p));
}
return decls.head;
}