501 lines
15 KiB
C
501 lines
15 KiB
C
#include "parser.h"
|
|
#include "decl.h"
|
|
#include "type.h"
|
|
#include "expr.h"
|
|
#include "pattern.h"
|
|
#include <stdnoreturn.h>
|
|
#include <stdarg.h>
|
|
#include <stdlib.h>
|
|
|
|
/* Forward declarations: these parsers are called before their definitions
 * appear further down in this file. */
static struct type *parse_type(struct parser *p);
|
|
static struct pattern *parse_pattern(struct parser *p);
|
|
/* `delim` is the token type at which the pattern sequence stops. */
static struct pattern_list *parse_pattern_list(struct parser *p, enum token_type delim);
|
|
|
|
void init_parser(struct parser *p, FILE *in) {
|
|
init_lexer(&p->lex, in);
|
|
/* Read next token */
|
|
lex_next(&p->lex, &p->cur, &p->loc);
|
|
}
|
|
|
|
/* Get current lookahead token type */
|
|
static inline enum token_type cur_tok(const struct parser *p) {
|
|
return p->cur.type;
|
|
}
|
|
|
|
/* Get current lookahead token lexeme */
|
|
static inline char *cur_lexeme(struct parser *p) {
|
|
return p->cur.lexeme;
|
|
}
|
|
|
|
/* read next token */
|
|
static inline void consume(struct parser *p) {
|
|
lex_next(&p->lex, &p->cur, &p->loc);
|
|
}
|
|
|
|
static noreturn void report_error(struct parser *p, const char *fmt, ...) {
|
|
/* Report error at the beginning of the current lookahead position */
|
|
if (fprintf(stderr, "Error at %ld:%ld ", p->loc.line, p->loc.col) < 0) {
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
va_list args;
|
|
va_start(args, fmt);
|
|
vfprintf(stderr, fmt, args);
|
|
va_end(args);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
/* Consume next token if it is of the required type, otherwise fail with error */
|
|
static void expect(struct parser *p, enum token_type t) {
|
|
if (cur_tok(p) != t) {
|
|
report_error(p, "Expected `%s`, but found `%s`.\n", token_descr(t), token_descr(cur_tok(p)));
|
|
}
|
|
consume(p);
|
|
}
|
|
|
|
/* Types */
|
|
static struct type *parse_paren_type(struct parser *p) {
|
|
struct type *res;
|
|
struct type *t = parse_type(p);
|
|
if (cur_tok(p) == tok_comma) {
|
|
consume(p);
|
|
struct type_list_builder list = { NULL };
|
|
type_list_append(&list, t);
|
|
t = parse_type(p);
|
|
type_list_append(&list, t);
|
|
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
|
|
expect(p, tok_comma);
|
|
t = parse_type(p);
|
|
type_list_append(&list, t);
|
|
}
|
|
res = make_tuple_type(list.head);
|
|
}
|
|
else {
|
|
res = t;
|
|
}
|
|
expect(p, tok_right_paren);
|
|
return res;
|
|
}
|
|
|
|
/* sets *is_ident to true if the result is a single type name */
|
|
static struct type *try_parse_atomic_type_piece(struct parser *p, bool *is_ident) {
|
|
struct type *res = NULL;
|
|
*is_ident = false;
|
|
switch (cur_tok(p)) {
|
|
case tok_left_paren:
|
|
consume(p);
|
|
res = parse_paren_type(p);
|
|
break;
|
|
|
|
case tok_param_ident:
|
|
res = make_var_type(cur_lexeme(p));
|
|
consume(p);
|
|
break;
|
|
|
|
case tok_ident:
|
|
*is_ident = true;
|
|
res = make_type_name(NULL, cur_lexeme(p));
|
|
consume(p);
|
|
break;
|
|
|
|
default:
|
|
res = NULL;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
static struct type *parse_atomic_type(struct parser *p) {
|
|
bool t_is_ident;
|
|
struct type *t = try_parse_atomic_type_piece(p, &t_is_ident);
|
|
if (!t) {
|
|
report_error(p, "Expected type.\n");
|
|
}
|
|
/* t always contains the latest parsed atom piece */
|
|
bool curr_t_is_ident;
|
|
struct type *curr_t = try_parse_atomic_type_piece(p, &curr_t_is_ident);
|
|
if (curr_t != NULL) {
|
|
struct type_list_builder params = { NULL };
|
|
while (curr_t != NULL) {
|
|
/* We have a new atom piece, therefore we append t
|
|
* to the params list
|
|
*/
|
|
type_list_append(¶ms, t);
|
|
/* And set t to the latest parsed piece */
|
|
t_is_ident = curr_t_is_ident;
|
|
t = curr_t;
|
|
curr_t = try_parse_atomic_type_piece(p, &curr_t_is_ident);
|
|
}
|
|
/* Now we must check that t is a simple type name */
|
|
if (!t_is_ident) {
|
|
report_error(p, "Invalid type, expected a simple type name here.\n");
|
|
}
|
|
char *name = t->type_name.name;
|
|
/* Free allocated type object, we just need its name
|
|
* TODO: improve this, it's a ugly hack
|
|
* */
|
|
free(t);
|
|
return make_type_name(params.head, name);
|
|
}
|
|
return t;
|
|
}
|
|
|
|
static struct type *parse_type(struct parser *p) {
|
|
struct type *t = parse_atomic_type(p);
|
|
if (cur_tok(p) == tok_arrow) {
|
|
consume(p);
|
|
struct type *cod = parse_type(p);
|
|
return make_func_type(t, cod);
|
|
}
|
|
return t;
|
|
}
|
|
|
|
/* Definitions */
|
|
static struct var_list *parse_def_var_list(struct parser *p) {
|
|
struct var_list_builder params = { NULL };
|
|
while (cur_tok(p) == tok_param_ident) {
|
|
var_list_append(¶ms, cur_lexeme(p));
|
|
consume(p);
|
|
}
|
|
return params.head;
|
|
}
|
|
|
|
static void parse_datatype_constructor(struct parser *p, struct constructor_list_builder *b) {
|
|
if (cur_tok(p) != tok_ident) {
|
|
report_error(p, "Invalid datatype constructor `%s`, expected an identifier.\n", token_descr(cur_tok(p)));
|
|
}
|
|
char *ctor_name = cur_lexeme(p);
|
|
consume(p);
|
|
struct type *ty = NULL;
|
|
if (cur_tok(p) == tok_of) {
|
|
consume(p);
|
|
ty = parse_type(p);
|
|
}
|
|
constructor_list_append(b, ctor_name, ty);
|
|
}
|
|
|
|
static struct decl *parse_datatype_decl(struct parser *p) {
|
|
struct var_list *params = parse_def_var_list(p);
|
|
if (cur_tok(p) != tok_ident) {
|
|
report_error(p, "Invalid datatype name `%s`, expected an identifier.\n", token_descr(cur_tok(p)));
|
|
}
|
|
char *datatype_name = cur_lexeme(p);
|
|
consume(p);
|
|
|
|
expect(p, tok_left_brace);
|
|
|
|
/* Parse constructors */
|
|
struct constructor_list_builder ctors = { NULL };
|
|
if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
|
|
parse_datatype_constructor(p, &ctors);
|
|
}
|
|
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
|
|
expect(p, tok_comma);
|
|
parse_datatype_constructor(p, &ctors);
|
|
}
|
|
expect(p, tok_right_brace);
|
|
return make_datatype_decl(params, datatype_name, ctors.head);
|
|
}
|
|
|
|
static struct decl *parse_alias_decl(struct parser *p) {
|
|
struct var_list *params = parse_def_var_list(p);
|
|
if (cur_tok(p) != tok_ident) {
|
|
report_error(p, "Invalid type name, expected an identifier.\n");
|
|
}
|
|
char *name = cur_lexeme(p);
|
|
consume(p);
|
|
expect(p, tok_equal);
|
|
struct type *body = parse_type(p);
|
|
return make_alias_decl(params, name, body);
|
|
}
|
|
|
|
static struct decl *parse_typecheck_decl(struct parser *p) {
|
|
if (cur_tok(p) != tok_ident) {
|
|
report_error(p, "Expected identifier.\n");
|
|
}
|
|
char *name = cur_lexeme(p);
|
|
consume(p);
|
|
expect(p, tok_colon);
|
|
struct type *t = parse_type(p);
|
|
return make_typecheck_decl(name, t);
|
|
}
|
|
|
|
struct decl *parse_value_or_func_decl(struct parser *p) {
|
|
if (cur_tok(p) != tok_ident) {
|
|
report_error(p, "Invalid definition, expected an identifier.\n");
|
|
}
|
|
char *ident = cur_lexeme(p);
|
|
consume(p);
|
|
if (cur_tok(p) == tok_equal) {
|
|
/* It's a value definition */
|
|
consume(p);
|
|
struct expr *body = parse_expr(p);
|
|
return make_value_decl(ident, body);
|
|
}
|
|
else {
|
|
/* It's a function definition */
|
|
struct func_decl_list_builder func = { NULL };
|
|
struct pattern_list *args = parse_pattern_list(p, tok_equal);
|
|
expect(p, tok_equal);
|
|
struct expr *body = parse_expr(p);
|
|
func_decl_list_append(&func, ident, args, body);
|
|
while (cur_tok(p) == tok_pipe) {
|
|
consume(p);
|
|
if (cur_tok(p) != tok_ident) {
|
|
report_error(p, "Invalid function case definition, expected an identifier.\n");
|
|
}
|
|
ident = cur_lexeme(p);
|
|
consume(p);
|
|
args = parse_pattern_list(p, tok_equal);
|
|
if (args == NULL) {
|
|
report_error(p, "Expected at least one argument in function definition.\n");
|
|
}
|
|
expect(p, tok_equal);
|
|
body = parse_expr(p);
|
|
func_decl_list_append(&func, ident, args, body);
|
|
}
|
|
return make_func_decl(func.head);
|
|
}
|
|
}
|
|
|
|
struct decl *parse_decl(struct parser *p) {
|
|
switch (cur_tok(p)) {
|
|
case tok_datatype:
|
|
consume(p);
|
|
return parse_datatype_decl(p);
|
|
case tok_alias:
|
|
consume(p);
|
|
return parse_alias_decl(p);
|
|
case tok_typecheck:
|
|
consume(p);
|
|
return parse_typecheck_decl(p);
|
|
case tok_def:
|
|
consume(p);
|
|
return parse_value_or_func_decl(p);
|
|
default:
|
|
report_error(p, "Declaration expected, invalid token `%s`\n", token_descr(cur_tok(p)));
|
|
}
|
|
}
|
|
|
|
/* Expressions */
|
|
static struct expr *parse_list_literal(struct parser *p) {
|
|
struct expr_list_builder elems = { NULL };
|
|
struct expr *e;
|
|
if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
|
|
e = parse_expr(p);
|
|
expr_list_append(&elems, e);
|
|
}
|
|
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
|
|
expect(p, tok_comma);
|
|
e = parse_expr(p);
|
|
expr_list_append(&elems, e);
|
|
}
|
|
expect(p, tok_right_square);
|
|
return make_list_lit(elems.head);
|
|
}
|
|
|
|
static struct expr *parse_paren_expr(struct parser *p) {
|
|
struct expr *e = parse_expr(p);
|
|
if (cur_tok(p) == tok_comma) {
|
|
consume(p);
|
|
struct expr_list_builder elems = { NULL };
|
|
expr_list_append(&elems, e);
|
|
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_left_paren) {
|
|
e = parse_expr(p);
|
|
expr_list_append(&elems, e);
|
|
expect(p, tok_comma);
|
|
}
|
|
e = make_tuple_lit(elems.head);
|
|
}
|
|
expect(p, tok_right_paren);
|
|
return e;
|
|
}
|
|
|
|
static struct expr *try_parse_atomic_expr(struct parser *p) {
|
|
struct expr *res;
|
|
switch (cur_tok(p)) {
|
|
case tok_ident:
|
|
res = make_ident_expr(cur_lexeme(p));
|
|
consume(p);
|
|
break;
|
|
case tok_int:
|
|
res = make_int_lit(cur_lexeme(p));
|
|
consume(p);
|
|
break;
|
|
case tok_true:
|
|
res = make_bool_lit(true);
|
|
consume(p);
|
|
break;
|
|
case tok_false:
|
|
res = make_bool_lit(false);
|
|
consume(p);
|
|
break;
|
|
case tok_string:
|
|
res = make_string_lit(cur_lexeme(p));
|
|
consume(p);
|
|
break;
|
|
case tok_left_square:
|
|
consume(p);
|
|
res = parse_list_literal(p);
|
|
break;
|
|
case tok_left_paren:
|
|
consume(p);
|
|
res = parse_paren_expr(p);
|
|
break;
|
|
default:
|
|
res = NULL;
|
|
break;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
static struct expr *parse_fun_app(struct parser *p) {
|
|
struct expr *fun = try_parse_atomic_expr(p);
|
|
if (!fun) {
|
|
report_error(p, "Expected expression.\n");
|
|
}
|
|
struct expr_list_builder args = { NULL };
|
|
struct expr *arg;
|
|
while ((arg = try_parse_atomic_expr(p))) {
|
|
expr_list_append(&args, arg);
|
|
}
|
|
return make_func_app(fun, args.head);
|
|
}
|
|
|
|
static struct expr *parse_match_expr(struct parser *p) {
|
|
struct expr *cond = parse_expr(p);
|
|
expect(p, tok_left_brace);
|
|
struct pattern *pattern;
|
|
struct expr *body;
|
|
struct case_list_builder cases = { NULL };
|
|
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
|
|
expect(p, tok_case);
|
|
pattern = parse_pattern(p);
|
|
expect(p, tok_arrow);
|
|
body = parse_expr(p);
|
|
case_list_append(&cases, pattern, body);
|
|
}
|
|
expect(p, tok_right_brace);
|
|
return make_match_expr(cond, cases.head);
|
|
}
|
|
|
|
static struct expr *parse_let_expr(struct parser *p) {
|
|
struct decl_list_builder decls = { NULL };
|
|
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_in) {
|
|
decl_list_append(&decls, parse_decl(p));
|
|
}
|
|
expect(p, tok_in);
|
|
struct expr *body = parse_expr(p);
|
|
return make_let_expr(decls.head, body);
|
|
}
|
|
|
|
struct expr *parse_expr(struct parser *p) {
|
|
switch (cur_tok(p)) {
|
|
case tok_match:
|
|
consume(p);
|
|
return parse_match_expr(p);
|
|
case tok_let:
|
|
consume(p);
|
|
return parse_let_expr(p);
|
|
default:
|
|
return parse_fun_app(p);
|
|
}
|
|
}
|
|
|
|
/* Patterns */
|
|
static struct pattern *parse_list_pattern(struct parser *p) {
|
|
struct pattern_list_builder elems = { NULL };
|
|
if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
|
|
pattern_list_append(&elems, parse_pattern(p));
|
|
}
|
|
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
|
|
expect(p, tok_comma);
|
|
pattern_list_append(&elems, parse_pattern(p));
|
|
}
|
|
expect(p, tok_right_square);
|
|
return make_list_pattern(elems.head);
|
|
}
|
|
|
|
static struct pattern *parse_tuple_pattern(struct parser *p) {
|
|
struct pattern_list_builder elems = { NULL };
|
|
if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
|
|
pattern_list_append(&elems, parse_pattern(p));
|
|
}
|
|
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
|
|
expect(p, tok_comma);
|
|
pattern_list_append(&elems, parse_pattern(p));
|
|
}
|
|
expect(p, tok_right_square);
|
|
return make_tuple_pattern(elems.head);
|
|
}
|
|
|
|
/* Parse patterns until the `delim` token is found or end of file is encountered */
|
|
static struct pattern_list *parse_pattern_list(struct parser *p, enum token_type delim) {
|
|
struct pattern_list_builder patterns = { NULL };
|
|
while (cur_tok(p) != tok_eof && cur_tok(p) != delim) {
|
|
pattern_list_append(&patterns, parse_pattern(p));
|
|
}
|
|
return patterns.head;
|
|
}
|
|
|
|
static struct pattern *parse_constructor_pattern(struct parser *p) {
|
|
if (cur_tok(p) != tok_ident) {
|
|
report_error(p, "Expected constructor name in constructor pattern.\n");
|
|
}
|
|
else {
|
|
char *name = cur_lexeme(p);
|
|
consume(p);
|
|
struct pattern_list *args = parse_pattern_list(p, tok_right_angle_bracket);
|
|
expect(p, tok_right_angle_bracket);
|
|
return make_constructor_pattern(name, args);
|
|
}
|
|
}
|
|
|
|
static struct pattern *parse_pattern(struct parser *p) {
|
|
struct pattern *res;
|
|
switch (cur_tok(p)) {
|
|
case tok_ident:
|
|
res = make_var_pattern(cur_lexeme(p));
|
|
consume(p);
|
|
break;
|
|
case tok_int:
|
|
res = make_int_pattern(cur_lexeme(p));
|
|
consume(p);
|
|
break;
|
|
case tok_true:
|
|
res = make_bool_pattern(true);
|
|
consume(p);
|
|
break;
|
|
case tok_false:
|
|
res = make_bool_pattern(false);
|
|
consume(p);
|
|
break;
|
|
case tok_string:
|
|
res = make_string_pattern(cur_lexeme(p));
|
|
consume(p);
|
|
break;
|
|
case tok_left_paren:
|
|
consume(p);
|
|
res = parse_tuple_pattern(p);
|
|
break;
|
|
case tok_left_square:
|
|
consume(p);
|
|
res = parse_list_pattern(p);
|
|
break;
|
|
case tok_left_angle_bracket:
|
|
consume(p);
|
|
res = parse_constructor_pattern(p);
|
|
break;
|
|
default:
|
|
report_error(p, "Invalid pattern starting with %s.\n", token_descr(cur_tok(p)));
|
|
}
|
|
return res;
|
|
}
|
|
|
|
struct decl_list *parse_program(struct parser *p) {
|
|
struct decl_list_builder decls = { NULL };
|
|
while (cur_tok(p) != tok_eof) {
|
|
decl_list_append(&decls, parse_decl(p));
|
|
}
|
|
return decls.head;
|
|
}
|
|
|