#include "parser.h"
#include "decl.h"
#include "type.h"
#include "expr.h"
#include "pattern.h"
/* Standard headers used below: va_list (stdarg), bool (stdbool),
 * fprintf/vfprintf (stdio), exit/free (stdlib), noreturn (stdnoreturn). */
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdnoreturn.h>

static struct type *parse_type(struct parser *p);
static struct pattern *parse_pattern(struct parser *p);
static struct pattern_list *parse_pattern_list(struct parser *p, enum token_type delim);

/* Initialise the lexer embedded in `p` to read from `in` and load the first
 * lookahead token into `p->cur` (its location goes into `p->loc`). */
void init_parser(struct parser *p, FILE *in)
{
    init_lexer(&p->lex, in);
    /* Read next token */
    lex_next(&p->lex, &p->cur, &p->loc);
}

/* Get current lookahead token type */
static inline enum token_type cur_tok(const struct parser *p)
{
    return p->cur.type;
}

/* Get current lookahead token lexeme */
static inline char *cur_lexeme(struct parser *p)
{
    return p->cur.lexeme;
}

/* Read next token: replaces the current lookahead with the next one */
static inline void consume(struct parser *p)
{
    lex_next(&p->lex, &p->cur, &p->loc);
}

/* Print "Error at <line>:<col> " followed by the printf-style message built
 * from `fmt` to stderr, then terminate the process with EXIT_FAILURE.
 * Never returns. */
static noreturn void report_error(struct parser *p, const char *fmt, ...)
{
    /* Report error at the beginning of the current lookahead position */
    if (fprintf(stderr, "Error at %ld:%ld ", p->loc.line, p->loc.col) < 0) {
        exit(EXIT_FAILURE);
    }
    va_list args;
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    exit(EXIT_FAILURE);
}

/* Consume next token if it is of the required type, otherwise fail with error */
static void expect(struct parser *p, enum token_type t)
{
    if (cur_tok(p) != t) {
        report_error(p, "Expected `%s`, but found `%s`.\n",
                     token_descr(t), token_descr(cur_tok(p)));
    }
    consume(p);
}

/* Types */

/* Parse the remainder of a parenthesised type; the caller has already
 * consumed the opening parenthesis. A single type is returned as-is; two or
 * more comma-separated types are returned as a tuple type. The closing
 * parenthesis is consumed here. */
static struct type *parse_paren_type(struct parser *p)
{
    struct type *res;
    struct type *t = parse_type(p);
    if (cur_tok(p) == tok_comma) {
        consume(p);
        struct type_list_builder list = { NULL };
        type_list_append(&list, t);
        /* A comma was seen, so a second component is mandatory */
        t = parse_type(p);
        type_list_append(&list, t);
        while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
            expect(p, tok_comma);
            t = parse_type(p);
            type_list_append(&list, t);
        }
        res = make_tuple_type(list.head);
    } else {
        res = t;
    }
    expect(p, tok_right_paren);
    return res;
}

/* Try to parse one piece of an atomic type: a parenthesised type, a type
 * variable (tok_param_ident) or a type name (tok_ident).
 * Returns NULL, consuming nothing, if the lookahead starts none of these.
 * Sets *is_ident to true if the result is a single type name. */
static struct type *try_parse_atomic_type_piece(struct parser *p, bool *is_ident)
{
    struct type *res = NULL;
    *is_ident = false;
    switch (cur_tok(p)) {
    case tok_left_paren:
        consume(p);
        res = parse_paren_type(p);
        break;
    case tok_param_ident:
        res = make_var_type(cur_lexeme(p));
        consume(p);
        break;
    case tok_ident:
        *is_ident = true;
        res = make_type_name(NULL, cur_lexeme(p));
        consume(p);
        break;
    default:
        res = NULL;
        break;
    }
    return res;
}

/* Parse a sequence of one or more atomic type pieces. A single piece is
 * returned unchanged. With several pieces, the last one must be a plain type
 * name; all preceding pieces become its parameter list. Fails with an error
 * if no piece can be parsed or if the last piece is not a type name. */
static struct type *parse_atomic_type(struct parser *p)
{
    bool t_is_ident;
    struct type *t = try_parse_atomic_type_piece(p, &t_is_ident);
    if (!t) {
        report_error(p, "Expected type.\n");
    }

    /* t always contains the latest parsed atom piece */
    bool curr_t_is_ident;
    struct type *curr_t = try_parse_atomic_type_piece(p, &curr_t_is_ident);
    if (curr_t != NULL) {
        struct type_list_builder params = { NULL };
        while (curr_t != NULL) {
            /* We have a new atom piece, therefore we append t
             * to the params list */
            type_list_append(&params, t);
            /* And set t to the latest parsed piece */
            t_is_ident = curr_t_is_ident;
            t = curr_t;
            curr_t = try_parse_atomic_type_piece(p, &curr_t_is_ident);
        }
        /* Now we must check that t is a simple type name */
        if (!t_is_ident) {
            report_error(p, "Invalid type, expected a simple type name here.\n");
        }
        char *name = t->type_name.name;
        /* Free allocated type object, we just need its name
         * TODO: improve this, it's an ugly hack
         * */
        free(t);
        return make_type_name(params.head, name);
    }
    return t;
}

/* Parse a type: an atomic type optionally followed by tok_arrow and another
 * type. The recursion on the right-hand side makes the arrow
 * right-associative. */
static struct type *parse_type(struct parser *p)
{
    struct type *t = parse_atomic_type(p);
    if (cur_tok(p) == tok_arrow) {
        consume(p);
        struct type *cod = parse_type(p);
        return make_func_type(t, cod);
    }
    return t;
}

/* Definitions */

/* Collect zero or more consecutive tok_param_ident tokens into a variable
 * list. Returns the list head (NULL when there are none). */
static struct var_list *parse_def_var_list(struct parser *p)
{
    struct var_list_builder params = { NULL };
    while (cur_tok(p) == tok_param_ident) {
        var_list_append(&params, cur_lexeme(p));
        consume(p);
    }
    return params.head;
}

/* Parse a datatype declaration after its introducing keyword was consumed:
 * optional type parameters, the datatype name, then a brace-enclosed list of
 * constructors. As written, every constructor must be followed by a comma. */
static struct decl *parse_datatype_decl(struct parser *p)
{
    struct var_list *params = parse_def_var_list(p);
    if (cur_tok(p) != tok_ident) {
        report_error(p, "Invalid datatype name, expected an identifier.\n");
    }
    char *datatype_name = cur_lexeme(p);
    consume(p);
    expect(p, tok_left_brace);

    /* Parse constructors */
    struct constructor_list_builder ctors = { NULL };
    while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
        if (cur_tok(p) != tok_ident) {
            report_error(p, "Invalid datatype constructor `%s`, expected an identifier.\n",
                         token_descr(cur_tok(p)));
        }
        char *ctor_name = cur_lexeme(p);
        /* NOTE(review): the constructor name token is not consumed before
         * parse_type() is called, so the name is parsed again as the first
         * piece of the constructor's type. This may be a missing consume();
         * behaviour is left unchanged until the intended grammar is confirmed. */
        struct type *ty = parse_type(p);
        constructor_list_append(&ctors, ctor_name, ty);
        expect(p, tok_comma);
    }
    expect(p, tok_right_brace);
    return make_datatype_decl(params, datatype_name, ctors.head);
}

/* Parse a type alias after its introducing keyword was consumed:
 * optional type parameters, the alias name, tok_equal and the aliased type. */
static struct decl *parse_alias_decl(struct parser *p)
{
    struct var_list *params = parse_def_var_list(p);
    if (cur_tok(p) != tok_ident) {
        report_error(p, "Invalid type name, expected an identifier.\n");
    }
    char *name = cur_lexeme(p);
    consume(p);
    expect(p, tok_equal);
    struct type *body = parse_type(p);
    return make_alias_decl(params, name, body);
}

/* Parse a typecheck declaration after its introducing keyword was consumed:
 * an identifier, tok_colon and a type. */
static struct decl *parse_typecheck_decl(struct parser *p)
{
    if (cur_tok(p) != tok_ident) {
        report_error(p, "Expected identifier.\n");
    }
    char *name = cur_lexeme(p);
    consume(p);
    expect(p, tok_colon);
    struct type *t = parse_type(p);
    return make_typecheck_decl(name, t);
}

/* Parse a definition after its introducing keyword was consumed.
 * If the identifier is directly followed by tok_equal the result is a value
 * declaration. Otherwise it is a function declaration made of one or more
 * cases (`name patterns = body`) separated by tok_pipe. */
struct decl *parse_value_or_func_decl(struct parser *p)
{
    if (cur_tok(p) != tok_ident) {
        report_error(p, "Invalid definition, expected an identifier.\n");
    }
    char *ident = cur_lexeme(p);
    consume(p);

    if (cur_tok(p) == tok_equal) {
        /* It's a value definition */
        consume(p);
        struct expr *body = parse_expr(p);
        return make_value_decl(ident, body);
    } else {
        /* It's a function definition */
        struct func_decl_list_builder func = { NULL };
        struct pattern_list *args = parse_pattern_list(p, tok_equal);
        expect(p, tok_equal);
        struct expr *body = parse_expr(p);
        func_decl_list_append(&func, ident, args, body);

        /* Additional cases */
        while (cur_tok(p) == tok_pipe) {
            consume(p);
            if (cur_tok(p) != tok_ident) {
                report_error(p, "Invalid function case definition, expected an identifier.\n");
            }
            ident = cur_lexeme(p);
            consume(p);
            args = parse_pattern_list(p, tok_equal);
            if (args == NULL) {
                report_error(p, "Expected at least one argument in function definition.\n");
            }
            expect(p, tok_equal);
            body = parse_expr(p);
            func_decl_list_append(&func, ident, args, body);
        }
        return make_func_decl(func.head);
    }
}

/* Parse one declaration, dispatching on the lookahead keyword
 * (tok_datatype, tok_alias, tok_typecheck or tok_def). Any other token is
 * reported as an error. */
struct decl *parse_decl(struct parser *p)
{
    switch (cur_tok(p)) {
    case tok_datatype:
        consume(p);
        return parse_datatype_decl(p);
    case tok_alias:
        consume(p);
        return parse_alias_decl(p);
    case tok_typecheck:
        consume(p);
        return parse_typecheck_decl(p);
    case tok_def:
        consume(p);
        return parse_value_or_func_decl(p);
    default:
        report_error(p, "Declaration expected, invalid token `%s`\n",
                     token_descr(cur_tok(p)));
    }
}

/* Expressions */

/* Parse the elements of a list literal; the caller has already consumed the
 * opening square bracket. As written, every element must be followed by a
 * comma. The closing square bracket is consumed here. */
static struct expr *parse_list_literal(struct parser *p)
{
    struct expr_list_builder elems = { NULL };
    struct expr *e;
    while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
        e = parse_expr(p);
        expr_list_append(&elems, e);
        expect(p, tok_comma);
    }
    expect(p, tok_right_square);
    return make_list_lit(elems.head);
}

/* Parse the remainder of a parenthesised expression; the caller has already
 * consumed the opening parenthesis. Without a comma the inner expression is
 * returned as-is. With a comma after the first expression the result is a
 * tuple literal; every further element must be followed by a comma.
 * The closing parenthesis is consumed here. */
static struct expr *parse_paren_expr(struct parser *p)
{
    struct expr *e = parse_expr(p);
    if (cur_tok(p) == tok_comma) {
        consume(p);
        struct expr_list_builder elems = { NULL };
        expr_list_append(&elems, e);
        /* Stop at the closing parenthesis that ends the tuple */
        while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
            e = parse_expr(p);
            expr_list_append(&elems, e);
            expect(p, tok_comma);
        }
        e = make_tuple_lit(elems.head);
    }
    expect(p, tok_right_paren);
    return e;
}

/* Try to parse an atomic expression: identifier, integer, boolean or string
 * literal, list literal, or parenthesised expression.
 * Returns NULL, consuming nothing, if the lookahead starts none of these. */
static struct expr *try_parse_atomic_expr(struct parser *p)
{
    struct expr *res;
    switch (cur_tok(p)) {
    case tok_ident:
        res = make_ident_expr(cur_lexeme(p));
        consume(p);
        break;
    case tok_int:
        res = make_int_lit(cur_lexeme(p));
        consume(p);
        break;
    case tok_true:
        res = make_bool_lit(true);
        consume(p);
        break;
    case tok_false:
        res = make_bool_lit(false);
        consume(p);
        break;
    case tok_string:
        res = make_string_lit(cur_lexeme(p));
        consume(p);
        break;
    case tok_left_square:
        consume(p);
        res = parse_list_literal(p);
        break;
    case tok_left_paren:
        consume(p);
        res = parse_paren_expr(p);
        break;
    default:
        res = NULL;
        break;
    }
    return res;
}

/* Parse an atomic expression followed by zero or more atomic expressions and
 * build a function application from them. The application node is built
 * even when the argument list is empty. */
static struct expr *parse_fun_app(struct parser *p)
{
    struct expr *fun = try_parse_atomic_expr(p);
    if (!fun) {
        report_error(p, "Expected expression.\n");
    }
    struct expr_list_builder args = { NULL };
    struct expr *arg;
    while ((arg = try_parse_atomic_expr(p))) {
        expr_list_append(&args, arg);
    }
    return make_func_app(fun, args.head);
}

/* Parse an infix expression. Currently this is just a function application.
 * TODO: Implement infix operations. A disabled sketch previously kept here
 * handled tok_colon by recursing for the right-hand side and returning
 * make_list_cons(lhs, rhs). */
static struct expr *parse_infix_expr(struct parser *p)
{
    struct expr *lhs = parse_fun_app(p);
    return lhs;
}

/* Parse a match expression after tok_match was consumed: the scrutinee
 * expression, then a brace-enclosed sequence of
 * `tok_case pattern tok_arrow expression` cases. */
static struct expr *parse_match_expr(struct parser *p)
{
    struct expr *cond = parse_expr(p);
    expect(p, tok_left_brace);

    struct pattern *pattern;
    struct expr *body;
    struct case_list_builder cases = { NULL };
    while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
        expect(p, tok_case);
        pattern = parse_pattern(p);
        expect(p, tok_arrow);
        body = parse_expr(p);
        case_list_append(&cases, pattern, body);
    }
    expect(p, tok_right_brace);
    return make_match_expr(cond, cases.head);
}

/* Parse a let expression after tok_let was consumed: declarations up to
 * tok_in, then the body expression. */
static struct expr *parse_let_expr(struct parser *p)
{
    struct decl_list_builder decls = { NULL };
    while (cur_tok(p) != tok_eof && cur_tok(p) != tok_in) {
        decl_list_append(&decls, parse_decl(p));
    }
    expect(p, tok_in);
    struct expr *body = parse_expr(p);
    return make_let_expr(decls.head, body);
}

/* Parse an expression: a match expression, a let expression, or otherwise
 * an infix expression. */
struct expr *parse_expr(struct parser *p)
{
    switch (cur_tok(p)) {
    case tok_match:
        consume(p);
        return parse_match_expr(p);
    case tok_let:
        consume(p);
        return parse_let_expr(p);
    default:
        return parse_infix_expr(p);
    }
}

/* Patterns */

/* Parse the elements of a list pattern; the caller has already consumed the
 * opening square bracket. As written, every element must be followed by a
 * comma. The closing square bracket is consumed here. */
static struct pattern *parse_list_pattern(struct parser *p)
{
    struct pattern_list_builder elems = { NULL };
    while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
        pattern_list_append(&elems, parse_pattern(p));
        expect(p, tok_comma);
    }
    expect(p, tok_right_square);
    return make_list_pattern(elems.head);
}

/* Parse the elements of a tuple pattern; the caller has already consumed the
 * opening parenthesis. As written, every element must be followed by a
 * comma. The closing parenthesis is consumed here. */
static struct pattern *parse_tuple_pattern(struct parser *p)
{
    struct pattern_list_builder elems = { NULL };
    /* A tuple pattern is opened by '(' and therefore closed by ')' */
    while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
        pattern_list_append(&elems, parse_pattern(p));
        expect(p, tok_comma);
    }
    expect(p, tok_right_paren);
    return make_tuple_pattern(elems.head);
}

/* Parse patterns until the `delim` token is found or end of file is
 * encountered. The delimiter itself is not consumed. Returns NULL when no
 * pattern was parsed. */
static struct pattern_list *parse_pattern_list(struct parser *p, enum token_type delim)
{
    struct pattern_list_builder patterns = { NULL };
    while (cur_tok(p) != tok_eof && cur_tok(p) != delim) {
        pattern_list_append(&patterns, parse_pattern(p));
    }
    return patterns.head;
}

/* Parse a constructor pattern after the opening angle bracket was consumed:
 * the constructor name, its argument patterns, and the closing angle
 * bracket. */
static struct pattern *parse_constructor_pattern(struct parser *p)
{
    if (cur_tok(p) != tok_ident) {
        report_error(p, "Expected constructor name in constructor pattern.\n");
    }
    char *name = cur_lexeme(p);
    consume(p);
    struct pattern_list *args = parse_pattern_list(p, tok_right_angle_bracket);
    expect(p, tok_right_angle_bracket);
    return make_constructor_pattern(name, args);
}

/* Parse a single pattern: variable, integer, boolean or string literal,
 * tuple, list or constructor pattern. Any other lookahead token is reported
 * as an error. */
static struct pattern *parse_pattern(struct parser *p)
{
    struct pattern *res;
    switch (cur_tok(p)) {
    case tok_ident:
        res = make_var_pattern(cur_lexeme(p));
        consume(p);
        break;
    case tok_int:
        res = make_int_pattern(cur_lexeme(p));
        consume(p);
        break;
    case tok_true:
        res = make_bool_pattern(true);
        consume(p);
        break;
    case tok_false:
        res = make_bool_pattern(false);
        consume(p);
        break;
    case tok_string:
        res = make_string_pattern(cur_lexeme(p));
        consume(p);
        break;
    case tok_left_paren:
        consume(p);
        res = parse_tuple_pattern(p);
        break;
    case tok_left_square:
        consume(p);
        res = parse_list_pattern(p);
        break;
    case tok_left_angle_bracket:
        consume(p);
        res = parse_constructor_pattern(p);
        break;
    default:
        report_error(p, "Invalid pattern.\n");
    }
    return res;
}

struct decl_list *parse_program(struct parser *p) { struct decl_list_builder decls = { NULL }; while (cur_tok(p) != tok_eof) { decl_list_append(&decls, parse_decl(p)); } return decls.head; }