Fix some parser errors

2022-05-17 21:33:36 +02:00 · 2022-05-17 21:33:36 +02:00 · 7dac4b66d3
parent dfbe469fb3
commit 7dac4b66d3
5 changed files with 121 additions and 38 deletions
--- a/README.md
+++ b/README.md
@ -32,6 +32,9 @@ here is a list of the features (or better, limitations) I want to introduce:
 - Polymorphic types à la Hindley Milner (probably unfeasible)
 - Automatic currying of functions, with optimised partial evaluation (unfeasible)
 - Delimited continuations (this is an overkill)
 - Easy syntax allowing easy kakoune support
 milly's full grammar is specified as a pair of lex and yacc files in `ref_parser/`.
 The kakoune plugin for milly is in `extra/milly.kak`.
--- a/include/lexer.h
+++ b/include/lexer.h
@ -45,6 +45,9 @@ struct token {
    char *lexeme;
 };
 /* Gives a text description of a token type */
 const char *token_descr(enum token_type tok);
 struct location {
    size_t line;
    size_t col;
--- a/src/lexer.c
+++ b/src/lexer.c
@ -6,6 +6,73 @@
 #include <ctype.h>
 #include <stdnoreturn.h>
 /* Returns a constant, human-readable description of the token type `tok`.
  * Token types without a dedicated entry are described as "unknown token". */
 const char *token_descr(enum token_type tok) {
    /* Start from the fallback text and overwrite it when `tok` is recognised */
    const char *text = "unknown token";
    switch (tok) {
    /* Identifiers and literals */
    case tok_ident:               text = "identifier"; break;
    case tok_param_ident:         text = "variable identifier"; break;
    case tok_int:                 text = "int literal"; break;
    case tok_string:              text = "string literal"; break;
    /* Operators and punctuation are described by their own spelling */
    case tok_arrow:               text = "->"; break;
    case tok_backslash:           text = "\\"; break;
    case tok_equal:               text = "="; break;
    case tok_left_paren:          text = "("; break;
    case tok_right_paren:         text = ")"; break;
    case tok_left_square:         text = "["; break;
    case tok_right_square:        text = "]"; break;
    case tok_left_brace:          text = "{"; break;
    case tok_right_brace:         text = "}"; break;
    case tok_left_angle_bracket:  text = "<"; break;
    case tok_right_angle_bracket: text = ">"; break;
    case tok_comma:               text = ","; break;
    case tok_pipe:                text = "|"; break;
    case tok_colon:               text = ":"; break;
    /* Keywords are described by their own spelling as well */
    case tok_true:                text = "true"; break;
    case tok_false:               text = "false"; break;
    case tok_case:                text = "case"; break;
    case tok_let:                 text = "let"; break;
    case tok_in:                  text = "in"; break;
    case tok_match:               text = "match"; break;
    case tok_of:                  text = "of"; break;
    case tok_def:                 text = "def"; break;
    case tok_datatype:            text = "datatype"; break;
    case tok_alias:               text = "alias"; break;
    case tok_typecheck:           text = "typecheck"; break;
    case tok_eof:                 text = "end of file"; break;
    default:
        break;
    }
    return text;
 }
 /* Helper to report lexical errors */
 static noreturn void report_lex_error(struct lexer *lex, const char *fmt, ...) {
    if (fprintf(stderr, "Error at %ld:%ld ", lex->loc.line, lex->loc.col) < 0) {
@ -78,35 +145,35 @@ struct reserved_symbol {
    enum token_type type;
 };
-/* These two arrays must be alpha sorted to be usable by bsearch */
+/* These two arrays must be sorted by ASCII value to be usable by bsearch */
 /* Punctuation symbols, each paired with the token type it stands for.
  * lex_punctuation looks entries up in this array with bsearch. */
 struct reserved_symbol punctuation[] = {
    { "(", tok_left_paren },
    { ")", tok_right_paren },
    { "[", tok_left_square },
    { "]", tok_right_square },
    { "{", tok_left_brace },
    { "}", tok_right_brace },
    { ",", tok_comma },
    { "|", tok_pipe },
    { ":", tok_colon },
    { "<", tok_left_angle_bracket },
    { ">", tok_right_angle_bracket },
    { "[", tok_left_square },
    /* "\\" is the C spelling of a single backslash character */
    { "\\", tok_backslash },
    { "]", tok_right_square },
    { "{", tok_left_brace },
    { "|", tok_pipe },
    { "}", tok_right_brace },
 };
 /* Reserved words, together with the `=` and `->` symbols, each paired with
  * the token type it stands for. */
 struct reserved_symbol keywords[] = {
    { "=", tok_equal },
    { "->", tok_arrow },
-    { "true", tok_true },
+    { "=", tok_equal },
-    { "false", tok_false },
+    { "alias", tok_alias },
    { "case", tok_case },
-    { "let", tok_let },
+    { "datatype", tok_datatype },
    { "def", tok_def },
    { "false", tok_false },
    { "in", tok_in },
    { "let", tok_let },
    { "match", tok_match },
    { "of", tok_of },
-    { "def", tok_def },
+    { "true", tok_true },
    { "datatype", tok_datatype },
    { "alias", tok_alias },
    { "typecheck", tok_typecheck },
 };
@ -124,6 +191,7 @@ void lex_punctuation(struct lexer *lex, struct token *out) {
    /* This can't fail because of the precondition to this function */
    struct reserved_symbol *r = bsearch(key, punctuation, num, elem_size, symbol_cmp);
    out->type = r->type;
    advance(lex);
 }
 /* We assume that the curr char is already a digit */
@ -171,6 +239,7 @@ static void lex_param_ident(struct lexer *lex, struct token *out) {
 static void lex_ident(struct lexer *lex, struct token *out) {
    /* Store ident in the internal buffer */
    store_char(lex, lex->cur);
    advance(lex);
    while (is_ident_cont(lex->cur)) {
        /* Store ident in the internal buffer */
        store_char(lex, lex->cur);
@ -276,6 +345,7 @@ keep_lexing:
    case '\'':
        *loc = lex->loc;
        advance(lex);
        lex_param_ident(lex, out);
        break;
--- a/src/main.c
+++ b/src/main.c
@ -2,8 +2,18 @@
 #include "parser.h"
 /* Program entry point. Expects exactly one command-line argument: the path of
  * the file to parse. Returns 0 after parsing, or 1 when the argument count is
  * wrong or the file cannot be opened. */
 int main(int argc, char *argv[]) {
    /* Anything other than a single file argument prints the usage line */
    if (argc != 2) {
        printf("Usage: milly file.mil\n");
        return 1;
    }
    /* Open the named file for reading; a failure is reported on stderr */
    FILE *input = fopen(argv[1], "r");
    if (!input) {
        fprintf(stderr, "Error: Could not open file %s.\n", argv[1]);
        return 1;
    }
    /* Initialise the parser on its input stream, then parse the whole program */
    struct parser p;
-    init_parser(&p, stdin);
+    init_parser(&p, input);
    parse_program(&p);
    /* The file opened above is no longer needed once parsing has finished */
    fclose(input);
    return 0;
 }
--- a/src/parser.c
+++ b/src/parser.c
@ -44,32 +44,36 @@ static noreturn void report_error(struct parser *p, const char *fmt, ...) {
    exit(EXIT_FAILURE);
 }
 /* Placeholder description: yields the same text for every token type.
  * NOTE(review): include/lexer.h declares a non-static token_descr with the
  * same parameter list; if that header is visible in this file, the static
  * definition below would conflict with it -- confirm whether this stub is
  * meant to be removed in favour of the lexer's version. */
 static const char *token_descr(enum token_type t) {
    return "another token";
 }
 /* Consume the current token if it has type `t`. Otherwise report an error
  * through report_error, which is declared noreturn and exits the program. */
 static void expect(struct parser *p, enum token_type t) {
    if (cur_tok(p) != t) {
-        report_error(p, "Expected %s.\n", token_descr(t));
+        report_error(p, "Expected `%s`, but found `%s`.\n", token_descr(t), token_descr(cur_tok(p)));
    }
    /* Reached only when the current token matched, since report_error does not return */
    consume(p);
 }
 /* Types */
-static struct type *parse_tuple_type(struct parser *p) {
+static struct type *parse_paren_type(struct parser *p) {
    /* Parses what follows an opening parenthesis in a type (the caller consumes
     * the parenthesis itself): either a single type, or a comma-separated list
     * of types that is turned into a tuple type with make_tuple_type. */
    struct type *res;
    struct type *t = parse_type(p);
-    struct type_list_builder list = { NULL };
+    if (cur_tok(p) == tok_comma) {
-    type_list_append(&list, t);
+        consume(p);
-    expect(p, tok_comma);
+        struct type_list_builder list = { NULL };
-    t = parse_type(p);
+        type_list_append(&list, t);
    type_list_append(&list, t);
    while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_angle_bracket) {
        expect(p, tok_comma);
        t = parse_type(p);
        type_list_append(&list, t);
        /* Keep collecting `, type` pairs until the closing parenthesis or end of input */
        while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
            expect(p, tok_comma);
            t = parse_type(p);
            type_list_append(&list, t);
        }
        res = make_tuple_type(list.head);
    }
-    return make_tuple_type(list.head);
+    else {
        /* No comma after the first type: the result is that type itself */
        res = t;
    }
    /* The closing parenthesis is consumed here; expect reports an error if it is missing */
    expect(p, tok_right_paren);
    return res;
 }
 /* sets *is_ident to true if the result is a single type name */
@ -79,14 +83,7 @@ static struct type *try_parse_atomic_type_piece(struct parser *p, bool *is_ident
    switch (cur_tok(p)) {
    case tok_left_paren:
        consume(p);
-        res = parse_type(p);
+        res = parse_paren_type(p);
        expect(p, tok_right_paren);
        break;
    case tok_left_angle_bracket:
        consume(p);
        res = parse_tuple_type(p);
        expect(p, tok_right_angle_bracket);
        break;
    case tok_param_ident:
@ -177,7 +174,7 @@ static struct decl *parse_datatype_decl(struct parser *p) {
    struct type *ty;
    while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
        if (cur_tok(p) != tok_ident) {
-            report_error(p, "Invalid datatype constructor, expected an identifier.\n");
+            report_error(p, "Invalid datatype constructor `%s`, expected an identifier.\n", token_descr(cur_tok(p)));
        }
        ctor_name = cur_lexeme(p);
        ty = parse_type(p);
@ -264,7 +261,7 @@ struct decl *parse_decl(struct parser *p) {
        consume(p);
        return parse_value_or_func_decl(p);
    default:
-        report_error(p, "Declaration expected, invalid token.\n");
+        report_error(p, "Declaration expected, invalid token `%s`\n", token_descr(cur_tok(p)));
    }
 }