Fix some parser errors
This commit is contained in:
parent dfbe469fb3
commit 7dac4b66d3
@@ -32,6 +32,9 @@ here is a list of the features (or better, limitations) I want to introduce:
 - Polymorphic types à la Hindley Milner (probably unfeasible)
 - Automatic currying of functions, with optimised partial evaluation (unfeasible)
 - Delimited continuations (this is an overkill)
+- Easy syntax allowing easy kakoune support
+
+milly's full grammar is specified as a pair of lex and yacc files in `ref_parser/`.
 
 kakoune plugin for milly is in `extra/milly.kak`.
 
@@ -45,6 +45,9 @@ struct token {
     char *lexeme;
 };
 
+/* Gives a text description of a token type */
+const char *token_descr(enum token_type tok);
+
 struct location {
     size_t line;
     size_t col;

src/lexer.c (96 lines changed)
@@ -6,6 +6,73 @@
 #include <ctype.h>
 #include <stdnoreturn.h>
 
+const char *token_descr(enum token_type tok) {
+    switch (tok) {
+    case tok_ident:
+        return "identifier";
+    case tok_param_ident:
+        return "variable identifier";
+    case tok_int:
+        return "int literal";
+    case tok_string:
+        return "string literal";
+    case tok_arrow:
+        return "->";
+    case tok_backslash:
+        return "\\";
+    case tok_equal:
+        return "=";
+    case tok_left_paren:
+        return "(";
+    case tok_right_paren:
+        return ")";
+    case tok_left_square:
+        return "[";
+    case tok_right_square:
+        return "]";
+    case tok_left_brace:
+        return "{";
+    case tok_right_brace:
+        return "}";
+    case tok_left_angle_bracket:
+        return "<";
+    case tok_right_angle_bracket:
+        return ">";
+    case tok_comma:
+        return ",";
+    case tok_pipe:
+        return "|";
+    case tok_colon:
+        return ":";
+    case tok_true:
+        return "true";
+    case tok_false:
+        return "false";
+    case tok_case:
+        return "case";
+    case tok_let:
+        return "let";
+    case tok_in:
+        return "in";
+    case tok_match:
+        return "match";
+    case tok_of:
+        return "of";
+    case tok_def:
+        return "def";
+    case tok_datatype:
+        return "datatype";
+    case tok_alias:
+        return "alias";
+    case tok_typecheck:
+        return "typecheck";
+    case tok_eof:
+        return "end of file";
+    default:
+        return "unknown token";
+    }
+}
+
 /* Helper to report lexical errors */
 static noreturn void report_lex_error(struct lexer *lex, const char *fmt, ...) {
     if (fprintf(stderr, "Error at %ld:%ld ", lex->loc.line, lex->loc.col) < 0) {
@@ -78,35 +145,35 @@ struct reserved_symbol {
     enum token_type type;
 };
 
-/* These two arrays must be alpha sorted to be usable by bsearch */
+/* These two arrays must be sorted (according to ascii value) to be usable by bsearch */
 struct reserved_symbol punctuation[] = {
     { "(", tok_left_paren },
     { ")", tok_right_paren },
-    { "[", tok_left_square },
-    { "]", tok_right_square },
-    { "{", tok_left_brace },
-    { "}", tok_right_brace },
     { ",", tok_comma },
-    { "|", tok_pipe },
     { ":", tok_colon },
     { "<", tok_left_angle_bracket },
     { ">", tok_right_angle_bracket },
+    { "[", tok_left_square },
     { "\\", tok_backslash },
+    { "]", tok_right_square },
+    { "{", tok_left_brace },
+    { "|", tok_pipe },
+    { "}", tok_right_brace },
 };
 
 struct reserved_symbol keywords[] = {
-    { "=", tok_equal },
     { "->", tok_arrow },
-    { "true", tok_true },
-    { "false", tok_false },
+    { "=", tok_equal },
+    { "alias", tok_alias },
     { "case", tok_case },
-    { "let", tok_let },
+    { "datatype", tok_datatype },
+    { "def", tok_def },
+    { "false", tok_false },
     { "in", tok_in },
+    { "let", tok_let },
     { "match", tok_match },
     { "of", tok_of },
-    { "def", tok_def },
-    { "datatype", tok_datatype },
-    { "alias", tok_alias },
+    { "true", tok_true },
     { "typecheck", tok_typecheck },
 };
 
@@ -124,6 +191,7 @@ void lex_punctuation(struct lexer *lex, struct token *out) {
     /* This can't fail because of the precondition to this function */
     struct reserved_symbol *r = bsearch(key, punctuation, num, elem_size, symbol_cmp);
     out->type = r->type;
+    advance(lex);
 }
 
 /* We assume that the curr char is already a digit */
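
Side note on the bsearch-based lookup above: it only works because `punctuation` and `keywords` stay in strcmp order, which is what the re-sorted tables in the previous hunk restore. The diff never shows `symbol_cmp` or how `key` is built, so the comparator below is only a compatible sketch, not the repository's code; the string field name `text` is an assumption (only the `type` member is visible in the diff).

#include <stdlib.h>
#include <string.h>

/* Sketch of a reserved-symbol entry; the real struct in src/lexer.c only
 * shows its `type` member in the diff, so `text` is a guessed name. */
struct reserved_symbol {
    const char *text;
    int type;   /* enum token_type in the real code */
};

/* bsearch() passes the search key first and an array element second.
 * Comparing lexemes with strcmp() is exactly why the tables must be
 * sorted by ascii value: bsearch assumes strcmp order. */
static int symbol_cmp(const void *key, const void *elem) {
    const struct reserved_symbol *k = key;
    const struct reserved_symbol *e = elem;
    return strcmp(k->text, e->text);
}

/* Usage shape, mirroring the call in lex_punctuation:
 *   struct reserved_symbol key = { lexeme, 0 };
 *   struct reserved_symbol *r = bsearch(&key, punctuation,
 *       sizeof punctuation / sizeof punctuation[0],
 *       sizeof punctuation[0], symbol_cmp);
 */
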
@@ -171,6 +239,7 @@ static void lex_param_ident(struct lexer *lex, struct token *out) {
 static void lex_ident(struct lexer *lex, struct token *out) {
     /* Store ident in the internal buffer */
     store_char(lex, lex->cur);
+    advance(lex);
     while (is_ident_cont(lex->cur)) {
         /* Store ident in the internal buffer */
         store_char(lex, lex->cur);
@@ -276,6 +345,7 @@ keep_lexing:
 
     case '\'':
         *loc = lex->loc;
+        advance(lex);
         lex_param_ident(lex, out);
         break;
 

src/main.c (12 lines changed)
@@ -2,8 +2,18 @@
 #include "parser.h"
 
 int main(int argc, char *argv[]) {
+    if (argc != 2) {
+        printf("Usage: milly file.mil\n");
+        return 1;
+    }
+    FILE *input = fopen(argv[1], "r");
+    if (!input) {
+        fprintf(stderr, "Error: Could not open file %s.\n", argv[1]);
+        return 1;
+    }
     struct parser p;
-    init_parser(&p, stdin);
+    init_parser(&p, input);
     parse_program(&p);
+    fclose(input);
     return 0;
 }
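
One consequence of wiring `init_parser` to an arbitrary `FILE *` (instead of hard-coding `stdin`) is that the parser can also be driven from an in-memory buffer. A minimal sketch under stated assumptions: POSIX `fmemopen` is available, `parser.h` exposes `struct parser`, `init_parser` and `parse_program` as used in this diff, and the one-line `.mil` snippet is a made-up placeholder rather than real milly code.

#define _POSIX_C_SOURCE 200809L   /* for fmemopen */
#include <stdio.h>
#include <string.h>
#include "parser.h"

int main(void) {
    /* Placeholder source text; actual milly syntax may differ. */
    const char *src = "def answer = 42\n";
    FILE *in = fmemopen((void *)src, strlen(src), "r");
    if (!in) {
        fprintf(stderr, "fmemopen failed\n");
        return 1;
    }
    struct parser p;
    init_parser(&p, in);
    parse_program(&p);   /* report_error() exits on a parse failure */
    fclose(in);
    return 0;
}
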

src/parser.c (35 lines changed)
@@ -44,32 +44,36 @@ static noreturn void report_error(struct parser *p, const char *fmt, ...) {
     exit(EXIT_FAILURE);
 }
 
-static const char *token_descr(enum token_type t) {
-    return "another token";
-}
-
 /* Consume next token if it is of the required type, otherwise fail with error */
 static void expect(struct parser *p, enum token_type t) {
     if (cur_tok(p) != t) {
-        report_error(p, "Expected %s.\n", token_descr(t));
+        report_error(p, "Expected `%s`, but found `%s`.\n", token_descr(t), token_descr(cur_tok(p)));
     }
     consume(p);
 }
 
 /* Types */
-static struct type *parse_tuple_type(struct parser *p) {
+static struct type *parse_paren_type(struct parser *p) {
+    struct type *res;
     struct type *t = parse_type(p);
+    if (cur_tok(p) == tok_comma) {
+        consume(p);
         struct type_list_builder list = { NULL };
         type_list_append(&list, t);
-        expect(p, tok_comma);
         t = parse_type(p);
         type_list_append(&list, t);
-        while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_angle_bracket) {
+        while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
             expect(p, tok_comma);
             t = parse_type(p);
             type_list_append(&list, t);
         }
-        return make_tuple_type(list.head);
+        res = make_tuple_type(list.head);
+    }
+    else {
+        res = t;
+    }
+    expect(p, tok_right_paren);
+    return res;
 }
 
 /* sets *is_ident to true if the result is a single type name */
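
To spell out what the rewritten function accepts (the next hunk shows the matching caller change, where the angle-bracket case disappears): a single parenthesised type is returned as-is, while a comma-separated list becomes a tuple type. My reading of the new control flow, written out as a comment; it is inferred from `parse_paren_type` itself, not taken from `ref_parser/`:

/*
 * Inferred surface syntax after this change (an assumption drawn from the
 * parser, not from the grammar files):
 *
 *   ( type )                      grouping: the inner type is returned as res
 *   ( type , type { , type } )    tuple: elements collected via type_list_append,
 *                                 then wrapped by make_tuple_type
 *
 * In both forms the list is now closed by tok_right_paren, where the old
 * parse_tuple_type stopped at tok_right_angle_bracket.
 */
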
@@ -79,14 +83,7 @@ static struct type *try_parse_atomic_type_piece(struct parser *p, bool *is_ident
     switch (cur_tok(p)) {
     case tok_left_paren:
         consume(p);
-        res = parse_type(p);
-        expect(p, tok_right_paren);
-        break;
-
-    case tok_left_angle_bracket:
-        consume(p);
-        res = parse_tuple_type(p);
-        expect(p, tok_right_angle_bracket);
+        res = parse_paren_type(p);
         break;
 
     case tok_param_ident:
@@ -177,7 +174,7 @@ static struct decl *parse_datatype_decl(struct parser *p) {
     struct type *ty;
     while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
         if (cur_tok(p) != tok_ident) {
-            report_error(p, "Invalid datatype constructor, expected an identifier.\n");
+            report_error(p, "Invalid datatype constructor `%s`, expected an identifier.\n", token_descr(cur_tok(p)));
         }
         ctor_name = cur_lexeme(p);
         ty = parse_type(p);
@@ -264,7 +261,7 @@ struct decl *parse_decl(struct parser *p) {
         consume(p);
         return parse_value_or_func_decl(p);
     default:
-        report_error(p, "Declaration expected, invalid token.\n");
+        report_error(p, "Declaration expected, invalid token `%s`\n", token_descr(cur_tok(p)));
     }
 }
 