From 2911139d968f5ce0a1b1bb6a064b8e5b4dce5d21 Mon Sep 17 00:00:00 2001 From: Francesco Magliocca Date: Wed, 18 May 2022 00:18:31 +0200 Subject: [PATCH] Fix some parser errors --- README.md | 52 +++++++++++++++++++++++++++++++ examples/lambda_calculus.mil | 29 ++++++++--------- ref_parser/main.c | 18 +++++++++-- ref_parser/parser.y | 42 +++++++++++++++---------- src/lexer.c | 24 ++++++++++++--- src/main.c | 1 + src/parser.c | 60 ++++++++++++++++++++---------------- 7 files changed, 160 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index 4323aab..c9a2bf6 100644 --- a/README.md +++ b/README.md @@ -38,3 +38,55 @@ milly's full grammar is specified as a pair of lex and yacc files in `ref_parser kakoune plugin for milly is in `extra/milly.kak`. +Here is a small example of the syntax: + +``` +# Helpers +datatype 'a list { + Cons of ('a, 'a list), + Empty +} + +datatype 'a maybe { + Just of 'a, + Nothing +} + +# Lambda calculus term, nameless representation +datatype term { + Var of int, + Int of int, + Abs of term, + App of (term, term) +} + +# Result of the evaluation +datatype value { + Int of int, + Closure of (env, term) +} + +alias env = value list + +typecheck add_binding : value -> env -> env +def add_binding v env = Cons v env + +typecheck lookup : int -> env -> value maybe +def lookup _ <Empty> = Nothing + | lookup 0 <Cons x _> = Just x + | lookup n <Cons _ xs> = lookup (subtract n 1) xs + +typecheck eval : env -> term -> value +def eval e <Var idx> = + match lookup idx e { + case <Just v> -> v + case <Nothing> -> abort "Variable out of scope" + } + + | eval e <Int x> = Int x + | eval e <Abs t> = Closure e t + | eval e <App t1 t2> = + let def new_env = add_binding (eval e t2) e + in eval new_env t1 +``` + diff --git a/examples/lambda_calculus.mil b/examples/lambda_calculus.mil index c9501de..569c782 100644 --- a/examples/lambda_calculus.mil +++ b/examples/lambda_calculus.mil @@ -1,26 +1,26 @@ # Helpers datatype 'a list { - Cons ('a, 'a list), - Empty, + Cons of ('a, 'a list), + Empty } datatype 'a maybe { - Just 'a, - Nothing, +
Just of 'a, + Nothing } # Lambda calculus term, nameless representation datatype term { - Var int - Int int - Abs term - App (term, term) + Var of int, + Int of int, + Abs of term, + App of (term, term) } # Result of the evaluation datatype value { - Int int - Closure (env, term) + Int of int, + Closure of (env, term) } alias env = value list @@ -34,10 +34,11 @@ def lookup _ <Empty> = Nothing | lookup n <Cons _ xs> = lookup (subtract n 1) xs typecheck eval : env -> term -> value -def eval e <Var idx> = match lookup idx e { - case <Just v> -> v - case <Nothing> -> abort "Variable out of scope" - } +def eval e <Var idx> = + match lookup idx e { + case <Just v> -> v + case <Nothing> -> abort "Variable out of scope" + } | eval e <Int x> = Int x | eval e <Abs t> = Closure e t diff --git a/ref_parser/main.c b/ref_parser/main.c index 1403e77..eb02ff0 100644 --- a/ref_parser/main.c +++ b/ref_parser/main.c @@ -1,7 +1,19 @@ #include <stdio.h> #include "parser.tab.h" -int main() { - yyparse(); - return 0; +int main(int argc, char *argv[]) { /* Runs the reference parser on the file named by argv[1]; returns 1 on usage or open errors, 0 otherwise */ + if (argc != 2) { + printf("Usage: reference_parser filename.mil\n"); + return 1; + } + /* Rebind stdin to the input file. freopen is used because stdin is not guaranteed to be an assignable lvalue. */ + FILE *input = freopen(argv[1], "r", stdin); + if (!input) { + fprintf(stderr, "Could not open file %s.\n", argv[1]); + return 1; + } + yyparse(); /* NOTE(review): relies on the generated lexer reading stdin - confirm */ + fclose(input); + printf("\n"); + return 0; } diff --git a/ref_parser/parser.y b/ref_parser/parser.y index 02313d6..a2cf715 100644 --- a/ref_parser/parser.y +++ b/ref_parser/parser.y @@ -4,6 +4,7 @@ int yylex(); void yyerror(const char *); + %} %token IDENT %token PARAM_IDENT @@ -15,8 +16,6 @@ void yyerror(const char *); /* Function constructor is right associative */ %right ARROW -%right ':' - %start program %% @@ -32,18 +31,18 @@ type: atomic_type atomic_type: type_app | PARAM_IDENT | '(' type ')' - | '(' tuple_types ')' + | '(' type tuple_types ')' type_app_atom: PARAM_IDENT | '(' type ')' - | '(' tuple_types ')' + | '(' type tuple_types ')' | IDENT type_app: IDENT | type_app_atom type_app -tuple_types: type ',' type - | type ',' tuple_types +tuple_types: ',' type + | tuple_types ',' type /* 
Declaration syntax */ decl: datatype_decl @@ -59,7 +58,12 @@ def_type_params: %empty | PARAM_IDENT def_type_params datatype_alts: %empty - | IDENT type ',' datatype_alts + | IDENT OF type datatype_alts_cont + | IDENT datatype_alts_cont + +datatype_alts_cont: %empty + | ',' IDENT OF type datatype_alts_cont /* a payload is introduced by OF, exactly as in the first alternative of datatype_alts */ + | ',' IDENT datatype_alts_cont alias_decl: ALIAS def_type_params IDENT '=' type @@ -81,20 +85,19 @@ atomic_expr: IDENT | TRUE | FALSE | STRING | '[' list_elems ']' - | '(' expr ',' list_elems ')' + | '(' expr ',' expr list_elems_cont ')' | '(' expr ')' list_elems: %empty - | expr ',' list_elems + | expr list_elems_cont + +list_elems_cont: %empty + | ',' expr list_elems_cont fun_app_expr: atomic_expr | fun_app_expr atomic_expr -infix_expr: fun_app_expr - | infix_expr ':' infix_expr /* List cons */ - /* TODO: | infix_expr IDENT infix_expr */ - -expr: infix_expr +expr: fun_app_expr | MATCH expr '{' case_alts '}' | LET let_decls IN expr @@ -108,12 +111,17 @@ pattern: IDENT | INT | TRUE | FALSE | STRING - | '<' IDENT param_patterns '>' - | '(' pattern ',' pattern_list ')' + | '<' IDENT ctor_param_patterns '>' + | '(' pattern ',' pattern pattern_list_cont ')' | '[' pattern_list ']' +ctor_param_patterns: %empty | param_patterns + pattern_list: %empty - | pattern ',' pattern_list + | pattern pattern_list_cont + +pattern_list_cont: %empty + | ',' pattern pattern_list_cont %% diff --git a/src/lexer.c b/src/lexer.c index 9e9cad5..4e3e596 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -162,7 +162,6 @@ struct reserved_symbol punctuation[] = { }; struct reserved_symbol keywords[] = { - { "->", tok_arrow }, { "=", tok_equal }, { "alias", tok_alias }, { "case", tok_case }, @@ -235,11 +234,9 @@ static void lex_param_ident(struct lexer *lex, struct token *out) { out->lexeme = dup_lexeme(lex); } -/* We assume the current char is valid identifier starter */ +/* We assume the identifier starter has already been analyzed + * and stored in the internal buffer */ static void 
lex_ident(struct lexer *lex, struct token *out) { - /* Store ident in the internal buffer */ - store_char(lex, lex->cur); - advance(lex); while (is_ident_cont(lex->cur)) { /* Store ident in the internal buffer */ store_char(lex, lex->cur); @@ -363,9 +360,26 @@ keep_lexing: lex_string(lex, out); break; + case '-': + *loc = lex->loc; + store_char(lex, lex->cur); + advance(lex); + if (lex->cur == '>') { + advance(lex); + /* Separately process the -> operator */ + out->type = tok_arrow; /* NOTE(review): the '-' stored above is still in the internal buffer on this path - confirm it is discarded before the next token */ + } + else { + /* Otherwise it's an identifier */ + lex_ident(lex, out); + } + break; + default: if (is_ident_start(lex->cur)) { *loc = lex->loc; + store_char(lex, lex->cur); /* lex_ident no longer buffers the first character itself */ + advance(lex); lex_ident(lex, out); } else { diff --git a/src/main.c b/src/main.c index 77507df..e0c2ed6 100644 --- a/src/main.c +++ b/src/main.c @@ -14,6 +14,7 @@ int main(int argc, char *argv[]) { struct parser p; init_parser(&p, input); parse_program(&p); + printf("Parsing is successful.\n"); fclose(input); return 0; } diff --git a/src/parser.c b/src/parser.c index 8c0ec01..345f8db 100644 --- a/src/parser.c +++ b/src/parser.c @@ -158,6 +158,20 @@ static struct var_list *parse_def_var_list(struct parser *p) { return params.head; } +static void parse_datatype_constructor(struct parser *p, struct constructor_list_builder *b) { /* Parses one constructor (an identifier, optionally followed by `of` and a type) and appends it to b */ + if (cur_tok(p) != tok_ident) { + report_error(p, "Invalid datatype constructor `%s`, expected an identifier.\n", token_descr(cur_tok(p))); + } + char *ctor_name = cur_lexeme(p); + consume(p); + struct type *ty = NULL; /* stays NULL when no `of` payload follows the name */ + if (cur_tok(p) == tok_of) { + consume(p); + ty = parse_type(p); + } + constructor_list_append(b, ctor_name, ty); +} + static struct decl *parse_datatype_decl(struct parser *p) { struct var_list *params = parse_def_var_list(p); if (cur_tok(p) != tok_ident) { @@ -170,16 +184,12 @@ static struct decl *parse_datatype_decl(struct parser *p) { /* Parse constructors */ struct constructor_list_builder ctors = { NULL }; - char *ctor_name; - struct type *ty; + if (cur_tok(p) != 
tok_eof && cur_tok(p) != tok_right_brace) { + parse_datatype_constructor(p, &ctors); /* the first constructor has no leading comma */ + } while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) { - if (cur_tok(p) != tok_ident) { - report_error(p, "Invalid datatype constructor `%s`, expected an identifier.\n", token_descr(cur_tok(p))); - } - ctor_name = cur_lexeme(p); - ty = parse_type(p); - constructor_list_append(&ctors, ctor_name, ty); expect(p, tok_comma); + parse_datatype_constructor(p, &ctors); /* every later constructor follows a ',' */ } expect(p, tok_right_brace); return make_datatype_decl(params, datatype_name, ctors.head); @@ -269,10 +279,14 @@ struct decl *parse_decl(struct parser *p) { static struct expr *parse_list_literal(struct parser *p) { struct expr_list_builder elems = { NULL }; struct expr *e; - while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) { + if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) { e = parse_expr(p); expr_list_append(&elems, e); + } + while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) { expect(p, tok_comma); + e = parse_expr(p); + expr_list_append(&elems, e); } expect(p, tok_right_square); return make_list_lit(elems.head); @@ -346,20 +360,6 @@ static struct expr *parse_fun_app(struct parser *p) { return make_func_app(fun, args.head); } -/* TODO: Implement infix operations */ -static struct expr *parse_infix_expr(struct parser *p) { - struct expr *lhs = parse_fun_app(p); - /*if (cur_tok(p) == tok_colon) { - consume(p); - struct expr_list_builder params = { NULL };; - expr_list_append(&params, lhs); - struct expr *rhs = parse_infix_expr(p); - expr_list_append(&params, rhs); - return make_list_cons(lhs, rhs); - }*/ - return lhs; -} - static struct expr *parse_match_expr(struct parser *p) { struct expr *cond = parse_expr(p); expect(p, tok_left_brace); @@ -396,16 +396,19 @@ struct expr *parse_expr(struct parser *p) { consume(p); return parse_let_expr(p); default: - return parse_infix_expr(p); + return parse_fun_app(p); /* with the infix layer removed, any other expression is parsed as a function application */ } } /* Patterns */ static struct pattern *parse_list_pattern(struct 
parser *p) { struct pattern_list_builder elems = { NULL }; - while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) { + if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) { pattern_list_append(&elems, parse_pattern(p)); + } + while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) { expect(p, tok_comma); + pattern_list_append(&elems, parse_pattern(p)); } expect(p, tok_right_square); return make_list_pattern(elems.head); @@ -413,9 +416,12 @@ static struct pattern *parse_list_pattern(struct parser *p) { static struct pattern *parse_tuple_pattern(struct parser *p) { struct pattern_list_builder elems = { NULL }; - while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) { + if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) { pattern_list_append(&elems, parse_pattern(p)); + } + while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) { expect(p, tok_comma); + pattern_list_append(&elems, parse_pattern(p)); } - expect(p, tok_right_square); + expect(p, tok_right_paren); /* the loop above stops at ')', so that is the token that must close the tuple */ return make_tuple_pattern(elems.head); @@ -479,7 +485,7 @@ static struct pattern *parse_pattern(struct parser *p) { res = parse_constructor_pattern(p); break; default: - report_error(p, "Invalid pattern.\n"); + report_error(p, "Invalid pattern starting with %s.\n", token_descr(cur_tok(p))); } return res; }