Fix some parser errors

This commit is contained in:
Francesco Magliocca 2022-05-18 00:18:31 +02:00
parent 7dac4b66d3
commit 2911139d96
7 changed files with 160 additions and 66 deletions

View File

@ -38,3 +38,55 @@ milly's full grammar is specified as a pair of lex and yacc files in `ref_parser
A Kakoune plugin for milly is in `extra/milly.kak`.
Here is a small example of the syntax:
```
# Helpers
datatype 'a list {
Cons of ('a, 'a list),
Empty
}
datatype 'a maybe {
Just of 'a,
Nothing
}
# Lambda calculus term, nameless representation
datatype term {
Var of int,
Int of int,
Abs of term,
App of (term, term)
}
# Result of the evaluation
datatype value {
Int of int,
Closure of (env, term)
}
alias env = value list
typecheck add_binding : value -> env -> env
def add_binding v env = Cons v env
typecheck lookup : int -> env -> value maybe
def lookup _ <Empty> = Nothing
| lookup 0 <Cons x xs> = Just x
| lookup n <Cons x xs> = lookup (subtract n 1) xs
typecheck eval : env -> term -> value
def eval e <Var idx> =
match lookup idx e {
case <Just v> -> v
case <Nothing> -> abort "Variable out of scope"
}
| eval e <Int x> = Int x
| eval e <Abs t> = Closure e t
| eval e <App t1 t2> =
let def new_env = add_binding (eval e t2) e
in eval new_env t1
```

View File

@ -1,26 +1,26 @@
# Helpers
datatype 'a list {
Cons ('a, 'a list),
Empty,
Cons of ('a, 'a list),
Empty
}
datatype 'a maybe {
Just 'a,
Nothing,
Just of 'a,
Nothing
}
# Lambda calculus term, nameless representation
datatype term {
Var int
Int int
Abs term
App (term, term)
Var of int,
Int of int,
Abs of term,
App of (term, term)
}
# Result of the evaluation
datatype value {
Int int
Closure (env, term)
Int of int,
Closure of (env, term)
}
alias env = value list
@ -34,7 +34,8 @@ def lookup _ <Empty> = Nothing
| lookup n <Cons x xs> = lookup (subtract n 1) xs
typecheck eval : env -> term -> value
def eval e <Var idx> = match lookup idx e {
def eval e <Var idx> =
match lookup idx e {
case <Just v> -> v
case <Nothing> -> abort "Variable out of scope"
}

View File

@ -1,7 +1,19 @@
#include <stdio.h>
#include "parser.tab.h"
int main() {
/*
 * Entry point of the reference parser.
 *
 * Expects exactly one command-line argument: the path of the file to parse.
 * The file is attached to the standard input stream and then yyparse() is
 * run (presumably the generated scanner reads from stdin -- the previous
 * version relied on the same assumption; confirm against the lexer).
 *
 * Returns 1 when the arguments are wrong or the file cannot be opened,
 * 0 otherwise.
 */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        printf("Usage: reference_parser filename.mil\n");
        return 1;
    }

    /*
     * Redirect stdin with freopen() instead of assigning to `stdin`:
     * `stdin` is a macro that is not required to expand to a modifiable
     * lvalue, so `stdin = input;` does not compile on every C library.
     */
    if (!freopen(argv[1], "r", stdin)) {
        fprintf(stderr, "Could not open file %s.\n", argv[1]);
        return 1;
    }

    yyparse();

    /* stdin now refers to the input file; closing it releases that file. */
    fclose(stdin);
    printf("\n");
    return 0;
}

View File

@ -4,6 +4,7 @@
int yylex();
void yyerror(const char *);
%}
%token IDENT
%token PARAM_IDENT
@ -15,8 +16,6 @@ void yyerror(const char *);
/* Function constructor is right associative */
%right ARROW
%right ':'
%start program
%%
@ -32,18 +31,18 @@ type: atomic_type
atomic_type: type_app
| PARAM_IDENT
| '(' type ')'
| '(' tuple_types ')'
| '(' type tuple_types ')'
type_app_atom: PARAM_IDENT
| '(' type ')'
| '(' tuple_types ')'
| '(' type tuple_types ')'
| IDENT
type_app: IDENT
| type_app_atom type_app
tuple_types: type ',' type
| type ',' tuple_types
tuple_types: ',' type
| tuple_types ',' type
/* Declaration syntax */
decl: datatype_decl
@ -59,7 +58,12 @@ def_type_params: %empty
| PARAM_IDENT def_type_params
datatype_alts: %empty
| IDENT type ',' datatype_alts
| IDENT OF type datatype_alts_cont
| IDENT datatype_alts_cont
datatype_alts_cont: %empty
| ',' IDENT type datatype_alts_cont
| ',' IDENT datatype_alts_cont
alias_decl: ALIAS def_type_params IDENT '=' type
@ -81,20 +85,19 @@ atomic_expr: IDENT
| TRUE | FALSE
| STRING
| '[' list_elems ']'
| '(' expr ',' list_elems ')'
| '(' expr ',' expr list_elems_cont ')'
| '(' expr ')'
list_elems: %empty
| expr ',' list_elems
| expr list_elems_cont
list_elems_cont: %empty
| ',' expr list_elems_cont
fun_app_expr: atomic_expr
| fun_app_expr atomic_expr
infix_expr: fun_app_expr
| infix_expr ':' infix_expr /* List cons */
/* TODO: | infix_expr IDENT infix_expr */
expr: infix_expr
expr: fun_app_expr
| MATCH expr '{' case_alts '}'
| LET let_decls IN expr
@ -108,12 +111,17 @@ pattern: IDENT
| INT
| TRUE | FALSE
| STRING
| '<' IDENT param_patterns '>'
| '(' pattern ',' pattern_list ')'
| '<' IDENT ctor_param_patterns '>'
| '(' pattern ',' pattern pattern_list_cont ')'
| '[' pattern_list ']'
ctor_param_patterns: %empty | param_patterns
pattern_list: %empty
| pattern ',' pattern_list
| pattern pattern_list_cont
pattern_list_cont: %empty
| ',' pattern pattern_list_cont
%%

View File

@ -162,7 +162,6 @@ struct reserved_symbol punctuation[] = {
};
struct reserved_symbol keywords[] = {
{ "->", tok_arrow },
{ "=", tok_equal },
{ "alias", tok_alias },
{ "case", tok_case },
@ -235,11 +234,9 @@ static void lex_param_ident(struct lexer *lex, struct token *out) {
out->lexeme = dup_lexeme(lex);
}
/* We assume the current char is valid identifier starter */
/* We assume the identifier starter has already been analyzed
* and stored in the internal buffer */
static void lex_ident(struct lexer *lex, struct token *out) {
/* Store ident in the internal buffer */
store_char(lex, lex->cur);
advance(lex);
while (is_ident_cont(lex->cur)) {
/* Store ident in the internal buffer */
store_char(lex, lex->cur);
@ -363,9 +360,26 @@ keep_lexing:
lex_string(lex, out);
break;
case '-':
*loc = lex->loc;
store_char(lex, lex->cur);
advance(lex);
if (lex->cur == '>') {
advance(lex);
/* Separately process the -> operator */
out->type = tok_arrow;
}
else {
/* Otherwise it's an identifier */
lex_ident(lex, out);
}
break;
default:
if (is_ident_start(lex->cur)) {
*loc = lex->loc;
store_char(lex, lex->cur);
advance(lex);
lex_ident(lex, out);
}
else {

View File

@ -14,6 +14,7 @@ int main(int argc, char *argv[]) {
struct parser p;
init_parser(&p, input);
parse_program(&p);
printf("Parsing is successful.\n");
fclose(input);
return 0;
}

View File

@ -158,6 +158,20 @@ static struct var_list *parse_def_var_list(struct parser *p) {
return params.head;
}
/*
 * Parses a single datatype constructor and appends it to the builder `b`.
 *
 * A constructor is an identifier, optionally followed by the `of` token and
 * a type. When no `of` follows, the constructor is appended with a NULL type.
 * If the current token is not an identifier, an error is reported through
 * report_error().
 */
static void parse_datatype_constructor(struct parser *p, struct constructor_list_builder *b) {
    if (cur_tok(p) != tok_ident) {
        report_error(p, "Invalid datatype constructor `%s`, expected an identifier.\n", token_descr(cur_tok(p)));
    }

    /* Grab the constructor name before moving past the identifier token. */
    char *name = cur_lexeme(p);
    consume(p);

    /* No `of` keyword: the constructor carries no payload type. */
    if (cur_tok(p) != tok_of) {
        constructor_list_append(b, name, NULL);
        return;
    }

    /* Skip `of`, then parse the payload type that follows it. */
    consume(p);
    constructor_list_append(b, name, parse_type(p));
}
static struct decl *parse_datatype_decl(struct parser *p) {
struct var_list *params = parse_def_var_list(p);
if (cur_tok(p) != tok_ident) {
@ -170,16 +184,12 @@ static struct decl *parse_datatype_decl(struct parser *p) {
/* Parse constructors */
struct constructor_list_builder ctors = { NULL };
char *ctor_name;
struct type *ty;
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
if (cur_tok(p) != tok_ident) {
report_error(p, "Invalid datatype constructor `%s`, expected an identifier.\n", token_descr(cur_tok(p)));
if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
parse_datatype_constructor(p, &ctors);
}
ctor_name = cur_lexeme(p);
ty = parse_type(p);
constructor_list_append(&ctors, ctor_name, ty);
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
expect(p, tok_comma);
parse_datatype_constructor(p, &ctors);
}
expect(p, tok_right_brace);
return make_datatype_decl(params, datatype_name, ctors.head);
@ -269,10 +279,14 @@ struct decl *parse_decl(struct parser *p) {
static struct expr *parse_list_literal(struct parser *p) {
struct expr_list_builder elems = { NULL };
struct expr *e;
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
e = parse_expr(p);
expr_list_append(&elems, e);
}
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
expect(p, tok_comma);
e = parse_expr(p);
expr_list_append(&elems, e);
}
expect(p, tok_right_square);
return make_list_lit(elems.head);
@ -346,20 +360,6 @@ static struct expr *parse_fun_app(struct parser *p) {
return make_func_app(fun, args.head);
}
/* TODO: Implement infix operations */
static struct expr *parse_infix_expr(struct parser *p) {
struct expr *lhs = parse_fun_app(p);
/*if (cur_tok(p) == tok_colon) {
consume(p);
struct expr_list_builder params = { NULL };;
expr_list_append(&params, lhs);
struct expr *rhs = parse_infix_expr(p);
expr_list_append(&params, rhs);
return make_list_cons(lhs, rhs);
}*/
return lhs;
}
static struct expr *parse_match_expr(struct parser *p) {
struct expr *cond = parse_expr(p);
expect(p, tok_left_brace);
@ -396,16 +396,19 @@ struct expr *parse_expr(struct parser *p) {
consume(p);
return parse_let_expr(p);
default:
return parse_infix_expr(p);
return parse_fun_app(p);
}
}
/* Patterns */
static struct pattern *parse_list_pattern(struct parser *p) {
struct pattern_list_builder elems = { NULL };
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
pattern_list_append(&elems, parse_pattern(p));
}
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
expect(p, tok_comma);
pattern_list_append(&elems, parse_pattern(p));
}
expect(p, tok_right_square);
return make_list_pattern(elems.head);
@ -413,9 +416,12 @@ static struct pattern *parse_list_pattern(struct parser *p) {
static struct pattern *parse_tuple_pattern(struct parser *p) {
struct pattern_list_builder elems = { NULL };
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) {
if (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
pattern_list_append(&elems, parse_pattern(p));
}
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
expect(p, tok_comma);
pattern_list_append(&elems, parse_pattern(p));
}
expect(p, tok_right_square);
return make_tuple_pattern(elems.head);
@ -479,7 +485,7 @@ static struct pattern *parse_pattern(struct parser *p) {
res = parse_constructor_pattern(p);
break;
default:
report_error(p, "Invalid pattern.\n");
report_error(p, "Invalid pattern starting with %s.\n", token_descr(cur_tok(p)));
}
return res;
}