Fix some parser errors

This commit is contained in:
Francesco Magliocca 2022-05-17 21:33:36 +02:00
parent dfbe469fb3
commit 7dac4b66d3
5 changed files with 121 additions and 38 deletions

View File

@ -32,6 +32,9 @@ here is a list of the features (or better, limitations) I want to introduce:
- Polymorphic types à la Hindley Milner (probably unfeasible) - Polymorphic types à la Hindley Milner (probably unfeasible)
- Automatic currying of functions, with optimised partial evaluation (unfeasible) - Automatic currying of functions, with optimised partial evaluation (unfeasible)
- Delimited continuations (this is an overkill) - Delimited continuations (this is an overkill)
- Easy syntax allowing easy kakoune support
milly's full grammar is specified as a pair of lex and yacc files in `ref_parser/`. milly's full grammar is specified as a pair of lex and yacc files in `ref_parser/`.
The kakoune plugin for milly is in `extra/milly.kak`.

View File

@ -45,6 +45,9 @@ struct token {
char *lexeme; char *lexeme;
}; };
/* Gives a text description of a token type */
const char *token_descr(enum token_type tok);
struct location { struct location {
size_t line; size_t line;
size_t col; size_t col;

View File

@ -6,6 +6,73 @@
#include <ctype.h> #include <ctype.h>
#include <stdnoreturn.h> #include <stdnoreturn.h>
/*
 * Returns a short, human-readable description of a token type, intended
 * for use in diagnostics.
 *
 * Punctuation and keywords are described by their literal spelling;
 * identifiers, literals and end-of-input get a descriptive phrase.
 * Any value without a dedicated case yields "unknown token".
 *
 * The result always points to a string literal: it must not be modified
 * or freed by the caller.
 */
const char *token_descr(enum token_type tok) {
    /* Fallback used when no case below matches */
    const char *descr = "unknown token";

    switch (tok) {
    /* Identifiers and literals */
    case tok_ident:               descr = "identifier";          break;
    case tok_param_ident:         descr = "variable identifier"; break;
    case tok_int:                 descr = "int literal";         break;
    case tok_string:              descr = "string literal";      break;

    /* Operators and punctuation */
    case tok_arrow:               descr = "->";                  break;
    case tok_backslash:           descr = "\\";                  break;
    case tok_equal:               descr = "=";                   break;
    case tok_left_paren:          descr = "(";                   break;
    case tok_right_paren:         descr = ")";                   break;
    case tok_left_square:         descr = "[";                   break;
    case tok_right_square:        descr = "]";                   break;
    case tok_left_brace:          descr = "{";                   break;
    case tok_right_brace:         descr = "}";                   break;
    case tok_left_angle_bracket:  descr = "<";                   break;
    case tok_right_angle_bracket: descr = ">";                   break;
    case tok_comma:               descr = ",";                   break;
    case tok_pipe:                descr = "|";                   break;
    case tok_colon:               descr = ":";                   break;

    /* Keywords */
    case tok_true:                descr = "true";                break;
    case tok_false:               descr = "false";               break;
    case tok_case:                descr = "case";                break;
    case tok_let:                 descr = "let";                 break;
    case tok_in:                  descr = "in";                  break;
    case tok_match:               descr = "match";               break;
    case tok_of:                  descr = "of";                  break;
    case tok_def:                 descr = "def";                 break;
    case tok_datatype:            descr = "datatype";            break;
    case tok_alias:               descr = "alias";               break;
    case tok_typecheck:           descr = "typecheck";           break;

    /* End of input */
    case tok_eof:                 descr = "end of file";         break;

    default:
        break;
    }

    return descr;
}
/* Helper to report lexical errors */ /* Helper to report lexical errors */
static noreturn void report_lex_error(struct lexer *lex, const char *fmt, ...) { static noreturn void report_lex_error(struct lexer *lex, const char *fmt, ...) {
if (fprintf(stderr, "Error at %ld:%ld ", lex->loc.line, lex->loc.col) < 0) { if (fprintf(stderr, "Error at %ld:%ld ", lex->loc.line, lex->loc.col) < 0) {
@ -78,35 +145,35 @@ struct reserved_symbol {
enum token_type type; enum token_type type;
}; };
/* These two arrays must be alpha sorted to be usable by bsearch */ /* These two arrays must be sorted (according to ascii value) to be usable by bsearch */
struct reserved_symbol punctuation[] = { struct reserved_symbol punctuation[] = {
{ "(", tok_left_paren }, { "(", tok_left_paren },
{ ")", tok_right_paren }, { ")", tok_right_paren },
{ "[", tok_left_square },
{ "]", tok_right_square },
{ "{", tok_left_brace },
{ "}", tok_right_brace },
{ ",", tok_comma }, { ",", tok_comma },
{ "|", tok_pipe },
{ ":", tok_colon }, { ":", tok_colon },
{ "<", tok_left_angle_bracket }, { "<", tok_left_angle_bracket },
{ ">", tok_right_angle_bracket }, { ">", tok_right_angle_bracket },
{ "[", tok_left_square },
{ "\\", tok_backslash }, { "\\", tok_backslash },
{ "]", tok_right_square },
{ "{", tok_left_brace },
{ "|", tok_pipe },
{ "}", tok_right_brace },
}; };
struct reserved_symbol keywords[] = { struct reserved_symbol keywords[] = {
{ "=", tok_equal },
{ "->", tok_arrow }, { "->", tok_arrow },
{ "true", tok_true }, { "=", tok_equal },
{ "false", tok_false }, { "alias", tok_alias },
{ "case", tok_case }, { "case", tok_case },
{ "let", tok_let }, { "datatype", tok_datatype },
{ "def", tok_def },
{ "false", tok_false },
{ "in", tok_in }, { "in", tok_in },
{ "let", tok_let },
{ "match", tok_match }, { "match", tok_match },
{ "of", tok_of }, { "of", tok_of },
{ "def", tok_def }, { "true", tok_true },
{ "datatype", tok_datatype },
{ "alias", tok_alias },
{ "typecheck", tok_typecheck }, { "typecheck", tok_typecheck },
}; };
@ -124,6 +191,7 @@ void lex_punctuation(struct lexer *lex, struct token *out) {
/* This can't fail because of the precondition to this function */ /* This can't fail because of the precondition to this function */
struct reserved_symbol *r = bsearch(key, punctuation, num, elem_size, symbol_cmp); struct reserved_symbol *r = bsearch(key, punctuation, num, elem_size, symbol_cmp);
out->type = r->type; out->type = r->type;
advance(lex);
} }
/* We assume that the curr char is already a digit */ /* We assume that the curr char is already a digit */
@ -171,6 +239,7 @@ static void lex_param_ident(struct lexer *lex, struct token *out) {
static void lex_ident(struct lexer *lex, struct token *out) { static void lex_ident(struct lexer *lex, struct token *out) {
/* Store ident in the internal buffer */ /* Store ident in the internal buffer */
store_char(lex, lex->cur); store_char(lex, lex->cur);
advance(lex);
while (is_ident_cont(lex->cur)) { while (is_ident_cont(lex->cur)) {
/* Store ident in the internal buffer */ /* Store ident in the internal buffer */
store_char(lex, lex->cur); store_char(lex, lex->cur);
@ -276,6 +345,7 @@ keep_lexing:
case '\'': case '\'':
*loc = lex->loc; *loc = lex->loc;
advance(lex);
lex_param_ident(lex, out); lex_param_ident(lex, out);
break; break;

View File

@ -2,8 +2,18 @@
#include "parser.h" #include "parser.h"
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
if (argc != 2) {
printf("Usage: milly file.mil\n");
return 1;
}
FILE *input = fopen(argv[1], "r");
if (!input) {
fprintf(stderr, "Error: Could not open file %s.\n", argv[1]);
return 1;
}
struct parser p; struct parser p;
init_parser(&p, stdin); init_parser(&p, input);
parse_program(&p); parse_program(&p);
fclose(input);
return 0; return 0;
} }

View File

@ -44,32 +44,36 @@ static noreturn void report_error(struct parser *p, const char *fmt, ...) {
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
static const char *token_descr(enum token_type t) {
return "another token";
}
/* Consume next token if it is of the required type, otherwise fail with error */ /* Consume next token if it is of the required type, otherwise fail with error */
static void expect(struct parser *p, enum token_type t) { static void expect(struct parser *p, enum token_type t) {
if (cur_tok(p) != t) { if (cur_tok(p) != t) {
report_error(p, "Expected %s.\n", token_descr(t)); report_error(p, "Expected `%s`, but found `%s`.\n", token_descr(t), token_descr(cur_tok(p)));
} }
consume(p); consume(p);
} }
/* Types */ /* Types */
static struct type *parse_tuple_type(struct parser *p) { static struct type *parse_paren_type(struct parser *p) {
struct type *res;
struct type *t = parse_type(p); struct type *t = parse_type(p);
if (cur_tok(p) == tok_comma) {
consume(p);
struct type_list_builder list = { NULL }; struct type_list_builder list = { NULL };
type_list_append(&list, t); type_list_append(&list, t);
expect(p, tok_comma);
t = parse_type(p); t = parse_type(p);
type_list_append(&list, t); type_list_append(&list, t);
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_angle_bracket) { while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
expect(p, tok_comma); expect(p, tok_comma);
t = parse_type(p); t = parse_type(p);
type_list_append(&list, t); type_list_append(&list, t);
} }
return make_tuple_type(list.head); res = make_tuple_type(list.head);
}
else {
res = t;
}
expect(p, tok_right_paren);
return res;
} }
/* sets *is_ident to true if the result is a single type name */ /* sets *is_ident to true if the result is a single type name */
@ -79,14 +83,7 @@ static struct type *try_parse_atomic_type_piece(struct parser *p, bool *is_ident
switch (cur_tok(p)) { switch (cur_tok(p)) {
case tok_left_paren: case tok_left_paren:
consume(p); consume(p);
res = parse_type(p); res = parse_paren_type(p);
expect(p, tok_right_paren);
break;
case tok_left_angle_bracket:
consume(p);
res = parse_tuple_type(p);
expect(p, tok_right_angle_bracket);
break; break;
case tok_param_ident: case tok_param_ident:
@ -177,7 +174,7 @@ static struct decl *parse_datatype_decl(struct parser *p) {
struct type *ty; struct type *ty;
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) { while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
if (cur_tok(p) != tok_ident) { if (cur_tok(p) != tok_ident) {
report_error(p, "Invalid datatype constructor, expected an identifier.\n"); report_error(p, "Invalid datatype constructor `%s`, expected an identifier.\n", token_descr(cur_tok(p)));
} }
ctor_name = cur_lexeme(p); ctor_name = cur_lexeme(p);
ty = parse_type(p); ty = parse_type(p);
@ -264,7 +261,7 @@ struct decl *parse_decl(struct parser *p) {
consume(p); consume(p);
return parse_value_or_func_decl(p); return parse_value_or_func_decl(p);
default: default:
report_error(p, "Declaration expected, invalid token.\n"); report_error(p, "Declaration expected, invalid token `%s`\n", token_descr(cur_tok(p)));
} }
} }