Fix some parser errors
This commit is contained in:
parent dfbe469fb3
commit 7dac4b66d3
@@ -32,6 +32,9 @@ here is a list of the features (or better, limitations) I want to introduce:
 - Polymorphic types à la Hindley Milner (probably unfeasible)
 - Automatic currying of functions, with optimised partial evaluation (unfeasible)
 - Delimited continuations (this is an overkill)
+- Easy syntax allowing easy kakoune support
+
+milly's full grammar is specified as a pair of lex and yacc files in `ref_parser/`.
 
 kakoune plugin for milly is in `extra/milly.kak`.
 
@@ -45,6 +45,9 @@ struct token {
     char *lexeme;
 };
 
+/* Gives a text description of a token type */
+const char *token_descr(enum token_type tok);
+
 struct location {
     size_t line;
     size_t col;

src/lexer.c (96 lines changed)
@@ -6,6 +6,73 @@
 #include <ctype.h>
 #include <stdnoreturn.h>
 
+const char *token_descr(enum token_type tok) {
+    switch (tok) {
+    case tok_ident:
+        return "identifier";
+    case tok_param_ident:
+        return "variable identifier";
+    case tok_int:
+        return "int literal";
+    case tok_string:
+        return "string literal";
+    case tok_arrow:
+        return "->";
+    case tok_backslash:
+        return "\\";
+    case tok_equal:
+        return "=";
+    case tok_left_paren:
+        return "(";
+    case tok_right_paren:
+        return ")";
+    case tok_left_square:
+        return "[";
+    case tok_right_square:
+        return "]";
+    case tok_left_brace:
+        return "{";
+    case tok_right_brace:
+        return "}";
+    case tok_left_angle_bracket:
+        return "<";
+    case tok_right_angle_bracket:
+        return ">";
+    case tok_comma:
+        return ",";
+    case tok_pipe:
+        return "|";
+    case tok_colon:
+        return ":";
+    case tok_true:
+        return "true";
+    case tok_false:
+        return "false";
+    case tok_case:
+        return "case";
+    case tok_let:
+        return "let";
+    case tok_in:
+        return "in";
+    case tok_match:
+        return "match";
+    case tok_of:
+        return "of";
+    case tok_def:
+        return "def";
+    case tok_datatype:
+        return "datatype";
+    case tok_alias:
+        return "alias";
+    case tok_typecheck:
+        return "typecheck";
+    case tok_eof:
+        return "end of file";
+    default:
+        return "unknown token";
+    }
+}
+
 /* Helper to report lexical errors */
 static noreturn void report_lex_error(struct lexer *lex, const char *fmt, ...) {
     if (fprintf(stderr, "Error at %ld:%ld ", lex->loc.line, lex->loc.col) < 0) {
@@ -78,35 +145,35 @@ struct reserved_symbol {
     enum token_type type;
 };
 
-/* These two arrays must be alpha sorted to be usable by bsearch */
+/* These two arrays must be sorted (according to ascii value) to be usable by bsearch */
 struct reserved_symbol punctuation[] = {
     { "(", tok_left_paren },
     { ")", tok_right_paren },
-    { "[", tok_left_square },
-    { "]", tok_right_square },
-    { "{", tok_left_brace },
-    { "}", tok_right_brace },
     { ",", tok_comma },
-    { "|", tok_pipe },
     { ":", tok_colon },
     { "<", tok_left_angle_bracket },
     { ">", tok_right_angle_bracket },
+    { "[", tok_left_square },
     { "\\", tok_backslash },
+    { "]", tok_right_square },
+    { "{", tok_left_brace },
+    { "|", tok_pipe },
+    { "}", tok_right_brace },
 };
 
 struct reserved_symbol keywords[] = {
-    { "=", tok_equal },
     { "->", tok_arrow },
-    { "true", tok_true },
-    { "false", tok_false },
+    { "=", tok_equal },
+    { "alias", tok_alias },
     { "case", tok_case },
-    { "let", tok_let },
+    { "datatype", tok_datatype },
+    { "def", tok_def },
+    { "false", tok_false },
     { "in", tok_in },
+    { "let", tok_let },
     { "match", tok_match },
     { "of", tok_of },
-    { "def", tok_def },
-    { "datatype", tok_datatype },
-    { "alias", tok_alias },
+    { "true", tok_true },
     { "typecheck", tok_typecheck },
 };
 
@@ -124,6 +191,7 @@ void lex_punctuation(struct lexer *lex, struct token *out) {
     /* This can't fail because of the precondition to this function */
     struct reserved_symbol *r = bsearch(key, punctuation, num, elem_size, symbol_cmp);
     out->type = r->type;
+    advance(lex);
 }
 
 /* We assume that the curr char is already a digit */
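
Side note on the bsearch-based lookup above: it only works because `punctuation` and `keywords` stay in strcmp order, which is what the re-sorted tables in the previous hunk restore. The diff never shows `symbol_cmp` or how `key` is built, so the comparator below is only a compatible sketch, not the repository's code; the string field name `text` is an assumption (only the `type` member is visible in the diff).

#include <stdlib.h>
#include <string.h>

/* Sketch of a reserved-symbol entry; the real struct in src/lexer.c only
 * shows its `type` member in the diff, so `text` is a guessed name. */
struct reserved_symbol {
    const char *text;
    int type;   /* enum token_type in the real code */
};

/* bsearch() passes the search key first and an array element second.
 * Comparing lexemes with strcmp() is exactly why the tables must be
 * sorted by ascii value: bsearch assumes strcmp order. */
static int symbol_cmp(const void *key, const void *elem) {
    const struct reserved_symbol *k = key;
    const struct reserved_symbol *e = elem;
    return strcmp(k->text, e->text);
}

/* Usage shape, mirroring the call in lex_punctuation:
 *   struct reserved_symbol key = { lexeme, 0 };
 *   struct reserved_symbol *r = bsearch(&key, punctuation,
 *       sizeof punctuation / sizeof punctuation[0],
 *       sizeof punctuation[0], symbol_cmp);
 */
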
@@ -171,6 +239,7 @@ static void lex_param_ident(struct lexer *lex, struct token *out) {
 static void lex_ident(struct lexer *lex, struct token *out) {
     /* Store ident in the internal buffer */
     store_char(lex, lex->cur);
+    advance(lex);
     while (is_ident_cont(lex->cur)) {
         /* Store ident in the internal buffer */
         store_char(lex, lex->cur);
@@ -276,6 +345,7 @@ keep_lexing:
 
     case '\'':
         *loc = lex->loc;
+        advance(lex);
         lex_param_ident(lex, out);
         break;
 

src/main.c (12 lines changed)
@@ -2,8 +2,18 @@
 #include "parser.h"
 
 int main(int argc, char *argv[]) {
+    if (argc != 2) {
+        printf("Usage: milly file.mil\n");
+        return 1;
+    }
+    FILE *input = fopen(argv[1], "r");
+    if (!input) {
+        fprintf(stderr, "Error: Could not open file %s.\n", argv[1]);
+        return 1;
+    }
     struct parser p;
-    init_parser(&p, stdin);
+    init_parser(&p, input);
     parse_program(&p);
+    fclose(input);
     return 0;
 }
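
One consequence of wiring `init_parser` to an arbitrary `FILE *` (instead of hard-coding `stdin`) is that the parser can also be driven from an in-memory buffer. A minimal sketch under stated assumptions: POSIX `fmemopen` is available, `parser.h` exposes `struct parser`, `init_parser` and `parse_program` as used in this diff, and the one-line `.mil` snippet is a made-up placeholder rather than real milly code.

#define _POSIX_C_SOURCE 200809L   /* for fmemopen */
#include <stdio.h>
#include <string.h>
#include "parser.h"

int main(void) {
    /* Placeholder source text; actual milly syntax may differ. */
    const char *src = "def answer = 42\n";
    FILE *in = fmemopen((void *)src, strlen(src), "r");
    if (!in) {
        fprintf(stderr, "fmemopen failed\n");
        return 1;
    }
    struct parser p;
    init_parser(&p, in);
    parse_program(&p);   /* report_error() exits on a parse failure */
    fclose(in);
    return 0;
}
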

src/parser.c (35 lines changed)
@@ -44,32 +44,36 @@ static noreturn void report_error(struct parser *p, const char *fmt, ...) {
     exit(EXIT_FAILURE);
 }
 
-static const char *token_descr(enum token_type t) {
-    return "another token";
-}
-
 /* Consume next token if it is of the required type, otherwise fail with error */
 static void expect(struct parser *p, enum token_type t) {
     if (cur_tok(p) != t) {
-        report_error(p, "Expected %s.\n", token_descr(t));
+        report_error(p, "Expected `%s`, but found `%s`.\n", token_descr(t), token_descr(cur_tok(p)));
     }
     consume(p);
 }
 
 /* Types */
-static struct type *parse_tuple_type(struct parser *p) {
+static struct type *parse_paren_type(struct parser *p) {
+    struct type *res;
     struct type *t = parse_type(p);
+    if (cur_tok(p) == tok_comma) {
+        consume(p);
         struct type_list_builder list = { NULL };
         type_list_append(&list, t);
-        expect(p, tok_comma);
         t = parse_type(p);
         type_list_append(&list, t);
-        while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_angle_bracket) {
+        while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
             expect(p, tok_comma);
             t = parse_type(p);
             type_list_append(&list, t);
         }
-        return make_tuple_type(list.head);
+        res = make_tuple_type(list.head);
+    }
+    else {
+        res = t;
+    }
+    expect(p, tok_right_paren);
+    return res;
 }
 
 /* sets *is_ident to true if the result is a single type name */
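
To spell out what the rewritten function accepts (the next hunk shows the matching caller change, where the angle-bracket case disappears): a single parenthesised type is returned as-is, while a comma-separated list becomes a tuple type. My reading of the new control flow, written out as a comment; it is inferred from `parse_paren_type` itself, not taken from `ref_parser/`:

/*
 * Inferred surface syntax after this change (an assumption drawn from the
 * parser, not from the grammar files):
 *
 *   ( type )                      grouping: the inner type is returned as res
 *   ( type , type { , type } )    tuple: elements collected via type_list_append,
 *                                 then wrapped by make_tuple_type
 *
 * In both forms the list is now closed by tok_right_paren, where the old
 * parse_tuple_type stopped at tok_right_angle_bracket.
 */
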
@@ -79,14 +83,7 @@ static struct type *try_parse_atomic_type_piece(struct parser *p, bool *is_ident
     switch (cur_tok(p)) {
     case tok_left_paren:
         consume(p);
-        res = parse_type(p);
-        expect(p, tok_right_paren);
-        break;
-
-    case tok_left_angle_bracket:
-        consume(p);
-        res = parse_tuple_type(p);
-        expect(p, tok_right_angle_bracket);
+        res = parse_paren_type(p);
         break;
 
     case tok_param_ident:
@@ -177,7 +174,7 @@ static struct decl *parse_datatype_decl(struct parser *p) {
     struct type *ty;
     while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
         if (cur_tok(p) != tok_ident) {
-            report_error(p, "Invalid datatype constructor, expected an identifier.\n");
+            report_error(p, "Invalid datatype constructor `%s`, expected an identifier.\n", token_descr(cur_tok(p)));
         }
         ctor_name = cur_lexeme(p);
         ty = parse_type(p);
@@ -264,7 +261,7 @@ struct decl *parse_decl(struct parser *p) {
         consume(p);
         return parse_value_or_func_decl(p);
     default:
-        report_error(p, "Declaration expected, invalid token.\n");
+        report_error(p, "Declaration expected, invalid token `%s`\n", token_descr(cur_tok(p)));
     }
 }
 