Fix some parser errors
This commit is contained in:
parent
dfbe469fb3
commit
7dac4b66d3
|
|
@ -32,6 +32,9 @@ here is a list of the features (or better, limitations) I want to introduce:
|
||||||
- Polymorphic types à la Hindley-Milner (probably unfeasible)
|
- Polymorphic types à la Hindley-Milner (probably unfeasible)
|
||||||
- Automatic currying of functions, with optimised partial evaluation (unfeasible)
|
- Automatic currying of functions, with optimised partial evaluation (unfeasible)
|
||||||
- Delimited continuations (this is overkill)
|
- Delimited continuations (this is overkill)
|
||||||
|
- Simple syntax that makes Kakoune support easy
|
||||||
|
|
||||||
milly's full grammar is specified as a pair of lex and yacc files in `ref_parser/`.
|
milly's full grammar is specified as a pair of lex and yacc files in `ref_parser/`.
|
||||||
|
|
||||||
|
A Kakoune plugin for milly is in `extra/milly.kak`.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,9 @@ struct token {
|
||||||
char *lexeme;
|
char *lexeme;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Returns a short human-readable description of a token type, for use in diagnostics */
|
||||||
|
const char *token_descr(enum token_type tok);
|
||||||
|
|
||||||
struct location {
|
struct location {
|
||||||
size_t line;
|
size_t line;
|
||||||
size_t col;
|
size_t col;
|
||||||
|
|
|
||||||
96
src/lexer.c
96
src/lexer.c
|
|
@ -6,6 +6,73 @@
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdnoreturn.h>
|
#include <stdnoreturn.h>
|
||||||
|
|
||||||
|
const char *token_descr(enum token_type tok) {
|
||||||
|
switch (tok) {
|
||||||
|
case tok_ident:
|
||||||
|
return "identifier";
|
||||||
|
case tok_param_ident:
|
||||||
|
return "variable identifier";
|
||||||
|
case tok_int:
|
||||||
|
return "int literal";
|
||||||
|
case tok_string:
|
||||||
|
return "string literal";
|
||||||
|
case tok_arrow:
|
||||||
|
return "->";
|
||||||
|
case tok_backslash:
|
||||||
|
return "\\";
|
||||||
|
case tok_equal:
|
||||||
|
return "=";
|
||||||
|
case tok_left_paren:
|
||||||
|
return "(";
|
||||||
|
case tok_right_paren:
|
||||||
|
return ")";
|
||||||
|
case tok_left_square:
|
||||||
|
return "[";
|
||||||
|
case tok_right_square:
|
||||||
|
return "]";
|
||||||
|
case tok_left_brace:
|
||||||
|
return "{";
|
||||||
|
case tok_right_brace:
|
||||||
|
return "}";
|
||||||
|
case tok_left_angle_bracket:
|
||||||
|
return "<";
|
||||||
|
case tok_right_angle_bracket:
|
||||||
|
return ">";
|
||||||
|
case tok_comma:
|
||||||
|
return ",";
|
||||||
|
case tok_pipe:
|
||||||
|
return "|";
|
||||||
|
case tok_colon:
|
||||||
|
return ":";
|
||||||
|
case tok_true:
|
||||||
|
return "true";
|
||||||
|
case tok_false:
|
||||||
|
return "false";
|
||||||
|
case tok_case:
|
||||||
|
return "case";
|
||||||
|
case tok_let:
|
||||||
|
return "let";
|
||||||
|
case tok_in:
|
||||||
|
return "in";
|
||||||
|
case tok_match:
|
||||||
|
return "match";
|
||||||
|
case tok_of:
|
||||||
|
return "of";
|
||||||
|
case tok_def:
|
||||||
|
return "def";
|
||||||
|
case tok_datatype:
|
||||||
|
return "datatype";
|
||||||
|
case tok_alias:
|
||||||
|
return "alias";
|
||||||
|
case tok_typecheck:
|
||||||
|
return "typecheck";
|
||||||
|
case tok_eof:
|
||||||
|
return "end of file";
|
||||||
|
default:
|
||||||
|
return "unknown token";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Helper to report lexical errors */
|
/* Helper to report lexical errors */
|
||||||
static noreturn void report_lex_error(struct lexer *lex, const char *fmt, ...) {
|
static noreturn void report_lex_error(struct lexer *lex, const char *fmt, ...) {
|
||||||
if (fprintf(stderr, "Error at %ld:%ld ", lex->loc.line, lex->loc.col) < 0) {
|
if (fprintf(stderr, "Error at %ld:%ld ", lex->loc.line, lex->loc.col) < 0) {
|
||||||
|
|
@ -78,35 +145,35 @@ struct reserved_symbol {
|
||||||
enum token_type type;
|
enum token_type type;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* These two arrays must be alpha sorted to be usable by bsearch */
|
/* These two arrays must be sorted in ascending ASCII order to be usable by bsearch */
|
||||||
struct reserved_symbol punctuation[] = {
|
struct reserved_symbol punctuation[] = {
|
||||||
{ "(", tok_left_paren },
|
{ "(", tok_left_paren },
|
||||||
{ ")", tok_right_paren },
|
{ ")", tok_right_paren },
|
||||||
{ "[", tok_left_square },
|
|
||||||
{ "]", tok_right_square },
|
|
||||||
{ "{", tok_left_brace },
|
|
||||||
{ "}", tok_right_brace },
|
|
||||||
{ ",", tok_comma },
|
{ ",", tok_comma },
|
||||||
{ "|", tok_pipe },
|
|
||||||
{ ":", tok_colon },
|
{ ":", tok_colon },
|
||||||
{ "<", tok_left_angle_bracket },
|
{ "<", tok_left_angle_bracket },
|
||||||
{ ">", tok_right_angle_bracket },
|
{ ">", tok_right_angle_bracket },
|
||||||
|
{ "[", tok_left_square },
|
||||||
{ "\\", tok_backslash },
|
{ "\\", tok_backslash },
|
||||||
|
{ "]", tok_right_square },
|
||||||
|
{ "{", tok_left_brace },
|
||||||
|
{ "|", tok_pipe },
|
||||||
|
{ "}", tok_right_brace },
|
||||||
};
|
};
|
||||||
|
|
||||||
struct reserved_symbol keywords[] = {
|
struct reserved_symbol keywords[] = {
|
||||||
{ "=", tok_equal },
|
|
||||||
{ "->", tok_arrow },
|
{ "->", tok_arrow },
|
||||||
{ "true", tok_true },
|
{ "=", tok_equal },
|
||||||
{ "false", tok_false },
|
{ "alias", tok_alias },
|
||||||
{ "case", tok_case },
|
{ "case", tok_case },
|
||||||
{ "let", tok_let },
|
{ "datatype", tok_datatype },
|
||||||
|
{ "def", tok_def },
|
||||||
|
{ "false", tok_false },
|
||||||
{ "in", tok_in },
|
{ "in", tok_in },
|
||||||
|
{ "let", tok_let },
|
||||||
{ "match", tok_match },
|
{ "match", tok_match },
|
||||||
{ "of", tok_of },
|
{ "of", tok_of },
|
||||||
{ "def", tok_def },
|
{ "true", tok_true },
|
||||||
{ "datatype", tok_datatype },
|
|
||||||
{ "alias", tok_alias },
|
|
||||||
{ "typecheck", tok_typecheck },
|
{ "typecheck", tok_typecheck },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -124,6 +191,7 @@ void lex_punctuation(struct lexer *lex, struct token *out) {
|
||||||
/* This can't fail because of the precondition to this function */
|
/* This can't fail because of the precondition to this function */
|
||||||
struct reserved_symbol *r = bsearch(key, punctuation, num, elem_size, symbol_cmp);
|
struct reserved_symbol *r = bsearch(key, punctuation, num, elem_size, symbol_cmp);
|
||||||
out->type = r->type;
|
out->type = r->type;
|
||||||
|
advance(lex);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We assume that the curr char is already a digit */
|
/* We assume that the curr char is already a digit */
|
||||||
|
|
@ -171,6 +239,7 @@ static void lex_param_ident(struct lexer *lex, struct token *out) {
|
||||||
static void lex_ident(struct lexer *lex, struct token *out) {
|
static void lex_ident(struct lexer *lex, struct token *out) {
|
||||||
/* Store ident in the internal buffer */
|
/* Store ident in the internal buffer */
|
||||||
store_char(lex, lex->cur);
|
store_char(lex, lex->cur);
|
||||||
|
advance(lex);
|
||||||
while (is_ident_cont(lex->cur)) {
|
while (is_ident_cont(lex->cur)) {
|
||||||
/* Store ident in the internal buffer */
|
/* Store ident in the internal buffer */
|
||||||
store_char(lex, lex->cur);
|
store_char(lex, lex->cur);
|
||||||
|
|
@ -276,6 +345,7 @@ keep_lexing:
|
||||||
|
|
||||||
case '\'':
|
case '\'':
|
||||||
*loc = lex->loc;
|
*loc = lex->loc;
|
||||||
|
advance(lex);
|
||||||
lex_param_ident(lex, out);
|
lex_param_ident(lex, out);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
|
||||||
12
src/main.c
12
src/main.c
|
|
@ -2,8 +2,18 @@
|
||||||
#include "parser.h"
|
#include "parser.h"
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
|
if (argc != 2) {
|
||||||
|
printf("Usage: milly file.mil\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
FILE *input = fopen(argv[1], "r");
|
||||||
|
if (!input) {
|
||||||
|
fprintf(stderr, "Error: Could not open file %s.\n", argv[1]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
struct parser p;
|
struct parser p;
|
||||||
init_parser(&p, stdin);
|
init_parser(&p, input);
|
||||||
parse_program(&p);
|
parse_program(&p);
|
||||||
|
fclose(input);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
45
src/parser.c
45
src/parser.c
|
|
@ -44,32 +44,36 @@ static noreturn void report_error(struct parser *p, const char *fmt, ...) {
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *token_descr(enum token_type t) {
|
|
||||||
return "another token";
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Consume next token if it is of the required type, otherwise fail with error */
|
/* Consume next token if it is of the required type, otherwise fail with error */
|
||||||
static void expect(struct parser *p, enum token_type t) {
|
static void expect(struct parser *p, enum token_type t) {
|
||||||
if (cur_tok(p) != t) {
|
if (cur_tok(p) != t) {
|
||||||
report_error(p, "Expected %s.\n", token_descr(t));
|
report_error(p, "Expected `%s`, but found `%s`.\n", token_descr(t), token_descr(cur_tok(p)));
|
||||||
}
|
}
|
||||||
consume(p);
|
consume(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Types */
|
/* Types */
|
||||||
static struct type *parse_tuple_type(struct parser *p) {
|
static struct type *parse_paren_type(struct parser *p) {
|
||||||
|
struct type *res;
|
||||||
struct type *t = parse_type(p);
|
struct type *t = parse_type(p);
|
||||||
struct type_list_builder list = { NULL };
|
if (cur_tok(p) == tok_comma) {
|
||||||
type_list_append(&list, t);
|
consume(p);
|
||||||
expect(p, tok_comma);
|
struct type_list_builder list = { NULL };
|
||||||
t = parse_type(p);
|
type_list_append(&list, t);
|
||||||
type_list_append(&list, t);
|
|
||||||
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_angle_bracket) {
|
|
||||||
expect(p, tok_comma);
|
|
||||||
t = parse_type(p);
|
t = parse_type(p);
|
||||||
type_list_append(&list, t);
|
type_list_append(&list, t);
|
||||||
|
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
|
||||||
|
expect(p, tok_comma);
|
||||||
|
t = parse_type(p);
|
||||||
|
type_list_append(&list, t);
|
||||||
|
}
|
||||||
|
res = make_tuple_type(list.head);
|
||||||
}
|
}
|
||||||
return make_tuple_type(list.head);
|
else {
|
||||||
|
res = t;
|
||||||
|
}
|
||||||
|
expect(p, tok_right_paren);
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* sets *is_ident to true if the result is a single type name */
|
/* sets *is_ident to true if the result is a single type name */
|
||||||
|
|
@ -79,14 +83,7 @@ static struct type *try_parse_atomic_type_piece(struct parser *p, bool *is_ident
|
||||||
switch (cur_tok(p)) {
|
switch (cur_tok(p)) {
|
||||||
case tok_left_paren:
|
case tok_left_paren:
|
||||||
consume(p);
|
consume(p);
|
||||||
res = parse_type(p);
|
res = parse_paren_type(p);
|
||||||
expect(p, tok_right_paren);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case tok_left_angle_bracket:
|
|
||||||
consume(p);
|
|
||||||
res = parse_tuple_type(p);
|
|
||||||
expect(p, tok_right_angle_bracket);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case tok_param_ident:
|
case tok_param_ident:
|
||||||
|
|
@ -177,7 +174,7 @@ static struct decl *parse_datatype_decl(struct parser *p) {
|
||||||
struct type *ty;
|
struct type *ty;
|
||||||
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
|
while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
|
||||||
if (cur_tok(p) != tok_ident) {
|
if (cur_tok(p) != tok_ident) {
|
||||||
report_error(p, "Invalid datatype constructor, expected an identifier.\n");
|
report_error(p, "Invalid datatype constructor `%s`, expected an identifier.\n", token_descr(cur_tok(p)));
|
||||||
}
|
}
|
||||||
ctor_name = cur_lexeme(p);
|
ctor_name = cur_lexeme(p);
|
||||||
ty = parse_type(p);
|
ty = parse_type(p);
|
||||||
|
|
@ -264,7 +261,7 @@ struct decl *parse_decl(struct parser *p) {
|
||||||
consume(p);
|
consume(p);
|
||||||
return parse_value_or_func_decl(p);
|
return parse_value_or_func_decl(p);
|
||||||
default:
|
default:
|
||||||
report_error(p, "Declaration expected, invalid token.\n");
|
report_error(p, "Declaration expected, invalid token `%s`\n", token_descr(cur_tok(p)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue