diff --git a/README.md b/README.md
index 9c1e51c..4323aab 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,9 @@ here is a list of the features (or better, limitations) I want to introduce:
 - Polymorphic types à la Hindley Milner (probably unfeasible)
 - Automatic currying of functions, with optimised partial evaluation (unfeasible)
 - Delimited continuations (this is an overkill)
+- Easy syntax, allowing straightforward Kakoune support: milly's full grammar is specified as a pair of lex and yacc files in `ref_parser/`.
+The Kakoune plugin for milly is in `extra/milly.kak`.
+
diff --git a/include/lexer.h b/include/lexer.h
index c658954..c903d8f 100644
--- a/include/lexer.h
+++ b/include/lexer.h
@@ -45,6 +45,9 @@ struct token {
 	char *lexeme;
 };
 
+/* Gives a text description of a token type */
+const char *token_descr(enum token_type tok);
+
 struct location {
 	size_t line;
 	size_t col;
diff --git a/src/lexer.c b/src/lexer.c
index d019a20..9e9cad5 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -6,6 +6,73 @@
 #include 
 #include 
 
+const char *token_descr(enum token_type tok) {
+	switch (tok) {
+	case tok_ident:
+		return "identifier";
+	case tok_param_ident:
+		return "variable identifier";
+	case tok_int:
+		return "int literal";
+	case tok_string:
+		return "string literal";
+	case tok_arrow:
+		return "->";
+	case tok_backslash:
+		return "\\";
+	case tok_equal:
+		return "=";
+	case tok_left_paren:
+		return "(";
+	case tok_right_paren:
+		return ")";
+	case tok_left_square:
+		return "[";
+	case tok_right_square:
+		return "]";
+	case tok_left_brace:
+		return "{";
+	case tok_right_brace:
+		return "}";
+	case tok_left_angle_bracket:
+		return "<";
+	case tok_right_angle_bracket:
+		return ">";
+	case tok_comma:
+		return ",";
+	case tok_pipe:
+		return "|";
+	case tok_colon:
+		return ":";
+	case tok_true:
+		return "true";
+	case tok_false:
+		return "false";
+	case tok_case:
+		return "case";
+	case tok_let:
+		return "let";
+	case tok_in:
+		return "in";
+	case tok_match:
+		return "match";
+	case tok_of:
+		return "of";
+	case tok_def:
+		return "def";
+	case tok_datatype:
+		return "datatype";
+	case tok_alias:
+		return "alias";
+	case tok_typecheck:
+		return "typecheck";
+	case tok_eof:
+		return "end of file";
+	default:
+		return "unknown token";
+	}
+}
+
 /* Helper to report lexical errors */
 static noreturn void report_lex_error(struct lexer *lex, const char *fmt, ...)
 {
 	if (fprintf(stderr, "Error at %ld:%ld ", lex->loc.line, lex->loc.col) < 0) {
@@ -78,35 +145,35 @@ struct reserved_symbol {
 	enum token_type type;
 };
 
-/* These two arrays must be alpha sorted to be usable by bsearch */
+/* These two arrays must be sorted by ASCII value to be usable by bsearch */
 struct reserved_symbol punctuation[] = {
 	{ "(", tok_left_paren },
 	{ ")", tok_right_paren },
-	{ "[", tok_left_square },
-	{ "]", tok_right_square },
-	{ "{", tok_left_brace },
-	{ "}", tok_right_brace },
 	{ ",", tok_comma },
-	{ "|", tok_pipe },
 	{ ":", tok_colon },
 	{ "<", tok_left_angle_bracket },
 	{ ">", tok_right_angle_bracket },
+	{ "[", tok_left_square },
 	{ "\\", tok_backslash },
+	{ "]", tok_right_square },
+	{ "{", tok_left_brace },
+	{ "|", tok_pipe },
+	{ "}", tok_right_brace },
 };
 
 struct reserved_symbol keywords[] = {
-	{ "=", tok_equal },
 	{ "->", tok_arrow },
-	{ "true", tok_true },
-	{ "false", tok_false },
+	{ "=", tok_equal },
+	{ "alias", tok_alias },
 	{ "case", tok_case },
-	{ "let", tok_let },
+	{ "datatype", tok_datatype },
+	{ "def", tok_def },
+	{ "false", tok_false },
 	{ "in", tok_in },
+	{ "let", tok_let },
 	{ "match", tok_match },
 	{ "of", tok_of },
-	{ "def", tok_def },
-	{ "datatype", tok_datatype },
-	{ "alias", tok_alias },
+	{ "true", tok_true },
 	{ "typecheck", tok_typecheck },
 };
 
@@ -124,6 +191,7 @@ void lex_punctuation(struct lexer *lex, struct token *out) {
 	/* This can't fail because of the precondition to this function */
 	struct reserved_symbol *r = bsearch(key, punctuation, num, elem_size, symbol_cmp);
 	out->type = r->type;
+	advance(lex);
 }
 
 /* We assume that the curr char is already a digit */
@@ -171,6 +239,7 @@ static void lex_param_ident(struct lexer *lex, struct token *out) {
 static void lex_ident(struct lexer *lex, struct token *out) {
 	/* Store ident in the internal buffer */
 	store_char(lex, lex->cur);
+	advance(lex);
 	while (is_ident_cont(lex->cur)) {
 		/* Store ident in the internal buffer */
 		store_char(lex, lex->cur);
@@ -276,6 +345,7 @@ keep_lexing:
 
 	case '\'':
 		*loc = lex->loc;
+		advance(lex);
 		lex_param_ident(lex, out);
 		break;
 
diff --git a/src/main.c b/src/main.c
index 6f8d583..77507df 100644
--- a/src/main.c
+++ b/src/main.c
@@ -2,8 +2,18 @@
 #include "parser.h"
 
 int main(int argc, char *argv[]) {
+	if (argc != 2) {
+		printf("Usage: milly file.mil\n");
+		return 1;
+	}
+	FILE *input = fopen(argv[1], "r");
+	if (!input) {
+		fprintf(stderr, "Error: Could not open file %s.\n", argv[1]);
+		return 1;
+	}
 	struct parser p;
-	init_parser(&p, stdin);
+	init_parser(&p, input);
 	parse_program(&p);
+	fclose(input);
 	return 0;
 }
diff --git a/src/parser.c b/src/parser.c
index e3834c3..8c0ec01 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -44,32 +44,36 @@ static noreturn void report_error(struct parser *p, const char *fmt, ...)
 {
 	exit(EXIT_FAILURE);
 }
 
-static const char *token_descr(enum token_type t) {
-	return "another token";
-}
-
 /* Consume next token if it is of the required type, otherwise fail with error */
 static void expect(struct parser *p, enum token_type t) {
 	if (cur_tok(p) != t) {
-		report_error(p, "Expected %s.\n", token_descr(t));
+		report_error(p, "Expected `%s`, but found `%s`.\n", token_descr(t), token_descr(cur_tok(p)));
 	}
 	consume(p);
 }
 
 /* Types */
-static struct type *parse_tuple_type(struct parser *p) {
+static struct type *parse_paren_type(struct parser *p) {
+	struct type *res;
 	struct type *t = parse_type(p);
-	struct type_list_builder list = { NULL };
-	type_list_append(&list, t);
-	expect(p, tok_comma);
-	t = parse_type(p);
-	type_list_append(&list, t);
-	while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_angle_bracket) {
-		expect(p, tok_comma);
+	if (cur_tok(p) == tok_comma) {
+		consume(p);
+		struct type_list_builder list = { NULL };
+		type_list_append(&list, t);
 		t = parse_type(p);
 		type_list_append(&list, t);
+		while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_paren) {
+			expect(p, tok_comma);
+			t = parse_type(p);
+			type_list_append(&list, t);
+		}
+		res = make_tuple_type(list.head);
 	}
-	return make_tuple_type(list.head);
+	else {
+		res = t;
+	}
+	expect(p, tok_right_paren);
+	return res;
 }
 
 /* sets *is_ident to true if the result is a single type name */
@@ -79,14 +83,7 @@ static struct type *try_parse_atomic_type_piece(struct parser *p, bool *is_ident) {
 	switch (cur_tok(p)) {
 	case tok_left_paren:
 		consume(p);
-		res = parse_type(p);
-		expect(p, tok_right_paren);
-		break;
-
-	case tok_left_angle_bracket:
-		consume(p);
-		res = parse_tuple_type(p);
-		expect(p, tok_right_angle_bracket);
+		res = parse_paren_type(p);
 		break;
 
 	case tok_param_ident:
@@ -177,7 +174,7 @@ static struct decl *parse_datatype_decl(struct parser *p) {
 	struct type *ty;
 	while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) {
 		if (cur_tok(p) != tok_ident) {
-			report_error(p, "Invalid datatype constructor, expected an identifier.\n");
+			report_error(p, "Invalid datatype constructor `%s`, expected an identifier.\n", token_descr(cur_tok(p)));
 		}
 		ctor_name = cur_lexeme(p);
 		ty = parse_type(p);
@@ -264,7 +261,7 @@ struct decl *parse_decl(struct parser *p) {
 		consume(p);
 		return parse_value_or_func_decl(p);
 	default:
-		report_error(p, "Declaration expected, invalid token `%s`.\n", token_descr(cur_tok(p)));
+		report_error(p, "Declaration expected, invalid token `%s`.\n", token_descr(cur_tok(p)));
 	}
 }
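
A note on the re-sorted `punctuation` and `keywords` arrays: bsearch() does a binary
search, so the arrays must be ordered exactly as the comparator passed to bsearch()
orders them. The diff names that comparator `symbol_cmp` but does not show its body;
the sketch below is only an assumption of what it looks like if it compares the symbol
strings with strcmp() (byte by byte, in ASCII order, using <string.h>), which is what
the updated comment in src/lexer.c relies on. The field name `symbol` and the idea that
the search key is also a struct reserved_symbol are hypothetical, since neither is
visible in this diff.

	/* Sketch of the assumed comparator behind the bsearch() calls in src/lexer.c.
	 * It treats both the key and the array elements as struct reserved_symbol and
	 * compares their string member with strcmp(), so the arrays must stay sorted
	 * in strcmp() (ASCII) order, which is exactly the order this diff establishes. */
	static int symbol_cmp(const void *a, const void *b)
	{
		const struct reserved_symbol *x = a; /* `symbol` is a hypothetical field name */
		const struct reserved_symbol *y = b;
		return strcmp(x->symbol, y->symbol);
	}

If that assumption holds, a small debug assertion (or unit test) that walks each array
and checks strcmp(prev, cur) < 0 for consecutive entries would catch an out-of-order
entry before it silently breaks the binary search.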