From 5073f33c9ff2d2814f850903736bf7e33ae4e345 Mon Sep 17 00:00:00 2001 From: Francesco Magliocca Date: Tue, 17 May 2022 18:44:40 +0200 Subject: [PATCH] First commit, with parser --- .gitignore | 4 + Makefile | 33 +++ README.md | 37 ++++ include/decl.h | 114 ++++++++++ include/expr.h | 89 ++++++++ include/lexer.h | 77 +++++++ include/memory.h | 8 + include/parser.h | 25 +++ include/pattern.h | 54 +++++ include/type.h | 54 +++++ ref_parser/lexer.x | 59 ++++++ ref_parser/main.c | 7 + ref_parser/parser.y | 122 +++++++++++ src/decl.c | 184 ++++++++++++++++ src/expr.c | 152 ++++++++++++++ src/lexer.c | 311 +++++++++++++++++++++++++++ src/main.c | 9 + src/memory.c | 21 ++ src/parser.c | 497 ++++++++++++++++++++++++++++++++++++++++++++ src/pattern.c | 102 +++++++++ src/type.c | 78 +++++++ 21 files changed, 2037 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 README.md create mode 100644 include/decl.h create mode 100644 include/expr.h create mode 100644 include/lexer.h create mode 100644 include/memory.h create mode 100644 include/parser.h create mode 100644 include/pattern.h create mode 100644 include/type.h create mode 100644 ref_parser/lexer.x create mode 100644 ref_parser/main.c create mode 100644 ref_parser/parser.y create mode 100644 src/decl.c create mode 100644 src/expr.c create mode 100644 src/lexer.c create mode 100644 src/main.c create mode 100644 src/memory.c create mode 100644 src/parser.c create mode 100644 src/pattern.c create mode 100644 src/type.c diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5f3d1e5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +# build artifacts +build/ +milly +reference_parser diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..693ee21 --- /dev/null +++ b/Makefile @@ -0,0 +1,33 @@ +CFLAGS=-std=c11 -Wall -Wpedantic -Iinclude/ + +objects = main.o lexer.o memory.o parser.o decl.o expr.o type.o pattern.o + +obj_dir = build + +object_paths = $(addprefix $(obj_dir)/, $(objects)) + + +milly: $(object_paths) + $(CC) -o $@ $(CFLAGS) $(object_paths) + +$(obj_dir)/%.o: src/%.c include/*.h | $(obj_dir) + $(CC) $(CFLAGS) -o $@ -c $< + +reference_parser: $(obj_dir)/parser.tab.c $(obj_dir)/lex.yy.c ref_parser/main.c + $(CC) -o $@ -I$(obj_dir)/ $(obj_dir)/parser.tab.c $(obj_dir)/lex.yy.c ref_parser/main.c + +$(obj_dir)/parser.tab.c: ref_parser/parser.y | $(obj_dir) + bison -d -o $@ ref_parser/parser.y + +$(obj_dir)/lex.yy.c: ref_parser/lexer.x | $(obj_dir) + flex -o $@ ref_parser/lexer.x + +$(obj_dir): + mkdir -p $(obj_dir) + +clean: + rm -f milly + rm -f reference_parser + rm -r $(obj_dir) + +.PHONY: clean diff --git a/README.md b/README.md new file mode 100644 index 0000000..9c1e51c --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +# milly + +milly is an hobby, and should not be taken seriously. + +Interpreter for a Minimal ML-like language whose ideal goal is to be used for bootstrapping compilers and interpreters. + +## How to build + +milly's interpreter is written in portable ISO C11, sources are in `src/` and header files are in `include/`. + +If your system happens to have a `make` implementation compatible with **GNU Make**, you can just run: + +> make + +Otherwise, something like this should work: + +> $(CC) -std=c11 -o milly -Iinclude/ src/*.c + +## The language + +milly is a minimal dialect of Standard ML and Haskell, +here is a list of the features (or better, limitations) I want to introduce: + +- Keep the implementation <5000 LOC +- Eager evaluation (call by value) +- Algebraic Datatypes (pretty useful for representing syntax trees) +- First class functions +- Pattern patching (pretty useful for analyzing syntax trees) +- Integers, booleans, strings +- Mutable `ref` cell +- FFI to C +- Polymorphic types à la Hindley Milner (probably unfeasible) +- Automatic currying of functions, with optimised partial evaluation (unfeasible) +- Delimited continuations (this is an overkill) + +milly's full grammar is specified as a pair of lex and yacc files in `ref_parser/`. + diff --git a/include/decl.h b/include/decl.h new file mode 100644 index 0000000..d17af3b --- /dev/null +++ b/include/decl.h @@ -0,0 +1,114 @@ +struct type; +struct expr; +struct pattern_list; + +struct value_decl { + char *name; + struct expr *body; +}; + +/* List of function definition clauses */ +struct func_decl_list { + char *name; + struct pattern_list *args; + struct expr *body; + + struct func_decl_list *next; +}; + +struct func_decl_list_builder { + struct func_decl_list *head; + struct func_decl_list *last; +}; + +void func_decl_list_append(struct func_decl_list_builder *b, char *name, struct pattern_list *args, struct expr *body); +void free_func_decl_list(struct func_decl_list *l); + +/* A list of type variables */ +struct var_list { + char *name; + struct var_list *next; +}; + +struct var_list_builder { + struct var_list *head; + struct var_list *last; +}; + +void var_list_append(struct var_list_builder *b, char *elem); +void free_var_list(struct var_list *l); + +/* List of datatype constructors */ +struct constructor_list { + char *name; + struct type *t; + struct constructor_list *next; +}; + +struct constructor_list_builder { + struct constructor_list *head; + struct constructor_list *last; +}; + +void constructor_list_append(struct constructor_list_builder *b, char *name, struct type *t); +void free_constructor_list(struct constructor_list *l); + +struct datatype_decl { + /* Definition parameters */ + struct var_list *type_params; + /* datatype name */ + char *name; + /* Datatype constructors list */ + struct constructor_list *ctors; +}; + +struct alias_decl { + struct var_list *type_params; + char *definiendum; + struct type *definiens; +}; + +struct typecheck_decl { + char *name; + struct type *t; +}; + +enum decl_form { + value_decl, + func_decl, + datatype_decl, + alias_decl, + typecheck_decl, +}; + +struct decl { + enum decl_form form; + union { + struct value_decl value; + struct func_decl_list *func; + struct datatype_decl datatype; + struct alias_decl alias; + struct typecheck_decl typecheck; + }; +}; + +struct decl_list { + struct decl *elem; + struct decl_list *next; +}; + +struct decl_list_builder { + struct decl_list *head; + struct decl_list *last; +}; + +void decl_list_append(struct decl_list_builder *b, struct decl *elem); +void free_decl_list(struct decl_list *l); + +struct decl *make_value_decl(char *name, struct expr *body); +struct decl *make_func_decl(struct func_decl_list *f); +struct decl *make_datatype_decl(struct var_list *ps, char *name, struct constructor_list *cs); +struct decl *make_alias_decl(struct var_list *ps, char *def, struct type *body); +struct decl *make_typecheck_decl(char *n, struct type *t); + +void free_decl(struct decl *d); diff --git a/include/expr.h b/include/expr.h new file mode 100644 index 0000000..370a5e6 --- /dev/null +++ b/include/expr.h @@ -0,0 +1,89 @@ +#include + +enum expr_form { + expr_ident, + expr_int_lit, + expr_bool_lit, + expr_string_lit, + expr_list_lit, + expr_tuple_lit, + expr_fun_app, + expr_match, + expr_let, +}; + +struct expr; +struct pattern; +struct decl; +struct decl_list; + +struct expr_list { + struct expr *elem; + struct expr_list *next; +}; + +struct expr_list_builder { + struct expr_list *head; + struct expr_list *last; +}; + +void expr_list_append(struct expr_list_builder *b, struct expr *elem); +void free_expr_list(struct expr_list *l); + +struct func_app { + struct expr *fun; + struct expr_list *args; +}; + +/* List of case alternatives in a match expression */ +struct case_list { + struct pattern *pat; + struct expr *body; + + struct case_list *next; +}; + +struct case_list_builder { + struct case_list *head; + struct case_list *last; +}; + +void case_list_append(struct case_list_builder *b, struct pattern *pat, struct expr *body); +void free_case_list(struct case_list *l); + +struct match_expr { + struct expr *cond; + struct case_list *alts; +}; + +struct let_expr { + struct decl_list *local_decls; + struct expr *body; +}; + +struct expr { + enum expr_form form; + union { + /* Useful for identifiers, string and integers */ + char *lexeme; + bool bool_lit; + struct expr_list *list; + /* A tuple is represented as a non empty list */ + struct expr_list *tuple; + struct func_app func_app; + struct match_expr match; + struct let_expr let; + }; +}; + +struct expr *make_ident_expr(char *lexeme); +struct expr *make_int_lit(char *lexeme); +struct expr *make_bool_lit(bool b); +struct expr *make_string_lit(char *lexeme); +struct expr *make_list_lit(struct expr_list *list); +struct expr *make_tuple_lit(struct expr_list *list); +struct expr *make_func_app(struct expr *fun, struct expr_list *args); +struct expr *make_match_expr(struct expr *cond, struct case_list *alts); +struct expr *make_let_expr(struct decl_list *decls, struct expr *body); + +void free_expr(struct expr *e); diff --git a/include/lexer.h b/include/lexer.h new file mode 100644 index 0000000..c658954 --- /dev/null +++ b/include/lexer.h @@ -0,0 +1,77 @@ +#include +#include +#include + +enum token_type { + tok_ident, + tok_param_ident, + tok_int, + tok_string, + + /* Symbols */ + tok_arrow, + tok_backslash, + tok_equal, + tok_left_paren, + tok_right_paren, + tok_left_square, + tok_right_square, + tok_left_brace, + tok_right_brace, + tok_left_angle_bracket, + tok_right_angle_bracket, + tok_comma, + tok_pipe, + tok_colon, + + /* Keywords */ + tok_true, + tok_false, + tok_case, + tok_let, + tok_in, + tok_match, + tok_of, + tok_def, + tok_datatype, + tok_alias, + tok_typecheck, + + tok_eof, +}; + +struct token { + enum token_type type; + char *lexeme; +}; + +struct location { + size_t line; + size_t col; +}; + +#define LEXEME_MAX_LEN 1024 + +struct lexer { + /* Input stream to be lexed */ + FILE *in; + /* Lookahead character */ + char cur; + /* Internal buffer used to incrementally store lexemes */ + char buf[LEXEME_MAX_LEN]; + /* Number of character currently stored in buf */ + size_t buf_len; + /* Source Location information */ + struct location loc; +}; + +/* Initialize lexer. + * If there is an error, aborts the program. + */ +void init_lexer(struct lexer *lex, FILE *in); + +/* Read next token, and store it in out. + * Store the token location in loc. + * If there is an error, aborts the program. + */ +void lex_next(struct lexer *lex, struct token *out, struct location *loc); diff --git a/include/memory.h b/include/memory.h new file mode 100644 index 0000000..37c23c7 --- /dev/null +++ b/include/memory.h @@ -0,0 +1,8 @@ +#include + +/* Custom allocators that abort the program + * and print an error message on stderr, + * if they fail + */ +void *xmalloc(size_t s); +void *xrealloc(void *ptr, size_t s); diff --git a/include/parser.h b/include/parser.h new file mode 100644 index 0000000..61c54a5 --- /dev/null +++ b/include/parser.h @@ -0,0 +1,25 @@ +#include "lexer.h" + +struct decl; +struct expr; + +struct parser { + /* Lexical analyzer */ + struct lexer lex; + /* Lookahead token */ + struct token cur; + /* Location of the lookahead token */ + struct location loc; +}; + +/* Initialize parser */ +void init_parser(struct parser *p, FILE *in); + +/* Parse a single declaration */ +struct decl *parse_decl(struct parser *p); + +/* Parse a single expression */ +struct expr *parse_expr(struct parser *p); + +/* Parse a list of declarations */ +struct decl_list *parse_program(struct parser *p); diff --git a/include/pattern.h b/include/pattern.h new file mode 100644 index 0000000..e8b9892 --- /dev/null +++ b/include/pattern.h @@ -0,0 +1,54 @@ +#include + +enum pattern_form { + pattern_var, + pattern_int_lit, + pattern_bool_lit, + pattern_string_lit, + pattern_list_lit, + pattern_tuple_lit, + pattern_constructor, +}; + +struct pattern; + +struct pattern_list { + struct pattern *elem; + struct pattern_list *next; +}; + +struct pattern_list_builder { + struct pattern_list *head; + struct pattern_list *last; +}; + +void pattern_list_append(struct pattern_list_builder *b, struct pattern *p); +void free_pattern_list(struct pattern_list *l); + +struct constructor_pattern { + char *name; + struct pattern_list *args; +}; + +struct pattern { + enum pattern_form form; + union { + /* Useful for vars, ints and strings */ + char *lexeme; + bool b; + struct pattern_list *list; + struct pattern_list *tuple; + struct constructor_pattern ctor; + }; +}; + +struct pattern *make_var_pattern(char *name); +struct pattern *make_int_pattern(char *lexeme); +struct pattern *make_bool_pattern(bool b); +struct pattern *make_string_pattern(char *lexeme); +struct pattern *make_list_pattern(struct pattern_list *l); +struct pattern *make_tuple_pattern(struct pattern_list *l); +struct pattern *make_constructor_pattern(char *name, struct pattern_list *args); + +void free_pattern(struct pattern *p); + diff --git a/include/type.h b/include/type.h new file mode 100644 index 0000000..d66d881 --- /dev/null +++ b/include/type.h @@ -0,0 +1,54 @@ +enum type_form { + type_var, + type_name, + tuple_type, + func_type, +}; + +struct type; + +/* A Linked list of types */ +struct type_list { + struct type *elem; + struct type_list *next; +}; + +struct func_type { + struct type *dom; + struct type *cod; +}; + +struct type_name { + struct type_list *params; + char *name; +}; + +struct type { + enum type_form form; + union { + char *var_name; + /* We represent a tuple with the linked list of its type components */ + struct type_list *tuple; + struct func_type func; + struct type_name type_name; + }; +}; + +/* type constructors and destructors */ +struct type *make_tuple_type(struct type_list *t); +struct type *make_func_type(struct type *dom, struct type *cod); +struct type *make_var_type(char *name); +struct type *make_type_name(struct type_list *params, char *name); + +void free_type(struct type *t); + +/* type_list operations */ +void free_type_list(struct type_list *l); + +struct type_list_builder { + struct type_list *head; + struct type_list *last; +}; + +void type_list_append(struct type_list_builder *b, struct type *elem); + diff --git a/ref_parser/lexer.x b/ref_parser/lexer.x new file mode 100644 index 0000000..e84318e --- /dev/null +++ b/ref_parser/lexer.x @@ -0,0 +1,59 @@ +/* milly's Flex grammar */ +%{ +#include "parser.tab.h" + +int yywrap(); +%} + +DIGIT [0-9] +IDENT_START [a-zA-Z_?*+!/=]|"-" +IDENT_CONT {IDENT_START}|{DIGIT} +ESCAPED \\[nt"\\] +STRING_CHAR {ESCAPED}|[[:print:]]{-}["\\] + +%% + +"true" { return TRUE; } +"false" { return FALSE; } +"case" { return CASE; } +"let" { return LET; } +"in" { return IN; } +"match" { return MATCH; } +"of" { return OF; } +"def" { return DEF; } +"datatype" { return DATATYPE; } +"alias" { return ALIAS; } +"typecheck" { return TYPECHECK; } + +"->" { return ARROW; } +"=" { return *yytext; } + +"\\" { return *yytext; } +"(" { return *yytext; } +")" { return *yytext; } +"[" { return *yytext; } +"]" { return *yytext; } +"{" { return *yytext; } +"}" { return *yytext; } +"," { return *yytext; } +"|" { return *yytext; } +":" { return *yytext; } +"<" { return *yytext; } +">" { return *yytext; } + +[ \t\n]+ ; +#[^\n]* ; + +{IDENT_START}{IDENT_CONT}* { return IDENT; } + +'{IDENT_START}{IDENT_CONT}* { return PARAM_IDENT; } + +{DIGIT}+ { return INT; } + +\"{STRING_CHAR}*\" { return STRING; } + +%% + +int yywrap() { + return 1; +} diff --git a/ref_parser/main.c b/ref_parser/main.c new file mode 100644 index 0000000..1403e77 --- /dev/null +++ b/ref_parser/main.c @@ -0,0 +1,7 @@ +#include +#include "parser.tab.h" + +int main() { + yyparse(); + return 0; +} diff --git a/ref_parser/parser.y b/ref_parser/parser.y new file mode 100644 index 0000000..02313d6 --- /dev/null +++ b/ref_parser/parser.y @@ -0,0 +1,122 @@ +/* milly's Bison grammar */ +%{ +#include + +int yylex(); +void yyerror(const char *); +%} +%token IDENT +%token PARAM_IDENT +%token INT STRING +%token TRUE FALSE +%token CASE LET IN MATCH OF DEF DATATYPE ALIAS TYPECHECK +%token ARROW + +/* Function constructor is right associative */ +%right ARROW + +%right ':' + +%start program + +%% + +program: decl + | decl program + + +/* Types syntax */ +type: atomic_type + | type ARROW type + +atomic_type: type_app + | PARAM_IDENT + | '(' type ')' + | '(' tuple_types ')' + +type_app_atom: PARAM_IDENT + | '(' type ')' + | '(' tuple_types ')' + | IDENT + +type_app: IDENT + | type_app_atom type_app + +tuple_types: type ',' type + | type ',' tuple_types + +/* Declaration syntax */ +decl: datatype_decl + | alias_decl + | typecheck_decl + | value_decl + | func_decl + +datatype_decl: DATATYPE def_type_params IDENT '{' datatype_alts '}' + +/* type parameters of a definition */ +def_type_params: %empty + | PARAM_IDENT def_type_params + +datatype_alts: %empty + | IDENT type ',' datatype_alts + +alias_decl: ALIAS def_type_params IDENT '=' type + +typecheck_decl: TYPECHECK IDENT ':' type + +value_decl: DEF IDENT '=' expr + +func_decl: DEF IDENT param_patterns '=' expr fun_alts + +param_patterns: pattern + | pattern param_patterns + +fun_alts: %empty + | '|' IDENT param_patterns '=' expr fun_alts + +/* Expressions */ +atomic_expr: IDENT + | INT + | TRUE | FALSE + | STRING + | '[' list_elems ']' + | '(' expr ',' list_elems ')' + | '(' expr ')' + +list_elems: %empty + | expr ',' list_elems + +fun_app_expr: atomic_expr + | fun_app_expr atomic_expr + +infix_expr: fun_app_expr + | infix_expr ':' infix_expr /* List cons */ + /* TODO: | infix_expr IDENT infix_expr */ + +expr: infix_expr + | MATCH expr '{' case_alts '}' + | LET let_decls IN expr + +case_alts: %empty + | CASE pattern ARROW expr case_alts + +let_decls: decl | decl let_decls + +/* Pattern language */ +pattern: IDENT + | INT + | TRUE | FALSE + | STRING + | '<' IDENT param_patterns '>' + | '(' pattern ',' pattern_list ')' + | '[' pattern_list ']' + +pattern_list: %empty + | pattern ',' pattern_list + +%% + +void yyerror(const char *err) { + fprintf(stderr, "Error: %s", err); +} diff --git a/src/decl.c b/src/decl.c new file mode 100644 index 0000000..a1c78e2 --- /dev/null +++ b/src/decl.c @@ -0,0 +1,184 @@ +#include "decl.h" +#include "type.h" +#include "expr.h" +#include "pattern.h" +#include "memory.h" +#include + +static struct decl *alloc_decl(enum decl_form form) { + struct decl *res = xmalloc(sizeof(struct decl)); + res->form = form; + return res; +} + +struct decl *make_value_decl(char *name, struct expr *body) { + struct decl *d = alloc_decl(value_decl); + d->value.name = name; + d->value.body = body; + return d; +} + +struct decl *make_func_decl(struct func_decl_list *f) { + struct decl *d = alloc_decl(func_decl); + d->func = f; + return d; +} + +struct decl *make_datatype_decl(struct var_list *ps, char *name, struct constructor_list *cs) { + struct decl *d = alloc_decl(datatype_decl); + d->datatype = (struct datatype_decl) { + .type_params = ps, + .name = name, + .ctors = cs, + }; + return d; +} + +struct decl *make_alias_decl(struct var_list *ps, char *def, struct type *body) { + struct decl *d = alloc_decl(alias_decl); + d->alias = (struct alias_decl) { + .type_params = ps, + .definiendum = def, + .definiens = body, + }; + return d; +} + +struct decl *make_typecheck_decl(char *n, struct type *t) { + struct decl *d = alloc_decl(typecheck_decl); + d->typecheck = (struct typecheck_decl) { + .name = n, + .t = t, + }; + return d; +} + +void free_decl(struct decl *d) { + if (!d) return; + + switch (d->form) { + case value_decl: + free(d->value.name); + free_expr(d->value.body); + break; + case func_decl: + free_func_decl_list(d->func); + break; + case datatype_decl: + free_var_list(d->datatype.type_params); + free(d->datatype.name); + free_constructor_list(d->datatype.ctors); + break; + case alias_decl: + free_var_list(d->alias.type_params); + free(d->alias.definiendum); + free_type(d->alias.definiens); + break; + case typecheck_decl: + free(d->typecheck.name); + free_type(d->typecheck.t); + break; + } + free(d); +} + +void func_decl_list_append(struct func_decl_list_builder *b, char *name, struct pattern_list *args, struct expr *body) { + struct func_decl_list *node = xmalloc(sizeof(struct func_decl_list)); + node->name = name; + node->args = args; + node->body = body; + node->next = NULL; + if (b->last == NULL) { + b->head = node; + b->last = node; + } + else { + b->last->next = node; + b->last = node; + } +} + +void free_func_decl_list(struct func_decl_list *l) { + struct func_decl_list *tmp; + while (l != NULL) { + tmp = l; + l = l->next; + free(tmp->name); + free_pattern_list(tmp->args); + free(tmp); + } +} + +void var_list_append(struct var_list_builder *b, char *elem) { + struct var_list *l = xmalloc(sizeof(struct var_list)); + l->name = elem; + l->next = NULL; + if (b->last == NULL) { + b->head = l; + b->last = l; + } + else { + b->last->next = l; + b->last = l; + } +} + +void free_var_list(struct var_list *l) { + struct var_list *tmp; + while (l != NULL) { + tmp = l; + l = l->next; + free(tmp->name); + free(tmp); + } +} + +void constructor_list_append(struct constructor_list_builder *b, char *name, struct type *t) { + struct constructor_list *c = xmalloc(sizeof(struct constructor_list)); + c->name = name; + c->t = t; + c->next = NULL; + if (b->last == NULL) { + b->head = c; + b->last = c; + } + else { + b->last->next = c; + b->last = c; + } +} + +void free_constructor_list(struct constructor_list *c) { + struct constructor_list *tmp; + while (c != NULL) { + tmp = c; + c = c->next; + free(tmp->name); + free_type(tmp->t); + free(tmp); + } +} + +void decl_list_append(struct decl_list_builder *l, struct decl *elem) { + struct decl_list *node = xmalloc(sizeof(struct decl_list)); + node->elem = elem; + node->next = NULL; + if (l->last == NULL) { + l->head = node; + l->last = node; + } + else { + l->last->next = node; + l->last = node; + } +} + +void free_decl_list(struct decl_list *l) { + struct decl_list *tmp; + while (l != NULL) { + tmp = l; + l = l->next; + free_decl(tmp->elem); + free(tmp); + } +} diff --git a/src/expr.c b/src/expr.c new file mode 100644 index 0000000..4b5f68b --- /dev/null +++ b/src/expr.c @@ -0,0 +1,152 @@ +#include "expr.h" +#include "decl.h" +#include "pattern.h" +#include "memory.h" +#include + +static struct expr *alloc_expr(enum expr_form form) { + struct expr *res = xmalloc(sizeof(struct expr)); + res->form = form; + return res; +} + +struct expr *make_ident_expr(char *lexeme) { + struct expr *res = alloc_expr(expr_ident); + res->lexeme = lexeme; + return res; +} + +struct expr *make_int_lit(char *lexeme) { + struct expr *res = alloc_expr(expr_int_lit); + res->lexeme = lexeme; + return res; +} + +struct expr *make_bool_lit(bool b) { + struct expr *res = alloc_expr(expr_bool_lit); + res->bool_lit = b; + return res; +} + +struct expr *make_string_lit(char *lexeme) { + struct expr *res = alloc_expr(expr_string_lit); + res->lexeme = lexeme; + return res; +} + +struct expr *make_list_lit(struct expr_list *list) { + struct expr *res = alloc_expr(expr_list_lit); + res->list = list; + return res; +} + +struct expr *make_tuple_lit(struct expr_list *list) { + struct expr *res = alloc_expr(expr_tuple_lit); + res->tuple = list; + return res; +} + +struct expr *make_func_app(struct expr *fun, struct expr_list *args) { + struct expr *res = alloc_expr(expr_fun_app); + res->func_app = (struct func_app) { fun, args }; + return res; +} +struct expr *make_match_expr(struct expr *cond, struct case_list *alts) { + struct expr *res = alloc_expr(expr_match); + res->match.cond = cond; + res->match.alts = alts; + return res; +} + +struct expr *make_let_expr(struct decl_list *decls, struct expr *body) { + struct expr *res = alloc_expr(expr_let); + res->let.local_decls = decls; + res->let.body = body; + return res; +} + +void free_expr(struct expr *e) { + if (!e) return; + switch (e->form) { + case expr_ident: + free(e->lexeme); + break; + case expr_int_lit: + free(e->lexeme); + break; + case expr_bool_lit: + break; + case expr_string_lit: + free(e->lexeme); + break; + case expr_list_lit: + free_expr_list(e->list); + break; + case expr_tuple_lit: + free_expr_list(e->list); + break; + case expr_fun_app: + free_expr(e->func_app.fun); + free_expr_list(e->func_app.args); + break; + case expr_match: + free_expr(e->match.cond); + free_case_list(e->match.alts); + break; + case expr_let: + free_decl_list(e->let.local_decls); + free_expr(e->let.body); + break; + } + free(e); +} + +void expr_list_append(struct expr_list_builder *b, struct expr *elem) { + struct expr_list *node = xmalloc(sizeof(struct expr_list)); + node->elem = elem; + node->next = NULL; + if (b->last == NULL) { + b->head = node; + b->last = node; + } + else { + b->last->next = node; + b->last = node; + } +} + +void free_expr_list(struct expr_list *l) { + struct expr_list *tmp; + while (l != NULL) { + tmp = l; + l = l->next; + free_expr(tmp->elem); + free(tmp); + } +} + +void case_list_append(struct case_list_builder *b, struct pattern *pat, struct expr *body) { + struct case_list *node = xmalloc(sizeof(struct case_list)); + node->pat = pat; + node->body = body; + node->next = NULL; + if (b->last == NULL) { + b->head = node; + b->last = node; + } + else { + b->last->next = node; + b->last = node; + } +} +void free_case_list(struct case_list *l) { + struct case_list *tmp; + while (l != NULL) { + tmp = l; + l = l->next; + free_pattern(tmp->pat); + free_expr(tmp->body); + free(tmp); + } +} + diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..d019a20 --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,311 @@ +#include "lexer.h" +#include "memory.h" +#include +#include +#include +#include +#include + +/* Helper to report lexical errors */ +static noreturn void report_lex_error(struct lexer *lex, const char *fmt, ...) { + if (fprintf(stderr, "Error at %ld:%ld ", lex->loc.line, lex->loc.col) < 0) { + exit(EXIT_FAILURE); + } + va_list args; + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + exit(EXIT_FAILURE); +} + + +/* Read next character from input stream */ +static void next_char(struct lexer *lex) { + lex->cur = fgetc(lex->in); + if (ferror(lex->in)) { + fprintf(stderr, "IO Read error.\n"); + exit(EXIT_FAILURE); + } +} + +/* Advance in input stream and update location info */ +static void advance(struct lexer *lex) { + /* Update location info */ + if (lex->cur == '\n') { + ++lex->loc.line; + lex->loc.col = 1; + } + else { + ++lex->loc.col; + } + + /* And read next input char */ + next_char(lex); +} + +/* Store char in the internal buffer */ +static void store_char(struct lexer *lex, const char c) { + if (lex->buf_len + 1 < LEXEME_MAX_LEN) { + lex->buf[lex->buf_len] = c; + ++lex->buf_len; + } + else { + report_lex_error(lex, "Lexeme too long.\n"); + } +} + +/* Allocates a null terminated string in out, + * containing a copy of the inner buffer + */ +static char *dup_lexeme(struct lexer *lex) { + char *lexeme = xmalloc(lex->buf_len + 1); + memcpy(lexeme, lex->buf, lex->buf_len); + lexeme[lex->buf_len] = 0; + return lexeme; +} + +void init_lexer(struct lexer *lex, FILE *in) { + lex->in = in; + lex->loc = (struct location) { 1, 1 }; + /* Reset internal buffer */ + lex->buf_len = 0; + /* Read next char */ + next_char(lex); +} + +struct reserved_symbol { + const char *lexeme; + enum token_type type; +}; + +/* These two arrays must be alpha sorted to be usable by bsearch */ +struct reserved_symbol punctuation[] = { + { "(", tok_left_paren }, + { ")", tok_right_paren }, + { "[", tok_left_square }, + { "]", tok_right_square }, + { "{", tok_left_brace }, + { "}", tok_right_brace }, + { ",", tok_comma }, + { "|", tok_pipe }, + { ":", tok_colon }, + { "<", tok_left_angle_bracket }, + { ">", tok_right_angle_bracket }, + { "\\", tok_backslash }, +}; + +struct reserved_symbol keywords[] = { + { "=", tok_equal }, + { "->", tok_arrow }, + { "true", tok_true }, + { "false", tok_false }, + { "case", tok_case }, + { "let", tok_let }, + { "in", tok_in }, + { "match", tok_match }, + { "of", tok_of }, + { "def", tok_def }, + { "datatype", tok_datatype }, + { "alias", tok_alias }, + { "typecheck", tok_typecheck }, +}; + +int symbol_cmp(const void *lhs, const void *rhs) { + const char *key = lhs; + const struct reserved_symbol *entry = rhs; + return strcmp(key, entry->lexeme); +} + +/* We assume that the current character is indeed a punctuation */ +void lex_punctuation(struct lexer *lex, struct token *out) { + char key[] = { lex->cur, 0 }; + const size_t elem_size = sizeof(struct reserved_symbol); + const size_t num = sizeof(punctuation) / elem_size; + /* This can't fail because of the precondition to this function */ + struct reserved_symbol *r = bsearch(key, punctuation, num, elem_size, symbol_cmp); + out->type = r->type; +} + +/* We assume that the curr char is already a digit */ +static void lex_int(struct lexer *lex, struct token *out) { + while (isdigit(lex->cur)) { + /* Store digit in the internal buffer */ + store_char(lex, lex->cur); + advance(lex); + } + out->type = tok_int; + out->lexeme = dup_lexeme(lex); +} + +static bool is_ident_start(const char c) { + switch (c) { + case '_': case '?': case '-': + case '+': case '*': case '!': + case '/': case '=': + return true; + default: + return isalpha(c); + } +} + +static bool is_ident_cont(const char c) { + return is_ident_start(c) || isdigit(c); +} + +static void lex_param_ident(struct lexer *lex, struct token *out) { + if (!is_ident_start(lex->cur)) { + report_lex_error(lex, "Invalid character `%c` in param name.\n", lex->cur); + } + /* Store parameter name in the internal buffer */ + store_char(lex, lex->cur); + while (is_ident_cont(lex->cur)) { + /* Store parameter name in the internal buffer */ + store_char(lex, lex->cur); + advance(lex); + } + out->type = tok_param_ident; + out->lexeme = dup_lexeme(lex); +} + +/* We assume the current char is valid identifier starter */ +static void lex_ident(struct lexer *lex, struct token *out) { + /* Store ident in the internal buffer */ + store_char(lex, lex->cur); + while (is_ident_cont(lex->cur)) { + /* Store ident in the internal buffer */ + store_char(lex, lex->cur); + advance(lex); + } + /* Check for reserved symbols */ + const size_t elem_size = sizeof(struct reserved_symbol); + const size_t num = sizeof(keywords) / elem_size; + /* TODO: this is a ugly trick, try to improve it + * In order to perform the binary search, we must use a null terminated + * string as string, so we must either allocate a new string, or use the internal buffer. + * This second solution is cheaper, but then we need to add a null terminator, + * To do so, we must make sure there is enough room in the internal buffer, + * but reserved symbols are very short, so we first do a fast check on the + * buffer size, if there is no room for a null terminator, it can't be + * a reserved keyword. + */ + if (lex->buf_len <= LEXEME_MAX_LEN - 1) { + /* Add null terminator to buffer end */ + lex->buf[lex->buf_len] = 0; + struct reserved_symbol *kw = bsearch(lex->buf, keywords, num, elem_size, symbol_cmp); + if (kw) { + /* This is a reserved keyword */ + out->type = kw->type; + return; + } + } + /* If we are here, the identifier is not a reserved keyword */ + out->type = tok_ident; + out->lexeme = dup_lexeme(lex); +} + +/* String literal */ +static void lex_string(struct lexer *lex, struct token *out) { + while (lex->cur != EOF && lex->cur != '"') { + if (lex->cur == '\\') { + advance(lex); + /* Correctly interpret escaped characters */ + switch (lex->cur) { + case 'n': + store_char(lex, '\n'); + break; + case 't': + store_char(lex, '\t'); + break; + case '\\': + case '"': + store_char(lex, lex->cur); + break; + default: + report_lex_error(lex, "Invalid escaped character in string literal.\n"); + } + advance(lex); + } + else if (isprint(lex->cur)) { + store_char(lex, lex->cur); + advance(lex); + } + else { + report_lex_error(lex, "Invalid character in string literal.\n"); + } + } + if (lex->cur != '"') { + report_lex_error(lex, "Unterminated string literal. Expected closing double quote.\n"); + } + advance(lex); + out->type = tok_string; + out->lexeme = dup_lexeme(lex); +} + +void lex_next(struct lexer *lex, struct token *out, struct location *loc) { + /* Reset internal buffer */ + lex->buf_len = 0; + +keep_lexing: + switch (lex->cur) { + case ' ': + case '\t': + case '\n': + /* Ignore whitespaces */ + advance(lex); + goto keep_lexing; + + case '#': + /* Ignore comments */ + while (lex->cur != EOF && lex->cur != '\n') { + advance(lex); + } + goto keep_lexing; + + case EOF: + *loc = lex->loc; + out->type = tok_eof; + break; + + case '(': case ')': case '[': + case ']': case '<': case '>': + case '{': case '}': case ',': + case ':': case '|': case '\\': + *loc = lex->loc; + lex_punctuation(lex, out); + break; + + case '\'': + *loc = lex->loc; + lex_param_ident(lex, out); + break; + + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + case '0': + *loc = lex->loc; + lex_int(lex, out); + break; + + case '"': + *loc = lex->loc; + advance(lex); + lex_string(lex, out); + break; + + default: + if (is_ident_start(lex->cur)) { + *loc = lex->loc; + lex_ident(lex, out); + } + else { + if (isprint(lex->cur)) { + report_lex_error(lex, "Invalid character `%c`.\n", lex->cur); + } + else { + report_lex_error(lex, "Invalid control character.\n"); + } + } + break; + } +} diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..6f8d583 --- /dev/null +++ b/src/main.c @@ -0,0 +1,9 @@ +#include +#include "parser.h" + +int main(int argc, char *argv[]) { + struct parser p; + init_parser(&p, stdin); + parse_program(&p); + return 0; +} diff --git a/src/memory.c b/src/memory.c new file mode 100644 index 0000000..863b008 --- /dev/null +++ b/src/memory.c @@ -0,0 +1,21 @@ +#include "memory.h" +#include +#include + +void *xmalloc(size_t s) { + void *ptr = malloc(s); + if (!ptr) { + fprintf(stderr, "Memory exhausted.\n"); + exit(EXIT_FAILURE); + } + return ptr; +} + +void *xrealloc(void *ptr, size_t s) { + ptr = realloc(ptr, s); + if (!ptr) { + fprintf(stderr, "Memory exhausted.\n"); + exit(EXIT_FAILURE); + } + return ptr; +} diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..e3834c3 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,497 @@ +#include "parser.h" +#include "decl.h" +#include "type.h" +#include "expr.h" +#include "pattern.h" +#include +#include +#include + +static struct type *parse_type(struct parser *p); +static struct pattern *parse_pattern(struct parser *p); +static struct pattern_list *parse_pattern_list(struct parser *p, enum token_type delim); + +void init_parser(struct parser *p, FILE *in) { + init_lexer(&p->lex, in); + /* Read next token */ + lex_next(&p->lex, &p->cur, &p->loc); +} + +/* Get current lookahead token type */ +static inline enum token_type cur_tok(const struct parser *p) { + return p->cur.type; +} + +/* Get current lookahead token lexeme */ +static inline char *cur_lexeme(struct parser *p) { + return p->cur.lexeme; +} + +/* read next token */ +static inline void consume(struct parser *p) { + lex_next(&p->lex, &p->cur, &p->loc); +} + +static noreturn void report_error(struct parser *p, const char *fmt, ...) { + /* Report error at the beginning of the current lookahead position */ + if (fprintf(stderr, "Error at %ld:%ld ", p->loc.line, p->loc.col) < 0) { + exit(EXIT_FAILURE); + } + va_list args; + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + exit(EXIT_FAILURE); +} + +static const char *token_descr(enum token_type t) { + return "another token"; +} + +/* Consume next token if it is of the required type, otherwise fail with error */ +static void expect(struct parser *p, enum token_type t) { + if (cur_tok(p) != t) { + report_error(p, "Expected %s.\n", token_descr(t)); + } + consume(p); +} + +/* Types */ +static struct type *parse_tuple_type(struct parser *p) { + struct type *t = parse_type(p); + struct type_list_builder list = { NULL }; + type_list_append(&list, t); + expect(p, tok_comma); + t = parse_type(p); + type_list_append(&list, t); + while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_angle_bracket) { + expect(p, tok_comma); + t = parse_type(p); + type_list_append(&list, t); + } + return make_tuple_type(list.head); +} + +/* sets *is_ident to true if the result is a single type name */ +static struct type *try_parse_atomic_type_piece(struct parser *p, bool *is_ident) { + struct type *res = NULL; + *is_ident = false; + switch (cur_tok(p)) { + case tok_left_paren: + consume(p); + res = parse_type(p); + expect(p, tok_right_paren); + break; + + case tok_left_angle_bracket: + consume(p); + res = parse_tuple_type(p); + expect(p, tok_right_angle_bracket); + break; + + case tok_param_ident: + res = make_var_type(cur_lexeme(p)); + consume(p); + break; + + case tok_ident: + *is_ident = true; + res = make_type_name(NULL, cur_lexeme(p)); + consume(p); + break; + + default: + res = NULL; + } + return res; +} + +static struct type *parse_atomic_type(struct parser *p) { + bool t_is_ident; + struct type *t = try_parse_atomic_type_piece(p, &t_is_ident); + if (!t) { + report_error(p, "Expected type.\n"); + } + /* t always contains the latest parsed atom piece */ + bool curr_t_is_ident; + struct type *curr_t = try_parse_atomic_type_piece(p, &curr_t_is_ident); + if (curr_t != NULL) { + struct type_list_builder params = { NULL }; + while (curr_t != NULL) { + /* We have a new atom piece, therefore we append t + * to the params list + */ + type_list_append(¶ms, t); + /* And set t to the latest parsed piece */ + t_is_ident = curr_t_is_ident; + t = curr_t; + curr_t = try_parse_atomic_type_piece(p, &curr_t_is_ident); + } + /* Now we must check that t is a simple type name */ + if (!t_is_ident) { + report_error(p, "Invalid type, expected a simple type name here.\n"); + } + char *name = t->type_name.name; + /* Free allocated type object, we just need its name + * TODO: improve this, it's a ugly hack + * */ + free(t); + return make_type_name(params.head, name); + } + return t; +} + +static struct type *parse_type(struct parser *p) { + struct type *t = parse_atomic_type(p); + if (cur_tok(p) == tok_arrow) { + consume(p); + struct type *cod = parse_type(p); + return make_func_type(t, cod); + } + return t; +} + +/* Definitions */ +static struct var_list *parse_def_var_list(struct parser *p) { + struct var_list_builder params = { NULL }; + while (cur_tok(p) == tok_param_ident) { + var_list_append(¶ms, cur_lexeme(p)); + consume(p); + } + return params.head; +} + +static struct decl *parse_datatype_decl(struct parser *p) { + struct var_list *params = parse_def_var_list(p); + if (cur_tok(p) != tok_ident) { + report_error(p, "Invalid datatype name, expected an identifier.\n"); + } + char *datatype_name = cur_lexeme(p); + consume(p); + + expect(p, tok_left_brace); + + /* Parse constructors */ + struct constructor_list_builder ctors = { NULL }; + char *ctor_name; + struct type *ty; + while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) { + if (cur_tok(p) != tok_ident) { + report_error(p, "Invalid datatype constructor, expected an identifier.\n"); + } + ctor_name = cur_lexeme(p); + ty = parse_type(p); + constructor_list_append(&ctors, ctor_name, ty); + expect(p, tok_comma); + } + expect(p, tok_right_brace); + return make_datatype_decl(params, datatype_name, ctors.head); +} + +static struct decl *parse_alias_decl(struct parser *p) { + struct var_list *params = parse_def_var_list(p); + if (cur_tok(p) != tok_ident) { + report_error(p, "Invalid type name, expected an identifier.\n"); + } + char *name = cur_lexeme(p); + consume(p); + expect(p, tok_equal); + struct type *body = parse_type(p); + return make_alias_decl(params, name, body); +} + +static struct decl *parse_typecheck_decl(struct parser *p) { + if (cur_tok(p) != tok_ident) { + report_error(p, "Expected identifier.\n"); + } + char *name = cur_lexeme(p); + consume(p); + expect(p, tok_colon); + struct type *t = parse_type(p); + return make_typecheck_decl(name, t); +} + +struct decl *parse_value_or_func_decl(struct parser *p) { + if (cur_tok(p) != tok_ident) { + report_error(p, "Invalid definition, expected an identifier.\n"); + } + char *ident = cur_lexeme(p); + consume(p); + if (cur_tok(p) == tok_equal) { + /* It's a value definition */ + consume(p); + struct expr *body = parse_expr(p); + return make_value_decl(ident, body); + } + else { + /* It's a function definition */ + struct func_decl_list_builder func = { NULL }; + struct pattern_list *args = parse_pattern_list(p, tok_equal); + expect(p, tok_equal); + struct expr *body = parse_expr(p); + func_decl_list_append(&func, ident, args, body); + while (cur_tok(p) == tok_pipe) { + consume(p); + if (cur_tok(p) != tok_ident) { + report_error(p, "Invalid function case definition, expected an identifier.\n"); + } + ident = cur_lexeme(p); + consume(p); + args = parse_pattern_list(p, tok_equal); + if (args == NULL) { + report_error(p, "Expected at least one argument in function definition.\n"); + } + expect(p, tok_equal); + body = parse_expr(p); + func_decl_list_append(&func, ident, args, body); + } + return make_func_decl(func.head); + } +} + +struct decl *parse_decl(struct parser *p) { + switch (cur_tok(p)) { + case tok_datatype: + consume(p); + return parse_datatype_decl(p); + case tok_alias: + consume(p); + return parse_alias_decl(p); + case tok_typecheck: + consume(p); + return parse_typecheck_decl(p); + case tok_def: + consume(p); + return parse_value_or_func_decl(p); + default: + report_error(p, "Declaration expected, invalid token.\n"); + } +} + +/* Expressions */ +static struct expr *parse_list_literal(struct parser *p) { + struct expr_list_builder elems = { NULL }; + struct expr *e; + while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) { + e = parse_expr(p); + expr_list_append(&elems, e); + expect(p, tok_comma); + } + expect(p, tok_right_square); + return make_list_lit(elems.head); +} + +static struct expr *parse_paren_expr(struct parser *p) { + struct expr *e = parse_expr(p); + if (cur_tok(p) == tok_comma) { + consume(p); + struct expr_list_builder elems = { NULL }; + expr_list_append(&elems, e); + while (cur_tok(p) != tok_eof && cur_tok(p) != tok_left_paren) { + e = parse_expr(p); + expr_list_append(&elems, e); + expect(p, tok_comma); + } + e = make_tuple_lit(elems.head); + } + expect(p, tok_right_paren); + return e; +} + +static struct expr *try_parse_atomic_expr(struct parser *p) { + struct expr *res; + switch (cur_tok(p)) { + case tok_ident: + res = make_ident_expr(cur_lexeme(p)); + consume(p); + break; + case tok_int: + res = make_int_lit(cur_lexeme(p)); + consume(p); + break; + case tok_true: + res = make_bool_lit(true); + consume(p); + break; + case tok_false: + res = make_bool_lit(false); + consume(p); + break; + case tok_string: + res = make_string_lit(cur_lexeme(p)); + consume(p); + break; + case tok_left_square: + consume(p); + res = parse_list_literal(p); + break; + case tok_left_paren: + consume(p); + res = parse_paren_expr(p); + break; + default: + res = NULL; + break; + } + return res; +} + +static struct expr *parse_fun_app(struct parser *p) { + struct expr *fun = try_parse_atomic_expr(p); + if (!fun) { + report_error(p, "Expected expression.\n"); + } + struct expr_list_builder args = { NULL }; + struct expr *arg; + while ((arg = try_parse_atomic_expr(p))) { + expr_list_append(&args, arg); + } + return make_func_app(fun, args.head); +} + +/* TODO: Implement infix operations */ +static struct expr *parse_infix_expr(struct parser *p) { + struct expr *lhs = parse_fun_app(p); + /*if (cur_tok(p) == tok_colon) { + consume(p); + struct expr_list_builder params = { NULL };; + expr_list_append(¶ms, lhs); + struct expr *rhs = parse_infix_expr(p); + expr_list_append(¶ms, rhs); + return make_list_cons(lhs, rhs); + }*/ + return lhs; +} + +static struct expr *parse_match_expr(struct parser *p) { + struct expr *cond = parse_expr(p); + expect(p, tok_left_brace); + struct pattern *pattern; + struct expr *body; + struct case_list_builder cases = { NULL }; + while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_brace) { + expect(p, tok_case); + pattern = parse_pattern(p); + expect(p, tok_arrow); + body = parse_expr(p); + case_list_append(&cases, pattern, body); + } + expect(p, tok_right_brace); + return make_match_expr(cond, cases.head); +} + +static struct expr *parse_let_expr(struct parser *p) { + struct decl_list_builder decls = { NULL }; + while (cur_tok(p) != tok_eof && cur_tok(p) != tok_in) { + decl_list_append(&decls, parse_decl(p)); + } + expect(p, tok_in); + struct expr *body = parse_expr(p); + return make_let_expr(decls.head, body); +} + +struct expr *parse_expr(struct parser *p) { + switch (cur_tok(p)) { + case tok_match: + consume(p); + return parse_match_expr(p); + case tok_let: + consume(p); + return parse_let_expr(p); + default: + return parse_infix_expr(p); + } +} + +/* Patterns */ +static struct pattern *parse_list_pattern(struct parser *p) { + struct pattern_list_builder elems = { NULL }; + while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) { + pattern_list_append(&elems, parse_pattern(p)); + expect(p, tok_comma); + } + expect(p, tok_right_square); + return make_list_pattern(elems.head); +} + +static struct pattern *parse_tuple_pattern(struct parser *p) { + struct pattern_list_builder elems = { NULL }; + while (cur_tok(p) != tok_eof && cur_tok(p) != tok_right_square) { + pattern_list_append(&elems, parse_pattern(p)); + expect(p, tok_comma); + } + expect(p, tok_right_square); + return make_tuple_pattern(elems.head); +} + +/* Parse patterns until the `delim` token is found or end of file is encountered */ +static struct pattern_list *parse_pattern_list(struct parser *p, enum token_type delim) { + struct pattern_list_builder patterns = { NULL }; + while (cur_tok(p) != tok_eof && cur_tok(p) != delim) { + pattern_list_append(&patterns, parse_pattern(p)); + } + return patterns.head; +} + +static struct pattern *parse_constructor_pattern(struct parser *p) { + if (cur_tok(p) != tok_ident) { + report_error(p, "Expected constructor name in constructor pattern.\n"); + } + else { + char *name = cur_lexeme(p); + consume(p); + struct pattern_list *args = parse_pattern_list(p, tok_right_angle_bracket); + expect(p, tok_right_angle_bracket); + return make_constructor_pattern(name, args); + } +} + +static struct pattern *parse_pattern(struct parser *p) { + struct pattern *res; + switch (cur_tok(p)) { + case tok_ident: + res = make_var_pattern(cur_lexeme(p)); + consume(p); + break; + case tok_int: + res = make_int_pattern(cur_lexeme(p)); + consume(p); + break; + case tok_true: + res = make_bool_pattern(true); + consume(p); + break; + case tok_false: + res = make_bool_pattern(false); + consume(p); + break; + case tok_string: + res = make_string_pattern(cur_lexeme(p)); + consume(p); + break; + case tok_left_paren: + consume(p); + res = parse_tuple_pattern(p); + break; + case tok_left_square: + consume(p); + res = parse_list_pattern(p); + break; + case tok_left_angle_bracket: + consume(p); + res = parse_constructor_pattern(p); + break; + default: + report_error(p, "Invalid pattern.\n"); + } + return res; +} + +struct decl_list *parse_program(struct parser *p) { + struct decl_list_builder decls = { NULL }; + while (cur_tok(p) != tok_eof) { + decl_list_append(&decls, parse_decl(p)); + } + return decls.head; +} + diff --git a/src/pattern.c b/src/pattern.c new file mode 100644 index 0000000..e0b615b --- /dev/null +++ b/src/pattern.c @@ -0,0 +1,102 @@ +#include "pattern.h" +#include "memory.h" +#include + +void pattern_list_append(struct pattern_list_builder *b, struct pattern *p) { + struct pattern_list *node = xmalloc(sizeof(struct pattern_list)); + node->elem = p; + node->next = NULL; + if (b->last == NULL) { + b->head = node; + b->last = node; + } + else { + b->last->next = node; + b->last = node; + } +} + +void free_pattern_list(struct pattern_list *l) { + struct pattern_list *tmp; + while (l != NULL) { + tmp = l; + l = l->next; + free_pattern(tmp->elem); + free(tmp); + } +} + +static struct pattern *alloc_pattern(enum pattern_form form) { + struct pattern *p = xmalloc(sizeof(struct pattern)); + p->form = form; + return p; +} + +struct pattern *make_var_pattern(char *name) { + struct pattern *p = alloc_pattern(pattern_var); + p->lexeme = name; + return p; +} +struct pattern *make_int_pattern(char *lexeme) { + struct pattern *p = alloc_pattern(pattern_var); + p->lexeme = lexeme; + return p; +} + +struct pattern *make_bool_pattern(bool b) { + struct pattern *p = alloc_pattern(pattern_var); + p->b = b; + return p; +} + +struct pattern *make_string_pattern(char *lexeme) { + struct pattern *p = alloc_pattern(pattern_var); + p->lexeme = lexeme; + return p; +} + +struct pattern *make_list_pattern(struct pattern_list *l) { + struct pattern *p = alloc_pattern(pattern_list_lit); + p->list = l; + return p; +} + +struct pattern *make_tuple_pattern(struct pattern_list *l) { + struct pattern *p = alloc_pattern(pattern_tuple_lit); + p->tuple = l; + return p; +} + +struct pattern *make_constructor_pattern(char *name, struct pattern_list *args) { + struct pattern *p = alloc_pattern(pattern_tuple_lit); + p->ctor = (struct constructor_pattern) { name, args }; + return p; +} + +void free_pattern(struct pattern *p) { + if(!p) return; + switch (p->form) { + case pattern_var: + free(p->lexeme); + break; + case pattern_int_lit: + free(p->lexeme); + break; + case pattern_bool_lit: + break; + case pattern_string_lit: + free(p->lexeme); + break; + case pattern_list_lit: + free_pattern_list(p->list); + break; + case pattern_tuple_lit: + free_pattern_list(p->list); + break; + case pattern_constructor: + free(p->ctor.name); + free_pattern_list(p->ctor.args); + break; + } + free(p); +} diff --git a/src/type.c b/src/type.c new file mode 100644 index 0000000..bface02 --- /dev/null +++ b/src/type.c @@ -0,0 +1,78 @@ +#include "type.h" +#include "memory.h" +#include + +struct type *make_tuple_type(struct type_list *t) { + struct type *res = xmalloc(sizeof(struct type)); + res->form = tuple_type; + res->tuple = t; + return res; +} + +struct type *make_func_type(struct type *dom, struct type *cod) { + struct type *res = xmalloc(sizeof(struct type)); + res->form = func_type; + res->func = (struct func_type) { dom, cod }; + return res; +} + +struct type *make_var_type(char *name) { + struct type *res = xmalloc(sizeof(struct type)); + res->form = type_var; + res->var_name = name; + return res; +} + +struct type *make_type_name(struct type_list *params, char *name) { + struct type *res = xmalloc(sizeof(struct type)); + res->form = type_name; + res->type_name = (struct type_name) { params, name }; + return res; +} + +void free_type(struct type *t) { + if (!t) return; + + switch (t->form) { + case type_var: + free(t->var_name); + break; + case type_name: + free_type_list(t->type_name.params); + free(t->type_name.name); + break; + case tuple_type: + free_type_list(t->tuple); + break; + case func_type: + free(t->func.dom); + free(t->func.cod); + break; + } + free(t); +} + +void type_list_append(struct type_list_builder *b, struct type *elem) { + struct type_list *t = xmalloc(sizeof(struct type_list)); + t->next = NULL; + t->elem = elem; + if (b->last == NULL) { + b->head = t; + b->last = t; + } + else { + b->last->next = t; + b->last = t; + } +} + +void free_type_list(struct type_list *l) { + struct type_list *tmp; + while (l != NULL) { + free_type(l->elem); + tmp = l; + l = l->next; + free(tmp->elem); + free(tmp); + } +}