diff --git a/bshell/parse/syntax/arith.c b/bshell/parse/syntax/arith.c
index 86a30b5..d17ff52 100644
--- a/bshell/parse/syntax/arith.c
+++ b/bshell/parse/syntax/arith.c
@@ -1,27 +1,902 @@
+#include "../../debug.h"
+#include "../../operator.h"
 #include "../syntax.h"
+#include <stdio.h>
+
+/* the kind of component the expression parser consumed most recently */
+enum expr_component {
+	EXPR_C_NONE = 0,
+	EXPR_C_OPERAND,
+	EXPR_C_BINARY_OP,
+	EXPR_C_UNARY_OP,
+};
+
+/* working state for a single call to parse_arith_expr() */
+struct expr_parse_ctx {
+	/* operators still waiting for operands, and the operands/operators
+	 * emitted so far in postfix order */
+	fx_queue expr_operator_stack, expr_out_queue;
+	/* component consumed by the previous step; used to check that
+	 * operands and operators alternate correctly */
+	enum expr_component expr_prev;
+	/* token type, operator id or symbol id of the previous token, as
+	 * recorded by parse_arith_expr() */
+	unsigned int expr_prev_symbol;
+	/* an operator with a precedence below this ends the expression */
+	enum operator_precedence expr_minimum_precedence;
+	/* set when the expression has ended / when parsing has failed */
+	bool expr_done, expr_fail;
+};
+
+/*
+ * Check whether an operator node already has every operand its arity needs.
+ *
+ * A node without operator info, or with an arity other than unary/binary,
+ * is never considered complete.
+ */
+static bool op_node_is_complete(struct op_ast_node *node)
+{
+	if (!node->n_op) {
+		return false;
+	}
+
+	switch (node->n_op->op_arity) {
+	case OPA_UNARY:
+		return node->n_right != NULL;
+	case OPA_BINARY:
+		return (node->n_left != NULL && node->n_right != NULL);
+	default:
+		return false;
+	}
+}
+
+/*
+ * Pop the last entry of a node queue and return the node that owns it, or
+ * NULL if the queue is empty. The NULL check is done before unboxing so the
+ * result never depends on how fx_unbox() treats a NULL entry.
+ */
+static struct ast_node *pop_back_node(fx_queue *q)
+{
+	fx_queue_entry *entry = fx_queue_pop_back(q);
+	return entry ? fx_unbox(struct ast_node, entry, n_entry) : NULL;
+}
+
+/*
+ * Fold the postfix output queue of an expression into a tree.
+ *
+ * Operators on the operator stack whose precedence is at least
+ * `minimum_precedence` are first moved to the output queue; the walk over the
+ * stack stops at the first operator below that precedence, which stays where
+ * it is. The output queue is then consumed front to back: operands (and
+ * operator nodes that are already complete) are collected on a temporary
+ * queue, and each incomplete operator takes its operand(s) from the back of
+ * that temporary queue.
+ *
+ * On success *out receives the root of the resulting tree, or NULL if
+ * nothing was left to collect, and true is returned. On failure false is
+ * returned and *out is not written.
+ */
+static bool finalise_expr(
+	struct expr_parse_ctx *ctx,
+	struct ast_node **out,
+	enum operator_precedence minimum_precedence)
+{
+	fx_queue_entry *entry = NULL;
+	while (true) {
+		entry = fx_queue_pop_back(&ctx->expr_operator_stack);
+		if (!entry) {
+			break;
+		}
+
+		struct op_ast_node *node
+			= fx_unbox(struct op_ast_node, entry, n_base.n_entry);
+		if (!node) {
+			/* this should never happen */
+			return false;
+		}
+
+		const struct operator_info *op = node->n_op;
+
+		/* if we aren't processing operators below a certain precedence
+		 * then leave them on the stack and stop here. */
+		if (op->op_precedence < minimum_precedence) {
+			fx_queue_push_back(&ctx->expr_operator_stack, entry);
+			break;
+		}
+
+		fx_queue_push_back(&ctx->expr_out_queue, entry);
+	}
+
+	fx_queue q = FX_QUEUE_INIT;
+	entry = fx_queue_first(&ctx->expr_out_queue);
+
+	while (entry) {
+		struct ast_node *item
+			= fx_unbox(struct ast_node, entry, n_entry);
+
+		/* fetch the successor before unlinking the current entry */
+		fx_queue_entry *next = fx_queue_next(entry);
+		fx_queue_delete(&ctx->expr_out_queue, entry);
+		entry = next;
+
+		struct op_ast_node *op_node = (struct op_ast_node *)item;
+
+		/* operands, and operator nodes that already have all the
+		 * operands they need, are pushed to the temporary queue as-is
+		 * so that a later operator can pick them up. */
+		if (item->n_type != AST_OP || op_node_is_complete(op_node)) {
+			fx_queue_push_back(&q, &item->n_entry);
+			continue;
+		}
+
+		/* otherwise, pop the relevant operands from the temporary
+		 * queue. the right-hand operand was pushed last, so it is
+		 * popped first. */
+		const struct operator_info *op = op_node->n_op;
+
+		op_node->n_right = pop_back_node(&q);
+		if (op_node->n_right) {
+			op_node->n_right->n_parent = (struct ast_node *)op_node;
+		}
+
+		if (op->op_arity == OPA_BINARY) {
+			op_node->n_left = pop_back_node(&q);
+			if (op_node->n_left) {
+				op_node->n_left->n_parent
+					= (struct ast_node *)op_node;
+			}
+		}
+
+		/* ...and push the newly-completed operator node to the
+		 * temporary queue */
+		fx_queue_push_back(&q, &op_node->n_base.n_entry);
+	}
+
+	/* if we are not processing operators below a certain precedence,
+	 * those operators are still on the operator stack, but their operands
+	 * have just been moved to the temporary queue used above. move one
+	 * entry back to the output queue for each such operator so they can
+	 * be used later. */
+	entry = fx_queue_first(&ctx->expr_operator_stack);
+	while (entry) {
+		fx_queue_entry *entry2 = fx_queue_pop_front(&q);
+		if (!entry2) {
+			return false;
+		}
+
+		fx_queue_push_back(&ctx->expr_out_queue, entry2);
+		entry = fx_queue_next(entry);
+	}
+
+	/* the final node remaining on the temporary queue is the root node
+	 * of the new expression tree */
+	*out = pop_back_node(&q);
+
+	return true;
+}
+
+/*
+ * Check, without consuming anything, whether the next token can start an
+ * arithmetic expression: a literal, variable, string start or operator
+ * token, or a symbol that operator_get_by_token() recognises.
+ */
 bool peek_arith_expr(struct parse_ctx *ctx)
 {
 	switch (peek_token_type(ctx)) {
 	case TOK_SYMBOL:
-		switch (peek_unknown_symbol(ctx)) {
-		case SYM_PLUS:
-		case SYM_HYPHEN:
-			return true;
-		default:
-			return false;
-		}
+		return operator_get_by_token(peek_unknown_symbol(ctx)) != NULL;
 	case TOK_INT:
 	case TOK_DOUBLE:
 	case TOK_STRING:
+	case TOK_VAR:
 	case TOK_STR_START:
+	case TOK_OPERATOR:
 		return true;
 	default:
 		return false;
 	}
 }
 
-bool parse_arith_expr(struct parse_ctx *ctx, struct ast_node **out)
+/*
+ * Parse a parenthesised expression: `(` expr `)`.
+ *
+ * On success the inner expression is stored in *out and true is returned.
+ * On failure an error is reported, *out is left untouched and false is
+ * returned.
+ */
+static bool parse_subexpr(struct parse_ctx *ctx, struct ast_node **out)
+{
+	if (!parse_symbol(ctx, SYM_LEFT_PAREN)) {
+		report_error(ctx, "expected `(`");
+		return false;
+	}
+
+	struct ast_node *v = NULL;
+	if (!parse_expr(ctx, &v)) {
+		report_error(ctx, "error while parsing parenthesis expression");
+		return false;
+	}
+
+	if (!parse_symbol(ctx, SYM_RIGHT_PAREN)) {
+		report_error(ctx, "expected `)` after parenthesis expression");
+		/* the inner expression is not handed to the caller, so it
+		 * has to be released here */
+		if (v) {
+			ast_node_destroy(v);
+		}
+		return false;
+	}
+
+	*out = v;
+	return true;
+}
+
+/*
+ * Parse a statement block: `$(` statement-list `)`.
+ *
+ * An empty block `$()` yields an AST_NULL node. On failure an error is
+ * reported (or p_status is set when out of memory) and false is returned.
+ */
+static bool parse_stmt_block(struct parse_ctx *ctx, struct ast_node **out)
+{
+	if (!parse_symbol(ctx, SYM_DOLLAR_LEFT_PAREN)) {
+		report_error(ctx, "expected `$(`");
+		return false;
+	}
+
+	if (parse_symbol(ctx, SYM_RIGHT_PAREN)) {
+		struct ast_node *empty = ast_node_create(AST_NULL);
+		if (!empty) {
+			ctx->p_status = BSHELL_ERR_NO_MEMORY;
+			return false;
+		}
+
+		*out = empty;
+		return true;
+	}
+
+	struct ast_node *v = NULL;
+	if (!parse_statement_list(ctx, &v)) {
+		return false;
+	}
+
+	if (!parse_symbol(ctx, SYM_RIGHT_PAREN)) {
+		report_error(ctx, "expected ')' after subexpression");
+		ast_node_destroy(v);
+		return false;
+	}
+
+	*out = v;
+	return true;
+}
+
+/*
+ * Parse a hashtable literal: `@{` key `=` value ... `}`.
+ *
+ * Each key is either a bare word (stored as a string node) or any value
+ * accepted by parse_arith_value(). Entries are separated by `;` or a
+ * linefeed. On failure the partially built table is destroyed and false is
+ * returned.
+ */
+static bool parse_hashtable(struct parse_ctx *ctx, struct ast_node **out)
+{
+	if (!parse_symbol(ctx, SYM_AT_LEFT_BRACE)) {
+		report_error(ctx, "expected `@{`");
+		return false;
+	}
+
+	/* a linefeed directly after the opening brace is optional */
+	parse_linefeed(ctx);
+
+	struct hashtable_ast_node *table
+		= (struct hashtable_ast_node *)ast_node_create(AST_HASHTABLE);
+	if (!table) {
+		ctx->p_status = BSHELL_ERR_NO_MEMORY;
+		return false;
+	}
+
+	bool ok = true;
+	while (ok) {
+		if (parse_symbol(ctx, SYM_RIGHT_BRACE)) {
+			break;
+		}
+
+		parse_linefeed(ctx);
+
+		struct hashtable_item_ast_node *item
+			= (struct hashtable_item_ast_node *)ast_node_create(
+				AST_HASHTABLE_ITEM);
+		if (!item) {
+			ctx->p_status = BSHELL_ERR_NO_MEMORY;
+			ok = false;
+			break;
+		}
+
+		struct lex_token *tok = NULL;
+		if (parse_word(ctx, &tok)) {
+			struct string_ast_node *v
+				= (struct string_ast_node *)ast_node_create(
+					AST_STRING);
+			if (!v) {
+				ctx->p_status = BSHELL_ERR_NO_MEMORY;
+				ast_node_destroy((struct ast_node *)item);
+				ok = false;
+				break;
+			}
+
+			v->n_value = tok;
+			item->n_key = (struct ast_node *)v;
+		} else if (!parse_arith_value(ctx, &item->n_key)) {
+			report_error(ctx, "failed to parse hashtable key");
+			ast_node_destroy((struct ast_node *)item);
+			ok = false;
+			break;
+		}
+
+		if (!parse_symbol(ctx, SYM_EQUAL)) {
+			report_error(ctx, "expected `=` after hashtable key");
+			ast_node_destroy((struct ast_node *)item);
+			ok = false;
+			break;
+		}
+
+		if (!parse_expr(ctx, &item->n_value)) {
+			report_error(ctx, "failed to parse hashtable value");
+			ast_node_destroy((struct ast_node *)item);
+			ok = false;
+			break;
+		}
+
+		/* from here on the item belongs to the table */
+		fx_queue_push_back(&table->n_items, &item->n_base.n_entry);
+
+		if (parse_symbol(ctx, SYM_RIGHT_BRACE)) {
+			break;
+		}
+
+		if (!parse_linefeed(ctx) && !parse_symbol(ctx, SYM_SEMICOLON)) {
+			report_error(
+				ctx,
+				"expected `;`, `}`, or linefeed after "
+				"hashtable value");
+			ok = false;
+			break;
+		}
+	}
+
+	if (!ok) {
+		ast_node_destroy((struct ast_node *)table);
+		return false;
+	}
+
+	*out = (struct ast_node *)table;
+	return true;
+}
+
+/*
+ * Parse an array literal: `@(` value [`,` value]... `)`.
+ *
+ * A `)` is accepted wherever a value could start, so both `@()` and a
+ * trailing comma are allowed. On failure the partially built array is
+ * destroyed and false is returned.
+ */
+static bool parse_array(struct parse_ctx *ctx, struct ast_node **out)
+{
+	if (!parse_symbol(ctx, SYM_AT_LEFT_PAREN)) {
+		report_error(ctx, "expected `@(`");
+		return false;
+	}
+
+	struct array_ast_node *array
+		= (struct array_ast_node *)ast_node_create(AST_ARRAY);
+	if (!array) {
+		ctx->p_status = BSHELL_ERR_NO_MEMORY;
+		return false;
+	}
+
+	size_t nr_items = 0;
+	bool ok = true;
+	while (ok) {
+		if (parse_symbol(ctx, SYM_RIGHT_PAREN)) {
+			break;
+		}
+
+		/* every value after the first must be preceded by a comma */
+		if (nr_items && !parse_symbol(ctx, SYM_COMMA)) {
+			report_error(
+				ctx,
+				"expected `,` or `)` after array value");
+			ok = false;
+			break;
+		}
+
+		struct ast_node *item = NULL;
+		if (!parse_arith_value(ctx, &item) || !item) {
+			report_error(ctx, "failed to parse array item");
+			ok = false;
+			break;
+		}
+
+		fx_queue_push_back(&array->n_items, &item->n_entry);
+		nr_items++;
+	}
+
+	if (!ok) {
+		ast_node_destroy((struct ast_node *)array);
+		return false;
+	}
+
+	*out = (struct ast_node *)array;
+	return true;
+}
+
+/*
+ * Parse a formatted string: a TOK_STR_START token, any number of values
+ * accepted by parse_arith_value(), and a closing TOK_STR_END token.
+ *
+ * Returns false without touching *out if the next token is not
+ * TOK_STR_START. If an element fails to parse, the partially built node is
+ * destroyed, *out is set to NULL and false is returned.
+ */
+bool parse_fstring(struct parse_ctx *ctx, struct ast_node **out)
+{
+	if (peek_token_type(ctx) != TOK_STR_START) {
+		return false;
+	}
+
+	discard_token(ctx);
+
+	struct fstring_ast_node *fstring
+		= (struct fstring_ast_node *)ast_node_create(AST_FSTRING);
+	if (!fstring) {
+		ctx->p_status = BSHELL_ERR_NO_MEMORY;
+		return false;
+	}
+
+	bool ok = true;
+	while (ok) {
+		if (peek_token_type(ctx) == TOK_STR_END) {
+			discard_token(ctx);
+			break;
+		}
+
+		struct ast_node *item = NULL;
+		if (!parse_arith_value(ctx, &item) || !item) {
+			ok = false;
+			break;
+		}
+
+		fx_queue_push_back(&fstring->n_elements, &item->n_entry);
+	}
+
+	if (!ok) {
+		ast_node_destroy((struct ast_node *)fstring);
+		fstring = NULL;
+	}
+
+	*out = (struct ast_node *)fstring;
+	return ok;
+}
+
+/*
+ * Parse a single operand: an int, double, string or variable token, a
+ * formatted string, or one of the bracketed forms `(...)`, `$(...)`,
+ * `@{...}`, `@(...)` and `{...}`.
+ *
+ * On success the new node is stored in *out. On failure an error is
+ * reported (or p_status is set when out of memory) and false is returned.
+ */
+bool parse_arith_value(struct parse_ctx *ctx, struct ast_node **out)
+{
+	struct lex_token *tok = peek_token(ctx);
+	if (!tok) {
+		report_error(ctx, "unexpected end of input; expected an operand");
+		return false;
+	}
+
+	/* in the literal cases below the node is allocated before the token
+	 * is claimed, so an allocation failure leaves the token in place */
+	switch (tok->tok_type) {
+	case TOK_INT: {
+		struct int_ast_node *v
+			= (struct int_ast_node *)ast_node_create(AST_INT);
+		if (!v) {
+			ctx->p_status = BSHELL_ERR_NO_MEMORY;
+			return false;
+		}
+
+		v->n_value = claim_token(ctx);
+		*out = (struct ast_node *)v;
+		return true;
+	}
+	case TOK_DOUBLE: {
+		struct double_ast_node *v
+			= (struct double_ast_node *)ast_node_create(AST_DOUBLE);
+		if (!v) {
+			ctx->p_status = BSHELL_ERR_NO_MEMORY;
+			return false;
+		}
+
+		v->n_value = claim_token(ctx);
+		*out = (struct ast_node *)v;
+		return true;
+	}
+	case TOK_STRING: {
+		struct string_ast_node *v
+			= (struct string_ast_node *)ast_node_create(AST_STRING);
+		if (!v) {
+			ctx->p_status = BSHELL_ERR_NO_MEMORY;
+			return false;
+		}
+
+		v->n_value = claim_token(ctx);
+		*out = (struct ast_node *)v;
+		return true;
+	}
+	case TOK_VAR: {
+		struct var_ast_node *v
+			= (struct var_ast_node *)ast_node_create(AST_VAR);
+		if (!v) {
+			ctx->p_status = BSHELL_ERR_NO_MEMORY;
+			return false;
+		}
+
+		v->n_ident = claim_token(ctx);
+		*out = (struct ast_node *)v;
+		return true;
+	}
+	case TOK_STR_START:
+		return parse_fstring(ctx, out);
+	case TOK_SYMBOL:
+		switch (tok->tok_symbol) {
+		case SYM_LEFT_PAREN:
+			return parse_subexpr(ctx, out);
+		case SYM_DOLLAR_LEFT_PAREN:
+			return parse_stmt_block(ctx, out);
+		case SYM_AT_LEFT_BRACE:
+			return parse_hashtable(ctx, out);
+		case SYM_AT_LEFT_PAREN:
+			return parse_array(ctx, out);
+		case SYM_LEFT_BRACE:
+			return parse_block(ctx, out);
+		default:
+			report_error(ctx, "token is not a valid operand");
+			return false;
+		}
+	default:
+		report_error(ctx, "token is not a valid operand");
+		return false;
+	}
+}
+
+/*
+ * Parse one operand and append it to the expression's output queue.
+ *
+ * Fails if the previous component was also an operand.
+ */
+static bool parse_operand(struct parse_ctx *ctx, struct expr_parse_ctx *expr)
+{
+	if (expr->expr_prev == EXPR_C_OPERAND) {
+		report_error(ctx, "encountered two operands in a row");
+		return false;
+	}
+
+	expr->expr_prev = EXPR_C_OPERAND;
+
+	struct ast_node *v = NULL;
+	if (!parse_arith_value(ctx, &v)) {
+		return false;
+	}
+
+	fx_queue_push_back(&expr->expr_out_queue, &v->n_entry);
+	return true;
+}
+
+/*
+ * Push an operator node onto the operator stack.
+ *
+ * Before pushing, operators on top of the stack are moved to the output
+ * queue for as long as they have a higher precedence than `node`, or the
+ * same precedence when `node` is left-associative.
+ *
+ * The node is NOT pushed if it has no operator info, or if the top of the
+ * stack is not an operator node.
+ */
+void arith_push_operator(struct expr_parse_ctx *state, struct op_ast_node *node)
+{
+	const struct operator_info *op = node->n_op;
+	if (!op) {
+		return;
+	}
+
+	while (true) {
+		fx_queue_entry *top
+			= fx_queue_last(&state->expr_operator_stack);
+		if (!top) {
+			break;
+		}
+
+		struct ast_node *top_node
+			= fx_unbox(struct ast_node, top, n_entry);
+		const struct operator_info *top_op = NULL;
+
+		switch (top_node->n_type) {
+		case AST_OP: {
+			struct op_ast_node *op_node
+				= (struct op_ast_node *)top_node;
+			top_op = op_node->n_op;
+			break;
+		}
+		default:
+			return;
+		}
+
+		/* stop once the top of the stack binds less tightly than the
+		 * new operator, or equally tightly when the new operator is
+		 * not left-associative */
+		if (top_op->op_precedence < op->op_precedence
+		    || (top_op->op_precedence == op->op_precedence
+			&& op->op_associativity != ASSOCIATIVITY_LEFT)) {
+			break;
+		}
+
+		fx_queue_delete(&state->expr_operator_stack, top);
+		fx_queue_push_back(&state->expr_out_queue, top);
+	}
+
+	fx_queue_push_back(&state->expr_operator_stack, &node->n_base.n_entry);
+}
+
+/*
+ * Parse the next token as a unary operator and push it onto the operator
+ * stack.
+ *
+ * If the operator's precedence is below the expression's minimum, the token
+ * is left in place, the expression is marked as done and true is returned.
+ */
+static bool parse_unary_operator(
+	struct parse_ctx *ctx,
+	struct expr_parse_ctx *expr)
+{
+	struct lex_token *tok = peek_token(ctx);
+
+	const struct operator_info *op = NULL;
+	switch (tok->tok_type) {
+	case TOK_SYMBOL:
+		op = operator_get_by_token(tok->tok_symbol);
+		break;
+	case TOK_OPERATOR:
+		switch (tok->tok_operator) {
+		case TKOP_SPLIT:
+			op = operator_get_by_id(OP_USPLIT);
+			break;
+		case TKOP_JOIN:
+			/* NOTE(review): this maps the join token to the
+			 * unary *split* operator, exactly like TKOP_SPLIT
+			 * above. it looks like a copy-paste slip; confirm
+			 * which unary-join id operator.h provides. */
+			op = operator_get_by_id(OP_USPLIT);
+			break;
+		default:
+			op = operator_get_by_token(tok->tok_operator);
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	/* the lookup must be validated before any field of `op` is read */
+	if (!op) {
+		report_error(ctx, "unknown unary operator");
+		return false;
+	}
+
+	if (expr->expr_prev == EXPR_C_OPERAND
+	    && op->op_location == OPL_PREFIX) {
+		report_error(
+			ctx,
+			"unexpected operand before unary "
+			"operator");
+		return false;
+	}
+
+	if (op->op_precedence < expr->expr_minimum_precedence) {
+		expr->expr_done = true;
+		return true;
+	}
+
+	expr->expr_prev = EXPR_C_UNARY_OP;
+
+	struct op_ast_node *op_node
+		= (struct op_ast_node *)ast_node_create(AST_OP);
+	if (!op_node) {
+		ctx->p_status = BSHELL_ERR_NO_MEMORY;
+		return false;
+	}
+
+	op_node->n_op = op;
+	discard_token(ctx);
+	arith_push_operator(expr, op_node);
+	return true;
+}
+
+/*
+ * Parse the next token as a binary operator and push it onto the operator
+ * stack.
+ *
+ * If the operator's precedence is below the expression's minimum, the token
+ * is left in place, the expression is marked as done and true is returned.
+ * Apart from OP_PAREN, a binary operator must follow an operand.
+ */
+static bool parse_binary_operator(
+	struct parse_ctx *ctx,
+	struct expr_parse_ctx *expr)
+{
+	struct lex_token *tok = peek_token(ctx);
+
+	const struct operator_info *op = NULL;
+	switch (tok->tok_type) {
+	case TOK_SYMBOL:
+		op = operator_get_by_token(tok->tok_symbol);
+		break;
+	case TOK_OPERATOR:
+		switch (tok->tok_operator) {
+		case TKOP_SPLIT:
+			op = operator_get_by_id(OP_BSPLIT);
+			break;
+		case TKOP_JOIN:
+			op = operator_get_by_id(OP_BJOIN);
+			break;
+		default:
+			op = operator_get_by_token(tok->tok_operator);
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	if (!op) {
+		report_error(ctx, "unknown binary operator");
+		return false;
+	}
+
+	if (op->op_precedence < expr->expr_minimum_precedence) {
+		expr->expr_done = true;
+		return true;
+	}
+
+	if (expr->expr_prev != EXPR_C_OPERAND) {
+		switch (op->op_id) {
+		case OP_PAREN:
+			break;
+		default:
+			report_error(
+				ctx,
+				"expected operand before binary "
+				"operator");
+			return false;
+		}
+	}
+
+	expr->expr_prev = EXPR_C_BINARY_OP;
+
+	struct op_ast_node *op_node
+		= (struct op_ast_node *)ast_node_create(AST_OP);
+	if (!op_node) {
+		ctx->p_status = BSHELL_ERR_NO_MEMORY;
+		return false;
+	}
+
+	op_node->n_op = op;
+	discard_token(ctx);
+	arith_push_operator(expr, op_node);
+	return true;
+}
+
+/*
+ * Placeholder for call expressions (an operand followed by `(`). Calls are
+ * not implemented, so this reports an error and always fails.
+ */
+static bool parse_call(struct parse_ctx *ctx, struct expr_parse_ctx *expr)
 {
+	(void)expr;
+	report_error(ctx, "call expressions are not supported yet");
 	return false;
 }
+
+/*
+ * Handle a `,` inside an expression by building an array node.
+ *
+ * Everything parsed so far that binds at least as tightly as
+ * PRECEDENCE_ARRAY is collected as the first item; further items are parsed
+ * for as long as a comma follows. The finished array is pushed to the output
+ * queue as a single operand.
+ *
+ * If the expression's minimum precedence is above PRECEDENCE_ARRAY, the
+ * comma is left in place and the expression is marked as done instead.
+ */
+static bool parse_comma(struct parse_ctx *ctx, struct expr_parse_ctx *expr)
+{
+	if (PRECEDENCE_ARRAY < expr->expr_minimum_precedence) {
+		expr->expr_done = true;
+		return true;
+	}
+
+	struct ast_node *item = NULL;
+	if (!finalise_expr(expr, &item, PRECEDENCE_ARRAY)) {
+		report_error(ctx, "failed to collect first array item.");
+		return false;
+	}
+
+	struct array_ast_node *array
+		= (struct array_ast_node *)ast_node_create(AST_ARRAY);
+	if (!array) {
+		ctx->p_status = BSHELL_ERR_NO_MEMORY;
+		if (item) {
+			ast_node_destroy(item);
+		}
+		return false;
+	}
+
+	if (item) {
+		fx_queue_push_back(&array->n_items, &item->n_entry);
+	}
+
+	while (parse_symbol(ctx, SYM_COMMA)) {
+		/* parse_arith_expr() can succeed without producing a node
+		 * (e.g. when a terminator follows the comma), so the result
+		 * is checked before it is linked into the array */
+		item = NULL;
+		if (!parse_arith_expr(ctx, PRECEDENCE_ARRAY + 1, &item)
+		    || !item) {
+			report_error(ctx, "failed to parse array item.");
+			ast_node_destroy((struct ast_node *)array);
+			return false;
+		}
+
+		fx_queue_push_back(&array->n_items, &item->n_entry);
+	}
+
+	fx_queue_push_back(&expr->expr_out_queue, &array->n_base.n_entry);
+	expr->expr_prev = EXPR_C_OPERAND;
+
+	return true;
+}
+
+/* debugging aid: print the operator stack and output queue, front to back */
+static void dump_expr_ctx(struct expr_parse_ctx *expr)
+{
+	printf("op stack:\n");
+	fx_queue_entry *entry = fx_queue_first(&expr->expr_operator_stack);
+	while (entry) {
+		struct ast_node *node
+			= fx_unbox(struct ast_node, entry, n_entry);
+		print_ast_node(node);
+		entry = fx_queue_next(entry);
+	}
+
+	printf("out queue:\n");
+	entry = fx_queue_first(&expr->expr_out_queue);
+	while (entry) {
+		struct ast_node *node
+			= fx_unbox(struct ast_node, entry, n_entry);
+		print_ast_node(node);
+		entry = fx_queue_next(entry);
+	}
+}
+
+/*
+ * Check whether a command may appear at the current position: only at the
+ * very start of an expression, or directly after an assignment symbol.
+ *
+ * NOTE(review): expr_prev_symbol is also assigned token types and operator
+ * ids by parse_arith_expr(), so this switch compares values from different
+ * enumerations. presumably their numeric ranges do not overlap; confirm.
+ */
+static bool can_use_command(struct expr_parse_ctx *ctx)
+{
+	switch (ctx->expr_prev_symbol) {
+	case TOK_NONE:
+	case SYM_EQUAL:
+	case SYM_PLUS_EQUAL:
+	case SYM_HYPHEN_EQUAL:
+	case SYM_ASTERISK_EQUAL:
+	case SYM_FORWARD_SLASH_EQUAL:
+	case SYM_PERCENT_EQUAL:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Destroy every node still linked into the expression's operator stack and
+ * output queue. Used on the failure paths of parse_arith_expr() so that
+ * nodes which never made it into a tree are not leaked.
+ */
+static void discard_expr_ctx(struct expr_parse_ctx *expr)
+{
+	fx_queue *queues[] = {
+		&expr->expr_operator_stack,
+		&expr->expr_out_queue,
+	};
+
+	for (size_t i = 0; i < sizeof queues / sizeof queues[0]; i++) {
+		fx_queue_entry *entry = NULL;
+		while ((entry = fx_queue_pop_front(queues[i])) != NULL) {
+			ast_node_destroy(
+				fx_unbox(struct ast_node, entry, n_entry));
+		}
+	}
+}
+
+/*
+ * Parse an arithmetic expression.
+ *
+ * Tokens are consumed until a linefeed, a terminating symbol (`;`, `&`, `|`,
+ * `)`, `}`, `]`), the end of input, or an operator whose precedence is below
+ * `minimum_precedence`. Operands and operators are collected in postfix
+ * order and then folded into a tree by finalise_expr(). If the expression is
+ * followed by `|` and pipelines are allowed at this precedence, the result is
+ * handed to parse_pipeline().
+ *
+ * On success *out receives the resulting node; it may be NULL if no operand
+ * was consumed. On failure false is returned and the nodes collected so far
+ * are destroyed.
+ */
+bool parse_arith_expr(
+	struct parse_ctx *ctx,
+	enum operator_precedence minimum_precedence,
+	struct ast_node **out)
+{
+	struct expr_parse_ctx expr = {
+		.expr_minimum_precedence = minimum_precedence,
+	};
+
+	while (!expr.expr_fail && !expr.expr_done) {
+		struct lex_token *tok = peek_token(ctx);
+		if (!tok) {
+			break;
+		}
+
+		/* the helpers called below may claim or discard `tok`, so any
+		 * field needed afterwards is copied out before they run */
+		switch (tok->tok_type) {
+		case TOK_LINEFEED:
+			expr.expr_done = true;
+			break;
+		case TOK_WORD: {
+			if (!can_use_command(&expr)) {
+				report_error(
+					ctx,
+					"expected a value expression");
+				expr.expr_fail = true;
+				break;
+			}
+
+			struct ast_node *value = NULL;
+			if (!parse_command(ctx, &value)) {
+				expr.expr_fail = true;
+				break;
+			}
+
+			fx_queue_push_back(
+				&expr.expr_out_queue,
+				&value->n_entry);
+
+			/* the command is the operand at this position; record
+			 * it so that a following operator is accepted and a
+			 * second command is not */
+			expr.expr_prev = EXPR_C_OPERAND;
+			expr.expr_prev_symbol = TOK_WORD;
+			break;
+		}
+
+		case TOK_VAR:
+		case TOK_INT:
+		case TOK_DOUBLE:
+		case TOK_STRING:
+		case TOK_STR_START: {
+			const unsigned int type = tok->tok_type;
+			expr.expr_fail = !parse_operand(ctx, &expr);
+			expr.expr_prev_symbol = type;
+			break;
+		}
+		case TOK_OPERATOR: {
+			const unsigned int op_token = tok->tok_operator;
+
+			switch (tok->tok_operator) {
+			/* these two are special cases, as they are both
+			 * unary AND binary operators */
+			case TKOP_SPLIT:
+			case TKOP_JOIN:
+				if (expr.expr_prev == EXPR_C_OPERAND) {
+					expr.expr_fail = !parse_binary_operator(
+						ctx,
+						&expr);
+				} else {
+					expr.expr_fail = !parse_unary_operator(
+						ctx,
+						&expr);
+				}
+
+				break;
+			case TKOP_BNOT:
+			case TKOP_NOT:
+				expr.expr_fail
+					= !parse_unary_operator(ctx, &expr);
+				break;
+			default:
+				expr.expr_fail
+					= !parse_binary_operator(ctx, &expr);
+				break;
+			}
+
+			expr.expr_prev_symbol = op_token;
+			break;
+		}
+		case TOK_SYMBOL: {
+			const unsigned int symbol = tok->tok_symbol;
+
+			switch (tok->tok_symbol) {
+			case SYM_SEMICOLON:
+			case SYM_AMPERSAND:
+			case SYM_PIPE:
+			case SYM_RIGHT_PAREN:
+			case SYM_RIGHT_BRACE:
+			case SYM_RIGHT_BRACKET:
+				expr.expr_done = true;
+				break;
+			case SYM_COMMA:
+				expr.expr_fail = !parse_comma(ctx, &expr);
+				break;
+			case SYM_LEFT_PAREN: {
+				/* `(` directly after an operand is a call;
+				 * its result goes through the normal
+				 * failure/finalisation path below */
+				if (expr.expr_prev == EXPR_C_OPERAND) {
+					expr.expr_fail
+						= !parse_call(ctx, &expr);
+					break;
+				}
+
+				struct ast_node *v = NULL;
+				expr.expr_fail = !parse_subexpr(ctx, &v);
+				if (expr.expr_fail) {
+					break;
+				}
+
+				fx_queue_push_back(
+					&expr.expr_out_queue,
+					&v->n_entry);
+				expr.expr_prev = EXPR_C_OPERAND;
+				break;
+			}
+			case SYM_DOLLAR_LEFT_PAREN:
+			case SYM_AT_LEFT_PAREN:
+			case SYM_AT_LEFT_BRACE:
+				expr.expr_fail = !parse_operand(ctx, &expr);
+				break;
+			default: {
+				const struct operator_info *op
+					= operator_get_by_token(
+						tok->tok_symbol);
+				if (!op) {
+					report_error(
+						ctx,
+						"unexpected symbol in "
+						"arithmetic expression");
+					expr.expr_fail = true;
+					break;
+				}
+
+				if (op->op_arity == OPA_BINARY) {
+					expr.expr_fail = !parse_binary_operator(
+						ctx,
+						&expr);
+				} else {
+					expr.expr_fail = !parse_unary_operator(
+						ctx,
+						&expr);
+				}
+				break;
+			}
+			}
+
+			expr.expr_prev_symbol = symbol;
+			break;
+		}
+		default:
+			report_error(
+				ctx,
+				"unexpected token in arithmetic "
+				"expression");
+			expr.expr_fail = true;
+			break;
+		}
+	}
+
+	if (expr.expr_fail) {
+		discard_expr_ctx(&expr);
+		return false;
+	}
+
+	struct ast_node *value = NULL;
+	if (!finalise_expr(&expr, &value, PRECEDENCE_ASSIGN)) {
+		report_error(ctx, "failed to convert expression to AST");
+		discard_expr_ctx(&expr);
+		return false;
+	}
+
+	if (PRECEDENCE_PIPELINE >= expr.expr_minimum_precedence) {
+		if (peek_symbol(ctx, SYM_PIPE)) {
+			return parse_pipeline(ctx, value, out);
+		}
+	}
+
+	*out = value;
+	return true;
+}
diff --git a/bshell/parse/syntax/expr.c b/bshell/parse/syntax/expr.c
new file mode 100644
index 0000000..60d2fe6
--- /dev/null
+++ b/bshell/parse/syntax/expr.c
@@ -0,0 +1,17 @@
+#include "../syntax.h"
+
+/*
+ * Parse a general expression: try an arithmetic expression first and, if
+ * one does not start here or fails to parse, fall back to a command.
+ *
+ * Returns true as soon as one of the two parsers succeeds.
+ */
+bool parse_expr(struct parse_ctx *ctx, struct ast_node **out)
+{
+	if (peek_arith_expr(ctx)
+	    && parse_arith_expr(ctx, PRECEDENCE_MINIMUM, out)) {
+		return true;
+	}
+
+	return peek_command(ctx) && parse_command(ctx, out);
+}