parse: implement parsing of arithmetic expressions and data structures

This commit is contained in:
2026-05-12 22:58:48 +01:00
parent 227e73853c
commit c4529d474a
2 changed files with 898 additions and 8 deletions
+883 -8
View File
@@ -1,27 +1,902 @@
#include "../../debug.h"
#include "../../operator.h"
#include "../syntax.h"
#include <fx/queue.h>
/* the kind of expression component that the parser consumed most recently.
 * a zero-initialised parse context starts out as EXPR_C_NONE. */
enum expr_component {
	/* nothing has been consumed yet */
	EXPR_C_NONE = 0,
	/* a value (literal, variable, sub-expression, ...) */
	EXPR_C_OPERAND = 1,
	/* an operator that takes a left and a right operand */
	EXPR_C_BINARY_OP = 2,
	/* an operator that takes a single operand */
	EXPR_C_UNARY_OP = 3,
};
/* working state for parsing a single arithmetic expression. */
struct expr_parse_ctx {
	/* operators that have been read but not yet moved to the output
	 * queue. the back of the queue is the top of the stack. */
	fx_queue expr_operator_stack;
	/* operands, and operators already popped from the operator stack,
	 * in the order they were emitted. */
	fx_queue expr_out_queue;
	/* what kind of component was parsed last. */
	enum expr_component expr_prev;
	/* token type, operator id or symbol id of the token handled last;
	 * written by parse_arith_expr() and read by can_use_command(). */
	unsigned int expr_prev_symbol;
	/* operators with a precedence below this value end the expression. */
	enum operator_precedence expr_minimum_precedence;
	/* set when the end of the expression has been reached. */
	bool expr_done;
	/* set when parsing has failed. */
	bool expr_fail;
};
static bool op_node_is_complete(struct op_ast_node *node)
{
if (!node->n_op) {
return false;
}
switch (node->n_op->op_arity) {
case OPA_UNARY:
return node->n_right != NULL;
case OPA_BINARY:
return (node->n_left != NULL && node->n_right != NULL);
default:
return false;
}
}
/* turn the linear contents of an expression context into an expression tree.
 *
 * the work is done in four stages:
 *  1. operators are popped from the back of `expr_operator_stack` and appended
 *     to `expr_out_queue`, until the stack is empty or an operator with a
 *     precedence below `minimum_precedence` is found (that operator is put
 *     back on the stack).
 *  2. `expr_out_queue` is drained front to back. operands and already-complete
 *     operator nodes are appended to a temporary queue; an incomplete operator
 *     takes its right operand (and, for binary operators, its left operand)
 *     from the back of the temporary queue, and is then appended itself.
 *  3. for every operator that is still on `expr_operator_stack`, one entry is
 *     moved from the front of the temporary queue back to `expr_out_queue`.
 *  4. the entry at the back of the temporary queue is stored in `*out`.
 *
 * returns false if an operator stack entry cannot be unboxed, or if the
 * temporary queue runs out of entries during stage 3; returns true otherwise.
 *
 * NOTE(review): if the temporary queue is empty at stage 4, `*out` is whatever
 * fx_unbox() produces for a NULL entry - presumably NULL, since parse_comma()
 * checks the result for NULL. confirm against fx_unbox(). */
static bool finalise_expr(
	struct expr_parse_ctx *ctx,
	struct ast_node **out,
	enum operator_precedence minimum_precedence)
{
	fx_queue_entry *entry = NULL;
	/* stage 1: flush eligible operators from the stack to the out queue. */
	while (true) {
		entry = fx_queue_pop_back(&ctx->expr_operator_stack);
		if (!entry) {
			break;
		}
		struct op_ast_node *node
			= fx_unbox(struct op_ast_node, entry, n_base.n_entry);
		if (!node) {
			/* this should never happen */
			return false;
		}
		const struct operator_info *op = node->n_op;
		/* if we aren't processing operators below a certain precedence
		 * then leave them on the stack and stop here. */
		if (op->op_precedence < minimum_precedence) {
			fx_queue_push_back(&ctx->expr_operator_stack, entry);
			break;
		}
		fx_queue_push_back(&ctx->expr_out_queue, entry);
	}
	/* stage 2: build the tree. `q` is the temporary operand queue. */
	fx_queue q = FX_QUEUE_INIT;
	fx_queue_entry *tmp = NULL;
	entry = fx_queue_first(&ctx->expr_out_queue);
	int i = 0;
	while (entry) {
		struct ast_node *item
			= fx_unbox(struct ast_node, entry, n_entry);
		/* fetch the successor before the entry is unlinked below. */
		fx_queue_entry *next = fx_queue_next(entry);
		fx_queue_delete(&ctx->expr_out_queue, entry);
		/* if the node is an operand, just push it to a
		 * temporary queue and come back to it later. */
		if (item->n_type != AST_OP) {
			/* operand */
			fx_queue_push_back(&q, &item->n_entry);
			goto next;
		}
		const struct operator_info *op = NULL;
		struct op_ast_node *op_node = (struct op_ast_node *)item;
		/* if an operator node is already complete (i.e. it
		 * already has all the operands it needs), it can be
		 * pushed to the operand queue as-is */
		if (op_node_is_complete(op_node)) {
			fx_queue_push_back(&q, &item->n_entry);
			goto next;
		}
		/* otherwise, pop the relevant operands from the operand
		 * queue... */
		op = op_node->n_op;
		/* the most recently queued operand is the right-hand one. */
		tmp = fx_queue_pop_back(&q);
		op_node->n_right = fx_unbox(struct ast_node, tmp, n_entry);
		if (op_node->n_right) {
			op_node->n_right->n_parent = (struct ast_node *)op_node;
#if 0
ast_node_extend_bounds_recursive(
(struct ivy_ast_node *)op_node,
(struct ivy_ast_node *)tmp);
#endif
		}
		if (op->op_arity == OPA_BINARY) {
			/* a binary operator also takes the operand before that
			 * as its left-hand side. */
			tmp = fx_queue_pop_back(&q);
			op_node->n_left
				= fx_unbox(struct ast_node, tmp, n_entry);
			if (op_node->n_left) {
				op_node->n_left->n_parent
					= (struct ast_node *)op_node;
#if 0
ast_node_extend_bounds_recursive(
(struct ivy_ast_node *)op_node,
(struct ivy_ast_node *)tmp);
#endif
			}
		}
		/* ...and push the newly-completed operator node to the
		 * operand queue */
		fx_queue_push_back(&q, &op_node->n_base.n_entry);
	next:
		entry = next;
	}
#if 0
debug_printf("** after hierarchisation:\n");
print_expr_queues(state);
#endif
	/* stage 3: if we are not processing operators below a certain
	 * precedence (e.g. when only part of the expression is being
	 * collected), those operators will still be on the parser state's
	 * operator stack, but their operands have just been moved to the
	 * temporary operand queue used above. move one entry per remaining
	 * operator back to the parser state's output queue here so they can
	 * be used later. */
	entry = fx_queue_first(&ctx->expr_operator_stack);
	while (entry) {
		fx_queue_entry *entry2 = fx_queue_pop_front(&q);
		if (!entry2) {
			return false;
		}
		fx_queue_push_back(&ctx->expr_out_queue, entry2);
		entry = fx_queue_next(entry);
	}
#if 0
debug_printf("** after de-linearisation:\n");
print_expr_queues(state);
ivy_ast_node_print(*expr_tree);
debug_printf("------\n");
#endif
	/* stage 4: the final node remaining on the temp operand queue is the
	 * root node of the new expression tree */
	tmp = fx_queue_pop_back(&q);
	*out = fx_unbox(struct ast_node, tmp, n_entry);
	return true;
}
/* report whether the next token can start an arithmetic expression.
 *
 * no token is consumed. literals, variables, interpolated-string starts and
 * operator tokens qualify. of the symbol tokens, only `+` and `-` (SYM_PLUS
 * and SYM_HYPHEN) qualify. */
bool peek_arith_expr(struct parse_ctx *ctx)
{
	switch (peek_token_type(ctx)) {
	case TOK_SYMBOL:
		/* every path through this inner switch returns, so nothing
		 * after it can be reached. */
		switch (peek_unknown_symbol(ctx)) {
		case SYM_PLUS:
		case SYM_HYPHEN:
			return true;
		default:
			return false;
		}
	case TOK_INT:
	case TOK_DOUBLE:
	case TOK_STRING:
	case TOK_VAR:
	case TOK_STR_START:
	case TOK_OPERATOR:
		return true;
	default:
		return false;
	}
}
bool parse_arith_expr(struct parse_ctx *ctx, struct ast_node **out)
/* parse a parenthesised sub-expression: `(` expression `)`.
 *
 * on success the parsed expression is stored in `*out` and true is returned.
 * on failure an error is reported, `*out` is left untouched and false is
 * returned. */
static bool parse_subexpr(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!parse_symbol(ctx, SYM_LEFT_PAREN)) {
		report_error(ctx, "expected `(`");
		/* without the opening parenthesis there is nothing to parse. */
		return false;
	}

	struct ast_node *v = NULL;
	if (!parse_expr(ctx, &v)) {
		report_error(ctx, "error while parsing parenthesis expression");
		return false;
	}

	if (!parse_symbol(ctx, SYM_RIGHT_PAREN)) {
		report_error(ctx, "expected `)` after parenthesis expression");
		/* the expression was parsed but will not be handed to the
		 * caller, so release it here. */
		if (v) {
			ast_node_destroy(v);
		}
		return false;
	}

	*out = v;
	return true;
}
static bool parse_stmt_block(struct parse_ctx *ctx, struct ast_node **out)
{
if (!parse_symbol(ctx, SYM_DOLLAR_LEFT_PAREN)) {
report_error(ctx, "expected `$(`");
return false;
}
if (parse_symbol(ctx, SYM_RIGHT_PAREN)) {
*out = ast_node_create(AST_NULL);
return true;
}
struct ast_node *v = NULL;
if (!parse_statement_list(ctx, &v)) {
return false;
}
if (!parse_symbol(ctx, SYM_RIGHT_PAREN)) {
report_error(ctx, "expected ')' after subexpression");
ast_node_destroy(v);
return false;
}
*out = v;
return true;
}
/* parse a hashtable literal: `@{` key `=` value ... `}`.
 *
 * a key is either a bare word (stored as an AST_STRING node) or any value
 * accepted by parse_arith_value(). items are separated by a linefeed or `;`.
 *
 * on success the new AST_HASHTABLE node is stored in `*out` and true is
 * returned. on failure the partially built table is destroyed, `*out` is left
 * untouched and false is returned. allocation failures set `ctx->p_status` to
 * BSHELL_ERR_NO_MEMORY. */
static bool parse_hashtable(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!parse_symbol(ctx, SYM_AT_LEFT_BRACE)) {
		report_error(ctx, "expected `@{`");
		return false;
	}

	parse_linefeed(ctx);

	struct hashtable_ast_node *table
		= (struct hashtable_ast_node *)ast_node_create(AST_HASHTABLE);
	if (!table) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		return false;
	}

	bool ok = true;
	while (ok) {
		if (parse_symbol(ctx, SYM_RIGHT_BRACE)) {
			break;
		}

		parse_linefeed(ctx);

		struct hashtable_item_ast_node *item
			= (struct hashtable_item_ast_node *)ast_node_create(
				AST_HASHTABLE_ITEM);
		if (!item) {
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			ok = false;
			break;
		}

		struct lex_token *tok = NULL;
		if (parse_word(ctx, &tok)) {
			struct string_ast_node *v
				= (struct string_ast_node *)ast_node_create(
					AST_STRING);
			if (!v) {
				/* NOTE(review): `tok` was handed over by
				 * parse_word(); if that transfers ownership it
				 * is leaked here - confirm how tokens are
				 * released. */
				ctx->p_status = BSHELL_ERR_NO_MEMORY;
				ast_node_destroy((struct ast_node *)item);
				ok = false;
				break;
			}

			v->n_value = tok;
			item->n_key = (struct ast_node *)v;
		} else if (!parse_arith_value(ctx, &item->n_key)) {
			report_error(ctx, "failed to parse hashtable key");
			ast_node_destroy((struct ast_node *)item);
			ok = false;
			break;
		}

		if (!parse_symbol(ctx, SYM_EQUAL)) {
			report_error(ctx, "expected `=` after hashtable key");
			ast_node_destroy((struct ast_node *)item);
			ok = false;
			break;
		}

		if (!parse_expr(ctx, &item->n_value)) {
			report_error(ctx, "failed to parse hashtable value");
			ast_node_destroy((struct ast_node *)item);
			ok = false;
			break;
		}

		/* the item is complete; from here on the table owns it. */
		fx_queue_push_back(&table->n_items, &item->n_base.n_entry);

		if (parse_symbol(ctx, SYM_RIGHT_BRACE)) {
			break;
		}

		if (!parse_linefeed(ctx) && !parse_symbol(ctx, SYM_SEMICOLON)) {
			report_error(
				ctx,
				"expected `;`, `}`, or linefeed after "
				"hashtable value");
			ok = false;
			break;
		}
	}

	if (!ok) {
		ast_node_destroy((struct ast_node *)table);
		return false;
	}

	*out = (struct ast_node *)table;
	return true;
}
/* parse an array literal: `@(` value [`,` value]... `)`.
 *
 * each item is parsed with parse_arith_value(). on success the new AST_ARRAY
 * node is stored in `*out` and true is returned. on failure the partially
 * built array is destroyed, `*out` is left untouched and false is returned. */
static bool parse_array(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!parse_symbol(ctx, SYM_AT_LEFT_PAREN)) {
		report_error(ctx, "expected `@(`");
		return false;
	}

	struct array_ast_node *array
		= (struct array_ast_node *)ast_node_create(AST_ARRAY);
	if (!array) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		return false;
	}

	size_t nr_items = 0;
	bool ok = true;
	while (ok) {
		if (parse_symbol(ctx, SYM_RIGHT_PAREN)) {
			break;
		}

		/* every item after the first must be preceded by a comma. */
		if (nr_items && !parse_symbol(ctx, SYM_COMMA)) {
			report_error(
				ctx,
				"expected `,` or `)` after array value");
			ok = false;
			/* stop here instead of trying to parse another value
			 * after the error has been reported. */
			break;
		}

		struct ast_node *item = NULL;
		if (!parse_arith_value(ctx, &item)) {
			report_error(ctx, "failed to parse array item");
			ok = false;
			break;
		}

		fx_queue_push_back(&array->n_items, &item->n_entry);
		nr_items++;
	}

	if (!ok) {
		ast_node_destroy((struct ast_node *)array);
		return false;
	}

	*out = (struct ast_node *)array;
	return true;
}
/* parse an interpolated string: a TOK_STR_START token, any number of values,
 * and a closing TOK_STR_END token.
 *
 * returns false without consuming anything or writing `*out` if the next
 * token is not TOK_STR_START. if the node cannot be allocated, `p_status` is
 * set to BSHELL_ERR_NO_MEMORY and false is returned. if an element fails to
 * parse, the node is destroyed, `*out` is set to NULL and false is returned.
 * otherwise the new AST_FSTRING node is stored in `*out` and true is
 * returned. */
bool parse_fstring(struct parse_ctx *ctx, struct ast_node **out)
{
	if (peek_token_type(ctx) != TOK_STR_START) {
		return false;
	}

	discard_token(ctx);

	struct fstring_ast_node *result
		= (struct fstring_ast_node *)ast_node_create(AST_FSTRING);
	if (result == NULL) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		return false;
	}

	for (;;) {
		/* the closing token ends the string successfully. */
		if (peek_token_type(ctx) == TOK_STR_END) {
			discard_token(ctx);
			*out = (struct ast_node *)result;
			return true;
		}

		struct ast_node *element = NULL;
		if (!parse_arith_value(ctx, &element)) {
			ast_node_destroy((struct ast_node *)result);
			*out = NULL;
			return false;
		}

		fx_queue_push_back(&result->n_elements, &element->n_entry);
	}
}
bool parse_arith_value(struct parse_ctx *ctx, struct ast_node **out)
{
struct lex_token *tok = peek_token(ctx);
switch (tok->tok_type) {
case TOK_INT: {
struct int_ast_node *v
= (struct int_ast_node *)ast_node_create(AST_INT);
v->n_value = claim_token(ctx);
*out = (struct ast_node *)v;
return true;
}
case TOK_DOUBLE: {
struct double_ast_node *v
= (struct double_ast_node *)ast_node_create(AST_DOUBLE);
v->n_value = claim_token(ctx);
*out = (struct ast_node *)v;
return true;
}
case TOK_STRING: {
struct string_ast_node *v
= (struct string_ast_node *)ast_node_create(AST_STRING);
v->n_value = claim_token(ctx);
*out = (struct ast_node *)v;
return true;
}
case TOK_VAR: {
struct var_ast_node *v
= (struct var_ast_node *)ast_node_create(AST_VAR);
v->n_ident = claim_token(ctx);
*out = (struct ast_node *)v;
return true;
}
case TOK_STR_START:
return parse_fstring(ctx, out);
case TOK_SYMBOL:
switch (tok->tok_symbol) {
case SYM_LEFT_PAREN:
return parse_subexpr(ctx, out);
case SYM_DOLLAR_LEFT_PAREN:
return parse_stmt_block(ctx, out);
case SYM_AT_LEFT_BRACE:
return parse_hashtable(ctx, out);
case SYM_AT_LEFT_PAREN:
return parse_array(ctx, out);
case SYM_LEFT_BRACE:
return parse_block(ctx, out);
default:
report_error(ctx, "token is not a valid operand");
return false;
}
break;
default:
report_error(ctx, "token is not a valid operand");
return false;
}
}
/* parse one operand and append it to the expression's output queue.
 *
 * fails with an error if the previous component was also an operand. note
 * that the expression is marked as having seen an operand before the value is
 * parsed, so the mark remains even when parsing the value fails. */
static bool parse_operand(struct parse_ctx *ctx, struct expr_parse_ctx *expr)
{
	const bool follows_operand = (expr->expr_prev == EXPR_C_OPERAND);

	if (follows_operand) {
		report_error(ctx, "encountered two operands in a row");
		return false;
	}

	expr->expr_prev = EXPR_C_OPERAND;

	struct ast_node *operand = NULL;
	if (parse_arith_value(ctx, &operand)) {
		fx_queue_push_back(&expr->expr_out_queue, &operand->n_entry);
		return true;
	}

	return false;
}
/* push an operator node onto the expression's operator stack.
 *
 * before the node is pushed, operators are moved from the top of the stack
 * to the output queue for as long as the top operator has a higher
 * precedence than the new one, or the same precedence while the new operator
 * is left-associative.
 *
 * nothing is pushed if the node carries no operator information, or if a
 * node that is not an AST_OP is found on top of the stack. */
void arith_push_operator(struct expr_parse_ctx *state, struct op_ast_node *node)
{
	const struct operator_info *incoming = node->n_op;
	if (incoming == NULL) {
		return;
	}

	fx_queue_entry *top = NULL;
	while ((top = fx_queue_last(&state->expr_operator_stack)) != NULL) {
		struct ast_node *top_node
			= fx_unbox(struct ast_node, top, n_entry);
		if (top_node->n_type != AST_OP) {
			return;
		}

		const struct operator_info *top_op
			= ((struct op_ast_node *)top_node)->n_op;

		/* the operator on the stack stays put if the new operator
		 * binds more tightly than it does... */
		if (top_op->op_precedence < incoming->op_precedence) {
			break;
		}

		/* ...or equally tightly, unless the new operator is
		 * left-associative. */
		if (top_op->op_precedence == incoming->op_precedence
		    && incoming->op_associativity != ASSOCIATIVITY_LEFT) {
			break;
		}

		fx_queue_delete(&state->expr_operator_stack, top);
		fx_queue_push_back(&state->expr_out_queue, top);
	}

	fx_queue_push_back(&state->expr_operator_stack, &node->n_base.n_entry);
}
static bool parse_unary_operator(
struct parse_ctx *ctx,
struct expr_parse_ctx *expr)
{
struct lex_token *tok = peek_token(ctx);
const struct operator_info *op = NULL;
switch (tok->tok_type) {
case TOK_SYMBOL:
op = operator_get_by_token(tok->tok_symbol);
break;
case TOK_OPERATOR:
switch (tok->tok_operator) {
case TKOP_SPLIT:
op = operator_get_by_id(OP_USPLIT);
break;
case TKOP_JOIN:
op = operator_get_by_id(OP_USPLIT);
break;
default:
op = operator_get_by_token(tok->tok_operator);
break;
}
break;
default:
break;
}
if (expr->expr_prev == EXPR_C_OPERAND
&& op->op_location == OPL_PREFIX) {
report_error(
ctx,
"unexpected operand before unary "
"operator");
return false;
}
if (!op) {
report_error(ctx, "unknown unary operator");
return false;
}
if (op->op_precedence < expr->expr_minimum_precedence) {
expr->expr_done = true;
return true;
}
expr->expr_prev = EXPR_C_BINARY_OP;
struct op_ast_node *op_node
= (struct op_ast_node *)ast_node_create(AST_OP);
if (!op_node) {
return false;
}
op_node->n_op = op;
discard_token(ctx);
arith_push_operator(expr, op_node);
return true;
}
static bool parse_binary_operator(
struct parse_ctx *ctx,
struct expr_parse_ctx *expr)
{
struct lex_token *tok = peek_token(ctx);
const struct operator_info *op = NULL;
switch (tok->tok_type) {
case TOK_SYMBOL:
op = operator_get_by_token(tok->tok_symbol);
break;
case TOK_OPERATOR:
switch (tok->tok_operator) {
case TKOP_SPLIT:
op = operator_get_by_id(OP_BSPLIT);
break;
case TKOP_JOIN:
op = operator_get_by_id(OP_BJOIN);
break;
default:
op = operator_get_by_token(tok->tok_operator);
break;
}
default:
break;
}
if (!op) {
report_error(ctx, "unknown binary operator");
return false;
}
if (op->op_precedence < expr->expr_minimum_precedence) {
expr->expr_done = true;
return true;
}
if (expr->expr_prev != EXPR_C_OPERAND) {
switch (op->op_id) {
case OP_PAREN:
break;
default:
report_error(
ctx,
"expected operand before binary "
"operator");
return false;
}
}
expr->expr_prev = EXPR_C_BINARY_OP;
struct op_ast_node *op_node
= (struct op_ast_node *)ast_node_create(AST_OP);
if (!op_node) {
return false;
}
op_node->n_op = op;
discard_token(ctx);
arith_push_operator(expr, op_node);
return true;
}
/* placeholder for parsing a call expression. nothing is parsed or consumed
 * and no error is reported; the function always fails. */
static bool parse_call(struct parse_ctx *ctx, struct expr_parse_ctx *expr)
{
	/* neither argument is used yet. */
	(void)ctx;
	(void)expr;

	return false;
}
/* handle a `,` inside an expression by collecting a comma-separated list of
 * items into an AST_ARRAY node.
 *
 * if PRECEDENCE_ARRAY is below the expression's minimum precedence, the
 * expression is marked as done and nothing is consumed. otherwise everything
 * parsed so far at or above PRECEDENCE_ARRAY becomes the first item, and each
 * following `,` introduces another item parsed with a minimum precedence of
 * PRECEDENCE_ARRAY + 1. the finished array is appended to the output queue as
 * an operand. */
static bool parse_comma(struct parse_ctx *ctx, struct expr_parse_ctx *expr)
{
	if (expr->expr_minimum_precedence > PRECEDENCE_ARRAY) {
		expr->expr_done = true;
		return true;
	}

	struct ast_node *item = NULL;
	if (!finalise_expr(expr, &item, PRECEDENCE_ARRAY)) {
		report_error(ctx, "failed to collect first array item.");
		return false;
	}

	struct array_ast_node *list
		= (struct array_ast_node *)ast_node_create(AST_ARRAY);
	if (list == NULL) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		ast_node_destroy(item);
		return false;
	}

	/* there may be nothing in front of the first comma. */
	if (item != NULL) {
		fx_queue_push_back(&list->n_items, &item->n_entry);
	}

	/* each comma must be followed by one more item. */
	while (parse_symbol(ctx, SYM_COMMA)) {
		if (!parse_arith_expr(ctx, PRECEDENCE_ARRAY + 1, &item)) {
			report_error(ctx, "failed to parse array item.");
			ast_node_destroy((struct ast_node *)list);
			return false;
		}

		fx_queue_push_back(&list->n_items, &item->n_entry);
	}

	fx_queue_push_back(&expr->expr_out_queue, &list->n_base.n_entry);
	expr->expr_prev = EXPR_C_OPERAND;
	return true;
}
/* debugging aid: print every node on the operator stack, followed by every
 * node in the output queue, front to back. */
static void dump_expr_ctx(struct expr_parse_ctx *expr)
{
	fx_queue_entry *cur = NULL;

	printf("op stack:\n");
	for (cur = fx_queue_first(&expr->expr_operator_stack); cur;
	     cur = fx_queue_next(cur)) {
		print_ast_node(fx_unbox(struct ast_node, cur, n_entry));
	}

	printf("out queue:\n");
	for (cur = fx_queue_first(&expr->expr_out_queue); cur;
	     cur = fx_queue_next(cur)) {
		print_ast_node(fx_unbox(struct ast_node, cur, n_entry));
	}
}
/* report whether a command may appear at the current position of the
 * expression: either nothing has been recorded yet (TOK_NONE), or the last
 * recorded symbol is `=` or one of the compound assignment symbols. */
static bool can_use_command(struct expr_parse_ctx *ctx)
{
	const unsigned int prev = ctx->expr_prev_symbol;

	return prev == TOK_NONE
		|| prev == SYM_EQUAL
		|| prev == SYM_PLUS_EQUAL
		|| prev == SYM_HYPHEN_EQUAL
		|| prev == SYM_ASTERISK_EQUAL
		|| prev == SYM_FORWARD_SLASH_EQUAL
		|| prev == SYM_PERCENT_EQUAL;
}
bool parse_arith_expr(
struct parse_ctx *ctx,
enum operator_precedence minimum_precedence,
struct ast_node **out)
{
struct expr_parse_ctx expr = {
.expr_minimum_precedence = minimum_precedence,
};
while (!expr.expr_fail && !expr.expr_done) {
struct lex_token *tok = peek_token(ctx);
if (!tok) {
break;
}
switch (tok->tok_type) {
case TOK_LINEFEED:
expr.expr_done = true;
break;
case TOK_WORD: {
if (!can_use_command(&expr)) {
report_error(
ctx,
"expected a value expression");
expr.expr_fail = true;
break;
}
struct ast_node *value = NULL;
if (!parse_command(ctx, &value)) {
expr.expr_fail = true;
break;
}
fx_queue_push_back(
&expr.expr_out_queue,
&value->n_entry);
break;
}
case TOK_VAR:
case TOK_INT:
case TOK_DOUBLE:
case TOK_STRING:
case TOK_STR_START:
expr.expr_fail = !parse_operand(ctx, &expr);
expr.expr_prev_symbol = tok->tok_type;
break;
case TOK_OPERATOR:
switch (tok->tok_operator) {
/* these two are special cases, as they are both
* unary AND binary operators */
case TKOP_SPLIT:
case TKOP_JOIN:
if (expr.expr_prev == EXPR_C_OPERAND) {
expr.expr_fail = !parse_binary_operator(
ctx,
&expr);
} else {
expr.expr_fail = !parse_unary_operator(
ctx,
&expr);
}
break;
case TKOP_BNOT:
case TKOP_NOT:
expr.expr_fail
= !parse_unary_operator(ctx, &expr);
break;
default:
expr.expr_fail
= !parse_binary_operator(ctx, &expr);
break;
}
expr.expr_prev_symbol = tok->tok_operator;
break;
case TOK_SYMBOL:
switch (tok->tok_symbol) {
case SYM_SEMICOLON:
case SYM_AMPERSAND:
case SYM_PIPE:
case SYM_RIGHT_PAREN:
case SYM_RIGHT_BRACE:
case SYM_RIGHT_BRACKET:
expr.expr_done = true;
break;
case SYM_COMMA:
expr.expr_fail = !parse_comma(ctx, &expr);
break;
case SYM_LEFT_PAREN: {
if (expr.expr_prev == EXPR_C_OPERAND) {
return parse_call(ctx, &expr);
}
struct ast_node *v = NULL;
expr.expr_fail = !parse_subexpr(ctx, &v);
if (expr.expr_fail) {
break;
}
fx_queue_push_back(
&expr.expr_out_queue,
&v->n_entry);
expr.expr_prev = EXPR_C_OPERAND;
break;
}
case SYM_DOLLAR_LEFT_PAREN:
case SYM_AT_LEFT_PAREN:
case SYM_AT_LEFT_BRACE:
expr.expr_fail = !parse_operand(ctx, &expr);
break;
default: {
const struct operator_info *op
= operator_get_by_token(
tok->tok_symbol);
if (op->op_arity == OPA_BINARY) {
expr.expr_fail = !parse_binary_operator(
ctx,
&expr);
} else {
expr.expr_fail = !parse_unary_operator(
ctx,
&expr);
}
break;
}
}
expr.expr_prev_symbol = tok->tok_symbol;
break;
default:
report_error(
ctx,
"unexpected token in arithmetic "
"expression");
expr.expr_fail = true;
break;
}
}
if (expr.expr_fail) {
/* TODO cleanup */
return false;
}
struct ast_node *value = NULL;
if (!finalise_expr(&expr, &value, PRECEDENCE_ASSIGN)) {
report_error(ctx, "failed to convert expression to AST");
/* TODO cleanup */
return false;
}
if (PRECEDENCE_PIPELINE >= expr.expr_minimum_precedence) {
if (peek_symbol(ctx, SYM_PIPE)) {
return parse_pipeline(ctx, value, out);
}
}
*out = value;
return true;
}
+15
View File
@@ -0,0 +1,15 @@
#include "../syntax.h"
/* parse an expression: an arithmetic expression if the next token can start
 * one, otherwise a command.
 *
 * if the arithmetic parse is attempted and fails, a command parse is still
 * attempted when the (then current) token can start a command. returns true
 * as soon as one of the parsers succeeds, false otherwise. */
bool parse_expr(struct parse_ctx *ctx, struct ast_node **out)
{
	if (peek_arith_expr(ctx)
	    && parse_arith_expr(ctx, PRECEDENCE_MINIMUM, out)) {
		return true;
	}

	if (peek_command(ctx)) {
		return parse_command(ctx, out);
	}

	return false;
}