diff --git a/bshell/operator.c b/bshell/operator.c
new file mode 100644
index 0000000..02ca884
--- /dev/null
+++ b/bshell/operator.c
@@ -0,0 +1,311 @@
+/*
+ * Operator metadata tables: one record per operator, plus lookup tables
+ * that map lexer tokens to those records.
+ */
+#include "operator.h"
+
+#include <stddef.h>
+
+#include "parse/token.h"
+
+/*
+ * Designated initializer for one operators[] slot, indexed by operator id.
+ * Slots that are never named stay zero-filled, so their op_id reads back as
+ * OP_NONE.
+ */
+#define OP(id, p, a, l, u)                               \
+	[OP_##id] = {                                    \
+		.op_id = (OP_##id),                      \
+		.op_precedence = (PRECEDENCE_##p),       \
+		.op_associativity = (ASSOCIATIVITY_##a), \
+		.op_location = (OPL_##l),                \
+		.op_arity = (OPA_##u),                   \
+	}
+
+/*
+ * Designated initializers for the token lookup tables: each slot is indexed
+ * by the token's offset from the base of its token range.
+ */
+#define TOK_OP(id, tok) [TOK_##tok - __TOK_INDEX_BASE] = &operators[OP_##id]
+#define SYM_OP(id, sym) [SYM_##sym - __SYM_INDEX_BASE] = &operators[OP_##id]
+#define KW_OP(id, kw) [KW_##kw - __KW_INDEX_BASE] = &operators[OP_##id]
+#define TKOP_OP(id, kw) [TKOP_##kw - __TKOP_INDEX_BASE] = &operators[OP_##id]
+
+/* clang-format off */
+/*
+ * NOTE(review): INCREMENT and BINARY_NOT are listed as infix binary
+ * operators, and the assignment operators as left-associative; confirm that
+ * this is what the parser expects. OP_DECREMENT has no entry.
+ */
+static const struct operator_info operators[] = {
+	OP(SUBEXPR,               PARENTHESIS,    LEFT, PREFIX, UNARY),
+	OP(ARRAY_START,           PARENTHESIS,    LEFT, PREFIX, UNARY),
+	OP(PAREN,                 PARENTHESIS,    LEFT, PREFIX, UNARY),
+	OP(HASHTABLE_START,       PARENTHESIS,    LEFT, PREFIX, UNARY),
+	OP(ACCESS,                MEMBER_ACCESS,  LEFT, INFIX,  BINARY),
+	OP(CONDITIONAL_ACCESS,    MEMBER_ACCESS,  LEFT, INFIX,  BINARY),
+	OP(STATIC_ACCESS,         STATIC_ACCESS,  LEFT, INFIX,  BINARY),
+	OP(SUBSCRIPT,             SUBSCRIPT,      LEFT, INFIX,  BINARY),
+	OP(CONDITIONAL_SUBSCRIPT, SUBSCRIPT,      LEFT, INFIX,  BINARY),
+	OP(CAST,                  CAST,           LEFT, PREFIX, UNARY),
+	OP(USPLIT,                SPLIT,          LEFT, PREFIX, UNARY),
+	OP(UJOIN,                 JOIN,           LEFT, PREFIX, UNARY),
+	OP(ARRAY_DELIMITER,       ARRAY,          LEFT, INFIX,  BINARY),
+	OP(INCREMENT,             INCREMENT,      LEFT, INFIX,  BINARY),
+	OP(LOGICAL_NOT,           NOT,            LEFT, PREFIX, UNARY),
+	OP(RANGE,                 RANGE,          LEFT, INFIX,  BINARY),
+	OP(FORMAT,                FORMAT,         LEFT, INFIX,  BINARY),
+	OP(MULTIPLY,              MULTIPLICATION, LEFT, INFIX,  BINARY),
+	OP(DIVIDE,                MULTIPLICATION, LEFT, INFIX,  BINARY),
+	OP(MODULO,                MULTIPLICATION, LEFT, INFIX,  BINARY),
+	OP(ADD,                   ADDITION,       LEFT, INFIX,  BINARY),
+	OP(SUBTRACT,              ADDITION,       LEFT, INFIX,  BINARY),
+
+	OP(BSPLIT,                COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(BJOIN,                 COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(IS,                    COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(ISNOT,                 COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(AS,                    COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(EQUAL,                 COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(NOT_EQUAL,             COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(GREATER_THAN,          COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(LESS_THAN,             COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(GREATER_EQUAL,         COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(LESS_EQUAL,            COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(LIKE,                  COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(NOTLIKE,               COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(MATCH,                 COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(NOTMATCH,              COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(IN,                    COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(NOTIN,                 COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(CONTAINS,              COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(NOTCONTAINS,           COMPARISON,     LEFT, INFIX,  BINARY),
+	OP(REPLACE,               COMPARISON,     LEFT, INFIX,  BINARY),
+
+	OP(LOGICAL_AND,           LOGICAL,        LEFT, INFIX,  BINARY),
+	OP(LOGICAL_OR,            LOGICAL,        LEFT, INFIX,  BINARY),
+	OP(LOGICAL_XOR,           LOGICAL,        LEFT, INFIX,  BINARY),
+
+	OP(BINARY_AND,            BITWISE,        LEFT, INFIX,  BINARY),
+	OP(BINARY_OR,             BITWISE,        LEFT, INFIX,  BINARY),
+	OP(BINARY_NOT,            BITWISE,        LEFT, INFIX,  BINARY),
+	OP(BINARY_XOR,            BITWISE,        LEFT, INFIX,  BINARY),
+	OP(LEFT_SHIFT,            BITWISE,        LEFT, INFIX,  BINARY),
+	OP(RIGHT_SHIFT,           BITWISE,        LEFT, INFIX,  BINARY),
+
+	OP(ASSIGN,                ASSIGN,         LEFT, INFIX,  BINARY),
+	OP(ADD_ASSIGN,            ASSIGN,         LEFT, INFIX,  BINARY),
+	OP(SUBTRACT_ASSIGN,       ASSIGN,         LEFT, INFIX,  BINARY),
+	OP(MULTIPLY_ASSIGN,       ASSIGN,         LEFT, INFIX,  BINARY),
+	OP(DIVIDE_ASSIGN,         ASSIGN,         LEFT, INFIX,  BINARY),
+	OP(MODULO_ASSIGN,         ASSIGN,         LEFT, INFIX,  BINARY),
+};
+static const size_t nr_operators = sizeof operators / sizeof operators[0];
+
+/* SYM tokens -> operators, indexed by (token - __SYM_INDEX_BASE). */
+static const struct operator_info *const operator_symbols[] = {
+	SYM_OP(LOGICAL_NOT,           BANG),
+	SYM_OP(ASSIGN,                EQUAL),
+	SYM_OP(ADD,                   PLUS),
+	SYM_OP(SUBTRACT,              HYPHEN),
+	SYM_OP(MULTIPLY,              ASTERISK),
+	SYM_OP(DIVIDE,                FORWARD_SLASH),
+	SYM_OP(MODULO,                PERCENT),
+	SYM_OP(ADD_ASSIGN,            PLUS_EQUAL),
+	SYM_OP(SUBTRACT_ASSIGN,       HYPHEN_EQUAL),
+	SYM_OP(MULTIPLY_ASSIGN,       ASTERISK_EQUAL),
+	SYM_OP(DIVIDE_ASSIGN,         FORWARD_SLASH_EQUAL),
+	SYM_OP(MODULO_ASSIGN,         PERCENT_EQUAL),
+	SYM_OP(RANGE,                 DOT_DOT),
+	SYM_OP(SUBSCRIPT,             LEFT_BRACKET),
+	SYM_OP(CONDITIONAL_SUBSCRIPT, QUESTION_LEFT_BRACKET),
+
+	SYM_OP(ACCESS,                DOT),
+	SYM_OP(CONDITIONAL_ACCESS,    QUESTION_DOT),
+	SYM_OP(STATIC_ACCESS,         COLON_COLON),
+
+	/* parser-internal pseudo-operators. */
+
+	/* CAST uses the same symbol as SUBSCRIPT */
+	/* SYM_OP(CAST, LEFT_BRACKET), */
+	SYM_OP(SUBEXPR,               DOLLAR_LEFT_PAREN),
+	SYM_OP(PAREN,                 LEFT_PAREN),
+	SYM_OP(ARRAY_START,           AT_LEFT_PAREN),
+	SYM_OP(HASHTABLE_START,       AT_LEFT_BRACE),
+};
+static const size_t nr_operator_symbols = sizeof operator_symbols / sizeof operator_symbols[0];
+
+/* TKOP tokens -> operators, indexed by (token - __TKOP_INDEX_BASE). */
+static const struct operator_info *const operator_token_ops[] = {
+	TKOP_OP(FORMAT,        F),
+	TKOP_OP(BINARY_AND,    BAND),
+	TKOP_OP(BINARY_OR,     BOR),
+	TKOP_OP(BINARY_XOR,    BXOR),
+	TKOP_OP(BINARY_NOT,    BNOT),
+	TKOP_OP(LEFT_SHIFT,    SHL),
+	TKOP_OP(RIGHT_SHIFT,   SHR),
+	TKOP_OP(EQUAL,         EQ),
+	TKOP_OP(NOT_EQUAL,     NE),
+	TKOP_OP(GREATER_THAN,  GT),
+	TKOP_OP(LESS_THAN,     LT),
+	TKOP_OP(GREATER_EQUAL, GE),
+	TKOP_OP(LESS_EQUAL,    LE),
+	TKOP_OP(MATCH,         MATCH),
+	TKOP_OP(NOTMATCH,      NOTMATCH),
+	TKOP_OP(REPLACE,       REPLACE),
+	TKOP_OP(LIKE,          LIKE),
+	TKOP_OP(NOTLIKE,       NOTLIKE),
+	TKOP_OP(IN,            IN),
+	TKOP_OP(NOTIN,         NOTIN),
+	TKOP_OP(CONTAINS,      CONTAINS),
+	TKOP_OP(NOTCONTAINS,   NOTCONTAINS),
+	TKOP_OP(LOGICAL_AND,   AND),
+	TKOP_OP(LOGICAL_OR,    OR),
+	TKOP_OP(LOGICAL_XOR,   XOR),
+	TKOP_OP(LOGICAL_NOT,   NOT),
+	/* there are also unary versions of these operators */
+	TKOP_OP(BSPLIT,        SPLIT),
+	TKOP_OP(BJOIN,         JOIN),
+
+	TKOP_OP(IS,            IS),
+	TKOP_OP(ISNOT,         ISNOT),
+	TKOP_OP(AS,            AS),
+};
+static const size_t nr_operator_token_ops = sizeof operator_token_ops / sizeof operator_token_ops[0];
+
+/* clang-format on */
+
+/*
+ * Look up the operator bound to a lexer token.
+ *
+ * Only tokens strictly inside the TKOP or SYM index ranges are considered.
+ * Returns NULL when the token is outside both ranges, lies past the end of
+ * the matching table, or has no operator assigned to it.
+ */
+const struct operator_info *operator_get_by_token(unsigned int token)
+{
+	const struct operator_info *const *op_list = NULL;
+	size_t base = 0;
+	size_t op_list_size = 0;
+
+	if (token > __TKOP_INDEX_BASE && token < __TKOP_INDEX_LIMIT) {
+		op_list = operator_token_ops;
+		base = __TKOP_INDEX_BASE;
+		op_list_size = nr_operator_token_ops;
+	} else if (token > __SYM_INDEX_BASE && token < __SYM_INDEX_LIMIT) {
+		op_list = operator_symbols;
+		base = __SYM_INDEX_BASE;
+		op_list_size = nr_operator_symbols;
+	} else {
+		return NULL;
+	}
+
+	/* the tables are only as long as their highest mapped token */
+	if (token - base >= op_list_size) {
+		return NULL;
+	}
+
+	return op_list[token - base];
+}
+
+/*
+ * Look up an operator record by id.
+ *
+ * Returns NULL for OP_NONE, for ids beyond the end of the table, and for
+ * ids that have no entry in operators[].
+ */
+const struct operator_info *operator_get_by_id(enum operator_id id)
+{
+	/*
+	 * OP_NONE must be rejected explicitly: unpopulated slots are
+	 * zero-filled, so the op_id check below cannot tell slot 0 apart
+	 * from a real entry.
+	 */
+	if (id == OP_NONE || (size_t)id >= nr_operators) {
+		return NULL;
+	}
+
+	const struct operator_info *op = &operators[id];
+	if (op->op_id != id) {
+		/* gap in the table: slot was never initialised */
+		return NULL;
+	}
+
+	return op;
+}
+
+/* Expands to a switch case that returns the enumerator's own name. */
+#define ENUM_STR(x) \
+	case x:     \
+		return #x
+
+/*
+ * Return the enumerator name of an operator id (e.g. "OP_ADD"), or an empty
+ * string for values that are not listed below.
+ */
+const char *operator_id_to_string(enum operator_id op)
+{
+	switch (op) {
+		ENUM_STR(OP_NONE);
+		ENUM_STR(OP_ADD);
+		ENUM_STR(OP_SUBTRACT);
+		ENUM_STR(OP_MULTIPLY);
+		ENUM_STR(OP_DIVIDE);
+		ENUM_STR(OP_MODULO);
+		ENUM_STR(OP_INCREMENT);
+		ENUM_STR(OP_DECREMENT);
+		ENUM_STR(OP_LEFT_SHIFT);
+		ENUM_STR(OP_RIGHT_SHIFT);
+		ENUM_STR(OP_BINARY_AND);
+		ENUM_STR(OP_BINARY_OR);
+		ENUM_STR(OP_BINARY_XOR);
+		ENUM_STR(OP_BINARY_NOT);
+		ENUM_STR(OP_LESS_THAN);
+		ENUM_STR(OP_GREATER_THAN);
+		ENUM_STR(OP_EQUAL);
+		ENUM_STR(OP_NOT_EQUAL);
+		ENUM_STR(OP_LESS_EQUAL);
+		ENUM_STR(OP_GREATER_EQUAL);
+		ENUM_STR(OP_ASSIGN);
+		ENUM_STR(OP_ADD_ASSIGN);
+		ENUM_STR(OP_SUBTRACT_ASSIGN);
+		ENUM_STR(OP_MULTIPLY_ASSIGN);
+		ENUM_STR(OP_DIVIDE_ASSIGN);
+		ENUM_STR(OP_MODULO_ASSIGN);
+		ENUM_STR(OP_LOGICAL_AND);
+		ENUM_STR(OP_LOGICAL_OR);
+		ENUM_STR(OP_LOGICAL_XOR);
+		ENUM_STR(OP_LOGICAL_NOT);
+		ENUM_STR(OP_RANGE);
+		ENUM_STR(OP_MATCH);
+		ENUM_STR(OP_NOTMATCH);
+		ENUM_STR(OP_REPLACE);
+		ENUM_STR(OP_LIKE);
+		ENUM_STR(OP_NOTLIKE);
+		ENUM_STR(OP_IN);
+		ENUM_STR(OP_NOTIN);
+		ENUM_STR(OP_FORMAT);
+		ENUM_STR(OP_CONTAINS);
+		ENUM_STR(OP_NOTCONTAINS);
+		ENUM_STR(OP_USPLIT);
+		ENUM_STR(OP_BSPLIT);
+		ENUM_STR(OP_UJOIN);
+		ENUM_STR(OP_BJOIN);
+		ENUM_STR(OP_IS);
+		ENUM_STR(OP_ISNOT);
+		ENUM_STR(OP_AS);
+		ENUM_STR(OP_SUBSCRIPT);
+		ENUM_STR(OP_CONDITIONAL_SUBSCRIPT);
+		ENUM_STR(OP_ARRAY_DELIMITER);
+		ENUM_STR(OP_ACCESS);
+		ENUM_STR(OP_STATIC_ACCESS);
+		ENUM_STR(OP_CONDITIONAL_ACCESS);
+		ENUM_STR(OP_CAST);
+		ENUM_STR(OP_SUBEXPR);
+		ENUM_STR(OP_PAREN);
+		ENUM_STR(OP_ARRAY_START);
+		ENUM_STR(OP_HASHTABLE_START);
+	default:
+		return "";
+	}
+}
diff --git a/bshell/operator.h b/bshell/operator.h
new file mode 100644
index 0000000..7aba914
--- /dev/null
+++ b/bshell/operator.h
@@ -0,0 +1,150 @@
+#ifndef OPERATOR_H_
+#define OPERATOR_H_
+
+/*
+ * Operator precedence levels, numbered in ascending order from
+ * PRECEDENCE_MINIMUM. Presumably a larger value binds tighter; confirm
+ * against the parser.
+ */
+enum operator_precedence {
+	PRECEDENCE_MINIMUM = 0,
+	PRECEDENCE_ASSIGN,
+	PRECEDENCE_PIPELINE,
+	PRECEDENCE_LOGICAL,
+	PRECEDENCE_BITWISE,
+	PRECEDENCE_COMPARISON,
+	PRECEDENCE_ADDITION,
+	PRECEDENCE_MULTIPLICATION,
+	PRECEDENCE_NEGATE,
+	PRECEDENCE_FORMAT,
+	PRECEDENCE_RANGE,
+	PRECEDENCE_NOT,
+	PRECEDENCE_INCREMENT,
+	PRECEDENCE_ARRAY,
+	PRECEDENCE_JOIN,
+	PRECEDENCE_SPLIT,
+	PRECEDENCE_CAST,
+	PRECEDENCE_SUBSCRIPT,
+	PRECEDENCE_STATIC_ACCESS,
+	PRECEDENCE_MEMBER_ACCESS,
+	PRECEDENCE_PARENTHESIS,
+};
+
+/* Associativity of an operator. */
+enum operator_associativity {
+	ASSOCIATIVITY_LEFT,
+	ASSOCIATIVITY_RIGHT,
+};
+
+/* Where an operator sits relative to its operand(s). */
+enum operator_location {
+	OPL_PREFIX,
+	OPL_INFIX,
+	OPL_POSTFIX,
+};
+
+/* Number of operands an operator takes. */
+enum operator_arity {
+	OPA_UNARY,
+	OPA_BINARY,
+};
+
+/*
+ * Operator identifiers. The values are used directly as indices into the
+ * operator table in operator.c; OP_NONE (0) never has an operator record.
+ */
+enum operator_id {
+	OP_NONE = 0,
+	OP_ADD,
+	OP_SUBTRACT,
+	OP_MULTIPLY,
+	OP_DIVIDE,
+	OP_MODULO,
+	OP_INCREMENT,
+	OP_DECREMENT,
+	OP_LEFT_SHIFT,
+	OP_RIGHT_SHIFT,
+	OP_BINARY_AND,
+	OP_BINARY_OR,
+	OP_BINARY_XOR,
+	OP_BINARY_NOT,
+	OP_LESS_THAN,
+	OP_GREATER_THAN,
+	OP_EQUAL,
+	OP_NOT_EQUAL,
+	OP_LESS_EQUAL,
+	OP_GREATER_EQUAL,
+	OP_ASSIGN,
+	OP_ADD_ASSIGN,
+	OP_SUBTRACT_ASSIGN,
+	OP_MULTIPLY_ASSIGN,
+	OP_DIVIDE_ASSIGN,
+	OP_MODULO_ASSIGN,
+	OP_LOGICAL_AND,
+	OP_LOGICAL_OR,
+	OP_LOGICAL_XOR,
+	OP_LOGICAL_NOT,
+	OP_RANGE,
+	OP_MATCH,
+	OP_NOTMATCH,
+	OP_REPLACE,
+	OP_LIKE,
+	OP_NOTLIKE,
+	OP_IN,
+	OP_NOTIN,
+	OP_FORMAT,
+	OP_CONTAINS,
+	OP_NOTCONTAINS,
+	OP_USPLIT,
+	OP_BSPLIT,
+	OP_UJOIN,
+	OP_BJOIN,
+	OP_IS,
+	OP_ISNOT,
+	OP_AS,
+
+	OP_SUBSCRIPT,
+	OP_CONDITIONAL_SUBSCRIPT,
+
+	OP_ARRAY_DELIMITER,
+	OP_ACCESS,
+	OP_STATIC_ACCESS,
+	OP_CONDITIONAL_ACCESS,
+
+	/* these are not real operators, and are just used internally by the
+	 * parser. */
+	OP_CAST,
+	OP_SUBEXPR,
+	OP_PAREN,
+	OP_ARRAY_START,
+	OP_HASHTABLE_START,
+};
+
+/* Static description of one operator. */
+struct operator_info {
+	enum operator_id op_id;
+	enum operator_precedence op_precedence;
+	enum operator_associativity op_associativity;
+	enum operator_location op_location;
+	enum operator_arity op_arity;
+};
+
+/*
+ * Return the record for an operator id, or NULL if the id is OP_NONE, out
+ * of range, or has no table entry.
+ */
+extern const struct operator_info *operator_get_by_id(enum operator_id id);
+
+/*
+ * Return the operator bound to a lexer token, or NULL if the token does not
+ * map to an operator.
+ */
+extern const struct operator_info *operator_get_by_token(unsigned int token);
+
+/*
+ * Return the enumerator name of an operator id (e.g. "OP_ADD"), or "" for
+ * unlisted values.
+ */
+extern const char *operator_id_to_string(enum operator_id op);
+
+#endif