Compare commits

...

28 Commits

Author SHA1 Message Date
wash 34114ca451 debug: add print support for new ast node types 2026-05-12 23:00:03 +01:00
wash 750e3df7d5 lang: add arithmetic operator definitions 2026-05-12 22:59:50 +01:00
wash 83189d4d9b ast: add lots of ast node definitions 2026-05-12 22:59:29 +01:00
wash 1ea3471f0d parse: implement parsing of complex command arguments 2026-05-12 22:58:59 +01:00
wash c4529d474a parse: implement parsing of arithmetic expressions and data structures 2026-05-12 22:58:48 +01:00
wash 227e73853c parse: implement parsing of function definitions 2026-05-12 22:57:49 +01:00
wash 7d2e45edcb parse: implement parsing of if-statements 2026-05-12 22:57:31 +01:00
wash 26e2a63200 parse: implement parsing of {...} statement blocks 2026-05-12 22:57:16 +01:00
wash 0cd7ca2dde parse: add a basic function to report parse errors 2026-05-12 22:56:40 +01:00
wash 5ce780e037 parse: add a range of internal parser definitions 2026-05-12 22:55:59 +01:00
wash 2235d8593b parse: lex: add a range of operator tokens 2026-05-12 22:54:50 +01:00
wash 64903c821c parse: lex: add missing lex ctx members 2026-05-12 22:54:17 +01:00
wash 39457aa7e6 parse: add some more generic token parser functions 2026-05-12 22:53:26 +01:00
wash 440561cb39 parse: implement parsing of semicolon-delimited statement lists 2026-05-12 22:52:48 +01:00
wash cc450da31e parse: lex: support tokens terminating multiple lex states in certain circumstances 2026-05-12 22:51:45 +01:00
wash e3b92fe4f2 parse: lex: fix scanning of sub-expressions and fstrings in statement mode 2026-05-12 22:51:15 +01:00
wash b2190dd4d0 parse: lex: improve scanning of more complex redirection expressions 2026-05-12 22:48:57 +01:00
wash 3dd5f12ee5 parse: lex: fix string state not terminating when encountering a dquote 2026-05-12 22:48:08 +01:00
wash 721e0f851a parse: lex: add a range of new symbol tokens 2026-05-12 22:47:29 +01:00
wash dee4e5dbf7 parse: lex: fix arithmetic state handling dquote symbols incorrectly 2026-05-12 22:45:43 +01:00
wash 39125cea50 parse: lex: switch from arithmetic to statement when scanning =, |, and \n 2026-05-12 22:45:05 +01:00
wash 7ddc140dbf parse: lex: fix arithmetic state not scanning operator tokens 2026-05-12 22:44:33 +01:00
wash a408b9efa2 parse: lex: move per-state token settings to state source files 2026-05-11 23:57:35 +01:00
wash 0c21be8d67 parse: lex: add proper data-driven state-machine functionality
movement between lexer states is now defined (almost) exclusively
by a table of outgoing links defined for each state type.

the main lexer system uses this table to determine when, how, and to
where the state should be changed.

also add a dedicated lexer state for scanning hashtables, due to the
particularly unique rules that apply within.
2026-05-11 23:02:02 +01:00
wash 304eb80e0d bshell: add debug output support for operator tokens 2026-05-10 19:15:41 +01:00
wash ffdb28ba22 parse: lex: replace expression scanner with statement; implement complex-word scanner
also fix a bunch of scanning edge-cases
2026-05-10 19:14:24 +01:00
wash 7aa2aee5bd parse: lex: implement recording coordinates of lex tokens 2026-05-10 19:13:29 +01:00
wash 7071630af8 parse: lex: add flags for lexer states 2026-05-10 19:10:14 +01:00
39 changed files with 3752 additions and 573 deletions
+25
View File
@@ -0,0 +1,25 @@
#include "../parse/token.h"
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct array_ast_node *array = (struct array_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&array->n_items);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
/* Node definition for AST_ARRAY: records the concrete node size and the
 * child collector defined in this file. No to-string callback is set. */
struct ast_node_definition array_ast_node = {
.def_id = AST_ARRAY,
.def_node_size = sizeof(struct array_ast_node),
.def_collect_children = collect_children,
};
+30
View File
@@ -6,6 +6,7 @@
#include <stdlib.h>
#include <string.h>
extern struct ast_node_definition null_ast_node;
extern struct ast_node_definition int_ast_node;
extern struct ast_node_definition double_ast_node;
extern struct ast_node_definition word_ast_node;
@@ -15,8 +16,18 @@ extern struct ast_node_definition fstring_ast_node;
extern struct ast_node_definition cmdcall_ast_node;
extern struct ast_node_definition pipeline_ast_node;
extern struct ast_node_definition redirection_ast_node;
extern struct ast_node_definition block_ast_node;
extern struct ast_node_definition stmt_list_ast_node;
extern struct ast_node_definition func_ast_node;
extern struct ast_node_definition array_ast_node;
extern struct ast_node_definition hashtable_ast_node;
extern struct ast_node_definition hashtable_item_ast_node;
extern struct ast_node_definition if_ast_node;
extern struct ast_node_definition if_branch_ast_node;
extern struct ast_node_definition op_ast_node;
static const struct ast_node_definition *ast_node_defintions[] = {
[AST_NULL] = &null_ast_node,
[AST_INT] = &int_ast_node,
[AST_DOUBLE] = &double_ast_node,
[AST_WORD] = &word_ast_node,
@@ -26,6 +37,15 @@ static const struct ast_node_definition *ast_node_defintions[] = {
[AST_CMDCALL] = &cmdcall_ast_node,
[AST_PIPELINE] = &pipeline_ast_node,
[AST_REDIRECTION] = &redirection_ast_node,
[AST_BLOCK] = &block_ast_node,
[AST_STMT_LIST] = &stmt_list_ast_node,
[AST_FUNC] = &func_ast_node,
[AST_ARRAY] = &array_ast_node,
[AST_IF] = &if_ast_node,
[AST_IF_BRANCH] = &if_branch_ast_node,
[AST_HASHTABLE] = &hashtable_ast_node,
[AST_HASHTABLE_ITEM] = &hashtable_item_ast_node,
[AST_OP] = &op_ast_node,
};
static const size_t nr_ast_node_definitions
= sizeof ast_node_defintions / sizeof ast_node_defintions[0];
@@ -94,6 +114,8 @@ const char *ast_node_type_to_string(enum ast_node_type type)
{
switch (type) {
ENUM_STR(AST_NONE);
ENUM_STR(AST_NULL);
ENUM_STR(AST_STMT_LIST);
ENUM_STR(AST_INT);
ENUM_STR(AST_DOUBLE);
ENUM_STR(AST_WORD);
@@ -105,6 +127,14 @@ const char *ast_node_type_to_string(enum ast_node_type type)
ENUM_STR(AST_CMDCALL);
ENUM_STR(AST_PIPELINE);
ENUM_STR(AST_REDIRECTION);
ENUM_STR(AST_BLOCK);
ENUM_STR(AST_FUNC);
ENUM_STR(AST_IF);
ENUM_STR(AST_IF_BRANCH);
ENUM_STR(AST_OP);
ENUM_STR(AST_ARRAY);
ENUM_STR(AST_HASHTABLE);
ENUM_STR(AST_HASHTABLE_ITEM);
default:
return "<unknown>";
}
+70
View File
@@ -10,6 +10,8 @@ struct lex_token;
enum ast_node_type {
AST_NONE = 0x00u,
AST_NULL,
AST_STMT_LIST,
AST_INT,
AST_DOUBLE,
AST_WORD,
@@ -19,8 +21,17 @@ enum ast_node_type {
AST_VAR_SPLAT,
AST_FLAG,
AST_CMDCALL,
AST_FUNCALL,
AST_PIPELINE,
AST_REDIRECTION,
AST_BLOCK,
AST_FUNC,
AST_ARRAY,
AST_HASHTABLE,
AST_HASHTABLE_ITEM,
AST_OP,
AST_IF,
AST_IF_BRANCH,
};
struct ast_iterator_entry {
@@ -35,6 +46,10 @@ struct ast_node {
struct ast_iterator_entry n_it;
};
/* AST_NULL node: carries nothing beyond the common node header. */
struct null_ast_node {
struct ast_node n_base;
};
struct int_ast_node {
struct ast_node n_base;
struct lex_token *n_value;
@@ -76,6 +91,12 @@ struct cmdcall_ast_node {
fx_queue n_redirect;
};
/* Function-call node: a callee node plus a queue of arguments.
 * NOTE(review): presumably n_args holds ast_node entries linked through
 * n_entry like the other queue-based nodes — confirm against the parser. */
struct funcall_ast_node {
struct ast_node n_base;
struct ast_node *n_func;
fx_queue n_args;
};
struct pipeline_ast_node {
struct ast_node n_base;
fx_queue n_stages;
@@ -93,6 +114,55 @@ struct redirection_ast_node {
struct lex_token *n_out_tok;
};
/* AST_STMT_LIST node: n_statements is a queue of child ast_node objects
 * linked through their n_entry member. */
struct stmt_list_ast_node {
struct ast_node n_base;
fx_queue n_statements;
};
/* AST_BLOCK node: n_statements is a queue of child ast_node objects linked
 * through their n_entry member. */
struct block_ast_node {
struct ast_node n_base;
fx_queue n_statements;
};
/* AST_FUNC node.
 * n_name:   token whose tok_str is printed as the node's string form.
 * n_params: queue of parameter nodes linked through n_entry.
 * n_body:   body node; the child collector treats it as optional (may be
 *           NULL). */
struct func_ast_node {
struct ast_node n_base;
struct lex_token *n_name;
fx_queue n_params;
struct ast_node *n_body;
};
/* AST_ARRAY node: n_items is a queue of element nodes linked through their
 * n_entry member. */
struct array_ast_node {
struct ast_node n_base;
fx_queue n_items;
};
/* AST_HASHTABLE node: n_items is a queue of child nodes linked through
 * their n_entry member.
 * NOTE(review): presumably each entry is an AST_HASHTABLE_ITEM — confirm. */
struct hashtable_ast_node {
struct ast_node n_base;
fx_queue n_items;
};
/* AST_HASHTABLE_ITEM node: one key/value pair. The child collector treats
 * either pointer as optional (may be NULL). */
struct hashtable_item_ast_node {
struct ast_node n_base;
struct ast_node *n_key, *n_value;
};
/* AST_OP node: an operator applied to up to two operands.
 * n_op:            descriptor of the operator (see operator.h).
 * n_left, n_right: operand nodes; the child collector treats either as
 *                  optional (may be NULL). */
struct op_ast_node {
struct ast_node n_base;
const struct operator_info *n_op;
struct ast_node *n_left, *n_right;
};
/* AST_IF_BRANCH node: one condition/body pair. The child collector treats
 * either pointer as optional.
 * NOTE(review): presumably a NULL n_cond represents an `else` branch —
 * confirm against the parser. */
struct if_branch_ast_node {
struct ast_node n_base;
struct ast_node *n_cond;
struct ast_node *n_body;
};
/* AST_IF node: n_branches is a queue of child nodes linked through their
 * n_entry member.
 * NOTE(review): presumably each entry is an AST_IF_BRANCH — confirm. */
struct if_ast_node {
struct ast_node n_base;
fx_queue n_branches;
};
struct ast_iterator {
struct ast_node *it_cur;
fx_queue it_queue;
+23
View File
@@ -0,0 +1,23 @@
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct block_ast_node *block = (struct block_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&block->n_statements);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
/* Node definition for AST_BLOCK: records the concrete node size and the
 * child collector defined in this file. No to-string callback is set. */
struct ast_node_definition block_ast_node = {
.def_id = AST_BLOCK,
.def_node_size = sizeof(struct block_ast_node),
.def_collect_children = collect_children,
};
+17
View File
@@ -1,6 +1,23 @@
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct fstring_ast_node *fstring = (struct fstring_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&fstring->n_elements);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
/* Node definition for AST_FSTRING: records the concrete node size and the
 * child collector defined in this file. No to-string callback is set. */
struct ast_node_definition fstring_ast_node = {
.def_id = AST_FSTRING,
.def_node_size = sizeof(struct fstring_ast_node),
.def_collect_children = collect_children,
};
+36
View File
@@ -0,0 +1,36 @@
#include "../parse/token.h"
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct func_ast_node *func = (struct func_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&func->n_params);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
if (func->n_body) {
ast_iterator_enqueue(it, func->n_body);
}
return BSHELL_SUCCESS;
}
/*
 * Write the function's name (the text of its name token) to `out`.
 *
 * A node without a name token produces no output. The child collector in
 * this file already treats n_body as optional, so a partially-built function
 * node is tolerated here as well instead of dereferencing a NULL n_name.
 */
static void to_string(const struct ast_node *node, fx_bstr *out)
{
	const struct func_ast_node *func = (const struct func_ast_node *)node;

	if (!func->n_name) {
		return;
	}

	fx_bstr_write_fmt(out, NULL, "%s", func->n_name->tok_str);
}
/* Node definition for AST_FUNC: records the concrete node size plus the
 * child collector and to-string callback defined in this file. */
struct ast_node_definition func_ast_node = {
.def_id = AST_FUNC,
.def_node_size = sizeof(struct func_ast_node),
.def_collect_children = collect_children,
.def_to_string = to_string,
};
+26
View File
@@ -0,0 +1,26 @@
#include "../parse/token.h"
#include "ast.h"
/* Hand a hashtable item's key and then its value to the iterator, skipping
 * whichever of the two is absent. */
static enum bshell_status collect_children(
	struct ast_node *node,
	struct ast_iterator *it)
{
	struct hashtable_item_ast_node *pair
		= (struct hashtable_item_ast_node *)node;

	struct ast_node *key = pair->n_key;
	if (key) {
		ast_iterator_enqueue(it, key);
	}

	struct ast_node *value = pair->n_value;
	if (value) {
		ast_iterator_enqueue(it, value);
	}

	return BSHELL_SUCCESS;
}
/* Node definition for AST_HASHTABLE_ITEM: records the concrete node size and
 * the child collector defined in this file. No to-string callback is set. */
struct ast_node_definition hashtable_item_ast_node = {
.def_id = AST_HASHTABLE_ITEM,
.def_node_size = sizeof(struct hashtable_item_ast_node),
.def_collect_children = collect_children,
};
+24
View File
@@ -0,0 +1,24 @@
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct hashtable_ast_node *hashtable
= (struct hashtable_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&hashtable->n_items);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
/* Node definition for AST_HASHTABLE: records the concrete node size and the
 * child collector defined in this file. No to-string callback is set. */
struct ast_node_definition hashtable_ast_node = {
.def_id = AST_HASHTABLE,
.def_node_size = sizeof(struct hashtable_ast_node),
.def_collect_children = collect_children,
};
+24
View File
@@ -0,0 +1,24 @@
#include "ast.h"
/* Hand an if-branch's condition and then its body to the iterator, skipping
 * whichever of the two is absent. */
static enum bshell_status collect_children(
	struct ast_node *node,
	struct ast_iterator *it)
{
	struct if_branch_ast_node *branch = (struct if_branch_ast_node *)node;

	struct ast_node *cond = branch->n_cond;
	if (cond) {
		ast_iterator_enqueue(it, cond);
	}

	struct ast_node *body = branch->n_body;
	if (body) {
		ast_iterator_enqueue(it, body);
	}

	return BSHELL_SUCCESS;
}
/* Node definition for AST_IF_BRANCH: records the concrete node size and the
 * child collector defined in this file. No to-string callback is set. */
struct ast_node_definition if_branch_ast_node = {
.def_id = AST_IF_BRANCH,
.def_node_size = sizeof(struct if_branch_ast_node),
.def_collect_children = collect_children,
};
+23
View File
@@ -0,0 +1,23 @@
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct if_ast_node *if_group = (struct if_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&if_group->n_branches);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
/* Node definition for AST_IF: records the concrete node size and the child
 * collector defined in this file. No to-string callback is set. */
struct ast_node_definition if_ast_node = {
.def_id = AST_IF,
.def_node_size = sizeof(struct if_ast_node),
.def_collect_children = collect_children,
};
+7
View File
@@ -0,0 +1,7 @@
#include "../parse/token.h"
#include "ast.h"
/* Node definition for AST_NULL: only the id and node size are set; the node
 * has no child collector and no to-string callback. */
struct ast_node_definition null_ast_node = {
.def_id = AST_NULL,
.def_node_size = sizeof(struct null_ast_node),
};
+37
View File
@@ -0,0 +1,37 @@
#include "../operator.h"
#include "../parse/token.h"
#include "ast.h"
/* Hand an operator node's left and then right operand to the iterator,
 * skipping whichever of the two is absent. */
static enum bshell_status collect_children(
	struct ast_node *node,
	struct ast_iterator *it)
{
	struct op_ast_node *self = (struct op_ast_node *)node;

	struct ast_node *lhs = self->n_left;
	if (lhs) {
		ast_iterator_enqueue(it, lhs);
	}

	struct ast_node *rhs = self->n_right;
	if (rhs) {
		ast_iterator_enqueue(it, rhs);
	}

	return BSHELL_SUCCESS;
}
/*
 * Write the symbolic name of the node's operator (e.g. "OP_ADD") to `out`.
 *
 * A node without an operator descriptor produces no output. The child
 * collector in this file already treats both operands as optional, so a
 * partially-built operator node is tolerated here as well instead of
 * dereferencing a NULL n_op.
 */
static void to_string(const struct ast_node *node, fx_bstr *out)
{
	const struct op_ast_node *op = (const struct op_ast_node *)node;

	if (!op->n_op) {
		return;
	}

	fx_bstr_write_fmt(
		out,
		NULL,
		"%s",
		operator_id_to_string(op->n_op->op_id));
}
/* Node definition for AST_OP: records the concrete node size plus the child
 * collector and to-string callback defined in this file. */
struct ast_node_definition op_ast_node = {
.def_id = AST_OP,
.def_node_size = sizeof(struct op_ast_node),
.def_collect_children = collect_children,
.def_to_string = to_string,
};
+24
View File
@@ -0,0 +1,24 @@
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct stmt_list_ast_node *stmt_list
= (struct stmt_list_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&stmt_list->n_statements);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
/* Node definition for AST_STMT_LIST: records the concrete node size and the
 * child collector defined in this file. No to-string callback is set. */
struct ast_node_definition stmt_list_ast_node = {
.def_id = AST_STMT_LIST,
.def_node_size = sizeof(struct stmt_list_ast_node),
.def_collect_children = collect_children,
};
+21 -7
View File
@@ -24,22 +24,20 @@ extern void print_lex_token(struct lex_token *tok)
break;
case TOK_INT:
case TOK_DOUBLE:
case TOK_VAR:
case TOK_VAR_SPLAT:
fx_puts("[yellow]");
break;
case TOK_FLAG:
case TOK_OPERATOR:
fx_puts("[red]");
break;
case TOK_WORD:
case TOK_VAR:
case TOK_VAR_SPLAT:
case TOK_WORD_START:
case TOK_WORD_END:
fx_puts("[cyan]");
break;
case TOK_STRING:
fx_puts("[green]");
break;
case TOK_STR_START:
fx_puts("[green]");
break;
case TOK_STR_END:
fx_puts("[green]");
break;
@@ -60,6 +58,9 @@ extern void print_lex_token(struct lex_token *tok)
case TOK_VAR_SPLAT:
printf("(%s)", tok->tok_str);
break;
case TOK_OPERATOR:
printf("(%s)", token_operator_to_string(tok->tok_operator));
break;
case TOK_SYMBOL:
printf("(%s)", token_symbol_to_string(tok->tok_symbol));
break;
@@ -95,15 +96,28 @@ void print_ast_node(struct ast_node *node)
}
switch (node->n_type) {
case AST_IF:
case AST_IF_BRANCH:
case AST_BLOCK:
case AST_FUNC:
case AST_STMT_LIST:
fx_puts("[magenta]");
break;
case AST_REDIRECTION:
case AST_PIPELINE:
case AST_OP:
case AST_ARRAY:
case AST_HASHTABLE:
case AST_HASHTABLE_ITEM:
fx_puts("[blue]");
break;
case AST_CMDCALL:
case AST_NULL:
fx_puts("[red]");
break;
case AST_INT:
case AST_DOUBLE:
case AST_VAR:
fx_puts("[yellow]");
break;
case AST_WORD:
+264
View File
@@ -0,0 +1,264 @@
#include "operator.h"
#include "parse/token.h"
/* OP(id, p, a, l, u): designated initializer for operators[OP_<id>], filling
 * in the id, PRECEDENCE_<p>, ASSOCIATIVITY_<a>, OPL_<l> and OPA_<u>. */
#define OP(id, p, a, l, u) \
[OP_##id] = { \
.op_id = (OP_##id), \
.op_precedence = (PRECEDENCE_##p), \
.op_associativity = (ASSOCIATIVITY_##a), \
.op_location = (OPL_##l), \
.op_arity = (OPA_##u), \
}
/* The *_OP macros below build designated initializers for token-to-operator
 * lookup tables: the slot index is the token value minus the base of its
 * token range, and the slot points at the matching operators[] entry.
 * NOTE(review): TOK_OP and KW_OP are not used by any table visible in this
 * file — confirm whether they are still needed. */
#define TOK_OP(id, tok) [TOK_##tok - __TOK_INDEX_BASE] = &operators[OP_##id]
#define SYM_OP(id, sym) [SYM_##sym - __SYM_INDEX_BASE] = &operators[OP_##id]
#define KW_OP(id, kw) [KW_##kw - __KW_INDEX_BASE] = &operators[OP_##id]
#define TKOP_OP(id, kw) [TKOP_##kw - __TKOP_INDEX_BASE] = &operators[OP_##id]
/* clang-format off */
/* Operator descriptor table, indexed by enum operator_id (each OP() entry is
 * a designated initializer for slot OP_<id>). Ids without an entry are
 * zero-filled, so their op_id reads as OP_NONE (0); operator_get_by_id()
 * relies on that mismatch to reject them.
 *
 * NOTE(review): OP_DECREMENT has no entry here.
 * NOTE(review): INCREMENT and BINARY_NOT are declared INFIX/BINARY — confirm
 * that is intended rather than a unary form.
 * NOTE(review): every assignment operator is LEFT-associative — confirm. */
static const struct operator_info operators[] = {
OP(SUBEXPR, PARENTHESIS, LEFT, PREFIX, UNARY),
OP(ARRAY_START, PARENTHESIS, LEFT, PREFIX, UNARY),
OP(PAREN, PARENTHESIS, LEFT, PREFIX, UNARY),
OP(HASHTABLE_START, PARENTHESIS, LEFT, PREFIX, UNARY),
OP(ACCESS, MEMBER_ACCESS, LEFT, INFIX, BINARY),
OP(CONDITIONAL_ACCESS, MEMBER_ACCESS, LEFT, INFIX, BINARY),
OP(STATIC_ACCESS, STATIC_ACCESS, LEFT, INFIX, BINARY),
OP(SUBSCRIPT, SUBSCRIPT, LEFT, INFIX, BINARY),
OP(CONDITIONAL_SUBSCRIPT, SUBSCRIPT, LEFT, INFIX, BINARY),
OP(CAST, CAST, LEFT, PREFIX, UNARY),
OP(USPLIT, SPLIT, LEFT, PREFIX, UNARY),
OP(UJOIN, JOIN, LEFT, PREFIX, UNARY),
OP(ARRAY_DELIMITER, ARRAY, LEFT, INFIX, BINARY),
OP(INCREMENT, INCREMENT, LEFT, INFIX, BINARY),
OP(LOGICAL_NOT, NOT, LEFT, PREFIX, UNARY),
OP(RANGE, RANGE, LEFT, INFIX, BINARY),
OP(FORMAT, FORMAT, LEFT, INFIX, BINARY),
OP(MULTIPLY, MULTIPLICATION, LEFT, INFIX, BINARY),
OP(DIVIDE, MULTIPLICATION, LEFT, INFIX, BINARY),
OP(MODULO, MULTIPLICATION, LEFT, INFIX, BINARY),
OP(ADD, ADDITION, LEFT, INFIX, BINARY),
OP(SUBTRACT, ADDITION, LEFT, INFIX, BINARY),
OP(BSPLIT, COMPARISON, LEFT, INFIX, BINARY),
OP(BJOIN, COMPARISON, LEFT, INFIX, BINARY),
OP(IS, COMPARISON, LEFT, INFIX, BINARY),
OP(ISNOT, COMPARISON, LEFT, INFIX, BINARY),
OP(AS, COMPARISON, LEFT, INFIX, BINARY),
OP(EQUAL, COMPARISON, LEFT, INFIX, BINARY),
OP(NOT_EQUAL, COMPARISON, LEFT, INFIX, BINARY),
OP(GREATER_THAN, COMPARISON, LEFT, INFIX, BINARY),
OP(LESS_THAN, COMPARISON, LEFT, INFIX, BINARY),
OP(GREATER_EQUAL, COMPARISON, LEFT, INFIX, BINARY),
OP(LESS_EQUAL, COMPARISON, LEFT, INFIX, BINARY),
OP(LIKE, COMPARISON, LEFT, INFIX, BINARY),
OP(NOTLIKE, COMPARISON, LEFT, INFIX, BINARY),
OP(MATCH, COMPARISON, LEFT, INFIX, BINARY),
OP(NOTMATCH, COMPARISON, LEFT, INFIX, BINARY),
OP(IN, COMPARISON, LEFT, INFIX, BINARY),
OP(NOTIN, COMPARISON, LEFT, INFIX, BINARY),
OP(CONTAINS, COMPARISON, LEFT, INFIX, BINARY),
OP(NOTCONTAINS, COMPARISON, LEFT, INFIX, BINARY),
OP(REPLACE, COMPARISON, LEFT, INFIX, BINARY),
OP(LOGICAL_AND, LOGICAL, LEFT, INFIX, BINARY),
OP(LOGICAL_OR, LOGICAL, LEFT, INFIX, BINARY),
OP(LOGICAL_XOR, LOGICAL, LEFT, INFIX, BINARY),
OP(BINARY_AND, BITWISE, LEFT, INFIX, BINARY),
OP(BINARY_OR, BITWISE, LEFT, INFIX, BINARY),
OP(BINARY_NOT, BITWISE, LEFT, INFIX, BINARY),
OP(BINARY_XOR, BITWISE, LEFT, INFIX, BINARY),
OP(LEFT_SHIFT, BITWISE, LEFT, INFIX, BINARY),
OP(RIGHT_SHIFT, BITWISE, LEFT, INFIX, BINARY),
OP(ASSIGN, ASSIGN, LEFT, INFIX, BINARY),
OP(ADD_ASSIGN, ASSIGN, LEFT, INFIX, BINARY),
OP(SUBTRACT_ASSIGN, ASSIGN, LEFT, INFIX, BINARY),
OP(MULTIPLY_ASSIGN, ASSIGN, LEFT, INFIX, BINARY),
OP(DIVIDE_ASSIGN, ASSIGN, LEFT, INFIX, BINARY),
OP(MODULO_ASSIGN, ASSIGN, LEFT, INFIX, BINARY),
};
/* Number of slots in operators[], i.e. highest initialized id plus one. */
static const size_t nr_operators = sizeof operators / sizeof operators[0];
/* Symbol-token to operator lookup, indexed by (SYM_<x> - __SYM_INDEX_BASE).
 * Slots for symbols that are not listed stay NULL, which
 * operator_get_by_token() returns unchanged. */
static const struct operator_info *operator_symbols[] = {
SYM_OP(LOGICAL_NOT, BANG),
SYM_OP(ASSIGN, EQUAL),
SYM_OP(ADD, PLUS),
SYM_OP(SUBTRACT, HYPHEN),
SYM_OP(MULTIPLY, ASTERISK),
SYM_OP(DIVIDE, FORWARD_SLASH),
SYM_OP(MODULO, PERCENT),
SYM_OP(ADD_ASSIGN, PLUS_EQUAL),
SYM_OP(SUBTRACT_ASSIGN, HYPHEN_EQUAL),
SYM_OP(MULTIPLY_ASSIGN, ASTERISK_EQUAL),
SYM_OP(DIVIDE_ASSIGN, FORWARD_SLASH_EQUAL),
SYM_OP(MODULO_ASSIGN, PERCENT_EQUAL),
SYM_OP(RANGE, DOT_DOT),
SYM_OP(SUBSCRIPT, LEFT_BRACKET),
SYM_OP(CONDITIONAL_SUBSCRIPT, QUESTION_LEFT_BRACKET),
SYM_OP(ACCESS, DOT),
SYM_OP(CONDITIONAL_ACCESS, QUESTION_DOT),
SYM_OP(STATIC_ACCESS, COLON_COLON),
/* parser-internal pseudo-operators. */
/* CAST uses the same symbol as SUBSCRIPT, so it cannot have its own slot
 * in this table; the entry is kept commented out for reference. */
/* SYM_OP(CAST, LEFT_BRACKET), */
SYM_OP(SUBEXPR, DOLLAR_LEFT_PAREN),
SYM_OP(PAREN, LEFT_PAREN),
SYM_OP(ARRAY_START, AT_LEFT_PAREN),
SYM_OP(HASHTABLE_START, AT_LEFT_BRACE),
};
/* Number of slots in operator_symbols[] (highest listed index plus one). */
static const size_t nr_operator_symbols = sizeof operator_symbols / sizeof operator_symbols[0];
/* Operator-token to operator lookup, indexed by
 * (TKOP_<x> - __TKOP_INDEX_BASE). Slots that are not listed stay NULL. */
static const struct operator_info *operator_token_ops[] = {
TKOP_OP(FORMAT, F),
TKOP_OP(BINARY_AND, BAND),
TKOP_OP(BINARY_OR, BOR),
TKOP_OP(BINARY_XOR, BXOR),
TKOP_OP(BINARY_NOT, BNOT),
TKOP_OP(LEFT_SHIFT, SHL),
TKOP_OP(RIGHT_SHIFT, SHR),
TKOP_OP(EQUAL, EQ),
TKOP_OP(NOT_EQUAL, NE),
TKOP_OP(GREATER_THAN, GT),
TKOP_OP(LESS_THAN, LT),
TKOP_OP(GREATER_EQUAL, GE),
TKOP_OP(LESS_EQUAL, LE),
TKOP_OP(MATCH, MATCH),
TKOP_OP(NOTMATCH, NOTMATCH),
TKOP_OP(REPLACE, REPLACE),
TKOP_OP(LIKE, LIKE),
TKOP_OP(NOTLIKE, NOTLIKE),
TKOP_OP(IN, IN),
TKOP_OP(NOTIN, NOTIN),
TKOP_OP(CONTAINS, CONTAINS),
TKOP_OP(NOTCONTAINS, NOTCONTAINS),
TKOP_OP(LOGICAL_AND, AND),
TKOP_OP(LOGICAL_OR, OR),
TKOP_OP(LOGICAL_XOR, XOR),
TKOP_OP(LOGICAL_NOT, NOT),
/* there are also unary versions of these operators (OP_USPLIT and
 * OP_UJOIN); this table maps the tokens to the binary forms only. */
TKOP_OP(BSPLIT, SPLIT),
TKOP_OP(BJOIN, JOIN),
TKOP_OP(IS, IS),
TKOP_OP(ISNOT, ISNOT),
TKOP_OP(AS, AS),
};
/* Number of slots in operator_token_ops[] (highest listed index plus one). */
static const size_t nr_operator_token_ops = sizeof operator_token_ops / sizeof operator_token_ops[0];
/* clang-format on */
/*
 * Look up the operator bound to a lexer token value.
 *
 * Values strictly inside the operator-token range are resolved through
 * operator_token_ops, values strictly inside the symbol range through
 * operator_symbols. Returns NULL for any other value, for an index past the
 * end of the selected table, or for a slot with no operator assigned.
 */
const struct operator_info *operator_get_by_token(unsigned int token)
{
	if (token > __TKOP_INDEX_BASE && token < __TKOP_INDEX_LIMIT) {
		size_t slot = (size_t)token - __TKOP_INDEX_BASE;

		return slot < nr_operator_token_ops
			? operator_token_ops[slot]
			: NULL;
	}

	if (token > __SYM_INDEX_BASE && token < __SYM_INDEX_LIMIT) {
		size_t slot = (size_t)token - __SYM_INDEX_BASE;

		return slot < nr_operator_symbols ? operator_symbols[slot]
						  : NULL;
	}

	return NULL;
}
/*
 * Return the descriptor for operator `id`, or NULL when `id` lies outside
 * the table or names a slot that was never initialized (its stored op_id
 * does not match `id`).
 */
const struct operator_info *operator_get_by_id(enum operator_id id)
{
	if (id < nr_operators && operators[id].op_id == id) {
		return &operators[id];
	}

	return NULL;
}
/* Expands to a switch case that returns the enumerator's own spelling. */
#define ENUM_STR(x) \
case x: \
return #x
/* Return the enumerator name of `op` as a string (e.g. "OP_ADD").
 * Values with no case below yield an empty string. */
const char *operator_id_to_string(enum operator_id op)
{
switch (op) {
ENUM_STR(OP_NONE);
ENUM_STR(OP_ADD);
ENUM_STR(OP_SUBTRACT);
ENUM_STR(OP_MULTIPLY);
ENUM_STR(OP_DIVIDE);
ENUM_STR(OP_MODULO);
ENUM_STR(OP_INCREMENT);
ENUM_STR(OP_DECREMENT);
ENUM_STR(OP_LEFT_SHIFT);
ENUM_STR(OP_RIGHT_SHIFT);
ENUM_STR(OP_BINARY_AND);
ENUM_STR(OP_BINARY_OR);
ENUM_STR(OP_BINARY_XOR);
ENUM_STR(OP_BINARY_NOT);
ENUM_STR(OP_LESS_THAN);
ENUM_STR(OP_GREATER_THAN);
ENUM_STR(OP_EQUAL);
ENUM_STR(OP_NOT_EQUAL);
ENUM_STR(OP_LESS_EQUAL);
ENUM_STR(OP_GREATER_EQUAL);
ENUM_STR(OP_ASSIGN);
ENUM_STR(OP_ADD_ASSIGN);
ENUM_STR(OP_SUBTRACT_ASSIGN);
ENUM_STR(OP_MULTIPLY_ASSIGN);
ENUM_STR(OP_DIVIDE_ASSIGN);
ENUM_STR(OP_MODULO_ASSIGN);
ENUM_STR(OP_LOGICAL_AND);
ENUM_STR(OP_LOGICAL_OR);
ENUM_STR(OP_LOGICAL_XOR);
ENUM_STR(OP_LOGICAL_NOT);
ENUM_STR(OP_RANGE);
ENUM_STR(OP_MATCH);
ENUM_STR(OP_NOTMATCH);
ENUM_STR(OP_REPLACE);
ENUM_STR(OP_LIKE);
ENUM_STR(OP_NOTLIKE);
ENUM_STR(OP_IN);
ENUM_STR(OP_NOTIN);
ENUM_STR(OP_FORMAT);
ENUM_STR(OP_CONTAINS);
ENUM_STR(OP_NOTCONTAINS);
ENUM_STR(OP_USPLIT);
ENUM_STR(OP_BSPLIT);
ENUM_STR(OP_UJOIN);
ENUM_STR(OP_BJOIN);
ENUM_STR(OP_IS);
ENUM_STR(OP_ISNOT);
ENUM_STR(OP_AS);
ENUM_STR(OP_SUBSCRIPT);
ENUM_STR(OP_CONDITIONAL_SUBSCRIPT);
ENUM_STR(OP_ARRAY_DELIMITER);
ENUM_STR(OP_ACCESS);
ENUM_STR(OP_STATIC_ACCESS);
ENUM_STR(OP_CONDITIONAL_ACCESS);
ENUM_STR(OP_CAST);
ENUM_STR(OP_SUBEXPR);
ENUM_STR(OP_PAREN);
ENUM_STR(OP_ARRAY_START);
ENUM_STR(OP_HASHTABLE_START);
default:
/* NOTE(review): ast_node_type_to_string() returns "<unknown>" for
 * the same situation; this function returns "" instead. */
return "";
}
}
+123
View File
@@ -0,0 +1,123 @@
#ifndef OPERATOR_H_
#define OPERATOR_H_
/* Operator precedence levels, numbered upwards from PRECEDENCE_MINIMUM (0).
 * NOTE(review): presumably a larger value binds more tightly — confirm
 * against the expression parser.
 * NOTE(review): PRECEDENCE_PIPELINE and PRECEDENCE_NEGATE are not referenced
 * by the operator table in operator.c. */
enum operator_precedence {
PRECEDENCE_MINIMUM = 0,
PRECEDENCE_ASSIGN,
PRECEDENCE_PIPELINE,
PRECEDENCE_LOGICAL,
PRECEDENCE_BITWISE,
PRECEDENCE_COMPARISON,
PRECEDENCE_ADDITION,
PRECEDENCE_MULTIPLICATION,
PRECEDENCE_NEGATE,
PRECEDENCE_FORMAT,
PRECEDENCE_RANGE,
PRECEDENCE_NOT,
PRECEDENCE_INCREMENT,
PRECEDENCE_ARRAY,
PRECEDENCE_JOIN,
PRECEDENCE_SPLIT,
PRECEDENCE_CAST,
PRECEDENCE_SUBSCRIPT,
PRECEDENCE_STATIC_ACCESS,
PRECEDENCE_MEMBER_ACCESS,
PRECEDENCE_PARENTHESIS,
};
/* Grouping direction for operators of equal precedence.
 * ASSOCIATIVITY_LEFT is 0, so it is also what a zero-filled operator_info
 * reports. */
enum operator_associativity {
ASSOCIATIVITY_LEFT,
ASSOCIATIVITY_RIGHT,
};
/* Where an operator is written relative to its operand(s): before (prefix),
 * between (infix) or after (postfix). */
enum operator_location {
OPL_PREFIX,
OPL_INFIX,
OPL_POSTFIX,
};
/* Number of operands an operator takes: one (unary) or two (binary). */
enum operator_arity {
OPA_UNARY,
OPA_BINARY,
};
/* Identifier of every operator known to the shell. The values double as
 * indices into the operator table in operator.c, and OP_NONE (0) is what an
 * uninitialized table slot reports. */
enum operator_id {
OP_NONE = 0,
OP_ADD,
OP_SUBTRACT,
OP_MULTIPLY,
OP_DIVIDE,
OP_MODULO,
OP_INCREMENT,
OP_DECREMENT,
OP_LEFT_SHIFT,
OP_RIGHT_SHIFT,
OP_BINARY_AND,
OP_BINARY_OR,
OP_BINARY_XOR,
OP_BINARY_NOT,
OP_LESS_THAN,
OP_GREATER_THAN,
OP_EQUAL,
OP_NOT_EQUAL,
OP_LESS_EQUAL,
OP_GREATER_EQUAL,
OP_ASSIGN,
OP_ADD_ASSIGN,
OP_SUBTRACT_ASSIGN,
OP_MULTIPLY_ASSIGN,
OP_DIVIDE_ASSIGN,
OP_MODULO_ASSIGN,
OP_LOGICAL_AND,
OP_LOGICAL_OR,
OP_LOGICAL_XOR,
OP_LOGICAL_NOT,
OP_RANGE,
OP_MATCH,
OP_NOTMATCH,
OP_REPLACE,
OP_LIKE,
OP_NOTLIKE,
OP_IN,
OP_NOTIN,
OP_FORMAT,
OP_CONTAINS,
OP_NOTCONTAINS,
/* U* / B* are the unary and binary forms of split and join. */
OP_USPLIT,
OP_BSPLIT,
OP_UJOIN,
OP_BJOIN,
OP_IS,
OP_ISNOT,
OP_AS,
OP_SUBSCRIPT,
OP_CONDITIONAL_SUBSCRIPT,
OP_ARRAY_DELIMITER,
OP_ACCESS,
OP_STATIC_ACCESS,
OP_CONDITIONAL_ACCESS,
/* these are not real operators, and are just used internally by the
 * parser. */
OP_CAST,
OP_SUBEXPR,
OP_PAREN,
OP_ARRAY_START,
OP_HASHTABLE_START,
};
/* Static description of one operator: its id, precedence level, grouping
 * direction, position relative to its operands, and operand count. */
struct operator_info {
enum operator_id op_id;
enum operator_precedence op_precedence;
enum operator_associativity op_associativity;
enum operator_location op_location;
enum operator_arity op_arity;
};
extern const struct operator_info *operator_get_by_id(enum operator_id id);
extern const struct operator_info *operator_get_by_token(unsigned int token);
extern const char *operator_id_to_string(enum operator_id op);
#endif
+30 -5
View File
@@ -2,24 +2,43 @@
#define LEX_H_
#include "../status.h"
#include "token.h"
#include <fx/queue.h>
#include <fx/string.h>
#include <fx/stringstream.h>
struct lex_token;
#define LEX_STATE_MAX_TERMINATORS 16
struct line_source;
/* Lexer-wide option bits.
 * NOTE(review): LEX_PRINT_TOKENS presumably makes the lexer print each token
 * it produces — confirm where the flag is tested. */
enum lex_flags {
LEX_PRINT_TOKENS = 0x01u,
};
/* Per-token-definition behaviour bits, stored in lex_token_def.flags.
 * NOTE(review): bit 0x04 is not assigned here — confirm whether it is
 * reserved or simply unused. */
enum lex_token_flags {
/* a token with this flag not only interrupts the word currently being
 * scanned, but also stops multi-words */
LEX_TOKEN_TERMINATES_WORD = 0x01u,
/* a token with this flag can appear at the start of an arithmetic
 * expression. a statement that encounters this token as its first char
 * will switch to arithmetic mode */
LEX_TOKEN_UNARY_ARITHMETIC = 0x02u,
/* if a token has this flag defined, the lexer will
 * switch to command mode after encountering it. */
LEX_TOKEN_COMMAND_MODE = 0x08u,
/* if a token has this flag defined, the lexer will
 * switch to statement mode after encountering it. */
LEX_TOKEN_STATEMENT_MODE = 0x10u,
};
enum lex_state_type_id {
LEX_STATE_STATEMENT = 0x01u,
LEX_STATE_EXPRESSION = 0x02u,
LEX_STATE_COMMAND = 0x04u,
LEX_STATE_ARITHMETIC = 0x08u,
LEX_STATE_STRING = 0x10u,
LEX_STATE_COMMAND = 0x02u,
LEX_STATE_ARITHMETIC = 0x04u,
LEX_STATE_STRING = 0x08u,
LEX_STATE_WORD = 0x10u,
LEX_STATE_HASHTABLE = 0x20u,
};
struct lex_token_def {
@@ -27,6 +46,7 @@ struct lex_token_def {
const char *name;
uint64_t name_hash;
enum lex_state_type_id enabled_states;
enum lex_token_flags flags;
};
struct lex_symbol_node {
@@ -39,9 +59,12 @@ struct lex_symbol_node {
/* One lexer state instance; s_entry links it into a queue (lex_ctx keeps its
 * states in the lex_state queue). */
struct lex_state {
/* static description of this state's behaviour */
const struct lex_state_type *s_type;
/* tokens that end this state; holds at most LEX_STATE_MAX_TERMINATORS
 * entries. NOTE(review): presumably s_nr_terminators counts the entries
 * in use — confirm. */
unsigned int s_terminators[LEX_STATE_MAX_TERMINATORS];
unsigned int s_nr_terminators;
unsigned int s_paren_depth;
fx_queue_entry s_entry;
fx_string *s_tempstr;
/* NOTE(review): presumably state-specific flag bits such as
 * STATEMENT_F_DISABLE_KEYWORDS — confirm. */
unsigned int s_flags;
};
struct lex_ctx {
@@ -52,6 +75,8 @@ struct lex_ctx {
fx_string *lex_tmp;
fx_wchar lex_ch;
fx_queue lex_state;
enum token_type lex_prev_token;
struct char_cell lex_cursor, lex_start, lex_end;
struct lex_symbol_node *lex_sym_tree;
enum bshell_status lex_status;
};
+133 -37
View File
@@ -1,5 +1,42 @@
#include "lex-internal.h"
/*
 * Handle a '-' encountered in arithmetic mode (called by arithmetic_symbol
 * for SYM_HYPHEN).
 *
 * If the next character is not alphanumeric the hyphen is emitted as a plain
 * SYM_HYPHEN symbol. Otherwise the following word is read with the hyphen
 * prepended, and then:
 *   - a number is emitted as SYM_HYPHEN followed by the positive integer;
 *   - anything else is offered to convert_word_to_operator() (so spellings
 *     such as "-eq" become operator tokens) and emitted as a single token.
 *
 * Returns BSHELL_SUCCESS, or the error reported by read_word().
 */
static enum bshell_status arithmetic_hyphen(struct lex_ctx *ctx)
{
	fx_wchar c = peek_char(ctx);
	if (!fx_wchar_is_alnum(c)) {
		push_symbol(ctx, SYM_HYPHEN);
		handle_lex_state_transition(ctx, SYM_HYPHEN);
		return BSHELL_SUCCESS;
	}

	struct lex_token *tok = NULL;
	enum bshell_status status = read_word(
		ctx,
		READ_NO_SET_TOKEN_START | READ_APPEND_HYPHEN,
		&tok);
	if (status != BSHELL_SUCCESS) {
		return status;
	}

	if (convert_word_to_int(tok)) {
		/* because of APPEND_HYPHEN (which is needed to ensure operator
		 * tokens are detected properly), the resulting number will be
		 * negative.
		 * this token will be preceded by a HYPHEN token, so the number
		 * must be positive */
		tok->tok_int *= -1;
		push_symbol(ctx, SYM_HYPHEN);
	} else {
		/* the conversion rewrites the token in place when it matches;
		 * a word that is not an operator stays a plain word, so the
		 * result is not needed here. */
		(void)convert_word_to_operator(ctx, tok);
	}

	enqueue_token(ctx, tok);
	return BSHELL_SUCCESS;
}
static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
@@ -9,8 +46,12 @@ static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
return status;
}
handle_lex_state_transition(ctx, sym->id);
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DQUOTE:
return BSHELL_SUCCESS;
case SYM_SQUOTE:
status = read_literal_string(ctx, &tok);
if (status != BSHELL_SUCCESS) {
@@ -18,15 +59,10 @@ static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
}
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
case SYM_HYPHEN:
return arithmetic_hyphen(ctx);
case SYM_HASH:
return read_line_comment(ctx);
case SYM_DQUOTE:
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
return BSHELL_ERR_NO_MEMORY;
}
return BSHELL_SUCCESS;
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
@@ -49,14 +85,6 @@ static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
@@ -65,39 +93,27 @@ static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
push_symbol(ctx, sym->id);
switch (sym->id) {
case SYM_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_EXPRESSION);
return BSHELL_SUCCESS;
case SYM_DOLLAR_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
case SYM_RIGHT_PAREN:
lex_state_pop(ctx);
return BSHELL_SUCCESS;
case SYM_SEMICOLON:
lex_state_change(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
default:
break;
}
return BSHELL_SUCCESS;
}
static enum bshell_status arithmetic_word(struct lex_ctx *ctx)
{
struct lex_token *word = NULL;
enum bshell_status status = read_word(ctx, &word);
enum bshell_status status = read_word(ctx, 0, &word);
if (status != BSHELL_SUCCESS) {
return status;
}
bool converted = convert_word_to_keyword(word);
if (!converted) {
converted = convert_word_to_int(word);
unsigned int token_type = TOK_WORD;
bool kw = false, number = false;
if (convert_word_to_keyword(word)) {
token_type = word->tok_keyword;
} else if (convert_word_to_int(word)) {
token_type = TOK_INT;
}
handle_lex_state_transition(ctx, token_type);
enqueue_token(ctx, word);
return BSHELL_SUCCESS;
}
@@ -107,11 +123,13 @@ static enum bshell_status arithmetic_pump_token(struct lex_ctx *ctx)
fx_wchar c = peek_char(ctx);
bool newline = false;
set_token_start(ctx);
while (fx_wchar_is_space(c)) {
if (c == '\n') {
newline = true;
}
set_token_end(ctx);
advance_char_noread(ctx);
c = peek_char_noread(ctx);
}
@@ -119,7 +137,7 @@ static enum bshell_status arithmetic_pump_token(struct lex_ctx *ctx)
if (newline) {
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_STATEMENT);
handle_lex_state_transition(ctx, TOK_LINEFEED);
return BSHELL_SUCCESS;
}
@@ -130,7 +148,85 @@ static enum bshell_status arithmetic_pump_token(struct lex_ctx *ctx)
return arithmetic_word(ctx);
}
/* Outgoing state links for the arithmetic state: each entry names a token
 * and the state change (change / push / pop) associated with it. The list
 * ends with LINK_END.
 * NOTE(review): presumably consumed by handle_lex_state_transition() —
 * confirm in the lexer core. */
static const struct lex_state_link links[] = {
LINK_CHANGE(TOK_WORD, LEX_STATE_COMMAND),
LINK_CHANGE(SYM_EQUAL, LEX_STATE_STATEMENT),
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
LINK_POP(SYM_RIGHT_PAREN),
LINK_CHANGE(SYM_SEMICOLON, LEX_STATE_STATEMENT),
LINK_CHANGE(TOK_LINEFEED, LEX_STATE_STATEMENT),
LINK_CHANGE(SYM_PIPE, LEX_STATE_STATEMENT),
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
LINK_PUSH(
SYM_LEFT_PAREN,
LEX_STATE_STATEMENT,
STATEMENT_F_DISABLE_KEYWORDS),
LINK_END,
};
/* Keywords listed for the arithmetic state; the list ends with KW_NONE. */
static const unsigned int keywords[] = {
KW_IF,
KW_ELSEIF,
KW_ELSE,
KW_NONE,
};
/* Operator tokens listed for the arithmetic state; the list ends with
 * TKOP_NONE. */
static const unsigned int operators[] = {
TKOP_F, TKOP_BAND, TKOP_BOR, TKOP_BXOR,
TKOP_BNOT, TKOP_SHL, TKOP_SHR, TKOP_EQ,
TKOP_NE, TKOP_GT, TKOP_LT, TKOP_GE,
TKOP_LE, TKOP_MATCH, TKOP_NOTMATCH, TKOP_REPLACE,
TKOP_LIKE, TKOP_NOTLIKE, TKOP_IN, TKOP_NOTIN,
TKOP_CONTAINS, TKOP_NOTCONTAINS, TKOP_AND, TKOP_OR,
TKOP_XOR, TKOP_NOT, TKOP_SPLIT, TKOP_JOIN,
TKOP_IS, TKOP_ISNOT, TKOP_AS, TKOP_NONE,
};
/* Symbol ids listed for the arithmetic state; SYM_NONE ends the list. */
static const unsigned int symbols[] = {
SYM_BANG,
SYM_PLUS,
SYM_HYPHEN,
SYM_FORWARD_SLASH,
SYM_ASTERISK,
SYM_AMPERSAND,
SYM_PERCENT,
SYM_SQUOTE,
SYM_DQUOTE,
SYM_HASH,
SYM_DOLLAR,
SYM_DOLLAR_LEFT_PAREN,
SYM_DOLLAR_LEFT_BRACE,
SYM_AT,
SYM_AT_LEFT_BRACE,
SYM_PIPE,
SYM_COMMA,
SYM_SEMICOLON,
SYM_LEFT_PAREN,
SYM_RIGHT_PAREN,
SYM_LEFT_BRACE,
SYM_RIGHT_BRACE,
SYM_LEFT_BRACKET,
SYM_RIGHT_BRACKET,
SYM_QUESTION_DOT,
SYM_QUESTION_LEFT_BRACKET,
SYM_EQUAL,
SYM_PLUS_EQUAL,
SYM_HYPHEN_EQUAL,
SYM_FORWARD_SLASH_EQUAL,
SYM_ASTERISK_EQUAL,
SYM_PERCENT_EQUAL,
SYM_DOT,
SYM_DOT_DOT,
SYM_COLON_COLON,
SYM_NONE,
};
/*
 * Descriptor for the arithmetic lexer state: its id, the function that
 * produces the next token, and the link/keyword/operator/symbol tables
 * defined above. No s_begin/s_end hooks are set.
 */
const struct lex_state_type lex_arithmetic_state = {
.s_id = LEX_STATE_ARITHMETIC,
.s_pump_token = arithmetic_pump_token,
.s_links = links,
.s_keywords = keywords,
.s_operators = operators,
.s_symbols = symbols,
};
+124 -29
View File
@@ -1,5 +1,27 @@
#include "../token.h"
#include "lex-internal.h"
/*
 * Report whether character `c` may extend the word currently being lexed.
 *
 * Alphanumerics and '$' always continue the word and whitespace never does.
 * Any other character continues the word unless it can begin a symbol in
 * LEX_STATE_WORD.
 */
static bool char_can_continue_word(struct lex_ctx *ctx, fx_wchar c)
{
	if (fx_wchar_is_alnum(c)) {
		return true;
	}

	if (fx_wchar_is_space(c)) {
		return false;
	}

	return c == '$'
	       || !char_can_begin_symbol_in_state(ctx, c, LEX_STATE_WORD);
}
static enum bshell_status command_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
@@ -9,8 +31,12 @@ static enum bshell_status command_symbol(struct lex_ctx *ctx)
return status;
}
handle_lex_state_transition(ctx, sym->id);
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DQUOTE:
return BSHELL_SUCCESS;
case SYM_SQUOTE:
status = read_literal_string(ctx, &tok);
if (status != BSHELL_SUCCESS) {
@@ -21,18 +47,16 @@ static enum bshell_status command_symbol(struct lex_ctx *ctx)
case SYM_HASH:
return read_line_comment(ctx);
case SYM_DQUOTE:
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
return BSHELL_ERR_NO_MEMORY;
}
return BSHELL_SUCCESS;
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
if (char_can_continue_word(ctx, peek_char(ctx))) {
lex_state_push(ctx, LEX_STATE_WORD, 0);
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
@@ -49,12 +73,8 @@ static enum bshell_status command_symbol(struct lex_ctx *ctx)
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
if (char_can_continue_word(ctx, peek_char(ctx))) {
lex_state_push(ctx, LEX_STATE_WORD, 0);
}
enqueue_token(ctx, tok);
@@ -65,34 +85,71 @@ static enum bshell_status command_symbol(struct lex_ctx *ctx)
push_symbol(ctx, sym->id);
switch (sym->id) {
case SYM_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_EXPRESSION);
return BSHELL_SUCCESS;
case SYM_DOLLAR_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
case SYM_RIGHT_PAREN:
lex_state_pop(ctx);
return BSHELL_SUCCESS;
case SYM_SEMICOLON:
lex_state_change(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
default:
break;
}
/*
 * Report whether `s` looks like an output redirection operator.
 *
 * ">" and ">>" match directly. Otherwise the string is scanned one UTF-8
 * codepoint at a time: digits are accepted only before the first '>', at
 * most two '>' characters are accepted, and any other character rejects
 * the string. An empty string is never a redirection.
 *
 * NOTE(review): a string made only of digits (no '>') passes the scan and
 * is reported as a redirection; confirm whether that is intended.
 */
static bool string_is_redirection(const char *s)
{
	if (!*s) {
		return false;
	}

	if (!strcmp(s, ">") || !strcmp(s, ">>")) {
		return true;
	}

	long nr_angles = 0;
	while (*s) {
		fx_wchar c = fx_wchar_utf8_codepoint_decode(s);

		if (fx_wchar_is_number(c)) {
			/* a file descriptor number may only precede the '>' */
			if (nr_angles) {
				return false;
			}
		} else if (c == '>') {
			nr_angles++;
			if (nr_angles > 2) {
				return false;
			}
		} else {
			return false;
		}

		/* step over the whole codepoint, not a single byte */
		s += fx_wchar_utf8_codepoint_stride(s);
	}

	return true;
}
static enum bshell_status command_word(struct lex_ctx *ctx)
{
struct lex_token *word = NULL;
enum bshell_status status = read_word(ctx, &word);
enum bshell_status status
= read_word(ctx, READ_NO_NUMBER_RECOGNITION, &word);
if (status != BSHELL_SUCCESS) {
return status;
}
bool continue_word = false;
fx_wchar c = peek_char(ctx);
const char *s = word->tok_str;
if (char_can_begin_symbol_in_state(ctx, c, LEX_STATE_WORD)) {
continue_word = true;
}
if (char_has_flags(ctx, c, LEX_TOKEN_TERMINATES_WORD)) {
continue_word = false;
}
if (string_is_redirection(s)) {
continue_word = false;
}
if (continue_word) {
lex_state_push(ctx, LEX_STATE_WORD, 0);
}
enqueue_token(ctx, word);
return BSHELL_SUCCESS;
}
@@ -102,11 +159,13 @@ enum bshell_status command_pump_token(struct lex_ctx *ctx)
fx_wchar c = peek_char(ctx);
bool newline = false;
set_token_start(ctx);
while (fx_wchar_is_space(c)) {
if (c == '\n') {
newline = true;
}
set_token_end(ctx);
advance_char_noread(ctx);
c = peek_char_noread(ctx);
}
@@ -114,7 +173,7 @@ enum bshell_status command_pump_token(struct lex_ctx *ctx)
if (newline) {
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_STATEMENT);
handle_lex_state_transition(ctx, TOK_LINEFEED);
return BSHELL_SUCCESS;
}
@@ -125,7 +184,43 @@ enum bshell_status command_pump_token(struct lex_ctx *ctx)
return command_word(ctx);
}
/*
 * State links for the command lexer state. Each entry pairs a token with a
 * state-stack action (push, change or pop); the list is terminated by
 * LINK_END.
 *
 * The table is file-local, like the link tables of the other lexer states,
 * so that it does not export the generic name `links`.
 */
static const struct lex_state_link links[] = {
	LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
	/* a plain '(' pushes a statement state with keyword conversion
	 * disabled */
	LINK_PUSH(
		SYM_LEFT_PAREN,
		LEX_STATE_STATEMENT,
		STATEMENT_F_DISABLE_KEYWORDS),
	LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
	LINK_POP(SYM_RIGHT_PAREN),
	LINK_POP(SYM_RIGHT_BRACE),
	LINK_CHANGE(SYM_SEMICOLON, LEX_STATE_STATEMENT),
	LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
	LINK_CHANGE(TOK_LINEFEED, LEX_STATE_STATEMENT),
	LINK_END,
};
/* Symbol ids listed for the command state; SYM_NONE ends the list. */
static const unsigned int symbols[] = {
SYM_DQUOTE,
SYM_SQUOTE,
SYM_DOLLAR,
SYM_DOLLAR_LEFT_PAREN,
SYM_DOLLAR_LEFT_BRACE,
SYM_AT,
SYM_AT_LEFT_BRACE,
SYM_AT_LEFT_PAREN,
SYM_AMPERSAND,
SYM_PIPE,
SYM_SEMICOLON,
SYM_RIGHT_PAREN,
SYM_LEFT_PAREN,
SYM_LEFT_BRACE,
SYM_RIGHT_BRACE,
SYM_NONE,
};
/*
 * Descriptor for the command lexer state: its id, the function that
 * produces the next token, and the link and symbol tables defined above.
 * No keyword or operator lists are set for this state.
 */
const struct lex_state_type lex_command_state = {
.s_id = LEX_STATE_COMMAND,
.s_pump_token = command_pump_token,
.s_links = links,
.s_symbols = symbols,
};
-134
View File
@@ -1,134 +0,0 @@
#include "lex-internal.h"
static enum bshell_status expression_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DQUOTE:
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
return BSHELL_ERR_NO_MEMORY;
}
return BSHELL_SUCCESS;
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
return status;
case SYM_DOLLAR_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
return status;
case SYM_AT_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
return status;
default:
break;
}
push_symbol(ctx, sym->id);
switch (sym->id) {
case SYM_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_EXPRESSION);
return BSHELL_SUCCESS;
case SYM_DOLLAR_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
case SYM_RIGHT_PAREN:
lex_state_pop(ctx);
return BSHELL_SUCCESS;
case SYM_SEMICOLON:
lex_state_change(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
default:
break;
}
return BSHELL_SUCCESS;
}
static enum bshell_status expression_word(struct lex_ctx *ctx)
{
struct lex_token *word = NULL;
enum bshell_status status = read_word(ctx, &word);
if (status != BSHELL_SUCCESS) {
return status;
}
bool converted = convert_word_to_int(word);
if (converted) {
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
} else {
lex_state_change(ctx, LEX_STATE_COMMAND);
}
enqueue_token(ctx, word);
return BSHELL_SUCCESS;
}
static enum bshell_status expression_pump_token(struct lex_ctx *ctx)
{
fx_wchar c = peek_char(ctx);
bool newline = false;
while (fx_wchar_is_space(c)) {
if (c == '\n') {
newline = true;
}
advance_char_noread(ctx);
c = peek_char_noread(ctx);
}
if (newline) {
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
}
if (char_can_begin_symbol(ctx, c)) {
return expression_symbol(ctx);
}
return expression_word(ctx);
}
/* Descriptor for the expression lexer state: only the id and the token
 * pump function are set. */
const struct lex_state_type lex_expression_state = {
.s_id = LEX_STATE_EXPRESSION,
.s_pump_token = expression_pump_token,
};
+184
View File
@@ -0,0 +1,184 @@
#include "lex-internal.h"
static enum bshell_status hashtable_hyphen(struct lex_ctx *ctx)
{
fx_wchar c = peek_char(ctx);
if (!fx_wchar_is_alnum(c)) {
push_symbol(ctx, SYM_HYPHEN);
handle_lex_state_transition(ctx, SYM_HYPHEN);
return BSHELL_SUCCESS;
}
struct lex_token *tok = NULL;
enum bshell_status status = read_word(
ctx,
READ_NO_SET_TOKEN_START | READ_APPEND_HYPHEN,
&tok);
if (status != BSHELL_SUCCESS) {
return status;
}
unsigned int token_type = TOK_WORD;
if (convert_word_to_int(tok)) {
token_type = TOK_INT;
/* because of APPEND_HYPHEN (which is needed to ensure operator
* tokens are detected properly), the resulting number will be
* negative.
* this token will be preceded by a HYPHEN token, so the number
* must be positive */
tok->tok_int *= -1;
push_symbol(ctx, SYM_HYPHEN);
} else if (convert_word_to_operator(ctx, tok)) {
token_type = tok->tok_operator;
}
handle_lex_state_transition(ctx, token_type);
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
/*
 * Read one symbol in the hashtable state and act on it.
 *
 * The state transition for the symbol is applied first. Quote and variable
 * symbols then read the rest of their token and enqueue it; '-' and '#'
 * are delegated to hashtable_hyphen() and read_line_comment(); any other
 * symbol is pushed as a symbol token.
 */
static enum bshell_status hashtable_symbol(struct lex_ctx *ctx)
{
	const struct lex_token_def *sym = NULL;
	enum bshell_status status = read_symbol(ctx, &sym);
	if (status != BSHELL_SUCCESS) {
		return status;
	}

	handle_lex_state_transition(ctx, sym->id);

	struct lex_token *tok = NULL;

	switch (sym->id) {
	case SYM_HYPHEN:
		return hashtable_hyphen(ctx);
	case SYM_HASH:
		return read_line_comment(ctx);
	case SYM_SQUOTE:
		status = read_literal_string(ctx, &tok);
		break;
	case SYM_DOLLAR:
		status = read_var(ctx, TOK_VAR, &tok);
		break;
	case SYM_AT:
		status = read_var(ctx, TOK_VAR_SPLAT, &tok);
		break;
	case SYM_DOLLAR_LEFT_BRACE:
		status = read_braced_var(ctx, TOK_VAR, &tok);
		break;
	default:
		/* no special handling: emit the symbol itself */
		push_symbol(ctx, sym->id);
		return BSHELL_SUCCESS;
	}

	/* shared tail for every case that produced a token via a reader */
	if (status != BSHELL_SUCCESS) {
		return status;
	}

	enqueue_token(ctx, tok);
	return BSHELL_SUCCESS;
}
/*
 * Read one word in the hashtable state, try to convert it to an integer,
 * apply the state transition for the resulting token type and enqueue it.
 */
static enum bshell_status hashtable_word(struct lex_ctx *ctx)
{
	struct lex_token *tok = NULL;
	enum bshell_status status = read_word(ctx, 0, &tok);

	if (status == BSHELL_SUCCESS) {
		/* the result is not needed: the transition below uses
		 * whatever type the token ends up with */
		(void)convert_word_to_int(tok);

		handle_lex_state_transition(ctx, tok->tok_type);
		enqueue_token(ctx, tok);
	}

	return status;
}
static enum bshell_status hashtable_pump_token(struct lex_ctx *ctx)
{
fx_wchar c = peek_char(ctx);
bool newline = false;
set_token_start(ctx);
while (fx_wchar_is_space(c)) {
if (c == '\n') {
newline = true;
}
set_token_end(ctx);
advance_char_noread(ctx);
c = peek_char_noread(ctx);
}
#if 1
if (newline) {
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
#endif
if (char_can_begin_symbol(ctx, c)) {
return hashtable_symbol(ctx);
}
return hashtable_word(ctx);
}
/*
 * State links for the hashtable lexer state. Each entry pairs a token with
 * a state-stack action; the list is terminated by LINK_END.
 */
static const struct lex_state_link links[] = {
/* '=' pushes a statement state carrying '}', ';' and linefeed as its
 * terminator tokens */
LINK_PUSH_WITH_TERM(
SYM_EQUAL,
LEX_STATE_STATEMENT,
0,
SYM_RIGHT_BRACE,
SYM_SEMICOLON,
TOK_LINEFEED),
/* a linefeed pushes a statement state carrying ';' and linefeed as its
 * terminator tokens */
LINK_PUSH_WITH_TERM(
TOK_LINEFEED,
LEX_STATE_STATEMENT,
0,
SYM_SEMICOLON,
TOK_LINEFEED),
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
/* a plain '(' pushes a statement state with keyword conversion disabled */
LINK_PUSH(
SYM_LEFT_PAREN,
LEX_STATE_STATEMENT,
STATEMENT_F_DISABLE_KEYWORDS),
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
/* '}' pops this state; the pop link carries LINK_ALLOW_RECURSION */
LINK_POP2(SYM_RIGHT_BRACE, LINK_ALLOW_RECURSION),
LINK_END,
};
/*
 * Symbol ids listed for the hashtable state; SYM_NONE ends the list.
 *
 * NOTE(review): hashtable_symbol() has cases for SYM_HYPHEN, SYM_DOLLAR,
 * SYM_AT and SYM_DOLLAR_LEFT_BRACE, none of which appear here; confirm
 * whether those symbols are meant to be recognised in this state.
 */
static const unsigned int symbols[] = {
SYM_EQUAL,
SYM_DQUOTE,
SYM_SQUOTE,
SYM_SEMICOLON,
SYM_RIGHT_BRACE,
SYM_DOLLAR_LEFT_PAREN,
SYM_LEFT_PAREN,
SYM_HASH,
SYM_NONE,
};
/*
 * Descriptor for the hashtable lexer state: its id, the function that
 * produces the next token, and the link and symbol tables defined above.
 * No keyword or operator lists are set for this state.
 */
const struct lex_state_type lex_hashtable_state = {
.s_id = LEX_STATE_HASHTABLE,
.s_pump_token = hashtable_pump_token,
.s_links = links,
.s_symbols = symbols,
};
+121 -1
View File
@@ -7,6 +7,77 @@
struct lex_ctx;
/*
 * Per-state behaviour flags, passed to lex_state_push() and used as the
 * l_target_flags of a state link.
 *
 * Both flags below have the value 0x01; each comment names the state it
 * is meant for.
 * NOTE(review): presumably a flag is only ever interpreted by the state it
 * names, so the shared value does not clash; confirm.
 */
enum state_flags {
/* statement: don't convert matching words to keywords */
STATEMENT_F_DISABLE_KEYWORDS = 0x01u,
/* arithmetic: don't switch back to statement mode even when
 * encountering a token that would otherwise require it. */
ARITHMETIC_F_DISABLE_STATEMENTS = 0x01u,
};
/* Options accepted by read_word(); values are bit flags and may be OR-ed
 * together. */
enum read_flags {
/* start the word's text with a '-' before reading any input */
READ_APPEND_HYPHEN = 0x01u,
/* do not reset the token start position when reading begins */
READ_NO_SET_TOKEN_START = 0x02u,
/* skip the check that ends a word early once its text so far is a valid
 * number */
READ_NO_NUMBER_RECOGNITION = 0x04u,
};
/* Flags stored in the l_flags member of a state link (set via LINK_POP2). */
enum link_flags {
/* NOTE(review): exact semantics are not visible here; presumably lets a
 * pop link apply to nested/parent states as well - confirm against
 * handle_lex_state_transition(). */
LINK_ALLOW_RECURSION = 0x01u,
};
/*
 * Helpers for building struct lex_state_link entries. Each macro expands
 * to a compound literal with the named members set; members that are not
 * mentioned are zero-initialised.
 */

/* On `tok`, push state `target` with state flags `flags`. */
#define LINK_PUSH(tok, target, flags) \
((struct lex_state_link) { \
.l_token = (tok), \
.l_type = LEX_STATE_LINK_PUSH, \
.l_target = (target), \
.l_target_flags = (flags), \
})
/* Like LINK_PUSH, but also fills l_terminators with the extra arguments,
 * followed by TOK_NONE. */
#define LINK_PUSH_WITH_TERM(tok, target, flags, ...) \
((struct lex_state_link) { \
.l_token = (tok), \
.l_type = LEX_STATE_LINK_PUSH, \
.l_target = (target), \
.l_target_flags = (flags), \
.l_terminators = {__VA_ARGS__, TOK_NONE}, \
})
/* On `tok`, change the current state to `target`. */
#define LINK_CHANGE(tok, target) \
((struct lex_state_link) { \
.l_token = (tok), \
.l_type = LEX_STATE_LINK_CHANGE, \
.l_target = (target), \
})
/* On `tok`, pop the current state. */
#define LINK_POP(tok) \
((struct lex_state_link) { \
.l_token = (tok), \
.l_type = LEX_STATE_LINK_POP, \
})
/* Like LINK_POP, with link flags (enum link_flags) stored in l_flags. */
#define LINK_POP2(tok, flags) \
((struct lex_state_link) { \
.l_token = (tok), \
.l_type = LEX_STATE_LINK_POP, \
.l_flags = (flags), \
})
/* Entry for `tok` with link type LEX_STATE_LINK_NONE. */
#define LINK_NONE(tok) \
((struct lex_state_link) { \
.l_token = (tok), \
.l_type = LEX_STATE_LINK_NONE, \
})
/* All-zero entry used as the last element of a link table. */
#define LINK_END ((struct lex_state_link) {})
/*
 * One entry of a lexer state's link table: a token and the state-stack
 * action associated with it. Entries are normally built with the LINK_*
 * macros.
 */
struct lex_state_link {
/* token this link is keyed on */
unsigned int l_token;
/* action to take: none, push a state, change state, or pop */
enum {
LEX_STATE_LINK_NONE,
LEX_STATE_LINK_PUSH,
LEX_STATE_LINK_CHANGE,
LEX_STATE_LINK_POP,
} l_type;
/* flags that apply to the link itself */
enum link_flags l_flags;
/* state to push or change to */
enum lex_state_type_id l_target;
/* flags given to the target state */
enum state_flags l_target_flags;
/* terminator tokens for the target state; LINK_PUSH_WITH_TERM ends the
 * list with TOK_NONE */
unsigned int l_terminators[LEX_STATE_MAX_TERMINATORS];
};
typedef enum bshell_status (*lex_state_pump_token)(struct lex_ctx *);
typedef enum bshell_status (*lex_state_begin)(struct lex_ctx *);
typedef enum bshell_status (*lex_state_end)(struct lex_ctx *);
@@ -16,6 +87,11 @@ struct lex_state_type {
lex_state_pump_token s_pump_token;
lex_state_begin s_begin;
lex_state_end s_end;
const unsigned int *s_keywords;
const unsigned int *s_operators;
const unsigned int *s_symbols;
const struct lex_state_link *s_links;
};
extern enum bshell_status pump_token_statement(struct lex_ctx *ctx);
@@ -24,27 +100,46 @@ extern enum bshell_status pump_token_command(struct lex_ctx *ctx);
extern enum bshell_status pump_token_arithmetic(struct lex_ctx *ctx);
extern enum bshell_status pump_token_string(struct lex_ctx *ctx);
extern void set_token_start(struct lex_ctx *ctx);
extern void set_token_end(struct lex_ctx *ctx);
extern struct lex_state *lex_state_push(
struct lex_ctx *ctx,
enum lex_state_type_id state_type);
enum lex_state_type_id state_type,
enum state_flags flags);
extern void lex_state_pop(struct lex_ctx *ctx);
extern struct lex_state *lex_state_get(struct lex_ctx *ctx);
extern void lex_state_change(struct lex_ctx *ctx, enum lex_state_type_id type);
extern fx_string *lex_state_get_tempstr(struct lex_ctx *ctx);
extern void lex_state_add_terminator(struct lex_state *state, unsigned int tok);
extern bool lex_state_terminates_at_token(
struct lex_ctx *ctx,
unsigned int tok);
extern fx_wchar peek_char(struct lex_ctx *ctx);
extern fx_wchar peek_char_noread(struct lex_ctx *ctx);
extern fx_wchar peek2_char(struct lex_ctx *ctx);
extern fx_wchar peek2_char_noread(struct lex_ctx *ctx);
extern void advance_char(struct lex_ctx *ctx);
extern void advance_char_noread(struct lex_ctx *ctx);
extern bool string_is_valid_number(const char *s, long long *out);
extern bool convert_word_to_int(struct lex_token *tok);
extern bool convert_word_to_keyword(struct lex_token *tok);
extern bool convert_word_to_operator(
struct lex_ctx *ctx,
struct lex_token *tok);
extern void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok);
extern void enqueue_token_with_coordinates(
struct lex_ctx *ctx,
struct lex_token *tok,
const struct char_cell *start,
const struct char_cell *end);
extern enum bshell_status read_word(
struct lex_ctx *ctx,
enum read_flags flags,
struct lex_token **out);
extern enum bshell_status read_symbol(
struct lex_ctx *ctx,
@@ -71,5 +166,30 @@ extern bool char_can_begin_symbol_in_state(
struct lex_ctx *ctx,
char c,
enum lex_state_type_id state_type);
extern bool char_has_flags(
struct lex_ctx *ctx,
char c,
enum lex_token_flags flags);
extern bool keyword_has_flags(
struct lex_ctx *ctx,
enum token_keyword kw,
enum lex_token_flags flags);
extern enum lex_token_flags keyword_get_flags(
struct lex_ctx *ctx,
enum token_keyword kw);
extern bool symbol_has_flags(
struct lex_ctx *ctx,
enum token_symbol sym,
enum lex_token_flags flags);
extern enum lex_token_flags symbol_get_flags(
struct lex_ctx *ctx,
enum token_symbol sym);
extern enum token_operator get_operator_with_string(
struct lex_ctx *ctx,
const char *s);
extern void handle_lex_state_transition(
struct lex_ctx *ctx,
unsigned int token);
#endif
+520 -238
View File
@@ -5,96 +5,171 @@
#include "../token.h"
#include "lex-internal.h"
#define LEX_TOKEN_DEF(i, n, s) {.id = (i), .name = (n), .enabled_states = (s)}
#include <assert.h>
#define CONVERSION_REQUESTED(flags) \
((flags) & (LEX_ENABLE_INT | LEX_ENABLE_KEYWORD))
/*
 * Designated-initialiser helpers for the symbol, keyword and operator
 * definition tables. Each entry is stored at index (id - index base), so a
 * definition can be looked up directly from its id.
 *
 * The id argument is parenthesised in the designator as well as in the
 * member initialiser, so that an id written as an expression cannot bind
 * incorrectly with the subtraction.
 */
#define SYMBOL_DEF(i, n, f) \
	[(i) - __SYM_INDEX_BASE] = { \
		.id = (i), \
		.name = (n), \
		.flags = (f), \
	}
#define KW_DEF(i, n, f) \
	[(i) - __KW_INDEX_BASE] = { \
		.id = (i), \
		.name = (n), \
		.flags = (f), \
	}
#define TKOP_DEF(i, n, f) \
	[(i) - __TKOP_INDEX_BASE] = { \
		.id = (i), \
		.name = (n), \
		.flags = (f), \
	}
static struct lex_token_def keywords[] = {
LEX_TOKEN_DEF(KW_FUNC, "func", LEX_STATE_STATEMENT),
LEX_TOKEN_DEF(KW_IF, "if", LEX_STATE_STATEMENT),
LEX_TOKEN_DEF(KW_ELSE, "else", LEX_STATE_STATEMENT),
KW_DEF(KW_FUNC, "func", LEX_TOKEN_COMMAND_MODE),
KW_DEF(KW_IF, "if", 0),
KW_DEF(KW_ELSEIF, "elseif", 0),
KW_DEF(KW_ELSE, "else", 0),
};
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
#define LEX_STATES(states) (LEX_STATE_STATEMENT | LEX_STATE_EXPRESSION | states)
#define LEX_STATE_ALL \
(LEX_STATE_ARITHMETIC | LEX_STATE_STATEMENT | LEX_STATE_COMMAND \
| LEX_STATE_STRING | LEX_STATE_EXPRESSION)
/*
 * Definitions of the word operators ("-eq", "-and", ...). TKOP_DEF places
 * each entry at index (id - __TKOP_INDEX_BASE). All entries have flags 0.
 * The table is not const: init_token_enabled_states() ORs state ids into
 * each entry's enabled_states member.
 */
static struct lex_token_def operators[] = {
TKOP_DEF(TKOP_BAND, "-band", 0),
TKOP_DEF(TKOP_BOR, "-bor", 0),
TKOP_DEF(TKOP_BXOR, "-bxor", 0),
TKOP_DEF(TKOP_BNOT, "-bnot", 0),
TKOP_DEF(TKOP_SHL, "-shl", 0),
TKOP_DEF(TKOP_SHR, "-shr", 0),
TKOP_DEF(TKOP_EQ, "-eq", 0),
TKOP_DEF(TKOP_NE, "-ne", 0),
TKOP_DEF(TKOP_GT, "-gt", 0),
TKOP_DEF(TKOP_LT, "-lt", 0),
TKOP_DEF(TKOP_GE, "-ge", 0),
TKOP_DEF(TKOP_LE, "-le", 0),
TKOP_DEF(TKOP_MATCH, "-match", 0),
TKOP_DEF(TKOP_NOTMATCH, "-notmatch", 0),
TKOP_DEF(TKOP_REPLACE, "-replace", 0),
TKOP_DEF(TKOP_LIKE, "-like", 0),
TKOP_DEF(TKOP_NOTLIKE, "-notlike", 0),
TKOP_DEF(TKOP_CONTAINS, "-contains", 0),
TKOP_DEF(TKOP_NOTCONTAINS, "-notcontains", 0),
TKOP_DEF(TKOP_AND, "-and", 0),
TKOP_DEF(TKOP_OR, "-or", 0),
TKOP_DEF(TKOP_XOR, "-xor", 0),
TKOP_DEF(TKOP_NOT, "-not", 0),
TKOP_DEF(TKOP_SPLIT, "-split", 0),
TKOP_DEF(TKOP_JOIN, "-join", 0),
TKOP_DEF(TKOP_IS, "-is", 0),
TKOP_DEF(TKOP_ISNOT, "-isnot", 0),
TKOP_DEF(TKOP_AS, "-as", 0),
TKOP_DEF(TKOP_F, "-f", 0),
};
/* number of slots in the table, including any unused indices */
static const size_t nr_operators = sizeof operators / sizeof operators[0];
static struct lex_token_def symbols[] = {
LEX_TOKEN_DEF(SYM_PLUS, "+", LEX_STATES(LEX_STATE_ARITHMETIC)),
LEX_TOKEN_DEF(SYM_HYPHEN, "-", LEX_STATES(LEX_STATE_ARITHMETIC)),
LEX_TOKEN_DEF(SYM_FORWARD_SLASH, "/", LEX_STATES(LEX_STATE_ARITHMETIC)),
LEX_TOKEN_DEF(SYM_ASTERISK, "*", LEX_STATES(LEX_STATE_ARITHMETIC)),
LEX_TOKEN_DEF(
SYM_AMPERSAND,
"&",
LEX_STATES(LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND)),
LEX_TOKEN_DEF(SYM_PERCENT, "%", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(
SYM_SQUOTE,
"'",
LEX_STATES(LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND)),
LEX_TOKEN_DEF(SYM_DQUOTE, "\"", LEX_STATE_ALL),
LEX_TOKEN_DEF(
SYM_HASH,
"#",
LEX_STATES(LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND)),
LEX_TOKEN_DEF(
SYM_DOLLAR,
"$",
LEX_STATES(
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND
| LEX_STATE_STRING)),
LEX_TOKEN_DEF(SYM_DOLLAR_LEFT_PAREN, "$(", LEX_STATE_ALL),
LEX_TOKEN_DEF(SYM_DOLLAR_LEFT_BRACE, "${", LEX_STATE_ALL),
LEX_TOKEN_DEF(SYM_AT, "@", LEX_STATE_ALL),
LEX_TOKEN_DEF(SYM_PIPE, "|", LEX_STATE_ALL),
LEX_TOKEN_DEF(SYM_COMMA, ",", LEX_STATE_ALL),
LEX_TOKEN_DEF(SYM_SEMICOLON, ";", LEX_STATE_ALL),
LEX_TOKEN_DEF(SYM_AT_LEFT_BRACE, "@{", LEX_STATE_ALL),
LEX_TOKEN_DEF(SYM_LEFT_BRACE, "{", LEX_STATE_ALL),
LEX_TOKEN_DEF(SYM_RIGHT_BRACE, "}", LEX_STATE_ALL),
LEX_TOKEN_DEF(SYM_LEFT_BRACKET, "[", LEX_STATES(LEX_STATE_ARITHMETIC)),
LEX_TOKEN_DEF(SYM_RIGHT_BRACKET, "]", LEX_STATES(LEX_STATE_ARITHMETIC)),
LEX_TOKEN_DEF(SYM_LEFT_PAREN, "(", LEX_STATE_ALL),
LEX_TOKEN_DEF(SYM_RIGHT_PAREN, ")", LEX_STATE_ALL),
LEX_TOKEN_DEF(SYM_EQUAL, "=", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_PLUS_EQUAL, "+=", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_HYPHEN_EQUAL, "-=", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_FORWARD_SLASH_EQUAL, "/=", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_ASTERISK_EQUAL, "*=", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_PERCENT_EQUAL, "%=", LEX_STATE_ARITHMETIC),
SYMBOL_DEF(SYM_BANG, "!", LEX_TOKEN_UNARY_ARITHMETIC),
SYMBOL_DEF(SYM_PLUS, "+", LEX_TOKEN_UNARY_ARITHMETIC),
SYMBOL_DEF(SYM_HYPHEN, "-", LEX_TOKEN_UNARY_ARITHMETIC),
SYMBOL_DEF(SYM_FORWARD_SLASH, "/", 0),
SYMBOL_DEF(SYM_ASTERISK, "*", 0),
SYMBOL_DEF(SYM_AMPERSAND, "&", 0),
SYMBOL_DEF(SYM_PERCENT, "%", 0),
SYMBOL_DEF(SYM_SQUOTE, "'", 0),
SYMBOL_DEF(SYM_DQUOTE, "\"", 0),
SYMBOL_DEF(SYM_HASH, "#", 0),
SYMBOL_DEF(SYM_DOLLAR, "$", LEX_TOKEN_UNARY_ARITHMETIC),
SYMBOL_DEF(SYM_DOLLAR_LEFT_PAREN, "$(", LEX_TOKEN_UNARY_ARITHMETIC),
SYMBOL_DEF(SYM_DOLLAR_LEFT_BRACE, "${", LEX_TOKEN_UNARY_ARITHMETIC),
SYMBOL_DEF(SYM_AT, "@", 0),
SYMBOL_DEF(SYM_PIPE, "|", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_COMMA, ",", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_SEMICOLON, ";", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_AT_LEFT_BRACE, "@{", LEX_TOKEN_UNARY_ARITHMETIC),
SYMBOL_DEF(SYM_AT_LEFT_PAREN, "@(", 0),
SYMBOL_DEF(SYM_LEFT_BRACE, "{", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_RIGHT_BRACE, "}", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_LEFT_BRACKET, "[", 0),
SYMBOL_DEF(SYM_RIGHT_BRACKET, "]", 0),
SYMBOL_DEF(SYM_QUESTION_LEFT_BRACKET, "?[", 0),
SYMBOL_DEF(SYM_LEFT_PAREN, "(", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_RIGHT_PAREN, ")", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_EQUAL, "=", 0),
SYMBOL_DEF(SYM_PLUS_EQUAL, "+=", 0),
SYMBOL_DEF(SYM_HYPHEN_EQUAL, "-=", 0),
SYMBOL_DEF(SYM_FORWARD_SLASH_EQUAL, "/=", 0),
SYMBOL_DEF(SYM_ASTERISK_EQUAL, "*=", 0),
SYMBOL_DEF(SYM_PERCENT_EQUAL, "%=", 0),
SYMBOL_DEF(SYM_DOT, ".", 0),
SYMBOL_DEF(SYM_COLON_COLON, "::", 0),
SYMBOL_DEF(SYM_DOT_DOT, "..", 0),
SYMBOL_DEF(SYM_QUESTION_DOT, "?.", 0),
};
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
extern const struct lex_state_type lex_statement_state;
extern const struct lex_state_type lex_expression_state;
extern const struct lex_state_type lex_command_state;
extern const struct lex_state_type lex_arithmetic_state;
extern const struct lex_state_type lex_string_state;
extern const struct lex_state_type lex_word_state;
extern const struct lex_state_type lex_hashtable_state;
static const struct lex_state_type *state_types[] = {
[LEX_STATE_STATEMENT] = &lex_statement_state,
[LEX_STATE_EXPRESSION] = &lex_expression_state,
[LEX_STATE_COMMAND] = &lex_command_state,
[LEX_STATE_ARITHMETIC] = &lex_arithmetic_state,
[LEX_STATE_STRING] = &lex_string_state,
[LEX_STATE_WORD] = &lex_word_state,
[LEX_STATE_HASHTABLE] = &lex_hashtable_state,
};
/* Record the current cursor position as the start of the token being
 * read. */
void set_token_start(struct lex_ctx *ctx)
{
	const void *cursor = &ctx->lex_cursor;
	void *start = &ctx->lex_start;

	memcpy(start, cursor, sizeof ctx->lex_cursor);
}
/* Record the current cursor position as the end of the token being read. */
void set_token_end(struct lex_ctx *ctx)
{
	const void *cursor = &ctx->lex_cursor;
	void *end = &ctx->lex_end;

	memcpy(end, cursor, sizeof ctx->lex_cursor);
}
/*
 * Return the enumerator name of a lexer state id as a string, for debug
 * output. Ids without a case below yield "<unknown>".
 */
static const char *lex_state_type_id_to_string(enum lex_state_type_id id)
{
	const char *label = "<unknown>";

#define STATE_NAME(v) \
	case v: \
		label = #v; \
		break

	switch (id) {
		STATE_NAME(LEX_STATE_STATEMENT);
		STATE_NAME(LEX_STATE_COMMAND);
		STATE_NAME(LEX_STATE_ARITHMETIC);
		STATE_NAME(LEX_STATE_STRING);
		STATE_NAME(LEX_STATE_WORD);
		STATE_NAME(LEX_STATE_HASHTABLE);
	default:
		break;
	}

#undef STATE_NAME

	return label;
}
struct lex_state *lex_state_push(
struct lex_ctx *ctx,
enum lex_state_type_id state_type)
enum lex_state_type_id state_type,
enum state_flags flags)
{
struct lex_state *state = malloc(sizeof *state);
if (!state) {
return NULL;
}
#if defined(VERBOSE)
printf("push(%s, 0x%04x)\n",
lex_state_type_id_to_string(state_type),
flags);
#endif
memset(state, 0x0, sizeof *state);
state->s_type = state_types[state_type];
state->s_flags = flags;
fx_queue_push_back(&ctx->lex_state, &state->s_entry);
if (state->s_type->s_begin) {
@@ -114,6 +189,12 @@ void lex_state_pop(struct lex_ctx *ctx)
struct lex_state *state = fx_unbox(struct lex_state, entry, s_entry);
#if defined(VERBOSE)
printf("pop(%s) -> %s\n",
lex_state_type_id_to_string(state->s_type->s_id),
lex_state_type_id_to_string(lex_state_get(ctx)->s_type->s_id));
#endif
if (state->s_type->s_end) {
state->s_type->s_end(ctx);
}
@@ -144,6 +225,12 @@ void lex_state_change(struct lex_ctx *ctx, enum lex_state_type_id type)
return;
}
#if defined(VERBOSE)
printf("change(%s -> %s)\n",
lex_state_type_id_to_string(state->s_type->s_id),
lex_state_type_id_to_string(type));
#endif
if (state->s_type->s_end) {
state->s_type->s_end(ctx);
}
@@ -173,6 +260,13 @@ fx_string *lex_state_get_tempstr(struct lex_ctx *ctx)
return state->s_tempstr;
}
/*
 * Append `tok` to the terminator list of `state`.
 *
 * The list holds at most LEX_STATE_MAX_TERMINATORS entries; when it is
 * already full the token is silently dropped.
 */
void lex_state_add_terminator(struct lex_state *state, unsigned int tok)
{
	if (state->s_nr_terminators >= LEX_STATE_MAX_TERMINATORS) {
		return;
	}

	state->s_terminators[state->s_nr_terminators] = tok;
	state->s_nr_terminators++;
}
static struct lex_symbol_node *get_symbol_node(
struct lex_symbol_node *node,
char c)
@@ -250,6 +344,10 @@ static struct lex_symbol_node *build_symbol_tree(void)
enum bshell_status status = BSHELL_SUCCESS;
for (size_t i = 0; i < nr_symbols; i++) {
if (!symbols[i].name) {
continue;
}
status = put_symbol(root, &symbols[i]);
if (status != BSHELL_SUCCESS) {
@@ -261,6 +359,33 @@ static struct lex_symbol_node *build_symbol_tree(void)
return root;
}
/*
 * Mark every keyword, operator and symbol listed by `state_type` as
 * enabled in that state, by OR-ing the state's id into the enabled_states
 * member of the matching definition.
 *
 * Each list is optional (may be NULL) and ends at its first zero entry.
 * Definitions are located by (id - index base) in the global tables.
 */
static void init_token_enabled_states(const struct lex_state_type *state_type)
{
	const unsigned int *id;

	for (id = state_type->s_keywords; id && *id; id++) {
		keywords[*id - __KW_INDEX_BASE].enabled_states
			|= state_type->s_id;
	}

	for (id = state_type->s_operators; id && *id; id++) {
		operators[*id - __TKOP_INDEX_BASE].enabled_states
			|= state_type->s_id;
	}

	for (id = state_type->s_symbols; id && *id; id++) {
		symbols[*id - __SYM_INDEX_BASE].enabled_states
			|= state_type->s_id;
	}
}
enum bshell_status lex_ctx_init(
struct lex_ctx *ctx,
enum lex_flags flags,
@@ -272,9 +397,17 @@ enum bshell_status lex_ctx_init(
ctx->lex_status = BSHELL_SUCCESS;
ctx->lex_buf = fx_stringstream_create();
ctx->lex_sym_tree = build_symbol_tree();
lex_state_push(ctx, LEX_STATE_STATEMENT);
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
ctx->lex_src = src;
ctx->lex_ch = FX_WCHAR_INVALID;
ctx->lex_cursor.c_row = ctx->lex_cursor.c_col = 1;
init_token_enabled_states(&lex_statement_state);
init_token_enabled_states(&lex_command_state);
init_token_enabled_states(&lex_arithmetic_state);
init_token_enabled_states(&lex_string_state);
init_token_enabled_states(&lex_word_state);
init_token_enabled_states(&lex_hashtable_state);
return BSHELL_SUCCESS;
}
@@ -341,12 +474,18 @@ fx_wchar peek_char_noread(struct lex_ctx *ctx)
static void __advance_char(struct lex_ctx *ctx, bool noread)
{
if (ctx->lex_ch != FX_WCHAR_INVALID) {
ctx->lex_ch = FX_WCHAR_INVALID;
if (ctx->lex_status != BSHELL_SUCCESS) {
return;
}
if (ctx->lex_status != BSHELL_SUCCESS) {
ctx->lex_cursor.c_col++;
if (ctx->lex_ch == '\n') {
ctx->lex_cursor.c_col = 1;
ctx->lex_cursor.c_row++;
}
if (ctx->lex_ch != FX_WCHAR_INVALID) {
ctx->lex_ch = FX_WCHAR_INVALID;
return;
}
@@ -380,7 +519,7 @@ bool convert_word_to_keyword(struct lex_token *tok)
for (size_t i = 0; i < nr_keywords; i++) {
const char *kw_str = keywords[i].name;
if (strcmp(kw_str, tok->tok_str) != 0) {
if (!kw_str || strcmp(kw_str, tok->tok_str) != 0) {
continue;
}
@@ -392,6 +531,22 @@ bool convert_word_to_keyword(struct lex_token *tok)
return false;
}
/*
 * Try to turn a word token into an operator token.
 *
 * If the token has a string value and get_operator_with_string() finds an
 * operator for it, the token's type is changed to TOK_OPERATOR, its
 * tok_operator member is set and true is returned. Otherwise the token is
 * left untouched and false is returned.
 */
bool convert_word_to_operator(struct lex_ctx *ctx, struct lex_token *tok)
{
	enum token_operator op = TKOP_NONE;

	if (lex_token_has_string_value(tok)) {
		op = get_operator_with_string(ctx, tok->tok_str);
	}

	if (op == TKOP_NONE) {
		return false;
	}

	lex_token_change_type(tok, TOK_OPERATOR);
	tok->tok_operator = op;
	return true;
}
static int get_int_base_by_prefix(const char **s)
{
#define CH(x) (tolower(value[x]))
@@ -442,6 +597,10 @@ static size_t get_int_multiplier_by_suffix(const char *suffix)
bool string_is_valid_number(const char *s, long long *out)
{
if (s[0] == '\0') {
return NULL;
}
int base = get_int_base_by_prefix(&s);
char *ep = NULL;
@@ -486,6 +645,29 @@ static struct lex_token *get_next_token(struct lex_ctx *ctx)
void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok)
{
enqueue_token_with_coordinates(
ctx,
tok,
&ctx->lex_start,
&ctx->lex_end);
}
extern void enqueue_token_with_coordinates(
struct lex_ctx *ctx,
struct lex_token *tok,
const struct char_cell *start,
const struct char_cell *end)
{
if (tok->tok_type == TOK_LINEFEED
&& ctx->lex_prev_token == TOK_LINEFEED) {
lex_token_destroy(tok);
return;
}
tok->tok_start = *start;
tok->tok_end = *end;
ctx->lex_prev_token = tok->tok_type;
if (tok && (ctx->lex_flags & LEX_PRINT_TOKENS)) {
print_lex_token(tok);
}
@@ -546,6 +728,7 @@ enum bshell_status read_var(
}
fx_string_append_wc(tmp, c);
set_token_end(ctx);
advance_char(ctx);
}
@@ -581,6 +764,7 @@ enum bshell_status read_braced_var(
}
fx_string_append_wc(tmp, c);
set_token_end(ctx);
advance_char(ctx);
}
@@ -599,93 +783,6 @@ enum bshell_status read_braced_var(
return BSHELL_SUCCESS;
}
#if 0
static enum bshell_status read_flag(struct lex_ctx *ctx)
{
fx_string *tmp = get_temp_string(ctx);
bool done = false;
while (!done) {
fx_wchar c = peek_char(ctx);
if (c == FX_WCHAR_INVALID) {
break;
}
if (fx_wchar_is_space(c)) {
break;
}
switch (c) {
case '{':
case '}':
case '(':
case ')':
case ';':
case ',':
case '|':
case '&':
case '$':
done = true;
break;
default:
break;
}
if (done) {
break;
}
fx_string_append_wc(tmp, c);
advance_char(ctx);
}
struct lex_token *tok = NULL;
if (fx_string_get_size(tmp, FX_STRLEN_NORMAL) == 1) {
tok = lex_token_create(TOK_SYMBOL);
tok->tok_symbol = SYM_HYPHEN;
} else {
tok = lex_token_create_with_string(
TOK_FLAG,
fx_string_get_cstr(tmp));
}
if (!tok) {
return BSHELL_ERR_NO_MEMORY;
}
#if 0
if (convert_word_to_int(tok)) {
tok->tok_int *= -1;
struct lex_token *prefix = lex_token_create(TOK_SYMBOL);
prefix->tok_symbol = SYM_HYPHEN;
enqueue_token(ctx, prefix);
}
#endif
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
static enum bshell_status read_interpolation_marker(struct lex_ctx *ctx)
{
enum bshell_status status = BSHELL_SUCCESS;
struct lex_state *state = lex_state_get(ctx);
struct lex_token *tok = NULL;
if (state->s_type != LEX_STATE_STRING) {
return BSHELL_ERR_INTERNAL_FAILURE;
}
/* start of a new interpolation */
if (!lex_state_push(ctx, LEX_STATE_STATEMENT)) {
return BSHELL_ERR_NO_MEMORY;
}
return BSHELL_SUCCESS;
}
#endif
enum bshell_status read_literal_string(
struct lex_ctx *ctx,
struct lex_token **out)
@@ -703,6 +800,7 @@ enum bshell_status read_literal_string(
if (c == '\'') {
fail = false;
done = true;
set_token_end(ctx);
advance_char(ctx);
break;
}
@@ -738,39 +836,25 @@ enum bshell_status read_line_comment(struct lex_ctx *lex)
return BSHELL_SUCCESS;
}
#if 0
/*
 * Handle a double-quote character.
 *
 * When the current lex state is LEX_STATE_STRING the quote closes the
 * string: the state is popped and a TOK_STR_END token is enqueued.
 * Otherwise the quote opens a string: a TOK_STR_START token is enqueued
 * and a LEX_STATE_STRING state is pushed.
 *
 * Returns BSHELL_ERR_NO_MEMORY when lex_state_push() reports failure and
 * BSHELL_SUCCESS otherwise.
 *
 * NOTE(review): the results of lex_token_create() are enqueued without a
 * NULL check, and `status` is never used.
 */
enum bshell_status read_dquote_marker(struct lex_ctx *ctx)
{
enum bshell_status status = BSHELL_SUCCESS;
struct lex_state *state = lex_state_get(ctx);
struct lex_token *tok = NULL;
if (state->s_type == LEX_STATE_STRING) {
/* already within an fstring */
lex_state_pop(ctx);
tok = lex_token_create(TOK_STR_END);
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
/* start of a new fstring */
tok = lex_token_create(TOK_STR_START);
enqueue_token(ctx, tok);
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
return BSHELL_ERR_NO_MEMORY;
}
return BSHELL_SUCCESS;
}
#endif
enum bshell_status read_word(struct lex_ctx *ctx, struct lex_token **out)
enum bshell_status read_word(
struct lex_ctx *ctx,
enum read_flags flags,
struct lex_token **out)
{
fx_string *tmp = get_temp_string(ctx);
bool word_is_number = false;
if (!(flags & READ_NO_SET_TOKEN_START)) {
set_token_start(ctx);
}
if (flags & READ_APPEND_HYPHEN) {
fx_string_append_c(tmp, '-');
}
bool number_recog = !(flags & READ_NO_NUMBER_RECOGNITION);
enum token_operator op = TKOP_NONE;
bool done = false;
while (!done) {
fx_wchar c = peek_char(ctx);
@@ -783,39 +867,32 @@ enum bshell_status read_word(struct lex_ctx *ctx, struct lex_token **out)
break;
}
if (word_is_number && char_can_begin_symbol(ctx, c)) {
done = true;
break;
}
if (char_can_begin_symbol(ctx, c)) {
done = true;
break;
}
switch (c) {
case '{':
case '}':
case '(':
case ')':
case ';':
case ',':
case '|':
case '&':
case '$':
const char *s = fx_string_get_cstr(tmp);
if (number_recog && string_is_valid_number(s, NULL)) {
if (char_can_begin_symbol_in_state(
ctx,
c,
LEX_STATE_ARITHMETIC)) {
done = true;
break;
default:
break;
}
}
if (done) {
if (!fx_wchar_is_alpha(c)) {
op = get_operator_with_string(ctx, s);
if (op != TKOP_NONE) {
done = true;
break;
}
}
fx_string_append_wc(tmp, c);
word_is_number
= string_is_valid_number(fx_string_get_cstr(tmp), NULL);
set_token_end(ctx);
advance_char(ctx);
}
@@ -830,12 +907,6 @@ enum bshell_status read_word(struct lex_ctx *ctx, struct lex_token **out)
struct lex_token *tok = lex_token_create_with_string(
TOK_WORD,
fx_string_get_cstr(tmp));
#if 0
bool converted = convert_word_to_keyword(tok);
if (!converted) {
converted = convert_word_to_int(tok);
}
#endif
*out = tok;
return BSHELL_SUCCESS;
@@ -846,6 +917,7 @@ enum bshell_status read_symbol(
const struct lex_token_def **out)
{
struct lex_state *state = lex_state_get(ctx);
set_token_start(ctx);
struct lex_symbol_node *node = ctx->lex_sym_tree;
char prev = 0;
@@ -858,12 +930,15 @@ enum bshell_status read_symbol(
struct lex_symbol_node *next = get_symbol_node(node, c);
if (!next
|| !(next->s_def->enabled_states & state->s_type->s_id)) {
|| (next->s_def
&& !(next->s_def->enabled_states
& state->s_type->s_id))) {
prev = c;
break;
}
node = next;
set_token_end(ctx);
advance_char(ctx);
prev = c;
}
@@ -872,40 +947,6 @@ enum bshell_status read_symbol(
return BSHELL_ERR_BAD_SYNTAX;
}
#if 0
struct lex_token *tok = NULL;
switch (node->s_def->id) {
case SYM_SQUOTE:
return read_literal_string(ctx);
case SYM_DQUOTE:
return read_dquote_marker(ctx);
case SYM_DOLLAR_LEFT_PAREN:
push_symbol(ctx, SYM_DOLLAR_LEFT_PAREN);
if (state->s_type == LEX_STATE_STRING) {
lex_state_push(ctx, LEX_STATE_STRING);
}
break;
case SYM_DOLLAR_LEFT_BRACE:
return read_braced_var(ctx, TOK_VAR);
case SYM_HASH:
return read_line_comment(ctx);
case SYM_LEFT_PAREN:
push_symbol(ctx, SYM_LEFT_PAREN);
lex_state_push(ctx, LEX_STATE_EXPRESSION);
break;
case SYM_RIGHT_PAREN:
push_symbol(ctx, SYM_RIGHT_PAREN);
lex_state_pop(ctx);
break;
case SYM_DOLLAR:
return read_var(ctx, TOK_VAR);
case SYM_AT:
return read_var(ctx, TOK_VAR_SPLAT);
default:
push_symbol(ctx, node->s_def->id);
break;
}
#endif
*out = node->s_def;
return BSHELL_SUCCESS;
}
@@ -916,6 +957,10 @@ bool char_can_begin_symbol_in_state(
enum lex_state_type_id state_type)
{
for (size_t i = 0; i < nr_symbols; i++) {
if (!symbols[i].name) {
continue;
}
if (symbols[i].name[0] != c) {
continue;
}
@@ -934,6 +979,243 @@ bool char_can_begin_symbol(struct lex_ctx *ctx, char c)
return char_can_begin_symbol_in_state(ctx, c, state->s_type->s_id);
}
/*
 * Test the flags of the first symbol whose name begins with `c`.
 *
 * The symbol table is searched in order; entries without a name are
 * ignored. Only the first entry starting with `c` is consulted, even if
 * later entries start with the same character.
 *
 * Returns true when that entry has every bit of `flags` set, and false
 * when it does not or when no entry starts with `c`.
 */
bool char_has_flags(struct lex_ctx *ctx, char c, enum lex_token_flags flags)
{
	size_t idx = 0;

	while (idx < nr_symbols) {
		if (symbols[idx].name && symbols[idx].name[0] == c) {
			return (symbols[idx].flags & flags) == flags;
		}

		idx++;
	}

	return false;
}
/*
 * Report whether keyword `kw` has every bit of `flags` set.
 *
 * Returns false when `kw` is not found in the keyword table.
 *
 * NOTE(review): the search over `keywords` is bounded by nr_symbols, the
 * symbol table's count. This looks like a copy/paste slip -- confirm
 * which count describes the keyword table.
 */
bool keyword_has_flags(
	struct lex_ctx *ctx,
	enum token_keyword kw,
	enum lex_token_flags flags)
{
	size_t idx = 0;

	while (idx < nr_symbols && keywords[idx].id != kw) {
		idx++;
	}

	if (idx == nr_symbols) {
		return false;
	}

	return (keywords[idx].flags & flags) == flags;
}
/*
 * Look up the flags recorded for keyword `kw`.
 *
 * Returns the flags of the first matching table entry, or 0 (no flags)
 * when `kw` is not found.
 *
 * NOTE(review): the search over `keywords` is bounded by nr_symbols, the
 * symbol table's count. This looks like a copy/paste slip -- confirm
 * which count describes the keyword table.
 */
enum lex_token_flags keyword_get_flags(
	struct lex_ctx *ctx,
	enum token_keyword kw)
{
	size_t idx = 0;

	while (idx < nr_symbols) {
		if (keywords[idx].id == kw) {
			return keywords[idx].flags;
		}

		idx++;
	}

	return 0;
}
/*
 * Report whether symbol `sym` has every bit of `flags` set.
 *
 * Returns false when `sym` is not found in the symbol table.
 */
bool symbol_has_flags(
	struct lex_ctx *ctx,
	enum token_symbol sym,
	enum lex_token_flags flags)
{
	size_t idx = 0;

	while (idx < nr_symbols && symbols[idx].id != sym) {
		idx++;
	}

	if (idx == nr_symbols) {
		return false;
	}

	return (symbols[idx].flags & flags) == flags;
}
/*
 * Look up the flags recorded for symbol `sym`.
 *
 * Returns the flags of the first matching table entry, or 0 (no flags)
 * when `sym` is not found.
 */
enum lex_token_flags symbol_get_flags(
	struct lex_ctx *ctx,
	enum token_symbol sym)
{
	size_t idx = 0;

	while (idx < nr_symbols) {
		if (symbols[idx].id == sym) {
			return symbols[idx].flags;
		}

		idx++;
	}

	return 0;
}
enum token_operator get_operator_with_string(struct lex_ctx *ctx, const char *s)
{
struct lex_state *state = lex_state_get(ctx);
for (size_t i = 0; i < nr_operators; i++) {
const char *op_str = operators[i].name;
if (!op_str || strcmp(op_str, s) != 0) {
continue;
}
if (!(operators[i].enabled_states & state->s_type->s_id)) {
continue;
}
return operators[i].id;
}
return false;
}
int compare_token_types(unsigned int a, unsigned int b)
{
if (a == b) {
return 2;
}
#define BETWEEN(v, lo, hi) ((v) >= (lo) && (v) <= (hi))
enum token_type a_type = TOK_NONE, b_type = TOK_NONE;
if (BETWEEN(a, __KW_INDEX_BASE, __KW_INDEX_LIMIT)) {
a_type = TOK_KEYWORD;
} else if (BETWEEN(a, __TKOP_INDEX_BASE, __TKOP_INDEX_LIMIT)) {
a_type = TOK_OPERATOR;
} else if (BETWEEN(a, __SYM_INDEX_BASE, __SYM_INDEX_LIMIT)) {
a_type = TOK_SYMBOL;
} else {
a_type = a;
}
if (BETWEEN(b, __KW_INDEX_BASE, __KW_INDEX_LIMIT)) {
b_type = TOK_KEYWORD;
} else if (BETWEEN(b, __TKOP_INDEX_BASE, __TKOP_INDEX_LIMIT)) {
b_type = TOK_OPERATOR;
} else if (BETWEEN(b, __SYM_INDEX_BASE, __SYM_INDEX_LIMIT)) {
b_type = TOK_SYMBOL;
} else {
b_type = b;
}
#undef BETWEEN
int result = 0;
if (a_type == b_type) {
if (a != a_type && b != b_type) {
result = 0;
} else {
result = a == b ? 2 : 1;
}
}
if (result < 0) {
result = 0;
}
return result;
}
/*
 * Apply the lex-state transition(s) triggered by `token`, once.
 *
 * On the first (non-recursive) pass the current state's terminator list
 * is checked first: a terminator pops the state and nothing else happens.
 * Otherwise the state's link table is scanned up to its TOK_NONE entry
 * and every link scored by compare_token_types() is considered. Only the
 * links sharing the highest non-zero score are applied, in table order.
 * On recursive passes only links flagged LINK_ALLOW_RECURSION qualify.
 *
 * Returns true when at least one transition was performed.
 */
static bool do_lex_state_transition(
	struct lex_ctx *ctx,
	unsigned int token,
	bool recursive)
{
	struct lex_state *state = lex_state_get(ctx);
	enum link_flags required_flags = 0;

	if (recursive) {
		required_flags |= LINK_ALLOW_RECURSION;
	} else {
		for (unsigned int i = 0; i < state->s_nr_terminators; i++) {
			if (state->s_terminators[i] == token) {
				lex_state_pop(ctx);
				return true;
			}
		}
	}

	const struct lex_state_link *table = state->s_type->s_links;
	if (!table) {
		return false;
	}

#define MAX_MATCHES 8
	const struct lex_state_link *best_matches[MAX_MATCHES] = {0};
	unsigned int match_count = 0;
	int best_score = 0;

	for (unsigned int i = 0; table[i].l_token != TOK_NONE; i++) {
		if ((table[i].l_flags & required_flags) != required_flags) {
			continue;
		}

		int score = compare_token_types(table[i].l_token, token);
		if (score == 0 || score < best_score) {
			continue;
		}

		if (score > best_score) {
			/* a better match supersedes everything found so far */
			best_score = score;
			match_count = 0;
		}

		/* `&&` keeps the message in the diagnostic while still testing
		 * the condition; with `||` the assertion could never fire. */
		assert(match_count < MAX_MATCHES
		       && "lex state has too many matches");
		if (match_count >= MAX_MATCHES) {
			/* assertions compiled out: drop the surplus match
			 * instead of writing past the array */
			continue;
		}

		best_matches[match_count++] = &table[i];
	}
#undef MAX_MATCHES

	bool result = false;
	for (unsigned int m = 0; m < match_count; m++) {
		const struct lex_state_link *link = best_matches[m];

		switch (link->l_type) {
		case LEX_STATE_LINK_POP:
			lex_state_pop(ctx);
			result = true;
			break;
		case LEX_STATE_LINK_PUSH: {
			struct lex_state *pushed = lex_state_push(
				ctx,
				link->l_target,
				link->l_target_flags);
			if (!pushed) {
				/* NOTE(review): a failed push cannot be reported
				 * through the bool result; it is treated as
				 * "no transition" so the new state is never
				 * dereferenced. */
				break;
			}

			for (unsigned int t = 0; link->l_terminators[t]; t++) {
				lex_state_add_terminator(
					pushed,
					link->l_terminators[t]);
			}

			result = true;
			break;
		}
		case LEX_STATE_LINK_CHANGE:
			lex_state_change(ctx, link->l_target);
			result = true;
			break;
		default:
			break;
		}
	}

	return result;
}
/*
 * Run every lex-state transition triggered by `token`.
 *
 * The first pass is non-recursive. For as long as a pass reports that a
 * transition happened, another pass is made in recursive mode against
 * whatever state is current by then.
 */
void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
{
	if (!do_lex_state_transition(ctx, token, false)) {
		return;
	}

	while (do_lex_state_transition(ctx, token, true)) {
		/* keep following links until none applies */
	}
}
static enum bshell_status read_string_content(struct lex_ctx *ctx)
{
fx_wchar c = FX_WCHAR_INVALID;
+136 -62
View File
@@ -1,5 +1,45 @@
#include "lex-internal.h"
static enum bshell_status statement_hyphen(struct lex_ctx *ctx)
{
fx_wchar c = peek_char(ctx);
if (!fx_wchar_is_alnum(c)) {
push_symbol(ctx, SYM_HYPHEN);
handle_lex_state_transition(ctx, SYM_HYPHEN);
return BSHELL_SUCCESS;
}
struct lex_token *tok = NULL;
enum bshell_status status = read_word(
ctx,
READ_NO_SET_TOKEN_START | READ_APPEND_HYPHEN,
&tok);
if (status != BSHELL_SUCCESS) {
return status;
}
unsigned int token_type = TOK_WORD;
if (convert_word_to_int(tok)) {
token_type = TOK_INT;
/* because of APPEND_HYPHEN (which is needed to ensure operator
* tokens are detected properly), the resulting number will be
* negative.
* this token will be preceded by a HYPHEN token, so the number
* must be positive */
tok->tok_int *= -1;
push_symbol(ctx, SYM_HYPHEN);
} else if (convert_word_to_operator(ctx, tok)) {
token_type = TOK_OPERATOR;
}
handle_lex_state_transition(ctx, token_type);
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
static enum bshell_status statement_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
@@ -9,8 +49,14 @@ static enum bshell_status statement_symbol(struct lex_ctx *ctx)
return status;
}
handle_lex_state_transition(ctx, sym->id);
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DQUOTE:
return BSHELL_SUCCESS;
case SYM_HYPHEN:
return statement_hyphen(ctx);
case SYM_SQUOTE:
status = read_literal_string(ctx, &tok);
if (status != BSHELL_SUCCESS) {
@@ -21,17 +67,7 @@ static enum bshell_status statement_symbol(struct lex_ctx *ctx)
case SYM_HASH:
return read_line_comment(ctx);
case SYM_DQUOTE:
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
return BSHELL_ERR_NO_MEMORY;
}
return BSHELL_SUCCESS;
case SYM_DOLLAR:
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
return BSHELL_ERR_NO_MEMORY;
}
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
@@ -40,10 +76,6 @@ static enum bshell_status statement_symbol(struct lex_ctx *ctx)
enqueue_token(ctx, tok);
return status;
case SYM_AT:
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
return BSHELL_ERR_NO_MEMORY;
}
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
@@ -52,27 +84,11 @@ static enum bshell_status statement_symbol(struct lex_ctx *ctx)
enqueue_token(ctx, tok);
return status;
case SYM_DOLLAR_LEFT_BRACE:
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
return BSHELL_ERR_NO_MEMORY;
}
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT_LEFT_BRACE:
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
return BSHELL_ERR_NO_MEMORY;
}
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
@@ -80,50 +96,29 @@ static enum bshell_status statement_symbol(struct lex_ctx *ctx)
}
push_symbol(ctx, sym->id);
switch (sym->id) {
case SYM_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_EXPRESSION);
return BSHELL_SUCCESS;
case SYM_LEFT_BRACE:
case SYM_DOLLAR_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
case SYM_RIGHT_PAREN:
case SYM_RIGHT_BRACE:
lex_state_pop(ctx);
return BSHELL_SUCCESS;
default:
break;
}
if (sym->enabled_states & LEX_STATE_COMMAND) {
lex_state_change(ctx, LEX_STATE_COMMAND);
} else if (sym->enabled_states & LEX_STATE_ARITHMETIC) {
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
}
return BSHELL_SUCCESS;
}
static enum bshell_status statement_word(struct lex_ctx *ctx)
{
struct lex_token *word = NULL;
enum bshell_status status = read_word(ctx, &word);
enum bshell_status status = read_word(ctx, 0, &word);
if (status != BSHELL_SUCCESS) {
return status;
}
bool converted = convert_word_to_keyword(word);
if (!converted) {
converted = convert_word_to_int(word);
struct lex_state *state = lex_state_get(ctx);
bool enable_keywords = !(state->s_flags & STATEMENT_F_DISABLE_KEYWORDS);
unsigned int token = TOK_WORD;
if (enable_keywords && convert_word_to_keyword(word)) {
token = word->tok_keyword;
} else if (convert_word_to_int(word)) {
token = TOK_INT;
}
if (converted) {
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
} else {
lex_state_change(ctx, LEX_STATE_COMMAND);
}
handle_lex_state_transition(ctx, token);
enqueue_token(ctx, word);
return BSHELL_SUCCESS;
@@ -134,11 +129,13 @@ static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
fx_wchar c = peek_char(ctx);
bool newline = false;
set_token_start(ctx);
while (fx_wchar_is_space(c)) {
if (c == '\n') {
newline = true;
}
set_token_end(ctx);
advance_char_noread(ctx);
c = peek_char_noread(ctx);
}
@@ -146,6 +143,7 @@ static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
if (newline) {
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
enqueue_token(ctx, tok);
handle_lex_state_transition(ctx, TOK_LINEFEED);
return BSHELL_SUCCESS;
}
@@ -153,10 +151,86 @@ static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
return statement_symbol(ctx);
}
if (char_has_flags(ctx, c, LEX_TOKEN_UNARY_ARITHMETIC)) {
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
return BSHELL_SUCCESS;
}
return statement_word(ctx);
}
/*
 * State-transition table for the statement state.
 *
 * Each entry names a token (or token class such as TOK_KEYWORD) and the
 * state change to perform when it is scanned: LINK_PUSH enters a nested
 * state, LINK_CHANGE replaces the current one, and LINK_PUSH_WITH_TERM
 * additionally names a token that terminates the pushed state. The table
 * ends with LINK_END.
 *
 * NOTE(review): SYM_AT_LEFT_BRACE and SYM_LEFT_PAREN each appear twice.
 * Presumably both entries apply, in table order (change first, then
 * push) -- confirm against the transition code.
 */
static const struct lex_state_link links[] = {
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
/* arithmetic tokens */
LINK_CHANGE(TOK_KEYWORD, LEX_STATE_ARITHMETIC),
LINK_CHANGE(TOK_INT, LEX_STATE_ARITHMETIC),
LINK_PUSH(SYM_DOLLAR, LEX_STATE_ARITHMETIC, 0),
LINK_PUSH(SYM_DOLLAR_LEFT_BRACE, LEX_STATE_ARITHMETIC, 0),
LINK_CHANGE(SYM_AT_LEFT_BRACE, LEX_STATE_ARITHMETIC),
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
LINK_PUSH(SYM_AT, LEX_STATE_ARITHMETIC, 0),
LINK_CHANGE(SYM_LEFT_PAREN, LEX_STATE_ARITHMETIC),
LINK_CHANGE(SYM_BANG, LEX_STATE_ARITHMETIC),
/* a parenthesised sub-statement: keywords are disabled inside it and
 * `)` ends it */
LINK_PUSH_WITH_TERM(
SYM_LEFT_PAREN,
LEX_STATE_STATEMENT,
STATEMENT_F_DISABLE_KEYWORDS,
SYM_RIGHT_PAREN),
/* statement tokens */
LINK_PUSH(SYM_LEFT_BRACE, LEX_STATE_STATEMENT, 0),
LINK_PUSH_WITH_TERM(
SYM_DOLLAR_LEFT_PAREN,
LEX_STATE_STATEMENT,
0,
SYM_RIGHT_PAREN),
/* command tokens */
LINK_CHANGE(KW_FUNC, LEX_STATE_COMMAND),
LINK_CHANGE(SYM_AMPERSAND, LEX_STATE_COMMAND),
LINK_CHANGE(TOK_WORD, LEX_STATE_COMMAND),
LINK_END,
};
/* Keywords listed for the statement state (referenced by
 * lex_statement_state.s_keywords). KW_NONE is the last entry --
 * presumably the list terminator; confirm against the code that walks
 * the list. */
static const unsigned int keywords[] = {
KW_FUNC,
KW_IF,
KW_ELSEIF,
KW_ELSE,
KW_NONE,
};
/* Operators listed for the statement state (referenced by
 * lex_statement_state.s_operators). TKOP_NONE is the last entry --
 * presumably the list terminator. */
static const unsigned int operators[] = {
TKOP_BNOT,
TKOP_NOT,
TKOP_NONE,
};
/* Symbols listed for the statement state (referenced by
 * lex_statement_state.s_symbols). SYM_NONE is the last entry --
 * presumably the list terminator.
 *
 * NOTE(review): the link table for this state also reacts to SYM_DOLLAR,
 * SYM_DOLLAR_LEFT_BRACE and SYM_DOLLAR_LEFT_PAREN, which are not listed
 * here -- confirm whether that is intentional. */
static const unsigned int symbols[] = {
SYM_AMPERSAND,
SYM_BANG,
SYM_SQUOTE,
SYM_DQUOTE,
SYM_HASH,
SYM_AT,
SYM_AT_LEFT_BRACE,
SYM_PIPE,
SYM_COMMA,
SYM_SEMICOLON,
SYM_LEFT_BRACE,
SYM_RIGHT_BRACE,
SYM_LEFT_BRACKET,
SYM_RIGHT_BRACKET,
SYM_LEFT_PAREN,
SYM_RIGHT_PAREN,
SYM_NONE,
};
/* Descriptor for the statement lex state: its id, the function that
 * produces the next token while this state is active, and the
 * transition/keyword/operator/symbol tables defined above. */
const struct lex_state_type lex_statement_state = {
.s_id = LEX_STATE_STATEMENT,
.s_pump_token = statement_pump_token,
.s_links = links,
.s_keywords = keywords,
.s_operators = operators,
.s_symbols = symbols,
};
+22 -17
View File
@@ -9,20 +9,15 @@ static enum bshell_status string_symbol(struct lex_ctx *ctx)
return status;
}
handle_lex_state_transition(ctx, sym->id);
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DOLLAR_LEFT_PAREN:
status = push_symbol(ctx, sym->id);
if (status != BSHELL_SUCCESS) {
return status;
}
lex_state_push(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
case SYM_DQUOTE:
lex_state_pop(ctx);
return BSHELL_SUCCESS;
case SYM_DOLLAR_LEFT_PAREN:
return push_symbol(ctx, sym->id);
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
@@ -45,14 +40,6 @@ static enum bshell_status string_symbol(struct lex_ctx *ctx)
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
@@ -66,6 +53,7 @@ static enum bshell_status string_content(struct lex_ctx *ctx)
{
fx_wchar c = FX_WCHAR_INVALID;
fx_string *temp = lex_state_get_tempstr(ctx);
set_token_start(ctx);
fx_string_clear(temp);
while (1) {
@@ -80,6 +68,7 @@ static enum bshell_status string_content(struct lex_ctx *ctx)
}
fx_string_append_wc(temp, c);
set_token_end(ctx);
advance_char(ctx);
}
@@ -128,9 +117,25 @@ static enum bshell_status string_pump_token(struct lex_ctx *ctx)
return string_content(ctx);
}
/*
 * State-transition table for the string state: `$(` enters a nested
 * statement state and a double quote leaves the string state. The table
 * ends with LINK_END.
 *
 * NOTE(review): no terminator token is named for the statement state
 * pushed by `$(` here -- confirm how the closing `)` returns to the
 * string state.
 */
static const struct lex_state_link links[] = {
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
LINK_POP(SYM_DQUOTE),
LINK_END,
};
/* Symbols listed for the string state (referenced by
 * lex_string_state.s_symbols). SYM_NONE is the last entry -- presumably
 * the list terminator. */
static const unsigned int symbols[] = {
SYM_DOLLAR,
SYM_DOLLAR_LEFT_PAREN,
SYM_DOLLAR_LEFT_BRACE,
SYM_DQUOTE,
SYM_NONE,
};
/* Descriptor for the string lex state: its id, the begin/end hooks, the
 * function that produces the next token while this state is active, and
 * the transition and symbol tables defined above. */
const struct lex_state_type lex_string_state = {
.s_id = LEX_STATE_STRING,
.s_begin = string_begin,
.s_end = string_end,
.s_pump_token = string_pump_token,
.s_links = links,
.s_symbols = symbols,
};
+162
View File
@@ -0,0 +1,162 @@
#include "lex-internal.h"
static enum bshell_status word_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DOLLAR_LEFT_PAREN:
status = push_symbol(ctx, sym->id);
if (status != BSHELL_SUCCESS) {
return status;
}
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
return BSHELL_SUCCESS;
case SYM_RIGHT_PAREN:
lex_state_pop(ctx);
status = push_symbol(ctx, sym->id);
if (status != BSHELL_SUCCESS) {
return status;
}
return BSHELL_SUCCESS;
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
break;
}
return BSHELL_ERR_BAD_SYNTAX;
}
static enum bshell_status word_content(struct lex_ctx *ctx)
{
fx_wchar c = FX_WCHAR_INVALID;
fx_string *temp = lex_state_get_tempstr(ctx);
set_token_start(ctx);
fx_string_clear(temp);
while (1) {
c = peek_char(ctx);
if (c == FX_WCHAR_INVALID) {
/* EOF without end of word */
ctx->lex_status = BSHELL_ERR_BAD_SYNTAX;
}
if (fx_wchar_is_space(c)) {
break;
}
if (char_can_begin_symbol(ctx, c)) {
break;
}
fx_string_append_wc(temp, c);
set_token_end(ctx);
advance_char(ctx);
}
if (fx_string_get_size(temp, FX_STRLEN_NORMAL) == 0) {
return BSHELL_SUCCESS;
}
struct lex_token *tok = lex_token_create_with_string(
TOK_WORD,
fx_string_get_cstr(temp));
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
/*
 * Begin hook of the word state: enqueue a TOK_WORD_START token whose
 * start and end coordinates are both the context's lex_start position.
 *
 * Returns BSHELL_ERR_NO_MEMORY when the token cannot be created.
 */
static enum bshell_status word_begin(struct lex_ctx *ctx)
{
	struct lex_token *marker = lex_token_create(TOK_WORD_START);

	if (marker == NULL) {
		return BSHELL_ERR_NO_MEMORY;
	}

	enqueue_token_with_coordinates(
		ctx,
		marker,
		&ctx->lex_start,
		&ctx->lex_start);

	return BSHELL_SUCCESS;
}
/*
 * End hook of the word state: enqueue a TOK_WORD_END token whose start
 * and end coordinates are both the context's lex_end position.
 *
 * Returns BSHELL_ERR_NO_MEMORY when the token cannot be created.
 */
static enum bshell_status word_end(struct lex_ctx *ctx)
{
	struct lex_token *marker = lex_token_create(TOK_WORD_END);

	if (marker == NULL) {
		return BSHELL_ERR_NO_MEMORY;
	}

	enqueue_token_with_coordinates(
		ctx,
		marker,
		&ctx->lex_end,
		&ctx->lex_end);

	return BSHELL_SUCCESS;
}
/*
 * Produce the next token while in the word state.
 *
 * Whitespace, or a character flagged LEX_TOKEN_TERMINATES_WORD, ends the
 * word: the state is popped and the character is left unconsumed.
 * Otherwise the character is handed to the symbol scanner if it can
 * begin a symbol in this state, and to the plain-content scanner if not.
 */
static enum bshell_status word_pump_token(struct lex_ctx *ctx)
{
	const fx_wchar next = peek_char(ctx);

	const bool ends_word
		= fx_wchar_is_space(next)
		  || char_has_flags(ctx, next, LEX_TOKEN_TERMINATES_WORD);
	if (ends_word) {
		lex_state_pop(ctx);
		return BSHELL_SUCCESS;
	}

	return char_can_begin_symbol(ctx, next) ? word_symbol(ctx)
						: word_content(ctx);
}
/* Symbols listed for the word state (referenced by
 * lex_word_state.s_symbols). SYM_NONE is the last entry -- presumably
 * the list terminator.
 *
 * NOTE(review): the word symbol handler has a case for SYM_AT, which is
 * not listed here -- confirm whether that is intentional. */
static const unsigned int symbols[] = {
SYM_AMPERSAND,
SYM_HASH,
SYM_DOLLAR,
SYM_DOLLAR_LEFT_PAREN,
SYM_DOLLAR_LEFT_BRACE,
SYM_PIPE,
SYM_COMMA,
SYM_SEMICOLON,
SYM_LEFT_BRACE,
SYM_RIGHT_BRACE,
SYM_LEFT_PAREN,
SYM_RIGHT_PAREN,
SYM_NONE,
};
/* Descriptor for the word lex state: its id, the begin/end hooks that
 * emit the TOK_WORD_START / TOK_WORD_END markers, the function that
 * produces the next token while this state is active, and the symbol
 * list defined above. No transition table is set for this state. */
const struct lex_state_type lex_word_state = {
.s_id = LEX_STATE_WORD,
.s_begin = word_begin,
.s_end = word_end,
.s_pump_token = word_pump_token,
.s_symbols = symbols,
};
+25
View File
@@ -1,6 +1,7 @@
#include "parse.h"
#include "../ast/ast.h"
#include "../debug.h"
#include "lex.h"
#include "syntax.h"
#include "token.h"
@@ -23,8 +24,32 @@ void parse_ctx_cleanup(struct parse_ctx *ctx)
/*
 * Read the next statement from the token stream.
 *
 * One leading `;` and one leading linefeed are consumed if present; their
 * absence is not an error.
 *
 * Returns the parsed statement node, or NULL when parsing fails.
 */
struct ast_node *parse_ctx_read_node(struct parse_ctx *ctx)
{
	/* optional separators: the results are deliberately ignored */
	(void)parse_symbol(ctx, SYM_SEMICOLON);
	(void)parse_linefeed(ctx);

	struct ast_node *node = NULL;
	if (!parse_statement(ctx, &node)) {
		return NULL;
	}

	return node;
}
/*
 * Record a syntax error on the parse context and describe it on stderr.
 *
 * The context's status becomes BSHELL_ERR_BAD_SYNTAX. The printf-style
 * message is written after a "PARSE: " prefix, followed by a line naming
 * the token the parser is currently looking at (or EOF when there is
 * none).
 */
void report_error(struct parse_ctx *ctx, const char *format, ...)
{
	ctx->p_status = BSHELL_ERR_BAD_SYNTAX;

	fputs("PARSE: ", stderr);

	va_list args;
	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);

	fputc('\n', stderr);

	struct lex_token *upcoming = peek_token(ctx);
	fputs(" peek_token = ", stderr);

	if (upcoming == NULL) {
		fputs(" EOF\n", stderr);
		return;
	}

	print_lex_token(upcoming);
}
+26 -8
View File
@@ -2,15 +2,15 @@
#define PARSE_SYNTAX_H_
#include "../ast/ast.h"
#include "../operator.h"
#include "lex.h"
#include "parse.h"
#include "token.h"
#include <stdbool.h>
#include <stdio.h>
enum parse_operand_flags {
OPERAND_BASIC = 0x01u,
};
extern void report_error(struct parse_ctx *ctx, const char *format, ...);
extern struct lex_token *peek_token(struct parse_ctx *ctx);
extern enum token_type peek_token_type(struct parse_ctx *ctx);
@@ -28,21 +28,39 @@ extern bool peek_int(struct parse_ctx *ctx);
extern bool parse_linefeed(struct parse_ctx *ctx);
extern bool parse_symbol(struct parse_ctx *ctx, enum token_symbol sym);
extern bool parse_keyword(struct parse_ctx *ctx, enum token_keyword kw);
extern bool parse_int(struct parse_ctx *ctx, long long *out);
extern bool parse_word(struct parse_ctx *ctx, struct lex_token **out);
extern bool parse_var(struct parse_ctx *ctx, struct lex_token **out);
extern bool parse_flag(struct parse_ctx *ctx, struct lex_token **out);
extern bool peek_arith_expr(struct parse_ctx *ctx);
extern bool parse_arith_expr(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_operand(
extern bool parse_arith_value(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_arith_expr(
struct parse_ctx *ctx,
enum parse_operand_flags flags,
enum operator_precedence minimum_precedence,
struct ast_node **out);
extern bool parse_statement(struct parse_ctx *ctx, struct ast_node **out);
extern bool peek_keyword_expr(struct parse_ctx *ctx);
extern bool parse_keyword_expr(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_if(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_func(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_fstring(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_block(struct parse_ctx *ctx, struct ast_node **out);
extern bool peek_command(struct parse_ctx *ctx);
extern bool parse_pipeline(
struct parse_ctx *ctx,
struct ast_node *first_item,
struct ast_node **out);
extern bool parse_command(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_cmdcall(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_redirect(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_expr(struct parse_ctx *ctx, struct ast_node **out);
extern bool peek_statement(struct parse_ctx *ctx);
extern bool parse_statement(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_statement_list(struct parse_ctx *ctx, struct ast_node **out);
#endif
+883 -8
View File
@@ -1,27 +1,902 @@
#include "../../debug.h"
#include "../../operator.h"
#include "../syntax.h"
#include <fx/queue.h>
/* Kind of expression component, as recorded in
 * expr_parse_ctx.expr_prev. */
enum expr_component {
/* nothing recorded */
EXPR_C_NONE = 0,
/* an operand (value) */
EXPR_C_OPERAND,
/* a binary operator */
EXPR_C_BINARY_OP,
/* a unary operator */
EXPR_C_UNARY_OP,
};
/* Working state for parsing a single arithmetic expression. */
struct expr_parse_ctx {
/* expr_operator_stack: operator nodes waiting to be emitted;
 * expr_out_queue: operand and operator nodes in output order */
fx_queue expr_operator_stack, expr_out_queue;
/* kind of the most recently parsed component; used to reject two
 * operands in a row */
enum expr_component expr_prev;
/* NOTE(review): presumably the symbol/operator id of the previous
 * component -- not referenced in the visible code */
unsigned int expr_prev_symbol;
/* NOTE(review): presumably the lowest operator precedence this
 * expression will consume -- confirm against the parsing loop */
enum operator_precedence expr_minimum_precedence;
/* NOTE(review): presumably set when parsing has finished / has failed */
bool expr_done, expr_fail;
};
static bool op_node_is_complete(struct op_ast_node *node)
{
if (!node->n_op) {
return false;
}
switch (node->n_op->op_arity) {
case OPA_UNARY:
return node->n_right != NULL;
case OPA_BINARY:
return (node->n_left != NULL && node->n_right != NULL);
default:
return false;
}
}
/*
 * Build an expression tree from the queues collected in `ctx`.
 *
 * 1. Operators are popped from the back of expr_operator_stack and
 *    appended to expr_out_queue, until the stack is empty or an operator
 *    whose precedence is below `minimum_precedence` is met (that operator
 *    is put back on the stack).
 * 2. expr_out_queue is drained front to back. Operands and operator nodes
 *    that are already complete are moved to a temporary queue. An
 *    incomplete operator takes its right operand (and, for binary
 *    operators, its left operand) from the back of the temporary queue
 *    and is then appended to it.
 * 3. For every operator still on expr_operator_stack, one entry is moved
 *    from the front of the temporary queue back to expr_out_queue.
 * 4. The entry at the back of the temporary queue is stored in `*out` as
 *    the root of the tree.
 *
 * Returns false when an operator entry unboxes to NULL or when the
 * temporary queue runs out in step 3; true otherwise.
 *
 * NOTE(review): popped entries are passed to fx_unbox() without a NULL
 * check; this presumably relies on fx_unbox() mapping NULL to NULL --
 * confirm.
 */
static bool finalise_expr(
struct expr_parse_ctx *ctx,
struct ast_node **out,
enum operator_precedence minimum_precedence)
{
fx_queue_entry *entry = NULL;
while (true) {
entry = fx_queue_pop_back(&ctx->expr_operator_stack);
if (!entry) {
break;
}
struct op_ast_node *node
= fx_unbox(struct op_ast_node, entry, n_base.n_entry);
if (!node) {
/* this should never happen */
return false;
}
const struct operator_info *op = node->n_op;
/* if we aren't processing operators below a certain precedence
 * then leave them on the stack and stop here. */
if (op->op_precedence < minimum_precedence) {
fx_queue_push_back(&ctx->expr_operator_stack, entry);
break;
}
fx_queue_push_back(&ctx->expr_out_queue, entry);
}
/* temporary operand queue used while linking operators to operands */
fx_queue q = FX_QUEUE_INIT;
fx_queue_entry *tmp = NULL;
entry = fx_queue_first(&ctx->expr_out_queue);
/* NOTE(review): `i` is never used */
int i = 0;
while (entry) {
struct ast_node *item
= fx_unbox(struct ast_node, entry, n_entry);
/* remember the successor before the entry is unlinked */
fx_queue_entry *next = fx_queue_next(entry);
fx_queue_delete(&ctx->expr_out_queue, entry);
/* if the node is an operand, just push it to a
 * temporary queue and come back to it later. */
if (item->n_type != AST_OP) {
/* operand */
fx_queue_push_back(&q, &item->n_entry);
goto next;
}
const struct operator_info *op = NULL;
struct op_ast_node *op_node = (struct op_ast_node *)item;
/* if an operator node is already complete (i.e. it
 * already has all the operands it needs), it can be
 * pushed to the operand queue as-is */
if (op_node_is_complete(op_node)) {
fx_queue_push_back(&q, &item->n_entry);
goto next;
}
/* otherwise, pop the relevant operands from the operand
 * queue... */
op = op_node->n_op;
tmp = fx_queue_pop_back(&q);
op_node->n_right = fx_unbox(struct ast_node, tmp, n_entry);
if (op_node->n_right) {
op_node->n_right->n_parent = (struct ast_node *)op_node;
#if 0
ast_node_extend_bounds_recursive(
(struct ivy_ast_node *)op_node,
(struct ivy_ast_node *)tmp);
#endif
}
if (op->op_arity == OPA_BINARY) {
tmp = fx_queue_pop_back(&q);
op_node->n_left
= fx_unbox(struct ast_node, tmp, n_entry);
if (op_node->n_left) {
op_node->n_left->n_parent
= (struct ast_node *)op_node;
#if 0
ast_node_extend_bounds_recursive(
(struct ivy_ast_node *)op_node,
(struct ivy_ast_node *)tmp);
#endif
}
}
/* ...and push the newly-completed operator node to the
 * operand queue */
fx_queue_push_back(&q, &op_node->n_base.n_entry);
next:
entry = next;
}
#if 0
debug_printf("** after hierarchisation:\n");
print_expr_queues(state);
#endif
/* if we are not processing operators below a certain precedence,
 * those operators will still be on the parser state's operator stack,
 * but their operands have just been moved to the temporary operand
 * queue used above. move them back to the parser state's output queue
 * here so they can be used later. */
entry = fx_queue_first(&ctx->expr_operator_stack);
while (entry) {
fx_queue_entry *entry2 = fx_queue_pop_front(&q);
if (!entry2) {
return false;
}
fx_queue_push_back(&ctx->expr_out_queue, entry2);
entry = fx_queue_next(entry);
}
#if 0
debug_printf("** after de-linearisation:\n");
print_expr_queues(state);
ivy_ast_node_print(*expr_tree);
debug_printf("------\n");
#endif
/* the final node remaining on the temp operand stack is the
 * root node of the new expression tree */
tmp = fx_queue_pop_back(&q);
*out = fx_unbox(struct ast_node, tmp, n_entry);
return true;
}
bool peek_arith_expr(struct parse_ctx *ctx)
{
switch (peek_token_type(ctx)) {
case TOK_SYMBOL:
switch (peek_unknown_symbol(ctx)) {
case SYM_PLUS:
case SYM_HYPHEN:
return true;
default:
return false;
}
return operator_get_by_token(peek_unknown_symbol(ctx));
case TOK_INT:
case TOK_DOUBLE:
case TOK_STRING:
case TOK_VAR:
case TOK_STR_START:
case TOK_OPERATOR:
return true;
default:
return false;
}
}
bool parse_arith_expr(struct parse_ctx *ctx, struct ast_node **out)
/*
 * Parse a parenthesised expression: `(` expr `)`.
 *
 * On success the inner expression node is stored in `*out` and true is
 * returned. On failure an error is reported, any node parsed so far is
 * destroyed, `*out` is left untouched and false is returned.
 */
static bool parse_subexpr(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!parse_symbol(ctx, SYM_LEFT_PAREN)) {
		report_error(ctx, "expected `(`");
		/* without this return, parsing carried on after the error */
		return false;
	}

	struct ast_node *v = NULL;
	if (!parse_expr(ctx, &v)) {
		report_error(ctx, "error while parsing parenthesis expression");
		return false;
	}

	if (!parse_symbol(ctx, SYM_RIGHT_PAREN)) {
		report_error(ctx, "expected `)` after parenthesis expression");
		/* do not leak the expression that was just parsed */
		ast_node_destroy(v);
		return false;
	}

	*out = v;
	return true;
}
static bool parse_stmt_block(struct parse_ctx *ctx, struct ast_node **out)
{
if (!parse_symbol(ctx, SYM_DOLLAR_LEFT_PAREN)) {
report_error(ctx, "expected `$(`");
return false;
}
if (parse_symbol(ctx, SYM_RIGHT_PAREN)) {
*out = ast_node_create(AST_NULL);
return true;
}
struct ast_node *v = NULL;
if (!parse_statement_list(ctx, &v)) {
return false;
}
if (!parse_symbol(ctx, SYM_RIGHT_PAREN)) {
report_error(ctx, "expected ')' after subexpression");
ast_node_destroy(v);
return false;
}
*out = v;
return true;
}
/*
 * Parse a hashtable literal: `@{` key `=` value ... `}`.
 *
 * A key is either a bare word (stored as a string node) or an arithmetic
 * value; a value is any expression. Items are separated by `;` or a
 * linefeed, and a linefeed may follow the opening brace.
 *
 * On success the hashtable node is stored in `*out` and true is returned.
 * On failure the partially built table is destroyed and false is
 * returned; a syntax error is reported through report_error(), and an
 * allocation failure sets the context status to BSHELL_ERR_NO_MEMORY.
 */
static bool parse_hashtable(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!parse_symbol(ctx, SYM_AT_LEFT_BRACE)) {
		report_error(ctx, "expected `@{`");
		return false;
	}

	parse_linefeed(ctx);

	struct hashtable_ast_node *table
		= (struct hashtable_ast_node *)ast_node_create(AST_HASHTABLE);
	if (!table) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		return false;
	}

	bool ok = true;

	while (ok) {
		if (parse_symbol(ctx, SYM_RIGHT_BRACE)) {
			break;
		}

		parse_linefeed(ctx);

		struct hashtable_item_ast_node *item
			= (struct hashtable_item_ast_node *)ast_node_create(
				AST_HASHTABLE_ITEM);
		if (!item) {
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			ok = false;
			break;
		}

		struct lex_token *tok = NULL;
		if (parse_word(ctx, &tok)) {
			struct string_ast_node *key
				= (struct string_ast_node *)ast_node_create(
					AST_STRING);
			if (!key) {
				/* NOTE(review): `tok` has already been handed
				 * over by parse_word() and is not released
				 * here -- confirm how tokens are freed. */
				ctx->p_status = BSHELL_ERR_NO_MEMORY;
				ast_node_destroy((struct ast_node *)item);
				ok = false;
				break;
			}

			key->n_value = tok;
			item->n_key = (struct ast_node *)key;
		} else if (!parse_arith_value(ctx, &item->n_key)) {
			report_error(ctx, "failed to parse hashtable key");
			ast_node_destroy((struct ast_node *)item);
			ok = false;
			break;
		}

		if (!parse_symbol(ctx, SYM_EQUAL)) {
			report_error(ctx, "expected `=` after hashtable key");
			ast_node_destroy((struct ast_node *)item);
			ok = false;
			break;
		}

		if (!parse_expr(ctx, &item->n_value)) {
			report_error(ctx, "failed to parse hashtable value");
			ast_node_destroy((struct ast_node *)item);
			ok = false;
			break;
		}

		/* the table owns the item from here on */
		fx_queue_push_back(&table->n_items, &item->n_base.n_entry);

		if (parse_symbol(ctx, SYM_RIGHT_BRACE)) {
			break;
		}

		if (!parse_linefeed(ctx) && !parse_symbol(ctx, SYM_SEMICOLON)) {
			report_error(
				ctx,
				"expected `;`, `}`, or linefeed after "
				"hashtable value");
			ok = false;
			break;
		}
	}

	if (!ok) {
		ast_node_destroy((struct ast_node *)table);
		return false;
	}

	*out = (struct ast_node *)table;
	return true;
}
/*
 * Parse an array literal: `@(` value { `,` value } `)`.
 *
 * Each item is an arithmetic value. `@()` yields an empty array.
 *
 * On success the array node is stored in `*out` and true is returned. On
 * failure the partially built array is destroyed and false is returned; a
 * syntax error is reported through report_error(), and failing to
 * allocate the array node sets the context status to
 * BSHELL_ERR_NO_MEMORY.
 */
static bool parse_array(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!parse_symbol(ctx, SYM_AT_LEFT_PAREN)) {
		report_error(ctx, "expected `@(`");
		return false;
	}

	struct array_ast_node *array
		= (struct array_ast_node *)ast_node_create(AST_ARRAY);
	if (!array) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		return false;
	}

	size_t nr_items = 0;
	bool ok = true;

	while (ok) {
		if (parse_symbol(ctx, SYM_RIGHT_PAREN)) {
			break;
		}

		/* every item after the first must be preceded by a comma */
		if (nr_items && !parse_symbol(ctx, SYM_COMMA)) {
			report_error(
				ctx,
				"expected `,` or `)` after array value");
			ok = false;
			/* stop here: carrying on would parse another item
			 * and possibly report a second, misleading error */
			break;
		}

		struct ast_node *item = NULL;
		if (!parse_arith_value(ctx, &item)) {
			report_error(ctx, "failed to parse array item");
			ok = false;
			break;
		}

		fx_queue_push_back(&array->n_items, &item->n_entry);
		nr_items++;
	}

	if (!ok) {
		ast_node_destroy((struct ast_node *)array);
		return false;
	}

	*out = (struct ast_node *)array;
	return true;
}
/*
 * Parse a formatted string: TOK_STR_START, any number of arithmetic
 * values, TOK_STR_END.
 *
 * Returns false without touching `*out` when the upcoming token is not
 * TOK_STR_START, or when the string node cannot be allocated (the context
 * status is then set to BSHELL_ERR_NO_MEMORY). When an element fails to
 * parse, the string node is destroyed, `*out` is set to NULL and false is
 * returned. On success `*out` receives the string node.
 */
bool parse_fstring(struct parse_ctx *ctx, struct ast_node **out)
{
	if (peek_token_type(ctx) != TOK_STR_START) {
		return false;
	}

	discard_token(ctx);

	struct fstring_ast_node *fstring
		= (struct fstring_ast_node *)ast_node_create(AST_FSTRING);
	if (fstring == NULL) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		return false;
	}

	for (;;) {
		if (peek_token_type(ctx) == TOK_STR_END) {
			discard_token(ctx);
			*out = (struct ast_node *)fstring;
			return true;
		}

		struct ast_node *element = NULL;
		if (!parse_arith_value(ctx, &element)) {
			ast_node_destroy((struct ast_node *)fstring);
			*out = NULL;
			return false;
		}

		fx_queue_push_back(&fstring->n_elements, &element->n_entry);
	}
}
bool parse_arith_value(struct parse_ctx *ctx, struct ast_node **out)
{
struct lex_token *tok = peek_token(ctx);
switch (tok->tok_type) {
case TOK_INT: {
struct int_ast_node *v
= (struct int_ast_node *)ast_node_create(AST_INT);
v->n_value = claim_token(ctx);
*out = (struct ast_node *)v;
return true;
}
case TOK_DOUBLE: {
struct double_ast_node *v
= (struct double_ast_node *)ast_node_create(AST_DOUBLE);
v->n_value = claim_token(ctx);
*out = (struct ast_node *)v;
return true;
}
case TOK_STRING: {
struct string_ast_node *v
= (struct string_ast_node *)ast_node_create(AST_STRING);
v->n_value = claim_token(ctx);
*out = (struct ast_node *)v;
return true;
}
case TOK_VAR: {
struct var_ast_node *v
= (struct var_ast_node *)ast_node_create(AST_VAR);
v->n_ident = claim_token(ctx);
*out = (struct ast_node *)v;
return true;
}
case TOK_STR_START:
return parse_fstring(ctx, out);
case TOK_SYMBOL:
switch (tok->tok_symbol) {
case SYM_LEFT_PAREN:
return parse_subexpr(ctx, out);
case SYM_DOLLAR_LEFT_PAREN:
return parse_stmt_block(ctx, out);
case SYM_AT_LEFT_BRACE:
return parse_hashtable(ctx, out);
case SYM_AT_LEFT_PAREN:
return parse_array(ctx, out);
case SYM_LEFT_BRACE:
return parse_block(ctx, out);
default:
report_error(ctx, "token is not a valid operand");
return false;
}
break;
default:
report_error(ctx, "token is not a valid operand");
return false;
}
}
/*
 * Parse one operand and append it to the expression's output queue.
 *
 * Fails with an error if the previous expression component was also an
 * operand. Note that expr_prev is updated to EXPR_C_OPERAND before the value
 * itself is parsed, so it is set even when parsing the value fails.
 */
static bool parse_operand(struct parse_ctx *ctx, struct expr_parse_ctx *expr)
{
	const bool follows_operand = (expr->expr_prev == EXPR_C_OPERAND);
	if (follows_operand) {
		report_error(ctx, "encountered two operands in a row");
		return false;
	}

	expr->expr_prev = EXPR_C_OPERAND;

	struct ast_node *operand = NULL;
	const bool parsed = parse_arith_value(ctx, &operand);
	if (parsed) {
		fx_queue_push_back(&expr->expr_out_queue, &operand->n_entry);
	}

	return parsed;
}
/*
 * Push an operator node onto the expression's operator stack.
 *
 * Before pushing, operators already on top of the stack are moved to the
 * output queue for as long as they have a higher precedence than the new
 * operator, or the same precedence when the new operator is
 * left-associative.
 *
 * Does nothing if the node has no operator info attached.
 *
 * NOTE(review): if the entry on top of the stack is not an AST_OP node the
 * function returns without pushing `node` at all -- confirm this is intended.
 */
void arith_push_operator(struct expr_parse_ctx *state, struct op_ast_node *node)
{
	const struct operator_info *incoming = node->n_op;
	if (incoming == NULL) {
		return;
	}

	fx_queue_entry *top = NULL;
	while ((top = fx_queue_last(&state->expr_operator_stack)) != NULL) {
		struct ast_node *stacked
			= fx_unbox(struct ast_node, top, n_entry);
		if (stacked->n_type != AST_OP) {
			return;
		}

		const struct operator_info *stacked_op
			= ((struct op_ast_node *)stacked)->n_op;

		const bool binds_looser
			= stacked_op->op_precedence < incoming->op_precedence;
		const bool equal_but_not_left
			= stacked_op->op_precedence == incoming->op_precedence
			  && incoming->op_associativity != ASSOCIATIVITY_LEFT;
		if (binds_looser || equal_but_not_left) {
			/* the stacked operator must stay below the new one */
			break;
		}

		fx_queue_delete(&state->expr_operator_stack, top);
		fx_queue_push_back(&state->expr_out_queue, top);
	}

	fx_queue_push_back(&state->expr_operator_stack, &node->n_base.n_entry);
}
static bool parse_unary_operator(
struct parse_ctx *ctx,
struct expr_parse_ctx *expr)
{
struct lex_token *tok = peek_token(ctx);
const struct operator_info *op = NULL;
switch (tok->tok_type) {
case TOK_SYMBOL:
op = operator_get_by_token(tok->tok_symbol);
break;
case TOK_OPERATOR:
switch (tok->tok_operator) {
case TKOP_SPLIT:
op = operator_get_by_id(OP_USPLIT);
break;
case TKOP_JOIN:
op = operator_get_by_id(OP_USPLIT);
break;
default:
op = operator_get_by_token(tok->tok_operator);
break;
}
break;
default:
break;
}
if (expr->expr_prev == EXPR_C_OPERAND
&& op->op_location == OPL_PREFIX) {
report_error(
ctx,
"unexpected operand before unary "
"operator");
return false;
}
if (!op) {
report_error(ctx, "unknown unary operator");
return false;
}
if (op->op_precedence < expr->expr_minimum_precedence) {
expr->expr_done = true;
return true;
}
expr->expr_prev = EXPR_C_BINARY_OP;
struct op_ast_node *op_node
= (struct op_ast_node *)ast_node_create(AST_OP);
if (!op_node) {
return false;
}
op_node->n_op = op;
discard_token(ctx);
arith_push_operator(expr, op_node);
return true;
}
static bool parse_binary_operator(
struct parse_ctx *ctx,
struct expr_parse_ctx *expr)
{
struct lex_token *tok = peek_token(ctx);
const struct operator_info *op = NULL;
switch (tok->tok_type) {
case TOK_SYMBOL:
op = operator_get_by_token(tok->tok_symbol);
break;
case TOK_OPERATOR:
switch (tok->tok_operator) {
case TKOP_SPLIT:
op = operator_get_by_id(OP_BSPLIT);
break;
case TKOP_JOIN:
op = operator_get_by_id(OP_BJOIN);
break;
default:
op = operator_get_by_token(tok->tok_operator);
break;
}
default:
break;
}
if (!op) {
report_error(ctx, "unknown binary operator");
return false;
}
if (op->op_precedence < expr->expr_minimum_precedence) {
expr->expr_done = true;
return true;
}
if (expr->expr_prev != EXPR_C_OPERAND) {
switch (op->op_id) {
case OP_PAREN:
break;
default:
report_error(
ctx,
"expected operand before binary "
"operator");
return false;
}
}
expr->expr_prev = EXPR_C_BINARY_OP;
struct op_ast_node *op_node
= (struct op_ast_node *)ast_node_create(AST_OP);
if (!op_node) {
return false;
}
op_node->n_op = op;
discard_token(ctx);
arith_push_operator(expr, op_node);
return true;
}
/*
 * Placeholder for parsing a call expression.
 *
 * Neither argument is used and no tokens are consumed; the function always
 * returns false.
 */
static bool parse_call(struct parse_ctx *ctx, struct expr_parse_ctx *expr)
{
	(void)ctx;
	(void)expr;

	const bool parsed = false;
	return parsed;
}
/*
 * Handle a `,` inside an expression by collecting a comma-separated list of
 * items into an AST_ARRAY node.
 *
 * If the array precedence is below the expression's minimum precedence, the
 * expression is marked done and nothing is consumed. Otherwise the
 * expression parsed so far is finalised into the first item, each following
 * `, <expr>` is appended, and the array is queued as a single operand.
 */
static bool parse_comma(struct parse_ctx *ctx, struct expr_parse_ctx *expr)
{
	if (expr->expr_minimum_precedence > PRECEDENCE_ARRAY) {
		expr->expr_done = true;
		return true;
	}

	struct ast_node *element = NULL;
	if (!finalise_expr(expr, &element, PRECEDENCE_ARRAY)) {
		report_error(ctx, "failed to collect first array item.");
		return false;
	}

	struct array_ast_node *list
		= (struct array_ast_node *)ast_node_create(AST_ARRAY);
	if (list == NULL) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		ast_node_destroy(element);
		return false;
	}

	/* finalising may yield no node at all; only queue a real item */
	if (element != NULL) {
		fx_queue_push_back(&list->n_items, &element->n_entry);
	}

	while (parse_symbol(ctx, SYM_COMMA)) {
		if (!parse_arith_expr(ctx, PRECEDENCE_ARRAY + 1, &element)) {
			report_error(ctx, "failed to parse array item.");
			ast_node_destroy((struct ast_node *)list);
			return false;
		}

		fx_queue_push_back(&list->n_items, &element->n_entry);
	}

	fx_queue_push_back(&expr->expr_out_queue, &list->n_base.n_entry);
	expr->expr_prev = EXPR_C_OPERAND;

	return true;
}
/*
 * Debug helper: print every node on the operator stack, then every node in
 * the output queue, each in queue order from first to last.
 */
static void dump_expr_ctx(struct expr_parse_ctx *expr)
{
	fx_queue_entry *cur = NULL;

	printf("op stack:\n");
	for (cur = fx_queue_first(&expr->expr_operator_stack); cur;
	     cur = fx_queue_next(cur)) {
		print_ast_node(fx_unbox(struct ast_node, cur, n_entry));
	}

	printf("out queue:\n");
	for (cur = fx_queue_first(&expr->expr_out_queue); cur;
	     cur = fx_queue_next(cur)) {
		print_ast_node(fx_unbox(struct ast_node, cur, n_entry));
	}
}
/*
 * Report whether a command may appear at the current point of an
 * expression: either nothing has been recorded as the previous symbol yet,
 * or the previous symbol was one of the assignment symbols.
 */
static bool can_use_command(struct expr_parse_ctx *ctx)
{
	bool allowed = false;

	switch (ctx->expr_prev_symbol) {
	case TOK_NONE:
	case SYM_EQUAL:
	case SYM_PLUS_EQUAL:
	case SYM_HYPHEN_EQUAL:
	case SYM_ASTERISK_EQUAL:
	case SYM_FORWARD_SLASH_EQUAL:
	case SYM_PERCENT_EQUAL:
		allowed = true;
		break;
	default:
		break;
	}

	return allowed;
}
bool parse_arith_expr(
struct parse_ctx *ctx,
enum operator_precedence minimum_precedence,
struct ast_node **out)
{
struct expr_parse_ctx expr = {
.expr_minimum_precedence = minimum_precedence,
};
while (!expr.expr_fail && !expr.expr_done) {
struct lex_token *tok = peek_token(ctx);
if (!tok) {
break;
}
switch (tok->tok_type) {
case TOK_LINEFEED:
expr.expr_done = true;
break;
case TOK_WORD: {
if (!can_use_command(&expr)) {
report_error(
ctx,
"expected a value expression");
expr.expr_fail = true;
break;
}
struct ast_node *value = NULL;
if (!parse_command(ctx, &value)) {
expr.expr_fail = true;
break;
}
fx_queue_push_back(
&expr.expr_out_queue,
&value->n_entry);
break;
}
case TOK_VAR:
case TOK_INT:
case TOK_DOUBLE:
case TOK_STRING:
case TOK_STR_START:
expr.expr_fail = !parse_operand(ctx, &expr);
expr.expr_prev_symbol = tok->tok_type;
break;
case TOK_OPERATOR:
switch (tok->tok_operator) {
/* these two are special cases, as they are both
* unary AND binary operators */
case TKOP_SPLIT:
case TKOP_JOIN:
if (expr.expr_prev == EXPR_C_OPERAND) {
expr.expr_fail = !parse_binary_operator(
ctx,
&expr);
} else {
expr.expr_fail = !parse_unary_operator(
ctx,
&expr);
}
break;
case TKOP_BNOT:
case TKOP_NOT:
expr.expr_fail
= !parse_unary_operator(ctx, &expr);
break;
default:
expr.expr_fail
= !parse_binary_operator(ctx, &expr);
break;
}
expr.expr_prev_symbol = tok->tok_operator;
break;
case TOK_SYMBOL:
switch (tok->tok_symbol) {
case SYM_SEMICOLON:
case SYM_AMPERSAND:
case SYM_PIPE:
case SYM_RIGHT_PAREN:
case SYM_RIGHT_BRACE:
case SYM_RIGHT_BRACKET:
expr.expr_done = true;
break;
case SYM_COMMA:
expr.expr_fail = !parse_comma(ctx, &expr);
break;
case SYM_LEFT_PAREN: {
if (expr.expr_prev == EXPR_C_OPERAND) {
return parse_call(ctx, &expr);
}
struct ast_node *v = NULL;
expr.expr_fail = !parse_subexpr(ctx, &v);
if (expr.expr_fail) {
break;
}
fx_queue_push_back(
&expr.expr_out_queue,
&v->n_entry);
expr.expr_prev = EXPR_C_OPERAND;
break;
}
case SYM_DOLLAR_LEFT_PAREN:
case SYM_AT_LEFT_PAREN:
case SYM_AT_LEFT_BRACE:
expr.expr_fail = !parse_operand(ctx, &expr);
break;
default: {
const struct operator_info *op
= operator_get_by_token(
tok->tok_symbol);
if (op->op_arity == OPA_BINARY) {
expr.expr_fail = !parse_binary_operator(
ctx,
&expr);
} else {
expr.expr_fail = !parse_unary_operator(
ctx,
&expr);
}
break;
}
}
expr.expr_prev_symbol = tok->tok_symbol;
break;
default:
report_error(
ctx,
"unexpected token in arithmetic "
"expression");
expr.expr_fail = true;
break;
}
}
if (expr.expr_fail) {
/* TODO cleanup */
return false;
}
struct ast_node *value = NULL;
if (!finalise_expr(&expr, &value, PRECEDENCE_ASSIGN)) {
report_error(ctx, "failed to convert expression to AST");
/* TODO cleanup */
return false;
}
if (PRECEDENCE_PIPELINE >= expr.expr_minimum_precedence) {
if (peek_symbol(ctx, SYM_PIPE)) {
return parse_pipeline(ctx, value, out);
}
}
*out = value;
return true;
}
+30
View File
@@ -0,0 +1,30 @@
#include "../syntax.h"
bool parse_block(struct parse_ctx *ctx, struct ast_node **out)
{
if (!parse_symbol(ctx, SYM_LEFT_BRACE)) {
return false;
}
struct block_ast_node *block
= (struct block_ast_node *)ast_node_create(AST_BLOCK);
while (1) {
parse_linefeed(ctx);
if (parse_symbol(ctx, SYM_RIGHT_BRACE)) {
break;
}
struct ast_node *stmt = NULL;
if (!parse_statement(ctx, &stmt)) {
ast_node_destroy((struct ast_node *)block);
return false;
}
fx_queue_push_back(&block->n_statements, &stmt->n_entry);
}
*out = (struct ast_node *)block;
return true;
}
+114 -18
View File
@@ -1,7 +1,63 @@
#include "../../debug.h"
#include "../syntax.h"
#include <fx/encoding.h>
/*
 * Parse a formatted word: a TOK_WORD_START token, a sequence of plain word
 * fragments and embedded values, and a closing TOK_WORD_END token.
 *
 * Plain TOK_WORD fragments become AST_WORD nodes; anything else is parsed
 * as a value. All elements are collected into an AST_FSTRING node.
 *
 * Returns false without consuming anything if the next token is not
 * TOK_WORD_START. On success *out receives the node. If an element fails to
 * parse or cannot be allocated, the node is destroyed and *out is set to
 * NULL.
 */
static bool parse_fword(struct parse_ctx *ctx, struct ast_node **out)
{
	if (peek_token_type(ctx) != TOK_WORD_START) {
		return false;
	}

	discard_token(ctx);

	struct fstring_ast_node *fstring
		= (struct fstring_ast_node *)ast_node_create(AST_FSTRING);
	if (!fstring) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		return false;
	}

	bool ok = true;
	while (ok) {
		if (peek_token_type(ctx) == TOK_WORD_END) {
			discard_token(ctx);
			break;
		}

		struct ast_node *item = NULL;
		if (peek_token_type(ctx) == TOK_WORD) {
			struct word_ast_node *n
				= (struct word_ast_node *)ast_node_create(
					AST_WORD);
			if (!n) {
				ctx->p_status = BSHELL_ERR_NO_MEMORY;
				ok = false;
				break;
			}

			n->n_value = claim_token(ctx);
			item = (struct ast_node *)n;
		} else {
			if (!parse_arith_value(ctx, &item)) {
				ok = false;
				break;
			}
		}

		fx_queue_push_back(&fstring->n_elements, &item->n_entry);
	}

	if (!ok) {
		ast_node_destroy((struct ast_node *)fstring);
		fstring = NULL;
	}

	*out = (struct ast_node *)fstring;
	return ok;
}
static bool parse_cmdcall_arg(struct parse_ctx *ctx, struct ast_node **out)
{
if (ctx->p_status != BSHELL_SUCCESS) {
@@ -16,6 +72,10 @@ static bool parse_cmdcall_arg(struct parse_ctx *ctx, struct ast_node **out)
struct ast_node *arg = NULL;
switch (tok->tok_type) {
case TOK_WORD_START:
return parse_fword(ctx, out);
case TOK_STR_START:
return parse_fstring(ctx, out);
case TOK_WORD: {
struct word_ast_node *n
= (struct word_ast_node *)ast_node_create(AST_WORD);
@@ -29,21 +89,6 @@ static bool parse_cmdcall_arg(struct parse_ctx *ctx, struct ast_node **out)
return true;
}
#if 0
case TOK_FLAG: {
struct word_ast_node *n
= (struct word_ast_node *)ast_node_create(AST_WORD);
if (!n) {
ctx->p_status = BSHELL_ERR_NO_MEMORY;
return false;
}
n->n_value = claim_token(ctx);
*out = (struct ast_node *)n;
return true;
}
#endif
case TOK_VAR: {
struct var_ast_node *n
= (struct var_ast_node *)ast_node_create(AST_VAR);
@@ -84,7 +129,23 @@ static bool parse_cmdcall_arg(struct parse_ctx *ctx, struct ast_node **out)
return true;
}
case TOK_SYMBOL:
switch (tok->tok_symbol) {
case SYM_LEFT_PAREN:
case SYM_LEFT_BRACE:
case SYM_DOLLAR_LEFT_PAREN:
case SYM_AT_LEFT_BRACE:
case SYM_AT_LEFT_PAREN:
return parse_arith_value(ctx, out);
default:
report_error(
ctx,
"encountered unsupported command arg");
return false;
}
break;
default:
report_error(ctx, "encountered unsupported command arg");
return false;
}
@@ -218,9 +279,13 @@ bool parse_redirect(struct parse_ctx *ctx, struct ast_node **out)
bool append = false;
if (fx_wchar_is_number(*str)) {
in_fd = *str - '0';
in_fd = 0;
while (fx_wchar_is_number(*str)) {
in_fd *= 10;
in_fd += *str - '0';
str++;
}
}
if (*str != '>') {
return false;
@@ -275,7 +340,7 @@ static bool peek_cmdcall_item(struct parse_ctx *ctx, bool unrestricted)
case TOK_VAR:
case TOK_VAR_SPLAT:
case TOK_STRING:
case TOK_STR_START:
case TOK_WORD_START:
return unrestricted;
case TOK_SYMBOL:
switch (peek_unknown_symbol(ctx)) {
@@ -285,6 +350,9 @@ static bool peek_cmdcall_item(struct parse_ctx *ctx, bool unrestricted)
case SYM_PIPE:
case SYM_AMPERSAND:
case SYM_SEMICOLON:
case SYM_RIGHT_PAREN:
case SYM_RIGHT_BRACE:
case SYM_RIGHT_BRACKET:
return false;
default:
return true;
@@ -379,7 +447,7 @@ bool parse_command(struct parse_ctx *ctx, struct ast_node **out)
struct pipeline_ast_node *pipeline = NULL;
while (1) {
if (parse_symbol(ctx, SYM_SEMICOLON) || parse_linefeed(ctx)) {
if (peek_symbol(ctx, SYM_SEMICOLON) || peek_linefeed(ctx)) {
break;
}
@@ -417,3 +485,31 @@ bool parse_command(struct parse_ctx *ctx, struct ast_node **out)
return true;
}
bool parse_pipeline(
struct parse_ctx *ctx,
struct ast_node *first_item,
struct ast_node **out)
{
struct pipeline_ast_node *pipeline
= (struct pipeline_ast_node *)ast_node_create(AST_PIPELINE);
fx_queue_push_back(&pipeline->n_stages, &first_item->n_entry);
while (1) {
if (!parse_symbol(ctx, SYM_PIPE)) {
break;
}
struct ast_node *cmdcall = NULL;
if (!parse_cmdcall(ctx, &cmdcall)) {
ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
return false;
}
fx_queue_push_back(&pipeline->n_stages, &cmdcall->n_entry);
}
*out = (struct ast_node *)pipeline;
return true;
}
+15
View File
@@ -0,0 +1,15 @@
#include "../syntax.h"
/*
 * Parse an expression: an arithmetic expression if one appears to start
 * here, otherwise (or if that attempt fails) a command.
 *
 * Returns true and stores the node in *out if either form parsed.
 */
bool parse_expr(struct parse_ctx *ctx, struct ast_node **out)
{
	if (peek_arith_expr(ctx)
	    && parse_arith_expr(ctx, PRECEDENCE_MINIMUM, out)) {
		return true;
	}

	/* reached when no arithmetic expression starts here, or when
	 * parsing it failed */
	if (peek_command(ctx)) {
		return parse_command(ctx, out);
	}

	return false;
}
+85
View File
@@ -0,0 +1,85 @@
#include "../syntax.h"
/*
 * Parse a function definition:
 *
 *     func <name> ( [<var> {, <var>}] ) { <statements> }
 *
 * Returns false without reporting an error if the next token is not the
 * `func` keyword. On success *out receives the AST_FUNC node, which holds
 * the name token, one AST_VAR node per parameter, and the body block.
 *
 * On any later failure an error is reported and/or p_status is set, every
 * node built so far is destroyed and false is returned.
 */
bool parse_func(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!parse_keyword(ctx, KW_FUNC)) {
		return false;
	}

	struct lex_token *name = NULL;
	if (!parse_word(ctx, &name)) {
		report_error(ctx, "expected function identifier");
		return false;
	}

	struct func_ast_node *func
		= (struct func_ast_node *)ast_node_create(AST_FUNC);
	if (!func) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		lex_token_destroy(name);
		return false;
	}

	/* stored on the node from here on, so destroying `func` is the only
	 * cleanup the paths below need for the name */
	func->n_name = name;

	if (!parse_symbol(ctx, SYM_LEFT_PAREN)) {
		report_error(ctx, "expected `(` after function identifier");
		ast_node_destroy((struct ast_node *)func);
		return false;
	}

	size_t nr_args = 0;
	bool ok = true;

	while (1) {
		if (parse_symbol(ctx, SYM_RIGHT_PAREN)) {
			break;
		}

		/* every parameter after the first must follow a comma */
		if (nr_args > 0 && !parse_symbol(ctx, SYM_COMMA)) {
			report_error(
				ctx,
				"expected `,` or `)` after parameter name");
			ok = false;
			break;
		}

		struct lex_token *param_token = NULL;
		struct var_ast_node *param_node = NULL;

		if (!parse_var(ctx, &param_token)) {
			report_error(ctx, "expected parameter variable");
			ok = false;
			break;
		}

		param_node = (struct var_ast_node *)ast_node_create(AST_VAR);
		if (!param_node) {
			ok = false;
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			lex_token_destroy(param_token);
			break;
		}

		param_node->n_ident = param_token;
		fx_queue_push_back(
			&func->n_params,
			&param_node->n_base.n_entry);

		/* count the parameter so the separator check above applies
		 * to the next one; without this the `,` is never consumed
		 * and any function with two or more parameters is rejected */
		nr_args++;
	}

	if (!ok) {
		if (ctx->p_status == BSHELL_SUCCESS) {
			ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
		}

		ast_node_destroy((struct ast_node *)func);
		return false;
	}

	if (!parse_block(ctx, &func->n_body)) {
		report_error(ctx, "failed to parse function body");
		ast_node_destroy((struct ast_node *)func);
		return false;
	}

	*out = (struct ast_node *)func;
	return true;
}
+30
View File
@@ -126,6 +126,36 @@ bool parse_keyword(struct parse_ctx *ctx, enum token_keyword kw)
return true;
}
bool parse_word(struct parse_ctx *ctx, struct lex_token **out)
{
struct lex_token *tok = peek_token(ctx);
if (!tok) {
return false;
}
if (tok->tok_type != TOK_WORD) {
return false;
}
*out = claim_token(ctx);
return true;
}
bool parse_var(struct parse_ctx *ctx, struct lex_token **out)
{
struct lex_token *tok = peek_token(ctx);
if (!tok) {
return false;
}
if (tok->tok_type != TOK_VAR) {
return false;
}
*out = claim_token(ctx);
return true;
}
bool parse_int(struct parse_ctx *ctx, long long *out)
{
struct lex_token *tok = peek_token(ctx);
+110
View File
@@ -0,0 +1,110 @@
#include "../syntax.h"
/*
 * Append a new branch with the given condition and body to an if-group.
 *
 * Returns false, leaving `group`, `cond` and `body` untouched, if the
 * branch node cannot be allocated.
 */
static bool add_branch(
	struct if_ast_node *group,
	struct ast_node *cond,
	struct ast_node *body)
{
	struct if_branch_ast_node *entry
		= (struct if_branch_ast_node *)ast_node_create(AST_IF_BRANCH);

	if (entry != NULL) {
		entry->n_cond = cond;
		entry->n_body = body;
		fx_queue_push_back(&group->n_branches, &entry->n_base.n_entry);
	}

	return entry != NULL;
}
/*
 * Parse an if-statement:
 *
 *     if ( <expr> ) { ... } { elseif <expr> { ... } } [ else { ... } ]
 *
 * Returns false without reporting an error if the next token is not the
 * `if` keyword. On success *out receives an AST_IF node holding one branch
 * per clause; the `else` branch is stored with a NULL condition.
 *
 * On failure an error is reported and/or p_status is set, everything built
 * so far is destroyed and false is returned.
 */
bool parse_if(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!parse_keyword(ctx, KW_IF)) {
		return false;
	}

	if (!parse_symbol(ctx, SYM_LEFT_PAREN)) {
		report_error(ctx, "expected `(` after `if`");
		return false;
	}

	struct ast_node *first_cond = NULL;
	struct ast_node *first_body = NULL;

	if (!parse_expr(ctx, &first_cond)) {
		report_error(ctx, "invalid if condition");
		return false;
	}

	if (!parse_symbol(ctx, SYM_RIGHT_PAREN)) {
		report_error(ctx, "expected `)` after if-condition");
		ast_node_destroy(first_cond);
		return false;
	}

	if (!parse_block(ctx, &first_body)) {
		report_error(ctx, "invalid if body");
		ast_node_destroy(first_cond);
		return false;
	}

	struct if_ast_node *group
		= (struct if_ast_node *)ast_node_create(AST_IF);
	if (group == NULL) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		ast_node_destroy(first_cond);
		ast_node_destroy(first_body);
		return false;
	}

	if (!add_branch(group, first_cond, first_body)) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		ast_node_destroy(first_cond);
		ast_node_destroy(first_body);
		ast_node_destroy((struct ast_node *)group);
		return false;
	}

	for (;;) {
		struct ast_node *cond = NULL;
		struct ast_node *body = NULL;
		bool last_branch = false;

		if (parse_keyword(ctx, KW_ELSE)) {
			/* `else` has no condition and ends the chain */
			last_branch = true;
		} else if (parse_keyword(ctx, KW_ELSEIF)) {
			if (!parse_expr(ctx, &cond)) {
				report_error(
					ctx,
					"invalid conditional expression");
				ast_node_destroy((struct ast_node *)group);
				return false;
			}
		} else {
			/* no further clause follows */
			break;
		}

		if (!parse_block(ctx, &body)) {
			report_error(ctx, "invalid conditional body");
			if (cond != NULL) {
				ast_node_destroy(cond);
			}
			ast_node_destroy((struct ast_node *)group);
			return false;
		}

		if (!add_branch(group, cond, body)) {
			report_error(ctx, "failed to add branch to if-group");
			if (cond != NULL) {
				ast_node_destroy(cond);
			}
			ast_node_destroy(body);
			ast_node_destroy((struct ast_node *)group);
			return false;
		}

		if (last_branch) {
			break;
		}
	}

	*out = (struct ast_node *)group;
	return true;
}
+21
View File
@@ -0,0 +1,21 @@
#include "../syntax.h"
/*
 * Report whether the next token is a keyword, i.e. whether
 * peek_unknown_keyword() yields anything other than KW_NONE.
 */
bool peek_keyword_expr(struct parse_ctx *ctx)
{
	if (peek_unknown_keyword(ctx) == KW_NONE) {
		return false;
	}

	return true;
}
bool parse_keyword_expr(struct parse_ctx *ctx, struct ast_node **out)
{
switch (peek_unknown_keyword(ctx)) {
case KW_NONE:
return false;
case KW_IF:
return parse_if(ctx, out);
case KW_FUNC:
return parse_func(ctx, out);
default:
ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
return false;
}
}
+86 -2
View File
@@ -1,15 +1,99 @@
#include "../syntax.h"
/*
 * Report whether a statement appears to start at the next token: a keyword
 * expression, an arithmetic expression, or a command, checked in that order.
 */
bool peek_statement(struct parse_ctx *ctx)
{
	return peek_keyword_expr(ctx)
	       || peek_arith_expr(ctx)
	       || peek_command(ctx);
}
/*
 * Parse a single statement.
 *
 * The statement forms are tried in order: keyword expression, arithmetic
 * expression, command. A later form is only attempted while no earlier one
 * has succeeded.
 *
 * Returns true and stores the node in *out on success. Returns false if no
 * token is available, if the chosen form failed to parse, or (after
 * reporting an error) if the next token cannot start any statement form.
 */
bool parse_statement(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!peek_token(ctx)) {
		/* error, or EOF */
		return false;
	}

	/* stays true only if no statement form recognised the token */
	bool unknown = true;
	bool ok = false;

	if (peek_keyword_expr(ctx)) {
		unknown = false;
		ok = parse_keyword_expr(ctx, out);
	}

	if (!ok && peek_arith_expr(ctx)) {
		unknown = false;
		ok = parse_arith_expr(ctx, PRECEDENCE_MINIMUM, out);
	}

	if (!ok && peek_command(ctx)) {
		unknown = false;
		ok = parse_command(ctx, out);
	}

	if (!ok && unknown) {
		report_error(
			ctx,
			"encountered unknown token while parsing statement");
		return false;
	}

	return ok;
}
/*
 * Unwrap a statement list that holds exactly one statement.
 *
 * If the list is empty or holds more than one statement it is returned
 * unchanged. Otherwise the single statement is detached, the now-empty list
 * node is destroyed, and the statement itself is returned.
 */
static struct ast_node *convert_single_statement(
	struct stmt_list_ast_node *list)
{
	fx_queue_entry *only = fx_queue_first(&list->n_statements);

	const bool exactly_one = (only != NULL) && !fx_queue_next(only);
	if (!exactly_one) {
		return (struct ast_node *)list;
	}

	/* detach first so destroying the list cannot affect the statement */
	fx_queue_delete(&list->n_statements, only);
	struct ast_node *stmt = fx_unbox(struct ast_node, only, n_entry);

	ast_node_destroy((struct ast_node *)list);
	return stmt;
}
bool parse_statement_list(struct parse_ctx *ctx, struct ast_node **out)
{
struct stmt_list_ast_node *stmt_list
= (struct stmt_list_ast_node *)ast_node_create(AST_STMT_LIST);
bool ok = true;
while (ok) {
parse_linefeed(ctx);
struct ast_node *stmt = NULL;
if (!parse_statement(ctx, &stmt)) {
ok = false;
break;
}
fx_queue_push_back(&stmt_list->n_statements, &stmt->n_entry);
if (!parse_symbol(ctx, SYM_SEMICOLON)) {
break;
}
}
if (!ok) {
ast_node_destroy((struct ast_node *)stmt_list);
return false;
}
*out = convert_single_statement(stmt_list);
return true;
}
+64 -1
View File
@@ -75,6 +75,19 @@ struct lex_token *lex_token_change_type(
return tok;
}
/*
 * Replace the string value of a token with a copy of `s`.
 *
 * Does nothing if the token is not of a kind that carries a string value
 * (as reported by lex_token_has_string_value()). The previous string is
 * freed. If duplicating `s` fails, tok_str ends up holding whatever
 * fx_strdup() returned.
 */
void lex_token_change_string(struct lex_token *tok, const char *s)
{
	if (!lex_token_has_string_value(tok)) {
		return;
	}

	/* duplicate before freeing: `s` may point into the token's current
	 * string, which must still be readable while it is copied */
	char *copy = fx_strdup(s);

	/* free(NULL) is a no-op, so no guard is needed */
	free(tok->tok_str);
	tok->tok_str = copy;
}
#define ENUM_STR(x) \
case x: \
return #x
@@ -88,6 +101,9 @@ const char *token_type_to_string(enum token_type type)
ENUM_STR(TOK_INT);
ENUM_STR(TOK_DOUBLE);
ENUM_STR(TOK_WORD);
ENUM_STR(TOK_WORD_START);
ENUM_STR(TOK_WORD_END);
ENUM_STR(TOK_OPERATOR);
ENUM_STR(TOK_VAR);
ENUM_STR(TOK_VAR_SPLAT);
ENUM_STR(TOK_FLAG);
@@ -106,6 +122,7 @@ const char *token_keyword_to_string(enum token_keyword keyword)
ENUM_STR(KW_NONE);
ENUM_STR(KW_FUNC);
ENUM_STR(KW_IF);
ENUM_STR(KW_ELSEIF);
ENUM_STR(KW_ELSE);
default:
return "<unknown>";
@@ -125,12 +142,17 @@ const char *token_symbol_to_string(enum token_symbol sym)
ENUM_STR(SYM_SQUOTE);
ENUM_STR(SYM_DQUOTE);
ENUM_STR(SYM_HASH);
ENUM_STR(SYM_COLON_COLON);
ENUM_STR(SYM_SEMICOLON);
ENUM_STR(SYM_COMMA);
ENUM_STR(SYM_DOLLAR);
ENUM_STR(SYM_DOLLAR_LEFT_PAREN);
ENUM_STR(SYM_DOLLAR_LEFT_BRACE);
ENUM_STR(SYM_DOT);
ENUM_STR(SYM_DOT_DOT);
ENUM_STR(SYM_PIPE);
ENUM_STR(SYM_AT);
ENUM_STR(SYM_AT_LEFT_PAREN);
ENUM_STR(SYM_AT_LEFT_BRACE);
ENUM_STR(SYM_LEFT_BRACE);
ENUM_STR(SYM_RIGHT_BRACE);
@@ -141,9 +163,50 @@ const char *token_symbol_to_string(enum token_symbol sym)
ENUM_STR(SYM_EQUAL);
ENUM_STR(SYM_PLUS_EQUAL);
ENUM_STR(SYM_HYPHEN_EQUAL);
ENUM_STR(SYM_FORWARD_SLASH_EQUAL);
ENUM_STR(SYM_ASTERISK_EQUAL);
ENUM_STR(SYM_FORWARD_SLASH_EQUAL);
ENUM_STR(SYM_PERCENT_EQUAL);
ENUM_STR(SYM_QUESTION_DOT);
ENUM_STR(SYM_QUESTION_LEFT_BRACKET);
default:
return "<unknown>";
}
}
const char *token_operator_to_string(enum token_operator op)
{
switch (op) {
ENUM_STR(TKOP_BAND);
ENUM_STR(TKOP_BOR);
ENUM_STR(TKOP_BXOR);
ENUM_STR(TKOP_BNOT);
ENUM_STR(TKOP_SHL);
ENUM_STR(TKOP_SHR);
ENUM_STR(TKOP_EQ);
ENUM_STR(TKOP_NE);
ENUM_STR(TKOP_GT);
ENUM_STR(TKOP_LT);
ENUM_STR(TKOP_GE);
ENUM_STR(TKOP_LE);
ENUM_STR(TKOP_MATCH);
ENUM_STR(TKOP_NOTMATCH);
ENUM_STR(TKOP_REPLACE);
ENUM_STR(TKOP_LIKE);
ENUM_STR(TKOP_NOTLIKE);
ENUM_STR(TKOP_IN);
ENUM_STR(TKOP_F);
ENUM_STR(TKOP_NOTIN);
ENUM_STR(TKOP_CONTAINS);
ENUM_STR(TKOP_NOTCONTAINS);
ENUM_STR(TKOP_AND);
ENUM_STR(TKOP_OR);
ENUM_STR(TKOP_XOR);
ENUM_STR(TKOP_NOT);
ENUM_STR(TKOP_SPLIT);
ENUM_STR(TKOP_JOIN);
ENUM_STR(TKOP_IS);
ENUM_STR(TKOP_ISNOT);
ENUM_STR(TKOP_AS);
default:
return "<unknown>";
}
+52 -1
View File
@@ -16,7 +16,10 @@ enum token_type {
TOK_INT,
TOK_DOUBLE,
TOK_WORD,
TOK_WORD_START,
TOK_WORD_END,
TOK_FLAG,
TOK_OPERATOR,
TOK_VAR,
TOK_VAR_SPLAT,
TOK_STRING,
@@ -31,13 +34,52 @@ enum token_keyword {
__KW_INDEX_BASE = 200,
KW_FUNC,
KW_IF,
KW_ELSEIF,
KW_ELSE,
__KW_INDEX_LIMIT,
};
/*
 * Identifiers for operator tokens, stored in the tok_operator member of
 * struct lex_token.
 *
 * TKOP_NONE is 0; the real operators are numbered consecutively starting at
 * __TKOP_INDEX_BASE + 1, and __TKOP_INDEX_LIMIT is one past the last of
 * them. The keyword and symbol enums in this header use other base values
 * (200 and 400), presumably so that the numeric ranges of the token kinds
 * never overlap -- confirm before relying on it.
 */
enum token_operator {
	TKOP_NONE = 0,
	__TKOP_INDEX_BASE = 300,
	TKOP_F, /* NOTE(review): meaning not evident from this header */
	TKOP_BAND,
	TKOP_BOR,
	TKOP_BXOR,
	TKOP_BNOT,
	TKOP_SHL,
	TKOP_SHR,
	TKOP_EQ,
	TKOP_NE,
	TKOP_GT,
	TKOP_LT,
	TKOP_GE,
	TKOP_LE,
	TKOP_MATCH,
	TKOP_NOTMATCH,
	TKOP_REPLACE,
	TKOP_LIKE,
	TKOP_NOTLIKE,
	TKOP_IN,
	TKOP_NOTIN,
	TKOP_CONTAINS,
	TKOP_NOTCONTAINS,
	TKOP_AND,
	TKOP_OR,
	TKOP_XOR,
	TKOP_NOT,
	TKOP_SPLIT,
	TKOP_JOIN,
	TKOP_IS,
	TKOP_ISNOT,
	TKOP_AS,
	__TKOP_INDEX_LIMIT,
};
enum token_symbol {
SYM_NONE = 0,
__SYM_INDEX_BASE = 300,
__SYM_INDEX_BASE = 400,
SYM_BANG,
SYM_PLUS,
SYM_HYPHEN,
SYM_FORWARD_SLASH,
@@ -47,13 +89,17 @@ enum token_symbol {
SYM_SQUOTE,
SYM_DQUOTE,
SYM_HASH,
SYM_COLON_COLON,
SYM_SEMICOLON,
SYM_COMMA,
SYM_DOLLAR,
SYM_DOLLAR_LEFT_PAREN,
SYM_DOLLAR_LEFT_BRACE,
SYM_DOT,
SYM_DOT_DOT,
SYM_PIPE,
SYM_AT,
SYM_AT_LEFT_PAREN,
SYM_AT_LEFT_BRACE,
SYM_LEFT_BRACE,
SYM_RIGHT_BRACE,
@@ -67,6 +113,8 @@ enum token_symbol {
SYM_ASTERISK_EQUAL,
SYM_FORWARD_SLASH_EQUAL,
SYM_PERCENT_EQUAL,
SYM_QUESTION_DOT,
SYM_QUESTION_LEFT_BRACKET,
__SYM_INDEX_LIMIT,
};
@@ -80,6 +128,7 @@ struct lex_token {
union {
enum token_keyword tok_keyword;
enum token_symbol tok_symbol;
enum token_operator tok_operator;
long long tok_int;
double tok_double;
char *tok_str;
@@ -95,6 +144,7 @@ extern void lex_token_destroy(struct lex_token *tok);
extern struct lex_token *lex_token_change_type(
struct lex_token *tok,
enum token_type new_type);
extern void lex_token_change_string(struct lex_token *tok, const char *s);
static inline bool lex_token_is_symbol(
struct lex_token *tok,
@@ -129,5 +179,6 @@ static inline bool lex_token_has_string_value(const struct lex_token *tok)
extern const char *token_type_to_string(enum token_type type);
extern const char *token_keyword_to_string(enum token_keyword keyword);
extern const char *token_symbol_to_string(enum token_symbol sym);
extern const char *token_operator_to_string(enum token_operator op);
#endif