parse: lex: move per-state token settings to state source files

This commit is contained in:
2026-05-11 23:57:35 +01:00
parent 0c21be8d67
commit a408b9efa2
8 changed files with 297 additions and 193 deletions
+59
View File
@@ -160,8 +160,67 @@ static const struct lex_state_link links[] = {
LINK_END,
};
/*
 * Keyword ids enabled for the arithmetic lexer state.
 * KW_NONE is the trailing entry (presumably the zero sentinel that list
 * walkers stop on).
 */
static const unsigned int keywords[] = {KW_IF, KW_ELSEIF, KW_ELSE, KW_NONE};
/*
 * Operator ids enabled for the arithmetic lexer state.
 *
 * The list is consumed by walking it until the first zero entry, and the
 * other per-state lists use their *_NONE id as the trailing sentinel, so
 * TKOP_NONE must be the LAST element. With it in front the walk would stop
 * immediately (or, were TKOP_NONE non-zero, run past the unterminated end of
 * the array).
 *
 * NOTE(review): TKOP_IN / TKOP_NOTIN are listed here; confirm the master
 * operator table actually defines them.
 */
static const unsigned int operators[] = {
	TKOP_F,       TKOP_BAND,     TKOP_BOR,         TKOP_BXOR,
	TKOP_BNOT,    TKOP_SHL,      TKOP_SHR,         TKOP_EQ,
	TKOP_NE,      TKOP_GT,       TKOP_LT,          TKOP_GE,
	TKOP_LE,      TKOP_MATCH,    TKOP_NOTMATCH,    TKOP_REPLACE,
	TKOP_LIKE,    TKOP_NOTLIKE,  TKOP_IN,          TKOP_NOTIN,
	TKOP_CONTAINS, TKOP_NOTCONTAINS, TKOP_AND,     TKOP_OR,
	TKOP_XOR,     TKOP_NOT,      TKOP_SPLIT,       TKOP_JOIN,
	TKOP_IS,      TKOP_ISNOT,    TKOP_AS,
	TKOP_NONE, /* terminator */
};
/*
 * Symbol ids enabled for the arithmetic lexer state.
 * Entries are kept in their original order; SYM_NONE is the trailing entry.
 */
static const unsigned int symbols[] = {
	/* single-character operators */
	SYM_PLUS, SYM_HYPHEN, SYM_FORWARD_SLASH, SYM_ASTERISK,
	SYM_AMPERSAND, SYM_PERCENT,
	/* quotes and comment marker */
	SYM_SQUOTE, SYM_DQUOTE, SYM_HASH,
	/* '$' and '@' forms */
	SYM_DOLLAR, SYM_DOLLAR_LEFT_PAREN, SYM_DOLLAR_LEFT_BRACE,
	SYM_AT, SYM_AT_LEFT_BRACE,
	/* separators */
	SYM_PIPE, SYM_COMMA, SYM_SEMICOLON,
	/* brackets */
	SYM_LEFT_PAREN, SYM_RIGHT_PAREN,
	SYM_LEFT_BRACE, SYM_RIGHT_BRACE,
	SYM_LEFT_BRACKET, SYM_RIGHT_BRACKET,
	/* '?' forms */
	SYM_QUESTION_DOT, SYM_QUESTION_LEFT_BRACKET,
	/* '=' forms */
	SYM_EQUAL, SYM_PLUS_EQUAL, SYM_HYPHEN_EQUAL,
	SYM_FORWARD_SLASH_EQUAL, SYM_ASTERISK_EQUAL, SYM_PERCENT_EQUAL,
	/* '.' and '::' forms */
	SYM_DOT, SYM_DOT_DOT, SYM_COLON_COLON,
	SYM_NONE,
};
/*
 * Descriptor for the arithmetic lexer state: its id, the token pump
 * callback, and the file-local keyword/operator/symbol/link tables.
 */
const struct lex_state_type lex_arithmetic_state = {
	.s_id = LEX_STATE_ARITHMETIC,

	/* token classes enabled in this state */
	.s_keywords = keywords,
	.s_operators = operators,
	.s_symbols = symbols,

	/* state transition table */
	.s_links = links,

	.s_pump_token = arithmetic_pump_token,
};
+19
View File
@@ -151,14 +151,33 @@ const struct lex_state_link links[] = {
STATEMENT_F_DISABLE_KEYWORDS),
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
LINK_POP(SYM_RIGHT_PAREN),
LINK_POP(SYM_RIGHT_BRACE),
LINK_CHANGE(SYM_SEMICOLON, LEX_STATE_STATEMENT),
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
LINK_CHANGE(TOK_LINEFEED, LEX_STATE_STATEMENT),
LINK_END,
};
/*
 * Symbol ids enabled for the command lexer state.
 * Entries are kept in their original order; SYM_NONE is the trailing entry.
 */
static const unsigned int symbols[] = {
	/* quotes */
	SYM_DQUOTE, SYM_SQUOTE,
	/* '$' forms */
	SYM_DOLLAR, SYM_DOLLAR_LEFT_PAREN, SYM_DOLLAR_LEFT_BRACE,
	/* '@' forms */
	SYM_AT, SYM_AT_LEFT_BRACE, SYM_AT_LEFT_PAREN,
	/* separators */
	SYM_AMPERSAND, SYM_PIPE, SYM_SEMICOLON,
	/* closing brackets */
	SYM_RIGHT_PAREN, SYM_RIGHT_BRACE,
	SYM_NONE,
};
/*
 * Descriptor for the command lexer state: its id, the token pump callback,
 * and the file-local symbol and link tables. No keyword or operator list is
 * set for this state.
 */
const struct lex_state_type lex_command_state = {
	.s_id = LEX_STATE_COMMAND,

	.s_symbols = symbols,
	.s_links = links,

	.s_pump_token = command_pump_token,
};
+11 -2
View File
@@ -125,11 +125,10 @@ static enum bshell_status hashtable_pump_token(struct lex_ctx *ctx)
c = peek_char_noread(ctx);
}
#if 0
#if 1
if (newline) {
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
}
#endif
@@ -153,8 +152,18 @@ static const struct lex_state_link links[] = {
LINK_END,
};
/*
 * Symbol ids enabled for the hashtable lexer state.
 * Entries are kept in their original order; SYM_NONE is the trailing entry.
 */
static const unsigned int symbols[] = {
	SYM_EQUAL, SYM_SEMICOLON, SYM_RIGHT_BRACE,
	SYM_DOLLAR_LEFT_PAREN, SYM_LEFT_PAREN,
	SYM_NONE,
};
/*
 * Descriptor for the hashtable lexer state: its id, the token pump callback,
 * and the file-local symbol and link tables.
 */
const struct lex_state_type lex_hashtable_state = {
	.s_id = LEX_STATE_HASHTABLE,

	.s_symbols = symbols,
	.s_links = links,

	.s_pump_token = hashtable_pump_token,
};
+4
View File
@@ -76,6 +76,10 @@ struct lex_state_type {
lex_state_pump_token s_pump_token;
lex_state_begin s_begin;
lex_state_end s_end;
const unsigned int *s_keywords;
const unsigned int *s_operators;
const unsigned int *s_symbols;
const struct lex_state_link *s_links;
};
+134 -184
View File
@@ -7,198 +7,102 @@
#include <assert.h>
#define LEX_TOKEN_DEF(i, n, s) {.id = (i), .name = (n), .enabled_states = (s)}
#define LEX_TOKEN_DEF2(i, n, s, f) \
{.id = (i), .name = (n), .enabled_states = (s), .flags = (f)}
#define CONVERSION_REQUESTED(flags) \
((flags) & (LEX_ENABLE_INT | LEX_ENABLE_KEYWORD))
/*
 * Table-entry helpers for the master symbol / keyword / operator tables.
 *
 * Each expands to a designated array initializer that stores the entry at
 * index (id - <class index base>), so a token id can be used to look its
 * definition up directly:
 *
 *   i  token id (also stored in .id)
 *   n  token text (stored in .name)
 *   f  token flags (stored in .flags)
 *
 * The id argument is parenthesised inside the designator so that an
 * expression argument cannot be re-associated with the subtraction.
 */
#define SYMBOL_DEF(i, n, f)                                                    \
	[(i) - __SYM_INDEX_BASE] = {                                           \
		.id = (i),                                                     \
		.name = (n),                                                   \
		.flags = (f),                                                  \
	}
#define KW_DEF(i, n, f)                                                        \
	[(i) - __KW_INDEX_BASE] = {                                            \
		.id = (i),                                                     \
		.name = (n),                                                   \
		.flags = (f),                                                  \
	}
#define TKOP_DEF(i, n, f)                                                      \
	[(i) - __TKOP_INDEX_BASE] = {                                          \
		.id = (i),                                                     \
		.name = (n),                                                   \
		.flags = (f),                                                  \
	}
static struct lex_token_def keywords[] = {
LEX_TOKEN_DEF2(
KW_FUNC,
"func",
LEX_STATE_STATEMENT,
LEX_TOKEN_COMMAND_MODE),
LEX_TOKEN_DEF(KW_IF, "if", LEX_STATE_STATEMENT),
LEX_TOKEN_DEF(KW_ELSEIF, "elseif", LEX_STATE_STATEMENT),
LEX_TOKEN_DEF(KW_ELSE, "else", LEX_STATE_STATEMENT),
KW_DEF(KW_FUNC, "func", LEX_TOKEN_COMMAND_MODE),
KW_DEF(KW_IF, "if", 0),
KW_DEF(KW_ELSEIF, "elseif", 0),
KW_DEF(KW_ELSE, "else", 0),
};
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
static struct lex_token_def operators[] = {
LEX_TOKEN_DEF(TKOP_BAND, "-band", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_BOR, "-bor", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_BXOR, "-bxor", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(
TKOP_BNOT,
"-bnot",
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_SHL, "-shl", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_SHR, "-shr", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(
TKOP_EQ,
"-eq",
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(
TKOP_NE,
"-ne",
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(
TKOP_GT,
"-gt",
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(
TKOP_LT,
"-lt",
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(
TKOP_GE,
"-ge",
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(
TKOP_LE,
"-le",
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_MATCH, "-match", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_NOTMATCH, "-notmatch", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_REPLACE, "-replace", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_LIKE, "-like", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_NOTLIKE, "-notlike", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_CONTAINS, "-contains", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_NOTCONTAINS, "-notcontains", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_AND, "-and", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(
TKOP_OR,
"-OR",
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_XOR, "-xor", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(
TKOP_NOT,
"-not",
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_SPLIT, "-split", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_JOIN, "-join", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_IS, "-is", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_ISNOT, "-isnot", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_AS, "-as", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(TKOP_F, "-f", LEX_STATE_ARITHMETIC),
TKOP_DEF(TKOP_BAND, "-band", 0),
TKOP_DEF(TKOP_BOR, "-bor", 0),
TKOP_DEF(TKOP_BXOR, "-bxor", 0),
TKOP_DEF(TKOP_BNOT, "-bnot", 0),
TKOP_DEF(TKOP_SHL, "-shl", 0),
TKOP_DEF(TKOP_SHR, "-shr", 0),
TKOP_DEF(TKOP_EQ, "-eq", 0),
TKOP_DEF(TKOP_NE, "-ne", 0),
TKOP_DEF(TKOP_GT, "-gt", 0),
TKOP_DEF(TKOP_LT, "-lt", 0),
TKOP_DEF(TKOP_GE, "-ge", 0),
TKOP_DEF(TKOP_LE, "-le", 0),
TKOP_DEF(TKOP_MATCH, "-match", 0),
TKOP_DEF(TKOP_NOTMATCH, "-notmatch", 0),
TKOP_DEF(TKOP_REPLACE, "-replace", 0),
TKOP_DEF(TKOP_LIKE, "-like", 0),
TKOP_DEF(TKOP_NOTLIKE, "-notlike", 0),
TKOP_DEF(TKOP_CONTAINS, "-contains", 0),
TKOP_DEF(TKOP_NOTCONTAINS, "-notcontains", 0),
TKOP_DEF(TKOP_AND, "-and", 0),
TKOP_DEF(TKOP_OR, "-or", 0),
TKOP_DEF(TKOP_XOR, "-xor", 0),
TKOP_DEF(TKOP_NOT, "-not", 0),
TKOP_DEF(TKOP_SPLIT, "-split", 0),
TKOP_DEF(TKOP_JOIN, "-join", 0),
TKOP_DEF(TKOP_IS, "-is", 0),
TKOP_DEF(TKOP_ISNOT, "-isnot", 0),
TKOP_DEF(TKOP_AS, "-as", 0),
TKOP_DEF(TKOP_F, "-f", 0),
};
static const size_t nr_operators = sizeof operators / sizeof operators[0];
#define LEX_STATES(states) (LEX_STATE_STATEMENT | states)
#define LEX_STATE_ALL \
(LEX_STATE_ARITHMETIC | LEX_STATE_STATEMENT | LEX_STATE_COMMAND \
| LEX_STATE_STRING | LEX_STATE_WORD | LEX_STATE_HASHTABLE)
static struct lex_token_def symbols[] = {
LEX_TOKEN_DEF2(
SYM_PLUS,
"+",
LEX_STATE_ARITHMETIC,
LEX_TOKEN_UNARY_ARITHMETIC),
LEX_TOKEN_DEF2(
SYM_HYPHEN,
"-",
LEX_STATE_ARITHMETIC,
LEX_TOKEN_UNARY_ARITHMETIC),
LEX_TOKEN_DEF(SYM_FORWARD_SLASH, "/", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_ASTERISK, "*", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(
SYM_AMPERSAND,
"&",
LEX_STATES(
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND
| LEX_STATE_WORD)),
LEX_TOKEN_DEF(SYM_PERCENT, "%", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(
SYM_SQUOTE,
"'",
LEX_STATES(LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND)),
LEX_TOKEN_DEF(SYM_DQUOTE, "\"", LEX_STATE_ALL),
LEX_TOKEN_DEF(
SYM_HASH,
"#",
LEX_STATES(
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND
| LEX_STATE_WORD)),
LEX_TOKEN_DEF2(
SYM_DOLLAR,
"$",
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
| LEX_STATE_WORD,
LEX_TOKEN_UNARY_ARITHMETIC),
LEX_TOKEN_DEF2(
SYM_DOLLAR_LEFT_PAREN,
"$(",
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
| LEX_STATE_WORD,
LEX_TOKEN_UNARY_ARITHMETIC),
LEX_TOKEN_DEF2(
SYM_DOLLAR_LEFT_BRACE,
"${",
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
| LEX_STATE_WORD,
LEX_TOKEN_UNARY_ARITHMETIC),
LEX_TOKEN_DEF(SYM_AT, "@", LEX_STATE_ALL),
LEX_TOKEN_DEF2(
SYM_PIPE,
"|",
LEX_STATE_ALL,
LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_COMMAND_MODE),
LEX_TOKEN_DEF2(
SYM_COMMA,
",",
LEX_STATE_ALL,
LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_STATEMENT_MODE),
LEX_TOKEN_DEF2(
SYM_SEMICOLON,
";",
LEX_STATE_ALL,
LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_STATEMENT_MODE),
LEX_TOKEN_DEF2(
SYM_AT_LEFT_BRACE,
"@{",
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
| LEX_STATE_WORD | LEX_STATE_STATEMENT,
LEX_TOKEN_UNARY_ARITHMETIC),
LEX_TOKEN_DEF(SYM_AT_LEFT_PAREN, "@(", LEX_STATE_ALL),
LEX_TOKEN_DEF2(
SYM_LEFT_BRACE,
"{",
LEX_STATE_ALL,
LEX_TOKEN_TERMINATES_WORD),
LEX_TOKEN_DEF2(
SYM_RIGHT_BRACE,
"}",
LEX_STATE_ALL,
LEX_TOKEN_TERMINATES_WORD),
LEX_TOKEN_DEF(SYM_LEFT_BRACKET, "[", LEX_STATES(LEX_STATE_ARITHMETIC)),
LEX_TOKEN_DEF(SYM_RIGHT_BRACKET, "]", LEX_STATES(LEX_STATE_ARITHMETIC)),
LEX_TOKEN_DEF(SYM_QUESTION_LEFT_BRACKET, "?[", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF2(
SYM_LEFT_PAREN,
"(",
LEX_STATE_ALL,
LEX_TOKEN_TERMINATES_WORD),
LEX_TOKEN_DEF2(
SYM_RIGHT_PAREN,
")",
LEX_STATE_ALL,
LEX_TOKEN_TERMINATES_WORD),
LEX_TOKEN_DEF(
SYM_EQUAL,
"=",
LEX_STATE_ARITHMETIC | LEX_STATE_HASHTABLE),
LEX_TOKEN_DEF(SYM_PLUS_EQUAL, "+=", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_HYPHEN_EQUAL, "-=", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_FORWARD_SLASH_EQUAL, "/=", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_ASTERISK_EQUAL, "*=", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_PERCENT_EQUAL, "%=", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_DOT, ".", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_COLON_COLON, "::", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_DOT_DOT, "..", LEX_STATE_ARITHMETIC),
LEX_TOKEN_DEF(SYM_QUESTION_DOT, "?.", LEX_STATE_ARITHMETIC),
SYMBOL_DEF(SYM_PLUS, "+", LEX_TOKEN_UNARY_ARITHMETIC),
SYMBOL_DEF(SYM_HYPHEN, "-", LEX_TOKEN_UNARY_ARITHMETIC),
SYMBOL_DEF(SYM_FORWARD_SLASH, "/", 0),
SYMBOL_DEF(SYM_ASTERISK, "*", 0),
SYMBOL_DEF(SYM_AMPERSAND, "&", 0),
SYMBOL_DEF(SYM_PERCENT, "%", 0),
SYMBOL_DEF(SYM_SQUOTE, "'", 0),
SYMBOL_DEF(SYM_DQUOTE, "\"", 0),
SYMBOL_DEF(SYM_HASH, "#", 0),
SYMBOL_DEF(SYM_DOLLAR, "$", LEX_TOKEN_UNARY_ARITHMETIC),
SYMBOL_DEF(SYM_DOLLAR_LEFT_PAREN, "$(", LEX_TOKEN_UNARY_ARITHMETIC),
SYMBOL_DEF(SYM_DOLLAR_LEFT_BRACE, "${", LEX_TOKEN_UNARY_ARITHMETIC),
SYMBOL_DEF(SYM_AT, "@", 0),
SYMBOL_DEF(SYM_PIPE, "|", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_COMMA, ",", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_SEMICOLON, ";", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_AT_LEFT_BRACE, "@{", LEX_TOKEN_UNARY_ARITHMETIC),
SYMBOL_DEF(SYM_AT_LEFT_PAREN, "@(", 0),
SYMBOL_DEF(SYM_LEFT_BRACE, "{", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_RIGHT_BRACE, "}", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_LEFT_BRACKET, "[", 0),
SYMBOL_DEF(SYM_RIGHT_BRACKET, "]", 0),
SYMBOL_DEF(SYM_QUESTION_LEFT_BRACKET, "?[", 0),
SYMBOL_DEF(SYM_LEFT_PAREN, "(", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_RIGHT_PAREN, ")", LEX_TOKEN_TERMINATES_WORD),
SYMBOL_DEF(SYM_EQUAL, "=", 0),
SYMBOL_DEF(SYM_PLUS_EQUAL, "+=", 0),
SYMBOL_DEF(SYM_HYPHEN_EQUAL, "-=", 0),
SYMBOL_DEF(SYM_FORWARD_SLASH_EQUAL, "/=", 0),
SYMBOL_DEF(SYM_ASTERISK_EQUAL, "*=", 0),
SYMBOL_DEF(SYM_PERCENT_EQUAL, "%=", 0),
SYMBOL_DEF(SYM_DOT, ".", 0),
SYMBOL_DEF(SYM_COLON_COLON, "::", 0),
SYMBOL_DEF(SYM_DOT_DOT, "..", 0),
SYMBOL_DEF(SYM_QUESTION_DOT, "?.", 0),
};
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
@@ -437,6 +341,10 @@ static struct lex_symbol_node *build_symbol_tree(void)
enum bshell_status status = BSHELL_SUCCESS;
for (size_t i = 0; i < nr_symbols; i++) {
if (!symbols[i].name) {
continue;
}
status = put_symbol(root, &symbols[i]);
if (status != BSHELL_SUCCESS) {
@@ -448,6 +356,33 @@ static struct lex_symbol_node *build_symbol_tree(void)
return root;
}
static void init_token_enabled_states(const struct lex_state_type *state_type)
{
if (state_type->s_keywords) {
for (size_t i = 0; state_type->s_keywords[i]; i++) {
unsigned int id = state_type->s_keywords[i];
keywords[id - __KW_INDEX_BASE].enabled_states
|= state_type->s_id;
}
}
if (state_type->s_operators) {
for (size_t i = 0; state_type->s_operators[i]; i++) {
unsigned int id = state_type->s_operators[i];
operators[id - __TKOP_INDEX_BASE].enabled_states
|= state_type->s_id;
}
}
if (state_type->s_symbols) {
for (size_t i = 0; state_type->s_symbols[i]; i++) {
unsigned int id = state_type->s_symbols[i];
symbols[id - __SYM_INDEX_BASE].enabled_states
|= state_type->s_id;
}
}
}
enum bshell_status lex_ctx_init(
struct lex_ctx *ctx,
enum lex_flags flags,
@@ -464,6 +399,13 @@ enum bshell_status lex_ctx_init(
ctx->lex_ch = FX_WCHAR_INVALID;
ctx->lex_cursor.c_row = ctx->lex_cursor.c_col = 1;
init_token_enabled_states(&lex_statement_state);
init_token_enabled_states(&lex_command_state);
init_token_enabled_states(&lex_arithmetic_state);
init_token_enabled_states(&lex_string_state);
init_token_enabled_states(&lex_word_state);
init_token_enabled_states(&lex_hashtable_state);
return BSHELL_SUCCESS;
}
@@ -574,7 +516,7 @@ bool convert_word_to_keyword(struct lex_token *tok)
for (size_t i = 0; i < nr_keywords; i++) {
const char *kw_str = keywords[i].name;
if (strcmp(kw_str, tok->tok_str) != 0) {
if (!kw_str || strcmp(kw_str, tok->tok_str) != 0) {
continue;
}
@@ -1012,6 +954,10 @@ bool char_can_begin_symbol_in_state(
enum lex_state_type_id state_type)
{
for (size_t i = 0; i < nr_symbols; i++) {
if (!symbols[i].name) {
continue;
}
if (symbols[i].name[0] != c) {
continue;
}
@@ -1033,6 +979,10 @@ bool char_can_begin_symbol(struct lex_ctx *ctx, char c)
bool char_has_flags(struct lex_ctx *ctx, char c, enum lex_token_flags flags)
{
for (size_t i = 0; i < nr_symbols; i++) {
if (!symbols[i].name) {
continue;
}
if (symbols[i].name[0] != c) {
continue;
}
@@ -1103,7 +1053,7 @@ enum token_operator get_operator_with_string(struct lex_ctx *ctx, const char *s)
for (size_t i = 0; i < nr_operators; i++) {
const char *op_str = operators[i].name;
if (strcmp(op_str, s) != 0) {
if (!op_str || strcmp(op_str, s) != 0) {
continue;
}
+39
View File
@@ -183,8 +183,47 @@ static const struct lex_state_link links[] = {
LINK_END,
};
/*
 * Keyword ids enabled for the statement lexer state.
 * KW_NONE is the trailing entry (presumably the zero sentinel that list
 * walkers stop on).
 */
static const unsigned int keywords[] = {
	KW_FUNC, KW_IF, KW_ELSEIF, KW_ELSE,
	KW_NONE,
};
/*
 * Operator ids enabled for the statement lexer state.
 * TKOP_NONE is the trailing entry.
 */
static const unsigned int operators[] = {TKOP_BNOT, TKOP_NOT, TKOP_NONE};
/*
 * Symbol ids enabled for the statement lexer state.
 * Entries are kept in their original order; SYM_NONE is the trailing entry.
 */
static const unsigned int symbols[] = {
	SYM_AMPERSAND,
	/* quotes and comment marker */
	SYM_SQUOTE, SYM_DQUOTE, SYM_HASH,
	/* '$' and '@' forms */
	SYM_DOLLAR, SYM_DOLLAR_LEFT_PAREN, SYM_DOLLAR_LEFT_BRACE,
	SYM_AT, SYM_AT_LEFT_BRACE,
	/* separators */
	SYM_PIPE, SYM_COMMA, SYM_SEMICOLON,
	/* brackets */
	SYM_LEFT_BRACE, SYM_RIGHT_BRACE,
	SYM_LEFT_BRACKET, SYM_RIGHT_BRACKET,
	SYM_LEFT_PAREN, SYM_RIGHT_PAREN,
	SYM_NONE,
};
/*
 * Descriptor for the statement lexer state: its id, the token pump
 * callback, and the file-local keyword/operator/symbol/link tables.
 */
const struct lex_state_type lex_statement_state = {
	.s_id = LEX_STATE_STATEMENT,

	/* token classes enabled in this state */
	.s_keywords = keywords,
	.s_operators = operators,
	.s_symbols = symbols,

	/* state transition table */
	.s_links = links,

	.s_pump_token = statement_pump_token,
};
+16 -4
View File
@@ -9,6 +9,8 @@ static enum bshell_status string_symbol(struct lex_ctx *ctx)
return status;
}
handle_lex_state_transition(ctx, sym->id);
struct lex_token *tok = NULL;
switch (sym->id) {
@@ -18,10 +20,6 @@ static enum bshell_status string_symbol(struct lex_ctx *ctx)
return status;
}
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
return BSHELL_SUCCESS;
case SYM_DQUOTE:
lex_state_pop(ctx);
return BSHELL_SUCCESS;
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
@@ -122,9 +120,23 @@ static enum bshell_status string_pump_token(struct lex_ctx *ctx)
return string_content(ctx);
}
/*
 * State transition table for the string lexer state, keyed by token id and
 * closed by LINK_END.
 * NOTE(review): going by the macro names, SYM_DOLLAR_LEFT_PAREN pushes the
 * statement state and SYM_DQUOTE pops the current state - confirm against
 * the LINK_* macro definitions.
 */
static const struct lex_state_link links[] = {
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
LINK_POP(SYM_DQUOTE),
LINK_END,
};
static const unsigned int symbols[] = {
SYM_DOLLAR,
SYM_DOLLAR_LEFT_PAREN,
SYM_DOLLAR_LEFT_BRACE,
SYM_NONE,
};
const struct lex_state_type lex_string_state = {
.s_id = LEX_STATE_STRING,
.s_begin = string_begin,
.s_end = string_end,
.s_pump_token = string_pump_token,
.s_links = links,
};
+15 -3
View File
@@ -137,8 +137,20 @@ static enum bshell_status word_pump_token(struct lex_ctx *ctx)
return word_content(ctx);
}
static const struct lex_state_link links[] = {
LINK_END,
/*
 * Symbol ids enabled for the word lexer state.
 * Entries are kept in their original order; SYM_NONE is the trailing entry.
 */
static const unsigned int symbols[] = {
	SYM_AMPERSAND, SYM_HASH,
	/* '$' forms */
	SYM_DOLLAR, SYM_DOLLAR_LEFT_PAREN, SYM_DOLLAR_LEFT_BRACE,
	/* separators */
	SYM_PIPE, SYM_COMMA, SYM_SEMICOLON,
	/* brackets */
	SYM_LEFT_BRACE, SYM_RIGHT_BRACE,
	SYM_LEFT_PAREN, SYM_RIGHT_PAREN,
	SYM_NONE,
};
const struct lex_state_type lex_word_state = {
@@ -146,5 +158,5 @@ const struct lex_state_type lex_word_state = {
.s_begin = word_begin,
.s_end = word_end,
.s_pump_token = word_pump_token,
.s_links = links,
.s_symbols = symbols,
};