parse: lex: move per-state token settings to state source files
This commit is contained in:
@@ -160,8 +160,67 @@ static const struct lex_state_link links[] = {
|
||||
LINK_END,
|
||||
};
|
||||
|
||||
static const unsigned int keywords[] = {
|
||||
KW_IF,
|
||||
KW_ELSEIF,
|
||||
KW_ELSE,
|
||||
KW_NONE,
|
||||
};
|
||||
|
||||
static const unsigned int operators[] = {
|
||||
TKOP_NONE, TKOP_F, TKOP_BAND, TKOP_BOR,
|
||||
TKOP_BXOR, TKOP_BNOT, TKOP_SHL, TKOP_SHR,
|
||||
TKOP_EQ, TKOP_NE, TKOP_GT, TKOP_LT,
|
||||
TKOP_GE, TKOP_LE, TKOP_MATCH, TKOP_NOTMATCH,
|
||||
TKOP_REPLACE, TKOP_LIKE, TKOP_NOTLIKE, TKOP_IN,
|
||||
TKOP_NOTIN, TKOP_CONTAINS, TKOP_NOTCONTAINS, TKOP_AND,
|
||||
TKOP_OR, TKOP_XOR, TKOP_NOT, TKOP_SPLIT,
|
||||
TKOP_JOIN, TKOP_IS, TKOP_ISNOT, TKOP_AS,
|
||||
};
|
||||
|
||||
static const unsigned int symbols[] = {
|
||||
SYM_PLUS,
|
||||
SYM_HYPHEN,
|
||||
SYM_FORWARD_SLASH,
|
||||
SYM_ASTERISK,
|
||||
SYM_AMPERSAND,
|
||||
SYM_PERCENT,
|
||||
SYM_SQUOTE,
|
||||
SYM_DQUOTE,
|
||||
SYM_HASH,
|
||||
SYM_DOLLAR,
|
||||
SYM_DOLLAR_LEFT_PAREN,
|
||||
SYM_DOLLAR_LEFT_BRACE,
|
||||
SYM_AT,
|
||||
SYM_AT_LEFT_BRACE,
|
||||
SYM_PIPE,
|
||||
SYM_COMMA,
|
||||
SYM_SEMICOLON,
|
||||
SYM_LEFT_PAREN,
|
||||
SYM_RIGHT_PAREN,
|
||||
SYM_LEFT_BRACE,
|
||||
SYM_RIGHT_BRACE,
|
||||
SYM_LEFT_BRACKET,
|
||||
SYM_RIGHT_BRACKET,
|
||||
SYM_QUESTION_DOT,
|
||||
SYM_QUESTION_LEFT_BRACKET,
|
||||
SYM_EQUAL,
|
||||
SYM_PLUS_EQUAL,
|
||||
SYM_HYPHEN_EQUAL,
|
||||
SYM_FORWARD_SLASH_EQUAL,
|
||||
SYM_ASTERISK_EQUAL,
|
||||
SYM_PERCENT_EQUAL,
|
||||
SYM_DOT,
|
||||
SYM_DOT_DOT,
|
||||
SYM_COLON_COLON,
|
||||
SYM_NONE,
|
||||
};
|
||||
|
||||
const struct lex_state_type lex_arithmetic_state = {
|
||||
.s_id = LEX_STATE_ARITHMETIC,
|
||||
.s_pump_token = arithmetic_pump_token,
|
||||
.s_links = links,
|
||||
.s_keywords = keywords,
|
||||
.s_operators = operators,
|
||||
.s_symbols = symbols,
|
||||
};
|
||||
|
||||
@@ -151,14 +151,33 @@ const struct lex_state_link links[] = {
|
||||
STATEMENT_F_DISABLE_KEYWORDS),
|
||||
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
|
||||
LINK_POP(SYM_RIGHT_PAREN),
|
||||
LINK_POP(SYM_RIGHT_BRACE),
|
||||
LINK_CHANGE(SYM_SEMICOLON, LEX_STATE_STATEMENT),
|
||||
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
|
||||
LINK_CHANGE(TOK_LINEFEED, LEX_STATE_STATEMENT),
|
||||
LINK_END,
|
||||
};
|
||||
|
||||
static const unsigned int symbols[] = {
|
||||
SYM_DQUOTE,
|
||||
SYM_SQUOTE,
|
||||
SYM_DOLLAR,
|
||||
SYM_DOLLAR_LEFT_PAREN,
|
||||
SYM_DOLLAR_LEFT_BRACE,
|
||||
SYM_AT,
|
||||
SYM_AT_LEFT_BRACE,
|
||||
SYM_AT_LEFT_PAREN,
|
||||
SYM_AMPERSAND,
|
||||
SYM_PIPE,
|
||||
SYM_SEMICOLON,
|
||||
SYM_RIGHT_PAREN,
|
||||
SYM_RIGHT_BRACE,
|
||||
SYM_NONE,
|
||||
};
|
||||
|
||||
const struct lex_state_type lex_command_state = {
|
||||
.s_id = LEX_STATE_COMMAND,
|
||||
.s_pump_token = command_pump_token,
|
||||
.s_links = links,
|
||||
.s_symbols = symbols,
|
||||
};
|
||||
|
||||
@@ -125,11 +125,10 @@ static enum bshell_status hashtable_pump_token(struct lex_ctx *ctx)
|
||||
c = peek_char_noread(ctx);
|
||||
}
|
||||
|
||||
#if 0
|
||||
#if 1
|
||||
if (newline) {
|
||||
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
@@ -153,8 +152,18 @@ static const struct lex_state_link links[] = {
|
||||
LINK_END,
|
||||
};
|
||||
|
||||
static const unsigned int symbols[] = {
|
||||
SYM_EQUAL,
|
||||
SYM_SEMICOLON,
|
||||
SYM_RIGHT_BRACE,
|
||||
SYM_DOLLAR_LEFT_PAREN,
|
||||
SYM_LEFT_PAREN,
|
||||
SYM_NONE,
|
||||
};
|
||||
|
||||
const struct lex_state_type lex_hashtable_state = {
|
||||
.s_id = LEX_STATE_HASHTABLE,
|
||||
.s_pump_token = hashtable_pump_token,
|
||||
.s_links = links,
|
||||
.s_symbols = symbols,
|
||||
};
|
||||
|
||||
@@ -76,6 +76,10 @@ struct lex_state_type {
|
||||
lex_state_pump_token s_pump_token;
|
||||
lex_state_begin s_begin;
|
||||
lex_state_end s_end;
|
||||
|
||||
const unsigned int *s_keywords;
|
||||
const unsigned int *s_operators;
|
||||
const unsigned int *s_symbols;
|
||||
const struct lex_state_link *s_links;
|
||||
};
|
||||
|
||||
|
||||
+134
-184
@@ -7,198 +7,102 @@
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#define LEX_TOKEN_DEF(i, n, s) {.id = (i), .name = (n), .enabled_states = (s)}
|
||||
#define LEX_TOKEN_DEF2(i, n, s, f) \
|
||||
{.id = (i), .name = (n), .enabled_states = (s), .flags = (f)}
|
||||
|
||||
#define CONVERSION_REQUESTED(flags) \
|
||||
((flags) & (LEX_ENABLE_INT | LEX_ENABLE_KEYWORD))
|
||||
/*
 * Designated initialiser for one symbol table entry. The entry is placed at
 * slot (id - __SYM_INDEX_BASE) and records the id, name and flags.
 * Every argument is parenthesised so that an expression argument cannot
 * change the computed slot through operator precedence.
 */
#define SYMBOL_DEF(i, n, f) \
	[(i) - __SYM_INDEX_BASE] = { \
		.id = (i), \
		.name = (n), \
		.flags = (f), \
	}
|
||||
/*
 * Designated initialiser for one keyword table entry. The entry is placed at
 * slot (id - __KW_INDEX_BASE) and records the id, name and flags.
 * Every argument is parenthesised so that an expression argument cannot
 * change the computed slot through operator precedence.
 */
#define KW_DEF(i, n, f) \
	[(i) - __KW_INDEX_BASE] = { \
		.id = (i), \
		.name = (n), \
		.flags = (f), \
	}
|
||||
/*
 * Designated initialiser for one operator table entry. The entry is placed
 * at slot (id - __TKOP_INDEX_BASE) and records the id, name and flags.
 * Every argument is parenthesised so that an expression argument cannot
 * change the computed slot through operator precedence.
 */
#define TKOP_DEF(i, n, f) \
	[(i) - __TKOP_INDEX_BASE] = { \
		.id = (i), \
		.name = (n), \
		.flags = (f), \
	}
|
||||
|
||||
static struct lex_token_def keywords[] = {
|
||||
LEX_TOKEN_DEF2(
|
||||
KW_FUNC,
|
||||
"func",
|
||||
LEX_STATE_STATEMENT,
|
||||
LEX_TOKEN_COMMAND_MODE),
|
||||
LEX_TOKEN_DEF(KW_IF, "if", LEX_STATE_STATEMENT),
|
||||
LEX_TOKEN_DEF(KW_ELSEIF, "elseif", LEX_STATE_STATEMENT),
|
||||
LEX_TOKEN_DEF(KW_ELSE, "else", LEX_STATE_STATEMENT),
|
||||
KW_DEF(KW_FUNC, "func", LEX_TOKEN_COMMAND_MODE),
|
||||
KW_DEF(KW_IF, "if", 0),
|
||||
KW_DEF(KW_ELSEIF, "elseif", 0),
|
||||
KW_DEF(KW_ELSE, "else", 0),
|
||||
};
|
||||
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
|
||||
|
||||
static struct lex_token_def operators[] = {
|
||||
LEX_TOKEN_DEF(TKOP_BAND, "-band", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_BOR, "-bor", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_BXOR, "-bxor", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
TKOP_BNOT,
|
||||
"-bnot",
|
||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_SHL, "-shl", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_SHR, "-shr", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
TKOP_EQ,
|
||||
"-eq",
|
||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
TKOP_NE,
|
||||
"-ne",
|
||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
TKOP_GT,
|
||||
"-gt",
|
||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
TKOP_LT,
|
||||
"-lt",
|
||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
TKOP_GE,
|
||||
"-ge",
|
||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
TKOP_LE,
|
||||
"-le",
|
||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_MATCH, "-match", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_NOTMATCH, "-notmatch", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_REPLACE, "-replace", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_LIKE, "-like", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_NOTLIKE, "-notlike", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_CONTAINS, "-contains", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_NOTCONTAINS, "-notcontains", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_AND, "-and", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
TKOP_OR,
|
||||
"-OR",
|
||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_XOR, "-xor", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
TKOP_NOT,
|
||||
"-not",
|
||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_SPLIT, "-split", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_JOIN, "-join", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_IS, "-is", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_ISNOT, "-isnot", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_AS, "-as", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(TKOP_F, "-f", LEX_STATE_ARITHMETIC),
|
||||
TKOP_DEF(TKOP_BAND, "-band", 0),
|
||||
TKOP_DEF(TKOP_BOR, "-bor", 0),
|
||||
TKOP_DEF(TKOP_BXOR, "-bxor", 0),
|
||||
TKOP_DEF(TKOP_BNOT, "-bnot", 0),
|
||||
TKOP_DEF(TKOP_SHL, "-shl", 0),
|
||||
TKOP_DEF(TKOP_SHR, "-shr", 0),
|
||||
TKOP_DEF(TKOP_EQ, "-eq", 0),
|
||||
TKOP_DEF(TKOP_NE, "-ne", 0),
|
||||
TKOP_DEF(TKOP_GT, "-gt", 0),
|
||||
TKOP_DEF(TKOP_LT, "-lt", 0),
|
||||
TKOP_DEF(TKOP_GE, "-ge", 0),
|
||||
TKOP_DEF(TKOP_LE, "-le", 0),
|
||||
TKOP_DEF(TKOP_MATCH, "-match", 0),
|
||||
TKOP_DEF(TKOP_NOTMATCH, "-notmatch", 0),
|
||||
TKOP_DEF(TKOP_REPLACE, "-replace", 0),
|
||||
TKOP_DEF(TKOP_LIKE, "-like", 0),
|
||||
TKOP_DEF(TKOP_NOTLIKE, "-notlike", 0),
|
||||
TKOP_DEF(TKOP_CONTAINS, "-contains", 0),
|
||||
TKOP_DEF(TKOP_NOTCONTAINS, "-notcontains", 0),
|
||||
TKOP_DEF(TKOP_AND, "-and", 0),
|
||||
TKOP_DEF(TKOP_OR, "-or", 0),
|
||||
TKOP_DEF(TKOP_XOR, "-xor", 0),
|
||||
TKOP_DEF(TKOP_NOT, "-not", 0),
|
||||
TKOP_DEF(TKOP_SPLIT, "-split", 0),
|
||||
TKOP_DEF(TKOP_JOIN, "-join", 0),
|
||||
TKOP_DEF(TKOP_IS, "-is", 0),
|
||||
TKOP_DEF(TKOP_ISNOT, "-isnot", 0),
|
||||
TKOP_DEF(TKOP_AS, "-as", 0),
|
||||
TKOP_DEF(TKOP_F, "-f", 0),
|
||||
};
|
||||
static const size_t nr_operators = sizeof operators / sizeof operators[0];
|
||||
|
||||
#define LEX_STATES(states) (LEX_STATE_STATEMENT | states)
|
||||
#define LEX_STATE_ALL \
|
||||
(LEX_STATE_ARITHMETIC | LEX_STATE_STATEMENT | LEX_STATE_COMMAND \
|
||||
| LEX_STATE_STRING | LEX_STATE_WORD | LEX_STATE_HASHTABLE)
|
||||
|
||||
static struct lex_token_def symbols[] = {
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_PLUS,
|
||||
"+",
|
||||
LEX_STATE_ARITHMETIC,
|
||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_HYPHEN,
|
||||
"-",
|
||||
LEX_STATE_ARITHMETIC,
|
||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_FORWARD_SLASH, "/", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_ASTERISK, "*", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
SYM_AMPERSAND,
|
||||
"&",
|
||||
LEX_STATES(
|
||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND
|
||||
| LEX_STATE_WORD)),
|
||||
LEX_TOKEN_DEF(SYM_PERCENT, "%", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
SYM_SQUOTE,
|
||||
"'",
|
||||
LEX_STATES(LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND)),
|
||||
LEX_TOKEN_DEF(SYM_DQUOTE, "\"", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(
|
||||
SYM_HASH,
|
||||
"#",
|
||||
LEX_STATES(
|
||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND
|
||||
| LEX_STATE_WORD)),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_DOLLAR,
|
||||
"$",
|
||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
|
||||
| LEX_STATE_WORD,
|
||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_DOLLAR_LEFT_PAREN,
|
||||
"$(",
|
||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
|
||||
| LEX_STATE_WORD,
|
||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_DOLLAR_LEFT_BRACE,
|
||||
"${",
|
||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
|
||||
| LEX_STATE_WORD,
|
||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_AT, "@", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_PIPE,
|
||||
"|",
|
||||
LEX_STATE_ALL,
|
||||
LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_COMMAND_MODE),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_COMMA,
|
||||
",",
|
||||
LEX_STATE_ALL,
|
||||
LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_STATEMENT_MODE),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_SEMICOLON,
|
||||
";",
|
||||
LEX_STATE_ALL,
|
||||
LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_STATEMENT_MODE),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_AT_LEFT_BRACE,
|
||||
"@{",
|
||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
|
||||
| LEX_STATE_WORD | LEX_STATE_STATEMENT,
|
||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_AT_LEFT_PAREN, "@(", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_LEFT_BRACE,
|
||||
"{",
|
||||
LEX_STATE_ALL,
|
||||
LEX_TOKEN_TERMINATES_WORD),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_RIGHT_BRACE,
|
||||
"}",
|
||||
LEX_STATE_ALL,
|
||||
LEX_TOKEN_TERMINATES_WORD),
|
||||
LEX_TOKEN_DEF(SYM_LEFT_BRACKET, "[", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF(SYM_RIGHT_BRACKET, "]", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF(SYM_QUESTION_LEFT_BRACKET, "?[", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_LEFT_PAREN,
|
||||
"(",
|
||||
LEX_STATE_ALL,
|
||||
LEX_TOKEN_TERMINATES_WORD),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_RIGHT_PAREN,
|
||||
")",
|
||||
LEX_STATE_ALL,
|
||||
LEX_TOKEN_TERMINATES_WORD),
|
||||
LEX_TOKEN_DEF(
|
||||
SYM_EQUAL,
|
||||
"=",
|
||||
LEX_STATE_ARITHMETIC | LEX_STATE_HASHTABLE),
|
||||
LEX_TOKEN_DEF(SYM_PLUS_EQUAL, "+=", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_HYPHEN_EQUAL, "-=", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_FORWARD_SLASH_EQUAL, "/=", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_ASTERISK_EQUAL, "*=", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_PERCENT_EQUAL, "%=", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_DOT, ".", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_COLON_COLON, "::", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_DOT_DOT, "..", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_QUESTION_DOT, "?.", LEX_STATE_ARITHMETIC),
|
||||
SYMBOL_DEF(SYM_PLUS, "+", LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
SYMBOL_DEF(SYM_HYPHEN, "-", LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
SYMBOL_DEF(SYM_FORWARD_SLASH, "/", 0),
|
||||
SYMBOL_DEF(SYM_ASTERISK, "*", 0),
|
||||
SYMBOL_DEF(SYM_AMPERSAND, "&", 0),
|
||||
SYMBOL_DEF(SYM_PERCENT, "%", 0),
|
||||
SYMBOL_DEF(SYM_SQUOTE, "'", 0),
|
||||
SYMBOL_DEF(SYM_DQUOTE, "\"", 0),
|
||||
SYMBOL_DEF(SYM_HASH, "#", 0),
|
||||
SYMBOL_DEF(SYM_DOLLAR, "$", LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
SYMBOL_DEF(SYM_DOLLAR_LEFT_PAREN, "$(", LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
SYMBOL_DEF(SYM_DOLLAR_LEFT_BRACE, "${", LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
SYMBOL_DEF(SYM_AT, "@", 0),
|
||||
SYMBOL_DEF(SYM_PIPE, "|", LEX_TOKEN_TERMINATES_WORD),
|
||||
SYMBOL_DEF(SYM_COMMA, ",", LEX_TOKEN_TERMINATES_WORD),
|
||||
SYMBOL_DEF(SYM_SEMICOLON, ";", LEX_TOKEN_TERMINATES_WORD),
|
||||
SYMBOL_DEF(SYM_AT_LEFT_BRACE, "@{", LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
SYMBOL_DEF(SYM_AT_LEFT_PAREN, "@(", 0),
|
||||
SYMBOL_DEF(SYM_LEFT_BRACE, "{", LEX_TOKEN_TERMINATES_WORD),
|
||||
SYMBOL_DEF(SYM_RIGHT_BRACE, "}", LEX_TOKEN_TERMINATES_WORD),
|
||||
SYMBOL_DEF(SYM_LEFT_BRACKET, "[", 0),
|
||||
SYMBOL_DEF(SYM_RIGHT_BRACKET, "]", 0),
|
||||
SYMBOL_DEF(SYM_QUESTION_LEFT_BRACKET, "?[", 0),
|
||||
SYMBOL_DEF(SYM_LEFT_PAREN, "(", LEX_TOKEN_TERMINATES_WORD),
|
||||
SYMBOL_DEF(SYM_RIGHT_PAREN, ")", LEX_TOKEN_TERMINATES_WORD),
|
||||
SYMBOL_DEF(SYM_EQUAL, "=", 0),
|
||||
SYMBOL_DEF(SYM_PLUS_EQUAL, "+=", 0),
|
||||
SYMBOL_DEF(SYM_HYPHEN_EQUAL, "-=", 0),
|
||||
SYMBOL_DEF(SYM_FORWARD_SLASH_EQUAL, "/=", 0),
|
||||
SYMBOL_DEF(SYM_ASTERISK_EQUAL, "*=", 0),
|
||||
SYMBOL_DEF(SYM_PERCENT_EQUAL, "%=", 0),
|
||||
SYMBOL_DEF(SYM_DOT, ".", 0),
|
||||
SYMBOL_DEF(SYM_COLON_COLON, "::", 0),
|
||||
SYMBOL_DEF(SYM_DOT_DOT, "..", 0),
|
||||
SYMBOL_DEF(SYM_QUESTION_DOT, "?.", 0),
|
||||
};
|
||||
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
||||
|
||||
@@ -437,6 +341,10 @@ static struct lex_symbol_node *build_symbol_tree(void)
|
||||
|
||||
enum bshell_status status = BSHELL_SUCCESS;
|
||||
for (size_t i = 0; i < nr_symbols; i++) {
|
||||
if (!symbols[i].name) {
|
||||
continue;
|
||||
}
|
||||
|
||||
status = put_symbol(root, &symbols[i]);
|
||||
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
@@ -448,6 +356,33 @@ static struct lex_symbol_node *build_symbol_tree(void)
|
||||
return root;
|
||||
}
|
||||
|
||||
static void init_token_enabled_states(const struct lex_state_type *state_type)
|
||||
{
|
||||
if (state_type->s_keywords) {
|
||||
for (size_t i = 0; state_type->s_keywords[i]; i++) {
|
||||
unsigned int id = state_type->s_keywords[i];
|
||||
keywords[id - __KW_INDEX_BASE].enabled_states
|
||||
|= state_type->s_id;
|
||||
}
|
||||
}
|
||||
|
||||
if (state_type->s_operators) {
|
||||
for (size_t i = 0; state_type->s_operators[i]; i++) {
|
||||
unsigned int id = state_type->s_operators[i];
|
||||
operators[id - __TKOP_INDEX_BASE].enabled_states
|
||||
|= state_type->s_id;
|
||||
}
|
||||
}
|
||||
|
||||
if (state_type->s_symbols) {
|
||||
for (size_t i = 0; state_type->s_symbols[i]; i++) {
|
||||
unsigned int id = state_type->s_symbols[i];
|
||||
symbols[id - __SYM_INDEX_BASE].enabled_states
|
||||
|= state_type->s_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum bshell_status lex_ctx_init(
|
||||
struct lex_ctx *ctx,
|
||||
enum lex_flags flags,
|
||||
@@ -464,6 +399,13 @@ enum bshell_status lex_ctx_init(
|
||||
ctx->lex_ch = FX_WCHAR_INVALID;
|
||||
ctx->lex_cursor.c_row = ctx->lex_cursor.c_col = 1;
|
||||
|
||||
init_token_enabled_states(&lex_statement_state);
|
||||
init_token_enabled_states(&lex_command_state);
|
||||
init_token_enabled_states(&lex_arithmetic_state);
|
||||
init_token_enabled_states(&lex_string_state);
|
||||
init_token_enabled_states(&lex_word_state);
|
||||
init_token_enabled_states(&lex_hashtable_state);
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -574,7 +516,7 @@ bool convert_word_to_keyword(struct lex_token *tok)
|
||||
|
||||
for (size_t i = 0; i < nr_keywords; i++) {
|
||||
const char *kw_str = keywords[i].name;
|
||||
if (strcmp(kw_str, tok->tok_str) != 0) {
|
||||
if (!kw_str || strcmp(kw_str, tok->tok_str) != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1012,6 +954,10 @@ bool char_can_begin_symbol_in_state(
|
||||
enum lex_state_type_id state_type)
|
||||
{
|
||||
for (size_t i = 0; i < nr_symbols; i++) {
|
||||
if (!symbols[i].name) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (symbols[i].name[0] != c) {
|
||||
continue;
|
||||
}
|
||||
@@ -1033,6 +979,10 @@ bool char_can_begin_symbol(struct lex_ctx *ctx, char c)
|
||||
bool char_has_flags(struct lex_ctx *ctx, char c, enum lex_token_flags flags)
|
||||
{
|
||||
for (size_t i = 0; i < nr_symbols; i++) {
|
||||
if (!symbols[i].name) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (symbols[i].name[0] != c) {
|
||||
continue;
|
||||
}
|
||||
@@ -1103,7 +1053,7 @@ enum token_operator get_operator_with_string(struct lex_ctx *ctx, const char *s)
|
||||
|
||||
for (size_t i = 0; i < nr_operators; i++) {
|
||||
const char *op_str = operators[i].name;
|
||||
if (strcmp(op_str, s) != 0) {
|
||||
if (!op_str || strcmp(op_str, s) != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -183,8 +183,47 @@ static const struct lex_state_link links[] = {
|
||||
LINK_END,
|
||||
};
|
||||
|
||||
static const unsigned int keywords[] = {
|
||||
KW_FUNC,
|
||||
KW_IF,
|
||||
KW_ELSEIF,
|
||||
KW_ELSE,
|
||||
KW_NONE,
|
||||
};
|
||||
|
||||
static const unsigned int operators[] = {
|
||||
TKOP_BNOT,
|
||||
TKOP_NOT,
|
||||
TKOP_NONE,
|
||||
};
|
||||
|
||||
static const unsigned int symbols[] = {
|
||||
SYM_AMPERSAND,
|
||||
SYM_SQUOTE,
|
||||
SYM_DQUOTE,
|
||||
SYM_HASH,
|
||||
SYM_DOLLAR,
|
||||
SYM_DOLLAR_LEFT_PAREN,
|
||||
SYM_DOLLAR_LEFT_BRACE,
|
||||
SYM_AT,
|
||||
SYM_AT_LEFT_BRACE,
|
||||
SYM_PIPE,
|
||||
SYM_COMMA,
|
||||
SYM_SEMICOLON,
|
||||
SYM_LEFT_BRACE,
|
||||
SYM_RIGHT_BRACE,
|
||||
SYM_LEFT_BRACKET,
|
||||
SYM_RIGHT_BRACKET,
|
||||
SYM_LEFT_PAREN,
|
||||
SYM_RIGHT_PAREN,
|
||||
SYM_NONE,
|
||||
};
|
||||
|
||||
const struct lex_state_type lex_statement_state = {
|
||||
.s_id = LEX_STATE_STATEMENT,
|
||||
.s_pump_token = statement_pump_token,
|
||||
.s_links = links,
|
||||
.s_keywords = keywords,
|
||||
.s_operators = operators,
|
||||
.s_symbols = symbols,
|
||||
};
|
||||
|
||||
@@ -9,6 +9,8 @@ static enum bshell_status string_symbol(struct lex_ctx *ctx)
|
||||
return status;
|
||||
}
|
||||
|
||||
handle_lex_state_transition(ctx, sym->id);
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
|
||||
switch (sym->id) {
|
||||
@@ -18,10 +20,6 @@ static enum bshell_status string_symbol(struct lex_ctx *ctx)
|
||||
return status;
|
||||
}
|
||||
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DQUOTE:
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR:
|
||||
status = read_var(ctx, TOK_VAR, &tok);
|
||||
@@ -122,9 +120,23 @@ static enum bshell_status string_pump_token(struct lex_ctx *ctx)
|
||||
return string_content(ctx);
|
||||
}
|
||||
|
||||
static const struct lex_state_link links[] = {
|
||||
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
|
||||
LINK_POP(SYM_DQUOTE),
|
||||
LINK_END,
|
||||
};
|
||||
|
||||
static const unsigned int symbols[] = {
|
||||
SYM_DOLLAR,
|
||||
SYM_DOLLAR_LEFT_PAREN,
|
||||
SYM_DOLLAR_LEFT_BRACE,
|
||||
SYM_NONE,
|
||||
};
|
||||
|
||||
const struct lex_state_type lex_string_state = {
|
||||
.s_id = LEX_STATE_STRING,
|
||||
.s_begin = string_begin,
|
||||
.s_end = string_end,
|
||||
.s_pump_token = string_pump_token,
|
||||
.s_links = links,
|
||||
};
|
||||
|
||||
+15
-3
@@ -137,8 +137,20 @@ static enum bshell_status word_pump_token(struct lex_ctx *ctx)
|
||||
return word_content(ctx);
|
||||
}
|
||||
|
||||
static const struct lex_state_link links[] = {
|
||||
LINK_END,
|
||||
static const unsigned int symbols[] = {
|
||||
SYM_AMPERSAND,
|
||||
SYM_HASH,
|
||||
SYM_DOLLAR,
|
||||
SYM_DOLLAR_LEFT_PAREN,
|
||||
SYM_DOLLAR_LEFT_BRACE,
|
||||
SYM_PIPE,
|
||||
SYM_COMMA,
|
||||
SYM_SEMICOLON,
|
||||
SYM_LEFT_BRACE,
|
||||
SYM_RIGHT_BRACE,
|
||||
SYM_LEFT_PAREN,
|
||||
SYM_RIGHT_PAREN,
|
||||
SYM_NONE,
|
||||
};
|
||||
|
||||
const struct lex_state_type lex_word_state = {
|
||||
@@ -146,5 +158,5 @@ const struct lex_state_type lex_word_state = {
|
||||
.s_begin = word_begin,
|
||||
.s_end = word_end,
|
||||
.s_pump_token = word_pump_token,
|
||||
.s_links = links,
|
||||
.s_symbols = symbols,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user