parse: lex: move per-state token settings to state source files
This commit is contained in:
@@ -160,8 +160,67 @@ static const struct lex_state_link links[] = {
|
|||||||
LINK_END,
|
LINK_END,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const unsigned int keywords[] = {
|
||||||
|
KW_IF,
|
||||||
|
KW_ELSEIF,
|
||||||
|
KW_ELSE,
|
||||||
|
KW_NONE,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const unsigned int operators[] = {
|
||||||
|
TKOP_NONE, TKOP_F, TKOP_BAND, TKOP_BOR,
|
||||||
|
TKOP_BXOR, TKOP_BNOT, TKOP_SHL, TKOP_SHR,
|
||||||
|
TKOP_EQ, TKOP_NE, TKOP_GT, TKOP_LT,
|
||||||
|
TKOP_GE, TKOP_LE, TKOP_MATCH, TKOP_NOTMATCH,
|
||||||
|
TKOP_REPLACE, TKOP_LIKE, TKOP_NOTLIKE, TKOP_IN,
|
||||||
|
TKOP_NOTIN, TKOP_CONTAINS, TKOP_NOTCONTAINS, TKOP_AND,
|
||||||
|
TKOP_OR, TKOP_XOR, TKOP_NOT, TKOP_SPLIT,
|
||||||
|
TKOP_JOIN, TKOP_IS, TKOP_ISNOT, TKOP_AS,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const unsigned int symbols[] = {
|
||||||
|
SYM_PLUS,
|
||||||
|
SYM_HYPHEN,
|
||||||
|
SYM_FORWARD_SLASH,
|
||||||
|
SYM_ASTERISK,
|
||||||
|
SYM_AMPERSAND,
|
||||||
|
SYM_PERCENT,
|
||||||
|
SYM_SQUOTE,
|
||||||
|
SYM_DQUOTE,
|
||||||
|
SYM_HASH,
|
||||||
|
SYM_DOLLAR,
|
||||||
|
SYM_DOLLAR_LEFT_PAREN,
|
||||||
|
SYM_DOLLAR_LEFT_BRACE,
|
||||||
|
SYM_AT,
|
||||||
|
SYM_AT_LEFT_BRACE,
|
||||||
|
SYM_PIPE,
|
||||||
|
SYM_COMMA,
|
||||||
|
SYM_SEMICOLON,
|
||||||
|
SYM_LEFT_PAREN,
|
||||||
|
SYM_RIGHT_PAREN,
|
||||||
|
SYM_LEFT_BRACE,
|
||||||
|
SYM_RIGHT_BRACE,
|
||||||
|
SYM_LEFT_BRACKET,
|
||||||
|
SYM_RIGHT_BRACKET,
|
||||||
|
SYM_QUESTION_DOT,
|
||||||
|
SYM_QUESTION_LEFT_BRACKET,
|
||||||
|
SYM_EQUAL,
|
||||||
|
SYM_PLUS_EQUAL,
|
||||||
|
SYM_HYPHEN_EQUAL,
|
||||||
|
SYM_FORWARD_SLASH_EQUAL,
|
||||||
|
SYM_ASTERISK_EQUAL,
|
||||||
|
SYM_PERCENT_EQUAL,
|
||||||
|
SYM_DOT,
|
||||||
|
SYM_DOT_DOT,
|
||||||
|
SYM_COLON_COLON,
|
||||||
|
SYM_NONE,
|
||||||
|
};
|
||||||
|
|
||||||
const struct lex_state_type lex_arithmetic_state = {
|
const struct lex_state_type lex_arithmetic_state = {
|
||||||
.s_id = LEX_STATE_ARITHMETIC,
|
.s_id = LEX_STATE_ARITHMETIC,
|
||||||
.s_pump_token = arithmetic_pump_token,
|
.s_pump_token = arithmetic_pump_token,
|
||||||
.s_links = links,
|
.s_links = links,
|
||||||
|
.s_keywords = keywords,
|
||||||
|
.s_operators = operators,
|
||||||
|
.s_symbols = symbols,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -151,14 +151,33 @@ const struct lex_state_link links[] = {
|
|||||||
STATEMENT_F_DISABLE_KEYWORDS),
|
STATEMENT_F_DISABLE_KEYWORDS),
|
||||||
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
|
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
|
||||||
LINK_POP(SYM_RIGHT_PAREN),
|
LINK_POP(SYM_RIGHT_PAREN),
|
||||||
|
LINK_POP(SYM_RIGHT_BRACE),
|
||||||
LINK_CHANGE(SYM_SEMICOLON, LEX_STATE_STATEMENT),
|
LINK_CHANGE(SYM_SEMICOLON, LEX_STATE_STATEMENT),
|
||||||
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
|
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
|
||||||
LINK_CHANGE(TOK_LINEFEED, LEX_STATE_STATEMENT),
|
LINK_CHANGE(TOK_LINEFEED, LEX_STATE_STATEMENT),
|
||||||
LINK_END,
|
LINK_END,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const unsigned int symbols[] = {
|
||||||
|
SYM_DQUOTE,
|
||||||
|
SYM_SQUOTE,
|
||||||
|
SYM_DOLLAR,
|
||||||
|
SYM_DOLLAR_LEFT_PAREN,
|
||||||
|
SYM_DOLLAR_LEFT_BRACE,
|
||||||
|
SYM_AT,
|
||||||
|
SYM_AT_LEFT_BRACE,
|
||||||
|
SYM_AT_LEFT_PAREN,
|
||||||
|
SYM_AMPERSAND,
|
||||||
|
SYM_PIPE,
|
||||||
|
SYM_SEMICOLON,
|
||||||
|
SYM_RIGHT_PAREN,
|
||||||
|
SYM_RIGHT_BRACE,
|
||||||
|
SYM_NONE,
|
||||||
|
};
|
||||||
|
|
||||||
const struct lex_state_type lex_command_state = {
|
const struct lex_state_type lex_command_state = {
|
||||||
.s_id = LEX_STATE_COMMAND,
|
.s_id = LEX_STATE_COMMAND,
|
||||||
.s_pump_token = command_pump_token,
|
.s_pump_token = command_pump_token,
|
||||||
.s_links = links,
|
.s_links = links,
|
||||||
|
.s_symbols = symbols,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -125,11 +125,10 @@ static enum bshell_status hashtable_pump_token(struct lex_ctx *ctx)
|
|||||||
c = peek_char_noread(ctx);
|
c = peek_char_noread(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 1
|
||||||
if (newline) {
|
if (newline) {
|
||||||
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||||
enqueue_token(ctx, tok);
|
enqueue_token(ctx, tok);
|
||||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
|
||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@@ -153,8 +152,18 @@ static const struct lex_state_link links[] = {
|
|||||||
LINK_END,
|
LINK_END,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const unsigned int symbols[] = {
|
||||||
|
SYM_EQUAL,
|
||||||
|
SYM_SEMICOLON,
|
||||||
|
SYM_RIGHT_BRACE,
|
||||||
|
SYM_DOLLAR_LEFT_PAREN,
|
||||||
|
SYM_LEFT_PAREN,
|
||||||
|
SYM_NONE,
|
||||||
|
};
|
||||||
|
|
||||||
const struct lex_state_type lex_hashtable_state = {
|
const struct lex_state_type lex_hashtable_state = {
|
||||||
.s_id = LEX_STATE_HASHTABLE,
|
.s_id = LEX_STATE_HASHTABLE,
|
||||||
.s_pump_token = hashtable_pump_token,
|
.s_pump_token = hashtable_pump_token,
|
||||||
.s_links = links,
|
.s_links = links,
|
||||||
|
.s_symbols = symbols,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -76,6 +76,10 @@ struct lex_state_type {
|
|||||||
lex_state_pump_token s_pump_token;
|
lex_state_pump_token s_pump_token;
|
||||||
lex_state_begin s_begin;
|
lex_state_begin s_begin;
|
||||||
lex_state_end s_end;
|
lex_state_end s_end;
|
||||||
|
|
||||||
|
const unsigned int *s_keywords;
|
||||||
|
const unsigned int *s_operators;
|
||||||
|
const unsigned int *s_symbols;
|
||||||
const struct lex_state_link *s_links;
|
const struct lex_state_link *s_links;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
+134
-184
@@ -7,198 +7,102 @@
|
|||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
#define LEX_TOKEN_DEF(i, n, s) {.id = (i), .name = (n), .enabled_states = (s)}
|
#define SYMBOL_DEF(i, n, f) \
|
||||||
#define LEX_TOKEN_DEF2(i, n, s, f) \
|
[i - __SYM_INDEX_BASE] = { \
|
||||||
{.id = (i), .name = (n), .enabled_states = (s), .flags = (f)}
|
.id = (i), \
|
||||||
|
.name = (n), \
|
||||||
#define CONVERSION_REQUESTED(flags) \
|
.flags = (f), \
|
||||||
((flags) & (LEX_ENABLE_INT | LEX_ENABLE_KEYWORD))
|
}
|
||||||
|
#define KW_DEF(i, n, f) \
|
||||||
|
[i - __KW_INDEX_BASE] = { \
|
||||||
|
.id = (i), \
|
||||||
|
.name = (n), \
|
||||||
|
.flags = (f), \
|
||||||
|
}
|
||||||
|
#define TKOP_DEF(i, n, f) \
|
||||||
|
[i - __TKOP_INDEX_BASE] = { \
|
||||||
|
.id = (i), \
|
||||||
|
.name = (n), \
|
||||||
|
.flags = (f), \
|
||||||
|
}
|
||||||
|
|
||||||
static struct lex_token_def keywords[] = {
|
static struct lex_token_def keywords[] = {
|
||||||
LEX_TOKEN_DEF2(
|
KW_DEF(KW_FUNC, "func", LEX_TOKEN_COMMAND_MODE),
|
||||||
KW_FUNC,
|
KW_DEF(KW_IF, "if", 0),
|
||||||
"func",
|
KW_DEF(KW_ELSEIF, "elseif", 0),
|
||||||
LEX_STATE_STATEMENT,
|
KW_DEF(KW_ELSE, "else", 0),
|
||||||
LEX_TOKEN_COMMAND_MODE),
|
|
||||||
LEX_TOKEN_DEF(KW_IF, "if", LEX_STATE_STATEMENT),
|
|
||||||
LEX_TOKEN_DEF(KW_ELSEIF, "elseif", LEX_STATE_STATEMENT),
|
|
||||||
LEX_TOKEN_DEF(KW_ELSE, "else", LEX_STATE_STATEMENT),
|
|
||||||
};
|
};
|
||||||
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
|
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
|
||||||
|
|
||||||
static struct lex_token_def operators[] = {
|
static struct lex_token_def operators[] = {
|
||||||
LEX_TOKEN_DEF(TKOP_BAND, "-band", LEX_STATE_ARITHMETIC),
|
TKOP_DEF(TKOP_BAND, "-band", 0),
|
||||||
LEX_TOKEN_DEF(TKOP_BOR, "-bor", LEX_STATE_ARITHMETIC),
|
TKOP_DEF(TKOP_BOR, "-bor", 0),
|
||||||
LEX_TOKEN_DEF(TKOP_BXOR, "-bxor", LEX_STATE_ARITHMETIC),
|
TKOP_DEF(TKOP_BXOR, "-bxor", 0),
|
||||||
LEX_TOKEN_DEF(
|
TKOP_DEF(TKOP_BNOT, "-bnot", 0),
|
||||||
TKOP_BNOT,
|
TKOP_DEF(TKOP_SHL, "-shl", 0),
|
||||||
"-bnot",
|
TKOP_DEF(TKOP_SHR, "-shr", 0),
|
||||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
TKOP_DEF(TKOP_EQ, "-eq", 0),
|
||||||
LEX_TOKEN_DEF(TKOP_SHL, "-shl", LEX_STATE_ARITHMETIC),
|
TKOP_DEF(TKOP_NE, "-ne", 0),
|
||||||
LEX_TOKEN_DEF(TKOP_SHR, "-shr", LEX_STATE_ARITHMETIC),
|
TKOP_DEF(TKOP_GT, "-gt", 0),
|
||||||
LEX_TOKEN_DEF(
|
TKOP_DEF(TKOP_LT, "-lt", 0),
|
||||||
TKOP_EQ,
|
TKOP_DEF(TKOP_GE, "-ge", 0),
|
||||||
"-eq",
|
TKOP_DEF(TKOP_LE, "-le", 0),
|
||||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
TKOP_DEF(TKOP_MATCH, "-match", 0),
|
||||||
LEX_TOKEN_DEF(
|
TKOP_DEF(TKOP_NOTMATCH, "-notmatch", 0),
|
||||||
TKOP_NE,
|
TKOP_DEF(TKOP_REPLACE, "-replace", 0),
|
||||||
"-ne",
|
TKOP_DEF(TKOP_LIKE, "-like", 0),
|
||||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
TKOP_DEF(TKOP_NOTLIKE, "-notlike", 0),
|
||||||
LEX_TOKEN_DEF(
|
TKOP_DEF(TKOP_CONTAINS, "-contains", 0),
|
||||||
TKOP_GT,
|
TKOP_DEF(TKOP_NOTCONTAINS, "-notcontains", 0),
|
||||||
"-gt",
|
TKOP_DEF(TKOP_AND, "-and", 0),
|
||||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
TKOP_DEF(TKOP_OR, "-or", 0),
|
||||||
LEX_TOKEN_DEF(
|
TKOP_DEF(TKOP_XOR, "-xor", 0),
|
||||||
TKOP_LT,
|
TKOP_DEF(TKOP_NOT, "-not", 0),
|
||||||
"-lt",
|
TKOP_DEF(TKOP_SPLIT, "-split", 0),
|
||||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
TKOP_DEF(TKOP_JOIN, "-join", 0),
|
||||||
LEX_TOKEN_DEF(
|
TKOP_DEF(TKOP_IS, "-is", 0),
|
||||||
TKOP_GE,
|
TKOP_DEF(TKOP_ISNOT, "-isnot", 0),
|
||||||
"-ge",
|
TKOP_DEF(TKOP_AS, "-as", 0),
|
||||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
TKOP_DEF(TKOP_F, "-f", 0),
|
||||||
LEX_TOKEN_DEF(
|
|
||||||
TKOP_LE,
|
|
||||||
"-le",
|
|
||||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_MATCH, "-match", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_NOTMATCH, "-notmatch", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_REPLACE, "-replace", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_LIKE, "-like", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_NOTLIKE, "-notlike", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_CONTAINS, "-contains", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_NOTCONTAINS, "-notcontains", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_AND, "-and", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(
|
|
||||||
TKOP_OR,
|
|
||||||
"-OR",
|
|
||||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_XOR, "-xor", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(
|
|
||||||
TKOP_NOT,
|
|
||||||
"-not",
|
|
||||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_SPLIT, "-split", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_JOIN, "-join", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_IS, "-is", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_ISNOT, "-isnot", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_AS, "-as", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(TKOP_F, "-f", LEX_STATE_ARITHMETIC),
|
|
||||||
};
|
};
|
||||||
static const size_t nr_operators = sizeof operators / sizeof operators[0];
|
static const size_t nr_operators = sizeof operators / sizeof operators[0];
|
||||||
|
|
||||||
#define LEX_STATES(states) (LEX_STATE_STATEMENT | states)
|
|
||||||
#define LEX_STATE_ALL \
|
|
||||||
(LEX_STATE_ARITHMETIC | LEX_STATE_STATEMENT | LEX_STATE_COMMAND \
|
|
||||||
| LEX_STATE_STRING | LEX_STATE_WORD | LEX_STATE_HASHTABLE)
|
|
||||||
|
|
||||||
static struct lex_token_def symbols[] = {
|
static struct lex_token_def symbols[] = {
|
||||||
LEX_TOKEN_DEF2(
|
SYMBOL_DEF(SYM_PLUS, "+", LEX_TOKEN_UNARY_ARITHMETIC),
|
||||||
SYM_PLUS,
|
SYMBOL_DEF(SYM_HYPHEN, "-", LEX_TOKEN_UNARY_ARITHMETIC),
|
||||||
"+",
|
SYMBOL_DEF(SYM_FORWARD_SLASH, "/", 0),
|
||||||
LEX_STATE_ARITHMETIC,
|
SYMBOL_DEF(SYM_ASTERISK, "*", 0),
|
||||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
SYMBOL_DEF(SYM_AMPERSAND, "&", 0),
|
||||||
LEX_TOKEN_DEF2(
|
SYMBOL_DEF(SYM_PERCENT, "%", 0),
|
||||||
SYM_HYPHEN,
|
SYMBOL_DEF(SYM_SQUOTE, "'", 0),
|
||||||
"-",
|
SYMBOL_DEF(SYM_DQUOTE, "\"", 0),
|
||||||
LEX_STATE_ARITHMETIC,
|
SYMBOL_DEF(SYM_HASH, "#", 0),
|
||||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
SYMBOL_DEF(SYM_DOLLAR, "$", LEX_TOKEN_UNARY_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(SYM_FORWARD_SLASH, "/", LEX_STATE_ARITHMETIC),
|
SYMBOL_DEF(SYM_DOLLAR_LEFT_PAREN, "$(", LEX_TOKEN_UNARY_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(SYM_ASTERISK, "*", LEX_STATE_ARITHMETIC),
|
SYMBOL_DEF(SYM_DOLLAR_LEFT_BRACE, "${", LEX_TOKEN_UNARY_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(
|
SYMBOL_DEF(SYM_AT, "@", 0),
|
||||||
SYM_AMPERSAND,
|
SYMBOL_DEF(SYM_PIPE, "|", LEX_TOKEN_TERMINATES_WORD),
|
||||||
"&",
|
SYMBOL_DEF(SYM_COMMA, ",", LEX_TOKEN_TERMINATES_WORD),
|
||||||
LEX_STATES(
|
SYMBOL_DEF(SYM_SEMICOLON, ";", LEX_TOKEN_TERMINATES_WORD),
|
||||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND
|
SYMBOL_DEF(SYM_AT_LEFT_BRACE, "@{", LEX_TOKEN_UNARY_ARITHMETIC),
|
||||||
| LEX_STATE_WORD)),
|
SYMBOL_DEF(SYM_AT_LEFT_PAREN, "@(", 0),
|
||||||
LEX_TOKEN_DEF(SYM_PERCENT, "%", LEX_STATE_ARITHMETIC),
|
SYMBOL_DEF(SYM_LEFT_BRACE, "{", LEX_TOKEN_TERMINATES_WORD),
|
||||||
LEX_TOKEN_DEF(
|
SYMBOL_DEF(SYM_RIGHT_BRACE, "}", LEX_TOKEN_TERMINATES_WORD),
|
||||||
SYM_SQUOTE,
|
SYMBOL_DEF(SYM_LEFT_BRACKET, "[", 0),
|
||||||
"'",
|
SYMBOL_DEF(SYM_RIGHT_BRACKET, "]", 0),
|
||||||
LEX_STATES(LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND)),
|
SYMBOL_DEF(SYM_QUESTION_LEFT_BRACKET, "?[", 0),
|
||||||
LEX_TOKEN_DEF(SYM_DQUOTE, "\"", LEX_STATE_ALL),
|
SYMBOL_DEF(SYM_LEFT_PAREN, "(", LEX_TOKEN_TERMINATES_WORD),
|
||||||
LEX_TOKEN_DEF(
|
SYMBOL_DEF(SYM_RIGHT_PAREN, ")", LEX_TOKEN_TERMINATES_WORD),
|
||||||
SYM_HASH,
|
SYMBOL_DEF(SYM_EQUAL, "=", 0),
|
||||||
"#",
|
SYMBOL_DEF(SYM_PLUS_EQUAL, "+=", 0),
|
||||||
LEX_STATES(
|
SYMBOL_DEF(SYM_HYPHEN_EQUAL, "-=", 0),
|
||||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND
|
SYMBOL_DEF(SYM_FORWARD_SLASH_EQUAL, "/=", 0),
|
||||||
| LEX_STATE_WORD)),
|
SYMBOL_DEF(SYM_ASTERISK_EQUAL, "*=", 0),
|
||||||
LEX_TOKEN_DEF2(
|
SYMBOL_DEF(SYM_PERCENT_EQUAL, "%=", 0),
|
||||||
SYM_DOLLAR,
|
SYMBOL_DEF(SYM_DOT, ".", 0),
|
||||||
"$",
|
SYMBOL_DEF(SYM_COLON_COLON, "::", 0),
|
||||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
|
SYMBOL_DEF(SYM_DOT_DOT, "..", 0),
|
||||||
| LEX_STATE_WORD,
|
SYMBOL_DEF(SYM_QUESTION_DOT, "?.", 0),
|
||||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF2(
|
|
||||||
SYM_DOLLAR_LEFT_PAREN,
|
|
||||||
"$(",
|
|
||||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
|
|
||||||
| LEX_STATE_WORD,
|
|
||||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF2(
|
|
||||||
SYM_DOLLAR_LEFT_BRACE,
|
|
||||||
"${",
|
|
||||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
|
|
||||||
| LEX_STATE_WORD,
|
|
||||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(SYM_AT, "@", LEX_STATE_ALL),
|
|
||||||
LEX_TOKEN_DEF2(
|
|
||||||
SYM_PIPE,
|
|
||||||
"|",
|
|
||||||
LEX_STATE_ALL,
|
|
||||||
LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_COMMAND_MODE),
|
|
||||||
LEX_TOKEN_DEF2(
|
|
||||||
SYM_COMMA,
|
|
||||||
",",
|
|
||||||
LEX_STATE_ALL,
|
|
||||||
LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_STATEMENT_MODE),
|
|
||||||
LEX_TOKEN_DEF2(
|
|
||||||
SYM_SEMICOLON,
|
|
||||||
";",
|
|
||||||
LEX_STATE_ALL,
|
|
||||||
LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_STATEMENT_MODE),
|
|
||||||
LEX_TOKEN_DEF2(
|
|
||||||
SYM_AT_LEFT_BRACE,
|
|
||||||
"@{",
|
|
||||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
|
|
||||||
| LEX_STATE_WORD | LEX_STATE_STATEMENT,
|
|
||||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(SYM_AT_LEFT_PAREN, "@(", LEX_STATE_ALL),
|
|
||||||
LEX_TOKEN_DEF2(
|
|
||||||
SYM_LEFT_BRACE,
|
|
||||||
"{",
|
|
||||||
LEX_STATE_ALL,
|
|
||||||
LEX_TOKEN_TERMINATES_WORD),
|
|
||||||
LEX_TOKEN_DEF2(
|
|
||||||
SYM_RIGHT_BRACE,
|
|
||||||
"}",
|
|
||||||
LEX_STATE_ALL,
|
|
||||||
LEX_TOKEN_TERMINATES_WORD),
|
|
||||||
LEX_TOKEN_DEF(SYM_LEFT_BRACKET, "[", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
|
||||||
LEX_TOKEN_DEF(SYM_RIGHT_BRACKET, "]", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
|
||||||
LEX_TOKEN_DEF(SYM_QUESTION_LEFT_BRACKET, "?[", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF2(
|
|
||||||
SYM_LEFT_PAREN,
|
|
||||||
"(",
|
|
||||||
LEX_STATE_ALL,
|
|
||||||
LEX_TOKEN_TERMINATES_WORD),
|
|
||||||
LEX_TOKEN_DEF2(
|
|
||||||
SYM_RIGHT_PAREN,
|
|
||||||
")",
|
|
||||||
LEX_STATE_ALL,
|
|
||||||
LEX_TOKEN_TERMINATES_WORD),
|
|
||||||
LEX_TOKEN_DEF(
|
|
||||||
SYM_EQUAL,
|
|
||||||
"=",
|
|
||||||
LEX_STATE_ARITHMETIC | LEX_STATE_HASHTABLE),
|
|
||||||
LEX_TOKEN_DEF(SYM_PLUS_EQUAL, "+=", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(SYM_HYPHEN_EQUAL, "-=", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(SYM_FORWARD_SLASH_EQUAL, "/=", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(SYM_ASTERISK_EQUAL, "*=", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(SYM_PERCENT_EQUAL, "%=", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(SYM_DOT, ".", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(SYM_COLON_COLON, "::", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(SYM_DOT_DOT, "..", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(SYM_QUESTION_DOT, "?.", LEX_STATE_ARITHMETIC),
|
|
||||||
};
|
};
|
||||||
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
||||||
|
|
||||||
@@ -437,6 +341,10 @@ static struct lex_symbol_node *build_symbol_tree(void)
|
|||||||
|
|
||||||
enum bshell_status status = BSHELL_SUCCESS;
|
enum bshell_status status = BSHELL_SUCCESS;
|
||||||
for (size_t i = 0; i < nr_symbols; i++) {
|
for (size_t i = 0; i < nr_symbols; i++) {
|
||||||
|
if (!symbols[i].name) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
status = put_symbol(root, &symbols[i]);
|
status = put_symbol(root, &symbols[i]);
|
||||||
|
|
||||||
if (status != BSHELL_SUCCESS) {
|
if (status != BSHELL_SUCCESS) {
|
||||||
@@ -448,6 +356,33 @@ static struct lex_symbol_node *build_symbol_tree(void)
|
|||||||
return root;
|
return root;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void init_token_enabled_states(const struct lex_state_type *state_type)
|
||||||
|
{
|
||||||
|
if (state_type->s_keywords) {
|
||||||
|
for (size_t i = 0; state_type->s_keywords[i]; i++) {
|
||||||
|
unsigned int id = state_type->s_keywords[i];
|
||||||
|
keywords[id - __KW_INDEX_BASE].enabled_states
|
||||||
|
|= state_type->s_id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state_type->s_operators) {
|
||||||
|
for (size_t i = 0; state_type->s_operators[i]; i++) {
|
||||||
|
unsigned int id = state_type->s_operators[i];
|
||||||
|
operators[id - __TKOP_INDEX_BASE].enabled_states
|
||||||
|
|= state_type->s_id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state_type->s_symbols) {
|
||||||
|
for (size_t i = 0; state_type->s_symbols[i]; i++) {
|
||||||
|
unsigned int id = state_type->s_symbols[i];
|
||||||
|
symbols[id - __SYM_INDEX_BASE].enabled_states
|
||||||
|
|= state_type->s_id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
enum bshell_status lex_ctx_init(
|
enum bshell_status lex_ctx_init(
|
||||||
struct lex_ctx *ctx,
|
struct lex_ctx *ctx,
|
||||||
enum lex_flags flags,
|
enum lex_flags flags,
|
||||||
@@ -464,6 +399,13 @@ enum bshell_status lex_ctx_init(
|
|||||||
ctx->lex_ch = FX_WCHAR_INVALID;
|
ctx->lex_ch = FX_WCHAR_INVALID;
|
||||||
ctx->lex_cursor.c_row = ctx->lex_cursor.c_col = 1;
|
ctx->lex_cursor.c_row = ctx->lex_cursor.c_col = 1;
|
||||||
|
|
||||||
|
init_token_enabled_states(&lex_statement_state);
|
||||||
|
init_token_enabled_states(&lex_command_state);
|
||||||
|
init_token_enabled_states(&lex_arithmetic_state);
|
||||||
|
init_token_enabled_states(&lex_string_state);
|
||||||
|
init_token_enabled_states(&lex_word_state);
|
||||||
|
init_token_enabled_states(&lex_hashtable_state);
|
||||||
|
|
||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -574,7 +516,7 @@ bool convert_word_to_keyword(struct lex_token *tok)
|
|||||||
|
|
||||||
for (size_t i = 0; i < nr_keywords; i++) {
|
for (size_t i = 0; i < nr_keywords; i++) {
|
||||||
const char *kw_str = keywords[i].name;
|
const char *kw_str = keywords[i].name;
|
||||||
if (strcmp(kw_str, tok->tok_str) != 0) {
|
if (!kw_str || strcmp(kw_str, tok->tok_str) != 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1012,6 +954,10 @@ bool char_can_begin_symbol_in_state(
|
|||||||
enum lex_state_type_id state_type)
|
enum lex_state_type_id state_type)
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < nr_symbols; i++) {
|
for (size_t i = 0; i < nr_symbols; i++) {
|
||||||
|
if (!symbols[i].name) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (symbols[i].name[0] != c) {
|
if (symbols[i].name[0] != c) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -1033,6 +979,10 @@ bool char_can_begin_symbol(struct lex_ctx *ctx, char c)
|
|||||||
bool char_has_flags(struct lex_ctx *ctx, char c, enum lex_token_flags flags)
|
bool char_has_flags(struct lex_ctx *ctx, char c, enum lex_token_flags flags)
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < nr_symbols; i++) {
|
for (size_t i = 0; i < nr_symbols; i++) {
|
||||||
|
if (!symbols[i].name) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (symbols[i].name[0] != c) {
|
if (symbols[i].name[0] != c) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -1103,7 +1053,7 @@ enum token_operator get_operator_with_string(struct lex_ctx *ctx, const char *s)
|
|||||||
|
|
||||||
for (size_t i = 0; i < nr_operators; i++) {
|
for (size_t i = 0; i < nr_operators; i++) {
|
||||||
const char *op_str = operators[i].name;
|
const char *op_str = operators[i].name;
|
||||||
if (strcmp(op_str, s) != 0) {
|
if (!op_str || strcmp(op_str, s) != 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -183,8 +183,47 @@ static const struct lex_state_link links[] = {
|
|||||||
LINK_END,
|
LINK_END,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const unsigned int keywords[] = {
|
||||||
|
KW_FUNC,
|
||||||
|
KW_IF,
|
||||||
|
KW_ELSEIF,
|
||||||
|
KW_ELSE,
|
||||||
|
KW_NONE,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const unsigned int operators[] = {
|
||||||
|
TKOP_BNOT,
|
||||||
|
TKOP_NOT,
|
||||||
|
TKOP_NONE,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const unsigned int symbols[] = {
|
||||||
|
SYM_AMPERSAND,
|
||||||
|
SYM_SQUOTE,
|
||||||
|
SYM_DQUOTE,
|
||||||
|
SYM_HASH,
|
||||||
|
SYM_DOLLAR,
|
||||||
|
SYM_DOLLAR_LEFT_PAREN,
|
||||||
|
SYM_DOLLAR_LEFT_BRACE,
|
||||||
|
SYM_AT,
|
||||||
|
SYM_AT_LEFT_BRACE,
|
||||||
|
SYM_PIPE,
|
||||||
|
SYM_COMMA,
|
||||||
|
SYM_SEMICOLON,
|
||||||
|
SYM_LEFT_BRACE,
|
||||||
|
SYM_RIGHT_BRACE,
|
||||||
|
SYM_LEFT_BRACKET,
|
||||||
|
SYM_RIGHT_BRACKET,
|
||||||
|
SYM_LEFT_PAREN,
|
||||||
|
SYM_RIGHT_PAREN,
|
||||||
|
SYM_NONE,
|
||||||
|
};
|
||||||
|
|
||||||
const struct lex_state_type lex_statement_state = {
|
const struct lex_state_type lex_statement_state = {
|
||||||
.s_id = LEX_STATE_STATEMENT,
|
.s_id = LEX_STATE_STATEMENT,
|
||||||
.s_pump_token = statement_pump_token,
|
.s_pump_token = statement_pump_token,
|
||||||
.s_links = links,
|
.s_links = links,
|
||||||
|
.s_keywords = keywords,
|
||||||
|
.s_operators = operators,
|
||||||
|
.s_symbols = symbols,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ static enum bshell_status string_symbol(struct lex_ctx *ctx)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
handle_lex_state_transition(ctx, sym->id);
|
||||||
|
|
||||||
struct lex_token *tok = NULL;
|
struct lex_token *tok = NULL;
|
||||||
|
|
||||||
switch (sym->id) {
|
switch (sym->id) {
|
||||||
@@ -18,10 +20,6 @@ static enum bshell_status string_symbol(struct lex_ctx *ctx)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
case SYM_DQUOTE:
|
|
||||||
lex_state_pop(ctx);
|
|
||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
case SYM_DOLLAR:
|
case SYM_DOLLAR:
|
||||||
status = read_var(ctx, TOK_VAR, &tok);
|
status = read_var(ctx, TOK_VAR, &tok);
|
||||||
@@ -122,9 +120,23 @@ static enum bshell_status string_pump_token(struct lex_ctx *ctx)
|
|||||||
return string_content(ctx);
|
return string_content(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const struct lex_state_link links[] = {
|
||||||
|
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
|
||||||
|
LINK_POP(SYM_DQUOTE),
|
||||||
|
LINK_END,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const unsigned int symbols[] = {
|
||||||
|
SYM_DOLLAR,
|
||||||
|
SYM_DOLLAR_LEFT_PAREN,
|
||||||
|
SYM_DOLLAR_LEFT_BRACE,
|
||||||
|
SYM_NONE,
|
||||||
|
};
|
||||||
|
|
||||||
const struct lex_state_type lex_string_state = {
|
const struct lex_state_type lex_string_state = {
|
||||||
.s_id = LEX_STATE_STRING,
|
.s_id = LEX_STATE_STRING,
|
||||||
.s_begin = string_begin,
|
.s_begin = string_begin,
|
||||||
.s_end = string_end,
|
.s_end = string_end,
|
||||||
.s_pump_token = string_pump_token,
|
.s_pump_token = string_pump_token,
|
||||||
|
.s_links = links,
|
||||||
};
|
};
|
||||||
|
|||||||
+15
-3
@@ -137,8 +137,20 @@ static enum bshell_status word_pump_token(struct lex_ctx *ctx)
|
|||||||
return word_content(ctx);
|
return word_content(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct lex_state_link links[] = {
|
static const unsigned int symbols[] = {
|
||||||
LINK_END,
|
SYM_AMPERSAND,
|
||||||
|
SYM_HASH,
|
||||||
|
SYM_DOLLAR,
|
||||||
|
SYM_DOLLAR_LEFT_PAREN,
|
||||||
|
SYM_DOLLAR_LEFT_BRACE,
|
||||||
|
SYM_PIPE,
|
||||||
|
SYM_COMMA,
|
||||||
|
SYM_SEMICOLON,
|
||||||
|
SYM_LEFT_BRACE,
|
||||||
|
SYM_RIGHT_BRACE,
|
||||||
|
SYM_LEFT_PAREN,
|
||||||
|
SYM_RIGHT_PAREN,
|
||||||
|
SYM_NONE,
|
||||||
};
|
};
|
||||||
|
|
||||||
const struct lex_state_type lex_word_state = {
|
const struct lex_state_type lex_word_state = {
|
||||||
@@ -146,5 +158,5 @@ const struct lex_state_type lex_word_state = {
|
|||||||
.s_begin = word_begin,
|
.s_begin = word_begin,
|
||||||
.s_end = word_end,
|
.s_end = word_end,
|
||||||
.s_pump_token = word_pump_token,
|
.s_pump_token = word_pump_token,
|
||||||
.s_links = links,
|
.s_symbols = symbols,
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user