diff --git a/bshell/parse/lex/arithmetic.c b/bshell/parse/lex/arithmetic.c
index dcad905..40ae795 100644
--- a/bshell/parse/lex/arithmetic.c
+++ b/bshell/parse/lex/arithmetic.c
@@ -160,8 +160,68 @@ static const struct lex_state_link links[] = {
 	LINK_END,
 };
 
+static const unsigned int keywords[] = {
+	KW_IF,
+	KW_ELSEIF,
+	KW_ELSE,
+	KW_NONE,
+};
+
+/* Zero-terminated: TKOP_NONE ends the list, so it must be the last entry. */
+static const unsigned int operators[] = {
+	TKOP_F, TKOP_BAND, TKOP_BOR, TKOP_BXOR,
+	TKOP_BNOT, TKOP_SHL, TKOP_SHR, TKOP_EQ,
+	TKOP_NE, TKOP_GT, TKOP_LT, TKOP_GE,
+	TKOP_LE, TKOP_MATCH, TKOP_NOTMATCH, TKOP_REPLACE,
+	TKOP_LIKE, TKOP_NOTLIKE, TKOP_IN, TKOP_NOTIN,
+	TKOP_CONTAINS, TKOP_NOTCONTAINS, TKOP_AND, TKOP_OR,
+	TKOP_XOR, TKOP_NOT, TKOP_SPLIT, TKOP_JOIN,
+	TKOP_IS, TKOP_ISNOT, TKOP_AS, TKOP_NONE,
+};
+
+static const unsigned int symbols[] = {
+	SYM_PLUS,
+	SYM_HYPHEN,
+	SYM_FORWARD_SLASH,
+	SYM_ASTERISK,
+	SYM_AMPERSAND,
+	SYM_PERCENT,
+	SYM_SQUOTE,
+	SYM_DQUOTE,
+	SYM_HASH,
+	SYM_DOLLAR,
+	SYM_DOLLAR_LEFT_PAREN,
+	SYM_DOLLAR_LEFT_BRACE,
+	SYM_AT,
+	SYM_AT_LEFT_BRACE,
+	SYM_PIPE,
+	SYM_COMMA,
+	SYM_SEMICOLON,
+	SYM_LEFT_PAREN,
+	SYM_RIGHT_PAREN,
+	SYM_LEFT_BRACE,
+	SYM_RIGHT_BRACE,
+	SYM_LEFT_BRACKET,
+	SYM_RIGHT_BRACKET,
+	SYM_QUESTION_DOT,
+	SYM_QUESTION_LEFT_BRACKET,
+	SYM_EQUAL,
+	SYM_PLUS_EQUAL,
+	SYM_HYPHEN_EQUAL,
+	SYM_FORWARD_SLASH_EQUAL,
+	SYM_ASTERISK_EQUAL,
+	SYM_PERCENT_EQUAL,
+	SYM_DOT,
+	SYM_DOT_DOT,
+	SYM_COLON_COLON,
+	SYM_NONE,
+};
+
 const struct lex_state_type lex_arithmetic_state = {
 	.s_id = LEX_STATE_ARITHMETIC,
 	.s_pump_token = arithmetic_pump_token,
 	.s_links = links,
+	.s_keywords = keywords,
+	.s_operators = operators,
+	.s_symbols = symbols,
 };
diff --git a/bshell/parse/lex/command.c b/bshell/parse/lex/command.c
index 0984acb..2bd6122 100644
--- a/bshell/parse/lex/command.c
+++ b/bshell/parse/lex/command.c
@@ -151,14 +151,33 @@ const struct lex_state_link links[] = {
 		STATEMENT_F_DISABLE_KEYWORDS),
 	LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
 	LINK_POP(SYM_RIGHT_PAREN),
+	LINK_POP(SYM_RIGHT_BRACE),
 	LINK_CHANGE(SYM_SEMICOLON, LEX_STATE_STATEMENT),
 	LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
 	LINK_CHANGE(TOK_LINEFEED, LEX_STATE_STATEMENT),
 	LINK_END,
 };
 
+static const unsigned int symbols[] = {
+	SYM_DQUOTE,
+	SYM_SQUOTE,
+	SYM_DOLLAR,
+	SYM_DOLLAR_LEFT_PAREN,
+	SYM_DOLLAR_LEFT_BRACE,
+	SYM_AT,
+	SYM_AT_LEFT_BRACE,
+	SYM_AT_LEFT_PAREN,
+	SYM_AMPERSAND,
+	SYM_PIPE,
+	SYM_SEMICOLON,
+	SYM_RIGHT_PAREN,
+	SYM_RIGHT_BRACE,
+	SYM_NONE,
+};
+
 const struct lex_state_type lex_command_state = {
 	.s_id = LEX_STATE_COMMAND,
 	.s_pump_token = command_pump_token,
 	.s_links = links,
+	.s_symbols = symbols,
 };
diff --git a/bshell/parse/lex/hashtable.c b/bshell/parse/lex/hashtable.c
index 8bf018b..edbd1b4 100644
--- a/bshell/parse/lex/hashtable.c
+++ b/bshell/parse/lex/hashtable.c
@@ -125,11 +125,10 @@ static enum bshell_status hashtable_pump_token(struct lex_ctx *ctx)
 		c = peek_char_noread(ctx);
 	}
 
-#if 0
+#if 1
 	if (newline) {
 		struct lex_token *tok = lex_token_create(TOK_LINEFEED);
 		enqueue_token(ctx, tok);
-		lex_state_change(ctx, LEX_STATE_STATEMENT);
 		return BSHELL_SUCCESS;
 	}
 #endif
@@ -153,8 +152,18 @@ static const struct lex_state_link links[] = {
 	LINK_END,
 };
 
+static const unsigned int symbols[] = {
+	SYM_EQUAL,
+	SYM_SEMICOLON,
+	SYM_RIGHT_BRACE,
+	SYM_DOLLAR_LEFT_PAREN,
+	SYM_LEFT_PAREN,
+	SYM_NONE,
+};
+
 const struct lex_state_type lex_hashtable_state = {
 	.s_id = LEX_STATE_HASHTABLE,
 	.s_pump_token = hashtable_pump_token,
 	.s_links = links,
+	.s_symbols = symbols,
 };
diff --git a/bshell/parse/lex/lex-internal.h b/bshell/parse/lex/lex-internal.h
index 16612e3..de4f983 100644
--- a/bshell/parse/lex/lex-internal.h
+++ b/bshell/parse/lex/lex-internal.h
@@ -76,6 +76,11 @@ struct lex_state_type {
 	lex_state_pump_token s_pump_token;
 	lex_state_begin s_begin;
 	lex_state_end s_end;
+
+	/* Optional zero-terminated lists of token ids enabled in this state. */
+	const unsigned int *s_keywords;
+	const unsigned int *s_operators;
+	const unsigned int *s_symbols;
 	const struct lex_state_link *s_links;
 };
 
diff --git
a/bshell/parse/lex/lex.c b/bshell/parse/lex/lex.c
index 434233c..1e87f4f 100644
--- a/bshell/parse/lex/lex.c
+++ b/bshell/parse/lex/lex.c
@@ -7,198 +7,106 @@
 
 #include
 
-#define LEX_TOKEN_DEF(i, n, s) {.id = (i), .name = (n), .enabled_states = (s)}
-#define LEX_TOKEN_DEF2(i, n, s, f) \
-	{.id = (i), .name = (n), .enabled_states = (s), .flags = (f)}
-
-#define CONVERSION_REQUESTED(flags) \
-	((flags) & (LEX_ENABLE_INT | LEX_ENABLE_KEYWORD))
+/*
+ * Tables are indexed by token id minus the class's __*_INDEX_BASE; ids with
+ * no definition leave zero-filled slots (NULL .name) that lookups must skip.
+ */
+#define SYMBOL_DEF(i, n, f) \
+	[(i) - __SYM_INDEX_BASE] = { \
+		.id = (i), \
+		.name = (n), \
+		.flags = (f), \
+	}
+#define KW_DEF(i, n, f) \
+	[(i) - __KW_INDEX_BASE] = { \
+		.id = (i), \
+		.name = (n), \
+		.flags = (f), \
+	}
+#define TKOP_DEF(i, n, f) \
+	[(i) - __TKOP_INDEX_BASE] = { \
+		.id = (i), \
+		.name = (n), \
+		.flags = (f), \
+	}
 
 static struct lex_token_def keywords[] = {
-	LEX_TOKEN_DEF2(
-		KW_FUNC,
-		"func",
-		LEX_STATE_STATEMENT,
-		LEX_TOKEN_COMMAND_MODE),
-	LEX_TOKEN_DEF(KW_IF, "if", LEX_STATE_STATEMENT),
-	LEX_TOKEN_DEF(KW_ELSEIF, "elseif", LEX_STATE_STATEMENT),
-	LEX_TOKEN_DEF(KW_ELSE, "else", LEX_STATE_STATEMENT),
+	KW_DEF(KW_FUNC, "func", LEX_TOKEN_COMMAND_MODE),
+	KW_DEF(KW_IF, "if", 0),
+	KW_DEF(KW_ELSEIF, "elseif", 0),
+	KW_DEF(KW_ELSE, "else", 0),
 };
 static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
 
 static struct lex_token_def operators[] = {
-	LEX_TOKEN_DEF(TKOP_BAND, "-band", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_BOR, "-bor", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_BXOR, "-bxor", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(
-		TKOP_BNOT,
-		"-bnot",
-		LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_SHL, "-shl", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_SHR, "-shr", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(
-		TKOP_EQ,
-		"-eq",
-		LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(
-		TKOP_NE,
-		"-ne",
-		LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(
-		TKOP_GT,
-		"-gt",
-		LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(
-		TKOP_LT,
-		"-lt",
-		LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(
-		TKOP_GE,
-		"-ge",
-		LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(
-		TKOP_LE,
-		"-le",
-		LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_MATCH, "-match", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_NOTMATCH, "-notmatch", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_REPLACE, "-replace", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_LIKE, "-like", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_NOTLIKE, "-notlike", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_CONTAINS, "-contains", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_NOTCONTAINS, "-notcontains", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_AND, "-and", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(
-		TKOP_OR,
-		"-OR",
-		LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_XOR, "-xor", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(
-		TKOP_NOT,
-		"-not",
-		LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_SPLIT, "-split", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_JOIN, "-join", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_IS, "-is", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_ISNOT, "-isnot", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_AS, "-as", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(TKOP_F, "-f", LEX_STATE_ARITHMETIC),
+	TKOP_DEF(TKOP_BAND, "-band", 0),
+	TKOP_DEF(TKOP_BOR, "-bor", 0),
+	TKOP_DEF(TKOP_BXOR, "-bxor", 0),
+	TKOP_DEF(TKOP_BNOT, "-bnot", 0),
+	TKOP_DEF(TKOP_SHL, "-shl", 0),
+	TKOP_DEF(TKOP_SHR, "-shr", 0),
+	TKOP_DEF(TKOP_EQ, "-eq", 0),
+	TKOP_DEF(TKOP_NE, "-ne", 0),
+	TKOP_DEF(TKOP_GT, "-gt", 0),
+	TKOP_DEF(TKOP_LT, "-lt", 0),
+	TKOP_DEF(TKOP_GE, "-ge", 0),
+	TKOP_DEF(TKOP_LE, "-le", 0),
+	TKOP_DEF(TKOP_MATCH, "-match", 0),
+	TKOP_DEF(TKOP_NOTMATCH, "-notmatch", 0),
+	TKOP_DEF(TKOP_REPLACE, "-replace", 0),
+	TKOP_DEF(TKOP_LIKE, "-like", 0),
+	TKOP_DEF(TKOP_NOTLIKE, "-notlike", 0),
+	TKOP_DEF(TKOP_CONTAINS, "-contains", 0),
+	TKOP_DEF(TKOP_NOTCONTAINS, "-notcontains", 0),
+	TKOP_DEF(TKOP_AND, "-and", 0),
+	TKOP_DEF(TKOP_OR, "-or", 0),
+	TKOP_DEF(TKOP_XOR, "-xor", 0),
+	TKOP_DEF(TKOP_NOT, "-not", 0),
+	TKOP_DEF(TKOP_SPLIT, "-split", 0),
+	TKOP_DEF(TKOP_JOIN, "-join", 0),
+	TKOP_DEF(TKOP_IS, "-is", 0),
+	TKOP_DEF(TKOP_ISNOT, "-isnot", 0),
+	TKOP_DEF(TKOP_AS, "-as", 0),
+	TKOP_DEF(TKOP_F, "-f", 0),
 };
 static const size_t nr_operators = sizeof operators / sizeof operators[0];
 
-#define LEX_STATES(states) (LEX_STATE_STATEMENT | states)
-#define LEX_STATE_ALL \
-	(LEX_STATE_ARITHMETIC | LEX_STATE_STATEMENT | LEX_STATE_COMMAND \
-	 | LEX_STATE_STRING | LEX_STATE_WORD | LEX_STATE_HASHTABLE)
-
 static struct lex_token_def symbols[] = {
-	LEX_TOKEN_DEF2(
-		SYM_PLUS,
-		"+",
-		LEX_STATE_ARITHMETIC,
-		LEX_TOKEN_UNARY_ARITHMETIC),
-	LEX_TOKEN_DEF2(
-		SYM_HYPHEN,
-		"-",
-		LEX_STATE_ARITHMETIC,
-		LEX_TOKEN_UNARY_ARITHMETIC),
-	LEX_TOKEN_DEF(SYM_FORWARD_SLASH, "/", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(SYM_ASTERISK, "*", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(
-		SYM_AMPERSAND,
-		"&",
-		LEX_STATES(
-			LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND
-			| LEX_STATE_WORD)),
-	LEX_TOKEN_DEF(SYM_PERCENT, "%", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(
-		SYM_SQUOTE,
-		"'",
-		LEX_STATES(LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND)),
-	LEX_TOKEN_DEF(SYM_DQUOTE, "\"", LEX_STATE_ALL),
-	LEX_TOKEN_DEF(
-		SYM_HASH,
-		"#",
-		LEX_STATES(
-			LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND
-			| LEX_STATE_WORD)),
-	LEX_TOKEN_DEF2(
-		SYM_DOLLAR,
-		"$",
-		LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
-			| LEX_STATE_WORD,
-		LEX_TOKEN_UNARY_ARITHMETIC),
-	LEX_TOKEN_DEF2(
-		SYM_DOLLAR_LEFT_PAREN,
-		"$(",
-		LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
-			| LEX_STATE_WORD,
-		LEX_TOKEN_UNARY_ARITHMETIC),
-	LEX_TOKEN_DEF2(
-		SYM_DOLLAR_LEFT_BRACE,
-		"${",
-		LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
-			| LEX_STATE_WORD,
-		LEX_TOKEN_UNARY_ARITHMETIC),
-	LEX_TOKEN_DEF(SYM_AT, "@", LEX_STATE_ALL),
-	LEX_TOKEN_DEF2(
-		SYM_PIPE,
-		"|",
-		LEX_STATE_ALL,
-		LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_COMMAND_MODE),
-	LEX_TOKEN_DEF2(
-		SYM_COMMA,
-		",",
-		LEX_STATE_ALL,
-		LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_STATEMENT_MODE),
-	LEX_TOKEN_DEF2(
-		SYM_SEMICOLON,
-		";",
-		LEX_STATE_ALL,
-		LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_STATEMENT_MODE),
-	LEX_TOKEN_DEF2(
-		SYM_AT_LEFT_BRACE,
-		"@{",
-		LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
-			| LEX_STATE_WORD | LEX_STATE_STATEMENT,
-		LEX_TOKEN_UNARY_ARITHMETIC),
-	LEX_TOKEN_DEF(SYM_AT_LEFT_PAREN, "@(", LEX_STATE_ALL),
-	LEX_TOKEN_DEF2(
-		SYM_LEFT_BRACE,
-		"{",
-		LEX_STATE_ALL,
-		LEX_TOKEN_TERMINATES_WORD),
-	LEX_TOKEN_DEF2(
-		SYM_RIGHT_BRACE,
-		"}",
-		LEX_STATE_ALL,
-		LEX_TOKEN_TERMINATES_WORD),
-	LEX_TOKEN_DEF(SYM_LEFT_BRACKET, "[", LEX_STATES(LEX_STATE_ARITHMETIC)),
-	LEX_TOKEN_DEF(SYM_RIGHT_BRACKET, "]", LEX_STATES(LEX_STATE_ARITHMETIC)),
-	LEX_TOKEN_DEF(SYM_QUESTION_LEFT_BRACKET, "?[", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF2(
-		SYM_LEFT_PAREN,
-		"(",
-		LEX_STATE_ALL,
-		LEX_TOKEN_TERMINATES_WORD),
-	LEX_TOKEN_DEF2(
-		SYM_RIGHT_PAREN,
-		")",
-		LEX_STATE_ALL,
-		LEX_TOKEN_TERMINATES_WORD),
-	LEX_TOKEN_DEF(
-		SYM_EQUAL,
-		"=",
-		LEX_STATE_ARITHMETIC | LEX_STATE_HASHTABLE),
-	LEX_TOKEN_DEF(SYM_PLUS_EQUAL, "+=", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(SYM_HYPHEN_EQUAL, "-=", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(SYM_FORWARD_SLASH_EQUAL, "/=", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(SYM_ASTERISK_EQUAL, "*=", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(SYM_PERCENT_EQUAL, "%=", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(SYM_DOT, ".", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(SYM_COLON_COLON, "::", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(SYM_DOT_DOT, "..", LEX_STATE_ARITHMETIC),
-	LEX_TOKEN_DEF(SYM_QUESTION_DOT, "?.", LEX_STATE_ARITHMETIC),
+	SYMBOL_DEF(SYM_PLUS, "+", LEX_TOKEN_UNARY_ARITHMETIC),
+	SYMBOL_DEF(SYM_HYPHEN, "-", LEX_TOKEN_UNARY_ARITHMETIC),
+	SYMBOL_DEF(SYM_FORWARD_SLASH, "/", 0),
+	SYMBOL_DEF(SYM_ASTERISK, "*", 0),
+	SYMBOL_DEF(SYM_AMPERSAND, "&", 0),
+	SYMBOL_DEF(SYM_PERCENT, "%", 0),
+	SYMBOL_DEF(SYM_SQUOTE, "'", 0),
+	SYMBOL_DEF(SYM_DQUOTE, "\"", 0),
+	SYMBOL_DEF(SYM_HASH, "#", 0),
+	SYMBOL_DEF(SYM_DOLLAR, "$", LEX_TOKEN_UNARY_ARITHMETIC),
+	SYMBOL_DEF(SYM_DOLLAR_LEFT_PAREN, "$(", LEX_TOKEN_UNARY_ARITHMETIC),
+	SYMBOL_DEF(SYM_DOLLAR_LEFT_BRACE, "${", LEX_TOKEN_UNARY_ARITHMETIC),
+	SYMBOL_DEF(SYM_AT, "@", 0),
+	SYMBOL_DEF(SYM_PIPE, "|", LEX_TOKEN_TERMINATES_WORD),
+	SYMBOL_DEF(SYM_COMMA, ",", LEX_TOKEN_TERMINATES_WORD),
+	SYMBOL_DEF(SYM_SEMICOLON, ";", LEX_TOKEN_TERMINATES_WORD),
+	SYMBOL_DEF(SYM_AT_LEFT_BRACE, "@{", LEX_TOKEN_UNARY_ARITHMETIC),
+	SYMBOL_DEF(SYM_AT_LEFT_PAREN, "@(", 0),
+	SYMBOL_DEF(SYM_LEFT_BRACE, "{", LEX_TOKEN_TERMINATES_WORD),
+	SYMBOL_DEF(SYM_RIGHT_BRACE, "}", LEX_TOKEN_TERMINATES_WORD),
+	SYMBOL_DEF(SYM_LEFT_BRACKET, "[", 0),
+	SYMBOL_DEF(SYM_RIGHT_BRACKET, "]", 0),
+	SYMBOL_DEF(SYM_QUESTION_LEFT_BRACKET, "?[", 0),
+	SYMBOL_DEF(SYM_LEFT_PAREN, "(", LEX_TOKEN_TERMINATES_WORD),
+	SYMBOL_DEF(SYM_RIGHT_PAREN, ")", LEX_TOKEN_TERMINATES_WORD),
+	SYMBOL_DEF(SYM_EQUAL, "=", 0),
+	SYMBOL_DEF(SYM_PLUS_EQUAL, "+=", 0),
+	SYMBOL_DEF(SYM_HYPHEN_EQUAL, "-=", 0),
+	SYMBOL_DEF(SYM_FORWARD_SLASH_EQUAL, "/=", 0),
+	SYMBOL_DEF(SYM_ASTERISK_EQUAL, "*=", 0),
+	SYMBOL_DEF(SYM_PERCENT_EQUAL, "%=", 0),
+	SYMBOL_DEF(SYM_DOT, ".", 0),
+	SYMBOL_DEF(SYM_COLON_COLON, "::", 0),
+	SYMBOL_DEF(SYM_DOT_DOT, "..", 0),
+	SYMBOL_DEF(SYM_QUESTION_DOT, "?.", 0),
 };
 static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
 
@@ -437,6 +345,10 @@ static struct lex_symbol_node *build_symbol_tree(void)
 	enum bshell_status status = BSHELL_SUCCESS;
 
 	for (size_t i = 0; i < nr_symbols; i++) {
+		if (!symbols[i].name) {
+			continue;
+		}
+
 		status = put_symbol(root, &symbols[i]);
 
 		if (status != BSHELL_SUCCESS) {
@@ -448,6 +360,38 @@ static struct lex_symbol_node *build_symbol_tree(void)
 	return root;
 }
 
+/*
+ * OR state_type->s_id into the enabled_states mask of every keyword,
+ * operator and symbol id in the state's lists.  A NULL list is skipped;
+ * each list ends at its first zero id.  Ids are not bounds-checked.
+ */
+static void init_token_enabled_states(const struct lex_state_type *state_type)
+{
+	if (state_type->s_keywords) {
+		for (size_t i = 0; state_type->s_keywords[i]; i++) {
+			unsigned int id = state_type->s_keywords[i];
+			keywords[id - __KW_INDEX_BASE].enabled_states
+				|= state_type->s_id;
+		}
+	}
+
+	if (state_type->s_operators) {
+		for (size_t i = 0; state_type->s_operators[i]; i++) {
+			unsigned int id = state_type->s_operators[i];
+			operators[id - __TKOP_INDEX_BASE].enabled_states
+				|= state_type->s_id;
+		}
+	}
+
+	if (state_type->s_symbols) {
+		for (size_t i = 0; state_type->s_symbols[i]; i++) {
+			unsigned int id = state_type->s_symbols[i];
+			symbols[id - __SYM_INDEX_BASE].enabled_states
+				|= state_type->s_id;
+		}
+	}
+}
+
 enum bshell_status lex_ctx_init(
 	struct lex_ctx *ctx,
 	enum lex_flags flags,
@@ -464,6 +408,13 @@ enum bshell_status lex_ctx_init(
 	ctx->lex_ch = FX_WCHAR_INVALID;
 	ctx->lex_cursor.c_row = ctx->lex_cursor.c_col = 1;
 
+	init_token_enabled_states(&lex_statement_state);
+	init_token_enabled_states(&lex_command_state);
+	init_token_enabled_states(&lex_arithmetic_state);
+	init_token_enabled_states(&lex_string_state);
+	init_token_enabled_states(&lex_word_state);
+	init_token_enabled_states(&lex_hashtable_state);
+
 	return BSHELL_SUCCESS;
 }
 
@@ -574,7 +525,7 @@ bool convert_word_to_keyword(struct lex_token *tok)
 	for (size_t i = 0; i < nr_keywords; i++) {
 		const char *kw_str = keywords[i].name;
 
-		if (strcmp(kw_str, tok->tok_str) != 0) {
+		if (!kw_str || strcmp(kw_str, tok->tok_str) != 0) {
 			continue;
 		}
 
@@ -1012,6 +963,10 @@ bool char_can_begin_symbol_in_state(
 	enum lex_state_type_id state_type)
 {
 	for (size_t i = 0; i < nr_symbols; i++) {
+		if (!symbols[i].name) {
+			continue;
+		}
+
 		if (symbols[i].name[0] != c) {
 			continue;
 		}
@@ -1033,6 +988,10 @@ bool char_can_begin_symbol(struct lex_ctx *ctx, char c)
 bool char_has_flags(struct lex_ctx *ctx, char c, enum lex_token_flags flags)
 {
 	for (size_t i = 0; i < nr_symbols; i++) {
+		if (!symbols[i].name) {
+			continue;
+		}
+
 		if (symbols[i].name[0] != c) {
 			continue;
 		}
@@ -1103,7 +1062,7 @@ enum token_operator get_operator_with_string(struct lex_ctx *ctx, const char *s)
 	for (size_t i = 0; i < nr_operators; i++) {
 		const char *op_str = operators[i].name;
 
-		if (strcmp(op_str, s) != 0) {
+		if (!op_str || strcmp(op_str, s) != 0) {
 			continue;
 		}
 
diff --git a/bshell/parse/lex/statement.c b/bshell/parse/lex/statement.c
index 6e295aa..732b648 100644
--- a/bshell/parse/lex/statement.c
+++ b/bshell/parse/lex/statement.c
@@ -183,8 +183,47 @@ static const struct lex_state_link links[] = {
 	LINK_END,
 };
 
+static const unsigned int keywords[] = {
+	KW_FUNC,
+	KW_IF,
+	KW_ELSEIF,
+	KW_ELSE,
+	KW_NONE,
+};
+
+static const unsigned int operators[] = {
+	TKOP_BNOT,
+	TKOP_NOT,
+	TKOP_NONE,
+};
+
+static const unsigned int symbols[] = {
+	SYM_AMPERSAND,
+	SYM_SQUOTE,
+	SYM_DQUOTE,
+	SYM_HASH,
+	SYM_DOLLAR,
+	SYM_DOLLAR_LEFT_PAREN,
+	SYM_DOLLAR_LEFT_BRACE,
+	SYM_AT,
+	SYM_AT_LEFT_BRACE,
+	SYM_PIPE,
+	SYM_COMMA,
+	SYM_SEMICOLON,
+	SYM_LEFT_BRACE,
+	SYM_RIGHT_BRACE,
+	SYM_LEFT_BRACKET,
+	SYM_RIGHT_BRACKET,
+	SYM_LEFT_PAREN,
+	SYM_RIGHT_PAREN,
+	SYM_NONE,
+};
+
 const struct lex_state_type lex_statement_state = {
 	.s_id = LEX_STATE_STATEMENT,
 	.s_pump_token = statement_pump_token,
 	.s_links = links,
+	.s_keywords = keywords,
+	.s_operators = operators,
+	.s_symbols = symbols,
 };
diff --git a/bshell/parse/lex/string.c b/bshell/parse/lex/string.c
index c6a9402..fac7b81 100644
--- a/bshell/parse/lex/string.c
+++ b/bshell/parse/lex/string.c
@@ -9,6 +9,8 @@ static enum bshell_status string_symbol(struct lex_ctx *ctx)
 		return status;
 	}
 
+	handle_lex_state_transition(ctx, sym->id);
+
 	struct lex_token *tok = NULL;
 
 	switch (sym->id) {
@@ -18,10 +20,6 @@ static enum bshell_status string_symbol(struct lex_ctx *ctx)
 			return status;
 		}
 
-		lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
-		return BSHELL_SUCCESS;
-	case SYM_DQUOTE:
-		lex_state_pop(ctx);
 		return BSHELL_SUCCESS;
 	case SYM_DOLLAR:
 		status = read_var(ctx, TOK_VAR, &tok);
@@ -122,9 +120,29 @@ static enum bshell_status string_pump_token(struct lex_ctx *ctx)
 	return string_content(ctx);
 }
 
+static const struct lex_state_link links[] = {
+	LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
+	LINK_POP(SYM_DQUOTE),
+	LINK_END,
+};
+
+/*
+ * SYM_DQUOTE was enabled in every state before this refactor and has a
+ * LINK_POP above, so it presumably must stay enabled here to end the string.
+ */
+static const unsigned int symbols[] = {
+	SYM_DQUOTE,
+	SYM_DOLLAR,
+	SYM_DOLLAR_LEFT_PAREN,
+	SYM_DOLLAR_LEFT_BRACE,
+	SYM_NONE,
+};
+
 const struct lex_state_type lex_string_state = {
 	.s_id = LEX_STATE_STRING,
 	.s_begin = string_begin,
 	.s_end = string_end,
 	.s_pump_token = string_pump_token,
+	.s_links = links,
+	.s_symbols = symbols,
 };
diff --git a/bshell/parse/lex/word.c b/bshell/parse/lex/word.c
index 1d3f97c..9ac8f06 100644
--- a/bshell/parse/lex/word.c
+++ b/bshell/parse/lex/word.c
@@ -137,8 +137,20 @@ static enum bshell_status word_pump_token(struct lex_ctx *ctx)
 	return word_content(ctx);
 }
 
-static const struct lex_state_link links[] = {
-	LINK_END,
+static const unsigned int symbols[] = {
+	SYM_AMPERSAND,
+	SYM_HASH,
+	SYM_DOLLAR,
+	SYM_DOLLAR_LEFT_PAREN,
+	SYM_DOLLAR_LEFT_BRACE,
+	SYM_PIPE,
+	SYM_COMMA,
+	SYM_SEMICOLON,
+	SYM_LEFT_BRACE,
+	SYM_RIGHT_BRACE,
+	SYM_LEFT_PAREN,
+	SYM_RIGHT_PAREN,
+	SYM_NONE,
 };
 
 const struct lex_state_type lex_word_state = {
@@ -146,5 +158,5 @@ const struct lex_state_type lex_word_state = {
 	.s_begin = word_begin,
 	.s_end = word_end,
 	.s_pump_token = word_pump_token,
-	.s_links = links,
+	.s_symbols = symbols,
 };