parse: lex: add proper data-driven state-machine functionality
Movement between lexer states is now defined (almost) exclusively by a table of outgoing links declared for each state type. The main lexer system uses this table to determine when, how, and to which state the lexer should move. Also add a dedicated lexer state for scanning hashtables, because of the unique rules that apply inside them.
This commit is contained in:
@@ -5,7 +5,7 @@ static enum bshell_status arithmetic_hyphen(struct lex_ctx *ctx)
|
|||||||
fx_wchar c = peek_char(ctx);
|
fx_wchar c = peek_char(ctx);
|
||||||
if (!fx_wchar_is_alnum(c)) {
|
if (!fx_wchar_is_alnum(c)) {
|
||||||
push_symbol(ctx, SYM_HYPHEN);
|
push_symbol(ctx, SYM_HYPHEN);
|
||||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
handle_lex_state_transition(ctx, SYM_HYPHEN);
|
||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -18,10 +18,10 @@ static enum bshell_status arithmetic_hyphen(struct lex_ctx *ctx)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool converted = convert_word_to_int(tok);
|
unsigned int token_type = TOK_WORD;
|
||||||
|
if (convert_word_to_int(tok)) {
|
||||||
|
token_type = TOK_INT;
|
||||||
|
|
||||||
if (converted) {
|
|
||||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
|
||||||
/* because of APPEND_HYPHEN (which is needed to ensure operator
|
/* because of APPEND_HYPHEN (which is needed to ensure operator
|
||||||
* tokens are detected properly), the resulting number will be
|
* tokens are detected properly), the resulting number will be
|
||||||
* negative.
|
* negative.
|
||||||
@@ -29,15 +29,8 @@ static enum bshell_status arithmetic_hyphen(struct lex_ctx *ctx)
|
|||||||
* must be positive */
|
* must be positive */
|
||||||
tok->tok_int *= -1;
|
tok->tok_int *= -1;
|
||||||
push_symbol(ctx, SYM_HYPHEN);
|
push_symbol(ctx, SYM_HYPHEN);
|
||||||
enqueue_token(ctx, tok);
|
} else if (convert_word_to_operator(ctx, tok)) {
|
||||||
return BSHELL_SUCCESS;
|
token_type = TOK_OPERATOR;
|
||||||
}
|
|
||||||
|
|
||||||
converted = convert_word_to_operator(ctx, tok);
|
|
||||||
if (converted) {
|
|
||||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
|
||||||
} else {
|
|
||||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
enqueue_token(ctx, tok);
|
enqueue_token(ctx, tok);
|
||||||
@@ -53,6 +46,8 @@ static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
handle_lex_state_transition(ctx, sym->id);
|
||||||
|
|
||||||
struct lex_token *tok = NULL;
|
struct lex_token *tok = NULL;
|
||||||
switch (sym->id) {
|
switch (sym->id) {
|
||||||
case SYM_SQUOTE:
|
case SYM_SQUOTE:
|
||||||
@@ -66,12 +61,6 @@ static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
|
|||||||
return arithmetic_hyphen(ctx);
|
return arithmetic_hyphen(ctx);
|
||||||
case SYM_HASH:
|
case SYM_HASH:
|
||||||
return read_line_comment(ctx);
|
return read_line_comment(ctx);
|
||||||
case SYM_DQUOTE:
|
|
||||||
if (!lex_state_push(ctx, LEX_STATE_STRING, 0)) {
|
|
||||||
return BSHELL_ERR_NO_MEMORY;
|
|
||||||
}
|
|
||||||
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
case SYM_DOLLAR:
|
case SYM_DOLLAR:
|
||||||
status = read_var(ctx, TOK_VAR, &tok);
|
status = read_var(ctx, TOK_VAR, &tok);
|
||||||
if (status != BSHELL_SUCCESS) {
|
if (status != BSHELL_SUCCESS) {
|
||||||
@@ -94,14 +83,6 @@ static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
enqueue_token(ctx, tok);
|
|
||||||
return status;
|
|
||||||
case SYM_AT_LEFT_BRACE:
|
|
||||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
|
||||||
if (status != BSHELL_SUCCESS) {
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
enqueue_token(ctx, tok);
|
enqueue_token(ctx, tok);
|
||||||
return status;
|
return status;
|
||||||
default:
|
default:
|
||||||
@@ -110,26 +91,6 @@ static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
|
|||||||
|
|
||||||
push_symbol(ctx, sym->id);
|
push_symbol(ctx, sym->id);
|
||||||
|
|
||||||
switch (sym->id) {
|
|
||||||
case SYM_LEFT_PAREN:
|
|
||||||
lex_state_push(
|
|
||||||
ctx,
|
|
||||||
LEX_STATE_STATEMENT,
|
|
||||||
STATEMENT_F_DISABLE_KEYWORDS);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
case SYM_DOLLAR_LEFT_PAREN:
|
|
||||||
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
case SYM_RIGHT_PAREN:
|
|
||||||
lex_state_pop(ctx);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
case SYM_SEMICOLON:
|
|
||||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -141,11 +102,16 @@ static enum bshell_status arithmetic_word(struct lex_ctx *ctx)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool converted = convert_word_to_keyword(word);
|
unsigned int token_type = TOK_WORD;
|
||||||
if (!converted) {
|
bool kw = false, number = false;
|
||||||
converted = convert_word_to_int(word);
|
if (convert_word_to_keyword(word)) {
|
||||||
|
token_type = word->tok_keyword;
|
||||||
|
} else if (convert_word_to_int(word)) {
|
||||||
|
token_type = TOK_INT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
handle_lex_state_transition(ctx, token_type);
|
||||||
|
|
||||||
enqueue_token(ctx, word);
|
enqueue_token(ctx, word);
|
||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
}
|
}
|
||||||
@@ -180,7 +146,22 @@ static enum bshell_status arithmetic_pump_token(struct lex_ctx *ctx)
|
|||||||
return arithmetic_word(ctx);
|
return arithmetic_word(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const struct lex_state_link links[] = {
|
||||||
|
LINK_CHANGE(TOK_WORD, LEX_STATE_COMMAND),
|
||||||
|
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
|
||||||
|
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
|
||||||
|
LINK_POP(SYM_RIGHT_PAREN),
|
||||||
|
LINK_CHANGE(SYM_SEMICOLON, LEX_STATE_STATEMENT),
|
||||||
|
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
|
||||||
|
LINK_PUSH(
|
||||||
|
SYM_LEFT_PAREN,
|
||||||
|
LEX_STATE_STATEMENT,
|
||||||
|
STATEMENT_F_DISABLE_KEYWORDS),
|
||||||
|
LINK_END,
|
||||||
|
};
|
||||||
|
|
||||||
const struct lex_state_type lex_arithmetic_state = {
|
const struct lex_state_type lex_arithmetic_state = {
|
||||||
.s_id = LEX_STATE_ARITHMETIC,
|
.s_id = LEX_STATE_ARITHMETIC,
|
||||||
.s_pump_token = arithmetic_pump_token,
|
.s_pump_token = arithmetic_pump_token,
|
||||||
|
.s_links = links,
|
||||||
};
|
};
|
||||||
|
|||||||
+18
-35
@@ -30,6 +30,8 @@ static enum bshell_status command_symbol(struct lex_ctx *ctx)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
handle_lex_state_transition(ctx, sym->id);
|
||||||
|
|
||||||
struct lex_token *tok = NULL;
|
struct lex_token *tok = NULL;
|
||||||
switch (sym->id) {
|
switch (sym->id) {
|
||||||
case SYM_SQUOTE:
|
case SYM_SQUOTE:
|
||||||
@@ -42,12 +44,6 @@ static enum bshell_status command_symbol(struct lex_ctx *ctx)
|
|||||||
|
|
||||||
case SYM_HASH:
|
case SYM_HASH:
|
||||||
return read_line_comment(ctx);
|
return read_line_comment(ctx);
|
||||||
case SYM_DQUOTE:
|
|
||||||
if (!lex_state_push(ctx, LEX_STATE_STRING, 0)) {
|
|
||||||
return BSHELL_ERR_NO_MEMORY;
|
|
||||||
}
|
|
||||||
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
case SYM_DOLLAR:
|
case SYM_DOLLAR:
|
||||||
status = read_var(ctx, TOK_VAR, &tok);
|
status = read_var(ctx, TOK_VAR, &tok);
|
||||||
if (status != BSHELL_SUCCESS) {
|
if (status != BSHELL_SUCCESS) {
|
||||||
@@ -78,14 +74,6 @@ static enum bshell_status command_symbol(struct lex_ctx *ctx)
|
|||||||
lex_state_push(ctx, LEX_STATE_WORD, 0);
|
lex_state_push(ctx, LEX_STATE_WORD, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
enqueue_token(ctx, tok);
|
|
||||||
return status;
|
|
||||||
case SYM_AT_LEFT_BRACE:
|
|
||||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
|
||||||
if (status != BSHELL_SUCCESS) {
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
enqueue_token(ctx, tok);
|
enqueue_token(ctx, tok);
|
||||||
return status;
|
return status;
|
||||||
default:
|
default:
|
||||||
@@ -94,26 +82,6 @@ static enum bshell_status command_symbol(struct lex_ctx *ctx)
|
|||||||
|
|
||||||
push_symbol(ctx, sym->id);
|
push_symbol(ctx, sym->id);
|
||||||
|
|
||||||
switch (sym->id) {
|
|
||||||
case SYM_LEFT_PAREN:
|
|
||||||
lex_state_push(
|
|
||||||
ctx,
|
|
||||||
LEX_STATE_STATEMENT,
|
|
||||||
STATEMENT_F_DISABLE_KEYWORDS);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
case SYM_DOLLAR_LEFT_PAREN:
|
|
||||||
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
case SYM_RIGHT_PAREN:
|
|
||||||
lex_state_pop(ctx);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
case SYM_SEMICOLON:
|
|
||||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -164,7 +132,7 @@ enum bshell_status command_pump_token(struct lex_ctx *ctx)
|
|||||||
if (newline) {
|
if (newline) {
|
||||||
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||||
enqueue_token(ctx, tok);
|
enqueue_token(ctx, tok);
|
||||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
handle_lex_state_transition(ctx, TOK_LINEFEED);
|
||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -175,7 +143,22 @@ enum bshell_status command_pump_token(struct lex_ctx *ctx)
|
|||||||
return command_word(ctx);
|
return command_word(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const struct lex_state_link links[] = {
|
||||||
|
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
|
||||||
|
LINK_PUSH(
|
||||||
|
SYM_LEFT_PAREN,
|
||||||
|
LEX_STATE_STATEMENT,
|
||||||
|
STATEMENT_F_DISABLE_KEYWORDS),
|
||||||
|
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
|
||||||
|
LINK_POP(SYM_RIGHT_PAREN),
|
||||||
|
LINK_CHANGE(SYM_SEMICOLON, LEX_STATE_STATEMENT),
|
||||||
|
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
|
||||||
|
LINK_CHANGE(TOK_LINEFEED, LEX_STATE_STATEMENT),
|
||||||
|
LINK_END,
|
||||||
|
};
|
||||||
|
|
||||||
const struct lex_state_type lex_command_state = {
|
const struct lex_state_type lex_command_state = {
|
||||||
.s_id = LEX_STATE_COMMAND,
|
.s_id = LEX_STATE_COMMAND,
|
||||||
.s_pump_token = command_pump_token,
|
.s_pump_token = command_pump_token,
|
||||||
|
.s_links = links,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -0,0 +1,160 @@
|
|||||||
|
#include "lex-internal.h"
|
||||||
|
|
||||||
|
static enum bshell_status hashtable_hyphen(struct lex_ctx *ctx)
|
||||||
|
{
|
||||||
|
fx_wchar c = peek_char(ctx);
|
||||||
|
if (!fx_wchar_is_alnum(c)) {
|
||||||
|
push_symbol(ctx, SYM_HYPHEN);
|
||||||
|
handle_lex_state_transition(ctx, SYM_HYPHEN);
|
||||||
|
return BSHELL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct lex_token *tok = NULL;
|
||||||
|
enum bshell_status status = read_word(
|
||||||
|
ctx,
|
||||||
|
READ_NO_SET_TOKEN_START | READ_APPEND_HYPHEN,
|
||||||
|
&tok);
|
||||||
|
if (status != BSHELL_SUCCESS) {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int token_type = TOK_WORD;
|
||||||
|
if (convert_word_to_int(tok)) {
|
||||||
|
token_type = TOK_INT;
|
||||||
|
/* because of APPEND_HYPHEN (which is needed to ensure operator
|
||||||
|
* tokens are detected properly), the resulting number will be
|
||||||
|
* negative.
|
||||||
|
* this token will be preceded by a HYPHEN token, so the number
|
||||||
|
* must be positive */
|
||||||
|
tok->tok_int *= -1;
|
||||||
|
push_symbol(ctx, SYM_HYPHEN);
|
||||||
|
} else if (convert_word_to_operator(ctx, tok)) {
|
||||||
|
token_type = tok->tok_operator;
|
||||||
|
}
|
||||||
|
|
||||||
|
handle_lex_state_transition(ctx, token_type);
|
||||||
|
enqueue_token(ctx, tok);
|
||||||
|
return BSHELL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum bshell_status hashtable_symbol(struct lex_ctx *ctx)
|
||||||
|
{
|
||||||
|
const struct lex_token_def *sym = NULL;
|
||||||
|
enum bshell_status status = read_symbol(ctx, &sym);
|
||||||
|
|
||||||
|
if (status != BSHELL_SUCCESS) {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
handle_lex_state_transition(ctx, sym->id);
|
||||||
|
|
||||||
|
struct lex_token *tok = NULL;
|
||||||
|
switch (sym->id) {
|
||||||
|
case SYM_SQUOTE:
|
||||||
|
status = read_literal_string(ctx, &tok);
|
||||||
|
if (status != BSHELL_SUCCESS) {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
enqueue_token(ctx, tok);
|
||||||
|
return BSHELL_SUCCESS;
|
||||||
|
case SYM_HYPHEN:
|
||||||
|
return hashtable_hyphen(ctx);
|
||||||
|
case SYM_HASH:
|
||||||
|
return read_line_comment(ctx);
|
||||||
|
case SYM_DOLLAR:
|
||||||
|
status = read_var(ctx, TOK_VAR, &tok);
|
||||||
|
if (status != BSHELL_SUCCESS) {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
enqueue_token(ctx, tok);
|
||||||
|
return status;
|
||||||
|
case SYM_AT:
|
||||||
|
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||||
|
if (status != BSHELL_SUCCESS) {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
enqueue_token(ctx, tok);
|
||||||
|
return status;
|
||||||
|
case SYM_DOLLAR_LEFT_BRACE:
|
||||||
|
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||||
|
if (status != BSHELL_SUCCESS) {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
enqueue_token(ctx, tok);
|
||||||
|
return status;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
push_symbol(ctx, sym->id);
|
||||||
|
|
||||||
|
return BSHELL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum bshell_status hashtable_word(struct lex_ctx *ctx)
|
||||||
|
{
|
||||||
|
struct lex_token *word = NULL;
|
||||||
|
enum bshell_status status = read_word(ctx, 0, &word);
|
||||||
|
if (status != BSHELL_SUCCESS) {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
convert_word_to_int(word);
|
||||||
|
|
||||||
|
handle_lex_state_transition(ctx, word->tok_type);
|
||||||
|
enqueue_token(ctx, word);
|
||||||
|
return BSHELL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum bshell_status hashtable_pump_token(struct lex_ctx *ctx)
|
||||||
|
{
|
||||||
|
fx_wchar c = peek_char(ctx);
|
||||||
|
bool newline = false;
|
||||||
|
|
||||||
|
set_token_start(ctx);
|
||||||
|
while (fx_wchar_is_space(c)) {
|
||||||
|
if (c == '\n') {
|
||||||
|
newline = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
set_token_end(ctx);
|
||||||
|
advance_char_noread(ctx);
|
||||||
|
c = peek_char_noread(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
if (newline) {
|
||||||
|
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||||
|
enqueue_token(ctx, tok);
|
||||||
|
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||||
|
return BSHELL_SUCCESS;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (char_can_begin_symbol(ctx, c)) {
|
||||||
|
return hashtable_symbol(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
return hashtable_word(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct lex_state_link links[] = {
|
||||||
|
LINK_PUSH_WITH_TERM(SYM_EQUAL, LEX_STATE_STATEMENT, 0, SYM_SEMICOLON),
|
||||||
|
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
|
||||||
|
LINK_PUSH(
|
||||||
|
SYM_LEFT_PAREN,
|
||||||
|
LEX_STATE_STATEMENT,
|
||||||
|
STATEMENT_F_DISABLE_KEYWORDS),
|
||||||
|
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
|
||||||
|
LINK_POP(SYM_RIGHT_BRACE),
|
||||||
|
LINK_END,
|
||||||
|
};
|
||||||
|
|
||||||
|
const struct lex_state_type lex_hashtable_state = {
|
||||||
|
.s_id = LEX_STATE_HASHTABLE,
|
||||||
|
.s_pump_token = hashtable_pump_token,
|
||||||
|
.s_links = links,
|
||||||
|
};
|
||||||
@@ -8,7 +8,11 @@
|
|||||||
struct lex_ctx;
|
struct lex_ctx;
|
||||||
|
|
||||||
enum state_flags {
|
enum state_flags {
|
||||||
|
/* statement: don't convert matching words to keywords */
|
||||||
STATEMENT_F_DISABLE_KEYWORDS = 0x01u,
|
STATEMENT_F_DISABLE_KEYWORDS = 0x01u,
|
||||||
|
/* arithmetic: don't switch back to statement mode even when
|
||||||
|
* encountering a token that would otherwise require it. */
|
||||||
|
ARITHMETIC_F_DISABLE_STATEMENTS = 0x01u,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum read_flags {
|
enum read_flags {
|
||||||
@@ -17,6 +21,52 @@ enum read_flags {
|
|||||||
READ_NO_NUMBER_RECOGNITION = 0x04u,
|
READ_NO_NUMBER_RECOGNITION = 0x04u,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Constructors for the entries of a lexer state's link table (s_links).
 * Each one expands to a struct lex_state_link compound literal; members
 * that are not named in a given macro are zero-initialised.
 */

/* Entry of type LEX_STATE_LINK_PUSH: on `tok`, push a state of type
 * `target`, passing it `flags`. */
#define LINK_PUSH(tok, target, flags)                                          \
	((struct lex_state_link) {                                             \
		.l_token = (tok),                                              \
		.l_type = LEX_STATE_LINK_PUSH,                                 \
		.l_target = (target),                                          \
		.l_target_flags = (flags),                                     \
	})

/* Like LINK_PUSH, but additionally stores the variadic arguments in
 * l_terminators, followed by TOK_NONE. At least one terminator must be
 * given, otherwise the initializer list is malformed. */
#define LINK_PUSH_WITH_TERM(tok, target, flags, ...)                           \
	((struct lex_state_link) {                                             \
		.l_token = (tok),                                              \
		.l_type = LEX_STATE_LINK_PUSH,                                 \
		.l_target = (target),                                          \
		.l_target_flags = (flags),                                     \
		.l_terminators = {__VA_ARGS__, TOK_NONE},                      \
	})

/* Entry of type LEX_STATE_LINK_CHANGE: on `tok`, change to state type
 * `target`. */
#define LINK_CHANGE(tok, target)                                               \
	((struct lex_state_link) {                                             \
		.l_token = (tok),                                              \
		.l_type = LEX_STATE_LINK_CHANGE,                               \
		.l_target = (target),                                          \
	})

/* Entry of type LEX_STATE_LINK_POP for `tok`; no target is needed. */
#define LINK_POP(tok)                                                          \
	((struct lex_state_link) {                                             \
		.l_token = (tok),                                              \
		.l_type = LEX_STATE_LINK_POP,                                  \
	})

/* Entry of type LEX_STATE_LINK_NONE for `tok`. */
#define LINK_NONE(tok)                                                         \
	((struct lex_state_link) {                                             \
		.l_token = (tok),                                              \
		.l_type = LEX_STATE_LINK_NONE,                                 \
	})

/* All-zero entry placed last in every link table. Spelled {0} rather than
 * {} because an empty initializer list is not valid C before C23. */
#define LINK_END ((struct lex_state_link) {0})
|
||||||
|
|
||||||
|
struct lex_state_link {
|
||||||
|
unsigned int l_token;
|
||||||
|
enum {
|
||||||
|
LEX_STATE_LINK_NONE,
|
||||||
|
LEX_STATE_LINK_PUSH,
|
||||||
|
LEX_STATE_LINK_CHANGE,
|
||||||
|
LEX_STATE_LINK_POP,
|
||||||
|
} l_type;
|
||||||
|
enum lex_state_type_id l_target;
|
||||||
|
enum state_flags l_target_flags;
|
||||||
|
unsigned int l_terminators[LEX_STATE_MAX_TERMINATORS];
|
||||||
|
};
|
||||||
|
|
||||||
typedef enum bshell_status (*lex_state_pump_token)(struct lex_ctx *);
|
typedef enum bshell_status (*lex_state_pump_token)(struct lex_ctx *);
|
||||||
typedef enum bshell_status (*lex_state_begin)(struct lex_ctx *);
|
typedef enum bshell_status (*lex_state_begin)(struct lex_ctx *);
|
||||||
typedef enum bshell_status (*lex_state_end)(struct lex_ctx *);
|
typedef enum bshell_status (*lex_state_end)(struct lex_ctx *);
|
||||||
@@ -26,6 +76,7 @@ struct lex_state_type {
|
|||||||
lex_state_pump_token s_pump_token;
|
lex_state_pump_token s_pump_token;
|
||||||
lex_state_begin s_begin;
|
lex_state_begin s_begin;
|
||||||
lex_state_end s_end;
|
lex_state_end s_end;
|
||||||
|
const struct lex_state_link *s_links;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern enum bshell_status pump_token_statement(struct lex_ctx *ctx);
|
extern enum bshell_status pump_token_statement(struct lex_ctx *ctx);
|
||||||
@@ -45,6 +96,10 @@ extern void lex_state_pop(struct lex_ctx *ctx);
|
|||||||
extern struct lex_state *lex_state_get(struct lex_ctx *ctx);
|
extern struct lex_state *lex_state_get(struct lex_ctx *ctx);
|
||||||
extern void lex_state_change(struct lex_ctx *ctx, enum lex_state_type_id type);
|
extern void lex_state_change(struct lex_ctx *ctx, enum lex_state_type_id type);
|
||||||
extern fx_string *lex_state_get_tempstr(struct lex_ctx *ctx);
|
extern fx_string *lex_state_get_tempstr(struct lex_ctx *ctx);
|
||||||
|
extern void lex_state_add_terminator(struct lex_state *state, unsigned int tok);
|
||||||
|
extern bool lex_state_terminates_at_token(
|
||||||
|
struct lex_ctx *ctx,
|
||||||
|
unsigned int tok);
|
||||||
|
|
||||||
extern fx_wchar peek_char(struct lex_ctx *ctx);
|
extern fx_wchar peek_char(struct lex_ctx *ctx);
|
||||||
extern fx_wchar peek_char_noread(struct lex_ctx *ctx);
|
extern fx_wchar peek_char_noread(struct lex_ctx *ctx);
|
||||||
@@ -100,8 +155,26 @@ extern bool char_has_flags(
|
|||||||
struct lex_ctx *ctx,
|
struct lex_ctx *ctx,
|
||||||
char c,
|
char c,
|
||||||
enum lex_token_flags flags);
|
enum lex_token_flags flags);
|
||||||
|
extern bool keyword_has_flags(
|
||||||
|
struct lex_ctx *ctx,
|
||||||
|
enum token_keyword kw,
|
||||||
|
enum lex_token_flags flags);
|
||||||
|
extern enum lex_token_flags keyword_get_flags(
|
||||||
|
struct lex_ctx *ctx,
|
||||||
|
enum token_keyword kw);
|
||||||
|
extern bool symbol_has_flags(
|
||||||
|
struct lex_ctx *ctx,
|
||||||
|
enum token_symbol sym,
|
||||||
|
enum lex_token_flags flags);
|
||||||
|
extern enum lex_token_flags symbol_get_flags(
|
||||||
|
struct lex_ctx *ctx,
|
||||||
|
enum token_symbol sym);
|
||||||
extern enum token_operator get_operator_with_string(
|
extern enum token_operator get_operator_with_string(
|
||||||
struct lex_ctx *ctx,
|
struct lex_ctx *ctx,
|
||||||
const char *s);
|
const char *s);
|
||||||
|
|
||||||
|
extern void handle_lex_state_transition(
|
||||||
|
struct lex_ctx *ctx,
|
||||||
|
unsigned int token);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
+313
-44
@@ -5,6 +5,8 @@
|
|||||||
#include "../token.h"
|
#include "../token.h"
|
||||||
#include "lex-internal.h"
|
#include "lex-internal.h"
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
#define LEX_TOKEN_DEF(i, n, s) {.id = (i), .name = (n), .enabled_states = (s)}
|
#define LEX_TOKEN_DEF(i, n, s) {.id = (i), .name = (n), .enabled_states = (s)}
|
||||||
#define LEX_TOKEN_DEF2(i, n, s, f) \
|
#define LEX_TOKEN_DEF2(i, n, s, f) \
|
||||||
{.id = (i), .name = (n), .enabled_states = (s), .flags = (f)}
|
{.id = (i), .name = (n), .enabled_states = (s), .flags = (f)}
|
||||||
@@ -13,54 +15,81 @@
|
|||||||
((flags) & (LEX_ENABLE_INT | LEX_ENABLE_KEYWORD))
|
((flags) & (LEX_ENABLE_INT | LEX_ENABLE_KEYWORD))
|
||||||
|
|
||||||
static struct lex_token_def keywords[] = {
|
static struct lex_token_def keywords[] = {
|
||||||
LEX_TOKEN_DEF(KW_FUNC, "func", LEX_STATE_STATEMENT),
|
LEX_TOKEN_DEF2(
|
||||||
|
KW_FUNC,
|
||||||
|
"func",
|
||||||
|
LEX_STATE_STATEMENT,
|
||||||
|
LEX_TOKEN_COMMAND_MODE),
|
||||||
LEX_TOKEN_DEF(KW_IF, "if", LEX_STATE_STATEMENT),
|
LEX_TOKEN_DEF(KW_IF, "if", LEX_STATE_STATEMENT),
|
||||||
|
LEX_TOKEN_DEF(KW_ELSEIF, "elseif", LEX_STATE_STATEMENT),
|
||||||
LEX_TOKEN_DEF(KW_ELSE, "else", LEX_STATE_STATEMENT),
|
LEX_TOKEN_DEF(KW_ELSE, "else", LEX_STATE_STATEMENT),
|
||||||
};
|
};
|
||||||
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
|
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
|
||||||
|
|
||||||
static struct lex_token_def operators[] = {
|
static struct lex_token_def operators[] = {
|
||||||
LEX_TOKEN_DEF(OP_BAND, "-band", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(TKOP_BAND, "-band", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(OP_BOR, "-bor", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(TKOP_BOR, "-bor", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(OP_BXOR, "-bxor", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(TKOP_BXOR, "-bxor", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(
|
LEX_TOKEN_DEF(
|
||||||
OP_BNOT,
|
TKOP_BNOT,
|
||||||
"-bnot",
|
"-bnot",
|
||||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(OP_SHL, "-shl", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(TKOP_SHL, "-shl", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(OP_SHR, "-shr", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(TKOP_SHR, "-shr", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(OP_EQ, "-eq", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_NE, "-ne", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_GT, "-gt", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_LT, "-lt", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_GE, "-ge", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_LE, "-le", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_MATCH, "-match", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_NOTMATCH, "-notmatch", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_REPLACE, "-replace", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_LIKE, "-like", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_NOTLIKE, "-notlike", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_CONTAINS, "-contains", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_NOTCONTAINS, "-notcontains", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_AND, "-and", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_OR, "-OR", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(OP_XOR, "-xor", LEX_STATE_ARITHMETIC),
|
|
||||||
LEX_TOKEN_DEF(
|
LEX_TOKEN_DEF(
|
||||||
OP_NOT,
|
TKOP_EQ,
|
||||||
|
"-eq",
|
||||||
|
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(
|
||||||
|
TKOP_NE,
|
||||||
|
"-ne",
|
||||||
|
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(
|
||||||
|
TKOP_GT,
|
||||||
|
"-gt",
|
||||||
|
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(
|
||||||
|
TKOP_LT,
|
||||||
|
"-lt",
|
||||||
|
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(
|
||||||
|
TKOP_GE,
|
||||||
|
"-ge",
|
||||||
|
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(
|
||||||
|
TKOP_LE,
|
||||||
|
"-le",
|
||||||
|
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(TKOP_MATCH, "-match", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(TKOP_NOTMATCH, "-notmatch", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(TKOP_REPLACE, "-replace", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(TKOP_LIKE, "-like", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(TKOP_NOTLIKE, "-notlike", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(TKOP_CONTAINS, "-contains", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(TKOP_NOTCONTAINS, "-notcontains", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(TKOP_AND, "-and", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(
|
||||||
|
TKOP_OR,
|
||||||
|
"-OR",
|
||||||
|
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(TKOP_XOR, "-xor", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(
|
||||||
|
TKOP_NOT,
|
||||||
"-not",
|
"-not",
|
||||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(OP_SPLIT, "-split", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(TKOP_SPLIT, "-split", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(OP_JOIN, "-join", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(TKOP_JOIN, "-join", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(OP_IS, "-is", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(TKOP_IS, "-is", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(OP_ISNOT, "-isnot", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(TKOP_ISNOT, "-isnot", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(OP_AS, "-as", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(TKOP_AS, "-as", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(TKOP_F, "-f", LEX_STATE_ARITHMETIC),
|
||||||
};
|
};
|
||||||
static const size_t nr_operators = sizeof operators / sizeof operators[0];
|
static const size_t nr_operators = sizeof operators / sizeof operators[0];
|
||||||
|
|
||||||
#define LEX_STATES(states) (LEX_STATE_STATEMENT | states)
|
#define LEX_STATES(states) (LEX_STATE_STATEMENT | states)
|
||||||
#define LEX_STATE_ALL \
|
#define LEX_STATE_ALL \
|
||||||
(LEX_STATE_ARITHMETIC | LEX_STATE_STATEMENT | LEX_STATE_COMMAND \
|
(LEX_STATE_ARITHMETIC | LEX_STATE_STATEMENT | LEX_STATE_COMMAND \
|
||||||
| LEX_STATE_STRING | LEX_STATE_WORD)
|
| LEX_STATE_STRING | LEX_STATE_WORD | LEX_STATE_HASHTABLE)
|
||||||
|
|
||||||
static struct lex_token_def symbols[] = {
|
static struct lex_token_def symbols[] = {
|
||||||
LEX_TOKEN_DEF2(
|
LEX_TOKEN_DEF2(
|
||||||
@@ -71,7 +100,7 @@ static struct lex_token_def symbols[] = {
|
|||||||
LEX_TOKEN_DEF2(
|
LEX_TOKEN_DEF2(
|
||||||
SYM_HYPHEN,
|
SYM_HYPHEN,
|
||||||
"-",
|
"-",
|
||||||
LEX_STATES(LEX_STATE_ARITHMETIC),
|
LEX_STATE_ARITHMETIC,
|
||||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(SYM_FORWARD_SLASH, "/", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(SYM_FORWARD_SLASH, "/", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(SYM_ASTERISK, "*", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(SYM_ASTERISK, "*", LEX_STATE_ARITHMETIC),
|
||||||
@@ -112,18 +141,28 @@ static struct lex_token_def symbols[] = {
|
|||||||
| LEX_STATE_WORD,
|
| LEX_STATE_WORD,
|
||||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(SYM_AT, "@", LEX_STATE_ALL),
|
LEX_TOKEN_DEF(SYM_AT, "@", LEX_STATE_ALL),
|
||||||
LEX_TOKEN_DEF2(SYM_PIPE, "|", LEX_STATE_ALL, LEX_TOKEN_TERMINATES_WORD),
|
LEX_TOKEN_DEF2(
|
||||||
|
SYM_PIPE,
|
||||||
|
"|",
|
||||||
|
LEX_STATE_ALL,
|
||||||
|
LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_COMMAND_MODE),
|
||||||
LEX_TOKEN_DEF2(
|
LEX_TOKEN_DEF2(
|
||||||
SYM_COMMA,
|
SYM_COMMA,
|
||||||
",",
|
",",
|
||||||
LEX_STATE_ALL,
|
LEX_STATE_ALL,
|
||||||
LEX_TOKEN_TERMINATES_WORD),
|
LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_STATEMENT_MODE),
|
||||||
LEX_TOKEN_DEF2(
|
LEX_TOKEN_DEF2(
|
||||||
SYM_SEMICOLON,
|
SYM_SEMICOLON,
|
||||||
";",
|
";",
|
||||||
LEX_STATE_ALL,
|
LEX_STATE_ALL,
|
||||||
LEX_TOKEN_TERMINATES_WORD),
|
LEX_TOKEN_TERMINATES_WORD | LEX_TOKEN_STATEMENT_MODE),
|
||||||
LEX_TOKEN_DEF(SYM_AT_LEFT_BRACE, "@{", LEX_STATE_ALL),
|
LEX_TOKEN_DEF2(
|
||||||
|
SYM_AT_LEFT_BRACE,
|
||||||
|
"@{",
|
||||||
|
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
|
||||||
|
| LEX_STATE_WORD | LEX_STATE_STATEMENT,
|
||||||
|
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(SYM_AT_LEFT_PAREN, "@(", LEX_STATE_ALL),
|
||||||
LEX_TOKEN_DEF2(
|
LEX_TOKEN_DEF2(
|
||||||
SYM_LEFT_BRACE,
|
SYM_LEFT_BRACE,
|
||||||
"{",
|
"{",
|
||||||
@@ -136,6 +175,7 @@ static struct lex_token_def symbols[] = {
|
|||||||
LEX_TOKEN_TERMINATES_WORD),
|
LEX_TOKEN_TERMINATES_WORD),
|
||||||
LEX_TOKEN_DEF(SYM_LEFT_BRACKET, "[", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
LEX_TOKEN_DEF(SYM_LEFT_BRACKET, "[", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||||
LEX_TOKEN_DEF(SYM_RIGHT_BRACKET, "]", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
LEX_TOKEN_DEF(SYM_RIGHT_BRACKET, "]", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||||
|
LEX_TOKEN_DEF(SYM_QUESTION_LEFT_BRACKET, "?[", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF2(
|
LEX_TOKEN_DEF2(
|
||||||
SYM_LEFT_PAREN,
|
SYM_LEFT_PAREN,
|
||||||
"(",
|
"(",
|
||||||
@@ -146,12 +186,19 @@ static struct lex_token_def symbols[] = {
|
|||||||
")",
|
")",
|
||||||
LEX_STATE_ALL,
|
LEX_STATE_ALL,
|
||||||
LEX_TOKEN_TERMINATES_WORD),
|
LEX_TOKEN_TERMINATES_WORD),
|
||||||
LEX_TOKEN_DEF(SYM_EQUAL, "=", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(
|
||||||
|
SYM_EQUAL,
|
||||||
|
"=",
|
||||||
|
LEX_STATE_ARITHMETIC | LEX_STATE_HASHTABLE),
|
||||||
LEX_TOKEN_DEF(SYM_PLUS_EQUAL, "+=", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(SYM_PLUS_EQUAL, "+=", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(SYM_HYPHEN_EQUAL, "-=", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(SYM_HYPHEN_EQUAL, "-=", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(SYM_FORWARD_SLASH_EQUAL, "/=", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(SYM_FORWARD_SLASH_EQUAL, "/=", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(SYM_ASTERISK_EQUAL, "*=", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(SYM_ASTERISK_EQUAL, "*=", LEX_STATE_ARITHMETIC),
|
||||||
LEX_TOKEN_DEF(SYM_PERCENT_EQUAL, "%=", LEX_STATE_ARITHMETIC),
|
LEX_TOKEN_DEF(SYM_PERCENT_EQUAL, "%=", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(SYM_DOT, ".", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(SYM_COLON_COLON, "::", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(SYM_DOT_DOT, "..", LEX_STATE_ARITHMETIC),
|
||||||
|
LEX_TOKEN_DEF(SYM_QUESTION_DOT, "?.", LEX_STATE_ARITHMETIC),
|
||||||
};
|
};
|
||||||
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
||||||
|
|
||||||
@@ -160,6 +207,7 @@ extern const struct lex_state_type lex_command_state;
|
|||||||
extern const struct lex_state_type lex_arithmetic_state;
|
extern const struct lex_state_type lex_arithmetic_state;
|
||||||
extern const struct lex_state_type lex_string_state;
|
extern const struct lex_state_type lex_string_state;
|
||||||
extern const struct lex_state_type lex_word_state;
|
extern const struct lex_state_type lex_word_state;
|
||||||
|
extern const struct lex_state_type lex_hashtable_state;
|
||||||
|
|
||||||
static const struct lex_state_type *state_types[] = {
|
static const struct lex_state_type *state_types[] = {
|
||||||
[LEX_STATE_STATEMENT] = &lex_statement_state,
|
[LEX_STATE_STATEMENT] = &lex_statement_state,
|
||||||
@@ -167,6 +215,7 @@ static const struct lex_state_type *state_types[] = {
|
|||||||
[LEX_STATE_ARITHMETIC] = &lex_arithmetic_state,
|
[LEX_STATE_ARITHMETIC] = &lex_arithmetic_state,
|
||||||
[LEX_STATE_STRING] = &lex_string_state,
|
[LEX_STATE_STRING] = &lex_string_state,
|
||||||
[LEX_STATE_WORD] = &lex_word_state,
|
[LEX_STATE_WORD] = &lex_word_state,
|
||||||
|
[LEX_STATE_HASHTABLE] = &lex_hashtable_state,
|
||||||
};
|
};
|
||||||
|
|
||||||
void set_token_start(struct lex_ctx *ctx)
|
void set_token_start(struct lex_ctx *ctx)
|
||||||
@@ -179,6 +228,24 @@ void set_token_end(struct lex_ctx *ctx)
|
|||||||
memcpy(&ctx->lex_end, &ctx->lex_cursor, sizeof ctx->lex_cursor);
|
memcpy(&ctx->lex_end, &ctx->lex_cursor, sizeof ctx->lex_cursor);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const char *lex_state_type_id_to_string(enum lex_state_type_id id)
|
||||||
|
{
|
||||||
|
#define ENUM_STR(v) \
|
||||||
|
case v: \
|
||||||
|
return #v
|
||||||
|
switch (id) {
|
||||||
|
ENUM_STR(LEX_STATE_STATEMENT);
|
||||||
|
ENUM_STR(LEX_STATE_COMMAND);
|
||||||
|
ENUM_STR(LEX_STATE_ARITHMETIC);
|
||||||
|
ENUM_STR(LEX_STATE_STRING);
|
||||||
|
ENUM_STR(LEX_STATE_WORD);
|
||||||
|
ENUM_STR(LEX_STATE_HASHTABLE);
|
||||||
|
default:
|
||||||
|
return "<unknown>";
|
||||||
|
}
|
||||||
|
#undef ENUM_STR
|
||||||
|
}
|
||||||
|
|
||||||
struct lex_state *lex_state_push(
|
struct lex_state *lex_state_push(
|
||||||
struct lex_ctx *ctx,
|
struct lex_ctx *ctx,
|
||||||
enum lex_state_type_id state_type,
|
enum lex_state_type_id state_type,
|
||||||
@@ -189,6 +256,11 @@ struct lex_state *lex_state_push(
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(VERBOSE)
|
||||||
|
printf("push(%s, 0x%04x)\n",
|
||||||
|
lex_state_type_id_to_string(state_type),
|
||||||
|
flags);
|
||||||
|
#endif
|
||||||
memset(state, 0x0, sizeof *state);
|
memset(state, 0x0, sizeof *state);
|
||||||
|
|
||||||
state->s_type = state_types[state_type];
|
state->s_type = state_types[state_type];
|
||||||
@@ -212,6 +284,10 @@ void lex_state_pop(struct lex_ctx *ctx)
|
|||||||
|
|
||||||
struct lex_state *state = fx_unbox(struct lex_state, entry, s_entry);
|
struct lex_state *state = fx_unbox(struct lex_state, entry, s_entry);
|
||||||
|
|
||||||
|
#if defined(VERBOSE)
|
||||||
|
printf("pop(%s)\n", lex_state_type_id_to_string(state->s_type->s_id));
|
||||||
|
#endif
|
||||||
|
|
||||||
if (state->s_type->s_end) {
|
if (state->s_type->s_end) {
|
||||||
state->s_type->s_end(ctx);
|
state->s_type->s_end(ctx);
|
||||||
}
|
}
|
||||||
@@ -242,6 +318,12 @@ void lex_state_change(struct lex_ctx *ctx, enum lex_state_type_id type)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(VERBOSE)
|
||||||
|
printf("change(%s -> %s)\n",
|
||||||
|
lex_state_type_id_to_string(state->s_type->s_id),
|
||||||
|
lex_state_type_id_to_string(type));
|
||||||
|
#endif
|
||||||
|
|
||||||
if (state->s_type->s_end) {
|
if (state->s_type->s_end) {
|
||||||
state->s_type->s_end(ctx);
|
state->s_type->s_end(ctx);
|
||||||
}
|
}
|
||||||
@@ -271,6 +353,13 @@ fx_string *lex_state_get_tempstr(struct lex_ctx *ctx)
|
|||||||
return state->s_tempstr;
|
return state->s_tempstr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void lex_state_add_terminator(struct lex_state *state, unsigned int tok)
|
||||||
|
{
|
||||||
|
if (state->s_nr_terminators < LEX_STATE_MAX_TERMINATORS) {
|
||||||
|
state->s_terminators[state->s_nr_terminators++] = tok;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static struct lex_symbol_node *get_symbol_node(
|
static struct lex_symbol_node *get_symbol_node(
|
||||||
struct lex_symbol_node *node,
|
struct lex_symbol_node *node,
|
||||||
char c)
|
char c)
|
||||||
@@ -504,7 +593,7 @@ bool convert_word_to_operator(struct lex_ctx *ctx, struct lex_token *tok)
|
|||||||
}
|
}
|
||||||
|
|
||||||
enum token_operator op = get_operator_with_string(ctx, tok->tok_str);
|
enum token_operator op = get_operator_with_string(ctx, tok->tok_str);
|
||||||
if (op == OP_NONE) {
|
if (op == TKOP_NONE) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -624,8 +713,15 @@ extern void enqueue_token_with_coordinates(
|
|||||||
const struct char_cell *start,
|
const struct char_cell *start,
|
||||||
const struct char_cell *end)
|
const struct char_cell *end)
|
||||||
{
|
{
|
||||||
|
if (tok->tok_type == TOK_LINEFEED
|
||||||
|
&& ctx->lex_prev_token == TOK_LINEFEED) {
|
||||||
|
lex_token_destroy(tok);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
tok->tok_start = *start;
|
tok->tok_start = *start;
|
||||||
tok->tok_end = *end;
|
tok->tok_end = *end;
|
||||||
|
ctx->lex_prev_token = tok->tok_type;
|
||||||
|
|
||||||
if (tok && (ctx->lex_flags & LEX_PRINT_TOKENS)) {
|
if (tok && (ctx->lex_flags & LEX_PRINT_TOKENS)) {
|
||||||
print_lex_token(tok);
|
print_lex_token(tok);
|
||||||
@@ -813,7 +909,7 @@ enum bshell_status read_word(
|
|||||||
|
|
||||||
bool number_recog = !(flags & READ_NO_NUMBER_RECOGNITION);
|
bool number_recog = !(flags & READ_NO_NUMBER_RECOGNITION);
|
||||||
|
|
||||||
enum token_operator op = OP_NONE;
|
enum token_operator op = TKOP_NONE;
|
||||||
bool done = false;
|
bool done = false;
|
||||||
while (!done) {
|
while (!done) {
|
||||||
fx_wchar c = peek_char(ctx);
|
fx_wchar c = peek_char(ctx);
|
||||||
@@ -844,7 +940,7 @@ enum bshell_status read_word(
|
|||||||
|
|
||||||
if (!fx_wchar_is_alpha(c)) {
|
if (!fx_wchar_is_alpha(c)) {
|
||||||
op = get_operator_with_string(ctx, s);
|
op = get_operator_with_string(ctx, s);
|
||||||
if (op != OP_NONE) {
|
if (op != TKOP_NONE) {
|
||||||
done = true;
|
done = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -889,7 +985,9 @@ enum bshell_status read_symbol(
|
|||||||
|
|
||||||
struct lex_symbol_node *next = get_symbol_node(node, c);
|
struct lex_symbol_node *next = get_symbol_node(node, c);
|
||||||
if (!next
|
if (!next
|
||||||
|| !(next->s_def->enabled_states & state->s_type->s_id)) {
|
|| (next->s_def
|
||||||
|
&& !(next->s_def->enabled_states
|
||||||
|
& state->s_type->s_id))) {
|
||||||
prev = c;
|
prev = c;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -932,10 +1030,7 @@ bool char_can_begin_symbol(struct lex_ctx *ctx, char c)
|
|||||||
return char_can_begin_symbol_in_state(ctx, c, state->s_type->s_id);
|
return char_can_begin_symbol_in_state(ctx, c, state->s_type->s_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern bool char_has_flags(
|
bool char_has_flags(struct lex_ctx *ctx, char c, enum lex_token_flags flags)
|
||||||
struct lex_ctx *ctx,
|
|
||||||
char c,
|
|
||||||
enum lex_token_flags flags)
|
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < nr_symbols; i++) {
|
for (size_t i = 0; i < nr_symbols; i++) {
|
||||||
if (symbols[i].name[0] != c) {
|
if (symbols[i].name[0] != c) {
|
||||||
@@ -948,6 +1043,60 @@ extern bool char_has_flags(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool keyword_has_flags(
|
||||||
|
struct lex_ctx *ctx,
|
||||||
|
enum token_keyword kw,
|
||||||
|
enum lex_token_flags flags)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < nr_symbols; i++) {
|
||||||
|
if (keywords[i].id == kw) {
|
||||||
|
return (keywords[i].flags & flags) == flags;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum lex_token_flags keyword_get_flags(
|
||||||
|
struct lex_ctx *ctx,
|
||||||
|
enum token_keyword kw)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < nr_symbols; i++) {
|
||||||
|
if (keywords[i].id == kw) {
|
||||||
|
return keywords[i].flags;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool symbol_has_flags(
|
||||||
|
struct lex_ctx *ctx,
|
||||||
|
enum token_symbol sym,
|
||||||
|
enum lex_token_flags flags)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < nr_symbols; i++) {
|
||||||
|
if (symbols[i].id == sym) {
|
||||||
|
return (symbols[i].flags & flags) == flags;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum lex_token_flags symbol_get_flags(
|
||||||
|
struct lex_ctx *ctx,
|
||||||
|
enum token_symbol sym)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < nr_symbols; i++) {
|
||||||
|
if (symbols[i].id == sym) {
|
||||||
|
return symbols[i].flags;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
enum token_operator get_operator_with_string(struct lex_ctx *ctx, const char *s)
|
enum token_operator get_operator_with_string(struct lex_ctx *ctx, const char *s)
|
||||||
{
|
{
|
||||||
struct lex_state *state = lex_state_get(ctx);
|
struct lex_state *state = lex_state_get(ctx);
|
||||||
@@ -968,6 +1117,126 @@ enum token_operator get_operator_with_string(struct lex_ctx *ctx, const char *s)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int compare_token_types(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
if (a == b) {
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define BETWEEN(v, lo, hi) ((v) >= (lo) && (v) <= (hi))
|
||||||
|
enum token_type a_type = TOK_NONE, b_type = TOK_NONE;
|
||||||
|
|
||||||
|
if (BETWEEN(a, __KW_INDEX_BASE, __KW_INDEX_LIMIT)) {
|
||||||
|
a_type = TOK_KEYWORD;
|
||||||
|
} else if (BETWEEN(a, __TKOP_INDEX_BASE, __TKOP_INDEX_LIMIT)) {
|
||||||
|
a_type = TOK_OPERATOR;
|
||||||
|
} else if (BETWEEN(a, __SYM_INDEX_BASE, __SYM_INDEX_LIMIT)) {
|
||||||
|
a_type = TOK_SYMBOL;
|
||||||
|
} else {
|
||||||
|
a_type = a;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (BETWEEN(b, __KW_INDEX_BASE, __KW_INDEX_LIMIT)) {
|
||||||
|
b_type = TOK_KEYWORD;
|
||||||
|
} else if (BETWEEN(b, __TKOP_INDEX_BASE, __TKOP_INDEX_LIMIT)) {
|
||||||
|
b_type = TOK_OPERATOR;
|
||||||
|
} else if (BETWEEN(b, __SYM_INDEX_BASE, __SYM_INDEX_LIMIT)) {
|
||||||
|
b_type = TOK_SYMBOL;
|
||||||
|
} else {
|
||||||
|
b_type = b;
|
||||||
|
}
|
||||||
|
#undef BETWEEN
|
||||||
|
|
||||||
|
int result = 0;
|
||||||
|
if (a_type == b_type) {
|
||||||
|
if (a != a_type && b != b_type) {
|
||||||
|
result = 0;
|
||||||
|
} else {
|
||||||
|
result = a == b ? 2 : 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result < 0) {
|
||||||
|
result = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
|
||||||
|
{
|
||||||
|
struct lex_state *state = lex_state_get(ctx);
|
||||||
|
for (unsigned int i = 0; i < LEX_STATE_MAX_TERMINATORS; i++) {
|
||||||
|
if (state->s_terminators[i] == TOK_NONE) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state->s_terminators[i] == token) {
|
||||||
|
lex_state_pop(ctx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const struct lex_state_link *table = state->s_type->s_links;
|
||||||
|
if (!table) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MAX_MATCHES 8
|
||||||
|
const struct lex_state_link *best_matches[MAX_MATCHES] = {0};
|
||||||
|
unsigned int match_count = 0;
|
||||||
|
int best_score = 0;
|
||||||
|
|
||||||
|
for (unsigned int i = 0; table[i].l_token != TOK_NONE; i++) {
|
||||||
|
int score = compare_token_types(table[i].l_token, token);
|
||||||
|
if (score == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(match_count < MAX_MATCHES
|
||||||
|
|| "lex state has too many matches");
|
||||||
|
if (score == best_score) {
|
||||||
|
best_matches[match_count++] = &table[i];
|
||||||
|
} else if (score > best_score) {
|
||||||
|
match_count = 0;
|
||||||
|
best_matches[match_count++] = &table[i];
|
||||||
|
best_score = score;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#undef MAX_MATCHES
|
||||||
|
|
||||||
|
if (!match_count) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned int i = 0; i < match_count; i++) {
|
||||||
|
const struct lex_state_link *link = best_matches[i];
|
||||||
|
switch (link->l_type) {
|
||||||
|
case LEX_STATE_LINK_POP:
|
||||||
|
lex_state_pop(ctx);
|
||||||
|
break;
|
||||||
|
case LEX_STATE_LINK_PUSH: {
|
||||||
|
struct lex_state *state = lex_state_push(
|
||||||
|
ctx,
|
||||||
|
link->l_target,
|
||||||
|
link->l_target_flags);
|
||||||
|
for (unsigned int i = 0; link->l_terminators[i]; i++) {
|
||||||
|
lex_state_add_terminator(
|
||||||
|
state,
|
||||||
|
link->l_terminators[i]);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case LEX_STATE_LINK_CHANGE:
|
||||||
|
lex_state_change(ctx, link->l_target);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static enum bshell_status read_string_content(struct lex_ctx *ctx)
|
static enum bshell_status read_string_content(struct lex_ctx *ctx)
|
||||||
{
|
{
|
||||||
fx_wchar c = FX_WCHAR_INVALID;
|
fx_wchar c = FX_WCHAR_INVALID;
|
||||||
|
|||||||
+46
-178
@@ -1,88 +1,11 @@
|
|||||||
#include "lex-internal.h"
|
#include "lex-internal.h"
|
||||||
|
|
||||||
#if 0
|
|
||||||
#define APPEND_HYPHEN 0x8000u
|
|
||||||
|
|
||||||
static enum bshell_status __read_word(
|
|
||||||
struct lex_ctx *ctx,
|
|
||||||
int flags,
|
|
||||||
struct lex_token **out)
|
|
||||||
{
|
|
||||||
fx_string *tmp = lex_state_get_tempstr(ctx);
|
|
||||||
fx_string_clear(tmp);
|
|
||||||
bool word_is_number = false;
|
|
||||||
|
|
||||||
if (flags & APPEND_HYPHEN) {
|
|
||||||
fx_string_append_c(tmp, '-');
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!(flags & READ_NO_SET_TOKEN_START)) {
|
|
||||||
set_token_start(ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
enum token_operator op = OP_NONE;
|
|
||||||
|
|
||||||
bool done = false;
|
|
||||||
while (!done) {
|
|
||||||
fx_wchar c = peek_char(ctx);
|
|
||||||
if (c == FX_WCHAR_INVALID) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fx_wchar_is_space(c)) {
|
|
||||||
done = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (char_has_flags(ctx, c, LEX_TOKEN_TERMINATES_WORD)) {
|
|
||||||
done = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (char_can_begin_symbol(ctx, c)) {
|
|
||||||
op = get_operator_with_string(
|
|
||||||
ctx,
|
|
||||||
fx_string_get_cstr(tmp));
|
|
||||||
if (op != OP_NONE) {
|
|
||||||
done = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fx_string_append_wc(tmp, c);
|
|
||||||
set_token_end(ctx);
|
|
||||||
advance_char(ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fx_string_get_size(tmp, FX_STRLEN_NORMAL) == 0) {
|
|
||||||
if (ctx->lex_status == BSHELL_SUCCESS) {
|
|
||||||
return BSHELL_ERR_BAD_SYNTAX;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ctx->lex_status;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct lex_token *tok = NULL;
|
|
||||||
if (op != OP_NONE) {
|
|
||||||
tok = lex_token_create(TOK_OPERATOR);
|
|
||||||
tok->tok_operator = op;
|
|
||||||
} else {
|
|
||||||
tok = lex_token_create_with_string(
|
|
||||||
TOK_WORD,
|
|
||||||
fx_string_get_cstr(tmp));
|
|
||||||
}
|
|
||||||
|
|
||||||
*out = tok;
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static enum bshell_status statement_hyphen(struct lex_ctx *ctx)
|
static enum bshell_status statement_hyphen(struct lex_ctx *ctx)
|
||||||
{
|
{
|
||||||
fx_wchar c = peek_char(ctx);
|
fx_wchar c = peek_char(ctx);
|
||||||
if (!fx_wchar_is_alnum(c)) {
|
if (!fx_wchar_is_alnum(c)) {
|
||||||
push_symbol(ctx, SYM_HYPHEN);
|
push_symbol(ctx, SYM_HYPHEN);
|
||||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
handle_lex_state_transition(ctx, SYM_HYPHEN);
|
||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -95,10 +18,11 @@ static enum bshell_status statement_hyphen(struct lex_ctx *ctx)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool converted = convert_word_to_int(tok);
|
unsigned int token_type = TOK_WORD;
|
||||||
|
|
||||||
|
if (convert_word_to_int(tok)) {
|
||||||
|
token_type = TOK_INT;
|
||||||
|
|
||||||
if (converted) {
|
|
||||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
|
||||||
/* because of APPEND_HYPHEN (which is needed to ensure operator
|
/* because of APPEND_HYPHEN (which is needed to ensure operator
|
||||||
* tokens are detected properly), the resulting number will be
|
* tokens are detected properly), the resulting number will be
|
||||||
* negative.
|
* negative.
|
||||||
@@ -106,18 +30,13 @@ static enum bshell_status statement_hyphen(struct lex_ctx *ctx)
|
|||||||
* must be positive */
|
* must be positive */
|
||||||
tok->tok_int *= -1;
|
tok->tok_int *= -1;
|
||||||
push_symbol(ctx, SYM_HYPHEN);
|
push_symbol(ctx, SYM_HYPHEN);
|
||||||
enqueue_token(ctx, tok);
|
} else if (convert_word_to_operator(ctx, tok)) {
|
||||||
return BSHELL_SUCCESS;
|
token_type = TOK_OPERATOR;
|
||||||
}
|
|
||||||
|
|
||||||
converted = convert_word_to_operator(ctx, tok);
|
|
||||||
if (converted) {
|
|
||||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
|
||||||
} else {
|
|
||||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
handle_lex_state_transition(ctx, token_type);
|
||||||
enqueue_token(ctx, tok);
|
enqueue_token(ctx, tok);
|
||||||
|
|
||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -130,6 +49,8 @@ static enum bshell_status statement_symbol(struct lex_ctx *ctx)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
handle_lex_state_transition(ctx, sym->id);
|
||||||
|
|
||||||
struct lex_token *tok = NULL;
|
struct lex_token *tok = NULL;
|
||||||
switch (sym->id) {
|
switch (sym->id) {
|
||||||
case SYM_HYPHEN:
|
case SYM_HYPHEN:
|
||||||
@@ -144,17 +65,7 @@ static enum bshell_status statement_symbol(struct lex_ctx *ctx)
|
|||||||
|
|
||||||
case SYM_HASH:
|
case SYM_HASH:
|
||||||
return read_line_comment(ctx);
|
return read_line_comment(ctx);
|
||||||
case SYM_DQUOTE:
|
|
||||||
if (!lex_state_push(ctx, LEX_STATE_STRING, 0)) {
|
|
||||||
return BSHELL_ERR_NO_MEMORY;
|
|
||||||
}
|
|
||||||
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
case SYM_DOLLAR:
|
case SYM_DOLLAR:
|
||||||
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC, 0)) {
|
|
||||||
return BSHELL_ERR_NO_MEMORY;
|
|
||||||
}
|
|
||||||
|
|
||||||
status = read_var(ctx, TOK_VAR, &tok);
|
status = read_var(ctx, TOK_VAR, &tok);
|
||||||
if (status != BSHELL_SUCCESS) {
|
if (status != BSHELL_SUCCESS) {
|
||||||
return status;
|
return status;
|
||||||
@@ -163,10 +74,6 @@ static enum bshell_status statement_symbol(struct lex_ctx *ctx)
|
|||||||
enqueue_token(ctx, tok);
|
enqueue_token(ctx, tok);
|
||||||
return status;
|
return status;
|
||||||
case SYM_AT:
|
case SYM_AT:
|
||||||
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC, 0)) {
|
|
||||||
return BSHELL_ERR_NO_MEMORY;
|
|
||||||
}
|
|
||||||
|
|
||||||
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||||
if (status != BSHELL_SUCCESS) {
|
if (status != BSHELL_SUCCESS) {
|
||||||
return status;
|
return status;
|
||||||
@@ -175,27 +82,11 @@ static enum bshell_status statement_symbol(struct lex_ctx *ctx)
|
|||||||
enqueue_token(ctx, tok);
|
enqueue_token(ctx, tok);
|
||||||
return status;
|
return status;
|
||||||
case SYM_DOLLAR_LEFT_BRACE:
|
case SYM_DOLLAR_LEFT_BRACE:
|
||||||
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC, 0)) {
|
|
||||||
return BSHELL_ERR_NO_MEMORY;
|
|
||||||
}
|
|
||||||
|
|
||||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||||
if (status != BSHELL_SUCCESS) {
|
if (status != BSHELL_SUCCESS) {
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
enqueue_token(ctx, tok);
|
|
||||||
return status;
|
|
||||||
case SYM_AT_LEFT_BRACE:
|
|
||||||
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC, 0)) {
|
|
||||||
return BSHELL_ERR_NO_MEMORY;
|
|
||||||
}
|
|
||||||
|
|
||||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
|
||||||
if (status != BSHELL_SUCCESS) {
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
enqueue_token(ctx, tok);
|
enqueue_token(ctx, tok);
|
||||||
return status;
|
return status;
|
||||||
default:
|
default:
|
||||||
@@ -203,32 +94,6 @@ static enum bshell_status statement_symbol(struct lex_ctx *ctx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
push_symbol(ctx, sym->id);
|
push_symbol(ctx, sym->id);
|
||||||
|
|
||||||
switch (sym->id) {
|
|
||||||
case SYM_LEFT_PAREN:
|
|
||||||
lex_state_push(
|
|
||||||
ctx,
|
|
||||||
LEX_STATE_STATEMENT,
|
|
||||||
STATEMENT_F_DISABLE_KEYWORDS);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
case SYM_LEFT_BRACE:
|
|
||||||
case SYM_DOLLAR_LEFT_PAREN:
|
|
||||||
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
case SYM_RIGHT_PAREN:
|
|
||||||
case SYM_RIGHT_BRACE:
|
|
||||||
lex_state_pop(ctx);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sym->enabled_states & LEX_STATE_COMMAND) {
|
|
||||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
|
||||||
} else if (sym->enabled_states & LEX_STATE_ARITHMETIC) {
|
|
||||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
|
||||||
}
|
|
||||||
|
|
||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -242,21 +107,16 @@ static enum bshell_status statement_word(struct lex_ctx *ctx)
|
|||||||
|
|
||||||
struct lex_state *state = lex_state_get(ctx);
|
struct lex_state *state = lex_state_get(ctx);
|
||||||
|
|
||||||
bool converted = false;
|
bool enable_keywords = !(state->s_flags & STATEMENT_F_DISABLE_KEYWORDS);
|
||||||
|
unsigned int token = TOK_WORD;
|
||||||
|
|
||||||
if (!(state->s_flags & STATEMENT_F_DISABLE_KEYWORDS)) {
|
if (enable_keywords && convert_word_to_keyword(word)) {
|
||||||
converted = convert_word_to_keyword(word);
|
token = word->tok_keyword;
|
||||||
|
} else if (convert_word_to_int(word)) {
|
||||||
|
token = TOK_INT;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!converted) {
|
handle_lex_state_transition(ctx, token);
|
||||||
converted = convert_word_to_int(word);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (converted) {
|
|
||||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
|
||||||
} else {
|
|
||||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
|
||||||
}
|
|
||||||
|
|
||||||
enqueue_token(ctx, word);
|
enqueue_token(ctx, word);
|
||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
@@ -284,18 +144,6 @@ static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
|
|||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
if (char_can_begin_symbol_in_state(ctx, c, LEX_STATE_ARITHMETIC)) {
|
|
||||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (char_can_begin_symbol_in_state(ctx, c, LEX_STATE_COMMAND)) {
|
|
||||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
|
||||||
return BSHELL_SUCCESS;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (char_can_begin_symbol(ctx, c)) {
|
if (char_can_begin_symbol(ctx, c)) {
|
||||||
return statement_symbol(ctx);
|
return statement_symbol(ctx);
|
||||||
}
|
}
|
||||||
@@ -305,18 +153,38 @@ static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
|
|||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
if (fx_wchar_is_number(c)) {
|
|
||||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
|
||||||
} else {
|
|
||||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return statement_word(ctx);
|
return statement_word(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const struct lex_state_link links[] = {
|
||||||
|
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
|
||||||
|
/* arithmetic tokens */
|
||||||
|
LINK_CHANGE(TOK_KEYWORD, LEX_STATE_ARITHMETIC),
|
||||||
|
LINK_CHANGE(TOK_INT, LEX_STATE_ARITHMETIC),
|
||||||
|
LINK_PUSH(SYM_DOLLAR, LEX_STATE_ARITHMETIC, 0),
|
||||||
|
LINK_PUSH(SYM_DOLLAR_LEFT_BRACE, LEX_STATE_ARITHMETIC, 0),
|
||||||
|
LINK_CHANGE(SYM_AT_LEFT_BRACE, LEX_STATE_ARITHMETIC),
|
||||||
|
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
|
||||||
|
LINK_PUSH(SYM_AT, LEX_STATE_ARITHMETIC, 0),
|
||||||
|
LINK_CHANGE(SYM_LEFT_PAREN, LEX_STATE_ARITHMETIC),
|
||||||
|
LINK_PUSH(
|
||||||
|
SYM_LEFT_PAREN,
|
||||||
|
LEX_STATE_STATEMENT,
|
||||||
|
STATEMENT_F_DISABLE_KEYWORDS),
|
||||||
|
|
||||||
|
/* statement tokens */
|
||||||
|
LINK_PUSH(SYM_LEFT_BRACE, LEX_STATE_STATEMENT, 0),
|
||||||
|
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
|
||||||
|
|
||||||
|
/* command tokens */
|
||||||
|
LINK_CHANGE(KW_FUNC, LEX_STATE_COMMAND),
|
||||||
|
LINK_CHANGE(SYM_AMPERSAND, LEX_STATE_COMMAND),
|
||||||
|
LINK_CHANGE(TOK_WORD, LEX_STATE_COMMAND),
|
||||||
|
LINK_END,
|
||||||
|
};
|
||||||
|
|
||||||
const struct lex_state_type lex_statement_state = {
|
const struct lex_state_type lex_statement_state = {
|
||||||
.s_id = LEX_STATE_STATEMENT,
|
.s_id = LEX_STATE_STATEMENT,
|
||||||
.s_pump_token = statement_pump_token,
|
.s_pump_token = statement_pump_token,
|
||||||
|
.s_links = links,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -45,14 +45,6 @@ static enum bshell_status string_symbol(struct lex_ctx *ctx)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
enqueue_token(ctx, tok);
|
|
||||||
return status;
|
|
||||||
case SYM_AT_LEFT_BRACE:
|
|
||||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
|
||||||
if (status != BSHELL_SUCCESS) {
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
enqueue_token(ctx, tok);
|
enqueue_token(ctx, tok);
|
||||||
return status;
|
return status;
|
||||||
default:
|
default:
|
||||||
|
|||||||
+5
-16
@@ -42,22 +42,6 @@ static enum bshell_status word_symbol(struct lex_ctx *ctx)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
enqueue_token(ctx, tok);
|
|
||||||
return status;
|
|
||||||
case SYM_DOLLAR_LEFT_BRACE:
|
|
||||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
|
||||||
if (status != BSHELL_SUCCESS) {
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
enqueue_token(ctx, tok);
|
|
||||||
return status;
|
|
||||||
case SYM_AT_LEFT_BRACE:
|
|
||||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
|
||||||
if (status != BSHELL_SUCCESS) {
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
enqueue_token(ctx, tok);
|
enqueue_token(ctx, tok);
|
||||||
return status;
|
return status;
|
||||||
default:
|
default:
|
||||||
@@ -153,9 +137,14 @@ static enum bshell_status word_pump_token(struct lex_ctx *ctx)
|
|||||||
return word_content(ctx);
|
return word_content(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const struct lex_state_link links[] = {
|
||||||
|
LINK_END,
|
||||||
|
};
|
||||||
|
|
||||||
const struct lex_state_type lex_word_state = {
|
const struct lex_state_type lex_word_state = {
|
||||||
.s_id = LEX_STATE_WORD,
|
.s_id = LEX_STATE_WORD,
|
||||||
.s_begin = word_begin,
|
.s_begin = word_begin,
|
||||||
.s_end = word_end,
|
.s_end = word_end,
|
||||||
.s_pump_token = word_pump_token,
|
.s_pump_token = word_pump_token,
|
||||||
|
.s_links = links,
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user