parse: lex: support tokens terminating multiple lex states in certain circumstances
This commit is contained in:
@@ -141,23 +141,38 @@ static enum bshell_status hashtable_pump_token(struct lex_ctx *ctx)
|
||||
}
|
||||
|
||||
static const struct lex_state_link links[] = {
|
||||
LINK_PUSH_WITH_TERM(SYM_EQUAL, LEX_STATE_STATEMENT, 0, SYM_SEMICOLON),
|
||||
LINK_PUSH_WITH_TERM(
|
||||
SYM_EQUAL,
|
||||
LEX_STATE_STATEMENT,
|
||||
0,
|
||||
SYM_RIGHT_BRACE,
|
||||
SYM_SEMICOLON,
|
||||
TOK_LINEFEED),
|
||||
LINK_PUSH_WITH_TERM(
|
||||
TOK_LINEFEED,
|
||||
LEX_STATE_STATEMENT,
|
||||
0,
|
||||
SYM_SEMICOLON,
|
||||
TOK_LINEFEED),
|
||||
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
|
||||
LINK_PUSH(
|
||||
SYM_LEFT_PAREN,
|
||||
LEX_STATE_STATEMENT,
|
||||
STATEMENT_F_DISABLE_KEYWORDS),
|
||||
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
|
||||
LINK_POP(SYM_RIGHT_BRACE),
|
||||
LINK_POP2(SYM_RIGHT_BRACE, LINK_ALLOW_RECURSION),
|
||||
LINK_END,
|
||||
};
|
||||
|
||||
/*
 * Table of symbol token identifiers. The final entry is SYM_NONE, which
 * presumably marks the end of the list -- TODO confirm against the code
 * that walks this table (not visible here).
 */
static const unsigned int symbols[] = {
|
||||
SYM_EQUAL,
|
||||
SYM_DQUOTE,
|
||||
SYM_SQUOTE,
|
||||
SYM_SEMICOLON,
|
||||
SYM_RIGHT_BRACE,
|
||||
SYM_DOLLAR_LEFT_PAREN,
|
||||
SYM_LEFT_PAREN,
|
||||
SYM_HASH,
|
||||
SYM_NONE,
|
||||
};
|
||||
|
||||
|
||||
@@ -21,6 +21,10 @@ enum read_flags {
|
||||
READ_NO_NUMBER_RECOGNITION = 0x04u,
|
||||
};
|
||||
|
||||
/* Bit flags carried in the l_flags field of struct lex_state_link. */
enum link_flags {
|
||||
/*
 * The link may still be matched when do_lex_state_transition() runs with
 * recursive == true; links without this bit get a score of 0 there.
 */
LINK_ALLOW_RECURSION = 0x01u,
|
||||
};
|
||||
|
||||
#define LINK_PUSH(tok, target, flags) \
|
||||
((struct lex_state_link) { \
|
||||
.l_token = (tok), \
|
||||
@@ -47,6 +51,12 @@ enum read_flags {
|
||||
.l_token = (tok), \
|
||||
.l_type = LEX_STATE_LINK_POP, \
|
||||
})
|
||||
/*
 * Build a struct lex_state_link compound literal of type LEX_STATE_LINK_POP
 * for token `tok`, with l_flags set to `flags` (enum link_flags bits).
 * Same as LINK_POP except that it also sets l_flags.
 */
#define LINK_POP2(tok, flags) \
|
||||
((struct lex_state_link) { \
|
||||
.l_token = (tok), \
|
||||
.l_type = LEX_STATE_LINK_POP, \
|
||||
.l_flags = (flags), \
|
||||
})
|
||||
#define LINK_NONE(tok) \
|
||||
((struct lex_state_link) { \
|
||||
.l_token = (tok), \
|
||||
@@ -62,6 +72,7 @@ struct lex_state_link {
|
||||
LEX_STATE_LINK_CHANGE,
|
||||
LEX_STATE_LINK_POP,
|
||||
} l_type;
|
||||
enum link_flags l_flags;
|
||||
enum lex_state_type_id l_target;
|
||||
enum state_flags l_target_flags;
|
||||
unsigned int l_terminators[LEX_STATE_MAX_TERMINATORS];
|
||||
|
||||
+39
-11
@@ -190,7 +190,9 @@ void lex_state_pop(struct lex_ctx *ctx)
|
||||
struct lex_state *state = fx_unbox(struct lex_state, entry, s_entry);
|
||||
|
||||
#if defined(VERBOSE)
|
||||
printf("pop(%s)\n", lex_state_type_id_to_string(state->s_type->s_id));
|
||||
printf("pop(%s) -> %s\n",
|
||||
lex_state_type_id_to_string(state->s_type->s_id),
|
||||
lex_state_type_id_to_string(lex_state_get(ctx)->s_type->s_id));
|
||||
#endif
|
||||
|
||||
if (state->s_type->s_end) {
|
||||
@@ -1114,23 +1116,29 @@ int compare_token_types(unsigned int a, unsigned int b)
|
||||
return result;
|
||||
}
|
||||
|
||||
void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
|
||||
static bool do_lex_state_transition(
|
||||
struct lex_ctx *ctx,
|
||||
unsigned int token,
|
||||
bool recursive)
|
||||
{
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
for (unsigned int i = 0; i < LEX_STATE_MAX_TERMINATORS; i++) {
|
||||
if (state->s_terminators[i] == TOK_NONE) {
|
||||
break;
|
||||
}
|
||||
enum link_flags required_flags = 0;
|
||||
if (recursive) {
|
||||
required_flags |= LINK_ALLOW_RECURSION;
|
||||
}
|
||||
|
||||
if (state->s_terminators[i] == token) {
|
||||
lex_state_pop(ctx);
|
||||
return;
|
||||
if (!recursive) {
|
||||
for (unsigned int i = 0; i < state->s_nr_terminators; i++) {
|
||||
if (state->s_terminators[i] == token) {
|
||||
lex_state_pop(ctx);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const struct lex_state_link *table = state->s_type->s_links;
|
||||
if (!table) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
#define MAX_MATCHES 8
|
||||
@@ -1140,6 +1148,10 @@ void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
|
||||
|
||||
for (unsigned int i = 0; table[i].l_token != TOK_NONE; i++) {
|
||||
int score = compare_token_types(table[i].l_token, token);
|
||||
if ((table[i].l_flags & required_flags) != required_flags) {
|
||||
score = 0;
|
||||
}
|
||||
|
||||
if (score == 0) {
|
||||
continue;
|
||||
}
|
||||
@@ -1157,14 +1169,16 @@ void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
|
||||
#undef MAX_MATCHES
|
||||
|
||||
if (!match_count) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool result = false;
|
||||
for (unsigned int i = 0; i < match_count; i++) {
|
||||
const struct lex_state_link *link = best_matches[i];
|
||||
switch (link->l_type) {
|
||||
case LEX_STATE_LINK_POP:
|
||||
lex_state_pop(ctx);
|
||||
result = true;
|
||||
break;
|
||||
case LEX_STATE_LINK_PUSH: {
|
||||
struct lex_state *state = lex_state_push(
|
||||
@@ -1176,16 +1190,30 @@ void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
|
||||
state,
|
||||
link->l_terminators[i]);
|
||||
}
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case LEX_STATE_LINK_CHANGE:
|
||||
lex_state_change(ctx, link->l_target);
|
||||
result = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Apply the lexer state transitions triggered by `token`.
 *
 * Calls do_lex_state_transition() once with recursive == false and then
 * keeps calling it with recursive == true for as long as the previous
 * call returned true, so one token can drive several transitions in a row.
 * The loop stops at the first call that returns false.
 *
 * ctx:   lexer context passed through to do_lex_state_transition().
 * token: token type that was just produced.
 */
void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
|
||||
{
|
||||
bool cont = false;
|
||||
bool recursive = false;
|
||||
do {
|
||||
cont = do_lex_state_transition(ctx, token, recursive);
|
||||
/* Every pass after the first one is a recursive pass. */
recursive = true;
|
||||
} while (cont);
|
||||
}
|
||||
|
||||
static enum bshell_status read_string_content(struct lex_ctx *ctx)
|
||||
|
||||
Reference in New Issue
Block a user