parse: lex: support tokens terminating multiple lex states in certain circumstances

This commit is contained in:
2026-05-12 22:51:45 +01:00
parent e3b92fe4f2
commit cc450da31e
3 changed files with 67 additions and 13 deletions
+17 -2
View File
@@ -141,23 +141,38 @@ static enum bshell_status hashtable_pump_token(struct lex_ctx *ctx)
}
/*
 * Link table for a lexer state: each entry pairs a token with a state-stack
 * action (push a new state, or pop the current one). The list is closed by
 * LINK_END.
 *
 * NOTE(review): the enclosing hunk header mentions hashtable_pump_token(), so
 * this is presumably the hashtable state's table -- confirm.
 * NOTE(review): this listing comes from a rendered diff without +/- markers.
 * The one-line SYM_EQUAL entry and the plain LINK_POP(SYM_RIGHT_BRACE) entry
 * look like the removed versions of the entries that follow them; verify
 * against the real file before treating both as live.
 */
static const struct lex_state_link links[] = {
LINK_PUSH_WITH_TERM(SYM_EQUAL, LEX_STATE_STATEMENT, 0, SYM_SEMICOLON),
/*
 * '=' pushes a statement state. The trailing arguments are presumably the
 * tokens that terminate (pop) the pushed state: '}', ';' or a linefeed.
 */
LINK_PUSH_WITH_TERM(
SYM_EQUAL,
LEX_STATE_STATEMENT,
0,
SYM_RIGHT_BRACE,
SYM_SEMICOLON,
TOK_LINEFEED),
/* A linefeed pushes a statement state, presumably ended by ';' or a linefeed. */
LINK_PUSH_WITH_TERM(
TOK_LINEFEED,
LEX_STATE_STATEMENT,
0,
SYM_SEMICOLON,
TOK_LINEFEED),
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
LINK_PUSH(
SYM_LEFT_PAREN,
LEX_STATE_STATEMENT,
STATEMENT_F_DISABLE_KEYWORDS),
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
LINK_POP(SYM_RIGHT_BRACE),
/*
 * '}' pops this state. LINK_ALLOW_RECURSION keeps the link eligible during
 * the follow-up ("recursive") transition passes, so a '}' that has already
 * terminated a nested state can close this one as well.
 */
LINK_POP2(SYM_RIGHT_BRACE, LINK_ALLOW_RECURSION),
LINK_END,
};
/*
 * Symbol ids listed for this lexer state; the list is closed by SYM_NONE.
 * NOTE(review): presumably the set of symbols the lexer recognises while this
 * state is active -- the consumer is not visible here, confirm.
 */
static const unsigned int symbols[] = {
SYM_EQUAL,
SYM_DQUOTE,
SYM_SQUOTE,
SYM_SEMICOLON,
SYM_RIGHT_BRACE,
SYM_DOLLAR_LEFT_PAREN,
SYM_LEFT_PAREN,
SYM_HASH,
SYM_NONE,
};
+11
View File
@@ -21,6 +21,10 @@ enum read_flags {
READ_NO_NUMBER_RECOGNITION = 0x04u,
};
/* Bit flags stored in a state link's l_flags field. */
enum link_flags {
	/*
	 * The link may also be taken during the follow-up ("recursive")
	 * transition passes that run after a token has already caused one
	 * state transition.
	 */
	LINK_ALLOW_RECURSION = 1u << 0,
};
#define LINK_PUSH(tok, target, flags) \
((struct lex_state_link) { \
.l_token = (tok), \
@@ -47,6 +51,12 @@ enum read_flags {
.l_token = (tok), \
.l_type = LEX_STATE_LINK_POP, \
})
/*
 * Build a pop link for token `tok` that additionally carries `flags`
 * (enum link_flags bits, e.g. LINK_ALLOW_RECURSION) in l_flags.
 */
#define LINK_POP2(tok, flags)                 \
	((struct lex_state_link) {            \
		.l_type = LEX_STATE_LINK_POP, \
		.l_flags = (flags),           \
		.l_token = (tok),             \
	})
#define LINK_NONE(tok) \
((struct lex_state_link) { \
.l_token = (tok), \
@@ -62,6 +72,7 @@ struct lex_state_link {
LEX_STATE_LINK_CHANGE,
LEX_STATE_LINK_POP,
} l_type;
enum link_flags l_flags;
enum lex_state_type_id l_target;
enum state_flags l_target_flags;
unsigned int l_terminators[LEX_STATE_MAX_TERMINATORS];
+39 -11
View File
@@ -190,7 +190,9 @@ void lex_state_pop(struct lex_ctx *ctx)
struct lex_state *state = fx_unbox(struct lex_state, entry, s_entry);
#if defined(VERBOSE)
printf("pop(%s)\n", lex_state_type_id_to_string(state->s_type->s_id));
printf("pop(%s) -> %s\n",
lex_state_type_id_to_string(state->s_type->s_id),
lex_state_type_id_to_string(lex_state_get(ctx)->s_type->s_id));
#endif
if (state->s_type->s_end) {
@@ -1114,23 +1116,29 @@ int compare_token_types(unsigned int a, unsigned int b)
return result;
}
void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
static bool do_lex_state_transition(
struct lex_ctx *ctx,
unsigned int token,
bool recursive)
{
struct lex_state *state = lex_state_get(ctx);
for (unsigned int i = 0; i < LEX_STATE_MAX_TERMINATORS; i++) {
if (state->s_terminators[i] == TOK_NONE) {
break;
}
enum link_flags required_flags = 0;
if (recursive) {
required_flags |= LINK_ALLOW_RECURSION;
}
if (state->s_terminators[i] == token) {
lex_state_pop(ctx);
return;
if (!recursive) {
for (unsigned int i = 0; i < state->s_nr_terminators; i++) {
if (state->s_terminators[i] == token) {
lex_state_pop(ctx);
return true;
}
}
}
const struct lex_state_link *table = state->s_type->s_links;
if (!table) {
return;
return false;
}
#define MAX_MATCHES 8
@@ -1140,6 +1148,10 @@ void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
for (unsigned int i = 0; table[i].l_token != TOK_NONE; i++) {
int score = compare_token_types(table[i].l_token, token);
if ((table[i].l_flags & required_flags) != required_flags) {
score = 0;
}
if (score == 0) {
continue;
}
@@ -1157,14 +1169,16 @@ void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
#undef MAX_MATCHES
if (!match_count) {
return;
return false;
}
bool result = false;
for (unsigned int i = 0; i < match_count; i++) {
const struct lex_state_link *link = best_matches[i];
switch (link->l_type) {
case LEX_STATE_LINK_POP:
lex_state_pop(ctx);
result = true;
break;
case LEX_STATE_LINK_PUSH: {
struct lex_state *state = lex_state_push(
@@ -1176,16 +1190,30 @@ void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
state,
link->l_terminators[i]);
}
result = true;
break;
}
case LEX_STATE_LINK_CHANGE:
lex_state_change(ctx, link->l_target);
result = true;
break;
default:
break;
}
}
return result;
}
/*
 * Apply every lexer state transition triggered by `token`.
 *
 * The first pass runs in non-recursive mode. Each time a pass reports that a
 * transition happened, another pass is run in recursive mode against the
 * state that is now current; this is what lets one token terminate several
 * nested states. The loop stops at the first pass that reports no transition.
 *
 * NOTE(review): termination relies on the recursive passes eventually finding
 * no eligible link in the current state -- confirm that no push/change link
 * is ever marked LINK_ALLOW_RECURSION, and that a current state always exists
 * after a pop.
 */
void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
{
	bool recursive = false;

	while (do_lex_state_transition(ctx, token, recursive)) {
		/* every pass after the first one is a recursive pass */
		recursive = true;
	}
}
static enum bshell_status read_string_content(struct lex_ctx *ctx)