parse: lex: support tokens terminating multiple lex states in certain circumstances

This commit is contained in:
2026-05-12 22:51:45 +01:00
parent e3b92fe4f2
commit cc450da31e
3 changed files with 67 additions and 13 deletions
+17 -2
View File
@@ -141,23 +141,38 @@ static enum bshell_status hashtable_pump_token(struct lex_ctx *ctx)
} }
static const struct lex_state_link links[] = { static const struct lex_state_link links[] = {
LINK_PUSH_WITH_TERM(SYM_EQUAL, LEX_STATE_STATEMENT, 0, SYM_SEMICOLON), LINK_PUSH_WITH_TERM(
SYM_EQUAL,
LEX_STATE_STATEMENT,
0,
SYM_RIGHT_BRACE,
SYM_SEMICOLON,
TOK_LINEFEED),
LINK_PUSH_WITH_TERM(
TOK_LINEFEED,
LEX_STATE_STATEMENT,
0,
SYM_SEMICOLON,
TOK_LINEFEED),
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0), LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
LINK_PUSH( LINK_PUSH(
SYM_LEFT_PAREN, SYM_LEFT_PAREN,
LEX_STATE_STATEMENT, LEX_STATE_STATEMENT,
STATEMENT_F_DISABLE_KEYWORDS), STATEMENT_F_DISABLE_KEYWORDS),
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0), LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
LINK_POP(SYM_RIGHT_BRACE), LINK_POP2(SYM_RIGHT_BRACE, LINK_ALLOW_RECURSION),
LINK_END, LINK_END,
}; };
static const unsigned int symbols[] = { static const unsigned int symbols[] = {
SYM_EQUAL, SYM_EQUAL,
SYM_DQUOTE,
SYM_SQUOTE,
SYM_SEMICOLON, SYM_SEMICOLON,
SYM_RIGHT_BRACE, SYM_RIGHT_BRACE,
SYM_DOLLAR_LEFT_PAREN, SYM_DOLLAR_LEFT_PAREN,
SYM_LEFT_PAREN, SYM_LEFT_PAREN,
SYM_HASH,
SYM_NONE, SYM_NONE,
}; };
+11
View File
@@ -21,6 +21,10 @@ enum read_flags {
READ_NO_NUMBER_RECOGNITION = 0x04u, READ_NO_NUMBER_RECOGNITION = 0x04u,
}; };
/*
 * Per-link option bits, stored in lex_state_link.l_flags.
 *
 * LINK_ALLOW_RECURSION: the link stays eligible when the same token is
 * re-dispatched after it has already caused a state transition (the
 * "recursive" passes of do_lex_state_transition). Links without this bit
 * are ignored on those passes.
 */
enum link_flags {
	LINK_ALLOW_RECURSION = 1u << 0,
};
#define LINK_PUSH(tok, target, flags) \ #define LINK_PUSH(tok, target, flags) \
((struct lex_state_link) { \ ((struct lex_state_link) { \
.l_token = (tok), \ .l_token = (tok), \
@@ -47,6 +51,12 @@ enum read_flags {
.l_token = (tok), \ .l_token = (tok), \
.l_type = LEX_STATE_LINK_POP, \ .l_type = LEX_STATE_LINK_POP, \
}) })
/*
 * Build a LEX_STATE_LINK_POP link for token `pop_tok`, like LINK_POP, but
 * additionally storing `pop_flags` (enum link_flags bits) in l_flags.
 * All fields not named here are zero-initialised by the compound literal.
 */
#define LINK_POP2(pop_tok, pop_flags)                                          \
	((struct lex_state_link) {                                             \
		.l_type = LEX_STATE_LINK_POP,                                  \
		.l_token = (pop_tok),                                          \
		.l_flags = (pop_flags),                                        \
	})
#define LINK_NONE(tok) \ #define LINK_NONE(tok) \
((struct lex_state_link) { \ ((struct lex_state_link) { \
.l_token = (tok), \ .l_token = (tok), \
@@ -62,6 +72,7 @@ struct lex_state_link {
LEX_STATE_LINK_CHANGE, LEX_STATE_LINK_CHANGE,
LEX_STATE_LINK_POP, LEX_STATE_LINK_POP,
} l_type; } l_type;
enum link_flags l_flags;
enum lex_state_type_id l_target; enum lex_state_type_id l_target;
enum state_flags l_target_flags; enum state_flags l_target_flags;
unsigned int l_terminators[LEX_STATE_MAX_TERMINATORS]; unsigned int l_terminators[LEX_STATE_MAX_TERMINATORS];
+39 -11
View File
@@ -190,7 +190,9 @@ void lex_state_pop(struct lex_ctx *ctx)
struct lex_state *state = fx_unbox(struct lex_state, entry, s_entry); struct lex_state *state = fx_unbox(struct lex_state, entry, s_entry);
#if defined(VERBOSE) #if defined(VERBOSE)
printf("pop(%s)\n", lex_state_type_id_to_string(state->s_type->s_id)); printf("pop(%s) -> %s\n",
lex_state_type_id_to_string(state->s_type->s_id),
lex_state_type_id_to_string(lex_state_get(ctx)->s_type->s_id));
#endif #endif
if (state->s_type->s_end) { if (state->s_type->s_end) {
@@ -1114,23 +1116,29 @@ int compare_token_types(unsigned int a, unsigned int b)
return result; return result;
} }
void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token) static bool do_lex_state_transition(
struct lex_ctx *ctx,
unsigned int token,
bool recursive)
{ {
struct lex_state *state = lex_state_get(ctx); struct lex_state *state = lex_state_get(ctx);
for (unsigned int i = 0; i < LEX_STATE_MAX_TERMINATORS; i++) { enum link_flags required_flags = 0;
if (state->s_terminators[i] == TOK_NONE) { if (recursive) {
break; required_flags |= LINK_ALLOW_RECURSION;
} }
if (state->s_terminators[i] == token) { if (!recursive) {
lex_state_pop(ctx); for (unsigned int i = 0; i < state->s_nr_terminators; i++) {
return; if (state->s_terminators[i] == token) {
lex_state_pop(ctx);
return true;
}
} }
} }
const struct lex_state_link *table = state->s_type->s_links; const struct lex_state_link *table = state->s_type->s_links;
if (!table) { if (!table) {
return; return false;
} }
#define MAX_MATCHES 8 #define MAX_MATCHES 8
@@ -1140,6 +1148,10 @@ void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
for (unsigned int i = 0; table[i].l_token != TOK_NONE; i++) { for (unsigned int i = 0; table[i].l_token != TOK_NONE; i++) {
int score = compare_token_types(table[i].l_token, token); int score = compare_token_types(table[i].l_token, token);
if ((table[i].l_flags & required_flags) != required_flags) {
score = 0;
}
if (score == 0) { if (score == 0) {
continue; continue;
} }
@@ -1157,14 +1169,16 @@ void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
#undef MAX_MATCHES #undef MAX_MATCHES
if (!match_count) { if (!match_count) {
return; return false;
} }
bool result = false;
for (unsigned int i = 0; i < match_count; i++) { for (unsigned int i = 0; i < match_count; i++) {
const struct lex_state_link *link = best_matches[i]; const struct lex_state_link *link = best_matches[i];
switch (link->l_type) { switch (link->l_type) {
case LEX_STATE_LINK_POP: case LEX_STATE_LINK_POP:
lex_state_pop(ctx); lex_state_pop(ctx);
result = true;
break; break;
case LEX_STATE_LINK_PUSH: { case LEX_STATE_LINK_PUSH: {
struct lex_state *state = lex_state_push( struct lex_state *state = lex_state_push(
@@ -1176,16 +1190,30 @@ void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
state, state,
link->l_terminators[i]); link->l_terminators[i]);
} }
result = true;
break; break;
} }
case LEX_STATE_LINK_CHANGE: case LEX_STATE_LINK_CHANGE:
lex_state_change(ctx, link->l_target); lex_state_change(ctx, link->l_target);
result = true;
break; break;
default: default:
break; break;
} }
} }
return result;
}
/*
 * Apply every lex-state transition triggered by `token`.
 *
 * The first pass runs do_lex_state_transition() in non-recursive mode.
 * For as long as a pass reports that it performed a transition, the same
 * token is dispatched again in recursive mode, so that one token can act
 * on more than one state in the stack. The loop ends on the first pass
 * that performs no transition.
 */
void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
{
	bool recursive = false;

	while (do_lex_state_transition(ctx, token, recursive)) {
		/* every pass after the first one is a recursive pass */
		recursive = true;
	}
}
static enum bshell_status read_string_content(struct lex_ctx *ctx) static enum bshell_status read_string_content(struct lex_ctx *ctx)