parse: lex: support tokens terminating multiple lex states in certain circumstances
This commit is contained in:
@@ -141,23 +141,38 @@ static enum bshell_status hashtable_pump_token(struct lex_ctx *ctx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static const struct lex_state_link links[] = {
|
static const struct lex_state_link links[] = {
|
||||||
LINK_PUSH_WITH_TERM(SYM_EQUAL, LEX_STATE_STATEMENT, 0, SYM_SEMICOLON),
|
LINK_PUSH_WITH_TERM(
|
||||||
|
SYM_EQUAL,
|
||||||
|
LEX_STATE_STATEMENT,
|
||||||
|
0,
|
||||||
|
SYM_RIGHT_BRACE,
|
||||||
|
SYM_SEMICOLON,
|
||||||
|
TOK_LINEFEED),
|
||||||
|
LINK_PUSH_WITH_TERM(
|
||||||
|
TOK_LINEFEED,
|
||||||
|
LEX_STATE_STATEMENT,
|
||||||
|
0,
|
||||||
|
SYM_SEMICOLON,
|
||||||
|
TOK_LINEFEED),
|
||||||
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
|
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
|
||||||
LINK_PUSH(
|
LINK_PUSH(
|
||||||
SYM_LEFT_PAREN,
|
SYM_LEFT_PAREN,
|
||||||
LEX_STATE_STATEMENT,
|
LEX_STATE_STATEMENT,
|
||||||
STATEMENT_F_DISABLE_KEYWORDS),
|
STATEMENT_F_DISABLE_KEYWORDS),
|
||||||
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
|
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
|
||||||
LINK_POP(SYM_RIGHT_BRACE),
|
LINK_POP2(SYM_RIGHT_BRACE, LINK_ALLOW_RECURSION),
|
||||||
LINK_END,
|
LINK_END,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const unsigned int symbols[] = {
|
static const unsigned int symbols[] = {
|
||||||
SYM_EQUAL,
|
SYM_EQUAL,
|
||||||
|
SYM_DQUOTE,
|
||||||
|
SYM_SQUOTE,
|
||||||
SYM_SEMICOLON,
|
SYM_SEMICOLON,
|
||||||
SYM_RIGHT_BRACE,
|
SYM_RIGHT_BRACE,
|
||||||
SYM_DOLLAR_LEFT_PAREN,
|
SYM_DOLLAR_LEFT_PAREN,
|
||||||
SYM_LEFT_PAREN,
|
SYM_LEFT_PAREN,
|
||||||
|
SYM_HASH,
|
||||||
SYM_NONE,
|
SYM_NONE,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -21,6 +21,10 @@ enum read_flags {
|
|||||||
READ_NO_NUMBER_RECOGNITION = 0x04u,
|
READ_NO_NUMBER_RECOGNITION = 0x04u,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum link_flags {
|
||||||
|
LINK_ALLOW_RECURSION = 0x01u,
|
||||||
|
};
|
||||||
|
|
||||||
#define LINK_PUSH(tok, target, flags) \
|
#define LINK_PUSH(tok, target, flags) \
|
||||||
((struct lex_state_link) { \
|
((struct lex_state_link) { \
|
||||||
.l_token = (tok), \
|
.l_token = (tok), \
|
||||||
@@ -47,6 +51,12 @@ enum read_flags {
|
|||||||
.l_token = (tok), \
|
.l_token = (tok), \
|
||||||
.l_type = LEX_STATE_LINK_POP, \
|
.l_type = LEX_STATE_LINK_POP, \
|
||||||
})
|
})
|
||||||
|
#define LINK_POP2(tok, flags) \
|
||||||
|
((struct lex_state_link) { \
|
||||||
|
.l_token = (tok), \
|
||||||
|
.l_type = LEX_STATE_LINK_POP, \
|
||||||
|
.l_flags = (flags), \
|
||||||
|
})
|
||||||
#define LINK_NONE(tok) \
|
#define LINK_NONE(tok) \
|
||||||
((struct lex_state_link) { \
|
((struct lex_state_link) { \
|
||||||
.l_token = (tok), \
|
.l_token = (tok), \
|
||||||
@@ -62,6 +72,7 @@ struct lex_state_link {
|
|||||||
LEX_STATE_LINK_CHANGE,
|
LEX_STATE_LINK_CHANGE,
|
||||||
LEX_STATE_LINK_POP,
|
LEX_STATE_LINK_POP,
|
||||||
} l_type;
|
} l_type;
|
||||||
|
enum link_flags l_flags;
|
||||||
enum lex_state_type_id l_target;
|
enum lex_state_type_id l_target;
|
||||||
enum state_flags l_target_flags;
|
enum state_flags l_target_flags;
|
||||||
unsigned int l_terminators[LEX_STATE_MAX_TERMINATORS];
|
unsigned int l_terminators[LEX_STATE_MAX_TERMINATORS];
|
||||||
|
|||||||
+36
-8
@@ -190,7 +190,9 @@ void lex_state_pop(struct lex_ctx *ctx)
|
|||||||
struct lex_state *state = fx_unbox(struct lex_state, entry, s_entry);
|
struct lex_state *state = fx_unbox(struct lex_state, entry, s_entry);
|
||||||
|
|
||||||
#if defined(VERBOSE)
|
#if defined(VERBOSE)
|
||||||
printf("pop(%s)\n", lex_state_type_id_to_string(state->s_type->s_id));
|
printf("pop(%s) -> %s\n",
|
||||||
|
lex_state_type_id_to_string(state->s_type->s_id),
|
||||||
|
lex_state_type_id_to_string(lex_state_get(ctx)->s_type->s_id));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (state->s_type->s_end) {
|
if (state->s_type->s_end) {
|
||||||
@@ -1114,23 +1116,29 @@ int compare_token_types(unsigned int a, unsigned int b)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
|
static bool do_lex_state_transition(
|
||||||
|
struct lex_ctx *ctx,
|
||||||
|
unsigned int token,
|
||||||
|
bool recursive)
|
||||||
{
|
{
|
||||||
struct lex_state *state = lex_state_get(ctx);
|
struct lex_state *state = lex_state_get(ctx);
|
||||||
for (unsigned int i = 0; i < LEX_STATE_MAX_TERMINATORS; i++) {
|
enum link_flags required_flags = 0;
|
||||||
if (state->s_terminators[i] == TOK_NONE) {
|
if (recursive) {
|
||||||
break;
|
required_flags |= LINK_ALLOW_RECURSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!recursive) {
|
||||||
|
for (unsigned int i = 0; i < state->s_nr_terminators; i++) {
|
||||||
if (state->s_terminators[i] == token) {
|
if (state->s_terminators[i] == token) {
|
||||||
lex_state_pop(ctx);
|
lex_state_pop(ctx);
|
||||||
return;
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const struct lex_state_link *table = state->s_type->s_links;
|
const struct lex_state_link *table = state->s_type->s_links;
|
||||||
if (!table) {
|
if (!table) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MAX_MATCHES 8
|
#define MAX_MATCHES 8
|
||||||
@@ -1140,6 +1148,10 @@ void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
|
|||||||
|
|
||||||
for (unsigned int i = 0; table[i].l_token != TOK_NONE; i++) {
|
for (unsigned int i = 0; table[i].l_token != TOK_NONE; i++) {
|
||||||
int score = compare_token_types(table[i].l_token, token);
|
int score = compare_token_types(table[i].l_token, token);
|
||||||
|
if ((table[i].l_flags & required_flags) != required_flags) {
|
||||||
|
score = 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (score == 0) {
|
if (score == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -1157,14 +1169,16 @@ void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
|
|||||||
#undef MAX_MATCHES
|
#undef MAX_MATCHES
|
||||||
|
|
||||||
if (!match_count) {
|
if (!match_count) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool result = false;
|
||||||
for (unsigned int i = 0; i < match_count; i++) {
|
for (unsigned int i = 0; i < match_count; i++) {
|
||||||
const struct lex_state_link *link = best_matches[i];
|
const struct lex_state_link *link = best_matches[i];
|
||||||
switch (link->l_type) {
|
switch (link->l_type) {
|
||||||
case LEX_STATE_LINK_POP:
|
case LEX_STATE_LINK_POP:
|
||||||
lex_state_pop(ctx);
|
lex_state_pop(ctx);
|
||||||
|
result = true;
|
||||||
break;
|
break;
|
||||||
case LEX_STATE_LINK_PUSH: {
|
case LEX_STATE_LINK_PUSH: {
|
||||||
struct lex_state *state = lex_state_push(
|
struct lex_state *state = lex_state_push(
|
||||||
@@ -1176,16 +1190,30 @@ void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
|
|||||||
state,
|
state,
|
||||||
link->l_terminators[i]);
|
link->l_terminators[i]);
|
||||||
}
|
}
|
||||||
|
result = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case LEX_STATE_LINK_CHANGE:
|
case LEX_STATE_LINK_CHANGE:
|
||||||
lex_state_change(ctx, link->l_target);
|
lex_state_change(ctx, link->l_target);
|
||||||
|
result = true;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void handle_lex_state_transition(struct lex_ctx *ctx, unsigned int token)
|
||||||
|
{
|
||||||
|
bool cont = false;
|
||||||
|
bool recursive = false;
|
||||||
|
do {
|
||||||
|
cont = do_lex_state_transition(ctx, token, recursive);
|
||||||
|
recursive = true;
|
||||||
|
} while (cont);
|
||||||
}
|
}
|
||||||
|
|
||||||
static enum bshell_status read_string_content(struct lex_ctx *ctx)
|
static enum bshell_status read_string_content(struct lex_ctx *ctx)
|
||||||
|
|||||||
Reference in New Issue
Block a user