parse: lex: implement recording coordinates of lex tokens

This commit is contained in:
2026-05-10 19:13:29 +01:00
parent 7071630af8
commit 7aa2aee5bd
7 changed files with 55 additions and 3 deletions
+1
View File
@@ -53,6 +53,7 @@ struct lex_ctx {
fx_string *lex_tmp;
fx_wchar lex_ch;
fx_queue lex_state;
struct char_cell lex_cursor, lex_start, lex_end;
struct lex_symbol_node *lex_sym_tree;
enum bshell_status lex_status;
};
+2
View File
@@ -110,11 +110,13 @@ static enum bshell_status arithmetic_pump_token(struct lex_ctx *ctx)
fx_wchar c = peek_char(ctx);
bool newline = false;
set_token_start(ctx);
while (fx_wchar_is_space(c)) {
if (c == '\n') {
newline = true;
}
set_token_end(ctx);
advance_char_noread(ctx);
c = peek_char_noread(ctx);
}
+2
View File
@@ -105,11 +105,13 @@ enum bshell_status command_pump_token(struct lex_ctx *ctx)
fx_wchar c = peek_char(ctx);
bool newline = false;
set_token_start(ctx);
while (fx_wchar_is_space(c)) {
if (c == '\n') {
newline = true;
}
set_token_end(ctx);
advance_char_noread(ctx);
c = peek_char_noread(ctx);
}
+5
View File
@@ -50,6 +50,11 @@ extern bool convert_word_to_int(struct lex_token *tok);
extern bool convert_word_to_keyword(struct lex_token *tok);
extern void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok);
/*
 * Variant of enqueue_token() that takes the token's source span explicitly.
 * *start and *end are copied into the token's tok_start / tok_end fields;
 * enqueue_token() calls this with &ctx->lex_start and &ctx->lex_end.
 */
extern void enqueue_token_with_coordinates(
struct lex_ctx *ctx,
struct lex_token *tok,
const struct char_cell *start,
const struct char_cell *end);
extern enum bshell_status read_word(
struct lex_ctx *ctx,
+41 -3
View File
@@ -83,6 +83,16 @@ static const struct lex_state_type *state_types[] = {
[LEX_STATE_STRING] = &lex_string_state,
};
/*
 * Record the lexer's current cursor position as the start of the token
 * being lexed (ctx->lex_start takes a copy of ctx->lex_cursor).
 */
void set_token_start(struct lex_ctx *ctx)
{
	ctx->lex_start = ctx->lex_cursor;
}
/*
 * Record the lexer's current cursor position as the end of the token
 * being lexed (ctx->lex_end takes a copy of ctx->lex_cursor).
 */
void set_token_end(struct lex_ctx *ctx)
{
	ctx->lex_end = ctx->lex_cursor;
}
struct lex_state *lex_state_push(
struct lex_ctx *ctx,
enum lex_state_type_id state_type,
@@ -277,6 +287,7 @@ enum bshell_status lex_ctx_init(
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
ctx->lex_src = src;
ctx->lex_ch = FX_WCHAR_INVALID;
ctx->lex_cursor.c_row = ctx->lex_cursor.c_col = 1;
return BSHELL_SUCCESS;
}
@@ -343,12 +354,18 @@ fx_wchar peek_char_noread(struct lex_ctx *ctx)
static void __advance_char(struct lex_ctx *ctx, bool noread)
{
if (ctx->lex_ch != FX_WCHAR_INVALID) {
ctx->lex_ch = FX_WCHAR_INVALID;
if (ctx->lex_status != BSHELL_SUCCESS) {
return;
}
if (ctx->lex_status != BSHELL_SUCCESS) {
ctx->lex_cursor.c_col++;
if (ctx->lex_ch == '\n') {
ctx->lex_cursor.c_col = 1;
ctx->lex_cursor.c_row++;
}
if (ctx->lex_ch != FX_WCHAR_INVALID) {
ctx->lex_ch = FX_WCHAR_INVALID;
return;
}
@@ -488,6 +505,22 @@ static struct lex_token *get_next_token(struct lex_ctx *ctx)
/*
 * Hand `tok` to enqueue_token_with_coordinates(), using the span currently
 * stored on the lexer context (ctx->lex_start .. ctx->lex_end) as the
 * token's coordinates.
 */
void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok)
{
	const struct char_cell *span_first = &ctx->lex_start;
	const struct char_cell *span_last = &ctx->lex_end;

	enqueue_token_with_coordinates(ctx, tok, span_first, span_last);
}
/*
 * Stamp `tok` with the given start/end coordinates and, when
 * LEX_PRINT_TOKENS is set in ctx->lex_flags, print the token.
 * (The remainder of this function is outside the visible hunk.)
 *
 * NOTE(review): `extern` on a function definition is redundant; the
 * prototype in the header already gives it external linkage.
 */
extern void enqueue_token_with_coordinates(
struct lex_ctx *ctx,
struct lex_token *tok,
const struct char_cell *start,
const struct char_cell *end)
{
/*
 * NOTE(review): `tok` is dereferenced by these two stores, yet the
 * condition right below still tests `tok` for NULL. Either NULL is never
 * passed (then the test is dead) or these stores can crash — confirm
 * against callers and move the stores under a NULL check if needed.
 */
tok->tok_start = *start;
tok->tok_end = *end;
if (tok && (ctx->lex_flags & LEX_PRINT_TOKENS)) {
print_lex_token(tok);
}
@@ -548,6 +581,7 @@ enum bshell_status read_var(
}
fx_string_append_wc(tmp, c);
set_token_end(ctx);
advance_char(ctx);
}
@@ -583,6 +617,7 @@ enum bshell_status read_braced_var(
}
fx_string_append_wc(tmp, c);
set_token_end(ctx);
advance_char(ctx);
}
@@ -705,6 +740,7 @@ enum bshell_status read_literal_string(
if (c == '\'') {
fail = false;
done = true;
set_token_end(ctx);
advance_char(ctx);
break;
}
@@ -848,6 +884,7 @@ enum bshell_status read_symbol(
const struct lex_token_def **out)
{
struct lex_state *state = lex_state_get(ctx);
set_token_start(ctx);
struct lex_symbol_node *node = ctx->lex_sym_tree;
char prev = 0;
@@ -866,6 +903,7 @@ enum bshell_status read_symbol(
}
node = next;
set_token_end(ctx);
advance_char(ctx);
prev = c;
}
+2
View File
@@ -137,11 +137,13 @@ static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
fx_wchar c = peek_char(ctx);
bool newline = false;
set_token_start(ctx);
while (fx_wchar_is_space(c)) {
if (c == '\n') {
newline = true;
}
set_token_end(ctx);
advance_char_noread(ctx);
c = peek_char_noread(ctx);
}
+2
View File
@@ -66,6 +66,7 @@ static enum bshell_status string_content(struct lex_ctx *ctx)
{
fx_wchar c = FX_WCHAR_INVALID;
fx_string *temp = lex_state_get_tempstr(ctx);
set_token_start(ctx);
fx_string_clear(temp);
while (1) {
@@ -80,6 +81,7 @@ static enum bshell_status string_content(struct lex_ctx *ctx)
}
fx_string_append_wc(temp, c);
set_token_end(ctx);
advance_char(ctx);
}