parse: lex: implement recording coordinates of lex tokens
This commit is contained in:
@@ -53,6 +53,7 @@ struct lex_ctx {
|
||||
fx_string *lex_tmp;
|
||||
fx_wchar lex_ch;
|
||||
fx_queue lex_state;
|
||||
struct char_cell lex_cursor, lex_start, lex_end;
|
||||
struct lex_symbol_node *lex_sym_tree;
|
||||
enum bshell_status lex_status;
|
||||
};
|
||||
|
||||
@@ -110,11 +110,13 @@ static enum bshell_status arithmetic_pump_token(struct lex_ctx *ctx)
|
||||
fx_wchar c = peek_char(ctx);
|
||||
bool newline = false;
|
||||
|
||||
set_token_start(ctx);
|
||||
while (fx_wchar_is_space(c)) {
|
||||
if (c == '\n') {
|
||||
newline = true;
|
||||
}
|
||||
|
||||
set_token_end(ctx);
|
||||
advance_char_noread(ctx);
|
||||
c = peek_char_noread(ctx);
|
||||
}
|
||||
|
||||
@@ -105,11 +105,13 @@ enum bshell_status command_pump_token(struct lex_ctx *ctx)
|
||||
fx_wchar c = peek_char(ctx);
|
||||
bool newline = false;
|
||||
|
||||
set_token_start(ctx);
|
||||
while (fx_wchar_is_space(c)) {
|
||||
if (c == '\n') {
|
||||
newline = true;
|
||||
}
|
||||
|
||||
set_token_end(ctx);
|
||||
advance_char_noread(ctx);
|
||||
c = peek_char_noread(ctx);
|
||||
}
|
||||
|
||||
@@ -50,6 +50,11 @@ extern bool convert_word_to_int(struct lex_token *tok);
|
||||
extern bool convert_word_to_keyword(struct lex_token *tok);
|
||||
|
||||
extern void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok);
|
||||
extern void enqueue_token_with_coordinates(
|
||||
struct lex_ctx *ctx,
|
||||
struct lex_token *tok,
|
||||
const struct char_cell *start,
|
||||
const struct char_cell *end);
|
||||
|
||||
extern enum bshell_status read_word(
|
||||
struct lex_ctx *ctx,
|
||||
|
||||
+41
-3
@@ -83,6 +83,16 @@ static const struct lex_state_type *state_types[] = {
|
||||
[LEX_STATE_STRING] = &lex_string_state,
|
||||
};
|
||||
|
||||
void set_token_start(struct lex_ctx *ctx)
|
||||
{
|
||||
memcpy(&ctx->lex_start, &ctx->lex_cursor, sizeof ctx->lex_cursor);
|
||||
}
|
||||
|
||||
void set_token_end(struct lex_ctx *ctx)
|
||||
{
|
||||
memcpy(&ctx->lex_end, &ctx->lex_cursor, sizeof ctx->lex_cursor);
|
||||
}
|
||||
|
||||
struct lex_state *lex_state_push(
|
||||
struct lex_ctx *ctx,
|
||||
enum lex_state_type_id state_type,
|
||||
@@ -277,6 +287,7 @@ enum bshell_status lex_ctx_init(
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
|
||||
ctx->lex_src = src;
|
||||
ctx->lex_ch = FX_WCHAR_INVALID;
|
||||
ctx->lex_cursor.c_row = ctx->lex_cursor.c_col = 1;
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
@@ -343,12 +354,18 @@ fx_wchar peek_char_noread(struct lex_ctx *ctx)
|
||||
|
||||
static void __advance_char(struct lex_ctx *ctx, bool noread)
|
||||
{
|
||||
if (ctx->lex_ch != FX_WCHAR_INVALID) {
|
||||
ctx->lex_ch = FX_WCHAR_INVALID;
|
||||
if (ctx->lex_status != BSHELL_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx->lex_status != BSHELL_SUCCESS) {
|
||||
ctx->lex_cursor.c_col++;
|
||||
if (ctx->lex_ch == '\n') {
|
||||
ctx->lex_cursor.c_col = 1;
|
||||
ctx->lex_cursor.c_row++;
|
||||
}
|
||||
|
||||
if (ctx->lex_ch != FX_WCHAR_INVALID) {
|
||||
ctx->lex_ch = FX_WCHAR_INVALID;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -488,6 +505,22 @@ static struct lex_token *get_next_token(struct lex_ctx *ctx)
|
||||
|
||||
void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok)
|
||||
{
|
||||
enqueue_token_with_coordinates(
|
||||
ctx,
|
||||
tok,
|
||||
&ctx->lex_start,
|
||||
&ctx->lex_end);
|
||||
}
|
||||
|
||||
extern void enqueue_token_with_coordinates(
|
||||
struct lex_ctx *ctx,
|
||||
struct lex_token *tok,
|
||||
const struct char_cell *start,
|
||||
const struct char_cell *end)
|
||||
{
|
||||
tok->tok_start = *start;
|
||||
tok->tok_end = *end;
|
||||
|
||||
if (tok && (ctx->lex_flags & LEX_PRINT_TOKENS)) {
|
||||
print_lex_token(tok);
|
||||
}
|
||||
@@ -548,6 +581,7 @@ enum bshell_status read_var(
|
||||
}
|
||||
|
||||
fx_string_append_wc(tmp, c);
|
||||
set_token_end(ctx);
|
||||
advance_char(ctx);
|
||||
}
|
||||
|
||||
@@ -583,6 +617,7 @@ enum bshell_status read_braced_var(
|
||||
}
|
||||
|
||||
fx_string_append_wc(tmp, c);
|
||||
set_token_end(ctx);
|
||||
advance_char(ctx);
|
||||
}
|
||||
|
||||
@@ -705,6 +740,7 @@ enum bshell_status read_literal_string(
|
||||
if (c == '\'') {
|
||||
fail = false;
|
||||
done = true;
|
||||
set_token_end(ctx);
|
||||
advance_char(ctx);
|
||||
break;
|
||||
}
|
||||
@@ -848,6 +884,7 @@ enum bshell_status read_symbol(
|
||||
const struct lex_token_def **out)
|
||||
{
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
set_token_start(ctx);
|
||||
|
||||
struct lex_symbol_node *node = ctx->lex_sym_tree;
|
||||
char prev = 0;
|
||||
@@ -866,6 +903,7 @@ enum bshell_status read_symbol(
|
||||
}
|
||||
|
||||
node = next;
|
||||
set_token_end(ctx);
|
||||
advance_char(ctx);
|
||||
prev = c;
|
||||
}
|
||||
|
||||
@@ -137,11 +137,13 @@ static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
|
||||
fx_wchar c = peek_char(ctx);
|
||||
bool newline = false;
|
||||
|
||||
set_token_start(ctx);
|
||||
while (fx_wchar_is_space(c)) {
|
||||
if (c == '\n') {
|
||||
newline = true;
|
||||
}
|
||||
|
||||
set_token_end(ctx);
|
||||
advance_char_noread(ctx);
|
||||
c = peek_char_noread(ctx);
|
||||
}
|
||||
|
||||
@@ -66,6 +66,7 @@ static enum bshell_status string_content(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = FX_WCHAR_INVALID;
|
||||
fx_string *temp = lex_state_get_tempstr(ctx);
|
||||
set_token_start(ctx);
|
||||
fx_string_clear(temp);
|
||||
|
||||
while (1) {
|
||||
@@ -80,6 +81,7 @@ static enum bshell_status string_content(struct lex_ctx *ctx)
|
||||
}
|
||||
|
||||
fx_string_append_wc(temp, c);
|
||||
set_token_end(ctx);
|
||||
advance_char(ctx);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user