parse: lex: implement recording coordinates of lex tokens
This commit is contained in:
@@ -53,6 +53,7 @@ struct lex_ctx {
|
|||||||
fx_string *lex_tmp;
|
fx_string *lex_tmp;
|
||||||
fx_wchar lex_ch;
|
fx_wchar lex_ch;
|
||||||
fx_queue lex_state;
|
fx_queue lex_state;
|
||||||
|
struct char_cell lex_cursor, lex_start, lex_end;
|
||||||
struct lex_symbol_node *lex_sym_tree;
|
struct lex_symbol_node *lex_sym_tree;
|
||||||
enum bshell_status lex_status;
|
enum bshell_status lex_status;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -110,11 +110,13 @@ static enum bshell_status arithmetic_pump_token(struct lex_ctx *ctx)
|
|||||||
fx_wchar c = peek_char(ctx);
|
fx_wchar c = peek_char(ctx);
|
||||||
bool newline = false;
|
bool newline = false;
|
||||||
|
|
||||||
|
set_token_start(ctx);
|
||||||
while (fx_wchar_is_space(c)) {
|
while (fx_wchar_is_space(c)) {
|
||||||
if (c == '\n') {
|
if (c == '\n') {
|
||||||
newline = true;
|
newline = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
set_token_end(ctx);
|
||||||
advance_char_noread(ctx);
|
advance_char_noread(ctx);
|
||||||
c = peek_char_noread(ctx);
|
c = peek_char_noread(ctx);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -105,11 +105,13 @@ enum bshell_status command_pump_token(struct lex_ctx *ctx)
|
|||||||
fx_wchar c = peek_char(ctx);
|
fx_wchar c = peek_char(ctx);
|
||||||
bool newline = false;
|
bool newline = false;
|
||||||
|
|
||||||
|
set_token_start(ctx);
|
||||||
while (fx_wchar_is_space(c)) {
|
while (fx_wchar_is_space(c)) {
|
||||||
if (c == '\n') {
|
if (c == '\n') {
|
||||||
newline = true;
|
newline = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
set_token_end(ctx);
|
||||||
advance_char_noread(ctx);
|
advance_char_noread(ctx);
|
||||||
c = peek_char_noread(ctx);
|
c = peek_char_noread(ctx);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -50,6 +50,11 @@ extern bool convert_word_to_int(struct lex_token *tok);
|
|||||||
extern bool convert_word_to_keyword(struct lex_token *tok);
|
extern bool convert_word_to_keyword(struct lex_token *tok);
|
||||||
|
|
||||||
extern void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok);
|
extern void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok);
|
||||||
|
extern void enqueue_token_with_coordinates(
|
||||||
|
struct lex_ctx *ctx,
|
||||||
|
struct lex_token *tok,
|
||||||
|
const struct char_cell *start,
|
||||||
|
const struct char_cell *end);
|
||||||
|
|
||||||
extern enum bshell_status read_word(
|
extern enum bshell_status read_word(
|
||||||
struct lex_ctx *ctx,
|
struct lex_ctx *ctx,
|
||||||
|
|||||||
+41
-3
@@ -83,6 +83,16 @@ static const struct lex_state_type *state_types[] = {
|
|||||||
[LEX_STATE_STRING] = &lex_string_state,
|
[LEX_STATE_STRING] = &lex_string_state,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void set_token_start(struct lex_ctx *ctx)
|
||||||
|
{
|
||||||
|
memcpy(&ctx->lex_start, &ctx->lex_cursor, sizeof ctx->lex_cursor);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_token_end(struct lex_ctx *ctx)
|
||||||
|
{
|
||||||
|
memcpy(&ctx->lex_end, &ctx->lex_cursor, sizeof ctx->lex_cursor);
|
||||||
|
}
|
||||||
|
|
||||||
struct lex_state *lex_state_push(
|
struct lex_state *lex_state_push(
|
||||||
struct lex_ctx *ctx,
|
struct lex_ctx *ctx,
|
||||||
enum lex_state_type_id state_type,
|
enum lex_state_type_id state_type,
|
||||||
@@ -277,6 +287,7 @@ enum bshell_status lex_ctx_init(
|
|||||||
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
|
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
|
||||||
ctx->lex_src = src;
|
ctx->lex_src = src;
|
||||||
ctx->lex_ch = FX_WCHAR_INVALID;
|
ctx->lex_ch = FX_WCHAR_INVALID;
|
||||||
|
ctx->lex_cursor.c_row = ctx->lex_cursor.c_col = 1;
|
||||||
|
|
||||||
return BSHELL_SUCCESS;
|
return BSHELL_SUCCESS;
|
||||||
}
|
}
|
||||||
@@ -343,12 +354,18 @@ fx_wchar peek_char_noread(struct lex_ctx *ctx)
|
|||||||
|
|
||||||
static void __advance_char(struct lex_ctx *ctx, bool noread)
|
static void __advance_char(struct lex_ctx *ctx, bool noread)
|
||||||
{
|
{
|
||||||
if (ctx->lex_ch != FX_WCHAR_INVALID) {
|
if (ctx->lex_status != BSHELL_SUCCESS) {
|
||||||
ctx->lex_ch = FX_WCHAR_INVALID;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx->lex_status != BSHELL_SUCCESS) {
|
ctx->lex_cursor.c_col++;
|
||||||
|
if (ctx->lex_ch == '\n') {
|
||||||
|
ctx->lex_cursor.c_col = 1;
|
||||||
|
ctx->lex_cursor.c_row++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ctx->lex_ch != FX_WCHAR_INVALID) {
|
||||||
|
ctx->lex_ch = FX_WCHAR_INVALID;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -488,6 +505,22 @@ static struct lex_token *get_next_token(struct lex_ctx *ctx)
|
|||||||
|
|
||||||
void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok)
|
void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok)
|
||||||
{
|
{
|
||||||
|
enqueue_token_with_coordinates(
|
||||||
|
ctx,
|
||||||
|
tok,
|
||||||
|
&ctx->lex_start,
|
||||||
|
&ctx->lex_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern void enqueue_token_with_coordinates(
|
||||||
|
struct lex_ctx *ctx,
|
||||||
|
struct lex_token *tok,
|
||||||
|
const struct char_cell *start,
|
||||||
|
const struct char_cell *end)
|
||||||
|
{
|
||||||
|
tok->tok_start = *start;
|
||||||
|
tok->tok_end = *end;
|
||||||
|
|
||||||
if (tok && (ctx->lex_flags & LEX_PRINT_TOKENS)) {
|
if (tok && (ctx->lex_flags & LEX_PRINT_TOKENS)) {
|
||||||
print_lex_token(tok);
|
print_lex_token(tok);
|
||||||
}
|
}
|
||||||
@@ -548,6 +581,7 @@ enum bshell_status read_var(
|
|||||||
}
|
}
|
||||||
|
|
||||||
fx_string_append_wc(tmp, c);
|
fx_string_append_wc(tmp, c);
|
||||||
|
set_token_end(ctx);
|
||||||
advance_char(ctx);
|
advance_char(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -583,6 +617,7 @@ enum bshell_status read_braced_var(
|
|||||||
}
|
}
|
||||||
|
|
||||||
fx_string_append_wc(tmp, c);
|
fx_string_append_wc(tmp, c);
|
||||||
|
set_token_end(ctx);
|
||||||
advance_char(ctx);
|
advance_char(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -705,6 +740,7 @@ enum bshell_status read_literal_string(
|
|||||||
if (c == '\'') {
|
if (c == '\'') {
|
||||||
fail = false;
|
fail = false;
|
||||||
done = true;
|
done = true;
|
||||||
|
set_token_end(ctx);
|
||||||
advance_char(ctx);
|
advance_char(ctx);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -848,6 +884,7 @@ enum bshell_status read_symbol(
|
|||||||
const struct lex_token_def **out)
|
const struct lex_token_def **out)
|
||||||
{
|
{
|
||||||
struct lex_state *state = lex_state_get(ctx);
|
struct lex_state *state = lex_state_get(ctx);
|
||||||
|
set_token_start(ctx);
|
||||||
|
|
||||||
struct lex_symbol_node *node = ctx->lex_sym_tree;
|
struct lex_symbol_node *node = ctx->lex_sym_tree;
|
||||||
char prev = 0;
|
char prev = 0;
|
||||||
@@ -866,6 +903,7 @@ enum bshell_status read_symbol(
|
|||||||
}
|
}
|
||||||
|
|
||||||
node = next;
|
node = next;
|
||||||
|
set_token_end(ctx);
|
||||||
advance_char(ctx);
|
advance_char(ctx);
|
||||||
prev = c;
|
prev = c;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -137,11 +137,13 @@ static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
|
|||||||
fx_wchar c = peek_char(ctx);
|
fx_wchar c = peek_char(ctx);
|
||||||
bool newline = false;
|
bool newline = false;
|
||||||
|
|
||||||
|
set_token_start(ctx);
|
||||||
while (fx_wchar_is_space(c)) {
|
while (fx_wchar_is_space(c)) {
|
||||||
if (c == '\n') {
|
if (c == '\n') {
|
||||||
newline = true;
|
newline = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
set_token_end(ctx);
|
||||||
advance_char_noread(ctx);
|
advance_char_noread(ctx);
|
||||||
c = peek_char_noread(ctx);
|
c = peek_char_noread(ctx);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -66,6 +66,7 @@ static enum bshell_status string_content(struct lex_ctx *ctx)
|
|||||||
{
|
{
|
||||||
fx_wchar c = FX_WCHAR_INVALID;
|
fx_wchar c = FX_WCHAR_INVALID;
|
||||||
fx_string *temp = lex_state_get_tempstr(ctx);
|
fx_string *temp = lex_state_get_tempstr(ctx);
|
||||||
|
set_token_start(ctx);
|
||||||
fx_string_clear(temp);
|
fx_string_clear(temp);
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
@@ -80,6 +81,7 @@ static enum bshell_status string_content(struct lex_ctx *ctx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
fx_string_append_wc(temp, c);
|
fx_string_append_wc(temp, c);
|
||||||
|
set_token_end(ctx);
|
||||||
advance_char(ctx);
|
advance_char(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user