Files
bshell/bshell/parse/lex/arithmetic.c
T
wash 0c21be8d67 parse: lex: add proper data-driven state-machine functionality
movement between lexer states is now defined (almost) exclusively
by a table of outgoing links defined for each state type.

the main lexer system uses this table to determine when, how, and to
where the state should be changed.

also add a dedicated lexer state for scanning hashtables, due to the
particularly unique rules that apply within.
2026-05-11 23:02:02 +01:00

168 lines
3.7 KiB
C

#include "lex-internal.h"
/*
 * Lex whatever follows a hyphen in arithmetic context.
 *
 * Called by arithmetic_symbol() once read_symbol() has reported SYM_HYPHEN.
 *
 * - If the next character is not alphanumeric, the hyphen stands alone:
 *   a SYM_HYPHEN symbol is pushed and the state-link table is consulted.
 * - Otherwise the following word is read with READ_APPEND_HYPHEN and then
 *   classified:
 *     - integer: a SYM_HYPHEN symbol is pushed first and the integer token
 *       is enqueued with its sign flipped (see the comment below).
 *     - anything else: convert_word_to_operator() is given a chance to
 *       reclassify the token, which is enqueued either way.
 *
 * Returns BSHELL_SUCCESS, or the failure status reported by read_word().
 *
 * NOTE(review): unlike arithmetic_word(), this path never calls
 * handle_lex_state_transition() for the word it reads -- confirm that this
 * is intended.
 */
static enum bshell_status arithmetic_hyphen(struct lex_ctx *ctx)
{
	fx_wchar c = peek_char(ctx);
	if (!fx_wchar_is_alnum(c)) {
		push_symbol(ctx, SYM_HYPHEN);
		handle_lex_state_transition(ctx, SYM_HYPHEN);
		return BSHELL_SUCCESS;
	}

	struct lex_token *tok = NULL;
	enum bshell_status status = read_word(
		ctx,
		READ_NO_SET_TOKEN_START | READ_APPEND_HYPHEN,
		&tok);
	if (status != BSHELL_SUCCESS) {
		return status;
	}

	if (convert_word_to_int(tok)) {
		/* because of APPEND_HYPHEN (which is needed to ensure operator
		 * tokens are detected properly), the resulting number will be
		 * negative.
		 * this token will be preceded by a HYPHEN token, so the number
		 * must be positive */
		tok->tok_int *= -1;
		push_symbol(ctx, SYM_HYPHEN);
	} else {
		/* only the call itself matters here; whether the conversion
		 * succeeded does not change what happens next, so the result
		 * is deliberately discarded */
		(void)convert_word_to_operator(ctx, tok);
	}

	enqueue_token(ctx, tok);
	return BSHELL_SUCCESS;
}
static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
handle_lex_state_transition(ctx, sym->id);
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_SQUOTE:
status = read_literal_string(ctx, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
case SYM_HYPHEN:
return arithmetic_hyphen(ctx);
case SYM_HASH:
return read_line_comment(ctx);
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_DOLLAR_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
break;
}
push_symbol(ctx, sym->id);
return BSHELL_SUCCESS;
}
/*
 * Lex one word in arithmetic context.
 *
 * The word is read with read_word() and classified in this order:
 *   1. keyword -> the transition type is the keyword's own id
 *                 (word->tok_keyword)
 *   2. integer -> TOK_INT
 *   3. otherwise it stays TOK_WORD
 *
 * The resulting type is passed to handle_lex_state_transition() and the
 * token is then enqueued.
 *
 * Returns BSHELL_SUCCESS, or the failure status reported by read_word().
 */
static enum bshell_status arithmetic_word(struct lex_ctx *ctx)
{
	struct lex_token *word = NULL;
	enum bshell_status status = read_word(ctx, 0, &word);
	if (status != BSHELL_SUCCESS) {
		return status;
	}

	unsigned int token_type = TOK_WORD;
	if (convert_word_to_keyword(word)) {
		token_type = word->tok_keyword;
	} else if (convert_word_to_int(word)) {
		token_type = TOK_INT;
	}

	handle_lex_state_transition(ctx, token_type);
	enqueue_token(ctx, word);
	return BSHELL_SUCCESS;
}
/*
 * Produce the next token(s) while the lexer is in the arithmetic state.
 *
 * Leading whitespace is skipped first. If any of it was a newline, a
 * TOK_LINEFEED token is enqueued and the lexer is switched to
 * LEX_STATE_STATEMENT. Otherwise the first non-space character decides
 * whether arithmetic_symbol() or arithmetic_word() handles the input.
 *
 * Returns BSHELL_SUCCESS, or whatever the delegated handler returns.
 */
static enum bshell_status arithmetic_pump_token(struct lex_ctx *ctx)
{
	bool saw_newline = false;
	fx_wchar ch = peek_char(ctx);

	set_token_start(ctx);

	/* consume the whitespace run, noting whether it contains a newline */
	for (; fx_wchar_is_space(ch); ch = peek_char_noread(ctx)) {
		if (ch == '\n') {
			saw_newline = true;
		}

		set_token_end(ctx);
		advance_char_noread(ctx);
	}

	if (!saw_newline) {
		return char_can_begin_symbol(ctx, ch)
			? arithmetic_symbol(ctx)
			: arithmetic_word(ctx);
	}

	/* this state change is made directly rather than through the
	 * state-link table */
	struct lex_token *linefeed = lex_token_create(TOK_LINEFEED);
	enqueue_token(ctx, linefeed);
	lex_state_change(ctx, LEX_STATE_STATEMENT);
	return BSHELL_SUCCESS;
}
/*
 * Outgoing state links for the arithmetic lexer state, terminated by
 * LINK_END and attached to lex_arithmetic_state through .s_links.
 *
 * Each entry names a token type or symbol id together with the lexer state
 * it leads to. The ids listed here are the same kind of values this file
 * passes to handle_lex_state_transition().
 *
 * NOTE(review): the LINK_* macros are defined outside this file. Presumably
 * LINK_CHANGE replaces the current state, LINK_PUSH enters a nested state
 * (its last argument looking like state flags), and LINK_POP returns to the
 * previous state -- confirm against lex-internal.h. Entry order may be
 * significant if the table is searched first-match; verify before
 * reordering.
 */
static const struct lex_state_link links[] = {
LINK_CHANGE(TOK_WORD, LEX_STATE_COMMAND),
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
LINK_POP(SYM_RIGHT_PAREN),
LINK_CHANGE(SYM_SEMICOLON, LEX_STATE_STATEMENT),
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
LINK_PUSH(
SYM_LEFT_PAREN,
LEX_STATE_STATEMENT,
STATEMENT_F_DISABLE_KEYWORDS),
LINK_END,
};
/*
 * Descriptor for the arithmetic lexer state. It ties the state id
 * LEX_STATE_ARITHMETIC to its token pump (arithmetic_pump_token) and its
 * table of outgoing state links (links). This is the only symbol in this
 * file with external linkage.
 */
const struct lex_state_type lex_arithmetic_state = {
.s_id = LEX_STATE_ARITHMETIC,
.s_pump_token = arithmetic_pump_token,
.s_links = links,
};