0c21be8d67
Movement between lexer states is now defined (almost) exclusively by a table of outgoing links defined for each state type. The main lexer system uses this table to determine when, how, and to where the state should be changed. Also add a dedicated lexer state for scanning hashtables, due to the unique rules that apply within them.
168 lines
3.7 KiB
C
168 lines
3.7 KiB
C
#include "lex-internal.h"
|
|
|
|
static enum bshell_status arithmetic_hyphen(struct lex_ctx *ctx)
|
|
{
|
|
fx_wchar c = peek_char(ctx);
|
|
if (!fx_wchar_is_alnum(c)) {
|
|
push_symbol(ctx, SYM_HYPHEN);
|
|
handle_lex_state_transition(ctx, SYM_HYPHEN);
|
|
return BSHELL_SUCCESS;
|
|
}
|
|
|
|
struct lex_token *tok = NULL;
|
|
enum bshell_status status = read_word(
|
|
ctx,
|
|
READ_NO_SET_TOKEN_START | READ_APPEND_HYPHEN,
|
|
&tok);
|
|
if (status != BSHELL_SUCCESS) {
|
|
return status;
|
|
}
|
|
|
|
unsigned int token_type = TOK_WORD;
|
|
if (convert_word_to_int(tok)) {
|
|
token_type = TOK_INT;
|
|
|
|
/* because of APPEND_HYPHEN (which is needed to ensure operator
|
|
* tokens are detected properly), the resulting number will be
|
|
* negative.
|
|
* this token will be preceded by a HYPHEN token, so the number
|
|
* must be positive */
|
|
tok->tok_int *= -1;
|
|
push_symbol(ctx, SYM_HYPHEN);
|
|
} else if (convert_word_to_operator(ctx, tok)) {
|
|
token_type = TOK_OPERATOR;
|
|
}
|
|
|
|
enqueue_token(ctx, tok);
|
|
return BSHELL_SUCCESS;
|
|
}
|
|
|
|
static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
|
|
{
|
|
const struct lex_token_def *sym = NULL;
|
|
enum bshell_status status = read_symbol(ctx, &sym);
|
|
|
|
if (status != BSHELL_SUCCESS) {
|
|
return status;
|
|
}
|
|
|
|
handle_lex_state_transition(ctx, sym->id);
|
|
|
|
struct lex_token *tok = NULL;
|
|
switch (sym->id) {
|
|
case SYM_SQUOTE:
|
|
status = read_literal_string(ctx, &tok);
|
|
if (status != BSHELL_SUCCESS) {
|
|
return status;
|
|
}
|
|
enqueue_token(ctx, tok);
|
|
return BSHELL_SUCCESS;
|
|
case SYM_HYPHEN:
|
|
return arithmetic_hyphen(ctx);
|
|
case SYM_HASH:
|
|
return read_line_comment(ctx);
|
|
case SYM_DOLLAR:
|
|
status = read_var(ctx, TOK_VAR, &tok);
|
|
if (status != BSHELL_SUCCESS) {
|
|
return status;
|
|
}
|
|
|
|
enqueue_token(ctx, tok);
|
|
return status;
|
|
case SYM_AT:
|
|
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
|
if (status != BSHELL_SUCCESS) {
|
|
return status;
|
|
}
|
|
|
|
enqueue_token(ctx, tok);
|
|
return status;
|
|
case SYM_DOLLAR_LEFT_BRACE:
|
|
status = read_braced_var(ctx, TOK_VAR, &tok);
|
|
if (status != BSHELL_SUCCESS) {
|
|
return status;
|
|
}
|
|
|
|
enqueue_token(ctx, tok);
|
|
return status;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
push_symbol(ctx, sym->id);
|
|
|
|
return BSHELL_SUCCESS;
|
|
}
|
|
|
|
static enum bshell_status arithmetic_word(struct lex_ctx *ctx)
|
|
{
|
|
struct lex_token *word = NULL;
|
|
enum bshell_status status = read_word(ctx, 0, &word);
|
|
if (status != BSHELL_SUCCESS) {
|
|
return status;
|
|
}
|
|
|
|
unsigned int token_type = TOK_WORD;
|
|
bool kw = false, number = false;
|
|
if (convert_word_to_keyword(word)) {
|
|
token_type = word->tok_keyword;
|
|
} else if (convert_word_to_int(word)) {
|
|
token_type = TOK_INT;
|
|
}
|
|
|
|
handle_lex_state_transition(ctx, token_type);
|
|
|
|
enqueue_token(ctx, word);
|
|
return BSHELL_SUCCESS;
|
|
}
|
|
|
|
static enum bshell_status arithmetic_pump_token(struct lex_ctx *ctx)
|
|
{
|
|
fx_wchar c = peek_char(ctx);
|
|
bool newline = false;
|
|
|
|
set_token_start(ctx);
|
|
while (fx_wchar_is_space(c)) {
|
|
if (c == '\n') {
|
|
newline = true;
|
|
}
|
|
|
|
set_token_end(ctx);
|
|
advance_char_noread(ctx);
|
|
c = peek_char_noread(ctx);
|
|
}
|
|
|
|
if (newline) {
|
|
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
|
enqueue_token(ctx, tok);
|
|
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
|
return BSHELL_SUCCESS;
|
|
}
|
|
|
|
if (char_can_begin_symbol(ctx, c)) {
|
|
return arithmetic_symbol(ctx);
|
|
}
|
|
|
|
return arithmetic_word(ctx);
|
|
}
|
|
|
|
static const struct lex_state_link links[] = {
|
|
LINK_CHANGE(TOK_WORD, LEX_STATE_COMMAND),
|
|
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
|
|
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
|
|
LINK_POP(SYM_RIGHT_PAREN),
|
|
LINK_CHANGE(SYM_SEMICOLON, LEX_STATE_STATEMENT),
|
|
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
|
|
LINK_PUSH(
|
|
SYM_LEFT_PAREN,
|
|
LEX_STATE_STATEMENT,
|
|
STATEMENT_F_DISABLE_KEYWORDS),
|
|
LINK_END,
|
|
};
|
|
|
|
const struct lex_state_type lex_arithmetic_state = {
|
|
.s_id = LEX_STATE_ARITHMETIC,
|
|
.s_pump_token = arithmetic_pump_token,
|
|
.s_links = links,
|
|
};
|