/*
 * File: bshell/bshell/parse/lex/statement.c
 *
 * (Repository viewer metadata captured with this listing:
 * 237 lines, 5.2 KiB, language C.)
 */

#include "lex-internal.h"
static enum bshell_status statement_hyphen(struct lex_ctx *ctx)
{
fx_wchar c = peek_char(ctx);
if (!fx_wchar_is_alnum(c)) {
push_symbol(ctx, SYM_HYPHEN);
handle_lex_state_transition(ctx, SYM_HYPHEN);
return BSHELL_SUCCESS;
}
struct lex_token *tok = NULL;
enum bshell_status status = read_word(
ctx,
READ_NO_SET_TOKEN_START | READ_APPEND_HYPHEN,
&tok);
if (status != BSHELL_SUCCESS) {
return status;
}
unsigned int token_type = TOK_WORD;
if (convert_word_to_int(tok)) {
token_type = TOK_INT;
/* because of APPEND_HYPHEN (which is needed to ensure operator
* tokens are detected properly), the resulting number will be
* negative.
* this token will be preceded by a HYPHEN token, so the number
* must be positive */
tok->tok_int *= -1;
push_symbol(ctx, SYM_HYPHEN);
} else if (convert_word_to_operator(ctx, tok)) {
token_type = TOK_OPERATOR;
}
handle_lex_state_transition(ctx, token_type);
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
static enum bshell_status statement_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
handle_lex_state_transition(ctx, sym->id);
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DQUOTE:
return BSHELL_SUCCESS;
case SYM_HYPHEN:
return statement_hyphen(ctx);
case SYM_SQUOTE:
status = read_literal_string(ctx, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
case SYM_HASH:
return read_line_comment(ctx);
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_DOLLAR_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
break;
}
push_symbol(ctx, sym->id);
return BSHELL_SUCCESS;
}
static enum bshell_status statement_word(struct lex_ctx *ctx)
{
struct lex_token *word = NULL;
enum bshell_status status = read_word(ctx, 0, &word);
if (status != BSHELL_SUCCESS) {
return status;
}
struct lex_state *state = lex_state_get(ctx);
bool enable_keywords = !(state->s_flags & STATEMENT_F_DISABLE_KEYWORDS);
unsigned int token = TOK_WORD;
if (enable_keywords && convert_word_to_keyword(word)) {
token = word->tok_keyword;
} else if (convert_word_to_int(word)) {
token = TOK_INT;
}
handle_lex_state_transition(ctx, token);
enqueue_token(ctx, word);
return BSHELL_SUCCESS;
}
/*
 * Produce the next token (or state change) while in the statement state.
 *
 * Leading whitespace is skipped. If that whitespace contained at least one
 * '\n', a single TOK_LINEFEED token is emitted and nothing else is lexed in
 * this call. Otherwise the first non-space character selects the handler:
 * a symbol, a switch to the arithmetic state, or a plain word.
 *
 * Returns BSHELL_SUCCESS, or the status of the symbol/word handler.
 */
static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
{
	bool saw_newline = false;
	fx_wchar ch = peek_char(ctx);

	set_token_start(ctx);

	/* consume the whitespace run, remembering whether it crossed a line */
	for (; fx_wchar_is_space(ch); ch = peek_char_noread(ctx)) {
		saw_newline = saw_newline || ch == '\n';
		set_token_end(ctx);
		advance_char_noread(ctx);
	}

	if (saw_newline) {
		/* NOTE(review): the result of lex_token_create() is not
		 * checked before being enqueued -- confirm whether it can
		 * fail. */
		struct lex_token *linefeed = lex_token_create(TOK_LINEFEED);
		enqueue_token(ctx, linefeed);
		handle_lex_state_transition(ctx, TOK_LINEFEED);
		return BSHELL_SUCCESS;
	}

	if (char_can_begin_symbol(ctx, ch)) {
		return statement_symbol(ctx);
	}

	if (!char_has_flags(ctx, ch, LEX_TOKEN_UNARY_ARITHMETIC)) {
		return statement_word(ctx);
	}

	/* the character is flagged LEX_TOKEN_UNARY_ARITHMETIC: switch to the
	 * arithmetic state without consuming it here */
	lex_state_change(ctx, LEX_STATE_ARITHMETIC);
	return BSHELL_SUCCESS;
}
/*
 * State-transition table for the statement lexer state, terminated by
 * LINK_END. It is installed as lex_statement_state.s_links; the token ids
 * passed to handle_lex_state_transition() in this file are presumably
 * looked up here (TODO confirm against that helper).
 *
 * The LINK_PUSH / LINK_CHANGE / LINK_PUSH_WITH_TERM macros are defined
 * outside this file. Judging by their names, CHANGE replaces the current
 * state and PUSH enters a nested one -- confirm against the macro
 * definitions.
 *
 * NOTE(review): SYM_AT_LEFT_BRACE and SYM_LEFT_PAREN each appear twice
 * (a CHANGE entry followed by a PUSH entry). Entry order is therefore
 * likely significant; do not reorder without checking how the table is
 * walked.
 */
static const struct lex_state_link links[] = {
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
/* arithmetic tokens */
LINK_CHANGE(TOK_KEYWORD, LEX_STATE_ARITHMETIC),
LINK_CHANGE(TOK_INT, LEX_STATE_ARITHMETIC),
LINK_PUSH(SYM_DOLLAR, LEX_STATE_ARITHMETIC, 0),
LINK_PUSH(SYM_DOLLAR_LEFT_BRACE, LEX_STATE_ARITHMETIC, 0),
LINK_CHANGE(SYM_AT_LEFT_BRACE, LEX_STATE_ARITHMETIC),
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
LINK_PUSH(SYM_AT, LEX_STATE_ARITHMETIC, 0),
LINK_CHANGE(SYM_LEFT_PAREN, LEX_STATE_ARITHMETIC),
LINK_CHANGE(SYM_BANG, LEX_STATE_ARITHMETIC),
/* nested statement inside parentheses, entered with keywords disabled;
 * SYM_RIGHT_PAREN is passed as the terminator argument */
LINK_PUSH_WITH_TERM(
SYM_LEFT_PAREN,
LEX_STATE_STATEMENT,
STATEMENT_F_DISABLE_KEYWORDS,
SYM_RIGHT_PAREN),
/* statement tokens */
LINK_PUSH(SYM_LEFT_BRACE, LEX_STATE_STATEMENT, 0),
LINK_PUSH_WITH_TERM(
SYM_DOLLAR_LEFT_PAREN,
LEX_STATE_STATEMENT,
0,
SYM_RIGHT_PAREN),
/* command tokens */
LINK_CHANGE(KW_FUNC, LEX_STATE_COMMAND),
LINK_CHANGE(SYM_AMPERSAND, LEX_STATE_COMMAND),
LINK_CHANGE(TOK_WORD, LEX_STATE_COMMAND),
LINK_END,
};
/*
 * Keyword ids associated with the statement state, installed as
 * lex_statement_state.s_keywords. The list ends with the KW_NONE sentinel.
 * Presumably this is the set convert_word_to_keyword() may match while in
 * this state -- confirm against that helper.
 */
static const unsigned int keywords[] = {
KW_FUNC,
KW_IF,
KW_ELSEIF,
KW_ELSE,
KW_NONE,
};
/*
 * Operator ids associated with the statement state, installed as
 * lex_statement_state.s_operators. The list ends with the TKOP_NONE
 * sentinel. Presumably this is the set convert_word_to_operator() may
 * match while in this state -- confirm against that helper.
 */
static const unsigned int operators[] = {
TKOP_BNOT,
TKOP_NOT,
TKOP_NONE,
};
/*
 * Symbol ids associated with the statement state, installed as
 * lex_statement_state.s_symbols. The list ends with the SYM_NONE sentinel.
 * Presumably consulted by char_can_begin_symbol() / read_symbol() -- confirm
 * against those helpers.
 *
 * NOTE(review): statement_symbol() and the links table also handle
 * SYM_HYPHEN, SYM_DOLLAR, SYM_DOLLAR_LEFT_BRACE and SYM_DOLLAR_LEFT_PAREN,
 * none of which are listed here. Check whether they are recognised through
 * another path or are missing from this list.
 */
static const unsigned int symbols[] = {
SYM_AMPERSAND,
SYM_BANG,
SYM_SQUOTE,
SYM_DQUOTE,
SYM_HASH,
SYM_AT,
SYM_AT_LEFT_BRACE,
SYM_PIPE,
SYM_COMMA,
SYM_SEMICOLON,
SYM_LEFT_BRACE,
SYM_RIGHT_BRACE,
SYM_LEFT_BRACKET,
SYM_RIGHT_BRACKET,
SYM_LEFT_PAREN,
SYM_RIGHT_PAREN,
SYM_NONE,
};
/*
 * Descriptor for the statement lexer state (LEX_STATE_STATEMENT). This is
 * the only symbol in this file with external linkage; it bundles the token
 * pump callback with the transition, keyword, operator and symbol tables
 * defined above.
 */
const struct lex_state_type lex_statement_state = {
.s_id = LEX_STATE_STATEMENT,
/* callback that lexes the next token while this state is active */
.s_pump_token = statement_pump_token,
.s_links = links,
.s_keywords = keywords,
.s_operators = operators,
.s_symbols = symbols,
};