parse: lex: re-implement lexer as a state machine to allow more complex scanning behaviour
This commit is contained in:
+11
-29
@@ -12,23 +12,21 @@ struct line_source;
|
||||
|
||||
enum lex_flags {
|
||||
LEX_PRINT_TOKENS = 0x01u,
|
||||
|
||||
/* these flags are for lex_ctx_peek and lex_ctx_claim */
|
||||
LEX_ENABLE_KEYWORD = 0x0100u,
|
||||
LEX_ENABLE_INT = 0x0200u,
|
||||
LEX_ENABLE_SYMBOL = 0x0400u,
|
||||
};
|
||||
|
||||
enum lex_token_flags {
|
||||
LEX_TOKEN_ENABLE_IN_STRING = 0x01u,
|
||||
LEX_TOKEN_ENABLE_IN_WORD = 0x02u,
|
||||
enum lex_state_type_id {
|
||||
LEX_STATE_STATEMENT = 0x01u,
|
||||
LEX_STATE_EXPRESSION = 0x02u,
|
||||
LEX_STATE_COMMAND = 0x04u,
|
||||
LEX_STATE_ARITHMETIC = 0x08u,
|
||||
LEX_STATE_STRING = 0x10u,
|
||||
};
|
||||
|
||||
struct lex_token_def {
|
||||
int id;
|
||||
const char *name;
|
||||
uint64_t name_hash;
|
||||
enum lex_token_flags flags;
|
||||
enum lex_state_type_id enabled_states;
|
||||
};
|
||||
|
||||
struct lex_symbol_node {
|
||||
@@ -39,33 +37,19 @@ struct lex_symbol_node {
|
||||
fx_queue s_children;
|
||||
};
|
||||
|
||||
enum lex_state_type {
|
||||
LEX_STATE_NORMAL = 0,
|
||||
LEX_STATE_WORD,
|
||||
LEX_STATE_STRING,
|
||||
LEX_STATE_INTERPOLATION,
|
||||
};
|
||||
|
||||
struct lex_state {
|
||||
enum lex_state_type s_type;
|
||||
const struct lex_state_type *s_type;
|
||||
unsigned int s_paren_depth;
|
||||
fx_queue_entry s_entry;
|
||||
fx_string *s_tempstr;
|
||||
};
|
||||
|
||||
struct lex_ctx {
|
||||
enum lex_flags lex_flags;
|
||||
/* lex_ctx maintains two queues of tokens.
|
||||
* lex_words is a simple queue of all WORDS scanned by the lexer,
|
||||
* without any further parsing applied to potentially convert the words
|
||||
* into TOK_INT, TOK_SYMBOL, etc.
|
||||
* lex_tokens represent all of the tokens generated by applying the
|
||||
* aforementioned parsing.
|
||||
* the two queues are kept in sync such that, as tokens are dequeued
|
||||
* from one queue, the other queue is moved forward too. */
|
||||
fx_queue lex_tokens, lex_words;
|
||||
fx_queue lex_tokens;
|
||||
struct line_source *lex_src;
|
||||
fx_stringstream *lex_buf;
|
||||
fx_string *lex_tmp, *lex_wordbuf;
|
||||
fx_string *lex_tmp;
|
||||
fx_wchar lex_ch;
|
||||
fx_queue lex_state;
|
||||
struct lex_symbol_node *lex_sym_tree;
|
||||
@@ -79,9 +63,7 @@ extern enum bshell_status lex_ctx_init(
|
||||
extern enum bshell_status lex_ctx_cleanup(struct lex_ctx *ctx);
|
||||
|
||||
extern struct lex_token *lex_ctx_peek(struct lex_ctx *ctx);
|
||||
extern struct lex_token *lex_ctx_peek_word(struct lex_ctx *ctx);
|
||||
extern struct lex_token *lex_ctx_claim(struct lex_ctx *ctx);
|
||||
extern struct lex_token *lex_ctx_claim_word(struct lex_ctx *ctx);
|
||||
extern void lex_ctx_discard(struct lex_ctx *ctx);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
enum bshell_status status = read_symbol(ctx, &sym);
|
||||
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
switch (sym->id) {
|
||||
case SYM_SQUOTE:
|
||||
status = read_literal_string(ctx, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
|
||||
case SYM_HASH:
|
||||
return read_line_comment(ctx);
|
||||
case SYM_DQUOTE:
|
||||
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR:
|
||||
status = read_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT:
|
||||
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
push_symbol(ctx, sym->id);
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_EXPRESSION);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_RIGHT_PAREN:
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_SEMICOLON:
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status arithmetic_word(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *word = NULL;
|
||||
enum bshell_status status = read_word(ctx, &word);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
bool converted = convert_word_to_keyword(word);
|
||||
if (!converted) {
|
||||
converted = convert_word_to_int(word);
|
||||
}
|
||||
|
||||
enqueue_token(ctx, word);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status arithmetic_pump_token(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
bool newline = false;
|
||||
|
||||
while (fx_wchar_is_space(c)) {
|
||||
if (c == '\n') {
|
||||
newline = true;
|
||||
}
|
||||
|
||||
advance_char_noread(ctx);
|
||||
c = peek_char_noread(ctx);
|
||||
}
|
||||
|
||||
if (newline) {
|
||||
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
return arithmetic_symbol(ctx);
|
||||
}
|
||||
|
||||
return arithmetic_word(ctx);
|
||||
}
|
||||
|
||||
const struct lex_state_type lex_arithmetic_state = {
|
||||
.s_id = LEX_STATE_ARITHMETIC,
|
||||
.s_pump_token = arithmetic_pump_token,
|
||||
};
|
||||
@@ -0,0 +1,131 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
static enum bshell_status command_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
enum bshell_status status = read_symbol(ctx, &sym);
|
||||
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
switch (sym->id) {
|
||||
case SYM_SQUOTE:
|
||||
status = read_literal_string(ctx, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
|
||||
case SYM_HASH:
|
||||
return read_line_comment(ctx);
|
||||
case SYM_DQUOTE:
|
||||
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR:
|
||||
status = read_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT:
|
||||
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
push_symbol(ctx, sym->id);
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_EXPRESSION);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_RIGHT_PAREN:
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_SEMICOLON:
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status command_word(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *word = NULL;
|
||||
enum bshell_status status = read_word(ctx, &word);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, word);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
enum bshell_status command_pump_token(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
bool newline = false;
|
||||
|
||||
while (fx_wchar_is_space(c)) {
|
||||
if (c == '\n') {
|
||||
newline = true;
|
||||
}
|
||||
|
||||
advance_char_noread(ctx);
|
||||
c = peek_char_noread(ctx);
|
||||
}
|
||||
|
||||
if (newline) {
|
||||
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
return command_symbol(ctx);
|
||||
}
|
||||
|
||||
return command_word(ctx);
|
||||
}
|
||||
|
||||
const struct lex_state_type lex_command_state = {
|
||||
.s_id = LEX_STATE_COMMAND,
|
||||
.s_pump_token = command_pump_token,
|
||||
};
|
||||
@@ -0,0 +1,134 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
static enum bshell_status expression_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
enum bshell_status status = read_symbol(ctx, &sym);
|
||||
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_DQUOTE:
|
||||
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR:
|
||||
status = read_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return status;
|
||||
case SYM_AT:
|
||||
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return status;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return status;
|
||||
case SYM_AT_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return status;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
push_symbol(ctx, sym->id);
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_EXPRESSION);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_RIGHT_PAREN:
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_SEMICOLON:
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status expression_word(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *word = NULL;
|
||||
enum bshell_status status = read_word(ctx, &word);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
bool converted = convert_word_to_int(word);
|
||||
|
||||
if (converted) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
} else {
|
||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
||||
}
|
||||
|
||||
enqueue_token(ctx, word);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status expression_pump_token(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
bool newline = false;
|
||||
|
||||
while (fx_wchar_is_space(c)) {
|
||||
if (c == '\n') {
|
||||
newline = true;
|
||||
}
|
||||
|
||||
advance_char_noread(ctx);
|
||||
c = peek_char_noread(ctx);
|
||||
}
|
||||
|
||||
if (newline) {
|
||||
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
return expression_symbol(ctx);
|
||||
}
|
||||
|
||||
return expression_word(ctx);
|
||||
}
|
||||
|
||||
const struct lex_state_type lex_expression_state = {
|
||||
.s_id = LEX_STATE_EXPRESSION,
|
||||
.s_pump_token = expression_pump_token,
|
||||
};
|
||||
@@ -0,0 +1,75 @@
|
||||
#ifndef PARSE_LEX_INTERNAL_H_
#define PARSE_LEX_INTERNAL_H_

/*
 * Declarations shared between the lexer core and the per-state scanners.
 * Not part of the public lexer interface.
 */

#include "../../status.h"
#include "../lex.h"
#include "../token.h"

struct lex_ctx;

/* callback signatures implemented by a lexer state */
typedef enum bshell_status (*lex_state_pump_token)(struct lex_ctx *);
typedef enum bshell_status (*lex_state_begin)(struct lex_ctx *);
typedef enum bshell_status (*lex_state_end)(struct lex_ctx *);

/* static description of one lexer state */
struct lex_state_type {
	/* which state this descriptor implements */
	enum lex_state_type_id s_id;
	/* scans input and enqueues the next token(s) for this state */
	lex_state_pump_token s_pump_token;
	/* optional: invoked when the state is pushed or switched to */
	lex_state_begin s_begin;
	/* optional: invoked when the state is popped or switched away from */
	lex_state_end s_end;
};

/*
 * State descriptors, one per translation unit. Declared here so that every
 * definition is checked against a single shared declaration.
 */
extern const struct lex_state_type lex_statement_state;
extern const struct lex_state_type lex_expression_state;
extern const struct lex_state_type lex_command_state;
extern const struct lex_state_type lex_arithmetic_state;
extern const struct lex_state_type lex_string_state;

/*
 * NOTE(review): the arithmetic, command and expression scanners name their
 * pumps <state>_pump_token and expose them through the descriptors above, so
 * the pump_token_<state> declarations below may have no definition - confirm
 * before relying on them.
 */
extern enum bshell_status pump_token_statement(struct lex_ctx *ctx);
extern enum bshell_status pump_token_expression(struct lex_ctx *ctx);
extern enum bshell_status pump_token_command(struct lex_ctx *ctx);
extern enum bshell_status pump_token_arithmetic(struct lex_ctx *ctx);
extern enum bshell_status pump_token_string(struct lex_ctx *ctx);

/*
 * Lexer state stack.
 *
 * lex_state_push() adds a state of the given type on top of the stack and
 * runs its s_begin hook; callers treat a NULL return as out-of-memory.
 * lex_state_pop() runs s_end and removes the top state, but never removes the
 * root state. lex_state_get() returns the top state. lex_state_change()
 * retypes the top state in place, running the old type's s_end and the new
 * type's s_begin. lex_state_get_tempstr() returns the top state's scratch
 * string, creating it on first use (NULL on failure).
 */
extern struct lex_state *lex_state_push(
	struct lex_ctx *ctx,
	enum lex_state_type_id state_type);
extern void lex_state_pop(struct lex_ctx *ctx);
extern struct lex_state *lex_state_get(struct lex_ctx *ctx);
extern void lex_state_change(struct lex_ctx *ctx, enum lex_state_type_id type);
extern fx_string *lex_state_get_tempstr(struct lex_ctx *ctx);

/* character-level input access */
extern fx_wchar peek_char(struct lex_ctx *ctx);
extern fx_wchar peek_char_noread(struct lex_ctx *ctx);
extern void advance_char(struct lex_ctx *ctx);
extern void advance_char_noread(struct lex_ctx *ctx);

/*
 * Word classification. The convert_* functions retype the token in place and
 * return true when a conversion took place.
 */
extern bool string_is_valid_number(const char *s, long long *out);
extern bool convert_word_to_int(struct lex_token *tok);
extern bool convert_word_to_keyword(struct lex_token *tok);

/* append a finished token to the lexer's output queue */
extern void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok);

/*
 * Token readers. read_var() and read_braced_var() hand the new token back
 * through `out` and leave enqueueing to the caller; the scanners in this
 * directory use read_word() and read_literal_string() the same way.
 */
extern enum bshell_status read_word(
	struct lex_ctx *ctx,
	struct lex_token **out);
extern enum bshell_status read_symbol(
	struct lex_ctx *ctx,
	const struct lex_token_def **out);
extern enum bshell_status read_literal_string(
	struct lex_ctx *ctx,
	struct lex_token **out);
extern enum bshell_status read_line_comment(struct lex_ctx *lex);
extern enum bshell_status read_var(
	struct lex_ctx *ctx,
	enum token_type type,
	struct lex_token **out);
extern enum bshell_status read_braced_var(
	struct lex_ctx *ctx,
	enum token_type type,
	struct lex_token **out);

/* create a symbol token for `sym` and enqueue it */
extern enum bshell_status push_symbol(
	struct lex_ctx *ctx,
	enum token_symbol sym);

/*
 * Symbol look-ahead.
 * NOTE(review): these take `char` while the scanners pass an fx_wchar, so
 * wide characters are narrowed at the call - confirm this is acceptable.
 */
extern bool char_can_begin_symbol(struct lex_ctx *ctx, char c);
extern bool char_can_begin_symbol_in_state(
	struct lex_ctx *ctx,
	char c,
	enum lex_state_type_id state_type);

#endif
|
||||
@@ -1,72 +1,91 @@
|
||||
#include "lex.h"
|
||||
#include "../lex.h"
|
||||
|
||||
#include "../debug.h"
|
||||
#include "../line-source.h"
|
||||
#include "token.h"
|
||||
#include "../../debug.h"
|
||||
#include "../../line-source.h"
|
||||
#include "../token.h"
|
||||
#include "lex-internal.h"
|
||||
|
||||
#define LEX_TOKEN_DEF(i, n) {.id = (i), .name = (n)}
|
||||
#define LEX_TOKEN_DEF2(i, n, f) {.id = (i), .name = (n), .flags = (f)}
|
||||
#define LEX_TOKEN_DEF(i, n, s) {.id = (i), .name = (n), .enabled_states = (s)}
|
||||
|
||||
#define CONVERSION_REQUESTED(flags) \
|
||||
((flags) & (LEX_ENABLE_INT | LEX_ENABLE_KEYWORD))
|
||||
|
||||
static struct lex_token_def keywords[] = {
|
||||
LEX_TOKEN_DEF(KW_FUNC, "func"),
|
||||
LEX_TOKEN_DEF(KW_FUNC, "func", LEX_STATE_STATEMENT),
|
||||
LEX_TOKEN_DEF(KW_IF, "if", LEX_STATE_STATEMENT),
|
||||
LEX_TOKEN_DEF(KW_ELSE, "else", LEX_STATE_STATEMENT),
|
||||
};
|
||||
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
|
||||
|
||||
#define LEX_STATES(states) (LEX_STATE_STATEMENT | LEX_STATE_EXPRESSION | states)
|
||||
#define LEX_STATE_ALL \
|
||||
(LEX_STATE_ARITHMETIC | LEX_STATE_STATEMENT | LEX_STATE_COMMAND \
|
||||
| LEX_STATE_STRING | LEX_STATE_EXPRESSION)
|
||||
|
||||
static struct lex_token_def symbols[] = {
|
||||
LEX_TOKEN_DEF(SYM_PLUS, "+"),
|
||||
LEX_TOKEN_DEF(SYM_HYPHEN, "-"),
|
||||
LEX_TOKEN_DEF(SYM_FORWARD_SLASH, "/"),
|
||||
LEX_TOKEN_DEF(SYM_ASTERISK, "*"),
|
||||
LEX_TOKEN_DEF2(SYM_AMPERSAND, "&", LEX_TOKEN_ENABLE_IN_WORD),
|
||||
LEX_TOKEN_DEF(SYM_PERCENT, "%"),
|
||||
LEX_TOKEN_DEF(SYM_SQUOTE, "'"),
|
||||
LEX_TOKEN_DEF2(SYM_DQUOTE, "\"", LEX_TOKEN_ENABLE_IN_STRING),
|
||||
LEX_TOKEN_DEF(SYM_HASH, "#"),
|
||||
LEX_TOKEN_DEF2(SYM_DOLLAR, "$", LEX_TOKEN_ENABLE_IN_STRING),
|
||||
LEX_TOKEN_DEF2(SYM_DOLLAR_LEFT_PAREN, "$(", LEX_TOKEN_ENABLE_IN_STRING),
|
||||
LEX_TOKEN_DEF2(SYM_DOLLAR_LEFT_BRACE, "${", LEX_TOKEN_ENABLE_IN_STRING),
|
||||
LEX_TOKEN_DEF(SYM_AT, "@"),
|
||||
LEX_TOKEN_DEF2(SYM_PIPE, "|", LEX_TOKEN_ENABLE_IN_WORD),
|
||||
LEX_TOKEN_DEF2(SYM_COMMA, ",", LEX_TOKEN_ENABLE_IN_WORD),
|
||||
LEX_TOKEN_DEF2(SYM_SEMICOLON, ";", LEX_TOKEN_ENABLE_IN_WORD),
|
||||
LEX_TOKEN_DEF(SYM_AT_LEFT_BRACE, "@{"),
|
||||
LEX_TOKEN_DEF2(SYM_LEFT_BRACE, "{", LEX_TOKEN_ENABLE_IN_WORD),
|
||||
LEX_TOKEN_DEF2(SYM_RIGHT_BRACE, "}", LEX_TOKEN_ENABLE_IN_WORD),
|
||||
LEX_TOKEN_DEF(SYM_LEFT_BRACKET, "["),
|
||||
LEX_TOKEN_DEF(SYM_RIGHT_BRACKET, "]"),
|
||||
LEX_TOKEN_DEF2(SYM_LEFT_PAREN, "(", LEX_TOKEN_ENABLE_IN_WORD),
|
||||
LEX_TOKEN_DEF2(SYM_RIGHT_PAREN, ")", LEX_TOKEN_ENABLE_IN_WORD),
|
||||
LEX_TOKEN_DEF(SYM_EQUAL, "="),
|
||||
LEX_TOKEN_DEF(SYM_PLUS_EQUAL, "+="),
|
||||
LEX_TOKEN_DEF(SYM_HYPHEN_EQUAL, "-="),
|
||||
LEX_TOKEN_DEF(SYM_FORWARD_SLASH_EQUAL, "/="),
|
||||
LEX_TOKEN_DEF(SYM_ASTERISK_EQUAL, "*="),
|
||||
LEX_TOKEN_DEF(SYM_PERCENT_EQUAL, "%="),
|
||||
LEX_TOKEN_DEF(SYM_PLUS, "+", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF(SYM_HYPHEN, "-", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF(SYM_FORWARD_SLASH, "/", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF(SYM_ASTERISK, "*", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF(
|
||||
SYM_AMPERSAND,
|
||||
"&",
|
||||
LEX_STATES(LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND)),
|
||||
LEX_TOKEN_DEF(SYM_PERCENT, "%", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
SYM_SQUOTE,
|
||||
"'",
|
||||
LEX_STATES(LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND)),
|
||||
LEX_TOKEN_DEF(SYM_DQUOTE, "\"", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(
|
||||
SYM_HASH,
|
||||
"#",
|
||||
LEX_STATES(LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND)),
|
||||
LEX_TOKEN_DEF(
|
||||
SYM_DOLLAR,
|
||||
"$",
|
||||
LEX_STATES(
|
||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND
|
||||
| LEX_STATE_STRING)),
|
||||
LEX_TOKEN_DEF(SYM_DOLLAR_LEFT_PAREN, "$(", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_DOLLAR_LEFT_BRACE, "${", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_AT, "@", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_PIPE, "|", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_COMMA, ",", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_SEMICOLON, ";", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_AT_LEFT_BRACE, "@{", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_LEFT_BRACE, "{", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_RIGHT_BRACE, "}", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_LEFT_BRACKET, "[", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF(SYM_RIGHT_BRACKET, "]", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF(SYM_LEFT_PAREN, "(", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_RIGHT_PAREN, ")", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_EQUAL, "=", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_PLUS_EQUAL, "+=", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_HYPHEN_EQUAL, "-=", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_FORWARD_SLASH_EQUAL, "/=", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_ASTERISK_EQUAL, "*=", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_PERCENT_EQUAL, "%=", LEX_STATE_ARITHMETIC),
|
||||
};
|
||||
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
||||
|
||||
typedef enum bshell_status (*pump_token_impl)(struct lex_ctx *);
|
||||
extern const struct lex_state_type lex_statement_state;
|
||||
extern const struct lex_state_type lex_expression_state;
|
||||
extern const struct lex_state_type lex_command_state;
|
||||
extern const struct lex_state_type lex_arithmetic_state;
|
||||
extern const struct lex_state_type lex_string_state;
|
||||
|
||||
static enum bshell_status do_pump_token_normal(struct lex_ctx *);
|
||||
static enum bshell_status do_pump_token_string(struct lex_ctx *ctx);
|
||||
static const pump_token_impl token_pump_functions[] = {
|
||||
[LEX_STATE_NORMAL] = do_pump_token_normal,
|
||||
[LEX_STATE_STRING] = do_pump_token_string,
|
||||
[LEX_STATE_INTERPOLATION] = do_pump_token_normal,
|
||||
static const struct lex_state_type *state_types[] = {
|
||||
[LEX_STATE_STATEMENT] = &lex_statement_state,
|
||||
[LEX_STATE_EXPRESSION] = &lex_expression_state,
|
||||
[LEX_STATE_COMMAND] = &lex_command_state,
|
||||
[LEX_STATE_ARITHMETIC] = &lex_arithmetic_state,
|
||||
[LEX_STATE_STRING] = &lex_string_state,
|
||||
};
|
||||
|
||||
static bool char_can_begin_symbol(struct lex_ctx *ctx, char c);
|
||||
static bool char_can_begin_symbol_in_context(
|
||||
struct lex_state *lex_state_push(
|
||||
struct lex_ctx *ctx,
|
||||
char c,
|
||||
enum token_type context);
|
||||
|
||||
static struct lex_state *push_lex_state(
|
||||
struct lex_ctx *ctx,
|
||||
enum lex_state_type state_type)
|
||||
enum lex_state_type_id state_type)
|
||||
{
|
||||
struct lex_state *state = malloc(sizeof *state);
|
||||
if (!state) {
|
||||
@@ -75,24 +94,40 @@ static struct lex_state *push_lex_state(
|
||||
|
||||
memset(state, 0x0, sizeof *state);
|
||||
|
||||
state->s_type = state_type;
|
||||
state->s_type = state_types[state_type];
|
||||
fx_queue_push_back(&ctx->lex_state, &state->s_entry);
|
||||
|
||||
if (state->s_type->s_begin) {
|
||||
state->s_type->s_begin(ctx);
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
static void pop_lex_state(struct lex_ctx *ctx)
|
||||
void lex_state_pop(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_queue_entry *entry = fx_queue_pop_back(&ctx->lex_state);
|
||||
if (!entry) {
|
||||
fx_queue_entry *entry = fx_queue_last(&ctx->lex_state);
|
||||
if (!entry || !fx_queue_prev(entry)) {
|
||||
/* don't pop if this is the root state */
|
||||
return;
|
||||
}
|
||||
|
||||
struct lex_state *state = fx_unbox(struct lex_state, entry, s_entry);
|
||||
|
||||
if (state->s_type->s_end) {
|
||||
state->s_type->s_end(ctx);
|
||||
}
|
||||
|
||||
fx_queue_pop_back(&ctx->lex_state);
|
||||
|
||||
if (state->s_tempstr) {
|
||||
fx_string_unref(state->s_tempstr);
|
||||
}
|
||||
|
||||
free(state);
|
||||
}
|
||||
|
||||
static struct lex_state *get_lex_state(struct lex_ctx *ctx)
|
||||
struct lex_state *lex_state_get(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_queue_entry *entry = fx_queue_last(&ctx->lex_state);
|
||||
if (!entry) {
|
||||
@@ -102,6 +137,42 @@ static struct lex_state *get_lex_state(struct lex_ctx *ctx)
|
||||
return fx_unbox(struct lex_state, entry, s_entry);
|
||||
}
|
||||
|
||||
void lex_state_change(struct lex_ctx *ctx, enum lex_state_type_id type)
|
||||
{
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
if (!state) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (state->s_type->s_end) {
|
||||
state->s_type->s_end(ctx);
|
||||
}
|
||||
|
||||
state->s_type = state_types[type];
|
||||
|
||||
if (state->s_type->s_begin) {
|
||||
state->s_type->s_begin(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
fx_string *lex_state_get_tempstr(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
if (!state) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!state->s_tempstr) {
|
||||
state->s_tempstr = fx_string_create();
|
||||
}
|
||||
|
||||
if (!state->s_tempstr) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return state->s_tempstr;
|
||||
}
|
||||
|
||||
static struct lex_symbol_node *get_symbol_node(
|
||||
struct lex_symbol_node *node,
|
||||
char c)
|
||||
@@ -201,8 +272,7 @@ enum bshell_status lex_ctx_init(
|
||||
ctx->lex_status = BSHELL_SUCCESS;
|
||||
ctx->lex_buf = fx_stringstream_create();
|
||||
ctx->lex_sym_tree = build_symbol_tree();
|
||||
ctx->lex_wordbuf = fx_string_create();
|
||||
push_lex_state(ctx, LEX_STATE_NORMAL);
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT);
|
||||
ctx->lex_src = src;
|
||||
ctx->lex_ch = FX_WCHAR_INVALID;
|
||||
|
||||
@@ -259,22 +329,18 @@ static fx_wchar __peek_char(struct lex_ctx *ctx, bool noread)
|
||||
return ctx->lex_ch;
|
||||
}
|
||||
|
||||
static fx_wchar peek_char(struct lex_ctx *ctx)
|
||||
fx_wchar peek_char(struct lex_ctx *ctx)
|
||||
{
|
||||
return __peek_char(ctx, false);
|
||||
}
|
||||
|
||||
static fx_wchar peek_char_noread(struct lex_ctx *ctx)
|
||||
fx_wchar peek_char_noread(struct lex_ctx *ctx)
|
||||
{
|
||||
return __peek_char(ctx, true);
|
||||
}
|
||||
|
||||
static void __advance_char(struct lex_ctx *ctx, bool noread)
|
||||
{
|
||||
if (!fx_wchar_is_space(ctx->lex_ch)) {
|
||||
fx_string_append_wc(ctx->lex_wordbuf, ctx->lex_ch);
|
||||
}
|
||||
|
||||
if (ctx->lex_ch != FX_WCHAR_INVALID) {
|
||||
ctx->lex_ch = FX_WCHAR_INVALID;
|
||||
return;
|
||||
@@ -296,19 +362,17 @@ static void __advance_char(struct lex_ctx *ctx, bool noread)
|
||||
}
|
||||
}
|
||||
|
||||
static void advance_char(struct lex_ctx *ctx)
|
||||
void advance_char(struct lex_ctx *ctx)
|
||||
{
|
||||
return __advance_char(ctx, false);
|
||||
}
|
||||
|
||||
static void advance_char_noread(struct lex_ctx *ctx)
|
||||
void advance_char_noread(struct lex_ctx *ctx)
|
||||
{
|
||||
return __advance_char(ctx, true);
|
||||
}
|
||||
|
||||
static bool convert_word_to_keyword(
|
||||
const struct lex_token *tok,
|
||||
struct lex_token *out)
|
||||
bool convert_word_to_keyword(struct lex_token *tok)
|
||||
{
|
||||
if (!lex_token_has_string_value(tok)) {
|
||||
return false;
|
||||
@@ -320,10 +384,8 @@ static bool convert_word_to_keyword(
|
||||
continue;
|
||||
}
|
||||
|
||||
memcpy(out, tok, sizeof *out);
|
||||
memset(&out->tok_entry, 0x0, sizeof out->tok_entry);
|
||||
out->tok_type = TOK_KEYWORD;
|
||||
out->tok_keyword = keywords[i].id;
|
||||
lex_token_change_type(tok, TOK_KEYWORD);
|
||||
tok->tok_keyword = keywords[i].id;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -378,7 +440,7 @@ static size_t get_int_multiplier_by_suffix(const char *suffix)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool string_is_valid_number(const char *s, long long *out)
|
||||
bool string_is_valid_number(const char *s, long long *out)
|
||||
{
|
||||
int base = get_int_base_by_prefix(&s);
|
||||
|
||||
@@ -398,9 +460,7 @@ static bool string_is_valid_number(const char *s, long long *out)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool convert_word_to_int(
|
||||
const struct lex_token *tok,
|
||||
struct lex_token *out)
|
||||
bool convert_word_to_int(struct lex_token *tok)
|
||||
{
|
||||
if (!lex_token_has_string_value(tok)) {
|
||||
return false;
|
||||
@@ -408,15 +468,14 @@ static bool convert_word_to_int(
|
||||
|
||||
const char *s = tok->tok_str;
|
||||
long long value = 0;
|
||||
bool ok = string_is_valid_number(s, &value);
|
||||
if (ok) {
|
||||
memcpy(out, tok, sizeof *out);
|
||||
memset(&out->tok_entry, 0x0, sizeof out->tok_entry);
|
||||
out->tok_type = TOK_INT;
|
||||
out->tok_int = value;
|
||||
if (!string_is_valid_number(s, &value)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return ok;
|
||||
lex_token_change_type(tok, TOK_INT);
|
||||
tok->tok_int = value;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct lex_token *get_next_token(struct lex_ctx *ctx)
|
||||
@@ -425,13 +484,7 @@ static struct lex_token *get_next_token(struct lex_ctx *ctx)
|
||||
return fx_unbox(struct lex_token, entry, tok_entry);
|
||||
}
|
||||
|
||||
static struct lex_token *get_next_word(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_queue_entry *entry = fx_queue_first(&ctx->lex_words);
|
||||
return fx_unbox(struct lex_token, entry, tok_entry);
|
||||
}
|
||||
|
||||
static void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok)
|
||||
void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok)
|
||||
{
|
||||
if (tok && (ctx->lex_flags & LEX_PRINT_TOKENS)) {
|
||||
print_lex_token(tok);
|
||||
@@ -446,12 +499,6 @@ static struct lex_token *dequeue_next_token(struct lex_ctx *ctx)
|
||||
return fx_unbox(struct lex_token, entry, tok_entry);
|
||||
}
|
||||
|
||||
static struct lex_token *dequeue_next_word(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_queue_entry *entry = fx_queue_pop_front(&ctx->lex_words);
|
||||
return fx_unbox(struct lex_token, entry, tok_entry);
|
||||
}
|
||||
|
||||
static fx_string *get_temp_string(struct lex_ctx *ctx)
|
||||
{
|
||||
if (!ctx->lex_tmp) {
|
||||
@@ -462,9 +509,7 @@ static fx_string *get_temp_string(struct lex_ctx *ctx)
|
||||
return ctx->lex_tmp;
|
||||
}
|
||||
|
||||
static enum bshell_status push_symbol(
|
||||
struct lex_ctx *ctx,
|
||||
enum token_symbol sym)
|
||||
enum bshell_status push_symbol(struct lex_ctx *ctx, enum token_symbol sym)
|
||||
{
|
||||
struct lex_token *tok = lex_token_create(TOK_SYMBOL);
|
||||
if (!tok) {
|
||||
@@ -476,82 +521,10 @@ static enum bshell_status push_symbol(
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status read_word(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_string *tmp = get_temp_string(ctx);
|
||||
bool word_is_number = false;
|
||||
|
||||
bool done = false;
|
||||
while (!done) {
|
||||
fx_wchar c = peek_char(ctx);
|
||||
if (c == FX_WCHAR_INVALID) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (fx_wchar_is_space(c)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (word_is_number && char_can_begin_symbol(ctx, c)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol_in_context(ctx, c, TOK_WORD)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
case '{':
|
||||
case '}':
|
||||
case '(':
|
||||
case ')':
|
||||
case ';':
|
||||
case ',':
|
||||
case '|':
|
||||
case '&':
|
||||
case '$':
|
||||
done = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (done) {
|
||||
break;
|
||||
}
|
||||
|
||||
fx_string_append_wc(tmp, c);
|
||||
word_is_number
|
||||
= string_is_valid_number(fx_string_get_cstr(tmp), NULL);
|
||||
advance_char(ctx);
|
||||
}
|
||||
|
||||
if (fx_string_get_size(tmp, FX_STRLEN_NORMAL) == 0) {
|
||||
if (ctx->lex_status == BSHELL_SUCCESS) {
|
||||
return BSHELL_ERR_BAD_SYNTAX;
|
||||
}
|
||||
|
||||
return ctx->lex_status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = lex_token_create_with_string(
|
||||
TOK_WORD,
|
||||
fx_string_get_cstr(tmp));
|
||||
#if 0
|
||||
bool converted = convert_word_to_keyword(tok);
|
||||
if (!converted) {
|
||||
converted = convert_word_to_int(tok);
|
||||
}
|
||||
#endif
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status read_var(struct lex_ctx *ctx, enum token_type type)
|
||||
enum bshell_status read_var(
|
||||
struct lex_ctx *ctx,
|
||||
enum token_type type,
|
||||
struct lex_token **out)
|
||||
{
|
||||
fx_string *tmp = get_temp_string(ctx);
|
||||
|
||||
@@ -583,13 +556,14 @@ static enum bshell_status read_var(struct lex_ctx *ctx, enum token_type type)
|
||||
struct lex_token *tok
|
||||
= lex_token_create_with_string(type, fx_string_get_cstr(tmp));
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
*out = tok;
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status read_braced_var(
|
||||
enum bshell_status read_braced_var(
|
||||
struct lex_ctx *ctx,
|
||||
enum token_type type)
|
||||
enum token_type type,
|
||||
struct lex_token **out)
|
||||
{
|
||||
fx_string *tmp = get_temp_string(ctx);
|
||||
bool ok = false;
|
||||
@@ -621,10 +595,11 @@ static enum bshell_status read_braced_var(
|
||||
struct lex_token *tok
|
||||
= lex_token_create_with_string(type, fx_string_get_cstr(tmp));
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
*out = tok;
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static enum bshell_status read_flag(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_string *tmp = get_temp_string(ctx);
|
||||
@@ -691,7 +666,29 @@ static enum bshell_status read_flag(struct lex_ctx *ctx)
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status read_literal_string(struct lex_ctx *ctx)
|
||||
static enum bshell_status read_interpolation_marker(struct lex_ctx *ctx)
|
||||
{
|
||||
enum bshell_status status = BSHELL_SUCCESS;
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
|
||||
if (state->s_type != LEX_STATE_STRING) {
|
||||
return BSHELL_ERR_INTERNAL_FAILURE;
|
||||
}
|
||||
|
||||
/* start of a new interpolation */
|
||||
if (!lex_state_push(ctx, LEX_STATE_STATEMENT)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
enum bshell_status read_literal_string(
|
||||
struct lex_ctx *ctx,
|
||||
struct lex_token **out)
|
||||
{
|
||||
fx_string *tmp = get_temp_string(ctx);
|
||||
|
||||
@@ -717,12 +714,12 @@ static enum bshell_status read_literal_string(struct lex_ctx *ctx)
|
||||
struct lex_token *tok = lex_token_create_with_string(
|
||||
TOK_STRING,
|
||||
fx_string_get_cstr(tmp));
|
||||
enqueue_token(ctx, tok);
|
||||
|
||||
*out = tok;
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status read_line_comment(struct lex_ctx *lex)
|
||||
enum bshell_status read_line_comment(struct lex_ctx *lex)
|
||||
{
|
||||
while (true) {
|
||||
fx_wchar c = peek_char(lex);
|
||||
@@ -741,16 +738,17 @@ static enum bshell_status read_line_comment(struct lex_ctx *lex)
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status read_dquote_marker(struct lex_ctx *ctx)
|
||||
#if 0
|
||||
enum bshell_status read_dquote_marker(struct lex_ctx *ctx)
|
||||
{
|
||||
enum bshell_status status = BSHELL_SUCCESS;
|
||||
struct lex_state *state = get_lex_state(ctx);
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
|
||||
if (state->s_type == LEX_STATE_STRING) {
|
||||
/* already within an fstring */
|
||||
pop_lex_state(ctx);
|
||||
lex_state_pop(ctx);
|
||||
tok = lex_token_create(TOK_STR_END);
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
@@ -760,39 +758,94 @@ static enum bshell_status read_dquote_marker(struct lex_ctx *ctx)
|
||||
tok = lex_token_create(TOK_STR_START);
|
||||
enqueue_token(ctx, tok);
|
||||
|
||||
if (!push_lex_state(ctx, LEX_STATE_STRING)) {
|
||||
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
static enum bshell_status read_interpolation_marker(struct lex_ctx *ctx)
|
||||
enum bshell_status read_word(struct lex_ctx *ctx, struct lex_token **out)
|
||||
{
|
||||
enum bshell_status status = BSHELL_SUCCESS;
|
||||
struct lex_state *state = get_lex_state(ctx);
|
||||
fx_string *tmp = get_temp_string(ctx);
|
||||
bool word_is_number = false;
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
bool done = false;
|
||||
while (!done) {
|
||||
fx_wchar c = peek_char(ctx);
|
||||
if (c == FX_WCHAR_INVALID) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (state->s_type != LEX_STATE_STRING) {
|
||||
return BSHELL_ERR_INTERNAL_FAILURE;
|
||||
if (fx_wchar_is_space(c)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (word_is_number && char_can_begin_symbol(ctx, c)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
case '{':
|
||||
case '}':
|
||||
case '(':
|
||||
case ')':
|
||||
case ';':
|
||||
case ',':
|
||||
case '|':
|
||||
case '&':
|
||||
case '$':
|
||||
done = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (done) {
|
||||
break;
|
||||
}
|
||||
|
||||
fx_string_append_wc(tmp, c);
|
||||
word_is_number
|
||||
= string_is_valid_number(fx_string_get_cstr(tmp), NULL);
|
||||
advance_char(ctx);
|
||||
}
|
||||
|
||||
/* start of a new interpolation */
|
||||
if (!push_lex_state(ctx, LEX_STATE_INTERPOLATION)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
if (fx_string_get_size(tmp, FX_STRLEN_NORMAL) == 0) {
|
||||
if (ctx->lex_status == BSHELL_SUCCESS) {
|
||||
return BSHELL_ERR_BAD_SYNTAX;
|
||||
}
|
||||
|
||||
return ctx->lex_status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = lex_token_create_with_string(
|
||||
TOK_WORD,
|
||||
fx_string_get_cstr(tmp));
|
||||
#if 0
|
||||
bool converted = convert_word_to_keyword(tok);
|
||||
if (!converted) {
|
||||
converted = convert_word_to_int(tok);
|
||||
}
|
||||
#endif
|
||||
|
||||
*out = tok;
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status read_symbol(struct lex_ctx *ctx)
|
||||
enum bshell_status read_symbol(
|
||||
struct lex_ctx *ctx,
|
||||
const struct lex_token_def **out)
|
||||
{
|
||||
struct lex_state *state = get_lex_state(ctx);
|
||||
enum lex_token_flags required_flags = 0;
|
||||
if (state->s_type == LEX_STATE_STRING) {
|
||||
required_flags |= LEX_TOKEN_ENABLE_IN_STRING;
|
||||
}
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
|
||||
struct lex_symbol_node *node = ctx->lex_sym_tree;
|
||||
char prev = 0;
|
||||
@@ -805,8 +858,7 @@ static enum bshell_status read_symbol(struct lex_ctx *ctx)
|
||||
|
||||
struct lex_symbol_node *next = get_symbol_node(node, c);
|
||||
if (!next
|
||||
|| (next->s_def->flags & required_flags)
|
||||
!= required_flags) {
|
||||
|| !(next->s_def->enabled_states & state->s_type->s_id)) {
|
||||
prev = c;
|
||||
break;
|
||||
}
|
||||
@@ -820,6 +872,7 @@ static enum bshell_status read_symbol(struct lex_ctx *ctx)
|
||||
return BSHELL_ERR_BAD_SYNTAX;
|
||||
}
|
||||
|
||||
#if 0
|
||||
struct lex_token *tok = NULL;
|
||||
switch (node->s_def->id) {
|
||||
case SYM_SQUOTE:
|
||||
@@ -829,7 +882,7 @@ static enum bshell_status read_symbol(struct lex_ctx *ctx)
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
push_symbol(ctx, SYM_DOLLAR_LEFT_PAREN);
|
||||
if (state->s_type == LEX_STATE_STRING) {
|
||||
push_lex_state(ctx, LEX_STATE_INTERPOLATION);
|
||||
lex_state_push(ctx, LEX_STATE_STRING);
|
||||
}
|
||||
break;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
@@ -838,18 +891,11 @@ static enum bshell_status read_symbol(struct lex_ctx *ctx)
|
||||
return read_line_comment(ctx);
|
||||
case SYM_LEFT_PAREN:
|
||||
push_symbol(ctx, SYM_LEFT_PAREN);
|
||||
state->s_paren_depth++;
|
||||
lex_state_push(ctx, LEX_STATE_EXPRESSION);
|
||||
break;
|
||||
case SYM_RIGHT_PAREN:
|
||||
push_symbol(ctx, SYM_RIGHT_PAREN);
|
||||
|
||||
if (state->s_type == LEX_STATE_INTERPOLATION
|
||||
&& state->s_paren_depth == 0) {
|
||||
pop_lex_state(ctx);
|
||||
} else {
|
||||
state->s_paren_depth--;
|
||||
}
|
||||
|
||||
lex_state_pop(ctx);
|
||||
break;
|
||||
case SYM_DOLLAR:
|
||||
return read_var(ctx, TOK_VAR);
|
||||
@@ -859,70 +905,40 @@ static enum bshell_status read_symbol(struct lex_ctx *ctx)
|
||||
push_symbol(ctx, node->s_def->id);
|
||||
break;
|
||||
}
|
||||
|
||||
#endif
|
||||
*out = node->s_def;
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static bool char_can_begin_symbol(struct lex_ctx *ctx, char c)
|
||||
bool char_can_begin_symbol_in_state(
|
||||
struct lex_ctx *ctx,
|
||||
char c,
|
||||
enum lex_state_type_id state_type)
|
||||
{
|
||||
struct lex_state *state = get_lex_state(ctx);
|
||||
enum lex_token_flags required_flags = 0;
|
||||
if (state->s_type == LEX_STATE_STRING) {
|
||||
required_flags |= LEX_TOKEN_ENABLE_IN_STRING;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < nr_symbols; i++) {
|
||||
if (symbols[i].name[0] != c) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((symbols[i].flags & required_flags) != required_flags) {
|
||||
continue;
|
||||
if (symbols[i].enabled_states & state_type) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool char_can_begin_symbol_in_context(
|
||||
struct lex_ctx *ctx,
|
||||
char c,
|
||||
enum token_type context)
|
||||
bool char_can_begin_symbol(struct lex_ctx *ctx, char c)
|
||||
{
|
||||
enum lex_token_flags required_flags = 0;
|
||||
switch (context) {
|
||||
case TOK_WORD:
|
||||
required_flags = LEX_TOKEN_ENABLE_IN_WORD;
|
||||
break;
|
||||
case TOK_STRING:
|
||||
required_flags = LEX_TOKEN_ENABLE_IN_STRING;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < nr_symbols; i++) {
|
||||
if (symbols[i].name[0] != c) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((symbols[i].flags & required_flags) != required_flags) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
return char_can_begin_symbol_in_state(ctx, c, state->s_type->s_id);
|
||||
}
|
||||
|
||||
static enum bshell_status read_string_content(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = FX_WCHAR_INVALID;
|
||||
fx_string *str = get_temp_string(ctx);
|
||||
struct lex_state *state = get_lex_state(ctx);
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
|
||||
if (!str) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
@@ -955,6 +971,7 @@ static enum bshell_status read_string_content(struct lex_ctx *ctx)
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static enum bshell_status do_pump_token_string(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
@@ -973,32 +990,13 @@ static enum bshell_status do_pump_token_string(struct lex_ctx *ctx)
|
||||
return status;
|
||||
}
|
||||
|
||||
static void flush_wordbuf(struct lex_ctx *ctx)
|
||||
{
|
||||
if (fx_string_get_size(ctx->lex_wordbuf, FX_STRLEN_NORMAL) == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct lex_token *tok = lex_token_create_with_string(
|
||||
TOK_WORD,
|
||||
fx_string_get_cstr(ctx->lex_wordbuf));
|
||||
fx_queue_push_back(&ctx->lex_words, &tok->tok_entry);
|
||||
fx_string_clear(ctx->lex_wordbuf);
|
||||
}
|
||||
|
||||
static enum bshell_status do_pump_token_normal(struct lex_ctx *ctx)
|
||||
{
|
||||
enum bshell_status status = BSHELL_SUCCESS;
|
||||
|
||||
fx_wchar c = peek_char(ctx);
|
||||
bool whitespace = false;
|
||||
bool newline = false;
|
||||
|
||||
if (fx_wchar_is_space(c)) {
|
||||
flush_wordbuf(ctx);
|
||||
whitespace = true;
|
||||
}
|
||||
|
||||
while (fx_wchar_is_space(c)) {
|
||||
if (c == '\n') {
|
||||
newline = true;
|
||||
@@ -1011,9 +1009,6 @@ static enum bshell_status do_pump_token_normal(struct lex_ctx *ctx)
|
||||
if (newline) {
|
||||
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||
enqueue_token(ctx, tok);
|
||||
}
|
||||
|
||||
if (whitespace) {
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -1027,26 +1022,19 @@ static enum bshell_status do_pump_token_normal(struct lex_ctx *ctx)
|
||||
|
||||
return read_word(ctx);
|
||||
}
|
||||
#endif
|
||||
|
||||
static enum bshell_status pump_tokens(struct lex_ctx *ctx)
|
||||
{
|
||||
enum bshell_status status = BSHELL_SUCCESS;
|
||||
while (fx_queue_empty(&ctx->lex_words) && status == BSHELL_SUCCESS) {
|
||||
struct lex_state *state = get_lex_state(ctx);
|
||||
pump_token_impl impl = token_pump_functions[state->s_type];
|
||||
|
||||
status = impl(ctx);
|
||||
while (fx_queue_empty(&ctx->lex_tokens) && status == BSHELL_SUCCESS) {
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
status = state->s_type->s_pump_token(ctx);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static bool any_tokens_available(struct lex_ctx *ctx)
|
||||
{
|
||||
return !fx_queue_empty(&ctx->lex_tokens)
|
||||
|| !fx_queue_empty(&ctx->lex_words);
|
||||
}
|
||||
|
||||
static void discard_all_tokens(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_queue_entry *cur = fx_queue_first(&ctx->lex_tokens);
|
||||
@@ -1063,17 +1051,6 @@ static void discard_all_tokens(struct lex_ctx *ctx)
|
||||
}
|
||||
}
|
||||
|
||||
static void discard_all_words(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_queue_entry *cur = fx_queue_pop_front(&ctx->lex_words);
|
||||
while (cur) {
|
||||
struct lex_token *tok
|
||||
= fx_unbox(struct lex_token, cur, tok_entry);
|
||||
lex_token_destroy(tok);
|
||||
cur = fx_queue_pop_front(&ctx->lex_words);
|
||||
}
|
||||
}
|
||||
|
||||
struct lex_token *lex_ctx_peek(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = get_next_token(ctx);
|
||||
@@ -1081,21 +1058,6 @@ struct lex_token *lex_ctx_peek(struct lex_ctx *ctx)
|
||||
return tok;
|
||||
}
|
||||
|
||||
discard_all_words(ctx);
|
||||
pump_tokens(ctx);
|
||||
tok = get_next_token(ctx);
|
||||
|
||||
return tok;
|
||||
}
|
||||
|
||||
struct lex_token *lex_ctx_peek_word(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = get_next_word(ctx);
|
||||
if (tok) {
|
||||
return tok;
|
||||
}
|
||||
|
||||
discard_all_tokens(ctx);
|
||||
pump_tokens(ctx);
|
||||
tok = get_next_token(ctx);
|
||||
|
||||
@@ -1106,13 +1068,6 @@ struct lex_token *lex_ctx_claim(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = dequeue_next_token(ctx);
|
||||
if (tok) {
|
||||
struct lex_token *tmp = get_next_token(ctx);
|
||||
|
||||
if (tmp && tmp->tok_type == TOK_LINEFEED) {
|
||||
tmp = dequeue_next_word(ctx);
|
||||
lex_token_destroy(tmp);
|
||||
}
|
||||
|
||||
return tok;
|
||||
}
|
||||
|
||||
@@ -1125,39 +1080,11 @@ struct lex_token *lex_ctx_claim(struct lex_ctx *ctx)
|
||||
return dequeue_next_token(ctx);
|
||||
}
|
||||
|
||||
struct lex_token *lex_ctx_claim_word(struct lex_ctx *ctx)
|
||||
{
|
||||
/* since we're claiming the whole word, discard any sub-tokens already
|
||||
* generated up to the next linefeed */
|
||||
discard_all_tokens(ctx);
|
||||
struct lex_token *tok = dequeue_next_word(ctx);
|
||||
if (tok) {
|
||||
return tok;
|
||||
}
|
||||
|
||||
if (fx_queue_empty(&ctx->lex_words)) {
|
||||
pump_tokens(ctx);
|
||||
|
||||
tok = get_next_token(ctx);
|
||||
}
|
||||
|
||||
return dequeue_next_word(ctx);
|
||||
}
|
||||
|
||||
void lex_ctx_discard(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = dequeue_next_token(ctx);
|
||||
if (tok) {
|
||||
lex_token_destroy(tok);
|
||||
tok = get_next_token(ctx);
|
||||
|
||||
/* if the next token is a linefeed, we've reached the end
|
||||
* of the current word, and should discard it */
|
||||
if (tok && tok->tok_type == TOK_LINEFEED) {
|
||||
tok = dequeue_next_word(ctx);
|
||||
lex_token_destroy(tok);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,162 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
static enum bshell_status statement_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
enum bshell_status status = read_symbol(ctx, &sym);
|
||||
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
switch (sym->id) {
|
||||
case SYM_SQUOTE:
|
||||
status = read_literal_string(ctx, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
|
||||
case SYM_HASH:
|
||||
return read_line_comment(ctx);
|
||||
case SYM_DQUOTE:
|
||||
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR:
|
||||
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
status = read_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT:
|
||||
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT_LEFT_BRACE:
|
||||
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
push_symbol(ctx, sym->id);
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_EXPRESSION);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_LEFT_BRACE:
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_RIGHT_PAREN:
|
||||
case SYM_RIGHT_BRACE:
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (sym->enabled_states & LEX_STATE_COMMAND) {
|
||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
||||
} else if (sym->enabled_states & LEX_STATE_ARITHMETIC) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status statement_word(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *word = NULL;
|
||||
enum bshell_status status = read_word(ctx, &word);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
bool converted = convert_word_to_keyword(word);
|
||||
if (!converted) {
|
||||
converted = convert_word_to_int(word);
|
||||
}
|
||||
|
||||
if (converted) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
} else {
|
||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
||||
}
|
||||
|
||||
enqueue_token(ctx, word);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
bool newline = false;
|
||||
|
||||
while (fx_wchar_is_space(c)) {
|
||||
if (c == '\n') {
|
||||
newline = true;
|
||||
}
|
||||
|
||||
advance_char_noread(ctx);
|
||||
c = peek_char_noread(ctx);
|
||||
}
|
||||
|
||||
if (newline) {
|
||||
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
return statement_symbol(ctx);
|
||||
}
|
||||
|
||||
return statement_word(ctx);
|
||||
}
|
||||
|
||||
const struct lex_state_type lex_statement_state = {
|
||||
.s_id = LEX_STATE_STATEMENT,
|
||||
.s_pump_token = statement_pump_token,
|
||||
};
|
||||
@@ -0,0 +1,136 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
static enum bshell_status string_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
enum bshell_status status = read_symbol(ctx, &sym);
|
||||
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
status = push_symbol(ctx, sym->id);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DQUOTE:
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR:
|
||||
status = read_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT:
|
||||
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return BSHELL_ERR_BAD_SYNTAX;
|
||||
}
|
||||
|
||||
static enum bshell_status string_content(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = FX_WCHAR_INVALID;
|
||||
fx_string *temp = lex_state_get_tempstr(ctx);
|
||||
fx_string_clear(temp);
|
||||
|
||||
while (1) {
|
||||
c = peek_char(ctx);
|
||||
if (c == FX_WCHAR_INVALID) {
|
||||
/* EOF without end of string */
|
||||
ctx->lex_status = BSHELL_ERR_BAD_SYNTAX;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
break;
|
||||
}
|
||||
|
||||
fx_string_append_wc(temp, c);
|
||||
advance_char(ctx);
|
||||
}
|
||||
|
||||
if (fx_string_get_size(temp, FX_STRLEN_NORMAL) == 0) {
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
struct lex_token *tok = lex_token_create_with_string(
|
||||
TOK_STRING,
|
||||
fx_string_get_cstr(temp));
|
||||
enqueue_token(ctx, tok);
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status string_begin(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = lex_token_create(TOK_STR_START);
|
||||
if (!tok) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status string_end(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = lex_token_create(TOK_STR_END);
|
||||
if (!tok) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status string_pump_token(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
return string_symbol(ctx);
|
||||
}
|
||||
|
||||
return string_content(ctx);
|
||||
}
|
||||
|
||||
const struct lex_state_type lex_string_state = {
|
||||
.s_id = LEX_STATE_STRING,
|
||||
.s_begin = string_begin,
|
||||
.s_end = string_end,
|
||||
.s_pump_token = string_pump_token,
|
||||
};
|
||||
@@ -105,6 +105,8 @@ const char *token_keyword_to_string(enum token_keyword keyword)
|
||||
switch (keyword) {
|
||||
ENUM_STR(KW_NONE);
|
||||
ENUM_STR(KW_FUNC);
|
||||
ENUM_STR(KW_IF);
|
||||
ENUM_STR(KW_ELSE);
|
||||
default:
|
||||
return "<unknown>";
|
||||
}
|
||||
|
||||
@@ -30,6 +30,8 @@ enum token_keyword {
|
||||
KW_NONE = 0,
|
||||
__KW_INDEX_BASE = 200,
|
||||
KW_FUNC,
|
||||
KW_IF,
|
||||
KW_ELSE,
|
||||
__KW_INDEX_LIMIT,
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user