parse: lex: replace expression scanner with statement; implement complex-word scanner
also fix a bunch of scanning edge-cases
This commit is contained in:
+16
-5
@@ -2,24 +2,34 @@
|
||||
#define LEX_H_
|
||||
|
||||
#include "../status.h"
|
||||
#include "token.h"
|
||||
|
||||
#include <fx/queue.h>
|
||||
#include <fx/string.h>
|
||||
#include <fx/stringstream.h>
|
||||
|
||||
struct lex_token;
|
||||
struct line_source;
|
||||
|
||||
/* option bits that apply to the lexer as a whole */
enum lex_flags {
	/* NOTE(review): presumably asks the lexer to print each token it
	 * produces; the code that reads this bit is not visible here */
	LEX_PRINT_TOKENS = 1u << 0,
};
|
||||
|
||||
/* behaviour bits attached to an individual token definition */
enum lex_token_flags {
	/* a token carrying this bit interrupts the word currently being
	 * scanned and additionally brings a multi-word to an end */
	LEX_TOKEN_TERMINATES_WORD = 1u << 0,
	/* a token carrying this bit may open an arithmetic expression; when
	 * a statement sees such a token as its first character it switches
	 * to arithmetic mode */
	LEX_TOKEN_UNARY_ARITHMETIC = 1u << 1,
};
|
||||
|
||||
enum lex_state_type_id {
|
||||
LEX_STATE_STATEMENT = 0x01u,
|
||||
LEX_STATE_EXPRESSION = 0x02u,
|
||||
LEX_STATE_COMMAND = 0x04u,
|
||||
LEX_STATE_ARITHMETIC = 0x08u,
|
||||
LEX_STATE_STRING = 0x10u,
|
||||
LEX_STATE_COMMAND = 0x02u,
|
||||
LEX_STATE_ARITHMETIC = 0x04u,
|
||||
LEX_STATE_STRING = 0x08u,
|
||||
LEX_STATE_WORD = 0x10u,
|
||||
};
|
||||
|
||||
struct lex_token_def {
|
||||
@@ -27,6 +37,7 @@ struct lex_token_def {
|
||||
const char *name;
|
||||
uint64_t name_hash;
|
||||
enum lex_state_type_id enabled_states;
|
||||
enum lex_token_flags flags;
|
||||
};
|
||||
|
||||
struct lex_symbol_node {
|
||||
|
||||
@@ -1,5 +1,49 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
static enum bshell_status arithmetic_hyphen(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
if (!fx_wchar_is_alnum(c)) {
|
||||
push_symbol(ctx, SYM_HYPHEN);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
enum bshell_status status = read_word(
|
||||
ctx,
|
||||
READ_NO_SET_TOKEN_START | READ_APPEND_HYPHEN,
|
||||
&tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
bool converted = convert_word_to_int(tok);
|
||||
|
||||
if (converted) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
/* because of APPEND_HYPHEN (which is needed to ensure operator
|
||||
* tokens are detected properly), the resulting number will be
|
||||
* negative.
|
||||
* this token will be preceded by a HYPHEN token, so the number
|
||||
* must be positive */
|
||||
tok->tok_int *= -1;
|
||||
push_symbol(ctx, SYM_HYPHEN);
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
converted = convert_word_to_operator(ctx, tok);
|
||||
if (converted) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
} else {
|
||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
@@ -18,7 +62,8 @@ static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
|
||||
}
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
|
||||
case SYM_HYPHEN:
|
||||
return arithmetic_hyphen(ctx);
|
||||
case SYM_HASH:
|
||||
return read_line_comment(ctx);
|
||||
case SYM_DQUOTE:
|
||||
|
||||
@@ -1,5 +1,26 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
/* Decides whether character `c` may extend the word being assembled.
 *
 * Alphanumerics and '$' always continue the word and whitespace never does.
 * Any other character continues the word unless it can start a symbol that
 * is enabled in LEX_STATE_WORD. */
static bool char_can_continue_word(struct lex_ctx *ctx, fx_wchar c)
{
	if (fx_wchar_is_alnum(c)) {
		return true;
	}

	if (fx_wchar_is_space(c)) {
		return false;
	}

	/* '$' is accepted before the symbol table is consulted */
	return c == '$'
	       || !char_can_begin_symbol_in_state(ctx, c, LEX_STATE_WORD);
}
|
||||
|
||||
static enum bshell_status command_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
@@ -33,6 +54,10 @@ static enum bshell_status command_symbol(struct lex_ctx *ctx)
|
||||
return status;
|
||||
}
|
||||
|
||||
if (char_can_continue_word(ctx, peek_char(ctx))) {
|
||||
lex_state_push(ctx, LEX_STATE_WORD, 0);
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT:
|
||||
@@ -49,6 +74,10 @@ static enum bshell_status command_symbol(struct lex_ctx *ctx)
|
||||
return status;
|
||||
}
|
||||
|
||||
if (char_can_continue_word(ctx, peek_char(ctx))) {
|
||||
lex_state_push(ctx, LEX_STATE_WORD, 0);
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT_LEFT_BRACE:
|
||||
@@ -91,11 +120,27 @@ static enum bshell_status command_symbol(struct lex_ctx *ctx)
|
||||
static enum bshell_status command_word(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *word = NULL;
|
||||
enum bshell_status status = read_word(ctx, &word);
|
||||
enum bshell_status status
|
||||
= read_word(ctx, READ_NO_NUMBER_RECOGNITION, &word);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
bool continue_word = false;
|
||||
|
||||
fx_wchar c = peek_char(ctx);
|
||||
if (char_can_begin_symbol_in_state(ctx, c, LEX_STATE_WORD)) {
|
||||
continue_word = true;
|
||||
}
|
||||
|
||||
if (char_has_flags(ctx, c, LEX_TOKEN_TERMINATES_WORD)) {
|
||||
continue_word = false;
|
||||
}
|
||||
|
||||
if (continue_word) {
|
||||
lex_state_push(ctx, LEX_STATE_WORD, 0);
|
||||
}
|
||||
|
||||
enqueue_token(ctx, word);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -1,134 +0,0 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
static enum bshell_status expression_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
enum bshell_status status = read_symbol(ctx, &sym);
|
||||
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_DQUOTE:
|
||||
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR:
|
||||
status = read_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return status;
|
||||
case SYM_AT:
|
||||
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return status;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return status;
|
||||
case SYM_AT_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return status;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
push_symbol(ctx, sym->id);
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_EXPRESSION);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_RIGHT_PAREN:
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_SEMICOLON:
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status expression_word(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *word = NULL;
|
||||
enum bshell_status status = read_word(ctx, &word);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
bool converted = convert_word_to_int(word);
|
||||
|
||||
if (converted) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
} else {
|
||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
||||
}
|
||||
|
||||
enqueue_token(ctx, word);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status expression_pump_token(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
bool newline = false;
|
||||
|
||||
while (fx_wchar_is_space(c)) {
|
||||
if (c == '\n') {
|
||||
newline = true;
|
||||
}
|
||||
|
||||
advance_char_noread(ctx);
|
||||
c = peek_char_noread(ctx);
|
||||
}
|
||||
|
||||
if (newline) {
|
||||
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
return expression_symbol(ctx);
|
||||
}
|
||||
|
||||
return expression_word(ctx);
|
||||
}
|
||||
|
||||
const struct lex_state_type lex_expression_state = {
|
||||
.s_id = LEX_STATE_EXPRESSION,
|
||||
.s_pump_token = expression_pump_token,
|
||||
};
|
||||
@@ -11,6 +11,12 @@ enum state_flags {
|
||||
STATEMENT_F_DISABLE_KEYWORDS = 0x01u,
|
||||
};
|
||||
|
||||
/* option bits accepted by read_word() */
enum read_flags {
	/* seed the word buffer with a '-' before any input is scanned */
	READ_APPEND_HYPHEN = 1u << 0,
	/* do not call set_token_start() when the read begins */
	READ_NO_SET_TOKEN_START = 1u << 1,
	/* do not end the word early when the text so far is a valid number
	 * and the next character can begin an arithmetic symbol */
	READ_NO_NUMBER_RECOGNITION = 1u << 2,
};
|
||||
|
||||
typedef enum bshell_status (*lex_state_pump_token)(struct lex_ctx *);
|
||||
typedef enum bshell_status (*lex_state_begin)(struct lex_ctx *);
|
||||
typedef enum bshell_status (*lex_state_end)(struct lex_ctx *);
|
||||
@@ -42,12 +48,17 @@ extern fx_string *lex_state_get_tempstr(struct lex_ctx *ctx);
|
||||
|
||||
extern fx_wchar peek_char(struct lex_ctx *ctx);
|
||||
extern fx_wchar peek_char_noread(struct lex_ctx *ctx);
|
||||
extern fx_wchar peek2_char(struct lex_ctx *ctx);
|
||||
extern fx_wchar peek2_char_noread(struct lex_ctx *ctx);
|
||||
extern void advance_char(struct lex_ctx *ctx);
|
||||
extern void advance_char_noread(struct lex_ctx *ctx);
|
||||
|
||||
extern bool string_is_valid_number(const char *s, long long *out);
|
||||
extern bool convert_word_to_int(struct lex_token *tok);
|
||||
extern bool convert_word_to_keyword(struct lex_token *tok);
|
||||
extern bool convert_word_to_operator(
|
||||
struct lex_ctx *ctx,
|
||||
struct lex_token *tok);
|
||||
|
||||
extern void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok);
|
||||
extern void enqueue_token_with_coordinates(
|
||||
@@ -58,6 +69,7 @@ extern void enqueue_token_with_coordinates(
|
||||
|
||||
extern enum bshell_status read_word(
|
||||
struct lex_ctx *ctx,
|
||||
enum read_flags flags,
|
||||
struct lex_token **out);
|
||||
extern enum bshell_status read_symbol(
|
||||
struct lex_ctx *ctx,
|
||||
@@ -84,5 +96,12 @@ extern bool char_can_begin_symbol_in_state(
|
||||
struct lex_ctx *ctx,
|
||||
char c,
|
||||
enum lex_state_type_id state_type);
|
||||
extern bool char_has_flags(
|
||||
struct lex_ctx *ctx,
|
||||
char c,
|
||||
enum lex_token_flags flags);
|
||||
extern enum token_operator get_operator_with_string(
|
||||
struct lex_ctx *ctx,
|
||||
const char *s);
|
||||
|
||||
#endif
|
||||
|
||||
+196
-202
@@ -6,6 +6,8 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
#define LEX_TOKEN_DEF(i, n, s) {.id = (i), .name = (n), .enabled_states = (s)}
|
||||
#define LEX_TOKEN_DEF2(i, n, s, f) \
|
||||
{.id = (i), .name = (n), .enabled_states = (s), .flags = (f)}
|
||||
|
||||
#define CONVERSION_REQUESTED(flags) \
|
||||
((flags) & (LEX_ENABLE_INT | LEX_ENABLE_KEYWORD))
|
||||
@@ -17,20 +19,68 @@ static struct lex_token_def keywords[] = {
|
||||
};
|
||||
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
|
||||
|
||||
#define LEX_STATES(states) (LEX_STATE_STATEMENT | LEX_STATE_EXPRESSION | states)
|
||||
static struct lex_token_def operators[] = {
|
||||
LEX_TOKEN_DEF(OP_BAND, "-band", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_BOR, "-bor", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_BXOR, "-bxor", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
OP_BNOT,
|
||||
"-bnot",
|
||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_SHL, "-shl", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_SHR, "-shr", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_EQ, "-eq", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_NE, "-ne", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_GT, "-gt", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_LT, "-lt", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_GE, "-ge", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_LE, "-le", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_MATCH, "-match", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_NOTMATCH, "-notmatch", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_REPLACE, "-replace", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_LIKE, "-like", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_NOTLIKE, "-notlike", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_CONTAINS, "-contains", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_NOTCONTAINS, "-notcontains", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_AND, "-and", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_OR, "-OR", LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_XOR, "-xor", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
OP_NOT,
|
||||
"-not",
|
||||
LEX_STATE_STATEMENT | LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_SPLIT, "-split", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_JOIN, "-join", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_IS, "-is", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_ISNOT, "-isnot", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(OP_AS, "-as", LEX_STATE_ARITHMETIC),
|
||||
};
|
||||
static const size_t nr_operators = sizeof operators / sizeof operators[0];
|
||||
|
||||
#define LEX_STATES(states) (LEX_STATE_STATEMENT | states)
|
||||
#define LEX_STATE_ALL \
|
||||
(LEX_STATE_ARITHMETIC | LEX_STATE_STATEMENT | LEX_STATE_COMMAND \
|
||||
| LEX_STATE_STRING | LEX_STATE_EXPRESSION)
|
||||
| LEX_STATE_STRING | LEX_STATE_WORD)
|
||||
|
||||
static struct lex_token_def symbols[] = {
|
||||
LEX_TOKEN_DEF(SYM_PLUS, "+", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF(SYM_HYPHEN, "-", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF(SYM_FORWARD_SLASH, "/", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF(SYM_ASTERISK, "*", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_PLUS,
|
||||
"+",
|
||||
LEX_STATE_ARITHMETIC,
|
||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_HYPHEN,
|
||||
"-",
|
||||
LEX_STATES(LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_FORWARD_SLASH, "/", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_ASTERISK, "*", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
SYM_AMPERSAND,
|
||||
"&",
|
||||
LEX_STATES(LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND)),
|
||||
LEX_STATES(
|
||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND
|
||||
| LEX_STATE_WORD)),
|
||||
LEX_TOKEN_DEF(SYM_PERCENT, "%", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(
|
||||
SYM_SQUOTE,
|
||||
@@ -40,26 +90,62 @@ static struct lex_token_def symbols[] = {
|
||||
LEX_TOKEN_DEF(
|
||||
SYM_HASH,
|
||||
"#",
|
||||
LEX_STATES(LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND)),
|
||||
LEX_TOKEN_DEF(
|
||||
SYM_DOLLAR,
|
||||
"$",
|
||||
LEX_STATES(
|
||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND
|
||||
| LEX_STATE_STRING)),
|
||||
LEX_TOKEN_DEF(SYM_DOLLAR_LEFT_PAREN, "$(", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_DOLLAR_LEFT_BRACE, "${", LEX_STATE_ALL),
|
||||
| LEX_STATE_WORD)),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_DOLLAR,
|
||||
"$",
|
||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
|
||||
| LEX_STATE_WORD,
|
||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_DOLLAR_LEFT_PAREN,
|
||||
"$(",
|
||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
|
||||
| LEX_STATE_WORD,
|
||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_DOLLAR_LEFT_BRACE,
|
||||
"${",
|
||||
LEX_STATE_ARITHMETIC | LEX_STATE_COMMAND | LEX_STATE_STRING
|
||||
| LEX_STATE_WORD,
|
||||
LEX_TOKEN_UNARY_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_AT, "@", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_PIPE, "|", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_COMMA, ",", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_SEMICOLON, ";", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF2(SYM_PIPE, "|", LEX_STATE_ALL, LEX_TOKEN_TERMINATES_WORD),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_COMMA,
|
||||
",",
|
||||
LEX_STATE_ALL,
|
||||
LEX_TOKEN_TERMINATES_WORD),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_SEMICOLON,
|
||||
";",
|
||||
LEX_STATE_ALL,
|
||||
LEX_TOKEN_TERMINATES_WORD),
|
||||
LEX_TOKEN_DEF(SYM_AT_LEFT_BRACE, "@{", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_LEFT_BRACE, "{", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_RIGHT_BRACE, "}", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_LEFT_BRACE,
|
||||
"{",
|
||||
LEX_STATE_ALL,
|
||||
LEX_TOKEN_TERMINATES_WORD),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_RIGHT_BRACE,
|
||||
"}",
|
||||
LEX_STATE_ALL,
|
||||
LEX_TOKEN_TERMINATES_WORD),
|
||||
LEX_TOKEN_DEF(SYM_LEFT_BRACKET, "[", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF(SYM_RIGHT_BRACKET, "]", LEX_STATES(LEX_STATE_ARITHMETIC)),
|
||||
LEX_TOKEN_DEF(SYM_LEFT_PAREN, "(", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF(SYM_RIGHT_PAREN, ")", LEX_STATE_ALL),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_LEFT_PAREN,
|
||||
"(",
|
||||
LEX_STATE_ALL,
|
||||
LEX_TOKEN_TERMINATES_WORD),
|
||||
LEX_TOKEN_DEF2(
|
||||
SYM_RIGHT_PAREN,
|
||||
")",
|
||||
LEX_STATE_ALL,
|
||||
LEX_TOKEN_TERMINATES_WORD),
|
||||
LEX_TOKEN_DEF(SYM_EQUAL, "=", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_PLUS_EQUAL, "+=", LEX_STATE_ARITHMETIC),
|
||||
LEX_TOKEN_DEF(SYM_HYPHEN_EQUAL, "-=", LEX_STATE_ARITHMETIC),
|
||||
@@ -70,17 +156,17 @@ static struct lex_token_def symbols[] = {
|
||||
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
||||
|
||||
extern const struct lex_state_type lex_statement_state;
|
||||
extern const struct lex_state_type lex_expression_state;
|
||||
extern const struct lex_state_type lex_command_state;
|
||||
extern const struct lex_state_type lex_arithmetic_state;
|
||||
extern const struct lex_state_type lex_string_state;
|
||||
extern const struct lex_state_type lex_word_state;
|
||||
|
||||
static const struct lex_state_type *state_types[] = {
|
||||
[LEX_STATE_STATEMENT] = &lex_statement_state,
|
||||
[LEX_STATE_EXPRESSION] = &lex_expression_state,
|
||||
[LEX_STATE_COMMAND] = &lex_command_state,
|
||||
[LEX_STATE_ARITHMETIC] = &lex_arithmetic_state,
|
||||
[LEX_STATE_STRING] = &lex_string_state,
|
||||
[LEX_STATE_WORD] = &lex_word_state,
|
||||
};
|
||||
|
||||
void set_token_start(struct lex_ctx *ctx)
|
||||
@@ -411,6 +497,22 @@ bool convert_word_to_keyword(struct lex_token *tok)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool convert_word_to_operator(struct lex_ctx *ctx, struct lex_token *tok)
|
||||
{
|
||||
if (!lex_token_has_string_value(tok)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
enum token_operator op = get_operator_with_string(ctx, tok->tok_str);
|
||||
if (op == OP_NONE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lex_token_change_type(tok, TOK_OPERATOR);
|
||||
tok->tok_operator = op;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int get_int_base_by_prefix(const char **s)
|
||||
{
|
||||
#define CH(x) (tolower(value[x]))
|
||||
@@ -461,6 +563,10 @@ static size_t get_int_multiplier_by_suffix(const char *suffix)
|
||||
|
||||
bool string_is_valid_number(const char *s, long long *out)
|
||||
{
|
||||
if (s[0] == '\0') {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int base = get_int_base_by_prefix(&s);
|
||||
|
||||
char *ep = NULL;
|
||||
@@ -636,93 +742,6 @@ enum bshell_status read_braced_var(
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static enum bshell_status read_flag(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_string *tmp = get_temp_string(ctx);
|
||||
|
||||
bool done = false;
|
||||
while (!done) {
|
||||
fx_wchar c = peek_char(ctx);
|
||||
if (c == FX_WCHAR_INVALID) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (fx_wchar_is_space(c)) {
|
||||
break;
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
case '{':
|
||||
case '}':
|
||||
case '(':
|
||||
case ')':
|
||||
case ';':
|
||||
case ',':
|
||||
case '|':
|
||||
case '&':
|
||||
case '$':
|
||||
done = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (done) {
|
||||
break;
|
||||
}
|
||||
|
||||
fx_string_append_wc(tmp, c);
|
||||
advance_char(ctx);
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
if (fx_string_get_size(tmp, FX_STRLEN_NORMAL) == 1) {
|
||||
tok = lex_token_create(TOK_SYMBOL);
|
||||
tok->tok_symbol = SYM_HYPHEN;
|
||||
} else {
|
||||
tok = lex_token_create_with_string(
|
||||
TOK_FLAG,
|
||||
fx_string_get_cstr(tmp));
|
||||
}
|
||||
|
||||
if (!tok) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (convert_word_to_int(tok)) {
|
||||
tok->tok_int *= -1;
|
||||
struct lex_token *prefix = lex_token_create(TOK_SYMBOL);
|
||||
prefix->tok_symbol = SYM_HYPHEN;
|
||||
enqueue_token(ctx, prefix);
|
||||
}
|
||||
#endif
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status read_interpolation_marker(struct lex_ctx *ctx)
|
||||
{
|
||||
enum bshell_status status = BSHELL_SUCCESS;
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
|
||||
if (state->s_type != LEX_STATE_STRING) {
|
||||
return BSHELL_ERR_INTERNAL_FAILURE;
|
||||
}
|
||||
|
||||
/* start of a new interpolation */
|
||||
if (!lex_state_push(ctx, LEX_STATE_STATEMENT)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
enum bshell_status read_literal_string(
|
||||
struct lex_ctx *ctx,
|
||||
struct lex_token **out)
|
||||
@@ -776,39 +795,25 @@ enum bshell_status read_line_comment(struct lex_ctx *lex)
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
#if 0
|
||||
enum bshell_status read_dquote_marker(struct lex_ctx *ctx)
|
||||
{
|
||||
enum bshell_status status = BSHELL_SUCCESS;
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
|
||||
if (state->s_type == LEX_STATE_STRING) {
|
||||
/* already within an fstring */
|
||||
lex_state_pop(ctx);
|
||||
tok = lex_token_create(TOK_STR_END);
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
/* start of a new fstring */
|
||||
tok = lex_token_create(TOK_STR_START);
|
||||
enqueue_token(ctx, tok);
|
||||
|
||||
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
enum bshell_status read_word(struct lex_ctx *ctx, struct lex_token **out)
|
||||
enum bshell_status read_word(
|
||||
struct lex_ctx *ctx,
|
||||
enum read_flags flags,
|
||||
struct lex_token **out)
|
||||
{
|
||||
fx_string *tmp = get_temp_string(ctx);
|
||||
bool word_is_number = false;
|
||||
|
||||
if (!(flags & READ_NO_SET_TOKEN_START)) {
|
||||
set_token_start(ctx);
|
||||
}
|
||||
|
||||
if (flags & READ_APPEND_HYPHEN) {
|
||||
fx_string_append_c(tmp, '-');
|
||||
}
|
||||
|
||||
bool number_recog = !(flags & READ_NO_NUMBER_RECOGNITION);
|
||||
|
||||
enum token_operator op = OP_NONE;
|
||||
bool done = false;
|
||||
while (!done) {
|
||||
fx_wchar c = peek_char(ctx);
|
||||
@@ -821,39 +826,32 @@ enum bshell_status read_word(struct lex_ctx *ctx, struct lex_token **out)
|
||||
break;
|
||||
}
|
||||
|
||||
if (word_is_number && char_can_begin_symbol(ctx, c)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
case '{':
|
||||
case '}':
|
||||
case '(':
|
||||
case ')':
|
||||
case ';':
|
||||
case ',':
|
||||
case '|':
|
||||
case '&':
|
||||
case '$':
|
||||
done = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
const char *s = fx_string_get_cstr(tmp);
|
||||
if (number_recog && string_is_valid_number(s, NULL)) {
|
||||
if (char_can_begin_symbol_in_state(
|
||||
ctx,
|
||||
c,
|
||||
LEX_STATE_ARITHMETIC)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (done) {
|
||||
break;
|
||||
if (!fx_wchar_is_alpha(c)) {
|
||||
op = get_operator_with_string(ctx, s);
|
||||
if (op != OP_NONE) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fx_string_append_wc(tmp, c);
|
||||
word_is_number
|
||||
= string_is_valid_number(fx_string_get_cstr(tmp), NULL);
|
||||
set_token_end(ctx);
|
||||
advance_char(ctx);
|
||||
}
|
||||
|
||||
@@ -868,12 +866,6 @@ enum bshell_status read_word(struct lex_ctx *ctx, struct lex_token **out)
|
||||
struct lex_token *tok = lex_token_create_with_string(
|
||||
TOK_WORD,
|
||||
fx_string_get_cstr(tmp));
|
||||
#if 0
|
||||
bool converted = convert_word_to_keyword(tok);
|
||||
if (!converted) {
|
||||
converted = convert_word_to_int(tok);
|
||||
}
|
||||
#endif
|
||||
|
||||
*out = tok;
|
||||
return BSHELL_SUCCESS;
|
||||
@@ -912,40 +904,6 @@ enum bshell_status read_symbol(
|
||||
return BSHELL_ERR_BAD_SYNTAX;
|
||||
}
|
||||
|
||||
#if 0
|
||||
struct lex_token *tok = NULL;
|
||||
switch (node->s_def->id) {
|
||||
case SYM_SQUOTE:
|
||||
return read_literal_string(ctx);
|
||||
case SYM_DQUOTE:
|
||||
return read_dquote_marker(ctx);
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
push_symbol(ctx, SYM_DOLLAR_LEFT_PAREN);
|
||||
if (state->s_type == LEX_STATE_STRING) {
|
||||
lex_state_push(ctx, LEX_STATE_STRING);
|
||||
}
|
||||
break;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
return read_braced_var(ctx, TOK_VAR);
|
||||
case SYM_HASH:
|
||||
return read_line_comment(ctx);
|
||||
case SYM_LEFT_PAREN:
|
||||
push_symbol(ctx, SYM_LEFT_PAREN);
|
||||
lex_state_push(ctx, LEX_STATE_EXPRESSION);
|
||||
break;
|
||||
case SYM_RIGHT_PAREN:
|
||||
push_symbol(ctx, SYM_RIGHT_PAREN);
|
||||
lex_state_pop(ctx);
|
||||
break;
|
||||
case SYM_DOLLAR:
|
||||
return read_var(ctx, TOK_VAR);
|
||||
case SYM_AT:
|
||||
return read_var(ctx, TOK_VAR_SPLAT);
|
||||
default:
|
||||
push_symbol(ctx, node->s_def->id);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
*out = node->s_def;
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
@@ -974,6 +932,42 @@ bool char_can_begin_symbol(struct lex_ctx *ctx, char c)
|
||||
return char_can_begin_symbol_in_state(ctx, c, state->s_type->s_id);
|
||||
}
|
||||
|
||||
extern bool char_has_flags(
|
||||
struct lex_ctx *ctx,
|
||||
char c,
|
||||
enum lex_token_flags flags)
|
||||
{
|
||||
for (size_t i = 0; i < nr_symbols; i++) {
|
||||
if (symbols[i].name[0] != c) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return (symbols[i].flags & flags) == flags;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
enum token_operator get_operator_with_string(struct lex_ctx *ctx, const char *s)
|
||||
{
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
|
||||
for (size_t i = 0; i < nr_operators; i++) {
|
||||
const char *op_str = operators[i].name;
|
||||
if (strcmp(op_str, s) != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!(operators[i].enabled_states & state->s_type->s_id)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return operators[i].id;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static enum bshell_status read_string_content(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = FX_WCHAR_INVALID;
|
||||
|
||||
@@ -1,5 +1,126 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
#if 0
|
||||
#define APPEND_HYPHEN 0x8000u
|
||||
|
||||
static enum bshell_status __read_word(
|
||||
struct lex_ctx *ctx,
|
||||
int flags,
|
||||
struct lex_token **out)
|
||||
{
|
||||
fx_string *tmp = lex_state_get_tempstr(ctx);
|
||||
fx_string_clear(tmp);
|
||||
bool word_is_number = false;
|
||||
|
||||
if (flags & APPEND_HYPHEN) {
|
||||
fx_string_append_c(tmp, '-');
|
||||
}
|
||||
|
||||
if (!(flags & READ_NO_SET_TOKEN_START)) {
|
||||
set_token_start(ctx);
|
||||
}
|
||||
|
||||
enum token_operator op = OP_NONE;
|
||||
|
||||
bool done = false;
|
||||
while (!done) {
|
||||
fx_wchar c = peek_char(ctx);
|
||||
if (c == FX_WCHAR_INVALID) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (fx_wchar_is_space(c)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (char_has_flags(ctx, c, LEX_TOKEN_TERMINATES_WORD)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
op = get_operator_with_string(
|
||||
ctx,
|
||||
fx_string_get_cstr(tmp));
|
||||
if (op != OP_NONE) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fx_string_append_wc(tmp, c);
|
||||
set_token_end(ctx);
|
||||
advance_char(ctx);
|
||||
}
|
||||
|
||||
if (fx_string_get_size(tmp, FX_STRLEN_NORMAL) == 0) {
|
||||
if (ctx->lex_status == BSHELL_SUCCESS) {
|
||||
return BSHELL_ERR_BAD_SYNTAX;
|
||||
}
|
||||
|
||||
return ctx->lex_status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
if (op != OP_NONE) {
|
||||
tok = lex_token_create(TOK_OPERATOR);
|
||||
tok->tok_operator = op;
|
||||
} else {
|
||||
tok = lex_token_create_with_string(
|
||||
TOK_WORD,
|
||||
fx_string_get_cstr(tmp));
|
||||
}
|
||||
|
||||
*out = tok;
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
static enum bshell_status statement_hyphen(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
if (!fx_wchar_is_alnum(c)) {
|
||||
push_symbol(ctx, SYM_HYPHEN);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
enum bshell_status status = read_word(
|
||||
ctx,
|
||||
READ_NO_SET_TOKEN_START | READ_APPEND_HYPHEN,
|
||||
&tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
bool converted = convert_word_to_int(tok);
|
||||
|
||||
if (converted) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
/* because of APPEND_HYPHEN (which is needed to ensure operator
|
||||
* tokens are detected properly), the resulting number will be
|
||||
* negative.
|
||||
* this token will be preceded by a HYPHEN token, so the number
|
||||
* must be positive */
|
||||
tok->tok_int *= -1;
|
||||
push_symbol(ctx, SYM_HYPHEN);
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
converted = convert_word_to_operator(ctx, tok);
|
||||
if (converted) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
} else {
|
||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status statement_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
@@ -11,6 +132,8 @@ static enum bshell_status statement_symbol(struct lex_ctx *ctx)
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
switch (sym->id) {
|
||||
case SYM_HYPHEN:
|
||||
return statement_hyphen(ctx);
|
||||
case SYM_SQUOTE:
|
||||
status = read_literal_string(ctx, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
@@ -117,7 +240,14 @@ static enum bshell_status statement_word(struct lex_ctx *ctx)
|
||||
return status;
|
||||
}
|
||||
|
||||
bool converted = convert_word_to_keyword(word);
|
||||
struct lex_state *state = lex_state_get(ctx);
|
||||
|
||||
bool converted = false;
|
||||
|
||||
if (!(state->s_flags & STATEMENT_F_DISABLE_KEYWORDS)) {
|
||||
converted = convert_word_to_keyword(word);
|
||||
}
|
||||
|
||||
if (!converted) {
|
||||
converted = convert_word_to_int(word);
|
||||
}
|
||||
@@ -154,10 +284,35 @@ static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (char_can_begin_symbol_in_state(ctx, c, LEX_STATE_ARITHMETIC)) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol_in_state(ctx, c, LEX_STATE_COMMAND)) {
|
||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
return statement_symbol(ctx);
|
||||
}
|
||||
|
||||
if (char_has_flags(ctx, c, LEX_TOKEN_UNARY_ARITHMETIC)) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (fx_wchar_is_number(c)) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
} else {
|
||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
||||
}
|
||||
#endif
|
||||
|
||||
return statement_word(ctx);
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,161 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
static enum bshell_status word_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
enum bshell_status status = read_symbol(ctx, &sym);
|
||||
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
status = push_symbol(ctx, sym->id);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_RIGHT_PAREN:
|
||||
lex_state_pop(ctx);
|
||||
|
||||
status = push_symbol(ctx, sym->id);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR:
|
||||
status = read_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT:
|
||||
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return BSHELL_ERR_BAD_SYNTAX;
|
||||
}
|
||||
|
||||
static enum bshell_status word_content(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = FX_WCHAR_INVALID;
|
||||
fx_string *temp = lex_state_get_tempstr(ctx);
|
||||
set_token_start(ctx);
|
||||
fx_string_clear(temp);
|
||||
|
||||
while (1) {
|
||||
c = peek_char(ctx);
|
||||
if (c == FX_WCHAR_INVALID) {
|
||||
/* EOF without end of word */
|
||||
ctx->lex_status = BSHELL_ERR_BAD_SYNTAX;
|
||||
}
|
||||
|
||||
if (fx_wchar_is_space(c)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
break;
|
||||
}
|
||||
|
||||
fx_string_append_wc(temp, c);
|
||||
set_token_end(ctx);
|
||||
advance_char(ctx);
|
||||
}
|
||||
|
||||
if (fx_string_get_size(temp, FX_STRLEN_NORMAL) == 0) {
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
struct lex_token *tok = lex_token_create_with_string(
|
||||
TOK_WORD,
|
||||
fx_string_get_cstr(temp));
|
||||
enqueue_token(ctx, tok);
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status word_begin(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = lex_token_create(TOK_WORD_START);
|
||||
if (!tok) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
enqueue_token_with_coordinates(
|
||||
ctx,
|
||||
tok,
|
||||
&ctx->lex_start,
|
||||
&ctx->lex_start);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status word_end(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = lex_token_create(TOK_WORD_END);
|
||||
if (!tok) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
enqueue_token_with_coordinates(ctx, tok, &ctx->lex_end, &ctx->lex_end);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status word_pump_token(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
|
||||
if (fx_wchar_is_space(c)) {
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
if (char_has_flags(ctx, c, LEX_TOKEN_TERMINATES_WORD)) {
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
return word_symbol(ctx);
|
||||
}
|
||||
|
||||
return word_content(ctx);
|
||||
}
|
||||
|
||||
const struct lex_state_type lex_word_state = {
|
||||
.s_id = LEX_STATE_WORD,
|
||||
.s_begin = word_begin,
|
||||
.s_end = word_end,
|
||||
.s_pump_token = word_pump_token,
|
||||
};
|
||||
@@ -75,6 +75,19 @@ struct lex_token *lex_token_change_type(
|
||||
return tok;
|
||||
}
|
||||
|
||||
void lex_token_change_string(struct lex_token *tok, const char *s)
|
||||
{
|
||||
if (!lex_token_has_string_value(tok)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (tok->tok_str) {
|
||||
free(tok->tok_str);
|
||||
}
|
||||
|
||||
tok->tok_str = fx_strdup(s);
|
||||
}
|
||||
|
||||
#define ENUM_STR(x) \
|
||||
case x: \
|
||||
return #x
|
||||
@@ -88,6 +101,9 @@ const char *token_type_to_string(enum token_type type)
|
||||
ENUM_STR(TOK_INT);
|
||||
ENUM_STR(TOK_DOUBLE);
|
||||
ENUM_STR(TOK_WORD);
|
||||
ENUM_STR(TOK_WORD_START);
|
||||
ENUM_STR(TOK_WORD_END);
|
||||
ENUM_STR(TOK_OPERATOR);
|
||||
ENUM_STR(TOK_VAR);
|
||||
ENUM_STR(TOK_VAR_SPLAT);
|
||||
ENUM_STR(TOK_FLAG);
|
||||
@@ -148,3 +164,41 @@ const char *token_symbol_to_string(enum token_symbol sym)
|
||||
return "<unknown>";
|
||||
}
|
||||
}
|
||||
|
||||
const char *token_operator_to_string(enum token_operator op)
|
||||
{
|
||||
switch (op) {
|
||||
ENUM_STR(OP_BAND);
|
||||
ENUM_STR(OP_BOR);
|
||||
ENUM_STR(OP_BXOR);
|
||||
ENUM_STR(OP_BNOT);
|
||||
ENUM_STR(OP_SHL);
|
||||
ENUM_STR(OP_SHR);
|
||||
ENUM_STR(OP_EQ);
|
||||
ENUM_STR(OP_NE);
|
||||
ENUM_STR(OP_GT);
|
||||
ENUM_STR(OP_LT);
|
||||
ENUM_STR(OP_GE);
|
||||
ENUM_STR(OP_LE);
|
||||
ENUM_STR(OP_MATCH);
|
||||
ENUM_STR(OP_NOTMATCH);
|
||||
ENUM_STR(OP_REPLACE);
|
||||
ENUM_STR(OP_LIKE);
|
||||
ENUM_STR(OP_NOTLIKE);
|
||||
ENUM_STR(OP_IN);
|
||||
ENUM_STR(OP_NOTIN);
|
||||
ENUM_STR(OP_CONTAINS);
|
||||
ENUM_STR(OP_NOTCONTAINS);
|
||||
ENUM_STR(OP_AND);
|
||||
ENUM_STR(OP_OR);
|
||||
ENUM_STR(OP_XOR);
|
||||
ENUM_STR(OP_NOT);
|
||||
ENUM_STR(OP_SPLIT);
|
||||
ENUM_STR(OP_JOIN);
|
||||
ENUM_STR(OP_IS);
|
||||
ENUM_STR(OP_ISNOT);
|
||||
ENUM_STR(OP_AS);
|
||||
default:
|
||||
return "<unknown>";
|
||||
}
|
||||
}
|
||||
|
||||
+43
-1
@@ -16,7 +16,10 @@ enum token_type {
|
||||
TOK_INT,
|
||||
TOK_DOUBLE,
|
||||
TOK_WORD,
|
||||
TOK_WORD_START,
|
||||
TOK_WORD_END,
|
||||
TOK_FLAG,
|
||||
TOK_OPERATOR,
|
||||
TOK_VAR,
|
||||
TOK_VAR_SPLAT,
|
||||
TOK_STRING,
|
||||
@@ -35,9 +38,45 @@ enum token_keyword {
|
||||
__KW_INDEX_LIMIT,
|
||||
};
|
||||
|
||||
/*
 * Identifiers for named operator tokens.
 *
 * OP_NONE (0) is the "no operator" value. The real operators are numbered
 * consecutively starting at __OP_INDEX_BASE + 1 (301); __OP_INDEX_LIMIT is
 * one past the last operator, so a valid operator id satisfies
 * __OP_INDEX_BASE < id < __OP_INDEX_LIMIT.
 */
enum token_operator {
	OP_NONE = 0,

	__OP_INDEX_BASE = 300,

	OP_BAND,
	OP_BOR,
	OP_BXOR,
	OP_BNOT,
	OP_SHL,
	OP_SHR,

	OP_EQ,
	OP_NE,
	OP_GT,
	OP_LT,
	OP_GE,
	OP_LE,

	OP_MATCH,
	OP_NOTMATCH,
	OP_REPLACE,
	OP_LIKE,
	OP_NOTLIKE,

	OP_IN,
	OP_NOTIN,
	OP_CONTAINS,
	OP_NOTCONTAINS,

	OP_AND,
	OP_OR,
	OP_XOR,
	OP_NOT,

	OP_SPLIT,
	OP_JOIN,

	OP_IS,
	OP_ISNOT,
	OP_AS,

	__OP_INDEX_LIMIT,
};
|
||||
|
||||
enum token_symbol {
|
||||
SYM_NONE = 0,
|
||||
__SYM_INDEX_BASE = 300,
|
||||
__SYM_INDEX_BASE = 400,
|
||||
SYM_PLUS,
|
||||
SYM_HYPHEN,
|
||||
SYM_FORWARD_SLASH,
|
||||
@@ -80,6 +119,7 @@ struct lex_token {
|
||||
union {
|
||||
enum token_keyword tok_keyword;
|
||||
enum token_symbol tok_symbol;
|
||||
enum token_operator tok_operator;
|
||||
long long tok_int;
|
||||
double tok_double;
|
||||
char *tok_str;
|
||||
@@ -95,6 +135,7 @@ extern void lex_token_destroy(struct lex_token *tok);
|
||||
extern struct lex_token *lex_token_change_type(
|
||||
struct lex_token *tok,
|
||||
enum token_type new_type);
|
||||
extern void lex_token_change_string(struct lex_token *tok, const char *s);
|
||||
|
||||
static inline bool lex_token_is_symbol(
|
||||
struct lex_token *tok,
|
||||
@@ -129,5 +170,6 @@ static inline bool lex_token_has_string_value(const struct lex_token *tok)
|
||||
extern const char *token_type_to_string(enum token_type type);
|
||||
extern const char *token_keyword_to_string(enum token_keyword keyword);
|
||||
extern const char *token_symbol_to_string(enum token_symbol sym);
|
||||
extern const char *token_operator_to_string(enum token_operator op);
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user