Compare commits

...

9 Commits

34 changed files with 3743 additions and 2 deletions
+27 -1
View File
@@ -14,8 +14,29 @@ syn keyword bshellKeyword func
" Markers that are highlighted when they appear inside a comment.
syn keyword bshellTodo contained TODO FIXME XXX NOTE HACK TBD
" A line comment runs from '#' to the end of the line.
syn match bshellLineComment /#.*$/ contains=bshellTodo
" NOTE(review): bshellInterpolatedString and bshellInterpolation are each
" defined twice below with different options. This looks like the old and new
" side of a diff shown together - confirm which definitions should remain.
syn region bshellInterpolatedString matchgroup=bshellString start=+"+ end=+"+ extend contains=bshellVariable
syn region bshellInterpolation matchgroup=bshellInterpolationDelimiter start=+$(+ end=+)+ keepend contained contains=@bshellAll
syn region bshellInterpolation matchgroup=bshellInterpolationDelimiter start="$(" end=")" contained contains=ALL
syn region bshellInterpolatedString matchgroup=bshellString start=+"+ end=+"+ extend contains=bshellVariable,bshellInterpolation
" Single-quoted strings.
syn region bshellLiteralString matchgroup=bshellString start=+\'+ end=+\'+ extend contains=bshellSpecialChar,bshellSpecialError,bshellUnicodeNumber,@Spell
syn region bshell1NestedParentheses start="(" skip="\\\\\|\\)" matchgroup=bshellInterpolation end=")" transparent contained
" Numeric literals are matched case-insensitively (suffixes, 0x/0b prefixes,
" exponent marker).
syn case ignore
" Binary, decimal, negative decimal and hexadecimal integers; '_' is accepted
" as a digit separator and an optional l/u/lu/ul suffix may follow.
syn match bshellInteger "\<0b[01_]*[01]\%([lu]\|lu\|ul\)\=\>" display
syn match bshellInteger "\<\d\+\%(_\+\d\+\)*\%([lu]\|lu\|ul\)\=\>" display
syn match bshellInteger "\<-\d\+\%(_\+\d\+\)*\%([lu]\|lu\|ul\)\=\>" display
syn match bshellInteger "\<0x[[:xdigit:]_]*\x\%([lu]\|lu\|ul\)\=\>" display
" Real numbers: with a fractional part, with a leading '.', with an exponent
" only, or an integer carrying an f/d/m suffix.
syn match bshellReal "\<\d\+\%(_\+\d\+\)*\.\d\+\%(_\+\d\+\)*\%\(e[-+]\=\d\+\%(_\+\d\+\)*\)\=[fdm]\=" display
syn match bshellReal "\.\d\+\%(_\+\d\+\)*\%(e[-+]\=\d\+\%(_\+\d\+\)*\)\=[fdm]\=\>" display
syn match bshellReal "\<\d\+\%(_\+\d\+\)*e[-+]\=\d\+\%(_\+\d\+\)*[fdm]\=\>" display
syn match bshellReal "\<\d\+\%(_\+\d\+\)*[fdm]\>" display
syn case match
" Clusters used to reference groups of syntax items by a single name.
syn cluster bshellNumber contains=bshellInteger,bshellReal
" NOTE(review): no @bshellString cluster is defined in the visible part of
" this file - confirm it exists elsewhere.
syn cluster bshellLiteral contains=@bshellNumber,@bshellString
syn cluster bshellAll contains=@bshellLiteral,bshellVariable,bshellKeyword,bshellArgFlag,bshellFunctionRef,bshellSymbolOp,bshellKeywordOp
" Operators: the '+' symbol and word-style operators.
syn match bshellSymbolOp "[+]" display
syn keyword bshellKeywordOp is not understands and or
" Default highlight links.
hi def link bshellKeyword Statement
hi def link bshellArgFlag Tag
@@ -25,7 +46,12 @@ hi def link bshellFunctionRef Function
" Link the string, number, delimiter and operator groups defined above to
" Vim's standard highlight groups.
hi def link bshellString String
hi def link bshellInterpolatedString String
hi def link bshellLiteralString String
hi def link bshellInteger Number
hi def link bshellReal Float
hi def link bshellInterpolationDelimiter Delimiter
hi def link bshellSymbolOp Operator
hi def link bshellKeywordOp Operator
" The default highlighting.
" hi def link bshellUnspecifiedStatement Statement
+166
View File
@@ -0,0 +1,166 @@
#include "ast.h"
#include "../status.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
/* Per-type node definitions; each one is defined outside this file. */
extern struct ast_node_definition int_ast_node;
extern struct ast_node_definition double_ast_node;
extern struct ast_node_definition word_ast_node;
extern struct ast_node_definition var_ast_node;
extern struct ast_node_definition string_ast_node;
extern struct ast_node_definition fstring_ast_node;
extern struct ast_node_definition cmdcall_ast_node;
extern struct ast_node_definition pipeline_ast_node;
extern struct ast_node_definition redirection_ast_node;
/*
 * Dispatch table indexed by enum ast_node_type.
 *
 * Only the types designated below have an entry. Any other index that lies
 * inside the array bounds is implicitly NULL, so lookups must be prepared
 * for a NULL definition.
 */
static const struct ast_node_definition *ast_node_defintions[] = {
[AST_INT] = &int_ast_node,
[AST_DOUBLE] = &double_ast_node,
[AST_WORD] = &word_ast_node,
[AST_VAR] = &var_ast_node,
[AST_STRING] = &string_ast_node,
[AST_FSTRING] = &fstring_ast_node,
[AST_CMDCALL] = &cmdcall_ast_node,
[AST_PIPELINE] = &pipeline_ast_node,
[AST_REDIRECTION] = &redirection_ast_node,
};
/* Number of slots in the table (highest designated index plus one). */
static const size_t nr_ast_node_definitions
= sizeof ast_node_defintions / sizeof ast_node_defintions[0];
/**
 * Allocate a zero-initialised AST node of the given type.
 *
 * The allocation size comes from the type's entry in the definition table,
 * so the returned pointer may be cast to the matching concrete node struct.
 *
 * @param type  Node type to create.
 * @return      The new node with n_type set, or NULL if the type has no
 *              registered definition or the allocation failed. The caller
 *              owns the node and releases it with ast_node_destroy().
 */
struct ast_node *ast_node_create(enum ast_node_type type)
{
	assert(type < nr_ast_node_definitions);
	/* assert() disappears under NDEBUG, so also guard at run time. */
	if ((size_t)type >= nr_ast_node_definitions) {
		return NULL;
	}

	/* Not every in-range type has a table entry; those slots are NULL. */
	const struct ast_node_definition *def = ast_node_defintions[type];
	if (!def) {
		return NULL;
	}

	struct ast_node *out = calloc(1, def->def_node_size);
	if (!out) {
		return NULL;
	}

	out->n_type = type;
	return out;
}
/**
 * Destroy a node and every node reachable from it through the iterator.
 *
 * Nodes are visited with a local ast_iterator. For each node the type's
 * def_cleanup callback (if any) runs first, then the node is dequeued
 * (which enqueues its children) and freed.
 *
 * @param node  Root of the tree to destroy. NULL is accepted and ignored.
 */
void ast_node_destroy(struct ast_node *node)
{
	if (!node) {
		return;
	}

	assert(node->n_type < nr_ast_node_definitions);

	struct ast_iterator it = {0};
	ast_iterator_enqueue(&it, node);

	while ((node = ast_iterator_peek(&it)) != NULL) {
		/* Types without a table entry have a NULL definition. */
		const struct ast_node_definition *def
			= ast_node_defintions[node->n_type];
		if (def && def->def_cleanup) {
			def->def_cleanup(node);
		}

		/*
		 * Dequeue before freeing: dequeueing reads the node to collect
		 * its children into the iterator queue.
		 */
		ast_iterator_dequeue(&it);
		free(node);
	}
}
/*
 * Begin iterating a tree: enqueues `node` on the iterator `it`.
 * Callers then alternate ast_iterator_peek() and ast_iterator_dequeue().
 */
void ast_node_iterate(struct ast_node *node, struct ast_iterator *it)
{
ast_iterator_enqueue(it, node);
}
/**
 * Append a textual form of the node's value to `out`.
 *
 * Dispatches to the type's def_to_string callback. Nothing is written when
 * the node type is out of range, has no registered definition, or the
 * definition provides no def_to_string callback.
 *
 * @param node  Node to describe.
 * @param out   Buffer string that receives the text.
 */
void ast_node_to_string(const struct ast_node *node, fx_bstr *out)
{
	if ((size_t)node->n_type >= nr_ast_node_definitions) {
		return;
	}

	/* Table slots for types without a definition are NULL. */
	const struct ast_node_definition *def
		= ast_node_defintions[node->n_type];
	if (def && def->def_to_string) {
		def->def_to_string(node, out);
	}
}
#define ENUM_STR(x) \
case x: \
return #x
const char *ast_node_type_to_string(enum ast_node_type type)
{
switch (type) {
ENUM_STR(AST_NONE);
ENUM_STR(AST_INT);
ENUM_STR(AST_DOUBLE);
ENUM_STR(AST_WORD);
ENUM_STR(AST_STRING);
ENUM_STR(AST_FSTRING);
ENUM_STR(AST_VAR);
ENUM_STR(AST_VAR_SPLAT);
ENUM_STR(AST_FLAG);
ENUM_STR(AST_CMDCALL);
ENUM_STR(AST_PIPELINE);
ENUM_STR(AST_REDIRECTION);
default:
return "<unknown>";
}
}
/**
 * Return the node at the front of the iterator queue without removing it.
 *
 * @param it  Iterator to inspect.
 * @return    The front node, or NULL when the queue is empty.
 */
struct ast_node *ast_iterator_peek(struct ast_iterator *it)
{
	fx_queue_entry *head = fx_queue_first(&it->it_queue);
	struct ast_node *front = NULL;

	if (head) {
		front = fx_unbox(struct ast_node, head, n_it.e_entry);
	}

	return front;
}
/**
 * Remove and return the node at the front of the iterator queue.
 *
 * Before the node is popped, its type's def_collect_children callback (if
 * any) is invoked with the insertion point set to the node itself, so the
 * children are queued immediately behind it and are visited next.
 *
 * @param it  Iterator to advance.
 * @return    The node that was removed, or NULL when the queue is empty.
 */
struct ast_node *ast_iterator_dequeue(struct ast_iterator *it)
{
	fx_queue_entry *cur = fx_queue_first(&it->it_queue);
	if (!cur) {
		return NULL;
	}

	struct ast_node *node = fx_unbox(struct ast_node, cur, n_it.e_entry);
	/* Types without a table entry have a NULL definition. */
	const struct ast_node_definition *def
		= ast_node_defintions[node->n_type];

	it->it_insert_after = cur;
	if (def && def->def_collect_children) {
		def->def_collect_children(node, it);
	}

	fx_queue_pop_front(&it->it_queue);

	/*
	 * Do not keep the insertion cursor past this call: when no child was
	 * collected it still refers to the entry that was just popped (and
	 * that the caller may free), which must never be used as an anchor.
	 */
	it->it_insert_after = NULL;

	return node;
}
/**
 * Add a node to the iterator queue and record its depth.
 *
 * The depth is one more than the depth of the node currently at the front
 * of the queue, or 0 when the queue is empty. If the iterator has an
 * insertion cursor, the node is linked right after it and becomes the new
 * cursor; otherwise the node is appended to the back of the queue.
 *
 * @param it    Iterator receiving the node.
 * @param node  Node to enqueue.
 */
void ast_iterator_enqueue(struct ast_iterator *it, struct ast_node *node)
{
	unsigned long depth = 0;
	fx_queue_entry *head = fx_queue_first(&it->it_queue);

	if (head) {
		struct ast_node *front
			= fx_unbox(struct ast_node, head, n_it.e_entry);
		depth = front->n_it.e_depth + 1;
	}
	node->n_it.e_depth = depth;

	fx_queue_entry *entry = &node->n_it.e_entry;
	if (it->it_insert_after) {
		fx_queue_insert_after(
			&it->it_queue,
			entry,
			it->it_insert_after);
		it->it_insert_after = entry;
		return;
	}

	fx_queue_push_back(&it->it_queue, entry);
}
+126
View File
@@ -0,0 +1,126 @@
#ifndef AST_H_
#define AST_H_
#include "../status.h"
#include <fx/bstr.h>
#include <fx/queue.h>
struct lex_token;
/*
 * Discriminator stored in ast_node.n_type. It selects the concrete
 * *_ast_node struct that a node was allocated as and the definition used
 * to operate on it.
 */
enum ast_node_type {
AST_NONE = 0x00u,
AST_INT,
AST_DOUBLE,
AST_WORD,
AST_STRING,
AST_FSTRING,
AST_VAR,
AST_VAR_SPLAT,
AST_FLAG,
AST_CMDCALL,
AST_PIPELINE,
AST_REDIRECTION,
};
/* Per-node bookkeeping used while a node sits in an ast_iterator queue. */
struct ast_iterator_entry {
/* Link in ast_iterator.it_queue. */
fx_queue_entry e_entry;
/* Depth assigned by ast_iterator_enqueue(); 0 for the first node queued. */
unsigned long e_depth;
};
/* Common header embedded as the first member of every concrete node. */
struct ast_node {
/* Concrete type of this node. */
enum ast_node_type n_type;
/* NOTE(review): presumably the owning node; not read or written in the
 * code reviewed here - confirm. */
struct ast_node *n_parent;
/* Link in the parent's child queue (e.g. cmdcall n_args, pipeline
 * n_stages). */
fx_queue_entry n_entry;
/* Iterator state; separate from n_entry so that iterating does not
 * disturb the parent's child queue. */
struct ast_iterator_entry n_it;
};
/*
 * Concrete node types. Each starts with a struct ast_node so that a
 * struct ast_node pointer can be cast to the concrete type once n_type is
 * known.
 */

/* Integer literal; the value is read from n_value->tok_int. */
struct int_ast_node {
struct ast_node n_base;
struct lex_token *n_value;
};
/* Floating-point literal. */
struct double_ast_node {
struct ast_node n_base;
struct lex_token *n_value;
};
/* Bare word; the text is read from n_value->tok_str. */
struct word_ast_node {
struct ast_node n_base;
struct lex_token *n_value;
};
/* String; the text is read from n_value->tok_str. */
struct string_ast_node {
struct ast_node n_base;
struct lex_token *n_value;
};
/* NOTE(review): presumably an interpolated string whose parts are queued
 * on n_elements - confirm against the parser. */
struct fstring_ast_node {
struct ast_node n_base;
fx_queue n_elements;
};
/* Variable reference; the name is read from n_ident->tok_str. */
struct var_ast_node {
struct ast_node n_base;
struct lex_token *n_ident;
};
/* Splatted variable reference. */
struct var_splat_ast_node {
struct ast_node n_base;
struct lex_token *n_ident;
};
/* Command call; both queues hold child nodes linked through n_entry. */
struct cmdcall_ast_node {
struct ast_node n_base;
fx_queue n_args;
fx_queue n_redirect;
};
/* Pipeline; n_stages holds child nodes linked through n_entry. */
struct pipeline_ast_node {
struct ast_node n_base;
fx_queue n_stages;
};
/* Redirection from descriptor n_in to a descriptor, a path or an
 * expression. */
struct redirection_ast_node {
struct ast_node n_base;
/* Rendered as ">>" when set, ">" otherwise. */
bool n_append : 1;
/* Rendered with a leading '&' on the target when set. */
bool n_out_is_fd : 1;
/* Target is the expression in n_out_path_expr. */
bool n_out_is_expr : 1;
unsigned int n_in, n_out;
/* Child node visited during iteration when non-NULL. */
struct ast_node *n_out_path_expr;
const char *n_out_path;
/* NOTE(review): not used in the code reviewed here; presumably the token
 * that n_out_path came from - confirm. */
struct lex_token *n_out_tok;
};
/* Traversal state. A zero-initialised iterator is a valid empty one. */
struct ast_iterator {
/* NOTE(review): it_cur and it_depth are not used by the functions in
 * ast.c - confirm whether they are still needed. */
struct ast_node *it_cur;
/* Pending nodes, linked through ast_node.n_it.e_entry. */
fx_queue it_queue;
unsigned int it_depth;
/* When non-NULL, ast_iterator_enqueue() links new nodes after this entry
 * instead of appending to the back of the queue. */
fx_queue_entry *it_insert_after;
};
/* Per-type description and callbacks. Callbacks may be left NULL. */
struct ast_node_definition {
enum ast_node_type def_id;
/* Size in bytes of the concrete node struct; used by ast_node_create(). */
size_t def_node_size;
/* Enqueues the node's children on the iterator; called from
 * ast_iterator_dequeue(). */
enum bshell_status (*def_collect_children)(
struct ast_node *,
struct ast_iterator *);
/* Called by ast_node_destroy() before the node is freed. */
enum bshell_status (*def_cleanup)(struct ast_node *);
/* Appends the node's value to the buffer; called from
 * ast_node_to_string(). */
void (*def_to_string)(const struct ast_node *, fx_bstr *);
};
/* Allocate a zeroed node of `type`; returns NULL on allocation failure. */
extern struct ast_node *ast_node_create(enum ast_node_type type);
/* Run cleanup callbacks on and free `node` and all nodes reachable from it. */
extern void ast_node_destroy(struct ast_node *node);
/* Start a traversal by enqueueing `node` on `it`. */
extern void ast_node_iterate(struct ast_node *node, struct ast_iterator *it);
/* Append the node's value to `out` via its type's def_to_string callback. */
extern void ast_node_to_string(const struct ast_node *node, fx_bstr *out);
/* Enumerator name for `type`, or "<unknown>". */
extern const char *ast_node_type_to_string(enum ast_node_type type);
/* Front node of the traversal queue, or NULL when it is empty. */
extern struct ast_node *ast_iterator_peek(struct ast_iterator *it);
/* Pop the front node after queueing its children; NULL when empty. */
extern struct ast_node *ast_iterator_dequeue(struct ast_iterator *it);
/* Queue `node` on `it` and assign its depth. */
extern void ast_iterator_enqueue(
struct ast_iterator *it,
struct ast_node *node);
#endif
+31
View File
@@ -0,0 +1,31 @@
#include "ast.h"
/*
 * Child collector for command-call nodes: enqueues every argument node,
 * then every redirection node, in queue order.
 * Always returns BSHELL_SUCCESS.
 */
static enum bshell_status collect_children(
	struct ast_node *node,
	struct ast_iterator *it)
{
	struct cmdcall_ast_node *call = (struct cmdcall_ast_node *)node;
	fx_queue_entry *link;

	for (link = fx_queue_first(&call->n_args); link;
	     link = fx_queue_next(link)) {
		struct ast_node *arg = fx_unbox(struct ast_node, link, n_entry);
		ast_iterator_enqueue(it, arg);
	}

	for (link = fx_queue_first(&call->n_redirect); link;
	     link = fx_queue_next(link)) {
		struct ast_node *redirect
			= fx_unbox(struct ast_node, link, n_entry);
		ast_iterator_enqueue(it, redirect);
	}

	return BSHELL_SUCCESS;
}
/* Definition for AST_CMDCALL nodes: has children but no to_string or
 * cleanup callback. */
struct ast_node_definition cmdcall_ast_node = {
.def_id = AST_CMDCALL,
.def_node_size = sizeof(struct cmdcall_ast_node),
.def_collect_children = collect_children,
};
+6
View File
@@ -0,0 +1,6 @@
#include "ast.h"
/* Definition for AST_DOUBLE nodes: a leaf type with no callbacks, so
 * ast_node_to_string() writes nothing for it. */
struct ast_node_definition double_ast_node = {
.def_id = AST_DOUBLE,
.def_node_size = sizeof(struct double_ast_node),
};
+6
View File
@@ -0,0 +1,6 @@
#include "ast.h"
/*
 * Definition for AST_FSTRING nodes.
 * NOTE(review): no def_collect_children is provided, so nodes queued on
 * fstring_ast_node.n_elements are not visited by the iterator and therefore
 * not freed by ast_node_destroy() - confirm whether that is intended.
 */
struct ast_node_definition fstring_ast_node = {
.def_id = AST_FSTRING,
.def_node_size = sizeof(struct fstring_ast_node),
};
+14
View File
@@ -0,0 +1,14 @@
#include "../parse/token.h"
#include "ast.h"
/*
 * Append the integer literal's value, formatted as a decimal number, to
 * `out`. The node is only read, so the const qualifier is kept through the
 * downcast.
 */
static void to_string(const struct ast_node *node, fx_bstr *out)
{
	const struct int_ast_node *i = (const struct int_ast_node *)node;
	fx_bstr_write_fmt(out, NULL, "%lld", i->n_value->tok_int);
}
/* Definition for AST_INT nodes: a leaf type that can render its value. */
struct ast_node_definition int_ast_node = {
.def_id = AST_INT,
.def_node_size = sizeof(struct int_ast_node),
.def_to_string = to_string,
};
+23
View File
@@ -0,0 +1,23 @@
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct pipeline_ast_node *pipeline = (struct pipeline_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&pipeline->n_stages);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
/* Definition for AST_PIPELINE nodes: has children but no to_string or
 * cleanup callback. */
struct ast_node_definition pipeline_ast_node = {
.def_id = AST_PIPELINE,
.def_node_size = sizeof(struct pipeline_ast_node),
.def_collect_children = collect_children,
};
+49
View File
@@ -0,0 +1,49 @@
#include "ast.h"
/*
 * Child collector for redirection nodes: the only possible child is the
 * target path expression, which is enqueued when present.
 * Always returns BSHELL_SUCCESS.
 */
static enum bshell_status collect_children(
	struct ast_node *node,
	struct ast_iterator *it)
{
	struct redirection_ast_node *redir
		= (struct redirection_ast_node *)node;
	struct ast_node *target = redir->n_out_path_expr;

	if (!target) {
		return BSHELL_SUCCESS;
	}

	ast_iterator_enqueue(it, target);
	return BSHELL_SUCCESS;
}
/*
 * Append a description of the redirection to `out`, in the form
 *   &<in> >|>> [&]<target>
 * where <target> is "<expr>" for an expression target, the quoted path when
 * a path is set, and the numeric n_out otherwise.
 *
 * The node is only read, so the const qualifier is kept through the
 * downcast.
 */
static void to_string(const struct ast_node *node, fx_bstr *out)
{
	const struct redirection_ast_node *redirection
		= (const struct redirection_ast_node *)node;

	fx_bstr_write_fmt(out, NULL, "&%u", redirection->n_in);
	fx_bstr_write_fmt(
		out,
		NULL,
		"%s",
		redirection->n_append ? " >>" : " >");
	fx_bstr_write_fmt(
		out,
		NULL,
		"%s",
		redirection->n_out_is_fd ? " &" : " ");

	if (redirection->n_out_is_expr) {
		fx_bstr_write_fmt(out, NULL, "<expr>");
	} else if (redirection->n_out_path) {
		fx_bstr_write_fmt(out, NULL, "'%s'", redirection->n_out_path);
	} else {
		fx_bstr_write_fmt(out, NULL, "%u", redirection->n_out);
	}
}
/* Definition for AST_REDIRECTION nodes: may have one child (the target
 * expression) and can render itself. */
struct ast_node_definition redirection_ast_node = {
.def_id = AST_REDIRECTION,
.def_node_size = sizeof(struct redirection_ast_node),
.def_collect_children = collect_children,
.def_to_string = to_string,
};
+15
View File
@@ -0,0 +1,15 @@
#include "../parse/token.h"
#include "ast.h"
/* Append the string node's text (its token's tok_str) to `out`. */
static void to_string(const struct ast_node *node, fx_bstr *out)
{
	const struct string_ast_node *self
		= (const struct string_ast_node *)node;
	const struct lex_token *value = self->n_value;

	fx_bstr_write_fmt(out, NULL, "%s", value->tok_str);
}
/* Definition for AST_STRING nodes: a leaf type that can render its text. */
struct ast_node_definition string_ast_node = {
.def_id = AST_STRING,
.def_node_size = sizeof(struct string_ast_node),
.def_to_string = to_string,
};
+14
View File
@@ -0,0 +1,14 @@
#include "../parse/token.h"
#include "ast.h"
/* Append the variable's name (its identifier token's tok_str) to `out`. */
static void to_string(const struct ast_node *node, fx_bstr *out)
{
	const struct var_ast_node *self = (const struct var_ast_node *)node;
	const struct lex_token *ident = self->n_ident;

	fx_bstr_write_fmt(out, NULL, "%s", ident->tok_str);
}
/* Definition for AST_VAR nodes: a leaf type that can render its name. */
struct ast_node_definition var_ast_node = {
.def_id = AST_VAR,
.def_node_size = sizeof(struct var_ast_node),
.def_to_string = to_string,
};
+14
View File
@@ -0,0 +1,14 @@
#include "../parse/token.h"
#include "ast.h"
/* Append the word node's text (its token's tok_str) to `out`. */
static void to_string(const struct ast_node *node, fx_bstr *out)
{
	const struct word_ast_node *self = (const struct word_ast_node *)node;
	const struct lex_token *value = self->n_value;

	fx_bstr_write_fmt(out, NULL, "%s", value->tok_str);
}
/* Definition for AST_WORD nodes: a leaf type that can render its text. */
struct ast_node_definition word_ast_node = {
.def_id = AST_WORD,
.def_node_size = sizeof(struct word_ast_node),
.def_to_string = to_string,
};
+135
View File
@@ -0,0 +1,135 @@
#include "debug.h"
#include "ast/ast.h"
#include "parse/token.h"
#include <fx/string.h>
#include <fx/term/print.h>
#include <stdio.h>
/*
 * Debug helper: print one token on its own line as
 *   [start_row:start_col - end_row:end_col] TYPE(payload)
 * The type name is wrapped in a colour markup tag chosen by token type and
 * the line ends with a "[reset]" tag. The payload is printed only for token
 * types that carry one.
 */
extern void print_lex_token(struct lex_token *tok)
{
	printf("[%lu:%lu - %lu:%lu] ",
	       tok->tok_start.c_row,
	       tok->tok_start.c_col,
	       tok->tok_end.c_row,
	       tok->tok_end.c_col);

	/* Pick the colour tag first; NULL means "leave uncoloured". */
	const char *colour = NULL;
	switch (tok->tok_type) {
	case TOK_KEYWORD:
		colour = "[magenta]";
		break;
	case TOK_SYMBOL:
		colour = "[blue]";
		break;
	case TOK_INT:
	case TOK_DOUBLE:
		colour = "[yellow]";
		break;
	case TOK_FLAG:
		colour = "[red]";
		break;
	case TOK_WORD:
	case TOK_VAR:
	case TOK_VAR_SPLAT:
		colour = "[cyan]";
		break;
	case TOK_STRING:
	case TOK_STR_START:
	case TOK_STR_END:
		colour = "[green]";
		break;
	case TOK_LINEFEED:
		colour = "[dark_grey]";
		break;
	default:
		break;
	}
	if (colour) {
		fx_puts(colour);
	}

	fx_puts(token_type_to_string(tok->tok_type));

	/* Payload, for the token types that have one. */
	switch (tok->tok_type) {
	case TOK_INT:
		printf("(%lld)", tok->tok_int);
		break;
	case TOK_DOUBLE:
		printf("(%lf)", tok->tok_double);
		break;
	case TOK_KEYWORD:
		printf("(%s)", token_keyword_to_string(tok->tok_keyword));
		break;
	case TOK_SYMBOL:
		printf("(%s)", token_symbol_to_string(tok->tok_symbol));
		break;
	case TOK_WORD:
	case TOK_FLAG:
	case TOK_STRING:
	case TOK_VAR:
	case TOK_VAR_SPLAT:
		printf("(%s)", tok->tok_str);
		break;
	default:
		break;
	}

	fx_puts("[reset]\n");
}
void print_ast_node(struct ast_node *node)
{
struct ast_iterator it = {0};
ast_node_iterate(node, &it);
while (1) {
node = ast_iterator_peek(&it);
if (!node) {
break;
}
for (unsigned long i = 0; i < node->n_it.e_depth; i++) {
fx_puts(" ");
}
switch (node->n_type) {
case AST_REDIRECTION:
case AST_PIPELINE:
fx_puts("[blue]");
break;
case AST_CMDCALL:
fx_puts("[red]");
break;
case AST_INT:
case AST_DOUBLE:
fx_puts("[yellow]");
break;
case AST_WORD:
fx_puts("[cyan]");
break;
case AST_STRING:
case AST_FSTRING:
fx_puts("[green]");
break;
default:
break;
}
fx_printf("%s", ast_node_type_to_string(node->n_type));
char s[128] = {0};
fx_bstr str;
fx_bstr_begin(&str, s, sizeof s);
ast_node_to_string(node, &str);
if (fx_bstr_get_size(&str)) {
fx_printf("(%s)", fx_bstr_end(&str));
}
fx_printf("[reset]\n");
ast_iterator_dequeue(&it);
}
}
+12
View File
@@ -0,0 +1,12 @@
#ifndef DEBUG_H_
#define DEBUG_H_
#include <stdbool.h>
struct ast_node;
struct lex_token;
/* Print a single lexer token (position, type and payload) to stdout. */
extern void print_lex_token(struct lex_token *tok);
/* Print the AST rooted at `node`, one indented line per node. */
extern void print_ast_node(struct ast_node *node);
#endif
+69
View File
@@ -0,0 +1,69 @@
#ifndef LEX_H_
#define LEX_H_
#include "../status.h"
#include <fx/queue.h>
#include <fx/string.h>
#include <fx/stringstream.h>
struct lex_token;
struct line_source;
/* Option flags passed to lex_ctx_init(). */
enum lex_flags {
/* NOTE(review): presumably makes the lexer print each token it produces -
 * confirm in lex.c. */
LEX_PRINT_TOKENS = 0x01u,
};
/*
 * Lexer state identifiers. The values are distinct bits so that several
 * states can be combined in a mask (see lex_token_def.enabled_states).
 */
enum lex_state_type_id {
LEX_STATE_STATEMENT = 0x01u,
LEX_STATE_EXPRESSION = 0x02u,
LEX_STATE_COMMAND = 0x04u,
LEX_STATE_ARITHMETIC = 0x08u,
LEX_STATE_STRING = 0x10u,
};
/* Static description of a symbol or keyword the lexer recognises. */
struct lex_token_def {
/* Identifier of the symbol/keyword (compared against SYM_* values by the
 * state handlers). */
int id;
const char *name;
/* NOTE(review): presumably a hash of `name` used for lookup - confirm. */
uint64_t name_hash;
/* Bit mask of lex_state_type_id values. */
enum lex_state_type_id enabled_states;
};
/* NOTE(review): looks like one node of a per-character symbol tree, with
 * s_def set where a complete symbol ends - confirm in lex.c. */
struct lex_symbol_node {
char s_char;
struct lex_token_def *s_def;
fx_queue_entry s_entry;
fx_queue s_children;
};
/* One entry of the lexer's state stack. */
struct lex_state {
const struct lex_state_type *s_type;
unsigned int s_paren_depth;
/* Link in lex_ctx.lex_state. */
fx_queue_entry s_entry;
/* Scratch string belonging to this state. */
fx_string *s_tempstr;
};
/* Lexer context. */
struct lex_ctx {
enum lex_flags lex_flags;
/* Tokens produced but not yet consumed. */
fx_queue lex_tokens;
struct line_source *lex_src;
fx_stringstream *lex_buf;
fx_string *lex_tmp;
fx_wchar lex_ch;
/* Stack of struct lex_state. */
fx_queue lex_state;
struct lex_symbol_node *lex_sym_tree;
/* Sticky status; the string state sets it to BSHELL_ERR_BAD_SYNTAX when
 * input ends inside a string. */
enum bshell_status lex_status;
};
/* Initialise `ctx` to read from `src` with the given flags. */
extern enum bshell_status lex_ctx_init(
struct lex_ctx *ctx,
enum lex_flags flags,
struct line_source *src);
/* Release the resources held by `ctx`. */
extern enum bshell_status lex_ctx_cleanup(struct lex_ctx *ctx);
/* NOTE(review): the three functions below are implemented outside the code
 * reviewed here. Presumably peek returns the next token without consuming
 * it, claim removes it and hands ownership to the caller, and discard
 * removes and frees it - confirm in lex.c. */
extern struct lex_token *lex_ctx_peek(struct lex_ctx *ctx);
extern struct lex_token *lex_ctx_claim(struct lex_ctx *ctx);
extern void lex_ctx_discard(struct lex_ctx *ctx);
#endif
+136
View File
@@ -0,0 +1,136 @@
#include "lex-internal.h"
static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_SQUOTE:
status = read_literal_string(ctx, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
case SYM_HASH:
return read_line_comment(ctx);
case SYM_DQUOTE:
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
return BSHELL_ERR_NO_MEMORY;
}
return BSHELL_SUCCESS;
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_DOLLAR_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
break;
}
push_symbol(ctx, sym->id);
switch (sym->id) {
case SYM_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_EXPRESSION);
return BSHELL_SUCCESS;
case SYM_DOLLAR_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
case SYM_RIGHT_PAREN:
lex_state_pop(ctx);
return BSHELL_SUCCESS;
case SYM_SEMICOLON:
lex_state_change(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
default:
break;
}
return BSHELL_SUCCESS;
}
static enum bshell_status arithmetic_word(struct lex_ctx *ctx)
{
struct lex_token *word = NULL;
enum bshell_status status = read_word(ctx, &word);
if (status != BSHELL_SUCCESS) {
return status;
}
bool converted = convert_word_to_keyword(word);
if (!converted) {
converted = convert_word_to_int(word);
}
enqueue_token(ctx, word);
return BSHELL_SUCCESS;
}
static enum bshell_status arithmetic_pump_token(struct lex_ctx *ctx)
{
fx_wchar c = peek_char(ctx);
bool newline = false;
while (fx_wchar_is_space(c)) {
if (c == '\n') {
newline = true;
}
advance_char_noread(ctx);
c = peek_char_noread(ctx);
}
if (newline) {
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
}
if (char_can_begin_symbol(ctx, c)) {
return arithmetic_symbol(ctx);
}
return arithmetic_word(ctx);
}
/* Arithmetic lexer state: no begin/end hooks, only a token pump. */
const struct lex_state_type lex_arithmetic_state = {
.s_id = LEX_STATE_ARITHMETIC,
.s_pump_token = arithmetic_pump_token,
};
+131
View File
@@ -0,0 +1,131 @@
#include "lex-internal.h"
static enum bshell_status command_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_SQUOTE:
status = read_literal_string(ctx, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
case SYM_HASH:
return read_line_comment(ctx);
case SYM_DQUOTE:
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
return BSHELL_ERR_NO_MEMORY;
}
return BSHELL_SUCCESS;
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_DOLLAR_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
break;
}
push_symbol(ctx, sym->id);
switch (sym->id) {
case SYM_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_EXPRESSION);
return BSHELL_SUCCESS;
case SYM_DOLLAR_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
case SYM_RIGHT_PAREN:
lex_state_pop(ctx);
return BSHELL_SUCCESS;
case SYM_SEMICOLON:
lex_state_change(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
default:
break;
}
return BSHELL_SUCCESS;
}
static enum bshell_status command_word(struct lex_ctx *ctx)
{
struct lex_token *word = NULL;
enum bshell_status status = read_word(ctx, &word);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, word);
return BSHELL_SUCCESS;
}
enum bshell_status command_pump_token(struct lex_ctx *ctx)
{
fx_wchar c = peek_char(ctx);
bool newline = false;
while (fx_wchar_is_space(c)) {
if (c == '\n') {
newline = true;
}
advance_char_noread(ctx);
c = peek_char_noread(ctx);
}
if (newline) {
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
}
if (char_can_begin_symbol(ctx, c)) {
return command_symbol(ctx);
}
return command_word(ctx);
}
/* Command lexer state: no begin/end hooks, only a token pump. */
const struct lex_state_type lex_command_state = {
.s_id = LEX_STATE_COMMAND,
.s_pump_token = command_pump_token,
};
+134
View File
@@ -0,0 +1,134 @@
#include "lex-internal.h"
static enum bshell_status expression_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DQUOTE:
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
return BSHELL_ERR_NO_MEMORY;
}
return BSHELL_SUCCESS;
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
return status;
case SYM_DOLLAR_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
return status;
case SYM_AT_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
return status;
default:
break;
}
push_symbol(ctx, sym->id);
switch (sym->id) {
case SYM_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_EXPRESSION);
return BSHELL_SUCCESS;
case SYM_DOLLAR_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
case SYM_RIGHT_PAREN:
lex_state_pop(ctx);
return BSHELL_SUCCESS;
case SYM_SEMICOLON:
lex_state_change(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
default:
break;
}
return BSHELL_SUCCESS;
}
static enum bshell_status expression_word(struct lex_ctx *ctx)
{
struct lex_token *word = NULL;
enum bshell_status status = read_word(ctx, &word);
if (status != BSHELL_SUCCESS) {
return status;
}
bool converted = convert_word_to_int(word);
if (converted) {
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
} else {
lex_state_change(ctx, LEX_STATE_COMMAND);
}
enqueue_token(ctx, word);
return BSHELL_SUCCESS;
}
static enum bshell_status expression_pump_token(struct lex_ctx *ctx)
{
fx_wchar c = peek_char(ctx);
bool newline = false;
while (fx_wchar_is_space(c)) {
if (c == '\n') {
newline = true;
}
advance_char_noread(ctx);
c = peek_char_noread(ctx);
}
if (newline) {
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
enqueue_token(ctx, tok);
lex_state_change(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
}
if (char_can_begin_symbol(ctx, c)) {
return expression_symbol(ctx);
}
return expression_word(ctx);
}
/* Expression lexer state: no begin/end hooks, only a token pump. */
const struct lex_state_type lex_expression_state = {
.s_id = LEX_STATE_EXPRESSION,
.s_pump_token = expression_pump_token,
};
+75
View File
@@ -0,0 +1,75 @@
#ifndef PARSE_LEX_INTERNAL_H_
#define PARSE_LEX_INTERNAL_H_
#include "../../status.h"
#include "../lex.h"
#include "../token.h"
struct lex_ctx;
/* Callback that produces the next token(s) for the active state. */
typedef enum bshell_status (*lex_state_pump_token)(struct lex_ctx *);
/* Optional hooks; the string state uses them to emit its start and end
 * tokens. NOTE(review): presumably invoked when a state is pushed and
 * popped - confirm in lex.c. */
typedef enum bshell_status (*lex_state_begin)(struct lex_ctx *);
typedef enum bshell_status (*lex_state_end)(struct lex_ctx *);
/* Static description of one lexer state. */
struct lex_state_type {
enum lex_state_type_id s_id;
lex_state_pump_token s_pump_token;
lex_state_begin s_begin;
lex_state_end s_end;
};
/* NOTE(review): the per-state files define their pump functions as
 * <state>_pump_token (mostly static) and export them through
 * lex_<state>_state objects; no definitions named pump_token_<state> appear
 * in the code reviewed here - confirm these declarations are still used. */
extern enum bshell_status pump_token_statement(struct lex_ctx *ctx);
extern enum bshell_status pump_token_expression(struct lex_ctx *ctx);
extern enum bshell_status pump_token_command(struct lex_ctx *ctx);
extern enum bshell_status pump_token_arithmetic(struct lex_ctx *ctx);
extern enum bshell_status pump_token_string(struct lex_ctx *ctx);
/* State stack management. Callers treat a NULL result from lex_state_push()
 * as an out-of-memory condition. */
extern struct lex_state *lex_state_push(
struct lex_ctx *ctx,
enum lex_state_type_id state_type);
extern void lex_state_pop(struct lex_ctx *ctx);
extern struct lex_state *lex_state_get(struct lex_ctx *ctx);
extern void lex_state_change(struct lex_ctx *ctx, enum lex_state_type_id type);
extern fx_string *lex_state_get_tempstr(struct lex_ctx *ctx);
/* Character-level input. NOTE(review): the _noread variants presumably do
 * not fetch further input from the line source - confirm in lex.c. */
extern fx_wchar peek_char(struct lex_ctx *ctx);
extern fx_wchar peek_char_noread(struct lex_ctx *ctx);
extern void advance_char(struct lex_ctx *ctx);
extern void advance_char_noread(struct lex_ctx *ctx);
/* Word classification; the convert_* functions return whether the token
 * was converted. */
extern bool string_is_valid_number(const char *s, long long *out);
extern bool convert_word_to_int(struct lex_token *tok);
extern bool convert_word_to_keyword(struct lex_token *tok);
/* Append a token to the context's pending token queue. */
extern void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok);
/* Readers: each returns a status and stores its result through `out`. */
extern enum bshell_status read_word(
struct lex_ctx *ctx,
struct lex_token **out);
extern enum bshell_status read_symbol(
struct lex_ctx *ctx,
const struct lex_token_def **out);
extern enum bshell_status read_literal_string(
struct lex_ctx *ctx,
struct lex_token **out);
extern enum bshell_status read_line_comment(struct lex_ctx *lex);
extern enum bshell_status read_var(
struct lex_ctx *ctx,
enum token_type type,
struct lex_token **out);
extern enum bshell_status read_braced_var(
struct lex_ctx *ctx,
enum token_type type,
struct lex_token **out);
/* Queue a symbol token for `sym`. */
extern enum bshell_status push_symbol(
struct lex_ctx *ctx,
enum token_symbol sym);
/* NOTE(review): these take a plain char, but callers pass an fx_wchar -
 * confirm that the narrowing is intended. */
extern bool char_can_begin_symbol(struct lex_ctx *ctx, char c);
extern bool char_can_begin_symbol_in_state(
struct lex_ctx *ctx,
char c,
enum lex_state_type_id state_type);
#endif
File diff suppressed because it is too large Load Diff
+162
View File
@@ -0,0 +1,162 @@
#include "lex-internal.h"
static enum bshell_status statement_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_SQUOTE:
status = read_literal_string(ctx, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
case SYM_HASH:
return read_line_comment(ctx);
case SYM_DQUOTE:
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
return BSHELL_ERR_NO_MEMORY;
}
return BSHELL_SUCCESS;
case SYM_DOLLAR:
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
return BSHELL_ERR_NO_MEMORY;
}
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
return BSHELL_ERR_NO_MEMORY;
}
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_DOLLAR_LEFT_BRACE:
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
return BSHELL_ERR_NO_MEMORY;
}
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT_LEFT_BRACE:
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
return BSHELL_ERR_NO_MEMORY;
}
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
break;
}
push_symbol(ctx, sym->id);
switch (sym->id) {
case SYM_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_EXPRESSION);
return BSHELL_SUCCESS;
case SYM_LEFT_BRACE:
case SYM_DOLLAR_LEFT_PAREN:
lex_state_push(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
case SYM_RIGHT_PAREN:
case SYM_RIGHT_BRACE:
lex_state_pop(ctx);
return BSHELL_SUCCESS;
default:
break;
}
if (sym->enabled_states & LEX_STATE_COMMAND) {
lex_state_change(ctx, LEX_STATE_COMMAND);
} else if (sym->enabled_states & LEX_STATE_ARITHMETIC) {
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
}
return BSHELL_SUCCESS;
}
static enum bshell_status statement_word(struct lex_ctx *ctx)
{
struct lex_token *word = NULL;
enum bshell_status status = read_word(ctx, &word);
if (status != BSHELL_SUCCESS) {
return status;
}
bool converted = convert_word_to_keyword(word);
if (!converted) {
converted = convert_word_to_int(word);
}
if (converted) {
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
} else {
lex_state_change(ctx, LEX_STATE_COMMAND);
}
enqueue_token(ctx, word);
return BSHELL_SUCCESS;
}
static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
{
fx_wchar c = peek_char(ctx);
bool newline = false;
while (fx_wchar_is_space(c)) {
if (c == '\n') {
newline = true;
}
advance_char_noread(ctx);
c = peek_char_noread(ctx);
}
if (newline) {
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
if (char_can_begin_symbol(ctx, c)) {
return statement_symbol(ctx);
}
return statement_word(ctx);
}
/* Statement lexer state: no begin/end hooks, only a token pump. */
const struct lex_state_type lex_statement_state = {
.s_id = LEX_STATE_STATEMENT,
.s_pump_token = statement_pump_token,
};
+136
View File
@@ -0,0 +1,136 @@
#include "lex-internal.h"
static enum bshell_status string_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DOLLAR_LEFT_PAREN:
status = push_symbol(ctx, sym->id);
if (status != BSHELL_SUCCESS) {
return status;
}
lex_state_push(ctx, LEX_STATE_STATEMENT);
return BSHELL_SUCCESS;
case SYM_DQUOTE:
lex_state_pop(ctx);
return BSHELL_SUCCESS;
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_DOLLAR_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
break;
}
return BSHELL_ERR_BAD_SYNTAX;
}
static enum bshell_status string_content(struct lex_ctx *ctx)
{
fx_wchar c = FX_WCHAR_INVALID;
fx_string *temp = lex_state_get_tempstr(ctx);
fx_string_clear(temp);
while (1) {
c = peek_char(ctx);
if (c == FX_WCHAR_INVALID) {
/* EOF without end of string */
ctx->lex_status = BSHELL_ERR_BAD_SYNTAX;
}
if (char_can_begin_symbol(ctx, c)) {
break;
}
fx_string_append_wc(temp, c);
advance_char(ctx);
}
if (fx_string_get_size(temp, FX_STRLEN_NORMAL) == 0) {
return BSHELL_SUCCESS;
}
struct lex_token *tok = lex_token_create_with_string(
TOK_STRING,
fx_string_get_cstr(temp));
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
/*
 * s_begin callback of the string state: queue a TOK_STR_START marker.
 * Returns BSHELL_ERR_NO_MEMORY if the token cannot be allocated.
 */
static enum bshell_status string_begin(struct lex_ctx *ctx)
{
	struct lex_token *marker = lex_token_create(TOK_STR_START);
	if (marker != NULL) {
		enqueue_token(ctx, marker);
		return BSHELL_SUCCESS;
	}

	return BSHELL_ERR_NO_MEMORY;
}
/*
 * s_end callback of the string state: queue a TOK_STR_END marker.
 * Returns BSHELL_ERR_NO_MEMORY if the token cannot be allocated.
 */
static enum bshell_status string_end(struct lex_ctx *ctx)
{
	struct lex_token *marker = lex_token_create(TOK_STR_END);
	if (marker != NULL) {
		enqueue_token(ctx, marker);
		return BSHELL_SUCCESS;
	}

	return BSHELL_ERR_NO_MEMORY;
}
/*
 * Produce the next token while inside a string: a character that can begin
 * a symbol is handed to string_symbol(), anything else is collected as
 * literal content by string_content().
 */
static enum bshell_status string_pump_token(struct lex_ctx *ctx)
{
	const fx_wchar next = peek_char(ctx);

	return char_can_begin_symbol(ctx, next) ? string_symbol(ctx)
						: string_content(ctx);
}
/*
 * State-type descriptor for LEX_STATE_STRING. string_begin() and
 * string_end() queue the TOK_STR_START / TOK_STR_END markers, and
 * string_pump_token() produces the tokens in between.
 */
const struct lex_state_type lex_string_state = {
.s_id = LEX_STATE_STRING,
.s_begin = string_begin,
.s_end = string_end,
.s_pump_token = string_pump_token,
};
+30
View File
@@ -0,0 +1,30 @@
#include "parse.h"
#include "../ast/ast.h"
#include "lex.h"
#include "syntax.h"
#include "token.h"
#include <stdio.h>
#include <string.h>
enum bshell_status parse_ctx_init(struct parse_ctx *ctx, struct lex_ctx *src)
{
memset(ctx, 0x0, sizeof *ctx);
ctx->p_src = src;
return BSHELL_SUCCESS;
}
/* Counterpart of parse_ctx_init(); currently there is nothing to release. */
void parse_ctx_cleanup(struct parse_ctx *ctx)
{
	(void)ctx;
}
/*
 * Parse one statement from the context's token source.
 * Returns the resulting node, or NULL when parse_statement() fails.
 */
struct ast_node *parse_ctx_read_node(struct parse_ctx *ctx)
{
	struct ast_node *node = NULL;

	if (!parse_statement(ctx, &node)) {
		return NULL;
	}

	return node;
}
+21
View File
@@ -0,0 +1,21 @@
#ifndef PARSE_H_
#define PARSE_H_
#include "../status.h"
struct lex_ctx;
struct ast_node;
/* Parser state shared by all parse_* / peek_* routines. */
struct parse_ctx {
/* lexer the parser pulls its tokens from */
struct lex_ctx *p_src;
/* set to an error code by the parse routines when they fail (for example
 * BSHELL_ERR_NO_MEMORY or BSHELL_ERR_BAD_SYNTAX) */
enum bshell_status p_status;
};
/* Zero `ctx` and attach `src` as its token source. */
extern enum bshell_status parse_ctx_init(
struct parse_ctx *ctx,
struct lex_ctx *src);
/* Counterpart of parse_ctx_init(). */
extern void parse_ctx_cleanup(struct parse_ctx *ctx);
/* Parse one statement; returns its AST node, or NULL on failure. */
extern struct ast_node *parse_ctx_read_node(struct parse_ctx *ctx);
#endif
+48
View File
@@ -0,0 +1,48 @@
#ifndef PARSE_SYNTAX_H_
#define PARSE_SYNTAX_H_
#include "../ast/ast.h"
#include "lex.h"
#include "parse.h"
#include "token.h"
#include <stdbool.h>
/* Flags accepted by parse_operand(). */
enum parse_operand_flags {
/* NOTE(review): exact meaning is not visible in this header - confirm
 * against the parse_operand() implementation */
OPERAND_BASIC = 0x01u,
};
/*
 * Token access helpers. The peek_* routines inspect the next token without
 * consuming it.
 */
extern struct lex_token *peek_token(struct parse_ctx *ctx);
/* type of the next token, or TOK_NONE when there is none */
extern enum token_type peek_token_type(struct parse_ctx *ctx);
/* keyword id of the next token, or KW_NONE when it is not a keyword */
extern enum token_keyword peek_unknown_keyword(struct parse_ctx *ctx);
/* symbol id of the next token, or SYM_NONE when it is not a symbol */
extern enum token_symbol peek_unknown_symbol(struct parse_ctx *ctx);
/* take the next token from the lexer and return it to the caller */
extern struct lex_token *claim_token(struct parse_ctx *ctx);
/* drop the next token */
extern void discard_token(struct parse_ctx *ctx);
extern bool peek_linefeed(struct parse_ctx *ctx);
extern bool peek_symbol(struct parse_ctx *ctx, enum token_symbol sym);
/* on a TOK_WORD, stores the (still unconsumed) token in *out */
extern bool peek_word(struct parse_ctx *ctx, struct lex_token **out);
extern bool peek_int(struct parse_ctx *ctx);
/*
 * The parse_* token routines below consume the next token and return true
 * when it matches; otherwise they return false and consume nothing.
 */
extern bool parse_linefeed(struct parse_ctx *ctx);
extern bool parse_symbol(struct parse_ctx *ctx, enum token_symbol sym);
extern bool parse_keyword(struct parse_ctx *ctx, enum token_keyword kw);
extern bool parse_int(struct parse_ctx *ctx, long long *out);
extern bool parse_flag(struct parse_ctx *ctx, struct lex_token **out);
/* Grammar rules: on success the resulting AST node is stored in *out. */
extern bool peek_arith_expr(struct parse_ctx *ctx);
extern bool parse_arith_expr(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_operand(
struct parse_ctx *ctx,
enum parse_operand_flags flags,
struct ast_node **out);
extern bool parse_statement(struct parse_ctx *ctx, struct ast_node **out);
extern bool peek_command(struct parse_ctx *ctx);
extern bool parse_command(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_cmdcall(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_redirect(struct parse_ctx *ctx, struct ast_node **out);
#endif
+27
View File
@@ -0,0 +1,27 @@
#include "../syntax.h"
bool peek_arith_expr(struct parse_ctx *ctx)
{
switch (peek_token_type(ctx)) {
case TOK_SYMBOL:
switch (peek_unknown_symbol(ctx)) {
case SYM_PLUS:
case SYM_HYPHEN:
return true;
default:
return false;
}
case TOK_INT:
case TOK_DOUBLE:
case TOK_STRING:
case TOK_STR_START:
return true;
default:
return false;
}
}
/*
 * Placeholder: arithmetic expressions are not parsed yet, so this always
 * reports failure and leaves *out untouched.
 */
bool parse_arith_expr(struct parse_ctx *ctx, struct ast_node **out)
{
	(void)ctx;
	(void)out;

	return false;
}
+419
View File
@@ -0,0 +1,419 @@
#include "../syntax.h"
#include <fx/encoding.h>
/*
 * Parse a single command-call argument.
 *
 * Accepts a word, variable, variable splat or string token. The token is
 * claimed and stored in a freshly created AST node of the matching kind,
 * which is returned through *out.
 *
 * Returns false without consuming anything when the parser is already in an
 * error state, when there is no token, or when the token is of another type.
 * On allocation failure ctx->p_status is set to BSHELL_ERR_NO_MEMORY and
 * false is returned.
 */
static bool parse_cmdcall_arg(struct parse_ctx *ctx, struct ast_node **out)
{
	if (ctx->p_status != BSHELL_SUCCESS) {
		return false;
	}

	struct lex_token *tok = peek_token(ctx);
	if (!tok) {
		return false;
	}

	switch (tok->tok_type) {
	case TOK_WORD: {
		struct word_ast_node *n
			= (struct word_ast_node *)ast_node_create(AST_WORD);
		if (!n) {
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			return false;
		}

		n->n_value = claim_token(ctx);
		*out = (struct ast_node *)n;
		return true;
	}
#if 0
	/* flag arguments are currently disabled */
	case TOK_FLAG: {
		struct word_ast_node *n
			= (struct word_ast_node *)ast_node_create(AST_WORD);
		if (!n) {
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			return false;
		}

		n->n_value = claim_token(ctx);
		*out = (struct ast_node *)n;
		return true;
	}
#endif
	case TOK_VAR: {
		struct var_ast_node *n
			= (struct var_ast_node *)ast_node_create(AST_VAR);
		if (!n) {
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			return false;
		}

		n->n_ident = claim_token(ctx);
		*out = (struct ast_node *)n;
		return true;
	}
	case TOK_VAR_SPLAT: {
		struct var_splat_ast_node *n
			= (struct var_splat_ast_node *)ast_node_create(
				AST_VAR_SPLAT);
		if (!n) {
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			return false;
		}

		n->n_ident = claim_token(ctx);
		*out = (struct ast_node *)n;
		return true;
	}
	case TOK_STRING: {
		struct string_ast_node *n
			= (struct string_ast_node *)ast_node_create(AST_STRING);
		if (!n) {
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			return false;
		}

		n->n_value = claim_token(ctx);
		*out = (struct ast_node *)n;
		return true;
	}
	default:
		/* not an argument token; nothing is consumed */
		return false;
	}
}
static bool parse_redirect_to_fd(
struct parse_ctx *ctx,
unsigned int in_fd,
bool append,
struct ast_node **out)
{
if (ctx->p_status != BSHELL_SUCCESS) {
return false;
}
struct redirection_ast_node *redirect
= (struct redirection_ast_node *)ast_node_create(
AST_REDIRECTION);
redirect->n_in = in_fd;
redirect->n_append = append;
if (!parse_symbol(ctx, SYM_AMPERSAND)) {
ast_node_destroy((struct ast_node *)redirect);
return false;
}
struct lex_token *out_tok = NULL;
struct ast_node *out_expr = NULL;
long long out_fd = -1;
if (peek_word(ctx, &out_tok)) {
const char *s = out_tok->tok_str;
char *ep;
out_fd = strtoll(s, &ep, 10);
if (*ep == '\0') {
discard_token(ctx);
out_tok = NULL;
} else {
out_fd = -1;
}
} else if (!parse_cmdcall_arg(ctx, &out_expr)) {
return false;
}
redirect->n_out_is_fd = (out_fd >= 0) || out_expr;
redirect->n_out_is_expr = out_expr != NULL;
redirect->n_out = (unsigned int)out_fd;
redirect->n_out_path_expr = out_expr;
if (out_tok) {
redirect->n_out_tok = claim_token(ctx);
redirect->n_out_path = out_tok->tok_str;
}
*out = (struct ast_node *)redirect;
return true;
}
static bool parse_redirect_to_file_squashed(
struct parse_ctx *ctx,
unsigned int in_fd,
bool append,
const char *str,
struct ast_node **out)
{
if (ctx->p_status != BSHELL_SUCCESS) {
return false;
}
struct lex_token *tok = peek_token(ctx);
if (*str == '\0') {
return false;
}
struct redirection_ast_node *redirect
= (struct redirection_ast_node *)ast_node_create(
AST_REDIRECTION);
redirect->n_in = in_fd;
redirect->n_append = append;
redirect->n_out_is_fd = false;
redirect->n_out_is_expr = false;
redirect->n_out_path = str;
redirect->n_out_tok = claim_token(ctx);
*out = (struct ast_node *)redirect;
return true;
}
static bool parse_redirect_to_file_separate(
struct parse_ctx *ctx,
unsigned int in_fd,
bool append,
struct ast_node **out)
{
if (ctx->p_status != BSHELL_SUCCESS) {
return false;
}
struct ast_node *out_path = NULL;
if (!parse_cmdcall_arg(ctx, &out_path)) {
ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
return false;
}
struct redirection_ast_node *redirect
= (struct redirection_ast_node *)ast_node_create(
AST_REDIRECTION);
redirect->n_in = in_fd;
redirect->n_append = append;
redirect->n_out_is_fd = false;
redirect->n_out_is_expr = true;
redirect->n_out_path_expr = out_path;
*out = (struct ast_node *)redirect;
return true;
}
/*
 * Try to parse a redirection starting at the next token.
 *
 * The token must be a word of the form [digit]>[>][path]:
 *  - an optional leading digit selects the source descriptor (default 1);
 *  - a second '>' requests append mode;
 *  - any remaining text in the same word is the target path.
 *
 * When the word carries no path, the word is discarded and the target is
 * read from the following tokens, first as an '&' target and then as a
 * separate argument.
 *
 * Returns true and stores the node in *out on success.
 */
bool parse_redirect(struct parse_ctx *ctx, struct ast_node **out)
{
	struct lex_token *word = peek_token(ctx);
	if (word == NULL || word->tok_type != TOK_WORD) {
		return false;
	}

	const char *cursor = word->tok_str;
	unsigned int in_fd = 1;

	/* optional single-digit source descriptor */
	if (fx_wchar_is_number(*cursor)) {
		in_fd = *cursor - '0';
		cursor++;
	}

	if (*cursor != '>') {
		return false;
	}
	cursor++;

	const bool append = (*cursor == '>');
	if (append) {
		cursor++;
	}

	/* target glued onto the operator */
	if (*cursor != '\0') {
		return parse_redirect_to_file_squashed(
			ctx,
			in_fd,
			append,
			cursor,
			out);
	}

	/* bare operator: drop it and read the target from what follows */
	discard_token(ctx);

	return parse_redirect_to_fd(ctx, in_fd, append, out)
	       || parse_redirect_to_file_separate(ctx, in_fd, append, out);
}
static bool peek_cmdcall_item(struct parse_ctx *ctx, bool unrestricted)
{
/* each token type falls into one of three categories:
* - cmdcall item: the token can be used as part of a command call. the
* token indicates the start of a command call.
* - NOT a cmdcall item: the token cannot be used as part of a command
* call, usually because it as a cmdcall operator like | or &.
* encountering one of these tokens ends the cmdcall currently being
* parsed.
* - RESTRICTED cmdcall item: the token can be used as part of a
* command, but will not be considered the start of a cmdcall. to run
* a command with this token as its name, the call operator must be
* used.
*/
switch (peek_token_type(ctx)) {
case TOK_KEYWORD:
case TOK_INT:
case TOK_DOUBLE:
case TOK_VAR:
case TOK_VAR_SPLAT:
case TOK_STRING:
case TOK_STR_START:
return unrestricted;
case TOK_SYMBOL:
switch (peek_unknown_symbol(ctx)) {
case SYM_PLUS:
case SYM_HYPHEN:
return unrestricted;
case SYM_PIPE:
case SYM_AMPERSAND:
case SYM_SEMICOLON:
return false;
default:
return true;
}
case TOK_NONE:
case TOK_LINEFEED:
return false;
default:
return true;
}
}
bool parse_cmdcall(struct parse_ctx *ctx, struct ast_node **out)
{
struct cmdcall_ast_node *node
= (struct cmdcall_ast_node *)ast_node_create(AST_CMDCALL);
if (!node) {
ctx->p_status = BSHELL_ERR_NO_MEMORY;
return false;
}
struct ast_node *child = NULL;
bool unrestricted = false;
bool ok = true;
bool stop = false;
if (parse_symbol(ctx, SYM_AMPERSAND)) {
unrestricted = true;
}
if (!peek_cmdcall_item(ctx, unrestricted)) {
return false;
}
struct lex_token *tok = peek_token(ctx);
if (!tok) {
return false;
}
if (!parse_cmdcall_arg(ctx, &child)) {
return false;
}
fx_queue_push_back(&node->n_args, &child->n_entry);
while (ok && !stop) {
if (!peek_cmdcall_item(ctx, true)) {
break;
}
struct lex_token *tok = peek_token(ctx);
if (!tok) {
break;
}
if (parse_redirect(ctx, &child)) {
fx_queue_push_back(&node->n_redirect, &child->n_entry);
} else if (parse_cmdcall_arg(ctx, &child)) {
fx_queue_push_back(&node->n_args, &child->n_entry);
} else {
ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
ok = false;
break;
}
}
if (!ok) {
ast_node_destroy((struct ast_node *)node);
node = NULL;
}
*out = (struct ast_node *)node;
return ok;
}
/*
 * Report whether a command starts at the next token: either the '&' call
 * operator, or an item that may begin a command call on its own.
 */
bool peek_command(struct parse_ctx *ctx)
{
	return peek_symbol(ctx, SYM_AMPERSAND) || peek_cmdcall_item(ctx, false);
}
bool parse_command(struct parse_ctx *ctx, struct ast_node **out)
{
struct ast_node *cmdcall = NULL;
if (!parse_cmdcall(ctx, &cmdcall)) {
return false;
}
struct pipeline_ast_node *pipeline = NULL;
while (1) {
if (parse_symbol(ctx, SYM_SEMICOLON) || parse_linefeed(ctx)) {
break;
}
if (!parse_symbol(ctx, SYM_PIPE)) {
break;
}
if (!pipeline) {
pipeline = (struct pipeline_ast_node *)ast_node_create(
AST_PIPELINE);
if (!pipeline) {
ctx->p_status = BSHELL_ERR_NO_MEMORY;
ast_node_destroy(cmdcall);
return false;
}
fx_queue_push_back(
&pipeline->n_stages,
&cmdcall->n_entry);
}
if (!parse_cmdcall(ctx, &cmdcall)) {
ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
return false;
}
fx_queue_push_back(&pipeline->n_stages, &cmdcall->n_entry);
}
if (pipeline) {
*out = (struct ast_node *)pipeline;
} else {
*out = cmdcall;
}
return true;
}
+143
View File
@@ -0,0 +1,143 @@
#include "../lex.h"
#include "../parse.h"
#include "../syntax.h"
#include "../token.h"
/* Take the next token from the parser's lexer and return it to the caller. */
struct lex_token *claim_token(struct parse_ctx *ctx)
{
	struct lex_token *claimed = lex_ctx_claim(ctx->p_src);

	return claimed;
}
/* Drop the next token of the parser's lexer. */
void discard_token(struct parse_ctx *ctx)
{
	/* ISO C does not allow `return expr;` in a function returning void */
	lex_ctx_discard(ctx->p_src);
}
/* Return the next token of the parser's lexer without consuming it. */
struct lex_token *peek_token(struct parse_ctx *ctx)
{
	struct lex_token *next = lex_ctx_peek(ctx->p_src);

	return next;
}
enum token_type peek_token_type(struct parse_ctx *ctx)
{
struct lex_token *tok = peek_token(ctx);
return tok ? tok->tok_type : TOK_NONE;
}
enum token_symbol peek_unknown_symbol(struct parse_ctx *ctx)
{
struct lex_token *tok = peek_token(ctx);
return (tok && tok->tok_type == TOK_SYMBOL) ? tok->tok_symbol
: SYM_NONE;
}
enum token_keyword peek_unknown_keyword(struct parse_ctx *ctx)
{
struct lex_token *tok = peek_token(ctx);
return (tok && tok->tok_type == TOK_KEYWORD) ? tok->tok_keyword
: KW_NONE;
}
/*
 * If the next token is a word, store it in *out (without consuming it) and
 * return true. Otherwise return false and leave *out untouched.
 */
bool peek_word(struct parse_ctx *ctx, struct lex_token **out)
{
	struct lex_token *next = peek_token(ctx);

	if (next == NULL || next->tok_type != TOK_WORD) {
		return false;
	}

	*out = next;
	return true;
}
bool peek_linefeed(struct parse_ctx *ctx)
{
struct lex_token *tok = peek_token(ctx);
if (tok && tok->tok_type == TOK_LINEFEED) {
return true;
}
return false;
}
bool peek_symbol(struct parse_ctx *ctx, enum token_symbol sym)
{
struct lex_token *tok = peek_token(ctx);
if (!tok) {
return false;
}
if (tok->tok_type != TOK_SYMBOL) {
return false;
}
if (tok->tok_symbol != sym) {
return false;
}
return true;
}
bool parse_linefeed(struct parse_ctx *ctx)
{
struct lex_token *tok = peek_token(ctx);
if (tok && tok->tok_type == TOK_LINEFEED) {
discard_token(ctx);
return true;
}
return false;
}
bool parse_symbol(struct parse_ctx *ctx, enum token_symbol sym)
{
struct lex_token *tok = peek_token(ctx);
if (!tok) {
return false;
}
if (tok->tok_type != TOK_SYMBOL) {
return false;
}
if (tok->tok_symbol != sym) {
return false;
}
discard_token(ctx);
return true;
}
bool parse_keyword(struct parse_ctx *ctx, enum token_keyword kw)
{
struct lex_token *tok = peek_token(ctx);
if (!tok) {
return false;
}
if (tok->tok_type != TOK_KEYWORD) {
return false;
}
if (tok->tok_keyword != kw) {
return false;
}
discard_token(ctx);
return true;
}
bool parse_int(struct parse_ctx *ctx, long long *out)
{
struct lex_token *tok = peek_token(ctx);
if (!tok) {
return false;
}
if (tok->tok_type != TOK_INT) {
return false;
}
*out = tok->tok_int;
discard_token(ctx);
return true;
}
+15
View File
@@ -0,0 +1,15 @@
#include "../syntax.h"
/*
 * Parse one statement. An arithmetic expression is tried first when the
 * next token can start one; if that is not the case or it fails, a command
 * is tried. Returns true when either succeeds.
 */
bool parse_statement(struct parse_ctx *ctx, struct ast_node **out)
{
	if (peek_arith_expr(ctx) && parse_arith_expr(ctx, out)) {
		return true;
	}

	if (!peek_command(ctx)) {
		return false;
	}

	return parse_command(ctx, out);
}
+150
View File
@@ -0,0 +1,150 @@
#include "token.h"
#include <fx/string.h>
#include <stdlib.h>
#include <string.h>
/*
 * Allocate a zero-filled token of the given type.
 * Returns NULL when the allocation fails.
 */
struct lex_token *lex_token_create(enum token_type type)
{
	struct lex_token *tok = calloc(1, sizeof *tok);

	if (tok != NULL) {
		tok->tok_type = type;
	}

	return tok;
}
/*
 * Allocate a token of the given type whose string value is a private copy
 * of `s`. Returns NULL when either allocation fails.
 */
struct lex_token *lex_token_create_with_string(
	enum token_type type,
	const char *s)
{
	struct lex_token *created = lex_token_create(type);

	if (created != NULL) {
		created->tok_str = fx_strdup(s);
		if (created->tok_str == NULL) {
			/* no partially built token escapes */
			free(created);
			created = NULL;
		}
	}

	return created;
}
/*
 * Release a token together with its string value, if it has one.
 *
 * Which token types carry a string is decided by
 * lex_token_has_string_value(), so variable and variable-splat tokens are
 * released as well; previously only word, flag and string tokens were.
 * NOTE(review): assumes tok_str of every string-valued token is heap
 * memory owned by the token, as lex_token_create_with_string() produces -
 * confirm for tokens built elsewhere.
 */
void lex_token_destroy(struct lex_token *tok)
{
	if (lex_token_has_string_value(tok)) {
		/* free(NULL) is a no-op, so no guard is needed */
		free(tok->tok_str);
	}

	free(tok);
}
struct lex_token *lex_token_change_type(
struct lex_token *tok,
enum token_type new_type)
{
switch (tok->tok_type) {
case TOK_WORD:
case TOK_FLAG:
case TOK_STRING:
if (tok->tok_str) {
free(tok->tok_str);
tok->tok_str = NULL;
}
break;
default:
break;
}
tok->tok_type = new_type;
return tok;
}
/*
 * Expands to a switch arm that returns the enumerator's own name as a
 * string literal. The trailing ';' is supplied at the point of use.
 */
#define ENUM_STR(x) \
case x: \
return #x
/*
 * Name of a token type as a static string, e.g. "TOK_WORD".
 * Values without a name here yield "<unknown>".
 */
const char *token_type_to_string(enum token_type type)
{
	switch (type) {
		ENUM_STR(TOK_NONE);
		ENUM_STR(TOK_KEYWORD);
		ENUM_STR(TOK_SYMBOL);
		ENUM_STR(TOK_INT);
		ENUM_STR(TOK_DOUBLE);
		ENUM_STR(TOK_WORD);
		ENUM_STR(TOK_FLAG);
		ENUM_STR(TOK_VAR);
		ENUM_STR(TOK_VAR_SPLAT);
		ENUM_STR(TOK_STRING);
		ENUM_STR(TOK_STR_START);
		ENUM_STR(TOK_STR_END);
		ENUM_STR(TOK_LINEFEED);
	default:
		break;
	}

	return "<unknown>";
}
/*
 * Name of a keyword id as a static string, e.g. "KW_IF".
 * Values without a name here yield "<unknown>".
 */
const char *token_keyword_to_string(enum token_keyword keyword)
{
	switch (keyword) {
		ENUM_STR(KW_NONE);
		ENUM_STR(KW_FUNC);
		ENUM_STR(KW_IF);
		ENUM_STR(KW_ELSE);
	default:
		break;
	}

	return "<unknown>";
}
const char *token_symbol_to_string(enum token_symbol sym)
{
switch (sym) {
ENUM_STR(SYM_NONE);
ENUM_STR(SYM_PLUS);
ENUM_STR(SYM_HYPHEN);
ENUM_STR(SYM_FORWARD_SLASH);
ENUM_STR(SYM_ASTERISK);
ENUM_STR(SYM_AMPERSAND);
ENUM_STR(SYM_PERCENT);
ENUM_STR(SYM_SQUOTE);
ENUM_STR(SYM_DQUOTE);
ENUM_STR(SYM_HASH);
ENUM_STR(SYM_SEMICOLON);
ENUM_STR(SYM_COMMA);
ENUM_STR(SYM_DOLLAR);
ENUM_STR(SYM_DOLLAR_LEFT_PAREN);
ENUM_STR(SYM_PIPE);
ENUM_STR(SYM_AT);
ENUM_STR(SYM_AT_LEFT_BRACE);
ENUM_STR(SYM_LEFT_BRACE);
ENUM_STR(SYM_RIGHT_BRACE);
ENUM_STR(SYM_LEFT_BRACKET);
ENUM_STR(SYM_RIGHT_BRACKET);
ENUM_STR(SYM_LEFT_PAREN);
ENUM_STR(SYM_RIGHT_PAREN);
ENUM_STR(SYM_EQUAL);
ENUM_STR(SYM_PLUS_EQUAL);
ENUM_STR(SYM_HYPHEN_EQUAL);
ENUM_STR(SYM_FORWARD_SLASH_EQUAL);
ENUM_STR(SYM_ASTERISK_EQUAL);
ENUM_STR(SYM_PERCENT_EQUAL);
default:
return "<unknown>";
}
}
+133
View File
@@ -0,0 +1,133 @@
#ifndef IVY_LANG_LEX_H_
#define IVY_LANG_LEX_H_
#include <fx/queue.h>
#include <stdbool.h>
/* A row/column pair; used for the start and end fields of struct lex_token. */
struct char_cell {
unsigned long c_row, c_col;
};
/*
 * Kinds of token. Real token types are numbered upwards from
 * __TOK_INDEX_BASE + 1; __TOK_INDEX_LIMIT marks the end of the range.
 * NOTE(review): identifiers containing "__" are reserved for the C
 * implementation; renaming the BASE/LIMIT markers would need a
 * project-wide change.
 */
enum token_type {
TOK_NONE = 0,
__TOK_INDEX_BASE = 100,
TOK_KEYWORD,
TOK_SYMBOL,
TOK_INT,
TOK_DOUBLE,
TOK_WORD,
TOK_FLAG,
TOK_VAR,
TOK_VAR_SPLAT,
TOK_STRING,
TOK_STR_START,
TOK_STR_END,
TOK_LINEFEED,
__TOK_INDEX_LIMIT,
};
/*
 * Keyword ids carried by TOK_KEYWORD tokens, numbered upwards from
 * __KW_INDEX_BASE + 1; __KW_INDEX_LIMIT marks the end of the range.
 */
enum token_keyword {
KW_NONE = 0,
__KW_INDEX_BASE = 200,
KW_FUNC,
KW_IF,
KW_ELSE,
__KW_INDEX_LIMIT,
};
/*
 * Symbol ids carried by TOK_SYMBOL tokens, numbered upwards from
 * __SYM_INDEX_BASE + 1; __SYM_INDEX_LIMIT marks the end of the range.
 * Every enumerator added here also needs an arm in
 * token_symbol_to_string(), otherwise it is reported as "<unknown>".
 */
enum token_symbol {
SYM_NONE = 0,
__SYM_INDEX_BASE = 300,
SYM_PLUS,
SYM_HYPHEN,
SYM_FORWARD_SLASH,
SYM_ASTERISK,
SYM_AMPERSAND,
SYM_PERCENT,
SYM_SQUOTE,
SYM_DQUOTE,
SYM_HASH,
SYM_SEMICOLON,
SYM_COMMA,
SYM_DOLLAR,
SYM_DOLLAR_LEFT_PAREN,
SYM_DOLLAR_LEFT_BRACE,
SYM_PIPE,
SYM_AT,
SYM_AT_LEFT_BRACE,
SYM_LEFT_BRACE,
SYM_RIGHT_BRACE,
SYM_LEFT_BRACKET,
SYM_RIGHT_BRACKET,
SYM_LEFT_PAREN,
SYM_RIGHT_PAREN,
SYM_EQUAL,
SYM_PLUS_EQUAL,
SYM_HYPHEN_EQUAL,
SYM_ASTERISK_EQUAL,
SYM_FORWARD_SLASH_EQUAL,
SYM_PERCENT_EQUAL,
__SYM_INDEX_LIMIT,
};
/* A single token produced by the lexer. */
struct lex_token {
/* kind of token; the helpers below check it before reading the union */
enum token_type tok_type;
/* presumably the source span covered by the token - TODO confirm */
struct char_cell tok_start, tok_end;
/* queue linkage */
fx_queue_entry tok_entry;
/* value of the token; the member in use depends on tok_type */
union {
enum token_keyword tok_keyword;
enum token_symbol tok_symbol;
long long tok_int;
double tok_double;
/* released with free() by lex_token_destroy() for word, flag and
 * string tokens */
char *tok_str;
};
};
/* Allocate a zeroed token of the given type; NULL on allocation failure. */
extern struct lex_token *lex_token_create(enum token_type type);
/* As lex_token_create(), with tok_str set to a copy of `s`. */
extern struct lex_token *lex_token_create_with_string(
enum token_type type,
const char *s);
/* Release a token and, for word/flag/string tokens, its string value. */
extern void lex_token_destroy(struct lex_token *tok);
/* Change the token's type, releasing the string of a word/flag/string
 * token first; returns `tok`. */
extern struct lex_token *lex_token_change_type(
struct lex_token *tok,
enum token_type new_type);
/* True when `tok` is a symbol token carrying the symbol `sym`. */
static inline bool lex_token_is_symbol(
	struct lex_token *tok,
	enum token_symbol sym)
{
	if (tok->tok_type != TOK_SYMBOL) {
		return false;
	}

	return tok->tok_symbol == sym;
}
/* True when `tok` is a keyword token carrying the keyword `kw`. */
static inline bool lex_token_is_keyword(
	struct lex_token *tok,
	enum token_keyword kw)
{
	if (tok->tok_type != TOK_KEYWORD) {
		return false;
	}

	return tok->tok_keyword == kw;
}
/*
 * True for the token types that carry a string value: word, string, flag,
 * variable and variable splat.
 */
static inline bool lex_token_type_has_string_value(enum token_type type)
{
	return type == TOK_WORD || type == TOK_STRING || type == TOK_FLAG
	       || type == TOK_VAR || type == TOK_VAR_SPLAT;
}
static inline bool lex_token_has_string_value(const struct lex_token *tok)
{
return lex_token_type_has_string_value(tok->tok_type);
}
/*
 * Debug helpers: each returns the enumerator's name as a static string, or
 * "<unknown>" for a value that has no name.
 */
extern const char *token_type_to_string(enum token_type type);
extern const char *token_keyword_to_string(enum token_keyword keyword);
extern const char *token_symbol_to_string(enum token_symbol sym);
#endif
+1
View File
@@ -0,0 +1 @@
echo hello 2> error.txt | ls -la | echo done; exit -1
+7 -1
View File
@@ -1,8 +1,14 @@
func test-function($name) {
echo "Hello, $name!"
echo "Hello, $name! $(2 + 4 + 2) wow"
}
# Example of instantiating an FX runtime object.
$obj = new-object -type-name fx.string -arguments "John Doe"
$hash = @{
1 = 'one'
2 = 'two'
'three' = 3
}
test-function -name $obj
+173
View File
@@ -0,0 +1,173 @@
# The lexer has three modes: ARITHMETIC, COMMAND, and STRING
# ARITHMETIC mode is operand-based, all symbols, keywords, and constant parsing
# is enabled.
# COMMAND mode is word-based, only a subset of symbols are enabled, no keyword
# or constant parsing is performed, and more liberal word formations and
# substitutions are allowed
# STRING mode is used to read string literals (i.e. those strings that DON'T
# support variable substitutions). All chars read are appended to the resulting
# string, with no further parsing performed.
# Initially, the lexer mode is unspecified, until:
# a) The lexer reads a character, from which the correct mode is deduced.
# b) The parser manually switches the lexer's mode
# Lexer state supports nesting.
# ARITHMETIC
# both of these are equivalent
$a = 2
# VAR(a)
# SYMBOL(=)
# INT(2)
$b=4
# VAR(b)
# SYMBOL(=)
# INT(4)
# ARITHMETIC
# this is a syntax error (there should be an operator between the two vars)
$a$b
# VAR(a)
# VAR(b)
# When the parser encounters SYMBOL(%) it should switch the lexer to COMMAND
# mode, which will allow the following word construction to be used.
# this executes the command whose name is equal to concatenating the values
# of $a and $b (in this case, '24')
% $a$b
# SYMBOL(%)
# WORD_START
# VAR(a)
# VAR(b)
# WORD_END
# executes the command with the name 'a+2b'. because the first char encountered
# by the lexer is alphabetic, it reads a regular word in COMMAND mode.
a+2b
# WORD(a+2b)
# executes the command with the name '-no$a' ($a is not substituted).
# the first char encountered is a symbol, which is read as a word in COMMAND
# mode
-no$a
# WORD(-no$a)
# returns the result of applying the NOT operator to the value of $a.
# the first char encountered is a symbol, which is read as a word in COMMAND
# mode. as characters are read, they are compared against registered operators.
# if a match is found, the operator is emitted, and the parser will switch
# the lexer to ARITHMETIC mode
-not$a
# OP(not)
# VAR(a)
# executes the command with the name '-not$a' ($a is NOT substituted)
# because of the preceding hyphen, variable substitution is not performed.
% -not$a
# SYMBOL(%)
# WORD(-not$a)
# executes the command with the name '-not2' ($a IS substituted)
# variable substitution IS performed in dquote strings regardless of the hyphen.
% "-not$a"
# SYMBOL(%)
# STR_START
# STRING(-not)
# VAR(a)
# STR_END
# interpreted as a command with args ['a', '+b', '/c']
# the first char encountered is alphabetic, so the expression is parsed in
# COMMAND mode
a +b /c
# WORD(a)
# WORD(+b)
# WORD(/c)
# interpreted as an arithmetic expression (but not a well-formed one)
+b /c
# SYM(+)
# WORD(b)
# SYM(/)
# WORD(c)
# interpreted as a command with name '%+'
%+
# WORD(%+)
# interpreted as a command with args ['%', '+']
% +
# WORD(%)
# WORD(+)
# interpreted as a command with name '%'
%;
# WORD(%)
# SYMBOL(;)
# interpreted as a command with name '+'
&+
# SYMBOL(&)
# WORD(+)
# interpreted as a string, which triggers the parser to enter ARITHMETIC mode
'hello world'
# STRING(hello world)
# interpreted as a command with args ['echo', 'hello world']
echo 'hello world'
# WORD(echo)
# STRING(hello world)
# interpreted as an interpolated string
"Hello $(if ($x -lt 5) { echo 'yes' } else {echo 'no'})"
###############################################################################
# The lexer operates as a state machine, moving between different states as
# different characters are encountered
# The states are stored in a stack, to allow recursive parsing.
# The lexer has the following states:
# STATEMENT: A generic statement, could be a command, keyword, arithmetic
# expression, etc. The next char or symbol encountered will cause the
# lexer to switch to the appropriate state type:
# letters, word-symbols -> COMMAND
# squote -> ARITHMETIC
# dquote -> ARITHMETIC, FSTRING
# Digits, vars, var-splats, keywords, all other symbols -> ARITHMETIC
# EXPRESSION: Similar to STATEMENT, but only allows a single command or
# arithmetic expression. CANNOT use keywords or statement terminators.
# Letters, word-symbols -> COMMAND
# squote -> ARITHMETIC
# dquote -> ARITHMETIC, FSTRING
# Digits, vars, var-splats, keywords, all other symbols -> ARITHMETIC
# COMMAND: Only words, (f)strings, vars, var-splats, and a subset of symbols are
# parsed.
# ARITHMETIC: Words, strings, vars, var-splats, all symbols, keywords are parsed.
# STRING: Only a subset of symbols are parsed, all other characters are appended
# to the resulting string.
#
# Once a state has changed from EXPRESSION to one of the other three state
# types, certain characters will result in the current state either changing
# type or being popped from the stack:
# STATEMENT: semicolon -> STATEMENT
# left-paren, left-brace -> POP
# EXPRESSION: semicolon -> POP
# left-paren, left-brace -> POP
# COMMAND: semicolon -> STATEMENT
# left-paren, left-brace -> POP
# ARITHMETIC: semicolon -> STATEMENT
# left-paren, left-brace -> POP
#
# Certain symbols require recursive parsing:
# - dquote strings allow string interpolation, so expressions within the string
# may be parsed in a different state. Once the expression is complete, the
# lexer returns to the previous state.
# - in most cases, $(...) can be used to delimit sub-expressions (including in
# strings). When '$(' is encountered, a new state entry of type EXPRESSION is
# pushed onto the stack. When the corresponding ')' is encountered, that state
# entry is popped from the stack.
# - similarly to $(...), (...) can be used to group expressions, just like in
# mathematical expressions.