Compare commits
9 Commits
8b0295faf2
...
f5d847736a
| Author | SHA1 | Date | |
|---|---|---|---|
| f5d847736a | |||
| 94048c6508 | |||
| 3398de6fa9 | |||
| ba8a2111eb | |||
| 5ea41fcc6e | |||
| cfaf53040b | |||
| 7d95d57f98 | |||
| b12f59ed2c | |||
| 090f6a0002 |
@@ -14,8 +14,29 @@ syn keyword bshellKeyword func
|
||||
syn keyword bshellTodo contained TODO FIXME XXX NOTE HACK TBD
|
||||
syn match bshellLineComment /#.*$/ contains=bshellTodo
|
||||
|
||||
syn region bshellInterpolatedString matchgroup=bshellString start=+"+ end=+"+ extend contains=bshellVariable
|
||||
syn region bshellInterpolation matchgroup=bshellInterpolationDelimiter start=+$(+ end=+)+ keepend contained contains=@bshellAll
|
||||
syn region bshellInterpolation matchgroup=bshellInterpolationDelimiter start="$(" end=")" contained contains=ALL
|
||||
syn region bshellInterpolatedString matchgroup=bshellString start=+"+ end=+"+ extend contains=bshellVariable,bshellInterpolation
|
||||
syn region bshellLiteralString matchgroup=bshellString start=+\'+ end=+\'+ extend contains=bshellSpecialChar,bshellSpecialError,bshellUnicodeNumber,@Spell
|
||||
syn region bshell1NestedParentheses start="(" skip="\\\\\|\\)" matchgroup=bshellInterpolation end=")" transparent contained
|
||||
|
||||
syn case ignore
|
||||
syn match bshellInteger "\<0b[01_]*[01]\%([lu]\|lu\|ul\)\=\>" display
|
||||
syn match bshellInteger "\<\d\+\%(_\+\d\+\)*\%([lu]\|lu\|ul\)\=\>" display
|
||||
syn match bshellInteger "\<-\d\+\%(_\+\d\+\)*\%([lu]\|lu\|ul\)\=\>" display
|
||||
syn match bshellInteger "\<0x[[:xdigit:]_]*\x\%([lu]\|lu\|ul\)\=\>" display
|
||||
syn match bshellReal "\<\d\+\%(_\+\d\+\)*\.\d\+\%(_\+\d\+\)*\%\(e[-+]\=\d\+\%(_\+\d\+\)*\)\=[fdm]\=" display
|
||||
syn match bshellReal "\.\d\+\%(_\+\d\+\)*\%(e[-+]\=\d\+\%(_\+\d\+\)*\)\=[fdm]\=\>" display
|
||||
syn match bshellReal "\<\d\+\%(_\+\d\+\)*e[-+]\=\d\+\%(_\+\d\+\)*[fdm]\=\>" display
|
||||
syn match bshellReal "\<\d\+\%(_\+\d\+\)*[fdm]\>" display
|
||||
syn case match
|
||||
syn cluster bshellNumber contains=bshellInteger,bshellReal
|
||||
|
||||
syn cluster bshellLiteral contains=@bshellNumber,@bshellString
|
||||
syn cluster bshellAll contains=@bshellLiteral,bshellVariable,bshellKeyword,bshellArgFlag,bshellFunctionRef,bshellSymbolOp,bshellKeywordOp
|
||||
|
||||
syn match bshellSymbolOp "[+]" display
|
||||
syn keyword bshellKeywordOp is not understands and or
|
||||
|
||||
hi def link bshellKeyword Statement
|
||||
hi def link bshellArgFlag Tag
|
||||
@@ -25,7 +46,12 @@ hi def link bshellFunctionRef Function
|
||||
hi def link bshellString String
|
||||
hi def link bshellInterpolatedString String
|
||||
hi def link bshellLiteralString String
|
||||
hi def link bshellInteger Number
|
||||
hi def link bshellReal Float
|
||||
|
||||
hi def link bshellInterpolationDelimiter Delimiter
|
||||
hi def link bshellSymbolOp Operator
|
||||
hi def link bshellKeywordOp Operator
|
||||
|
||||
" The default highlighting.
|
||||
" hi def link bshellUnspecifiedStatement Statement
|
||||
|
||||
@@ -0,0 +1,166 @@
|
||||
#include "ast.h"
|
||||
|
||||
#include "../status.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
extern struct ast_node_definition int_ast_node;
|
||||
extern struct ast_node_definition double_ast_node;
|
||||
extern struct ast_node_definition word_ast_node;
|
||||
extern struct ast_node_definition var_ast_node;
|
||||
extern struct ast_node_definition string_ast_node;
|
||||
extern struct ast_node_definition fstring_ast_node;
|
||||
extern struct ast_node_definition cmdcall_ast_node;
|
||||
extern struct ast_node_definition pipeline_ast_node;
|
||||
extern struct ast_node_definition redirection_ast_node;
|
||||
|
||||
static const struct ast_node_definition *ast_node_defintions[] = {
|
||||
[AST_INT] = &int_ast_node,
|
||||
[AST_DOUBLE] = &double_ast_node,
|
||||
[AST_WORD] = &word_ast_node,
|
||||
[AST_VAR] = &var_ast_node,
|
||||
[AST_STRING] = &string_ast_node,
|
||||
[AST_FSTRING] = &fstring_ast_node,
|
||||
[AST_CMDCALL] = &cmdcall_ast_node,
|
||||
[AST_PIPELINE] = &pipeline_ast_node,
|
||||
[AST_REDIRECTION] = &redirection_ast_node,
|
||||
};
|
||||
static const size_t nr_ast_node_definitions
|
||||
= sizeof ast_node_defintions / sizeof ast_node_defintions[0];
|
||||
|
||||
struct ast_node *ast_node_create(enum ast_node_type type)
|
||||
{
|
||||
assert(type < nr_ast_node_definitions);
|
||||
|
||||
const struct ast_node_definition *def = ast_node_defintions[type];
|
||||
struct ast_node *out = malloc(def->def_node_size);
|
||||
if (!out) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
memset(out, 0x0, def->def_node_size);
|
||||
|
||||
out->n_type = type;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
void ast_node_destroy(struct ast_node *node)
|
||||
{
|
||||
assert(node->n_type < nr_ast_node_definitions);
|
||||
|
||||
struct ast_iterator it = {0};
|
||||
ast_iterator_enqueue(&it, node);
|
||||
|
||||
while (1) {
|
||||
node = ast_iterator_peek(&it);
|
||||
if (!node) {
|
||||
break;
|
||||
}
|
||||
|
||||
const struct ast_node_definition *def
|
||||
= ast_node_defintions[node->n_type];
|
||||
|
||||
if (def->def_cleanup) {
|
||||
def->def_cleanup(node);
|
||||
}
|
||||
|
||||
ast_iterator_dequeue(&it);
|
||||
free(node);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Seed an iterator with `node` so that ast_iterator_peek() /
 * ast_iterator_dequeue() walk the tree starting from it.
 */
void ast_node_iterate(struct ast_node *node, struct ast_iterator *it)
{
	/* a plain enqueue is all that is needed to start a walk */
	ast_iterator_enqueue(it, node);
}
|
||||
|
||||
/*
 * Append a textual form of the node's payload to `out`.
 *
 * Writes nothing when the node type is out of range, has no registered
 * definition, or its definition has no def_to_string hook.
 */
void ast_node_to_string(const struct ast_node *node, fx_bstr *out)
{
	if ((size_t)node->n_type >= nr_ast_node_definitions) {
		return;
	}

	const struct ast_node_definition *def
		= ast_node_defintions[node->n_type];

	/* empty table slots are NULL, so check before dereferencing */
	if (def && def->def_to_string) {
		def->def_to_string(node, out);
	}
}
|
||||
|
||||
#define ENUM_STR(x) \
|
||||
case x: \
|
||||
return #x
|
||||
|
||||
const char *ast_node_type_to_string(enum ast_node_type type)
|
||||
{
|
||||
switch (type) {
|
||||
ENUM_STR(AST_NONE);
|
||||
ENUM_STR(AST_INT);
|
||||
ENUM_STR(AST_DOUBLE);
|
||||
ENUM_STR(AST_WORD);
|
||||
ENUM_STR(AST_STRING);
|
||||
ENUM_STR(AST_FSTRING);
|
||||
ENUM_STR(AST_VAR);
|
||||
ENUM_STR(AST_VAR_SPLAT);
|
||||
ENUM_STR(AST_FLAG);
|
||||
ENUM_STR(AST_CMDCALL);
|
||||
ENUM_STR(AST_PIPELINE);
|
||||
ENUM_STR(AST_REDIRECTION);
|
||||
default:
|
||||
return "<unknown>";
|
||||
}
|
||||
}
|
||||
|
||||
struct ast_node *ast_iterator_peek(struct ast_iterator *it)
|
||||
{
|
||||
fx_queue_entry *cur = fx_queue_first(&it->it_queue);
|
||||
if (!cur) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return fx_unbox(struct ast_node, cur, n_it.e_entry);
|
||||
}
|
||||
|
||||
struct ast_node *ast_iterator_dequeue(struct ast_iterator *it)
|
||||
{
|
||||
fx_queue_entry *cur = fx_queue_first(&it->it_queue);
|
||||
if (!cur) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct ast_node *node = fx_unbox(struct ast_node, cur, n_it.e_entry);
|
||||
const struct ast_node_definition *def
|
||||
= ast_node_defintions[node->n_type];
|
||||
|
||||
it->it_insert_after = cur;
|
||||
if (def->def_collect_children) {
|
||||
def->def_collect_children(node, it);
|
||||
}
|
||||
|
||||
fx_queue_pop_front(&it->it_queue);
|
||||
return fx_unbox(struct ast_node, cur, n_it.e_entry);
|
||||
}
|
||||
|
||||
void ast_iterator_enqueue(struct ast_iterator *it, struct ast_node *node)
|
||||
{
|
||||
unsigned long new_depth = 0;
|
||||
|
||||
fx_queue_entry *cur = fx_queue_first(&it->it_queue);
|
||||
if (cur) {
|
||||
struct ast_node *cur_node
|
||||
= fx_unbox(struct ast_node, cur, n_it.e_entry);
|
||||
new_depth = cur_node->n_it.e_depth + 1;
|
||||
}
|
||||
|
||||
node->n_it.e_depth = new_depth;
|
||||
|
||||
if (!it->it_insert_after) {
|
||||
fx_queue_push_back(&it->it_queue, &node->n_it.e_entry);
|
||||
return;
|
||||
}
|
||||
|
||||
fx_queue_insert_after(
|
||||
&it->it_queue,
|
||||
&node->n_it.e_entry,
|
||||
it->it_insert_after);
|
||||
it->it_insert_after = &node->n_it.e_entry;
|
||||
}
|
||||
@@ -0,0 +1,126 @@
|
||||
#ifndef AST_H_
|
||||
#define AST_H_
|
||||
|
||||
#include "../status.h"
|
||||
|
||||
#include <fx/bstr.h>
|
||||
#include <fx/queue.h>
|
||||
|
||||
struct lex_token;
|
||||
|
||||
/* Discriminator stored in every AST node (struct ast_node::n_type). */
enum ast_node_type {
	AST_NONE = 0x00u,
	/* literals and words */
	AST_INT,
	AST_DOUBLE,
	AST_WORD,
	AST_STRING,
	AST_FSTRING,
	/* variable references */
	AST_VAR,
	AST_VAR_SPLAT,
	AST_FLAG,
	/* command structure */
	AST_CMDCALL,
	AST_PIPELINE,
	AST_REDIRECTION,
};
|
||||
|
||||
struct ast_iterator_entry {
|
||||
fx_queue_entry e_entry;
|
||||
unsigned long e_depth;
|
||||
};
|
||||
|
||||
struct ast_node {
|
||||
enum ast_node_type n_type;
|
||||
struct ast_node *n_parent;
|
||||
fx_queue_entry n_entry;
|
||||
struct ast_iterator_entry n_it;
|
||||
};
|
||||
|
||||
struct int_ast_node {
|
||||
struct ast_node n_base;
|
||||
struct lex_token *n_value;
|
||||
};
|
||||
|
||||
struct double_ast_node {
|
||||
struct ast_node n_base;
|
||||
struct lex_token *n_value;
|
||||
};
|
||||
|
||||
struct word_ast_node {
|
||||
struct ast_node n_base;
|
||||
struct lex_token *n_value;
|
||||
};
|
||||
|
||||
struct string_ast_node {
|
||||
struct ast_node n_base;
|
||||
struct lex_token *n_value;
|
||||
};
|
||||
|
||||
struct fstring_ast_node {
|
||||
struct ast_node n_base;
|
||||
fx_queue n_elements;
|
||||
};
|
||||
|
||||
struct var_ast_node {
|
||||
struct ast_node n_base;
|
||||
struct lex_token *n_ident;
|
||||
};
|
||||
|
||||
struct var_splat_ast_node {
|
||||
struct ast_node n_base;
|
||||
struct lex_token *n_ident;
|
||||
};
|
||||
|
||||
struct cmdcall_ast_node {
|
||||
struct ast_node n_base;
|
||||
fx_queue n_args;
|
||||
fx_queue n_redirect;
|
||||
};
|
||||
|
||||
struct pipeline_ast_node {
|
||||
struct ast_node n_base;
|
||||
fx_queue n_stages;
|
||||
};
|
||||
|
||||
struct redirection_ast_node {
|
||||
struct ast_node n_base;
|
||||
bool n_append : 1;
|
||||
bool n_out_is_fd : 1;
|
||||
bool n_out_is_expr : 1;
|
||||
|
||||
unsigned int n_in, n_out;
|
||||
struct ast_node *n_out_path_expr;
|
||||
const char *n_out_path;
|
||||
struct lex_token *n_out_tok;
|
||||
};
|
||||
|
||||
struct ast_iterator {
|
||||
struct ast_node *it_cur;
|
||||
fx_queue it_queue;
|
||||
unsigned int it_depth;
|
||||
fx_queue_entry *it_insert_after;
|
||||
};
|
||||
|
||||
struct ast_node_definition {
|
||||
enum ast_node_type def_id;
|
||||
size_t def_node_size;
|
||||
enum bshell_status (*def_collect_children)(
|
||||
struct ast_node *,
|
||||
struct ast_iterator *);
|
||||
enum bshell_status (*def_cleanup)(struct ast_node *);
|
||||
void (*def_to_string)(const struct ast_node *, fx_bstr *);
|
||||
};
|
||||
|
||||
extern struct ast_node *ast_node_create(enum ast_node_type type);
|
||||
extern void ast_node_destroy(struct ast_node *node);
|
||||
extern void ast_node_iterate(struct ast_node *node, struct ast_iterator *it);
|
||||
extern void ast_node_to_string(const struct ast_node *node, fx_bstr *out);
|
||||
|
||||
extern const char *ast_node_type_to_string(enum ast_node_type type);
|
||||
|
||||
extern struct ast_node *ast_iterator_peek(struct ast_iterator *it);
|
||||
extern struct ast_node *ast_iterator_dequeue(struct ast_iterator *it);
|
||||
extern void ast_iterator_enqueue(
|
||||
struct ast_iterator *it,
|
||||
struct ast_node *node);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,31 @@
|
||||
#include "ast.h"
|
||||
|
||||
static enum bshell_status collect_children(
|
||||
struct ast_node *node,
|
||||
struct ast_iterator *it)
|
||||
{
|
||||
struct cmdcall_ast_node *cmdcall = (struct cmdcall_ast_node *)node;
|
||||
fx_queue_entry *cur = fx_queue_first(&cmdcall->n_args);
|
||||
while (cur) {
|
||||
struct ast_node *child
|
||||
= fx_unbox(struct ast_node, cur, n_entry);
|
||||
ast_iterator_enqueue(it, child);
|
||||
cur = fx_queue_next(cur);
|
||||
}
|
||||
|
||||
cur = fx_queue_first(&cmdcall->n_redirect);
|
||||
while (cur) {
|
||||
struct ast_node *child
|
||||
= fx_unbox(struct ast_node, cur, n_entry);
|
||||
ast_iterator_enqueue(it, child);
|
||||
cur = fx_queue_next(cur);
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
struct ast_node_definition cmdcall_ast_node = {
|
||||
.def_id = AST_CMDCALL,
|
||||
.def_node_size = sizeof(struct cmdcall_ast_node),
|
||||
.def_collect_children = collect_children,
|
||||
};
|
||||
@@ -0,0 +1,6 @@
|
||||
#include "ast.h"
|
||||
|
||||
struct ast_node_definition double_ast_node = {
|
||||
.def_id = AST_DOUBLE,
|
||||
.def_node_size = sizeof(struct double_ast_node),
|
||||
};
|
||||
@@ -0,0 +1,6 @@
|
||||
#include "ast.h"
|
||||
|
||||
struct ast_node_definition fstring_ast_node = {
|
||||
.def_id = AST_FSTRING,
|
||||
.def_node_size = sizeof(struct fstring_ast_node),
|
||||
};
|
||||
@@ -0,0 +1,14 @@
|
||||
#include "../parse/token.h"
|
||||
#include "ast.h"
|
||||
|
||||
static void to_string(const struct ast_node *node, fx_bstr *out)
|
||||
{
|
||||
struct int_ast_node *i = (struct int_ast_node *)node;
|
||||
fx_bstr_write_fmt(out, NULL, "%lld", i->n_value->tok_int);
|
||||
}
|
||||
|
||||
struct ast_node_definition int_ast_node = {
|
||||
.def_id = AST_INT,
|
||||
.def_node_size = sizeof(struct int_ast_node),
|
||||
.def_to_string = to_string,
|
||||
};
|
||||
@@ -0,0 +1,23 @@
|
||||
#include "ast.h"
|
||||
|
||||
static enum bshell_status collect_children(
|
||||
struct ast_node *node,
|
||||
struct ast_iterator *it)
|
||||
{
|
||||
struct pipeline_ast_node *pipeline = (struct pipeline_ast_node *)node;
|
||||
fx_queue_entry *cur = fx_queue_first(&pipeline->n_stages);
|
||||
while (cur) {
|
||||
struct ast_node *child
|
||||
= fx_unbox(struct ast_node, cur, n_entry);
|
||||
ast_iterator_enqueue(it, child);
|
||||
cur = fx_queue_next(cur);
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
struct ast_node_definition pipeline_ast_node = {
|
||||
.def_id = AST_PIPELINE,
|
||||
.def_node_size = sizeof(struct pipeline_ast_node),
|
||||
.def_collect_children = collect_children,
|
||||
};
|
||||
@@ -0,0 +1,49 @@
|
||||
#include "ast.h"
|
||||
|
||||
static enum bshell_status collect_children(
|
||||
struct ast_node *node,
|
||||
struct ast_iterator *it)
|
||||
{
|
||||
struct redirection_ast_node *redirection
|
||||
= (struct redirection_ast_node *)node;
|
||||
|
||||
if (redirection->n_out_path_expr) {
|
||||
ast_iterator_enqueue(it, redirection->n_out_path_expr);
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static void to_string(const struct ast_node *node, fx_bstr *out)
|
||||
{
|
||||
struct redirection_ast_node *redirection
|
||||
= (struct redirection_ast_node *)node;
|
||||
fx_bstr_write_fmt(out, NULL, "&%u", redirection->n_in);
|
||||
|
||||
if (redirection->n_append) {
|
||||
fx_bstr_write_fmt(out, NULL, " >>");
|
||||
} else {
|
||||
fx_bstr_write_fmt(out, NULL, " >");
|
||||
}
|
||||
|
||||
if (redirection->n_out_is_fd) {
|
||||
fx_bstr_write_fmt(out, NULL, " &");
|
||||
} else {
|
||||
fx_bstr_write_fmt(out, NULL, " ");
|
||||
}
|
||||
|
||||
if (redirection->n_out_is_expr) {
|
||||
fx_bstr_write_fmt(out, NULL, "<expr>");
|
||||
} else if (redirection->n_out_path) {
|
||||
fx_bstr_write_fmt(out, NULL, "'%s'", redirection->n_out_path);
|
||||
} else {
|
||||
fx_bstr_write_fmt(out, NULL, "%u", redirection->n_out);
|
||||
}
|
||||
}
|
||||
|
||||
struct ast_node_definition redirection_ast_node = {
|
||||
.def_id = AST_REDIRECTION,
|
||||
.def_node_size = sizeof(struct redirection_ast_node),
|
||||
.def_collect_children = collect_children,
|
||||
.def_to_string = to_string,
|
||||
};
|
||||
@@ -0,0 +1,15 @@
|
||||
#include "../parse/token.h"
|
||||
#include "ast.h"
|
||||
|
||||
static void to_string(const struct ast_node *node, fx_bstr *out)
|
||||
{
|
||||
const struct string_ast_node *string
|
||||
= (const struct string_ast_node *)node;
|
||||
fx_bstr_write_fmt(out, NULL, "%s", string->n_value->tok_str);
|
||||
}
|
||||
|
||||
struct ast_node_definition string_ast_node = {
|
||||
.def_id = AST_STRING,
|
||||
.def_node_size = sizeof(struct string_ast_node),
|
||||
.def_to_string = to_string,
|
||||
};
|
||||
@@ -0,0 +1,14 @@
|
||||
#include "../parse/token.h"
|
||||
#include "ast.h"
|
||||
|
||||
static void to_string(const struct ast_node *node, fx_bstr *out)
|
||||
{
|
||||
const struct var_ast_node *var = (const struct var_ast_node *)node;
|
||||
fx_bstr_write_fmt(out, NULL, "%s", var->n_ident->tok_str);
|
||||
}
|
||||
|
||||
struct ast_node_definition var_ast_node = {
|
||||
.def_id = AST_VAR,
|
||||
.def_node_size = sizeof(struct var_ast_node),
|
||||
.def_to_string = to_string,
|
||||
};
|
||||
@@ -0,0 +1,14 @@
|
||||
#include "../parse/token.h"
|
||||
#include "ast.h"
|
||||
|
||||
static void to_string(const struct ast_node *node, fx_bstr *out)
|
||||
{
|
||||
const struct word_ast_node *word = (const struct word_ast_node *)node;
|
||||
fx_bstr_write_fmt(out, NULL, "%s", word->n_value->tok_str);
|
||||
}
|
||||
|
||||
struct ast_node_definition word_ast_node = {
|
||||
.def_id = AST_WORD,
|
||||
.def_node_size = sizeof(struct word_ast_node),
|
||||
.def_to_string = to_string,
|
||||
};
|
||||
+135
@@ -0,0 +1,135 @@
|
||||
#include "debug.h"
|
||||
|
||||
#include "ast/ast.h"
|
||||
#include "parse/token.h"
|
||||
|
||||
#include <fx/string.h>
|
||||
#include <fx/term/print.h>
|
||||
#include <stdio.h>
|
||||
|
||||
extern void print_lex_token(struct lex_token *tok)
|
||||
{
|
||||
printf("[%lu:%lu - %lu:%lu] ",
|
||||
tok->tok_start.c_row,
|
||||
tok->tok_start.c_col,
|
||||
tok->tok_end.c_row,
|
||||
tok->tok_end.c_col);
|
||||
|
||||
switch (tok->tok_type) {
|
||||
case TOK_KEYWORD:
|
||||
fx_puts("[magenta]");
|
||||
break;
|
||||
case TOK_SYMBOL:
|
||||
fx_puts("[blue]");
|
||||
break;
|
||||
case TOK_INT:
|
||||
case TOK_DOUBLE:
|
||||
fx_puts("[yellow]");
|
||||
break;
|
||||
case TOK_FLAG:
|
||||
fx_puts("[red]");
|
||||
break;
|
||||
case TOK_WORD:
|
||||
case TOK_VAR:
|
||||
case TOK_VAR_SPLAT:
|
||||
fx_puts("[cyan]");
|
||||
break;
|
||||
case TOK_STRING:
|
||||
fx_puts("[green]");
|
||||
break;
|
||||
case TOK_STR_START:
|
||||
fx_puts("[green]");
|
||||
break;
|
||||
case TOK_STR_END:
|
||||
fx_puts("[green]");
|
||||
break;
|
||||
case TOK_LINEFEED:
|
||||
fx_puts("[dark_grey]");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
fx_puts(token_type_to_string(tok->tok_type));
|
||||
|
||||
switch (tok->tok_type) {
|
||||
case TOK_WORD:
|
||||
case TOK_FLAG:
|
||||
case TOK_STRING:
|
||||
case TOK_VAR:
|
||||
case TOK_VAR_SPLAT:
|
||||
printf("(%s)", tok->tok_str);
|
||||
break;
|
||||
case TOK_SYMBOL:
|
||||
printf("(%s)", token_symbol_to_string(tok->tok_symbol));
|
||||
break;
|
||||
case TOK_KEYWORD:
|
||||
printf("(%s)", token_keyword_to_string(tok->tok_keyword));
|
||||
break;
|
||||
case TOK_INT:
|
||||
printf("(%lld)", tok->tok_int);
|
||||
break;
|
||||
case TOK_DOUBLE:
|
||||
printf("(%lf)", tok->tok_double);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
fx_puts("[reset]\n");
|
||||
}
|
||||
|
||||
void print_ast_node(struct ast_node *node)
|
||||
{
|
||||
struct ast_iterator it = {0};
|
||||
ast_node_iterate(node, &it);
|
||||
|
||||
while (1) {
|
||||
node = ast_iterator_peek(&it);
|
||||
if (!node) {
|
||||
break;
|
||||
}
|
||||
|
||||
for (unsigned long i = 0; i < node->n_it.e_depth; i++) {
|
||||
fx_puts(" ");
|
||||
}
|
||||
|
||||
switch (node->n_type) {
|
||||
case AST_REDIRECTION:
|
||||
case AST_PIPELINE:
|
||||
fx_puts("[blue]");
|
||||
break;
|
||||
case AST_CMDCALL:
|
||||
fx_puts("[red]");
|
||||
break;
|
||||
case AST_INT:
|
||||
case AST_DOUBLE:
|
||||
fx_puts("[yellow]");
|
||||
break;
|
||||
case AST_WORD:
|
||||
fx_puts("[cyan]");
|
||||
break;
|
||||
case AST_STRING:
|
||||
case AST_FSTRING:
|
||||
fx_puts("[green]");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
fx_printf("%s", ast_node_type_to_string(node->n_type));
|
||||
|
||||
char s[128] = {0};
|
||||
fx_bstr str;
|
||||
fx_bstr_begin(&str, s, sizeof s);
|
||||
ast_node_to_string(node, &str);
|
||||
|
||||
if (fx_bstr_get_size(&str)) {
|
||||
fx_printf("(%s)", fx_bstr_end(&str));
|
||||
}
|
||||
|
||||
fx_printf("[reset]\n");
|
||||
|
||||
ast_iterator_dequeue(&it);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
#ifndef DEBUG_H_
|
||||
#define DEBUG_H_
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
struct ast_node;
|
||||
struct lex_token;
|
||||
|
||||
extern void print_lex_token(struct lex_token *tok);
|
||||
extern void print_ast_node(struct ast_node *node);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,69 @@
|
||||
#ifndef LEX_H_
|
||||
#define LEX_H_
|
||||
|
||||
#include "../status.h"
|
||||
|
||||
#include <fx/queue.h>
|
||||
#include <fx/string.h>
|
||||
#include <fx/stringstream.h>
|
||||
|
||||
struct lex_token;
|
||||
struct line_source;
|
||||
|
||||
/* Option bits passed to lex_ctx_init(). */
enum lex_flags {
	LEX_PRINT_TOKENS = 0x01u,
};

/*
 * Identifiers of the lexer states. Each value is a distinct bit;
 * NOTE(review): presumably so lex_token_def::enabled_states can hold a
 * mask of several states - confirm.
 */
enum lex_state_type_id {
	LEX_STATE_STATEMENT = 0x01u,
	LEX_STATE_EXPRESSION = 0x02u,
	LEX_STATE_COMMAND = 0x04u,
	LEX_STATE_ARITHMETIC = 0x08u,
	LEX_STATE_STRING = 0x10u,
};
|
||||
|
||||
struct lex_token_def {
|
||||
int id;
|
||||
const char *name;
|
||||
uint64_t name_hash;
|
||||
enum lex_state_type_id enabled_states;
|
||||
};
|
||||
|
||||
struct lex_symbol_node {
|
||||
char s_char;
|
||||
struct lex_token_def *s_def;
|
||||
|
||||
fx_queue_entry s_entry;
|
||||
fx_queue s_children;
|
||||
};
|
||||
|
||||
struct lex_state {
|
||||
const struct lex_state_type *s_type;
|
||||
unsigned int s_paren_depth;
|
||||
fx_queue_entry s_entry;
|
||||
fx_string *s_tempstr;
|
||||
};
|
||||
|
||||
struct lex_ctx {
|
||||
enum lex_flags lex_flags;
|
||||
fx_queue lex_tokens;
|
||||
struct line_source *lex_src;
|
||||
fx_stringstream *lex_buf;
|
||||
fx_string *lex_tmp;
|
||||
fx_wchar lex_ch;
|
||||
fx_queue lex_state;
|
||||
struct lex_symbol_node *lex_sym_tree;
|
||||
enum bshell_status lex_status;
|
||||
};
|
||||
|
||||
extern enum bshell_status lex_ctx_init(
|
||||
struct lex_ctx *ctx,
|
||||
enum lex_flags flags,
|
||||
struct line_source *src);
|
||||
extern enum bshell_status lex_ctx_cleanup(struct lex_ctx *ctx);
|
||||
|
||||
extern struct lex_token *lex_ctx_peek(struct lex_ctx *ctx);
|
||||
extern struct lex_token *lex_ctx_claim(struct lex_ctx *ctx);
|
||||
extern void lex_ctx_discard(struct lex_ctx *ctx);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,136 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
enum bshell_status status = read_symbol(ctx, &sym);
|
||||
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
switch (sym->id) {
|
||||
case SYM_SQUOTE:
|
||||
status = read_literal_string(ctx, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
|
||||
case SYM_HASH:
|
||||
return read_line_comment(ctx);
|
||||
case SYM_DQUOTE:
|
||||
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR:
|
||||
status = read_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT:
|
||||
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
push_symbol(ctx, sym->id);
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_EXPRESSION);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_RIGHT_PAREN:
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_SEMICOLON:
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status arithmetic_word(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *word = NULL;
|
||||
enum bshell_status status = read_word(ctx, &word);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
bool converted = convert_word_to_keyword(word);
|
||||
if (!converted) {
|
||||
converted = convert_word_to_int(word);
|
||||
}
|
||||
|
||||
enqueue_token(ctx, word);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status arithmetic_pump_token(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
bool newline = false;
|
||||
|
||||
while (fx_wchar_is_space(c)) {
|
||||
if (c == '\n') {
|
||||
newline = true;
|
||||
}
|
||||
|
||||
advance_char_noread(ctx);
|
||||
c = peek_char_noread(ctx);
|
||||
}
|
||||
|
||||
if (newline) {
|
||||
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
return arithmetic_symbol(ctx);
|
||||
}
|
||||
|
||||
return arithmetic_word(ctx);
|
||||
}
|
||||
|
||||
const struct lex_state_type lex_arithmetic_state = {
|
||||
.s_id = LEX_STATE_ARITHMETIC,
|
||||
.s_pump_token = arithmetic_pump_token,
|
||||
};
|
||||
@@ -0,0 +1,131 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
static enum bshell_status command_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
enum bshell_status status = read_symbol(ctx, &sym);
|
||||
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
switch (sym->id) {
|
||||
case SYM_SQUOTE:
|
||||
status = read_literal_string(ctx, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
|
||||
case SYM_HASH:
|
||||
return read_line_comment(ctx);
|
||||
case SYM_DQUOTE:
|
||||
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR:
|
||||
status = read_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT:
|
||||
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
push_symbol(ctx, sym->id);
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_EXPRESSION);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_RIGHT_PAREN:
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_SEMICOLON:
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status command_word(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *word = NULL;
|
||||
enum bshell_status status = read_word(ctx, &word);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, word);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
enum bshell_status command_pump_token(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
bool newline = false;
|
||||
|
||||
while (fx_wchar_is_space(c)) {
|
||||
if (c == '\n') {
|
||||
newline = true;
|
||||
}
|
||||
|
||||
advance_char_noread(ctx);
|
||||
c = peek_char_noread(ctx);
|
||||
}
|
||||
|
||||
if (newline) {
|
||||
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
return command_symbol(ctx);
|
||||
}
|
||||
|
||||
return command_word(ctx);
|
||||
}
|
||||
|
||||
const struct lex_state_type lex_command_state = {
|
||||
.s_id = LEX_STATE_COMMAND,
|
||||
.s_pump_token = command_pump_token,
|
||||
};
|
||||
@@ -0,0 +1,134 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
static enum bshell_status expression_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
enum bshell_status status = read_symbol(ctx, &sym);
|
||||
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_DQUOTE:
|
||||
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR:
|
||||
status = read_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return status;
|
||||
case SYM_AT:
|
||||
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return status;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return status;
|
||||
case SYM_AT_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
return status;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
push_symbol(ctx, sym->id);
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_EXPRESSION);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_RIGHT_PAREN:
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_SEMICOLON:
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status expression_word(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *word = NULL;
|
||||
enum bshell_status status = read_word(ctx, &word);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
bool converted = convert_word_to_int(word);
|
||||
|
||||
if (converted) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
} else {
|
||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
||||
}
|
||||
|
||||
enqueue_token(ctx, word);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status expression_pump_token(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
bool newline = false;
|
||||
|
||||
while (fx_wchar_is_space(c)) {
|
||||
if (c == '\n') {
|
||||
newline = true;
|
||||
}
|
||||
|
||||
advance_char_noread(ctx);
|
||||
c = peek_char_noread(ctx);
|
||||
}
|
||||
|
||||
if (newline) {
|
||||
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||
enqueue_token(ctx, tok);
|
||||
lex_state_change(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
return expression_symbol(ctx);
|
||||
}
|
||||
|
||||
return expression_word(ctx);
|
||||
}
|
||||
|
||||
const struct lex_state_type lex_expression_state = {
|
||||
.s_id = LEX_STATE_EXPRESSION,
|
||||
.s_pump_token = expression_pump_token,
|
||||
};
|
||||
@@ -0,0 +1,75 @@
|
||||
#ifndef PARSE_LEX_INTERNAL_H_
|
||||
#define PARSE_LEX_INTERNAL_H_
|
||||
|
||||
#include "../../status.h"
|
||||
#include "../lex.h"
|
||||
#include "../token.h"
|
||||
|
||||
struct lex_ctx;
|
||||
|
||||
typedef enum bshell_status (*lex_state_pump_token)(struct lex_ctx *);
|
||||
typedef enum bshell_status (*lex_state_begin)(struct lex_ctx *);
|
||||
typedef enum bshell_status (*lex_state_end)(struct lex_ctx *);
|
||||
|
||||
struct lex_state_type {
|
||||
enum lex_state_type_id s_id;
|
||||
lex_state_pump_token s_pump_token;
|
||||
lex_state_begin s_begin;
|
||||
lex_state_end s_end;
|
||||
};
|
||||
|
||||
extern enum bshell_status pump_token_statement(struct lex_ctx *ctx);
|
||||
extern enum bshell_status pump_token_expression(struct lex_ctx *ctx);
|
||||
extern enum bshell_status pump_token_command(struct lex_ctx *ctx);
|
||||
extern enum bshell_status pump_token_arithmetic(struct lex_ctx *ctx);
|
||||
extern enum bshell_status pump_token_string(struct lex_ctx *ctx);
|
||||
|
||||
extern struct lex_state *lex_state_push(
|
||||
struct lex_ctx *ctx,
|
||||
enum lex_state_type_id state_type);
|
||||
extern void lex_state_pop(struct lex_ctx *ctx);
|
||||
extern struct lex_state *lex_state_get(struct lex_ctx *ctx);
|
||||
extern void lex_state_change(struct lex_ctx *ctx, enum lex_state_type_id type);
|
||||
extern fx_string *lex_state_get_tempstr(struct lex_ctx *ctx);
|
||||
|
||||
extern fx_wchar peek_char(struct lex_ctx *ctx);
|
||||
extern fx_wchar peek_char_noread(struct lex_ctx *ctx);
|
||||
extern void advance_char(struct lex_ctx *ctx);
|
||||
extern void advance_char_noread(struct lex_ctx *ctx);
|
||||
|
||||
extern bool string_is_valid_number(const char *s, long long *out);
|
||||
extern bool convert_word_to_int(struct lex_token *tok);
|
||||
extern bool convert_word_to_keyword(struct lex_token *tok);
|
||||
|
||||
extern void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok);
|
||||
|
||||
extern enum bshell_status read_word(
|
||||
struct lex_ctx *ctx,
|
||||
struct lex_token **out);
|
||||
extern enum bshell_status read_symbol(
|
||||
struct lex_ctx *ctx,
|
||||
const struct lex_token_def **out);
|
||||
extern enum bshell_status read_literal_string(
|
||||
struct lex_ctx *ctx,
|
||||
struct lex_token **out);
|
||||
extern enum bshell_status read_line_comment(struct lex_ctx *lex);
|
||||
extern enum bshell_status read_var(
|
||||
struct lex_ctx *ctx,
|
||||
enum token_type type,
|
||||
struct lex_token **out);
|
||||
extern enum bshell_status read_braced_var(
|
||||
struct lex_ctx *ctx,
|
||||
enum token_type type,
|
||||
struct lex_token **out);
|
||||
|
||||
extern enum bshell_status push_symbol(
|
||||
struct lex_ctx *ctx,
|
||||
enum token_symbol sym);
|
||||
|
||||
extern bool char_can_begin_symbol(struct lex_ctx *ctx, char c);
|
||||
extern bool char_can_begin_symbol_in_state(
|
||||
struct lex_ctx *ctx,
|
||||
char c,
|
||||
enum lex_state_type_id state_type);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,162 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
static enum bshell_status statement_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
enum bshell_status status = read_symbol(ctx, &sym);
|
||||
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
switch (sym->id) {
|
||||
case SYM_SQUOTE:
|
||||
status = read_literal_string(ctx, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
|
||||
case SYM_HASH:
|
||||
return read_line_comment(ctx);
|
||||
case SYM_DQUOTE:
|
||||
if (!lex_state_push(ctx, LEX_STATE_STRING)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR:
|
||||
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
status = read_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT:
|
||||
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT_LEFT_BRACE:
|
||||
if (!lex_state_push(ctx, LEX_STATE_ARITHMETIC)) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
push_symbol(ctx, sym->id);
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_EXPRESSION);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_LEFT_BRACE:
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_RIGHT_PAREN:
|
||||
case SYM_RIGHT_BRACE:
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (sym->enabled_states & LEX_STATE_COMMAND) {
|
||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
||||
} else if (sym->enabled_states & LEX_STATE_ARITHMETIC) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
}
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status statement_word(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *word = NULL;
|
||||
enum bshell_status status = read_word(ctx, &word);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
bool converted = convert_word_to_keyword(word);
|
||||
if (!converted) {
|
||||
converted = convert_word_to_int(word);
|
||||
}
|
||||
|
||||
if (converted) {
|
||||
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
|
||||
} else {
|
||||
lex_state_change(ctx, LEX_STATE_COMMAND);
|
||||
}
|
||||
|
||||
enqueue_token(ctx, word);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
bool newline = false;
|
||||
|
||||
while (fx_wchar_is_space(c)) {
|
||||
if (c == '\n') {
|
||||
newline = true;
|
||||
}
|
||||
|
||||
advance_char_noread(ctx);
|
||||
c = peek_char_noread(ctx);
|
||||
}
|
||||
|
||||
if (newline) {
|
||||
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
return statement_symbol(ctx);
|
||||
}
|
||||
|
||||
return statement_word(ctx);
|
||||
}
|
||||
|
||||
const struct lex_state_type lex_statement_state = {
|
||||
.s_id = LEX_STATE_STATEMENT,
|
||||
.s_pump_token = statement_pump_token,
|
||||
};
|
||||
@@ -0,0 +1,136 @@
|
||||
#include "lex-internal.h"
|
||||
|
||||
static enum bshell_status string_symbol(struct lex_ctx *ctx)
|
||||
{
|
||||
const struct lex_token_def *sym = NULL;
|
||||
enum bshell_status status = read_symbol(ctx, &sym);
|
||||
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
struct lex_token *tok = NULL;
|
||||
|
||||
switch (sym->id) {
|
||||
case SYM_DOLLAR_LEFT_PAREN:
|
||||
status = push_symbol(ctx, sym->id);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
lex_state_push(ctx, LEX_STATE_STATEMENT);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DQUOTE:
|
||||
lex_state_pop(ctx);
|
||||
return BSHELL_SUCCESS;
|
||||
case SYM_DOLLAR:
|
||||
status = read_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT:
|
||||
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_DOLLAR_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
case SYM_AT_LEFT_BRACE:
|
||||
status = read_braced_var(ctx, TOK_VAR_SPLAT, &tok);
|
||||
if (status != BSHELL_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return status;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return BSHELL_ERR_BAD_SYNTAX;
|
||||
}
|
||||
|
||||
static enum bshell_status string_content(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = FX_WCHAR_INVALID;
|
||||
fx_string *temp = lex_state_get_tempstr(ctx);
|
||||
fx_string_clear(temp);
|
||||
|
||||
while (1) {
|
||||
c = peek_char(ctx);
|
||||
if (c == FX_WCHAR_INVALID) {
|
||||
/* EOF without end of string */
|
||||
ctx->lex_status = BSHELL_ERR_BAD_SYNTAX;
|
||||
}
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
break;
|
||||
}
|
||||
|
||||
fx_string_append_wc(temp, c);
|
||||
advance_char(ctx);
|
||||
}
|
||||
|
||||
if (fx_string_get_size(temp, FX_STRLEN_NORMAL) == 0) {
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
struct lex_token *tok = lex_token_create_with_string(
|
||||
TOK_STRING,
|
||||
fx_string_get_cstr(temp));
|
||||
enqueue_token(ctx, tok);
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status string_begin(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = lex_token_create(TOK_STR_START);
|
||||
if (!tok) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status string_end(struct lex_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = lex_token_create(TOK_STR_END);
|
||||
if (!tok) {
|
||||
return BSHELL_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
enqueue_token(ctx, tok);
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
static enum bshell_status string_pump_token(struct lex_ctx *ctx)
|
||||
{
|
||||
fx_wchar c = peek_char(ctx);
|
||||
|
||||
if (char_can_begin_symbol(ctx, c)) {
|
||||
return string_symbol(ctx);
|
||||
}
|
||||
|
||||
return string_content(ctx);
|
||||
}
|
||||
|
||||
const struct lex_state_type lex_string_state = {
|
||||
.s_id = LEX_STATE_STRING,
|
||||
.s_begin = string_begin,
|
||||
.s_end = string_end,
|
||||
.s_pump_token = string_pump_token,
|
||||
};
|
||||
@@ -0,0 +1,30 @@
|
||||
#include "parse.h"
|
||||
|
||||
#include "../ast/ast.h"
|
||||
#include "lex.h"
|
||||
#include "syntax.h"
|
||||
#include "token.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
enum bshell_status parse_ctx_init(struct parse_ctx *ctx, struct lex_ctx *src)
|
||||
{
|
||||
memset(ctx, 0x0, sizeof *ctx);
|
||||
|
||||
ctx->p_src = src;
|
||||
|
||||
return BSHELL_SUCCESS;
|
||||
}
|
||||
|
||||
/* Release a parse context. Currently there is nothing to release. */
void parse_ctx_cleanup(struct parse_ctx *ctx)
{
	(void)ctx;
}
|
||||
|
||||
/*
 * Parse one statement and return its AST node, or NULL when
 * parse_statement() reports failure.
 */
struct ast_node *parse_ctx_read_node(struct parse_ctx *ctx)
{
	struct ast_node *node = NULL;

	if (!parse_statement(ctx, &node)) {
		return NULL;
	}

	return node;
}
|
||||
@@ -0,0 +1,21 @@
|
||||
#ifndef PARSE_H_
|
||||
#define PARSE_H_
|
||||
|
||||
#include "../status.h"
|
||||
|
||||
struct lex_ctx;
|
||||
struct ast_node;
|
||||
|
||||
struct parse_ctx {
|
||||
struct lex_ctx *p_src;
|
||||
enum bshell_status p_status;
|
||||
};
|
||||
|
||||
extern enum bshell_status parse_ctx_init(
|
||||
struct parse_ctx *ctx,
|
||||
struct lex_ctx *src);
|
||||
extern void parse_ctx_cleanup(struct parse_ctx *ctx);
|
||||
|
||||
extern struct ast_node *parse_ctx_read_node(struct parse_ctx *ctx);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,48 @@
|
||||
#ifndef PARSE_SYNTAX_H_
|
||||
#define PARSE_SYNTAX_H_
|
||||
|
||||
#include "../ast/ast.h"
|
||||
#include "lex.h"
|
||||
#include "parse.h"
|
||||
#include "token.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
 * Flags accepted by parse_operand().
 * NOTE(review): the meaning of OPERAND_BASIC is not visible here --
 * confirm against the parse_operand() implementation.
 */
enum parse_operand_flags {
	OPERAND_BASIC = 0x01u,
};
|
||||
|
||||
extern struct lex_token *peek_token(struct parse_ctx *ctx);
|
||||
extern enum token_type peek_token_type(struct parse_ctx *ctx);
|
||||
extern enum token_keyword peek_unknown_keyword(struct parse_ctx *ctx);
|
||||
extern enum token_symbol peek_unknown_symbol(struct parse_ctx *ctx);
|
||||
|
||||
extern struct lex_token *claim_token(struct parse_ctx *ctx);
|
||||
extern void discard_token(struct parse_ctx *ctx);
|
||||
|
||||
extern bool peek_linefeed(struct parse_ctx *ctx);
|
||||
extern bool peek_symbol(struct parse_ctx *ctx, enum token_symbol sym);
|
||||
extern bool peek_word(struct parse_ctx *ctx, struct lex_token **out);
|
||||
extern bool peek_int(struct parse_ctx *ctx);
|
||||
|
||||
extern bool parse_linefeed(struct parse_ctx *ctx);
|
||||
extern bool parse_symbol(struct parse_ctx *ctx, enum token_symbol sym);
|
||||
extern bool parse_keyword(struct parse_ctx *ctx, enum token_keyword kw);
|
||||
extern bool parse_int(struct parse_ctx *ctx, long long *out);
|
||||
extern bool parse_flag(struct parse_ctx *ctx, struct lex_token **out);
|
||||
|
||||
extern bool peek_arith_expr(struct parse_ctx *ctx);
|
||||
extern bool parse_arith_expr(struct parse_ctx *ctx, struct ast_node **out);
|
||||
extern bool parse_operand(
|
||||
struct parse_ctx *ctx,
|
||||
enum parse_operand_flags flags,
|
||||
struct ast_node **out);
|
||||
|
||||
extern bool parse_statement(struct parse_ctx *ctx, struct ast_node **out);
|
||||
|
||||
extern bool peek_command(struct parse_ctx *ctx);
|
||||
extern bool parse_command(struct parse_ctx *ctx, struct ast_node **out);
|
||||
extern bool parse_cmdcall(struct parse_ctx *ctx, struct ast_node **out);
|
||||
extern bool parse_redirect(struct parse_ctx *ctx, struct ast_node **out);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,27 @@
|
||||
#include "../syntax.h"
|
||||
|
||||
bool peek_arith_expr(struct parse_ctx *ctx)
|
||||
{
|
||||
switch (peek_token_type(ctx)) {
|
||||
case TOK_SYMBOL:
|
||||
switch (peek_unknown_symbol(ctx)) {
|
||||
case SYM_PLUS:
|
||||
case SYM_HYPHEN:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
case TOK_INT:
|
||||
case TOK_DOUBLE:
|
||||
case TOK_STRING:
|
||||
case TOK_STR_START:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Placeholder for arithmetic-expression parsing: consumes nothing, leaves
 * `out` untouched and always reports failure.
 */
bool parse_arith_expr(struct parse_ctx *ctx, struct ast_node **out)
{
	(void)ctx;
	(void)out;

	return false;
}
|
||||
@@ -0,0 +1,419 @@
|
||||
#include "../syntax.h"
|
||||
|
||||
#include <fx/encoding.h>
|
||||
|
||||
static bool parse_cmdcall_arg(struct parse_ctx *ctx, struct ast_node **out)
|
||||
{
|
||||
if (ctx->p_status != BSHELL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
if (!tok) {
|
||||
return false;
|
||||
}
|
||||
|
||||
struct ast_node *arg = NULL;
|
||||
|
||||
switch (tok->tok_type) {
|
||||
case TOK_WORD: {
|
||||
struct word_ast_node *n
|
||||
= (struct word_ast_node *)ast_node_create(AST_WORD);
|
||||
if (!n) {
|
||||
ctx->p_status = BSHELL_ERR_NO_MEMORY;
|
||||
return false;
|
||||
}
|
||||
|
||||
n->n_value = claim_token(ctx);
|
||||
*out = (struct ast_node *)n;
|
||||
return true;
|
||||
}
|
||||
|
||||
#if 0
|
||||
case TOK_FLAG: {
|
||||
struct word_ast_node *n
|
||||
= (struct word_ast_node *)ast_node_create(AST_WORD);
|
||||
if (!n) {
|
||||
ctx->p_status = BSHELL_ERR_NO_MEMORY;
|
||||
return false;
|
||||
}
|
||||
|
||||
n->n_value = claim_token(ctx);
|
||||
*out = (struct ast_node *)n;
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
case TOK_VAR: {
|
||||
struct var_ast_node *n
|
||||
= (struct var_ast_node *)ast_node_create(AST_VAR);
|
||||
if (!n) {
|
||||
ctx->p_status = BSHELL_ERR_NO_MEMORY;
|
||||
return false;
|
||||
}
|
||||
|
||||
n->n_ident = claim_token(ctx);
|
||||
*out = (struct ast_node *)n;
|
||||
return true;
|
||||
}
|
||||
|
||||
case TOK_VAR_SPLAT: {
|
||||
struct var_splat_ast_node *n
|
||||
= (struct var_splat_ast_node *)ast_node_create(
|
||||
AST_VAR_SPLAT);
|
||||
if (!n) {
|
||||
ctx->p_status = BSHELL_ERR_NO_MEMORY;
|
||||
return false;
|
||||
}
|
||||
|
||||
n->n_ident = claim_token(ctx);
|
||||
*out = (struct ast_node *)n;
|
||||
return true;
|
||||
}
|
||||
|
||||
case TOK_STRING: {
|
||||
struct string_ast_node *n
|
||||
= (struct string_ast_node *)ast_node_create(AST_STRING);
|
||||
if (!n) {
|
||||
ctx->p_status = BSHELL_ERR_NO_MEMORY;
|
||||
return false;
|
||||
}
|
||||
|
||||
n->n_value = claim_token(ctx);
|
||||
*out = (struct ast_node *)n;
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool parse_redirect_to_fd(
|
||||
struct parse_ctx *ctx,
|
||||
unsigned int in_fd,
|
||||
bool append,
|
||||
struct ast_node **out)
|
||||
{
|
||||
if (ctx->p_status != BSHELL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
struct redirection_ast_node *redirect
|
||||
= (struct redirection_ast_node *)ast_node_create(
|
||||
AST_REDIRECTION);
|
||||
|
||||
redirect->n_in = in_fd;
|
||||
redirect->n_append = append;
|
||||
|
||||
if (!parse_symbol(ctx, SYM_AMPERSAND)) {
|
||||
ast_node_destroy((struct ast_node *)redirect);
|
||||
return false;
|
||||
}
|
||||
|
||||
struct lex_token *out_tok = NULL;
|
||||
struct ast_node *out_expr = NULL;
|
||||
long long out_fd = -1;
|
||||
|
||||
if (peek_word(ctx, &out_tok)) {
|
||||
const char *s = out_tok->tok_str;
|
||||
char *ep;
|
||||
out_fd = strtoll(s, &ep, 10);
|
||||
if (*ep == '\0') {
|
||||
discard_token(ctx);
|
||||
out_tok = NULL;
|
||||
} else {
|
||||
out_fd = -1;
|
||||
}
|
||||
} else if (!parse_cmdcall_arg(ctx, &out_expr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
redirect->n_out_is_fd = (out_fd >= 0) || out_expr;
|
||||
redirect->n_out_is_expr = out_expr != NULL;
|
||||
redirect->n_out = (unsigned int)out_fd;
|
||||
redirect->n_out_path_expr = out_expr;
|
||||
if (out_tok) {
|
||||
redirect->n_out_tok = claim_token(ctx);
|
||||
redirect->n_out_path = out_tok->tok_str;
|
||||
}
|
||||
|
||||
*out = (struct ast_node *)redirect;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool parse_redirect_to_file_squashed(
|
||||
struct parse_ctx *ctx,
|
||||
unsigned int in_fd,
|
||||
bool append,
|
||||
const char *str,
|
||||
struct ast_node **out)
|
||||
{
|
||||
if (ctx->p_status != BSHELL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
if (*str == '\0') {
|
||||
return false;
|
||||
}
|
||||
|
||||
struct redirection_ast_node *redirect
|
||||
= (struct redirection_ast_node *)ast_node_create(
|
||||
AST_REDIRECTION);
|
||||
|
||||
redirect->n_in = in_fd;
|
||||
redirect->n_append = append;
|
||||
redirect->n_out_is_fd = false;
|
||||
redirect->n_out_is_expr = false;
|
||||
redirect->n_out_path = str;
|
||||
|
||||
redirect->n_out_tok = claim_token(ctx);
|
||||
|
||||
*out = (struct ast_node *)redirect;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool parse_redirect_to_file_separate(
|
||||
struct parse_ctx *ctx,
|
||||
unsigned int in_fd,
|
||||
bool append,
|
||||
struct ast_node **out)
|
||||
{
|
||||
if (ctx->p_status != BSHELL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
struct ast_node *out_path = NULL;
|
||||
if (!parse_cmdcall_arg(ctx, &out_path)) {
|
||||
ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
|
||||
return false;
|
||||
}
|
||||
|
||||
struct redirection_ast_node *redirect
|
||||
= (struct redirection_ast_node *)ast_node_create(
|
||||
AST_REDIRECTION);
|
||||
|
||||
redirect->n_in = in_fd;
|
||||
redirect->n_append = append;
|
||||
redirect->n_out_is_fd = false;
|
||||
redirect->n_out_is_expr = true;
|
||||
redirect->n_out_path_expr = out_path;
|
||||
|
||||
*out = (struct ast_node *)redirect;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool parse_redirect(struct parse_ctx *ctx, struct ast_node **out)
|
||||
{
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
if (!tok || tok->tok_type != TOK_WORD) {
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned int in_fd = 1;
|
||||
const char *str = tok->tok_str;
|
||||
bool append = false;
|
||||
|
||||
if (fx_wchar_is_number(*str)) {
|
||||
in_fd = *str - '0';
|
||||
str++;
|
||||
}
|
||||
|
||||
if (*str != '>') {
|
||||
return false;
|
||||
}
|
||||
|
||||
str++;
|
||||
if (*str == '>') {
|
||||
append = true;
|
||||
str++;
|
||||
}
|
||||
|
||||
if (*str != '\0') {
|
||||
return parse_redirect_to_file_squashed(
|
||||
ctx,
|
||||
in_fd,
|
||||
append,
|
||||
str,
|
||||
out);
|
||||
}
|
||||
|
||||
discard_token(ctx);
|
||||
|
||||
if (parse_redirect_to_fd(ctx, in_fd, append, out)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (parse_redirect_to_file_separate(ctx, in_fd, append, out)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool peek_cmdcall_item(struct parse_ctx *ctx, bool unrestricted)
|
||||
{
|
||||
/* each token type falls into one of three categories:
|
||||
* - cmdcall item: the token can be used as part of a command call. the
|
||||
* token indicates the start of a command call.
|
||||
* - NOT a cmdcall item: the token cannot be used as part of a command
|
||||
* call, usually because it as a cmdcall operator like | or &.
|
||||
* encountering one of these tokens ends the cmdcall currently being
|
||||
* parsed.
|
||||
* - RESTRICTED cmdcall item: the token can be used as part of a
|
||||
* command, but will not be considered the start of a cmdcall. to run
|
||||
* a command with this token as its name, the call operator must be
|
||||
* used.
|
||||
*/
|
||||
switch (peek_token_type(ctx)) {
|
||||
case TOK_KEYWORD:
|
||||
case TOK_INT:
|
||||
case TOK_DOUBLE:
|
||||
case TOK_VAR:
|
||||
case TOK_VAR_SPLAT:
|
||||
case TOK_STRING:
|
||||
case TOK_STR_START:
|
||||
return unrestricted;
|
||||
case TOK_SYMBOL:
|
||||
switch (peek_unknown_symbol(ctx)) {
|
||||
case SYM_PLUS:
|
||||
case SYM_HYPHEN:
|
||||
return unrestricted;
|
||||
case SYM_PIPE:
|
||||
case SYM_AMPERSAND:
|
||||
case SYM_SEMICOLON:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
case TOK_NONE:
|
||||
case TOK_LINEFEED:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool parse_cmdcall(struct parse_ctx *ctx, struct ast_node **out)
|
||||
{
|
||||
struct cmdcall_ast_node *node
|
||||
= (struct cmdcall_ast_node *)ast_node_create(AST_CMDCALL);
|
||||
if (!node) {
|
||||
ctx->p_status = BSHELL_ERR_NO_MEMORY;
|
||||
return false;
|
||||
}
|
||||
|
||||
struct ast_node *child = NULL;
|
||||
bool unrestricted = false;
|
||||
bool ok = true;
|
||||
bool stop = false;
|
||||
|
||||
if (parse_symbol(ctx, SYM_AMPERSAND)) {
|
||||
unrestricted = true;
|
||||
}
|
||||
|
||||
if (!peek_cmdcall_item(ctx, unrestricted)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
if (!tok) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!parse_cmdcall_arg(ctx, &child)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
fx_queue_push_back(&node->n_args, &child->n_entry);
|
||||
|
||||
while (ok && !stop) {
|
||||
if (!peek_cmdcall_item(ctx, true)) {
|
||||
break;
|
||||
}
|
||||
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
if (!tok) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (parse_redirect(ctx, &child)) {
|
||||
fx_queue_push_back(&node->n_redirect, &child->n_entry);
|
||||
} else if (parse_cmdcall_arg(ctx, &child)) {
|
||||
fx_queue_push_back(&node->n_args, &child->n_entry);
|
||||
} else {
|
||||
ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ok) {
|
||||
ast_node_destroy((struct ast_node *)node);
|
||||
node = NULL;
|
||||
}
|
||||
|
||||
*out = (struct ast_node *)node;
|
||||
return ok;
|
||||
}
|
||||
|
||||
bool peek_command(struct parse_ctx *ctx)
|
||||
{
|
||||
if (peek_symbol(ctx, SYM_AMPERSAND)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return peek_cmdcall_item(ctx, false);
|
||||
}
|
||||
|
||||
bool parse_command(struct parse_ctx *ctx, struct ast_node **out)
|
||||
{
|
||||
struct ast_node *cmdcall = NULL;
|
||||
if (!parse_cmdcall(ctx, &cmdcall)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
struct pipeline_ast_node *pipeline = NULL;
|
||||
|
||||
while (1) {
|
||||
if (parse_symbol(ctx, SYM_SEMICOLON) || parse_linefeed(ctx)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!parse_symbol(ctx, SYM_PIPE)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!pipeline) {
|
||||
pipeline = (struct pipeline_ast_node *)ast_node_create(
|
||||
AST_PIPELINE);
|
||||
if (!pipeline) {
|
||||
ctx->p_status = BSHELL_ERR_NO_MEMORY;
|
||||
ast_node_destroy(cmdcall);
|
||||
return false;
|
||||
}
|
||||
|
||||
fx_queue_push_back(
|
||||
&pipeline->n_stages,
|
||||
&cmdcall->n_entry);
|
||||
}
|
||||
|
||||
if (!parse_cmdcall(ctx, &cmdcall)) {
|
||||
ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
|
||||
return false;
|
||||
}
|
||||
|
||||
fx_queue_push_back(&pipeline->n_stages, &cmdcall->n_entry);
|
||||
}
|
||||
|
||||
if (pipeline) {
|
||||
*out = (struct ast_node *)pipeline;
|
||||
} else {
|
||||
*out = cmdcall;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -0,0 +1,143 @@
|
||||
#include "../lex.h"
|
||||
#include "../parse.h"
|
||||
#include "../syntax.h"
|
||||
#include "../token.h"
|
||||
|
||||
struct lex_token *claim_token(struct parse_ctx *ctx)
|
||||
{
|
||||
return lex_ctx_claim(ctx->p_src);
|
||||
}
|
||||
|
||||
void discard_token(struct parse_ctx *ctx)
|
||||
{
|
||||
return lex_ctx_discard(ctx->p_src);
|
||||
}
|
||||
|
||||
struct lex_token *peek_token(struct parse_ctx *ctx)
|
||||
{
|
||||
return lex_ctx_peek(ctx->p_src);
|
||||
}
|
||||
|
||||
enum token_type peek_token_type(struct parse_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
return tok ? tok->tok_type : TOK_NONE;
|
||||
}
|
||||
|
||||
enum token_symbol peek_unknown_symbol(struct parse_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
return (tok && tok->tok_type == TOK_SYMBOL) ? tok->tok_symbol
|
||||
: SYM_NONE;
|
||||
}
|
||||
|
||||
enum token_keyword peek_unknown_keyword(struct parse_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
return (tok && tok->tok_type == TOK_KEYWORD) ? tok->tok_keyword
|
||||
: KW_NONE;
|
||||
}
|
||||
|
||||
bool peek_word(struct parse_ctx *ctx, struct lex_token **out)
|
||||
{
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
if (tok && tok->tok_type == TOK_WORD) {
|
||||
*out = tok;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool peek_linefeed(struct parse_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
if (tok && tok->tok_type == TOK_LINEFEED) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool peek_symbol(struct parse_ctx *ctx, enum token_symbol sym)
|
||||
{
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
if (!tok) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tok->tok_type != TOK_SYMBOL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tok->tok_symbol != sym) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool parse_linefeed(struct parse_ctx *ctx)
|
||||
{
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
if (tok && tok->tok_type == TOK_LINEFEED) {
|
||||
discard_token(ctx);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool parse_symbol(struct parse_ctx *ctx, enum token_symbol sym)
|
||||
{
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
if (!tok) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tok->tok_type != TOK_SYMBOL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tok->tok_symbol != sym) {
|
||||
return false;
|
||||
}
|
||||
|
||||
discard_token(ctx);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool parse_keyword(struct parse_ctx *ctx, enum token_keyword kw)
|
||||
{
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
if (!tok) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tok->tok_type != TOK_KEYWORD) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tok->tok_keyword != kw) {
|
||||
return false;
|
||||
}
|
||||
|
||||
discard_token(ctx);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool parse_int(struct parse_ctx *ctx, long long *out)
|
||||
{
|
||||
struct lex_token *tok = peek_token(ctx);
|
||||
if (!tok) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tok->tok_type != TOK_INT) {
|
||||
return false;
|
||||
}
|
||||
|
||||
*out = tok->tok_int;
|
||||
discard_token(ctx);
|
||||
return true;
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
#include "../syntax.h"
|
||||
|
||||
/*
 * Parse one statement.
 *
 * An arithmetic expression is attempted first when the next token can
 * start one. If that is not the case, or the attempt fails, a command is
 * attempted when the next token can start one.
 *
 * Returns true and stores the node in `out` when either attempt succeeds.
 */
bool parse_statement(struct parse_ctx *ctx, struct ast_node **out)
{
	if (peek_arith_expr(ctx) && parse_arith_expr(ctx, out)) {
		return true;
	}

	if (!peek_command(ctx)) {
		return false;
	}

	return parse_command(ctx, out);
}
|
||||
@@ -0,0 +1,150 @@
|
||||
#include "token.h"
|
||||
|
||||
#include <fx/string.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
struct lex_token *lex_token_create(enum token_type type)
|
||||
{
|
||||
struct lex_token *out = malloc(sizeof *out);
|
||||
if (!out) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
memset(out, 0x0, sizeof *out);
|
||||
|
||||
out->tok_type = type;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
struct lex_token *lex_token_create_with_string(
|
||||
enum token_type type,
|
||||
const char *s)
|
||||
{
|
||||
struct lex_token *tok = lex_token_create(type);
|
||||
if (!tok) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tok->tok_str = fx_strdup(s);
|
||||
if (!tok->tok_str) {
|
||||
free(tok);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return tok;
|
||||
}
|
||||
|
||||
void lex_token_destroy(struct lex_token *tok)
|
||||
{
|
||||
switch (tok->tok_type) {
|
||||
case TOK_WORD:
|
||||
case TOK_FLAG:
|
||||
case TOK_STRING:
|
||||
if (tok->tok_str) {
|
||||
free(tok->tok_str);
|
||||
}
|
||||
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
free(tok);
|
||||
}
|
||||
|
||||
struct lex_token *lex_token_change_type(
|
||||
struct lex_token *tok,
|
||||
enum token_type new_type)
|
||||
{
|
||||
switch (tok->tok_type) {
|
||||
case TOK_WORD:
|
||||
case TOK_FLAG:
|
||||
case TOK_STRING:
|
||||
if (tok->tok_str) {
|
||||
free(tok->tok_str);
|
||||
tok->tok_str = NULL;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
tok->tok_type = new_type;
|
||||
return tok;
|
||||
}
|
||||
|
||||
#define ENUM_STR(x) \
|
||||
case x: \
|
||||
return #x
|
||||
|
||||
const char *token_type_to_string(enum token_type type)
|
||||
{
|
||||
switch (type) {
|
||||
ENUM_STR(TOK_NONE);
|
||||
ENUM_STR(TOK_KEYWORD);
|
||||
ENUM_STR(TOK_SYMBOL);
|
||||
ENUM_STR(TOK_INT);
|
||||
ENUM_STR(TOK_DOUBLE);
|
||||
ENUM_STR(TOK_WORD);
|
||||
ENUM_STR(TOK_VAR);
|
||||
ENUM_STR(TOK_VAR_SPLAT);
|
||||
ENUM_STR(TOK_FLAG);
|
||||
ENUM_STR(TOK_STRING);
|
||||
ENUM_STR(TOK_STR_START);
|
||||
ENUM_STR(TOK_STR_END);
|
||||
ENUM_STR(TOK_LINEFEED);
|
||||
default:
|
||||
return "<unknown>";
|
||||
}
|
||||
}
|
||||
|
||||
const char *token_keyword_to_string(enum token_keyword keyword)
|
||||
{
|
||||
switch (keyword) {
|
||||
ENUM_STR(KW_NONE);
|
||||
ENUM_STR(KW_FUNC);
|
||||
ENUM_STR(KW_IF);
|
||||
ENUM_STR(KW_ELSE);
|
||||
default:
|
||||
return "<unknown>";
|
||||
}
|
||||
}
|
||||
|
||||
const char *token_symbol_to_string(enum token_symbol sym)
|
||||
{
|
||||
switch (sym) {
|
||||
ENUM_STR(SYM_NONE);
|
||||
ENUM_STR(SYM_PLUS);
|
||||
ENUM_STR(SYM_HYPHEN);
|
||||
ENUM_STR(SYM_FORWARD_SLASH);
|
||||
ENUM_STR(SYM_ASTERISK);
|
||||
ENUM_STR(SYM_AMPERSAND);
|
||||
ENUM_STR(SYM_PERCENT);
|
||||
ENUM_STR(SYM_SQUOTE);
|
||||
ENUM_STR(SYM_DQUOTE);
|
||||
ENUM_STR(SYM_HASH);
|
||||
ENUM_STR(SYM_SEMICOLON);
|
||||
ENUM_STR(SYM_COMMA);
|
||||
ENUM_STR(SYM_DOLLAR);
|
||||
ENUM_STR(SYM_DOLLAR_LEFT_PAREN);
|
||||
ENUM_STR(SYM_PIPE);
|
||||
ENUM_STR(SYM_AT);
|
||||
ENUM_STR(SYM_AT_LEFT_BRACE);
|
||||
ENUM_STR(SYM_LEFT_BRACE);
|
||||
ENUM_STR(SYM_RIGHT_BRACE);
|
||||
ENUM_STR(SYM_LEFT_BRACKET);
|
||||
ENUM_STR(SYM_RIGHT_BRACKET);
|
||||
ENUM_STR(SYM_LEFT_PAREN);
|
||||
ENUM_STR(SYM_RIGHT_PAREN);
|
||||
ENUM_STR(SYM_EQUAL);
|
||||
ENUM_STR(SYM_PLUS_EQUAL);
|
||||
ENUM_STR(SYM_HYPHEN_EQUAL);
|
||||
ENUM_STR(SYM_FORWARD_SLASH_EQUAL);
|
||||
ENUM_STR(SYM_ASTERISK_EQUAL);
|
||||
ENUM_STR(SYM_PERCENT_EQUAL);
|
||||
default:
|
||||
return "<unknown>";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,133 @@
|
||||
#ifndef IVY_LANG_LEX_H_
|
||||
#define IVY_LANG_LEX_H_
|
||||
|
||||
#include <fx/queue.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
struct char_cell {
|
||||
unsigned long c_row, c_col;
|
||||
};
|
||||
|
||||
enum token_type {
|
||||
TOK_NONE = 0,
|
||||
__TOK_INDEX_BASE = 100,
|
||||
TOK_KEYWORD,
|
||||
TOK_SYMBOL,
|
||||
TOK_INT,
|
||||
TOK_DOUBLE,
|
||||
TOK_WORD,
|
||||
TOK_FLAG,
|
||||
TOK_VAR,
|
||||
TOK_VAR_SPLAT,
|
||||
TOK_STRING,
|
||||
TOK_STR_START,
|
||||
TOK_STR_END,
|
||||
TOK_LINEFEED,
|
||||
__TOK_INDEX_LIMIT,
|
||||
};
|
||||
|
||||
enum token_keyword {
|
||||
KW_NONE = 0,
|
||||
__KW_INDEX_BASE = 200,
|
||||
KW_FUNC,
|
||||
KW_IF,
|
||||
KW_ELSE,
|
||||
__KW_INDEX_LIMIT,
|
||||
};
|
||||
|
||||
enum token_symbol {
|
||||
SYM_NONE = 0,
|
||||
__SYM_INDEX_BASE = 300,
|
||||
SYM_PLUS,
|
||||
SYM_HYPHEN,
|
||||
SYM_FORWARD_SLASH,
|
||||
SYM_ASTERISK,
|
||||
SYM_AMPERSAND,
|
||||
SYM_PERCENT,
|
||||
SYM_SQUOTE,
|
||||
SYM_DQUOTE,
|
||||
SYM_HASH,
|
||||
SYM_SEMICOLON,
|
||||
SYM_COMMA,
|
||||
SYM_DOLLAR,
|
||||
SYM_DOLLAR_LEFT_PAREN,
|
||||
SYM_DOLLAR_LEFT_BRACE,
|
||||
SYM_PIPE,
|
||||
SYM_AT,
|
||||
SYM_AT_LEFT_BRACE,
|
||||
SYM_LEFT_BRACE,
|
||||
SYM_RIGHT_BRACE,
|
||||
SYM_LEFT_BRACKET,
|
||||
SYM_RIGHT_BRACKET,
|
||||
SYM_LEFT_PAREN,
|
||||
SYM_RIGHT_PAREN,
|
||||
SYM_EQUAL,
|
||||
SYM_PLUS_EQUAL,
|
||||
SYM_HYPHEN_EQUAL,
|
||||
SYM_ASTERISK_EQUAL,
|
||||
SYM_FORWARD_SLASH_EQUAL,
|
||||
SYM_PERCENT_EQUAL,
|
||||
__SYM_INDEX_LIMIT,
|
||||
};
|
||||
|
||||
struct lex_token {
|
||||
enum token_type tok_type;
|
||||
|
||||
struct char_cell tok_start, tok_end;
|
||||
|
||||
fx_queue_entry tok_entry;
|
||||
|
||||
union {
|
||||
enum token_keyword tok_keyword;
|
||||
enum token_symbol tok_symbol;
|
||||
long long tok_int;
|
||||
double tok_double;
|
||||
char *tok_str;
|
||||
};
|
||||
};
|
||||
|
||||
extern struct lex_token *lex_token_create(enum token_type type);
|
||||
extern struct lex_token *lex_token_create_with_string(
|
||||
enum token_type type,
|
||||
const char *s);
|
||||
extern void lex_token_destroy(struct lex_token *tok);
|
||||
|
||||
extern struct lex_token *lex_token_change_type(
|
||||
struct lex_token *tok,
|
||||
enum token_type new_type);
|
||||
|
||||
static inline bool lex_token_is_symbol(
|
||||
struct lex_token *tok,
|
||||
enum token_symbol sym)
|
||||
{
|
||||
return (tok->tok_type == TOK_SYMBOL && tok->tok_symbol == sym);
|
||||
}
|
||||
static inline bool lex_token_is_keyword(
|
||||
struct lex_token *tok,
|
||||
enum token_keyword kw)
|
||||
{
|
||||
return (tok->tok_type == TOK_KEYWORD && tok->tok_keyword == kw);
|
||||
}
|
||||
static inline bool lex_token_type_has_string_value(enum token_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case TOK_WORD:
|
||||
case TOK_STRING:
|
||||
case TOK_FLAG:
|
||||
case TOK_VAR:
|
||||
case TOK_VAR_SPLAT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
static inline bool lex_token_has_string_value(const struct lex_token *tok)
|
||||
{
|
||||
return lex_token_type_has_string_value(tok->tok_type);
|
||||
}
|
||||
|
||||
extern const char *token_type_to_string(enum token_type type);
|
||||
extern const char *token_keyword_to_string(enum token_keyword keyword);
|
||||
extern const char *token_symbol_to_string(enum token_symbol sym);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1 @@
|
||||
echo hello 2> error.txt | ls -la | echo done; exit -1
|
||||
@@ -1,8 +1,14 @@
|
||||
func test-function($name) {
|
||||
echo "Hello, $name!"
|
||||
echo "Hello, $name! $(2 + 4 + 2) wow"
|
||||
}
|
||||
|
||||
# Example of instantiating an FX runtime object.
|
||||
$obj = new-object -type-name fx.string -arguments "John Doe"
|
||||
|
||||
$hash = @{
|
||||
1 = 'one'
|
||||
2 = 'two'
|
||||
'three' = 3
|
||||
}
|
||||
|
||||
test-function -name $obj
|
||||
|
||||
@@ -0,0 +1,173 @@
|
||||
# The lexer has three modes: ARITHMETIC, COMMAND, and STRING
|
||||
# ARITHMETIC mode is operand-based, all symbols, keywords, and constant parsing
|
||||
# is enabled.
|
||||
# COMMAND mode is word-based, only a subset of symbols are enabled, no keyword
|
||||
# or constant parsing is performed, and more liberal word formations and
|
||||
# substitutions are allowed
|
||||
# STRING mode is used to read string literals (i.e. those strings that DON'T
|
||||
# support variable substitutions). All chars read are appended to the resulting
|
||||
# string, with no further parsing performed.
|
||||
|
||||
# Initially, the lexer mode is unspecified, until:
|
||||
# a) The lexer reads a character, from which the correct mode is deduced.
|
||||
# b) The parser manually switches the lexer's mode
|
||||
# Lexer state supports nesting.
|
||||
|
||||
# ARITHMETIC
|
||||
# both of these are equivalent
|
||||
$a = 2
|
||||
# VAR(a)
|
||||
# SYMBOL(=)
|
||||
# INT(2)
|
||||
|
||||
$b=4
|
||||
# VAR(b)
|
||||
# SYMBOL(=)
|
||||
# INT(4)
|
||||
|
||||
# ARITHMETIC
|
||||
# this is a syntax error (there should be an operator between the two vars)
|
||||
$a$b
|
||||
# VAR(a)
|
||||
# VAR(b)
|
||||
|
||||
# When the parser encounters SYMBOL(%) it should switch the lexer to COMMAND
|
||||
# mode, which will allow the following word construction to be used.
|
||||
# this executes the command whose name is equal to concatenating the values
|
||||
# of $a and $b (in this case, '24')
|
||||
% $a$b
|
||||
# SYMBOL(%)
|
||||
# WORD_START
|
||||
# VAR(a)
|
||||
# VAR(b)
|
||||
# WORD_END
|
||||
|
||||
# executes the command with the name 'a+2b'. because the first char encountered
|
||||
# by the lexer is alphabetic, it reads a regular word in COMMAND mode.
|
||||
a+2b
|
||||
# WORD(a+2b)
|
||||
|
||||
# executes the command with the name '-no$a' ($a is not substituted).
|
||||
# the first char encountered is a symbol, which is read as a word in COMMAND
|
||||
# mode
|
||||
-no$a
|
||||
# WORD(-no$a)
|
||||
|
||||
# returns the result of applying the NOT operator to the value of $a.
|
||||
# the first char encountered is a symbol, which is read as a word in COMMAND
|
||||
# mode. as characters are read, they are compared against registered operators.
|
||||
# if a match is found, the operator is emitted, and the parser will switch
|
||||
# the lexer to ARITHMETIC mode
|
||||
-not$a
|
||||
# OP(not)
|
||||
# VAR(a)
|
||||
|
||||
# executes the command with the name '-not$a' ($a is NOT substituted)
|
||||
# because of the preceding hyphen, variable substitution is not performed.
|
||||
% -not$a
|
||||
# SYMBOL(%)
|
||||
# WORD(-not$a)
|
||||
|
||||
# executes the command with the name '-not2' ($a IS substituted)
|
||||
# variable substitution IS performed in dquote strings regardless of the hyphen.
|
||||
% "-not$a"
|
||||
# SYMBOL(%)
|
||||
# STR_START
|
||||
# STRING(-not)
|
||||
# VAR(a)
|
||||
# STR_END
|
||||
|
||||
# interpreted as a command with args ['a', '+b', '/c']
|
||||
# the first char encountered is alphabetic, so the expression is parsed in
|
||||
# COMMAND mode
|
||||
a +b /c
|
||||
# WORD(a)
|
||||
# WORD(+b)
|
||||
# WORD(/c)
|
||||
|
||||
# interpreted as an arithmetic expression (but not a well-formed one)
|
||||
+b /c
|
||||
# SYM(+)
|
||||
# WORD(b)
|
||||
# SYM(/)
|
||||
# WORD(c)
|
||||
|
||||
# interpreted as a command with name '%+'
|
||||
%+
|
||||
# WORD(%+)
|
||||
|
||||
# interpreted as a command with args ['%', '+']
|
||||
% +
|
||||
# WORD(%)
|
||||
# WORD(+)
|
||||
|
||||
# interpreted as a command with name '%'
|
||||
%;
|
||||
# WORD(%)
|
||||
# SYMBOL(;)
|
||||
|
||||
# interpreted as a command with name '+'
|
||||
&+
|
||||
# SYMBOL(&)
|
||||
# WORD(+)
|
||||
|
||||
# interpreted as a string, which triggers the parser to enter ARITHMETIC mode
|
||||
'hello world'
|
||||
# STRING(hello world)
|
||||
|
||||
# interpreted as a command with args ['echo', 'hello world']
|
||||
echo 'hello world'
|
||||
# WORD(echo)
|
||||
# STRING(hello world)
|
||||
|
||||
# interpreted as an interpolated string
|
||||
"Hello $(if ($x -lt 5) { echo 'yes' } else {echo 'no'})"
|
||||
|
||||
|
||||
###############################################################################
|
||||
# The lexer operates as a state machine, moving between different states as
|
||||
# different characters are encountered
|
||||
# The states are stored in a stack, to allow recursive parsing.
|
||||
# The lexer has the following states:
|
||||
# STATEMENT: A generic statement, could be a command, keyword, arithmetic
|
||||
# expression, etc. The next char or symbol encountered will cause the
|
||||
# lexer to switch to the appropriate state type:
|
||||
# letters, word-symbols -> COMMAND
|
||||
# squote -> ARITHMETIC
|
||||
# dquote -> ARITHMETIC, FSTRING
|
||||
# Digits, vars, var-splats, keywords, all other symbols -> ARITHMETIC
|
||||
# EXPRESSION: Similar to STATEMENT, but only allows a single command or
|
||||
# arithmetic expression. CANNOT use keywords or statement terminators.
|
||||
# Letters, word-symbols -> COMMAND
|
||||
# squote -> ARITHMETIC
|
||||
# dquote -> ARITHMETIC, FSTRING
|
||||
# Digits, vars, var-splats, keywords, all other symbols -> ARITHMETIC
|
||||
# COMMAND: Only words, (f)strings, vars, var-splats, and a subset of symbols are
|
||||
# parsed.
|
||||
# ARITHMETIC: Words, strings, vars, var-splats, all symbols, keywords are parsed.
|
||||
# STRING: Only a subset of symbols are parsed, all other characters are appended
|
||||
# to the resulting string.
|
||||
#
|
||||
# Once a state has changed from EXPRESSION to one of the other three state
|
||||
# types, certain characters will result in the current state either changing
|
||||
# type or being popped from the stack:
|
||||
# STATEMENT: semicolon -> STATEMENT
|
||||
# left-paren, left-brace -> POP
|
||||
# EXPRESSION: semicolon -> POP
|
||||
# left-paren, left-brace -> POP
|
||||
# COMMAND: semicolon -> STATEMENT
|
||||
# left-paren, left-brace -> POP
|
||||
# ARITHMETIC: semicolon -> STATEMENT
|
||||
# left-paren, left-brace -> POP
|
||||
#
|
||||
# Certain symbols require recursive parsing:
|
||||
# - dquote strings allow string interpolation, so expressions within the string
|
||||
# may be parsed in a different state. Once the expression is complete, the
|
||||
# lexer returns to the previous state.
|
||||
# - in most cases, $(...) can be used to delimit sub-expressions (including in
|
||||
# strings). When '$(' is encountered, a new state entry of type EXPRESSION is
|
||||
# pushed onto the stack. When the corresponding ')' is encountered, that state
|
||||
# entry is popped from the stack.
|
||||
# - similarly to $(...), (...) can be used to group expressions, just like in
|
||||
# mathematical expressions.
|
||||
|
||||
Reference in New Issue
Block a user