Compare commits

..

39 Commits

Author SHA1 Message Date
wash 34114ca451 debug: add print support for new ast node types 2026-05-12 23:00:03 +01:00
wash 750e3df7d5 lang: add arithmetic operator definitions 2026-05-12 22:59:50 +01:00
wash 83189d4d9b ast: add lots of ast node definitions 2026-05-12 22:59:29 +01:00
wash 1ea3471f0d parse: implement parsing of complex command arguments 2026-05-12 22:58:59 +01:00
wash c4529d474a parse: implement parsing of arithmetic expressions and data structures 2026-05-12 22:58:48 +01:00
wash 227e73853c parse: implement parsing of function definitions 2026-05-12 22:57:49 +01:00
wash 7d2e45edcb parse: implement parsing of if-statements 2026-05-12 22:57:31 +01:00
wash 26e2a63200 parse: implement parsing of {...} statement blocks 2026-05-12 22:57:16 +01:00
wash 0cd7ca2dde parse: add a basic function to report parse errors 2026-05-12 22:56:40 +01:00
wash 5ce780e037 parse: add a range of internal parser definitions 2026-05-12 22:55:59 +01:00
wash 2235d8593b parse: lex: add a range of operator tokens 2026-05-12 22:54:50 +01:00
wash 64903c821c parse: lex: add missing lex ctx members 2026-05-12 22:54:17 +01:00
wash 39457aa7e6 parse: add some more generic token parser functions 2026-05-12 22:53:26 +01:00
wash 440561cb39 parse: implement parsing of semicolon-delimited statement lists 2026-05-12 22:52:48 +01:00
wash cc450da31e parse: lex: support tokens terminating multiple lex states in certain circumstances 2026-05-12 22:51:45 +01:00
wash e3b92fe4f2 parse: lex: fix scanning of sub-expressions and fstrings in statement mode 2026-05-12 22:51:15 +01:00
wash b2190dd4d0 parse: lex: improve scanning of more complex redirection expressions 2026-05-12 22:48:57 +01:00
wash 3dd5f12ee5 parse: lex: fix string state not terminating when encountering a dquote 2026-05-12 22:48:08 +01:00
wash 721e0f851a parse: lex: add a range of new symbol tokens 2026-05-12 22:47:29 +01:00
wash dee4e5dbf7 parse: lex: fix arithmetic state handling dquote symbols incorrectly 2026-05-12 22:45:43 +01:00
wash 39125cea50 parse: lex: switch from arithmetic to statement when scanning =, |, and \n 2026-05-12 22:45:05 +01:00
wash 7ddc140dbf parse: lex: fix arithmetic state not scanning operator tokens 2026-05-12 22:44:33 +01:00
wash a408b9efa2 parse: lex: move per-state token settings to state source files 2026-05-11 23:57:35 +01:00
wash 0c21be8d67 parse: lex: add proper data-driven state-machine functionality
movement between lexer states is now defined (almost) exclusively
by a table of outgoing links defined for each state type.

the main lexer system uses this table to determine when, how, and to
where the state should be changed.

also add a dedicated lexer state for scanning hashtables, due to the
particularly unique rules that apply within.
2026-05-11 23:02:02 +01:00
wash 304eb80e0d bshell: add debug output support for operator tokens 2026-05-10 19:15:41 +01:00
wash ffdb28ba22 parse: lex: replace expression scanner with statement; implement complex-word scanner
also fix a bunch of scanning edge-cases
2026-05-10 19:14:24 +01:00
wash 7aa2aee5bd parse: lex: implement recording coordinates of lex tokens 2026-05-10 19:13:29 +01:00
wash 7071630af8 parse: lex: add flags for lexer states 2026-05-10 19:10:14 +01:00
wash f5d847736a bshell.vim: fix highlighting of nested parentheses in string interpolation 2026-05-10 14:20:40 +01:00
wash 94048c6508 doc: sample: add/update sample script files 2026-05-10 14:20:08 +01:00
wash 3398de6fa9 ast: update redirection node to support fd-redirection with sub-expression 2026-05-10 14:19:29 +01:00
wash ba8a2111eb parse: update parser to support new lexer behaviour 2026-05-10 14:19:06 +01:00
wash 5ea41fcc6e parse: lex: re-implement lexer as a state machine to allow more complex scanning behaviour 2026-05-10 14:18:46 +01:00
wash cfaf53040b parse: improve command arg parsing 2026-05-09 21:21:51 +01:00
wash 7d95d57f98 bshell: first-pass implementation of a syntax lexer/parser 2026-05-09 19:00:02 +01:00
wash b12f59ed2c bshell: add functions for printing lex tokens and ast nodes 2026-05-09 18:59:42 +01:00
wash 090f6a0002 bshell: add ast node definitions 2026-05-09 18:59:13 +01:00
wash 8b0295faf2 bshell: add line-editor and file-based input support 2026-05-07 10:52:00 +01:00
wash 3c15bb1609 meta: add vim plugin for syntax highlighting 2026-05-07 10:51:04 +01:00
82 changed files with 8688 additions and 1 deletions
+1
View File
@@ -143,3 +143,4 @@ tags
# End of https://www.toptal.com/developers/gitignore/api/c,vim,linux,macos,cmake
build/
.cache/
+7
View File
@@ -1,7 +1,13 @@
# Minimum CMake release required to configure this project.
cmake_minimum_required(VERSION 3.31)
# bshell enables the C language only.
project(bshell C)
# Also search the project's own cmake/ directory when resolving modules.
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
# A Python interpreter is mandatory; ${Python_EXECUTABLE} is run by the
# execute_process() call that follows.
find_package(Python COMPONENTS Interpreter REQUIRED)
# FX is mandatory. NOTE(review): these components presumably provide the
# FX::Runtime, FX::Collections and FX::Term targets linked into bshell - confirm.
find_package(FX REQUIRED COMPONENTS
fx.runtime
fx.collections
fx.term)
execute_process(
COMMAND ${Python_EXECUTABLE}
@@ -16,5 +22,6 @@ message(STATUS "B Shell version: ${bshell_version}")
# Build the shell binary from the sources listed in ${bshell_sources}.
add_executable(bshell ${bshell_sources})
# Nothing links against an executable, so its dependencies and definitions
# are PRIVATE; this also avoids the legacy keyword-less link signature.
target_link_libraries(bshell PRIVATE FX::Runtime FX::Collections FX::Term)
# Expose the computed version string to the C sources as BSHELL_VERSION.
target_compile_definitions(bshell PRIVATE
    BSHELL_VERSION="${bshell_version}")
View File
+1
View File
@@ -0,0 +1 @@
" Assign the 'bshell' filetype to new or opened files named *.bshell.
autocmd BufNewFile,BufRead *.bshell setfiletype bshell
+4
View File
@@ -0,0 +1,4 @@
" Buffer-local indentation: literal tabs display 8 columns wide, while
" editing indents in steps of 4 columns and inserts spaces instead of tabs.
setlocal tabstop=8
setlocal softtabstop=4
setlocal shiftwidth=4
setlocal expandtab
+152
View File
@@ -0,0 +1,152 @@
" Do nothing if a syntax has already been loaded for this buffer.
if exists('b:current_syntax')
finish
endif
" Remember the user's 'cpoptions' and switch to the Vim default for the
" duration of this script; the saved value is restored at the end.
let s:save_cpo = &cpoptions
set cpoptions&vim
" Treat '-' as a keyword character so hyphenated names match as one word.
setlocal iskeyword+=-
" Hyphenated command names (word-word...), $variables and -flags.
syn match bshellFunctionRef /\<[A-Za-z][A-Za-z0-9]*\(-[A-Za-z0-9][A-Za-z0-9]*\)\+\>/
syn match bshellVariable /\$[A-Za-z][A-Za-z0-9_]*/
syn match bshellArgFlag /\<-[A-Za-z][A-Za-z0-9]*\(-[A-Za-z0-9][A-Za-z0-9]*\)*\>/
syn keyword bshellKeyword func
" Line comments run from '#' to end of line; marker words are matched only
" inside them.
syn keyword bshellTodo contained TODO FIXME XXX NOTE HACK TBD
syn match bshellLineComment /#.*$/ contains=bshellTodo
" $( ... ) interpolation, only valid inside another item.
" NOTE(review): bshellInterpolation is defined twice with different
" 'contains' lists - confirm both definitions are intended.
syn region bshellInterpolation matchgroup=bshellInterpolationDelimiter start=+$(+ end=+)+ keepend contained contains=@bshellAll
syn region bshellInterpolation matchgroup=bshellInterpolationDelimiter start="$(" end=")" contained contains=ALL
" Double-quoted strings allow variables and interpolation; single-quoted
" strings do not.
syn region bshellInterpolatedString matchgroup=bshellString start=+"+ end=+"+ extend contains=bshellVariable,bshellInterpolation
syn region bshellLiteralString matchgroup=bshellString start=+\'+ end=+\'+ extend contains=bshellSpecialChar,bshellSpecialError,bshellUnicodeNumber,@Spell
syn region bshell1NestedParentheses start="(" skip="\\\\\|\\)" matchgroup=bshellInterpolation end=")" transparent contained
" Numeric literals are matched case-insensitively.
syn case ignore
syn match bshellInteger "\<0b[01_]*[01]\%([lu]\|lu\|ul\)\=\>" display
syn match bshellInteger "\<\d\+\%(_\+\d\+\)*\%([lu]\|lu\|ul\)\=\>" display
syn match bshellInteger "\<-\d\+\%(_\+\d\+\)*\%([lu]\|lu\|ul\)\=\>" display
syn match bshellInteger "\<0x[[:xdigit:]_]*\x\%([lu]\|lu\|ul\)\=\>" display
syn match bshellReal "\<\d\+\%(_\+\d\+\)*\.\d\+\%(_\+\d\+\)*\%\(e[-+]\=\d\+\%(_\+\d\+\)*\)\=[fdm]\=" display
syn match bshellReal "\.\d\+\%(_\+\d\+\)*\%(e[-+]\=\d\+\%(_\+\d\+\)*\)\=[fdm]\=\>" display
syn match bshellReal "\<\d\+\%(_\+\d\+\)*e[-+]\=\d\+\%(_\+\d\+\)*[fdm]\=\>" display
syn match bshellReal "\<\d\+\%(_\+\d\+\)*[fdm]\>" display
syn case match
" Clusters used to build 'contains' lists. The bshellString cluster must be
" defined here because bshellLiteral refers to it as @bshellString; without
" it the string regions are not part of bshellLiteral or bshellAll.
syn cluster bshellNumber contains=bshellInteger,bshellReal
syn cluster bshellString contains=bshellInterpolatedString,bshellLiteralString
syn cluster bshellLiteral contains=@bshellNumber,@bshellString
syn cluster bshellAll contains=@bshellLiteral,bshellVariable,bshellKeyword,bshellArgFlag,bshellFunctionRef,bshellSymbolOp,bshellKeywordOp
" Operators: the '+' symbol and the word operators.
syn match bshellSymbolOp "[+]" display
syn keyword bshellKeywordOp is not understands and or
" Default links from bshell syntax groups to the standard highlight groups.
hi def link bshellKeyword Statement
hi def link bshellArgFlag Tag
hi def link bshellVariable Identifier
hi def link bshellLineComment Comment
" bshellTodo is matched inside line comments; without a link of its own the
" TODO/FIXME markers would not be highlighted.
hi def link bshellTodo Todo
hi def link bshellFunctionRef Function
hi def link bshellString String
hi def link bshellInterpolatedString String
hi def link bshellLiteralString String
hi def link bshellInteger Number
hi def link bshellReal Float
hi def link bshellInterpolationDelimiter Delimiter
hi def link bshellSymbolOp Operator
hi def link bshellKeywordOp Operator
" The default highlighting.
" hi def link bshellUnspecifiedStatement Statement
" hi def link bshellUnsupportedStatement Statement
"
" hi def link bshellGlobalNamespaceAlias Include
"
" hi def link bshellType Type
"
" hi def link bshellStorage Keyword
" hi def link bshellIsAs Keyword
" hi def link bshellAccessor Keyword
" hi def link bshellBuiltinVar @variable.builtin
" hi def link bshellSelfVar @variable.builtin
"
" hi def link bshellStatement Statement
" hi def link bshellRepeat Repeat
" hi def link bshellConditional Conditional
" hi def link bshellSelectorLabel Tag
" hi def link bshellUnnamedLabel Comment
" hi def link bshellUnnamedVariable Comment
" hi def link bshellLambdaParameter @variable.builtin
" hi def link bshellException Exception
"
" hi def link bshellParens Delimiter
" hi def link bshellBraces Structure
" hi def link bshellControlSymbols Keyword
"
" hi def link bshellModifier StorageClass
" hi def link bshellAccessModifier bshellModifier
" hi def link bshellAsyncModifier bshellModifier
" hi def link bshellCheckedModifier bshellModifier
" hi def link bshellManagedModifier bshellModifier
" hi def link bshellUsingModifier bshellModifier
"
" hi def link bshellTodo Todo
" hi def link bshellComment Comment
" hi def link bshellLineComment bshellComment
" hi def link bshellBlockComment bshellComment
" hi def link bshellLineContinuation bshellComment
"
" hi def link bshellKeywordOperator Keyword
" hi def link bshellAsyncOperator bshellKeywordOperator
" hi def link bshellTypeOf bshellKeywordOperator
" hi def link bshellTypeOfOperand Typedef
" hi def link bshellTypeOfError Error
" hi def link bshellOpSymbols Operator
" hi def link bshellPackageAccessOperator Operator
" hi def link bshellOtherSymbols Structure
" hi def link bshellLogicSymbols Operator
" hi def link bshellWordOperator Operator
"
" hi def link bshellSpecialError Error
" hi def link bshellSpecialCharError Error
" hi def link bshellString String
" hi def link bshellQuote String
" hi def link bshellInterpolatedString String
" hi def link bshellVerbatimString String
" hi def link bshellInterVerbString String
" hi def link bshellVerbatimQuote SpecialChar
"
" hi def link bshellConstant Constant
" hi def link bshellNull Constant
" hi def link bshellBoolean Boolean
" hi def link bshellCharacter Character
" hi def link bshellSpecialChar SpecialChar
" hi def link bshellInteger Number
" hi def link bshellReal Float
" hi def link bshellWord Identifier
" hi def link bshellUnicodeNumber SpecialChar
" hi def link bshellUnicodeSpecifier SpecialChar
" hi def link bshellInterpolationDelimiter Delimiter
" hi def link bshellInterpolationAlignDel bshellInterpolationDelimiter
" hi def link bshellInterpolationFormat bshellInterpolationDelimiter
" hi def link bshellInterpolationFormatDel bshellInterpolationDelimiter
"
" hi def link bshellGenericBraces bshellBraces
"
" hi def link bshellAtomName Constant
"
" hi def link bshellComplexMessageName Function
" hi def link bshellUnaryMessageName Function
" hi def link bshellPropertyName @property
" hi def link bshellPropertySymbol Statement
"
" hi def link bshellStatementSeparator Comment
" hi def link bshellMessageTerminator @punctuation.special
"
" hi def link bshellPackageStmtIdentifier @string.special.url
" hi def link bshellUseStmtIdentifier @module
" Record that the bshell syntax is now active for this buffer.
let b:current_syntax = 'bshell'
" Restore the 'cpoptions' value saved at the top of the script.
let &cpoptions = s:save_cpo
unlet s:save_cpo
" vim: vts=16,28
+25
View File
@@ -0,0 +1,25 @@
#include "../parse/token.h"
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct array_ast_node *array = (struct array_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&array->n_items);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
struct ast_node_definition array_ast_node = {
.def_id = AST_ARRAY,
.def_node_size = sizeof(struct array_ast_node),
.def_collect_children = collect_children,
};
+196
View File
@@ -0,0 +1,196 @@
#include "ast.h"
#include "../status.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
/* Per-type node definitions, each provided by another translation unit. */
extern struct ast_node_definition null_ast_node;
extern struct ast_node_definition int_ast_node;
extern struct ast_node_definition double_ast_node;
extern struct ast_node_definition word_ast_node;
extern struct ast_node_definition var_ast_node;
extern struct ast_node_definition string_ast_node;
extern struct ast_node_definition fstring_ast_node;
extern struct ast_node_definition cmdcall_ast_node;
extern struct ast_node_definition pipeline_ast_node;
extern struct ast_node_definition redirection_ast_node;
extern struct ast_node_definition block_ast_node;
extern struct ast_node_definition stmt_list_ast_node;
extern struct ast_node_definition func_ast_node;
extern struct ast_node_definition array_ast_node;
extern struct ast_node_definition hashtable_ast_node;
extern struct ast_node_definition hashtable_item_ast_node;
extern struct ast_node_definition if_ast_node;
extern struct ast_node_definition if_branch_ast_node;
extern struct ast_node_definition op_ast_node;
/*
 * Lookup table from enum ast_node_type to the matching definition.
 *
 * The table is sparse: AST_NONE, AST_VAR_SPLAT, AST_FLAG and AST_FUNCALL have
 * no initialiser here, so their slots are NULL.
 */
static const struct ast_node_definition *ast_node_defintions[] = {
[AST_NULL] = &null_ast_node,
[AST_INT] = &int_ast_node,
[AST_DOUBLE] = &double_ast_node,
[AST_WORD] = &word_ast_node,
[AST_VAR] = &var_ast_node,
[AST_STRING] = &string_ast_node,
[AST_FSTRING] = &fstring_ast_node,
[AST_CMDCALL] = &cmdcall_ast_node,
[AST_PIPELINE] = &pipeline_ast_node,
[AST_REDIRECTION] = &redirection_ast_node,
[AST_BLOCK] = &block_ast_node,
[AST_STMT_LIST] = &stmt_list_ast_node,
[AST_FUNC] = &func_ast_node,
[AST_ARRAY] = &array_ast_node,
[AST_IF] = &if_ast_node,
[AST_IF_BRANCH] = &if_branch_ast_node,
[AST_HASHTABLE] = &hashtable_ast_node,
[AST_HASHTABLE_ITEM] = &hashtable_item_ast_node,
[AST_OP] = &op_ast_node,
};
/* Number of slots in the table (highest initialised index plus one). */
static const size_t nr_ast_node_definitions
= sizeof ast_node_defintions / sizeof ast_node_defintions[0];
/*
 * Allocate a zero-initialised AST node of the given type.
 *
 * The allocation size is taken from the type's entry in the definition
 * table. Returns the new node with n_type set to `type`, or NULL when the
 * type has no registered definition or the allocation fails.
 */
struct ast_node *ast_node_create(enum ast_node_type type)
{
	assert(type < nr_ast_node_definitions);

	/* The table is sparse, so an in-range type can still map to NULL. */
	const struct ast_node_definition *def = ast_node_defintions[type];
	if (!def) {
		return NULL;
	}

	struct ast_node *out = calloc(1, def->def_node_size);
	if (!out) {
		return NULL;
	}

	out->n_type = type;
	return out;
}
/*
 * Free `node` and every node reachable from it through the iterator.
 *
 * For each visited node the type's def_cleanup hook (if any) is called, the
 * node's children are queued, and then the node itself is freed. Passing
 * NULL is a no-op.
 */
void ast_node_destroy(struct ast_node *node)
{
	if (!node) {
		return;
	}

	assert(node->n_type < nr_ast_node_definitions);

	struct ast_iterator it = {0};
	ast_iterator_enqueue(&it, node);

	while ((node = ast_iterator_peek(&it)) != NULL) {
		const struct ast_node_definition *def
			= ast_node_defintions[node->n_type];

		/* Types without a table entry simply have no cleanup hook. */
		if (def && def->def_cleanup) {
			def->def_cleanup(node);
		}

		/*
		 * Dequeueing reads the node to queue its children, so it has
		 * to happen before the memory is released.
		 */
		ast_iterator_dequeue(&it);
		free(node);
	}
}
/* Start a traversal at `node` by placing it on the iterator's queue. */
void ast_node_iterate(struct ast_node *node, struct ast_iterator *it)
{
ast_iterator_enqueue(it, node);
}
/*
 * Append a textual form of the node's payload to `out`.
 *
 * Dispatches to the type's def_to_string hook. Nothing is written when the
 * node type is outside the definition table, has no definition, or the
 * definition provides no hook.
 */
void ast_node_to_string(const struct ast_node *node, fx_bstr *out)
{
	if ((size_t)node->n_type >= nr_ast_node_definitions) {
		return;
	}

	const struct ast_node_definition *def
		= ast_node_defintions[node->n_type];
	if (def && def->def_to_string) {
		def->def_to_string(node, out);
	}
}
#define ENUM_STR(x) \
case x: \
return #x
const char *ast_node_type_to_string(enum ast_node_type type)
{
switch (type) {
ENUM_STR(AST_NONE);
ENUM_STR(AST_NULL);
ENUM_STR(AST_STMT_LIST);
ENUM_STR(AST_INT);
ENUM_STR(AST_DOUBLE);
ENUM_STR(AST_WORD);
ENUM_STR(AST_STRING);
ENUM_STR(AST_FSTRING);
ENUM_STR(AST_VAR);
ENUM_STR(AST_VAR_SPLAT);
ENUM_STR(AST_FLAG);
ENUM_STR(AST_CMDCALL);
ENUM_STR(AST_PIPELINE);
ENUM_STR(AST_REDIRECTION);
ENUM_STR(AST_BLOCK);
ENUM_STR(AST_FUNC);
ENUM_STR(AST_IF);
ENUM_STR(AST_IF_BRANCH);
ENUM_STR(AST_OP);
ENUM_STR(AST_ARRAY);
ENUM_STR(AST_HASHTABLE);
ENUM_STR(AST_HASHTABLE_ITEM);
default:
return "<unknown>";
}
}
/*
 * Return the node at the front of the iterator's queue without removing it,
 * or NULL when the queue is empty.
 */
struct ast_node *ast_iterator_peek(struct ast_iterator *it)
{
	fx_queue_entry *front = fx_queue_first(&it->it_queue);
	struct ast_node *node = NULL;

	if (front) {
		node = fx_unbox(struct ast_node, front, n_it.e_entry);
	}

	return node;
}
/*
 * Remove and return the node at the front of the queue, or NULL when the
 * queue is empty.
 *
 * Before the node is removed its children are queued directly behind it (via
 * the type's def_collect_children hook), so they are visited before any node
 * that was already waiting. The hook's status value is not propagated.
 */
struct ast_node *ast_iterator_dequeue(struct ast_iterator *it)
{
	fx_queue_entry *cur = fx_queue_first(&it->it_queue);
	if (!cur) {
		return NULL;
	}

	struct ast_node *node = fx_unbox(struct ast_node, cur, n_it.e_entry);
	const struct ast_node_definition *def
		= ast_node_defintions[node->n_type];

	/* Each enqueued child advances the insertion point past itself. */
	it->it_insert_after = cur;

	/* Types without a table entry are treated as having no children. */
	if (def && def->def_collect_children) {
		def->def_collect_children(node, it);
	}

	fx_queue_pop_front(&it->it_queue);

	/*
	 * With no children added, the insertion point still refers to the
	 * entry that was just removed; clear it so a later enqueue does not
	 * insert after an entry that is no longer in the queue.
	 */
	if (it->it_insert_after == cur) {
		it->it_insert_after = NULL;
	}

	return node;
}
/*
 * Add `node` to the iterator's queue.
 *
 * The node's depth is one more than that of the node currently at the front
 * of the queue, or 0 when the queue is empty. If an insertion point is set
 * the node goes right after it and becomes the new insertion point;
 * otherwise it is appended to the back.
 */
void ast_iterator_enqueue(struct ast_iterator *it, struct ast_node *node)
{
	fx_queue_entry *front = fx_queue_first(&it->it_queue);
	fx_queue_entry *entry = &node->n_it.e_entry;
	unsigned long depth = 0;

	if (front) {
		struct ast_node *front_node
			= fx_unbox(struct ast_node, front, n_it.e_entry);
		depth = front_node->n_it.e_depth + 1;
	}
	node->n_it.e_depth = depth;

	if (it->it_insert_after) {
		fx_queue_insert_after(
			&it->it_queue,
			entry,
			it->it_insert_after);
		it->it_insert_after = entry;
		return;
	}

	fx_queue_push_back(&it->it_queue, entry);
}
+196
View File
@@ -0,0 +1,196 @@
#ifndef AST_H_
#define AST_H_
#include "../status.h"
#include <fx/bstr.h>
#include <fx/queue.h>
struct lex_token;
/*
 * Identifies the concrete kind of an AST node.
 *
 * Values are consecutive, starting at AST_NONE = 0; ast.c uses them as
 * indices into its table of node definitions.
 */
enum ast_node_type {
AST_NONE = 0x00u,
AST_NULL,
AST_STMT_LIST,
AST_INT,
AST_DOUBLE,
AST_WORD,
AST_STRING,
AST_FSTRING,
AST_VAR,
AST_VAR_SPLAT,
AST_FLAG,
AST_CMDCALL,
AST_FUNCALL,
AST_PIPELINE,
AST_REDIRECTION,
AST_BLOCK,
AST_FUNC,
AST_ARRAY,
AST_HASHTABLE,
AST_HASHTABLE_ITEM,
AST_OP,
AST_IF,
AST_IF_BRANCH,
};
/* Per-node bookkeeping used while the node sits on an ast_iterator queue. */
struct ast_iterator_entry {
/* Link in ast_iterator.it_queue. */
fx_queue_entry e_entry;
/* Depth assigned by ast_iterator_enqueue(). */
unsigned long e_depth;
};
/* Common header embedded as the n_base member of every concrete node. */
struct ast_node {
/* Concrete kind of this node; set by ast_node_create(). */
enum ast_node_type n_type;
/* NOTE(review): presumably the owning node; not assigned in the code visible here. */
struct ast_node *n_parent;
/* Link used when this node is stored in another node's child queue. */
fx_queue_entry n_entry;
/* Traversal state; see struct ast_iterator_entry. */
struct ast_iterator_entry n_it;
};
/* AST_NULL node: no payload beyond the common header. */
struct null_ast_node {
struct ast_node n_base;
};
/* AST_INT node: n_value is the token carrying the integer (tok_int). */
struct int_ast_node {
struct ast_node n_base;
struct lex_token *n_value;
};
/* AST_DOUBLE node: n_value is the token carrying the value. */
struct double_ast_node {
struct ast_node n_base;
struct lex_token *n_value;
};
/* AST_WORD node: n_value is the token carrying the word text (tok_str). */
struct word_ast_node {
struct ast_node n_base;
struct lex_token *n_value;
};
/* AST_STRING node: n_value is the token carrying the string text (tok_str). */
struct string_ast_node {
struct ast_node n_base;
struct lex_token *n_value;
};
/* AST_FSTRING node: n_elements queues the child nodes, linked by n_entry. */
struct fstring_ast_node {
struct ast_node n_base;
fx_queue n_elements;
};
/* AST_VAR node: n_ident is the token carrying the variable name (tok_str). */
struct var_ast_node {
struct ast_node n_base;
struct lex_token *n_ident;
};
/* NOTE(review): presumably the payload for AST_VAR_SPLAT; no definition is registered for it in ast.c. */
struct var_splat_ast_node {
struct ast_node n_base;
struct lex_token *n_ident;
};
/* AST_CMDCALL node: argument nodes followed by redirection nodes, both linked by n_entry. */
struct cmdcall_ast_node {
struct ast_node n_base;
fx_queue n_args;
fx_queue n_redirect;
};
/* NOTE(review): presumably the payload for AST_FUNCALL; no definition is registered for it in ast.c. */
struct funcall_ast_node {
struct ast_node n_base;
struct ast_node *n_func;
fx_queue n_args;
};
/* AST_PIPELINE node: n_stages queues the child nodes, linked by n_entry. */
struct pipeline_ast_node {
struct ast_node n_base;
fx_queue n_stages;
};
/* AST_REDIRECTION node. */
struct redirection_ast_node {
struct ast_node n_base;
/* Printed as ">>" when set, ">" otherwise. */
bool n_append : 1;
/* Printed with a leading "&" before the target when set. */
bool n_out_is_fd : 1;
/* When set, the target is shown as an expression rather than a path or number. */
bool n_out_is_expr : 1;
/* Source and target numbers; n_out is printed only when there is no expression or path. */
unsigned int n_in, n_out;
/* Optional child node; visited during traversal when non-NULL. */
struct ast_node *n_out_path_expr;
/* Optional target path text. */
const char *n_out_path;
/* NOTE(review): presumably the token the target was parsed from - confirm. */
struct lex_token *n_out_tok;
};
/* AST_STMT_LIST node: n_statements queues the child nodes, linked by n_entry. */
struct stmt_list_ast_node {
struct ast_node n_base;
fx_queue n_statements;
};
/* AST_BLOCK node: n_statements queues the child nodes, linked by n_entry. */
struct block_ast_node {
struct ast_node n_base;
fx_queue n_statements;
};
/* AST_FUNC node: name token, queued parameter nodes, and an optional body node. */
struct func_ast_node {
struct ast_node n_base;
struct lex_token *n_name;
fx_queue n_params;
struct ast_node *n_body;
};
/* AST_ARRAY node: n_items queues the child nodes, linked by n_entry. */
struct array_ast_node {
struct ast_node n_base;
fx_queue n_items;
};
/* AST_HASHTABLE node: n_items queues the child nodes, linked by n_entry. */
struct hashtable_ast_node {
struct ast_node n_base;
fx_queue n_items;
};
/* AST_HASHTABLE_ITEM node: key and value child nodes, either may be NULL. */
struct hashtable_item_ast_node {
struct ast_node n_base;
struct ast_node *n_key, *n_value;
};
/* AST_OP node: operator description plus left/right operand nodes, either may be NULL. */
struct op_ast_node {
struct ast_node n_base;
const struct operator_info *n_op;
struct ast_node *n_left, *n_right;
};
/* AST_IF_BRANCH node: condition and body child nodes, either may be NULL. */
struct if_branch_ast_node {
struct ast_node n_base;
struct ast_node *n_cond;
struct ast_node *n_body;
};
/* AST_IF node: n_branches queues the child nodes, linked by n_entry. */
struct if_ast_node {
struct ast_node n_base;
fx_queue n_branches;
};
/*
 * Traversal state for walking an AST. A zero-initialised iterator is a valid
 * empty iterator.
 */
struct ast_iterator {
/* NOTE(review): not referenced by the iterator functions in ast.c - confirm whether still needed. */
struct ast_node *it_cur;
/* Pending nodes, linked through ast_node.n_it.e_entry. */
fx_queue it_queue;
/* NOTE(review): not referenced by the iterator functions in ast.c - confirm whether still needed. */
unsigned int it_depth;
/* Entry after which the next enqueued node is inserted; NULL means append to the back. */
fx_queue_entry *it_insert_after;
};
/* Describes one node type: its id, allocation size and optional hooks. */
struct ast_node_definition {
enum ast_node_type def_id;
/* Number of bytes ast_node_create() allocates for this type. */
size_t def_node_size;
/* Optional: enqueue the node's children on the iterator. */
enum bshell_status (*def_collect_children)(
struct ast_node *,
struct ast_iterator *);
/* Optional: called by ast_node_destroy() before the node is freed. */
enum bshell_status (*def_cleanup)(struct ast_node *);
/* Optional: append a textual form of the node's payload. */
void (*def_to_string)(const struct ast_node *, fx_bstr *);
};
/* Allocate a zeroed node of the given type; returns NULL if allocation fails. */
extern struct ast_node *ast_node_create(enum ast_node_type type);
/* Free a node together with the nodes reachable from it during traversal. */
extern void ast_node_destroy(struct ast_node *node);
/* Begin a traversal by enqueueing `node` on `it`. */
extern void ast_node_iterate(struct ast_node *node, struct ast_iterator *it);
/* Append the node's textual payload to `out`, if its type provides one. */
extern void ast_node_to_string(const struct ast_node *node, fx_bstr *out);
/* Return the enumerator name for `type`, or "<unknown>". */
extern const char *ast_node_type_to_string(enum ast_node_type type);
/* Return the next node of the traversal without removing it, or NULL. */
extern struct ast_node *ast_iterator_peek(struct ast_iterator *it);
/* Remove and return the next node, queueing its children first; NULL when empty. */
extern struct ast_node *ast_iterator_dequeue(struct ast_iterator *it);
/* Add a node to the traversal queue. */
extern void ast_iterator_enqueue(
struct ast_iterator *it,
struct ast_node *node);
#endif
+23
View File
@@ -0,0 +1,23 @@
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct block_ast_node *block = (struct block_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&block->n_statements);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
struct ast_node_definition block_ast_node = {
.def_id = AST_BLOCK,
.def_node_size = sizeof(struct block_ast_node),
.def_collect_children = collect_children,
};
+31
View File
@@ -0,0 +1,31 @@
#include "ast.h"
/*
 * Queue the children of a command call for traversal: first every argument,
 * then every redirection, each in list order.
 */
static enum bshell_status collect_children(
	struct ast_node *node,
	struct ast_iterator *it)
{
	struct cmdcall_ast_node *call = (struct cmdcall_ast_node *)node;
	fx_queue_entry *entry;

	for (entry = fx_queue_first(&call->n_args); entry;
	     entry = fx_queue_next(entry)) {
		struct ast_node *arg = fx_unbox(struct ast_node, entry, n_entry);

		ast_iterator_enqueue(it, arg);
	}

	for (entry = fx_queue_first(&call->n_redirect); entry;
	     entry = fx_queue_next(entry)) {
		struct ast_node *redirect
			= fx_unbox(struct ast_node, entry, n_entry);

		ast_iterator_enqueue(it, redirect);
	}

	return BSHELL_SUCCESS;
}

/* Definition record for AST_CMDCALL nodes. */
struct ast_node_definition cmdcall_ast_node = {
	.def_id = AST_CMDCALL,
	.def_node_size = sizeof(struct cmdcall_ast_node),
	.def_collect_children = collect_children,
};
+6
View File
@@ -0,0 +1,6 @@
#include "ast.h"
/*
 * Definition record for AST_DOUBLE nodes. No hooks are set: the node has no
 * children to collect and no to_string output.
 */
struct ast_node_definition double_ast_node = {
.def_id = AST_DOUBLE,
.def_node_size = sizeof(struct double_ast_node),
};
+23
View File
@@ -0,0 +1,23 @@
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct fstring_ast_node *fstring = (struct fstring_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&fstring->n_elements);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
struct ast_node_definition fstring_ast_node = {
.def_id = AST_FSTRING,
.def_node_size = sizeof(struct fstring_ast_node),
.def_collect_children = collect_children,
};
+36
View File
@@ -0,0 +1,36 @@
#include "../parse/token.h"
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct func_ast_node *func = (struct func_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&func->n_params);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
if (func->n_body) {
ast_iterator_enqueue(it, func->n_body);
}
return BSHELL_SUCCESS;
}
static void to_string(const struct ast_node *node, fx_bstr *out)
{
const struct func_ast_node *func = (const struct func_ast_node *)node;
fx_bstr_write_fmt(out, NULL, "%s", func->n_name->tok_str);
}
struct ast_node_definition func_ast_node = {
.def_id = AST_FUNC,
.def_node_size = sizeof(struct func_ast_node),
.def_collect_children = collect_children,
.def_to_string = to_string,
};
+26
View File
@@ -0,0 +1,26 @@
#include "../parse/token.h"
#include "ast.h"
/*
 * Queue the key and then the value of a hashtable item for traversal,
 * skipping whichever of the two is absent.
 */
static enum bshell_status collect_children(
	struct ast_node *node,
	struct ast_iterator *it)
{
	struct hashtable_item_ast_node *self
		= (struct hashtable_item_ast_node *)node;
	struct ast_node *const children[] = {self->n_key, self->n_value};

	for (size_t i = 0; i < sizeof children / sizeof children[0]; i++) {
		if (children[i] != NULL) {
			ast_iterator_enqueue(it, children[i]);
		}
	}

	return BSHELL_SUCCESS;
}

/* Definition record for AST_HASHTABLE_ITEM nodes. */
struct ast_node_definition hashtable_item_ast_node = {
	.def_id = AST_HASHTABLE_ITEM,
	.def_node_size = sizeof(struct hashtable_item_ast_node),
	.def_collect_children = collect_children,
};
+24
View File
@@ -0,0 +1,24 @@
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct hashtable_ast_node *hashtable
= (struct hashtable_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&hashtable->n_items);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
struct ast_node_definition hashtable_ast_node = {
.def_id = AST_HASHTABLE,
.def_node_size = sizeof(struct hashtable_ast_node),
.def_collect_children = collect_children,
};
+24
View File
@@ -0,0 +1,24 @@
#include "ast.h"
/*
 * Queue the condition and then the body of an if-branch for traversal,
 * skipping whichever of the two is absent.
 */
static enum bshell_status collect_children(
	struct ast_node *node,
	struct ast_iterator *it)
{
	struct if_branch_ast_node *self = (struct if_branch_ast_node *)node;
	struct ast_node *const children[] = {self->n_cond, self->n_body};

	for (size_t i = 0; i < sizeof children / sizeof children[0]; i++) {
		if (children[i] != NULL) {
			ast_iterator_enqueue(it, children[i]);
		}
	}

	return BSHELL_SUCCESS;
}

/* Definition record for AST_IF_BRANCH nodes. */
struct ast_node_definition if_branch_ast_node = {
	.def_id = AST_IF_BRANCH,
	.def_node_size = sizeof(struct if_branch_ast_node),
	.def_collect_children = collect_children,
};
+23
View File
@@ -0,0 +1,23 @@
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct if_ast_node *if_group = (struct if_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&if_group->n_branches);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
struct ast_node_definition if_ast_node = {
.def_id = AST_IF,
.def_node_size = sizeof(struct if_ast_node),
.def_collect_children = collect_children,
};
+14
View File
@@ -0,0 +1,14 @@
#include "../parse/token.h"
#include "ast.h"
/*
 * Write the integer held by the node's value token to `out`.
 *
 * The node is only read, so it is accessed through a const-qualified pointer
 * rather than casting the qualifier away.
 */
static void to_string(const struct ast_node *node, fx_bstr *out)
{
	const struct int_ast_node *i = (const struct int_ast_node *)node;

	fx_bstr_write_fmt(out, NULL, "%lld", i->n_value->tok_int);
}

/* Definition record for AST_INT nodes. */
struct ast_node_definition int_ast_node = {
	.def_id = AST_INT,
	.def_node_size = sizeof(struct int_ast_node),
	.def_to_string = to_string,
};
+7
View File
@@ -0,0 +1,7 @@
#include "../parse/token.h"
#include "ast.h"
/*
 * Definition record for AST_NULL nodes. No hooks are set: the node has no
 * children to collect and no to_string output.
 */
struct ast_node_definition null_ast_node = {
.def_id = AST_NULL,
.def_node_size = sizeof(struct null_ast_node),
};
+37
View File
@@ -0,0 +1,37 @@
#include "../operator.h"
#include "../parse/token.h"
#include "ast.h"
/*
 * Queue the left and then the right operand of an operator node for
 * traversal, skipping whichever of the two is absent.
 */
static enum bshell_status collect_children(
	struct ast_node *node,
	struct ast_iterator *it)
{
	struct op_ast_node *self = (struct op_ast_node *)node;
	struct ast_node *const operands[] = {self->n_left, self->n_right};

	for (size_t i = 0; i < sizeof operands / sizeof operands[0]; i++) {
		if (operands[i] != NULL) {
			ast_iterator_enqueue(it, operands[i]);
		}
	}

	return BSHELL_SUCCESS;
}

/* Write the name of the node's operator, looked up by its id, to `out`. */
static void to_string(const struct ast_node *node, fx_bstr *out)
{
	const struct op_ast_node *self = (const struct op_ast_node *)node;
	const struct operator_info *info = self->n_op;

	fx_bstr_write_fmt(out, NULL, "%s", operator_id_to_string(info->op_id));
}

/* Definition record for AST_OP nodes. */
struct ast_node_definition op_ast_node = {
	.def_id = AST_OP,
	.def_node_size = sizeof(struct op_ast_node),
	.def_collect_children = collect_children,
	.def_to_string = to_string,
};
+23
View File
@@ -0,0 +1,23 @@
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct pipeline_ast_node *pipeline = (struct pipeline_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&pipeline->n_stages);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
struct ast_node_definition pipeline_ast_node = {
.def_id = AST_PIPELINE,
.def_node_size = sizeof(struct pipeline_ast_node),
.def_collect_children = collect_children,
};
+49
View File
@@ -0,0 +1,49 @@
#include "ast.h"
/*
 * Queue the redirection's target expression for traversal when it has one;
 * a redirection has no other children.
 */
static enum bshell_status collect_children(
	struct ast_node *node,
	struct ast_iterator *it)
{
	struct redirection_ast_node *redirection
		= (struct redirection_ast_node *)node;

	if (redirection->n_out_path_expr) {
		ast_iterator_enqueue(it, redirection->n_out_path_expr);
	}

	return BSHELL_SUCCESS;
}

/*
 * Write a summary of the redirection to `out`, in the form
 * "&<in> >[>] [&]<target>", where <target> is "<expr>", a quoted path, or
 * the numeric n_out value.
 *
 * The node is only read, so it is accessed through a const-qualified pointer
 * rather than casting the qualifier away.
 */
static void to_string(const struct ast_node *node, fx_bstr *out)
{
	const struct redirection_ast_node *redirection
		= (const struct redirection_ast_node *)node;

	fx_bstr_write_fmt(out, NULL, "&%u", redirection->n_in);

	/* Operator: append or truncate. */
	if (redirection->n_append) {
		fx_bstr_write_fmt(out, NULL, " >>");
	} else {
		fx_bstr_write_fmt(out, NULL, " >");
	}

	/* Separator, carrying the '&' marker for descriptor targets. */
	if (redirection->n_out_is_fd) {
		fx_bstr_write_fmt(out, NULL, " &");
	} else {
		fx_bstr_write_fmt(out, NULL, " ");
	}

	/* Target: expression placeholder, quoted path, or number. */
	if (redirection->n_out_is_expr) {
		fx_bstr_write_fmt(out, NULL, "<expr>");
	} else if (redirection->n_out_path) {
		fx_bstr_write_fmt(out, NULL, "'%s'", redirection->n_out_path);
	} else {
		fx_bstr_write_fmt(out, NULL, "%u", redirection->n_out);
	}
}

/* Definition record for AST_REDIRECTION nodes. */
struct ast_node_definition redirection_ast_node = {
	.def_id = AST_REDIRECTION,
	.def_node_size = sizeof(struct redirection_ast_node),
	.def_collect_children = collect_children,
	.def_to_string = to_string,
};
+24
View File
@@ -0,0 +1,24 @@
#include "ast.h"
static enum bshell_status collect_children(
struct ast_node *node,
struct ast_iterator *it)
{
struct stmt_list_ast_node *stmt_list
= (struct stmt_list_ast_node *)node;
fx_queue_entry *cur = fx_queue_first(&stmt_list->n_statements);
while (cur) {
struct ast_node *child
= fx_unbox(struct ast_node, cur, n_entry);
ast_iterator_enqueue(it, child);
cur = fx_queue_next(cur);
}
return BSHELL_SUCCESS;
}
struct ast_node_definition stmt_list_ast_node = {
.def_id = AST_STMT_LIST,
.def_node_size = sizeof(struct stmt_list_ast_node),
.def_collect_children = collect_children,
};
+15
View File
@@ -0,0 +1,15 @@
#include "../parse/token.h"
#include "ast.h"
/* Write the text of the string node's value token to `out`. */
static void to_string(const struct ast_node *node, fx_bstr *out)
{
	const struct string_ast_node *self
		= (const struct string_ast_node *)node;
	const struct lex_token *value = self->n_value;

	fx_bstr_write_fmt(out, NULL, "%s", value->tok_str);
}

/* Definition record for AST_STRING nodes. */
struct ast_node_definition string_ast_node = {
	.def_id = AST_STRING,
	.def_node_size = sizeof(struct string_ast_node),
	.def_to_string = to_string,
};
+14
View File
@@ -0,0 +1,14 @@
#include "../parse/token.h"
#include "ast.h"
/* Write the text of the variable node's identifier token to `out`. */
static void to_string(const struct ast_node *node, fx_bstr *out)
{
	const struct var_ast_node *self = (const struct var_ast_node *)node;
	const struct lex_token *ident = self->n_ident;

	fx_bstr_write_fmt(out, NULL, "%s", ident->tok_str);
}

/* Definition record for AST_VAR nodes. */
struct ast_node_definition var_ast_node = {
	.def_id = AST_VAR,
	.def_node_size = sizeof(struct var_ast_node),
	.def_to_string = to_string,
};
+14
View File
@@ -0,0 +1,14 @@
#include "../parse/token.h"
#include "ast.h"
/* Write the text of the word node's value token to `out`. */
static void to_string(const struct ast_node *node, fx_bstr *out)
{
	const struct word_ast_node *self = (const struct word_ast_node *)node;
	const struct lex_token *value = self->n_value;

	fx_bstr_write_fmt(out, NULL, "%s", value->tok_str);
}

/* Definition record for AST_WORD nodes. */
struct ast_node_definition word_ast_node = {
	.def_id = AST_WORD,
	.def_node_size = sizeof(struct word_ast_node),
	.def_to_string = to_string,
};
+149
View File
@@ -0,0 +1,149 @@
#include "debug.h"
#include "ast/ast.h"
#include "parse/token.h"
#include <fx/string.h>
#include <fx/term/print.h>
#include <stdio.h>
/*
 * Print one lexer token as a single line: its start/end coordinates, a
 * colour tag chosen by token type, the token type name, and - for token
 * types that carry one - the value in parentheses.
 */
extern void print_lex_token(struct lex_token *tok)
{
	const char *colour = NULL;

	printf("[%lu:%lu - %lu:%lu] ",
	       tok->tok_start.c_row,
	       tok->tok_start.c_col,
	       tok->tok_end.c_row,
	       tok->tok_end.c_col);

	/* Pick the colour tag; token types not listed get none. */
	switch (tok->tok_type) {
	case TOK_KEYWORD:
		colour = "[magenta]";
		break;
	case TOK_SYMBOL:
		colour = "[blue]";
		break;
	case TOK_INT:
	case TOK_DOUBLE:
	case TOK_VAR:
	case TOK_VAR_SPLAT:
		colour = "[yellow]";
		break;
	case TOK_OPERATOR:
		colour = "[red]";
		break;
	case TOK_WORD:
	case TOK_WORD_START:
	case TOK_WORD_END:
		colour = "[cyan]";
		break;
	case TOK_STRING:
	case TOK_STR_START:
	case TOK_STR_END:
		colour = "[green]";
		break;
	case TOK_LINEFEED:
		colour = "[dark_grey]";
		break;
	default:
		break;
	}

	if (colour != NULL) {
		fx_puts(colour);
	}

	fx_puts(token_type_to_string(tok->tok_type));

	/* Append the payload for token types that have one. */
	switch (tok->tok_type) {
	case TOK_INT:
		printf("(%lld)", tok->tok_int);
		break;
	case TOK_DOUBLE:
		printf("(%lf)", tok->tok_double);
		break;
	case TOK_KEYWORD:
		printf("(%s)", token_keyword_to_string(tok->tok_keyword));
		break;
	case TOK_SYMBOL:
		printf("(%s)", token_symbol_to_string(tok->tok_symbol));
		break;
	case TOK_OPERATOR:
		printf("(%s)", token_operator_to_string(tok->tok_operator));
		break;
	case TOK_WORD:
	case TOK_FLAG:
	case TOK_STRING:
	case TOK_VAR:
	case TOK_VAR_SPLAT:
		printf("(%s)", tok->tok_str);
		break;
	default:
		break;
	}

	fx_puts("[reset]\n");
}
void print_ast_node(struct ast_node *node)
{
struct ast_iterator it = {0};
ast_node_iterate(node, &it);
while (1) {
node = ast_iterator_peek(&it);
if (!node) {
break;
}
for (unsigned long i = 0; i < node->n_it.e_depth; i++) {
fx_puts(" ");
}
switch (node->n_type) {
case AST_IF:
case AST_IF_BRANCH:
case AST_BLOCK:
case AST_FUNC:
case AST_STMT_LIST:
fx_puts("[magenta]");
break;
case AST_REDIRECTION:
case AST_PIPELINE:
case AST_OP:
case AST_ARRAY:
case AST_HASHTABLE:
case AST_HASHTABLE_ITEM:
fx_puts("[blue]");
break;
case AST_CMDCALL:
case AST_NULL:
fx_puts("[red]");
break;
case AST_INT:
case AST_DOUBLE:
case AST_VAR:
fx_puts("[yellow]");
break;
case AST_WORD:
fx_puts("[cyan]");
break;
case AST_STRING:
case AST_FSTRING:
fx_puts("[green]");
break;
default:
break;
}
fx_printf("%s", ast_node_type_to_string(node->n_type));
char s[128] = {0};
fx_bstr str;
fx_bstr_begin(&str, s, sizeof s);
ast_node_to_string(node, &str);
if (fx_bstr_get_size(&str)) {
fx_printf("(%s)", fx_bstr_end(&str));
}
fx_printf("[reset]\n");
ast_iterator_dequeue(&it);
}
}
+12
View File
@@ -0,0 +1,12 @@
#ifndef DEBUG_H_
#define DEBUG_H_
#include <stdbool.h>
/* opaque forward declarations; only pointers are used in this header */
struct ast_node;
struct lex_token;
/* prints a single lexer token (source range, type name and value) */
extern void print_lex_token(struct lex_token *tok);
/* prints every node reachable from `node`, one per line, indented by depth */
extern void print_ast_node(struct ast_node *node);
#endif
+116
View File
@@ -0,0 +1,116 @@
#include "file.h"
#include "line-source.h"
#include <errno.h>
#include <fx/collections/array.h>
#include <fx/string.h>
#include <stdlib.h>
#include <string.h>
/* line_source callback: copies the path of the file into `buf`.
 *
 * at most `count` bytes (including the terminator) are written. `*nr_read`
 * receives the number of characters actually stored in `buf`, excluding the
 * terminator; if the path had to be truncated this is `count - 1`, not the
 * full path length. */
static enum bshell_status get_name(
	struct line_source *src,
	char *buf,
	size_t count,
	size_t *nr_read)
{
	struct file *f = (struct file *)src;

	/* snprintf returns the length the output WOULD have had, or a
	 * negative value on error, so it cannot be stored directly. */
	int wanted = snprintf(buf, count, "%s", f->f_path);

	size_t stored = 0;
	if (wanted > 0 && count > 0) {
		stored = (size_t)wanted;
		if (stored > count - 1) {
			stored = count - 1;
		}
	}

	*nr_read = stored;
	return BSHELL_SUCCESS;
}
/* line_source callback: copies row number `row` (1-based) of the file into
 * `buf`, without its trailing linefeed.
 *
 * at most `count` bytes (including the terminator) are written. `*nr_read`
 * receives the number of bytes copied from the stored line before the
 * linefeed was stripped. returns BSHELL_ERR_EOF if the row does not exist.
 *
 * NOTE(review): nothing visible in this file appends to f_lines, so unless
 * another component fills it this always reports EOF -- confirm. */
static enum bshell_status get_row(
	struct line_source *src,
	size_t row,
	char *buf,
	size_t count,
	size_t *nr_read)
{
	struct file *f = (struct file *)src;
	size_t nr_rows = fx_array_size(f->f_lines);

	/* rows are 1-based; row 0 would index element (size_t)-1 */
	if (row == 0 || row > nr_rows) {
		return BSHELL_ERR_EOF;
	}

	fx_string *line = fx_array_at(f->f_lines, row - 1);
	if (!line) {
		return BSHELL_ERR_EOF;
	}

	if (count == 0) {
		/* no room for even the terminator */
		*nr_read = 0;
		return BSHELL_SUCCESS;
	}

	const char *line_str = fx_string_get_cstr(line);
	size_t copy_len = fx_string_get_size(line, FX_STRLEN_NORMAL);

	/* reserve one byte of `buf` for the terminator */
	if (copy_len > count - 1) {
		copy_len = count - 1;
	}

	memcpy(buf, line_str, copy_len);
	buf[copy_len] = 0;

	/* cut the line at the first linefeed, if any */
	buf[strcspn(buf, "\n")] = 0;

	*nr_read = copy_len;
	return BSHELL_SUCCESS;
}
/* line_source callback: reads characters from the file's stream and writes
 * them to `out` until a linefeed has been written or a read fails.
 * returns BSHELL_ERR_EOF if no character could be read at all. */
static enum bshell_status readline(
	struct line_source *src,
	fx_stringstream *out)
{
	struct file *f = (struct file *)src;
	size_t nr_chars = 0;
	fx_wchar ch = FX_WCHAR_INVALID;

	for (;;) {
		fx_status status = fx_stream_read_char(f->f_strp, &ch);
		if (!FX_OK(status)) {
			break;
		}

		fx_stream_write_char(out, ch);
		nr_chars++;

		if (ch == '\n') {
			break;
		}
	}

	return nr_chars == 0 ? BSHELL_ERR_EOF : BSHELL_SUCCESS;
}
enum bshell_status file_open(const char *path, struct file **out)
{
FILE *fp = fopen(path, "r");
if (!fp) {
return bshell_status_from_errno(errno);
}
fx_stream *strp = fx_stream_open_fp(fp);
struct file *file = malloc(sizeof *file);
if (!file) {
fclose(fp);
return BSHELL_ERR_NO_MEMORY;
}
memset(file, 0x0, sizeof *file);
file->f_base.s_get_name = get_name;
file->f_base.s_get_row = get_row;
file->f_base.s_readline = readline;
file->f_fp = fp;
file->f_strp = strp;
file->f_path = fx_strdup(path);
file->f_lines = fx_array_create();
*out = file;
return BSHELL_SUCCESS;
}
/* releases everything owned by a struct file created with file_open():
 * the stream wrapper, the line array, the path copy, the underlying FILE,
 * and finally the object itself. passing NULL is a no-op, like free(). */
void file_close(struct file *file)
{
	if (!file) {
		return;
	}

	fx_stream_unref(file->f_strp);
	fx_array_unref(file->f_lines);
	free(file->f_path);
	fclose(file->f_fp);
	free(file);
}
+20
View File
@@ -0,0 +1,20 @@
#ifndef FILE_H_
#define FILE_H_
#include "line-source.h"
#include <fx/collections/array.h>
#include <stdio.h>
/* a line_source backed by a file on disk.
 * f_base is the first member, and file.c casts between struct line_source *
 * and struct file * on that basis. */
struct file {
/* line_source callbacks (set by file_open) */
struct line_source f_base;
/* array of fx_string lines, indexed by (row - 1) in file.c */
fx_array *f_lines;
/* heap-allocated copy of the path passed to file_open */
char *f_path;
/* stream wrapper around f_fp, used to read characters */
fx_stream *f_strp;
/* underlying stdio handle, opened in "r" mode */
FILE *f_fp;
};
/* opens `path` and stores the new object in `*out` on success */
extern enum bshell_status file_open(const char *path, struct file **out);
/* releases a struct file and everything it owns */
extern void file_close(struct file *file);
#endif
+40
View File
@@ -0,0 +1,40 @@
#include "buffer.h"
#include "line-ed.h"
/* returns a pointer to the first character of line `y` (0-based) in the
 * editor buffer. if the buffer has fewer than `y + 1` lines, the returned
 * pointer refers to the buffer's terminating null char. */
const char *line_start(struct line_ed *ed, size_t y)
{
	const char *cursor = ed->l_buf;
	size_t remaining = y;

	while (remaining-- > 0) {
		/* jump to the end of the current line, then step over its
		 * linefeed (if there is one) */
		cursor += strcspn(cursor, "\n");
		if (*cursor == '\n') {
			cursor++;
		}
	}

	return cursor;
}
/* returns the length of line `y` (0-based) in the editor buffer.
 * the trailing linefeed is counted when the line has one, i.e. for every
 * line except the last. a line that does not exist has length 0. */
size_t line_length(struct line_ed *ed, size_t y)
{
	const char *line = line_start(ed, y);
	size_t len = strcspn(line, "\n");

	return (line[len] == '\n') ? len + 1 : len;
}
+16
View File
@@ -0,0 +1,16 @@
#ifndef LINE_ED_BUFFER_H_
#define LINE_ED_BUFFER_H_
#include <stddef.h>
struct line_ed;
/* returns a pointer to the start of the line based on the given `y`
 * coordinate. if the buffer has no such line, the pointer refers to the
 * buffer's terminating null char. */
extern const char *line_start(struct line_ed *ed, size_t y);
/* returns the length of the line based on the given `y` coordinate.
 * for any line other than the last line in the buffer, this length
 * INCLUDES the trailing linefeed. a line that does not exist has
 * length 0. */
extern size_t line_length(struct line_ed *ed, size_t y);
#endif
+26
View File
@@ -0,0 +1,26 @@
#include "line-ed.h"
#include "cursor.h"
#include "prompt.h"
/* converts buffer coordinates (x, y) into terminal coordinates by adding
 * the width of the prompt to `x`. either output pointer may be NULL.
 *
 * NOTE(review): the prompt is selected from the editor's CURRENT cursor row
 * (ed->l_cursor_y), not from `y`, and is only ever added when `y` is 0.
 * rows other than 0 are therefore returned unchanged even when a
 * continuation prompt precedes them -- confirm this is intended. */
void line_ed_coords_to_physical_coords(
	struct line_ed *ed, size_t x, size_t y, size_t *out_x, size_t *out_y)
{
	size_t offset = 0;

	if (y == 0) {
		if (ed->l_cursor_y == 0) {
			offset = prompt_length(ed, PROMPT_MAIN);
		} else if (ed->l_cursor_y <= ed->l_continuations) {
			offset = prompt_length(ed, PROMPT_CONT);
		}
	}

	if (out_x) {
		*out_x = x + offset;
	}

	if (out_y) {
		*out_y = y;
	}
}
+9
View File
@@ -0,0 +1,9 @@
#ifndef LINE_ED_CURSOR_H_
#define LINE_ED_CURSOR_H_
struct line_ed;
/* converts buffer coordinates (x, y) to terminal coordinates by accounting
 * for the prompt; either output pointer may be NULL.
 * NOTE(review): this header uses size_t without including <stddef.h>, so it
 * relies on the including file having pulled it in first. */
extern void line_ed_coords_to_physical_coords(
struct line_ed *ed, size_t x, size_t y, size_t *out_x, size_t *out_y);
#endif
+72
View File
@@ -0,0 +1,72 @@
#include "../misc.h"
#include "line-ed.h"
#include <fx/collections/array.h>
#include <fx/string.h>
void alloc_empty_history_entry(struct line_ed *ed)
{
fx_string *str = (fx_string *)fx_array_at(
ed->l_history,
fx_array_size(ed->l_history) - 1);
if (!str || fx_string_get_size(str, FX_STRLEN_NORMAL) > 0) {
str = fx_string_create();
fx_array_append(ed->l_history, (fx_object *)str);
}
ed->l_history_pos = fx_array_size(ed->l_history) - 1;
}
void save_buf_to_history(struct line_ed *ed)
{
fx_string *cur
= (fx_string *)fx_array_get(ed->l_history, ed->l_history_pos);
fx_string_replace_all(cur, ed->l_buf);
}
/* appends a linefeed followed by the contents of the editor buffer to the
 * currently selected history entry. */
void append_buf_to_history(struct line_ed *ed)
{
	static const char linefeed[] = "\n";
	fx_string *entry
		= (fx_string *)fx_array_get(ed->l_history, ed->l_history_pos);

	fx_string_append_cstr(entry, linefeed);
	fx_string_append_cstr(entry, ed->l_buf);
}
/* replaces the editor buffer with the currently selected history entry
 * (truncated to fit the buffer) and places the cursor at the end of it.
 * the cursor coordinates are recomputed by counting linefeeds.
 * NOTE(review): l_continuations is not touched here -- confirm. */
void load_buf_from_history(struct line_ed *ed)
{
	fx_string *entry
		= (fx_string *)fx_array_at(ed->l_history, ed->l_history_pos);

	/* leave one byte of the buffer for the terminator */
	size_t capacity = (size_t)(ed->l_buf_end - ed->l_buf - 1);
	size_t len = fx_string_get_size(entry, FX_STRLEN_NORMAL);
	if (capacity < len) {
		len = capacity;
	}

	memcpy(ed->l_buf, fx_string_get_cstr(entry), len);
	ed->l_buf[len] = '\0';

	/* walk the text to find the row/column of its end */
	unsigned int col = 0, row = 0;
	for (const char *p = ed->l_buf; *p != '\0'; p++) {
		if (*p == '\n') {
			row++;
			col = 0;
		} else {
			col++;
		}
	}

	ed->l_buf_ptr = ed->l_buf + len;
	ed->l_line_end = ed->l_buf_ptr;
	ed->l_cursor_x = col;
	ed->l_cursor_y = row;
}
/* returns the text of the second-to-last history entry, or NULL if the
 * history holds fewer than two entries. */
const char *last_history_line(struct line_ed *ed)
{
	size_t count = fx_array_size(ed->l_history);

	if (count >= 2) {
		fx_string *entry
			= (fx_string *)fx_array_at(ed->l_history, count - 2);
		return fx_string_get_cstr(entry);
	}

	return NULL;
}
+12
View File
@@ -0,0 +1,12 @@
#ifndef LINE_ED_HISTORY_H_
#define LINE_ED_HISTORY_H_
struct line_ed;
/* ensures the history ends with an empty entry and selects it */
extern void alloc_empty_history_entry(struct line_ed *ed);
/* overwrites the selected history entry with the editor buffer */
extern void save_buf_to_history(struct line_ed *ed);
/* appends a linefeed and the editor buffer to the selected history entry */
extern void append_buf_to_history(struct line_ed *ed);
/* loads the selected history entry into the editor buffer */
extern void load_buf_from_history(struct line_ed *ed);
/* returns the second-to-last history entry, or NULL if there is none */
extern const char *last_history_line(struct line_ed *ed);
#endif
+41
View File
@@ -0,0 +1,41 @@
#include "hook.h"
#include "line-ed.h"
void line_ed_add_hook(struct line_ed *ed, struct line_ed_hook *hook)
{
fx_queue_push_back(&ed->l_hooks, &hook->hook_entry);
}
void line_ed_remove_hook(struct line_ed *ed, struct line_ed_hook *hook)
{
fx_queue_delete(&ed->l_hooks, &hook->hook_entry);
}
/* calls the hook_keypress callback of every installed hook that has one,
 * in queue order, passing the key that was read. */
void hook_keypress(struct line_ed *ed, fx_keycode key)
{
	for (fx_queue_entry *link = fx_queue_first(&ed->l_hooks); link;
	     link = fx_queue_next(link)) {
		struct line_ed_hook *hook
			= fx_unbox(struct line_ed_hook, link, hook_entry);

		if (!hook->hook_keypress) {
			continue;
		}

		hook->hook_keypress(ed, hook, key);
	}
}
void hook_buffer_modified(struct line_ed *ed)
{
fx_queue_entry *entry = fx_queue_first(&ed->l_hooks);
while (entry) {
struct line_ed_hook *hook
= fx_unbox(struct line_ed_hook, entry, hook_entry);
if (hook->hook_buffer_modified) {
hook->hook_buffer_modified(ed, hook);
}
entry = fx_queue_next(entry);
}
}
+16
View File
@@ -0,0 +1,16 @@
#ifndef LINE_ED_HOOK_H_
#define LINE_ED_HOOK_H_
#include <fx/term/tty.h>
/* identifiers for hook points.
 * NOTE(review): not referenced by hook.c as visible here -- confirm whether
 * this is still needed. */
enum hook_id {
HOOK_KEYPRESS,
HOOK_BEFORE_PAINT,
};
struct line_ed;
/* invokes the keypress callback of every installed hook */
extern void hook_keypress(struct line_ed *ed, fx_keycode key);
/* invokes the buffer-modified callback of every installed hook */
extern void hook_buffer_modified(struct line_ed *ed);
#endif
+218
View File
@@ -0,0 +1,218 @@
#include "buffer.h"
#include "cursor.h"
#include "history.h"
#include "hook.h"
#include "line-ed.h"
#include "prompt.h"
#include "refresh.h"
#include <stdio.h>
/* inserts `c` into the buffer at the cursor position, shifting any text
 * after the cursor one byte to the right, then advances the cursor,
 * notifies the buffer-modified hooks and refreshes the display.
 *
 * the character is dropped if the buffer has no room for it plus the
 * terminating null char. */
void put_char(struct line_ed *ed, char c)
{
	if (ed->l_buf_ptr > ed->l_line_end + 1) {
		return;
	}

	/* after the insert, the terminator lives at (l_line_end + 1); that
	 * byte must still be inside the buffer. */
	if (ed->l_line_end + 1 >= ed->l_buf_end) {
		return;
	}

	struct refresh_state state = {
		.r_prev_cursor_x = ed->l_cursor_x,
		.r_prev_cursor_y = ed->l_cursor_y,
	};

	char *dest = ed->l_buf_ptr;

	if (dest < ed->l_line_end) {
		/* inserting mid-text: move the tail (and the byte at
		 * l_line_end) up by one to open a gap */
		size_t len = (size_t)(ed->l_line_end - dest) + 1;
		memmove(dest + 1, dest, len);
	}

	ed->l_cursor_x++;
	ed->l_line_end++;
	ed->l_buf_ptr++;

	*dest = c;

	if (ed->l_buf_ptr == ed->l_line_end) {
		/* appended at the end: re-terminate the text */
		*ed->l_buf_ptr = '\0';
	}

	hook_buffer_modified(ed);
	put_refresh(ed, &state);
}
static void backspace_simple(struct line_ed *ed)
{
if (ed->l_buf_ptr == ed->l_buf) {
return;
}
struct refresh_state state = {
.r_prev_cursor_x = ed->l_cursor_x,
.r_prev_cursor_y = ed->l_cursor_y,
};
size_t prev_cursor = ed->l_buf_ptr - ed->l_buf;
char *dest = ed->l_buf_ptr;
size_t len = ed->l_line_end - ed->l_buf_ptr + 1;
memmove(dest - 1, dest, len);
ed->l_cursor_x--;
ed->l_line_end--;
ed->l_buf_ptr--;
hook_buffer_modified(ed);
backspace_simple_refresh(ed, &state);
}
static void backspace_nl(struct line_ed *ed)
{
size_t prev_line_len = line_length(ed, ed->l_cursor_y - 1);
struct refresh_state state = {
.r_prev_cursor_x = ed->l_cursor_x,
.r_prev_cursor_y = ed->l_cursor_y,
.r_prev_line_len = prev_line_len,
};
char *dest = ed->l_buf_ptr;
size_t len = ed->l_line_end - ed->l_buf_ptr + 1;
memmove(dest - 1, dest, len);
ed->l_cursor_x = prev_line_len - 1;
ed->l_cursor_y--;
ed->l_buf_ptr--;
ed->l_line_end--;
hook_buffer_modified(ed);
backspace_nl_refresh(ed, &state);
}
/* handles the backspace key.
 * nothing happens at the very start of the buffer, or at column 0 of a row
 * that is not past the continuation rows. at column 0 of any later row the
 * preceding linefeed is deleted; otherwise the preceding character is. */
void backspace(struct line_ed *ed)
{
	if (ed->l_buf_ptr == ed->l_buf) {
		return;
	}

	if (ed->l_cursor_x != 0) {
		backspace_simple(ed);
		return;
	}

	/* from here on the cursor is at column 0 */
	if (ed->l_cursor_y <= ed->l_continuations) {
		return;
	}

	if (ed->l_cursor_y > 0) {
		backspace_nl(ed);
	} else {
		backspace_simple(ed);
	}
}
/* moves the cursor one position to the left.
 * within a line this is a single-cell move. at column 0 the cursor wraps to
 * the end of the previous line, unless the current row is not past the
 * continuation rows or the cursor is already at the start of the buffer.
 *
 * NOTE(review): the wrap passes (line length + prompt length) as the
 * column, the same value the old 1-based escape sequence used, while other
 * call sites in this file drop the +1 when using FX_TTY_POS_START -- confirm
 * which base the API expects. */
void cursor_left(struct line_ed *ed)
{
	if (ed->l_cursor_x != 0) {
		fx_tty_move_cursor_x(ed->l_tty, FX_TTY_POS_CURSOR, -1);
		fflush(stdout);

		ed->l_cursor_x--;
		ed->l_buf_ptr--;
		return;
	}

	if (ed->l_cursor_y <= ed->l_continuations || ed->l_buf_ptr <= ed->l_buf) {
		return;
	}

	/* step over the linefeed onto the previous row */
	ed->l_cursor_y--;
	ed->l_buf_ptr--;

	size_t prompt_width
		= (ed->l_cursor_y == 0) ? prompt_length(ed, PROMPT_MAIN) : 0;

	/* row length includes the linefeed, so the last column is len - 1 */
	size_t row_len = line_length(ed, ed->l_cursor_y);
	ed->l_cursor_x = row_len - 1;

	fx_tty_move_cursor_y(ed->l_tty, FX_TTY_POS_CURSOR, -1);
	fx_tty_move_cursor_x(
		ed->l_tty,
		FX_TTY_POS_START,
		(int)(row_len + prompt_width));
	fflush(stdout);
}
/* moves the cursor one position to the right.
 * nothing happens at the end of the input. when the cursor sits on a
 * linefeed it wraps to column 0 of the next row; otherwise it moves one
 * cell to the right. */
void cursor_right(struct line_ed *ed)
{
	if (ed->l_buf_ptr >= ed->l_line_end) {
		return;
	}

	if (*ed->l_buf_ptr == '\n') {
		/* wrap to the start of the next row */
		ed->l_cursor_y++;
		ed->l_cursor_x = 0;
		ed->l_buf_ptr++;

		fx_tty_move_cursor_y(ed->l_tty, FX_TTY_POS_CURSOR, 1);
		fx_tty_move_cursor_x(ed->l_tty, FX_TTY_POS_START, 0);
		fflush(stdout);
		return;
	}

	ed->l_cursor_x++;
	ed->l_buf_ptr++;

	fx_tty_move_cursor_x(ed->l_tty, FX_TTY_POS_CURSOR, 1);
	fflush(stdout);
}
/* selects the previous history entry.
 * the current buffer is saved into its history slot, the terminal is
 * cleared from the start of the input onwards, and the previous entry is
 * loaded and printed. nothing happens at the oldest entry. */
void arrow_up(struct line_ed *ed)
{
	if (ed->l_history_pos == 0) {
		return;
	}

	if (ed->l_cursor_y > 0) {
		/* move UP to the first row of the input. elsewhere in this
		 * file a negative delta is used for upward movement. */
		fx_tty_move_cursor_y(
			ed->l_tty,
			FX_TTY_POS_CURSOR,
			-(long long)ed->l_cursor_y);
	}

	/* go to the first cell after the prompt and erase everything that
	 * follows */
	fx_tty_move_cursor_x(
		ed->l_tty,
		FX_TTY_POS_START,
		(long long)prompt_length(ed, PROMPT_MAIN));
	fx_tty_clear(ed->l_tty, FX_TTY_CLEAR_SCREEN | FX_TTY_CLEAR_FROM_CURSOR);

	save_buf_to_history(ed);
	ed->l_history_pos--;
	load_buf_from_history(ed);

	print_buffer(ed);
	fflush(stdout);
}
void arrow_down(struct line_ed *ed)
{
if (ed->l_history_pos == fx_array_size(ed->l_history) - 1) {
return;
}
if (ed->l_cursor_y > 0) {
//printf("\033[%uA", ed->l_cursor_y);
fx_tty_move_cursor_y(ed->l_tty, FX_TTY_POS_CURSOR, (int)ed->l_cursor_y);
}
//printf("\033[%zuG\033[J", prompt_length(ed, PROMPT_MAIN) + 1);
fx_tty_move_cursor_x(
ed->l_tty, FX_TTY_POS_START, prompt_length(ed, PROMPT_MAIN) + 1);
save_buf_to_history(ed);
ed->l_history_pos++;
load_buf_from_history(ed);
print_buffer(ed);
fflush(stdout);
}
+13
View File
@@ -0,0 +1,13 @@
#ifndef LINE_ED_INPUT_H_
#define LINE_ED_INPUT_H_
struct line_ed;
/* inserts a character at the cursor position */
extern void put_char(struct line_ed *ed, char c);
/* deletes the character (or linefeed) before the cursor */
extern void backspace(struct line_ed *ed);
/* moves the cursor one position left, wrapping to the previous line */
extern void cursor_left(struct line_ed *ed);
/* moves the cursor one position right, wrapping to the next line */
extern void cursor_right(struct line_ed *ed);
/* selects the previous history entry */
extern void arrow_up(struct line_ed *ed);
/* selects the next history entry */
extern void arrow_down(struct line_ed *ed);
#endif
+301
View File
@@ -0,0 +1,301 @@
#include "line-ed.h"
#include "history.h"
#include "hook.h"
#include "input.h"
#include "prompt.h"
#include <ctype.h>
#include <fx/term/tty.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
/* recovers the enclosing struct line_ed from a pointer to its embedded
 * l_line_source member. the argument is parenthesised so that expressions
 * such as `a + b` are cast as a whole. */
#define LINE_ED_FROM_LEX_SOURCE(p) \
	((struct line_ed *)((char *)(p) \
			    - offsetof(struct line_ed, l_line_source)))
/* line_source callback for the line editor: reads one line interactively
 * into `out`, then clears the editor's scope type. a negative result from
 * line_ed_readline() is reported as end-of-file. */
static enum bshell_status readline(
	struct line_source *src,
	fx_stringstream *out)
{
	struct line_ed *ed = LINE_ED_FROM_LEX_SOURCE(src);

	long result = line_ed_readline(ed, out);
	line_ed_set_scope_type(ed, NULL);

	return (result < 0) ? BSHELL_ERR_EOF : BSHELL_SUCCESS;
}
/* allocates a line editor with a LINE_MAX-byte input buffer, an empty
 * history, the standard tty and the default prompts.
 * returns NULL if any allocation fails. */
struct line_ed *line_ed_create(void)
{
	/* calloc gives a zero-filled object */
	struct line_ed *ed = calloc(1, sizeof *ed);
	if (!ed) {
		return NULL;
	}

	ed->l_buf = malloc(LINE_MAX);
	if (ed->l_buf) {
		ed->l_history = fx_array_create();
	}

	if (!ed->l_buf || !ed->l_history) {
		free(ed->l_buf);
		free(ed);
		return NULL;
	}

	ed->l_tty = fx_stdtty;

	ed->l_buf_end = ed->l_buf + LINE_MAX;
	ed->l_buf_ptr = ed->l_buf;
	ed->l_line_end = ed->l_buf;

	ed->l_prompt[PROMPT_MAIN] = ">>> ";
	ed->l_prompt[PROMPT_CONT] = "> ";

	ed->l_line_source.s_readline = readline;

	return ed;
}
void line_ed_destroy(struct line_ed *ed)
{
fx_array_unref(ed->l_history);
free(ed->l_buf);
free(ed);
}
/* turns on the given flag bits. bits that are already set stay set; this
 * function never clears a flag. */
void line_ed_set_flags(struct line_ed *ed, enum line_ed_flags flags)
{
	ed->l_flags = ed->l_flags | flags;
}
/* records the name of the lexical scope the caller is currently inside, or
 * NULL for none. the pointer is stored as-is, not copied. */
void line_ed_set_scope_type(struct line_ed *ed, const char *scope_type)
{
	const char *name = scope_type;

	ed->l_scope_type = name;
}
/* resets the editor to an empty input: zero-fills the whole buffer and
 * moves the cursor, the end-of-input marker and the continuation count back
 * to their initial values. */
static void clear_buffer(struct line_ed *ed)
{
	size_t capacity = (size_t)(ed->l_buf_end - ed->l_buf);
	memset(ed->l_buf, 0x0, capacity);

	ed->l_line_end = ed->l_buf;
	ed->l_buf_ptr = ed->l_buf;

	ed->l_continuations = 0;
	ed->l_cursor_y = 0;
	ed->l_cursor_x = 0;
}
/* rewrites `s` in place so that every line continuation (a backslash
 * immediately followed by a linefeed) becomes a plain linefeed.
 *
 * `s` must be null-terminated; `max` is the number of bytes of `s` that may
 * be examined (normally the string length plus the terminator).
 *
 * the string is compacted in a single pass with separate read and write
 * positions, so no byte beyond the terminator is ever read. */
static void convert_continuation_feeds(char *s, size_t max)
{
	const char *end = s + max;
	const char *src = s;
	char *dst = s;

	while (src < end && *src != '\0') {
		if (src[0] == '\\' && src + 1 < end && src[1] == '\n') {
			/* drop the backslash; the linefeed is copied below */
			src++;
		}

		*dst++ = *src++;
	}

	if (dst != src) {
		/* close the gap: moves the terminator, plus any text that
		 * lies beyond `max` */
		memmove(dst, src, strlen(src) + 1);
	}
}
/* rewrites `s` in place so that every line continuation (a backslash
 * immediately followed by a linefeed) is removed entirely, joining the two
 * lines.
 *
 * `s` must be null-terminated; `max` is the number of bytes of `s` that may
 * be examined (normally the string length plus the terminator).
 *
 * the string is compacted in a single pass with separate read and write
 * positions. every character is examined exactly once, so continuations
 * that closely follow one another are all removed, and no byte beyond the
 * terminator is ever read. */
static void remove_continuation_feeds(char *s, size_t max)
{
	const char *end = s + max;
	const char *src = s;
	char *dst = s;

	while (src < end && *src != '\0') {
		if (src[0] == '\\' && src + 1 < end && src[1] == '\n') {
			/* skip both the backslash and the linefeed */
			src += 2;
			continue;
		}

		*dst++ = *src++;
	}

	if (dst != src) {
		/* close the gap: moves the terminator, plus any text that
		 * lies beyond `max` */
		memmove(dst, src, strlen(src) + 1);
	}
}
/* returns true if the input entered so far is empty or consists only of
 * whitespace characters. */
static bool input_is_empty(struct line_ed *ed)
{
	for (const char *p = ed->l_buf; p < ed->l_line_end; p++) {
		/* isspace() requires a value representable as unsigned char */
		if (!isspace((unsigned char)*p)) {
			return false;
		}
	}

	return true;
}
/* reads one logical line of input from the terminal into `out`.
 *
 * the tty is switched to raw mode for the duration of the call. keys are
 * passed to the installed keypress hooks and then interpreted: printable
 * characters are inserted, backspace and the arrow keys edit or navigate,
 * RETURN ends the input unless the line ends in a backslash (in which case
 * a continuation line is started), and ctrl-d on an empty input signals
 * end-of-file. other ctrl-modified keys are ignored.
 *
 * on completion the text is recorded in the history (appended to the
 * current entry when a scope type is set, otherwise stored in the newest
 * entry unless it repeats the previous one), line continuations are
 * removed or converted according to the editor flags, and the text plus a
 * trailing linefeed is written to `out`.
 *
 * returns the size of the input including its terminator, 0 if nothing was
 * entered, or (size_t)-1 if end-of-file was signalled. */
size_t line_ed_readline(struct line_ed *ed, fx_stringstream *out)
{
	clear_buffer(ed);

	bool append_history = false;
	size_t append_to_index = (size_t)-1;
	if (!ed->l_scope_type) {
		alloc_empty_history_entry(ed);
	} else {
		append_history = true;
		append_to_index = ed->l_history_pos;
	}

	fx_tty *tty = ed->l_tty;
	fx_tty_set_mode(tty, FX_TTY_RAW);
	show_prompt(ed);

	for (int i = 0; ed->l_buf[i]; i++) {
		if (ed->l_buf[i] == '\n') {
			fputc('\r', stdout);
		}

		fputc(ed->l_buf[i], stdout);
	}

	fflush(stdout);

	bool end = false;
	bool eof = false;

	while (!end) {
		fx_keycode key = fx_tty_read_key(tty);
		hook_keypress(ed, key);

		if (key == FX_TTY_CTRL_KEY('d')) {
			if (!input_is_empty(ed)) {
				continue;
			}

			eof = true;
			break;
		}

		if (key & FX_MOD_CTRL) {
			continue;
		}

		switch (key) {
		case FX_KEY_RETURN:
			fx_tty_reset_vmode(tty);

			if (ed->l_line_end > ed->l_buf
			    && *(ed->l_line_end - 1) != '\\') {
				end = true;
				break;
			}

			if (input_is_empty(ed)) {
				fputc('\r', stdout);
				fputc('\n', stdout);
				clear_buffer(ed);
				show_prompt(ed);
				break;
			}

			/* start a continuation line. the linefeed and the
			 * terminator after it must both fit in the buffer;
			 * if they do not, the key is ignored. */
			if (ed->l_line_end + 1 >= ed->l_buf_end) {
				break;
			}

			*ed->l_line_end = '\n';
			ed->l_line_end++;
			*ed->l_line_end = '\0';
			ed->l_buf_ptr = ed->l_line_end;

			ed->l_cursor_x = 0;
			ed->l_cursor_y++;
			ed->l_continuations++;

			fputc('\r', stdout);
			fputc('\n', stdout);
			show_prompt(ed);
			break;
		case FX_KEY_BACKSPACE:
			backspace(ed);
			break;
		case FX_KEY_ARROW_LEFT:
			cursor_left(ed);
			break;
		case FX_KEY_ARROW_RIGHT:
			cursor_right(ed);
			break;
		case FX_KEY_ARROW_UP:
			arrow_up(ed);
			break;
		case FX_KEY_ARROW_DOWN:
			arrow_down(ed);
			break;
		default:
			if (iswgraph(key) || key == ' ') {
				put_char(ed, key);
			}

			break;
		}
	}

	fx_tty_set_mode(tty, FX_TTY_CANONICAL);
	fputc('\n', stdout);

	if (*ed->l_buf == '\0') {
		return eof ? (size_t)-1 : 0;
	}

	if (append_history) {
		ed->l_history_pos = append_to_index;
		append_buf_to_history(ed);
	} else {
		ed->l_history_pos = fx_array_size(ed->l_history) - 1;

		/* do not store the same line twice in a row */
		const char *last = last_history_line(ed);
		if (!last || strcmp(last, ed->l_buf) != 0) {
			save_buf_to_history(ed);
		}
	}

	size_t sz = ed->l_line_end - ed->l_buf + 1;

	/* REMOVE must be tested first: its value contains the CONVERT bit */
	if ((ed->l_flags & LINE_ED_REMOVE_CONTINUATIONS)
	    == LINE_ED_REMOVE_CONTINUATIONS) {
		remove_continuation_feeds(ed->l_buf, sz);
	} else if (
		(ed->l_flags & LINE_ED_CONVERT_CONTINUATIONS)
		== LINE_ED_CONVERT_CONTINUATIONS) {
		convert_continuation_feeds(ed->l_buf, sz);
	}

	fx_stream_write_cstr(out, ed->l_buf, NULL);
	fx_stream_write_char(out, '\n');

	return sz;
}
+107
View File
@@ -0,0 +1,107 @@
#ifndef LINE_ED_H_
#define LINE_ED_H_
#define LINE_MAX 4096
#include "../line-source.h"
#include <fx/collections/array.h>
#include <fx/queue.h>
#include <fx/term/tty.h>
#include <stddef.h>
struct s_tty;
struct fx_tty_vmode;
struct line_ed;
/* a set of callbacks that can be installed on a line editor with
 * line_ed_add_hook(). either callback may be NULL, in which case it is
 * skipped. */
struct line_ed_hook {
/* called for every key read by the editor */
void (*hook_keypress)(
struct line_ed *,
struct line_ed_hook *,
fx_keycode);
/* called after a character has been inserted into or deleted from the
 * editor buffer */
void (*hook_buffer_modified)(struct line_ed *, struct line_ed_hook *);
/* link used to store the hook in the editor's hook queue */
fx_queue_entry hook_entry;
};
/* behaviour flags for a line editor; combine with bitwise OR and apply with
 * line_ed_set_flags(). */
enum line_ed_flags {
/* always reprint an entire line when a character is added/deleted.
 * without this flag, only the modified character and any subsequent
 * characters are reprinted. */
LINE_ED_FULL_REPRINT = 0x01u,
/* keep line continuation (backslash-newline) tokens in the output
 * buffer (default behaviour) */
LINE_ED_KEEP_CONTINUATIONS = 0x00u,
/* convert line continuation tokens in the output buffer to simple
 * linefeeds. */
LINE_ED_CONVERT_CONTINUATIONS = 0x02u,
/* remove line continuation tokens from the output buffer, so that all
 * chars are on a single line.
 * note that this value includes the LINE_ED_CONVERT_CONTINUATIONS bit. */
LINE_ED_REMOVE_CONTINUATIONS = 0x06u,
};
/* state of an interactive line editor */
struct line_ed {
/* behaviour flags (see enum line_ed_flags) */
enum line_ed_flags l_flags;
/* array of basic prompt strings, indexed by PROMPT_MAIN / PROMPT_CONT */
const char *l_prompt[2];
/* input buffer, pointer to the buffer cell that corresponds to
 * the current cursor position, and pointer to the byte AFTER the last
 * usable byte in the buffer */
char *l_buf, *l_buf_ptr, *l_buf_end;
/* pointer to the byte AFTER the last byte of the user's input */
char *l_line_end;
/* 2-dimensional coordinates of the current cursor position.
 * this does NOT include any prompts that are visible on the terminal */
size_t l_cursor_x, l_cursor_y;
/* the number of line continuations that have been inputted */
unsigned int l_continuations;
/* line_source interface through which the editor is read
 * (see line_ed_to_line_source) */
struct line_source l_line_source;
/* pointer to tty interface */
fx_tty *l_tty;
/* the lexical scope that we are currently in.
 * this is provided by components further up the input pipeline,
 * for example, when we are inside a string or if-statement. */
const char *l_scope_type;
/* array of previously entered commands */
fx_array *l_history;
/* index of the currently selected history entry */
size_t l_history_pos;
/* list of defined highlight ranges */
fx_queue l_hl_ranges;
/* list of installed hooks */
fx_queue l_hooks;
};
/* creates a line editor; returns NULL on allocation failure */
extern struct line_ed *line_ed_create(void);
/* releases a line editor created with line_ed_create() */
extern void line_ed_destroy(struct line_ed *ed);
/* turns on the given flag bits (flags are never cleared) */
extern void line_ed_set_flags(struct line_ed *ed, enum line_ed_flags flags);
/* sets the name of the current lexical scope, or NULL for none.
 * the pointer is stored, not copied. */
extern void line_ed_set_scope_type(struct line_ed *ed, const char *scope_type);
/* highlight interface.
 * NOTE(review): no definitions for these three functions are visible
 * alongside the other line editor sources -- confirm they exist. */
extern void line_ed_put_highlight(
struct line_ed *ed,
unsigned long start_x,
unsigned long start_y,
unsigned long end_x,
unsigned long end_y,
const struct fx_tty_vmode *vmode);
extern void line_ed_clear_highlights(struct line_ed *ed);
extern void line_ed_print_highlights(struct line_ed *ed);
/* installs / uninstalls a hook. the editor stores a link inside `hook`,
 * so the hook object must outlive its installation. */
extern void line_ed_add_hook(struct line_ed *ed, struct line_ed_hook *hook);
extern void line_ed_remove_hook(struct line_ed *ed, struct line_ed_hook *hook);
/* reads one line of input into `out`. returns the size of the input
 * including its terminator, 0 if nothing was entered, or (size_t)-1 on
 * end-of-file. */
extern size_t line_ed_readline(struct line_ed *ed, fx_stringstream *out);
/* returns the line_source interface embedded in the editor, for passing to
 * the line_source_*() functions. */
static inline struct line_source *line_ed_to_line_source(struct line_ed *ed)
{
	struct line_source *iface = &ed->l_line_source;

	return iface;
}
#endif
+27
View File
@@ -0,0 +1,27 @@
#include <stdio.h>
#include "line-ed.h"
#include "prompt.h"
/* prints the prompt for the current line.
 * the continuation prompt is used when a scope type is set or when at least
 * one continuation line has been entered; otherwise the main prompt is
 * used. when a scope type is set, its name is printed before the prompt. */
void show_prompt(struct line_ed *ed)
{
	int type = PROMPT_MAIN;
	if (ed->l_scope_type || ed->l_continuations > 0) {
		type = PROMPT_CONT;
	}

	if (ed->l_scope_type) {
		/* this is a temporary solution to show the current
		 * scope type, until prompts are implemented properly. */
		fputs(ed->l_scope_type, stdout);
	}

	fputs(ed->l_prompt[type], stdout);
	fflush(stdout);
}
/* returns the length in bytes of the prompt string identified by
 * `prompt_id` (PROMPT_MAIN or PROMPT_CONT). the id is not range-checked. */
size_t prompt_length(struct line_ed *ed, int prompt_id)
{
	const char *prompt = ed->l_prompt[prompt_id];

	return strlen(prompt);
}
+12
View File
@@ -0,0 +1,12 @@
#ifndef LINE_ED_PROMPT_H_
#define LINE_ED_PROMPT_H_
/* indices into the l_prompt array of struct line_ed */
#define PROMPT_MAIN 0
#define PROMPT_CONT 1
struct line_ed;
/* prints the prompt appropriate for the editor's current state */
extern void show_prompt(struct line_ed *ed);
/* returns the length in bytes of the given prompt string */
extern size_t prompt_length(struct line_ed *ed, int prompt_id);
#endif
+265
View File
@@ -0,0 +1,265 @@
#include "refresh.h"
#include "buffer.h"
#include "cursor.h"
#include "line-ed.h"
#include <fx/term/tty.h>
#include <stdio.h>
#include <stdlib.h>
/* prints the characters of `s` to the terminal, stopping at the first
 * linefeed (\n) or null char (\0), neither of which is printed.
 *
 * (x, y) are the data coordinates of the first character in `s`. they are
 * intended for applying highlight ranges and are not used at present.
 */
void print_text(struct line_ed *ed, size_t x, size_t y, const char *s)
{
	(void)ed;
	(void)x;
	(void)y;

	const char *p = s;
	while (*p != '\n' && *p != '\0') {
		fputc(*p, stdout);
		p++;
	}
}
/* prints the whole editor buffer, one buffer line per terminal line.
 * consecutive lines are separated by a carriage return and linefeed;
 * nothing is printed after the final line. */
void print_buffer(struct line_ed *ed)
{
	const char *line = ed->l_buf;
	unsigned int row = 0;

	for (;;) {
		print_text(ed, 0, row, line);

		/* advance to the start of the next line */
		line += strcspn(line, "\n");
		if (*line == '\n') {
			line++;
		}

		if (*line == '\0') {
			break;
		}

		row++;
		fputc('\r', stdout);
		fputc('\n', stdout);
	}
}
/* redraws the current line after a character has been inserted into the
 * line_ed buffer.
 *
 * `state` holds the cursor position from BEFORE the insert. the steps are:
 *   1. find the start of the line that was modified.
 *   2. pick the first character to redraw: the column the cursor was at
 *      before the insert, or column 0 when LINE_ED_FULL_REPRINT is set (in
 *      which case the physical cursor is first moved back to column 0).
 *   3. print the line from that character to its end.
 *   4. move the physical cursor back so that it sits just AFTER the
 *      inserted character.
 */
void put_refresh(struct line_ed *ed, struct refresh_state *state)
{
	const char *line = line_start(ed, ed->l_cursor_y);
	size_t line_len = strcspn(line, "\n");

	size_t first = state->r_prev_cursor_x;

	/* distance from the insert position to the end of the line, less
	 * one cell so that the cursor ends up after the new character */
	long back = (long)(line_len - first) - 1;

	if (ed->l_flags & LINE_ED_FULL_REPRINT) {
		if (first) {
			fx_tty_move_cursor_x(
				ed->l_tty,
				FX_TTY_POS_CURSOR,
				-(long long)first);
		}

		first = 0;
	}

	print_text(ed, first, ed->l_cursor_y, line + first);

	fx_tty_move_cursor_x(ed->l_tty, FX_TTY_POS_CURSOR, -back);
	fflush(stdout);
}
/* this function is called after a character is removed from the line_ed
 * buffer, IF the character was a linefeed.
 *
 * this is separate from backspace_simple_refresh because, in this situation,
 * the cursor position depends on the length of the previous line before
 * the linefeed was deleted, and we have to reprint every line following the
 * two that were combined.
 */
void backspace_nl_refresh(struct line_ed *ed, struct refresh_state *state)
{
	/* get the data for the line that is being refreshed.
	 * relative to where the cursor was before the linefeed was deleted,
	 * this is the PREVIOUS line. */
	const char *line_buf = line_start(ed, ed->l_cursor_y);
	size_t line_len = strcspn(line_buf, "\n");

	/* the index of the first char in line_buf that needs to be reprinted.
	 * subtract one to account for the linefeed that has since been
	 * deleted. */
	size_t start_x = state->r_prev_line_len - 1;

	/* the column to move the physical cursor to after it has been moved
	 * to the previous line.
	 * NOTE that this number includes the length of the prompt!
	 * we add 1 to start_x to ensure that the cursor is moved to the cell
	 * AFTER the last char of the line. */
	size_t new_x;
	line_ed_coords_to_physical_coords(
		ed,
		start_x + 1,
		ed->l_cursor_y,
		&new_x,
		NULL);

	/* the physical cursor is currently at the beginning of the line that
	 * has just been moved up. from here, clear this line and the rest
	 * from the screen. */
	fx_tty_clear(ed->l_tty, FX_TTY_CLEAR_SCREEN | FX_TTY_CLEAR_FROM_CURSOR);

	if (ed->l_flags & LINE_ED_FULL_REPRINT) {
		/* next, move the physical cursor up and to the beginning of
		 * the previous line */
		size_t tmp_x;
		line_ed_coords_to_physical_coords(
			ed,
			0,
			ed->l_cursor_y,
			&tmp_x,
			NULL);

		fx_tty_move_cursor_y(ed->l_tty, FX_TTY_POS_CURSOR, -1);
		fx_tty_move_cursor_x(ed->l_tty, FX_TTY_POS_START, tmp_x);
		start_x = 0;
	} else {
		/* next, move the physical cursor up and to the end of the
		 * previous line */
		fx_tty_move_cursor_y(ed->l_tty, FX_TTY_POS_CURSOR, -1);
		fx_tty_move_cursor_x(ed->l_tty, FX_TTY_POS_START, new_x);
	}

	/* now reprint all of the buffer lines, starting with the first of the
	 * two lines that were concatenated. */
	size_t ydiff = 0;
	while (1) {
		print_text(
			ed,
			start_x,
			ed->l_cursor_y + ydiff,
			line_buf + start_x);

		/* advance to the next line. only step over a linefeed that
		 * is really there: the last line ends in a null char, and
		 * stepping past that would run off the end of the text. */
		line_buf += line_len;
		if (*line_buf == '\n') {
			line_buf++;
		}

		if (*line_buf == '\0') {
			break;
		}

		line_len = strcspn(line_buf, "\n");
		start_x = 0;

		fputc('\r', stdout);
		fputc('\n', stdout);
		ydiff++;
	}

	/* finally, move the cursor BACK to the point where the two lines
	 * were concatenated. this is an UPWARD move, so the delta is
	 * negative, as for the single-line moves above. */
	if (ydiff) {
		fx_tty_move_cursor_y(
			ed->l_tty,
			FX_TTY_POS_CURSOR,
			-(long long)ydiff);
	}

	fx_tty_move_cursor_x(ed->l_tty, FX_TTY_POS_START, new_x);
	fflush(stdout);
}
/* redraws the current line after a character other than a linefeed has been
 * removed from the line_ed buffer.
 *
 * the line is reprinted from the new cursor column (or from column 0 when
 * LINE_ED_FULL_REPRINT is set), followed by a single space that overwrites
 * the stale last cell. the physical cursor is then moved back to the new
 * cursor column.
 */
void backspace_simple_refresh(struct line_ed *ed, struct refresh_state *state)
{
	(void)state;

	const char *line = line_start(ed, ed->l_cursor_y);
	size_t line_len = strcspn(line, "\n");

	/* first column that needs to be reprinted */
	size_t first = ed->l_cursor_x;

	/* distance from that column to the end of the line, plus one cell
	 * for the blanking space printed after the text */
	long long back = (long long)(line_len - first) + 1;

	if (ed->l_flags & LINE_ED_FULL_REPRINT) {
		if (first) {
			fx_tty_move_cursor_x(
				ed->l_tty,
				FX_TTY_POS_CURSOR,
				-(long long)first);
		}

		first = 0;
	}

	/* step back over the deleted cell before reprinting */
	fx_tty_move_cursor_x(ed->l_tty, FX_TTY_POS_CURSOR, -1);
	print_text(ed, first, ed->l_cursor_y, line + first);
	fputc(' ', stdout);

	fx_tty_move_cursor_x(ed->l_tty, FX_TTY_POS_CURSOR, -back);
	fflush(stdout);
}
+28
View File
@@ -0,0 +1,28 @@
#ifndef LINE_ED_REFRESH_H_
#define LINE_ED_REFRESH_H_
#include <stddef.h>
struct line_ed;
/* snapshot of editor state taken BEFORE a buffer modification, passed to
 * the refresh functions so they can work out what to redraw */
struct refresh_state {
/* cursor position before the update was performed (excluding the
 * prompt) */
size_t r_prev_cursor_x, r_prev_cursor_y;
/* when a backspace results in two separate lines being combined,
 * this property contains the length of the first of the two combined
 * lines BEFORE the concatenation was performed */
size_t r_prev_line_len;
};
/* prints `s` up to (not including) the first linefeed or null char */
extern void print_text(struct line_ed *ed, size_t x, size_t y, const char *s);
/* prints the whole editor buffer */
extern void print_buffer(struct line_ed *ed);
/* redraws after a character was inserted */
extern void put_refresh(struct line_ed *ed, struct refresh_state *state);
/* redraws after a linefeed was deleted */
extern void backspace_nl_refresh(
struct line_ed *ed,
struct refresh_state *state);
/* redraws after a character other than a linefeed was deleted */
extern void backspace_simple_refresh(
struct line_ed *ed,
struct refresh_state *state);
#endif
+39
View File
@@ -0,0 +1,39 @@
#include "line-source.h"
/* forwards to the source's s_get_name callback.
 * returns BSHELL_ERR_NOT_SUPPORTED if the source does not provide one. */
enum bshell_status line_source_get_name(
	struct line_source *src,
	char *buf,
	size_t count,
	size_t *nr_read)
{
	if (!src->s_get_name) {
		return BSHELL_ERR_NOT_SUPPORTED;
	}

	return src->s_get_name(src, buf, count, nr_read);
}
/* forwards to the source's s_readline callback.
 * returns BSHELL_ERR_NOT_SUPPORTED if the source does not provide one. */
enum bshell_status line_source_readline(
	struct line_source *src,
	fx_stringstream *out)
{
	if (!src->s_readline) {
		return BSHELL_ERR_NOT_SUPPORTED;
	}

	return src->s_readline(src, out);
}
/* forwards to the source's s_get_row callback.
 * returns BSHELL_ERR_NOT_SUPPORTED if the source does not provide one. */
enum bshell_status line_source_get_row(
	struct line_source *src,
	size_t row,
	char *buf,
	size_t count,
	size_t *nr_read)
{
	if (!src->s_get_row) {
		return BSHELL_ERR_NOT_SUPPORTED;
	}

	return src->s_get_row(src, row, buf, count, nr_read);
}
+37
View File
@@ -0,0 +1,37 @@
#ifndef LINE_SOURCE_H_
#define LINE_SOURCE_H_
#include "status.h"
#include <fx/stringstream.h>
#include <stddef.h>
/* interface for anything that can supply lines of input.
 * every callback is optional: the line_source_*() wrappers return
 * BSHELL_ERR_NOT_SUPPORTED when the corresponding pointer is NULL. */
struct line_source {
/* copies the name of the source into a buffer */
enum bshell_status (
*s_get_name)(struct line_source *, char *, size_t, size_t *);
/* reads the next line into a stringstream */
enum bshell_status (
*s_readline)(struct line_source *, fx_stringstream *);
/* copies the text of a given row into a buffer */
enum bshell_status (*s_get_row)(
struct line_source *,
size_t,
char *,
size_t,
size_t *);
};
/* copies the source's name into `buf` (capacity `count`) and stores the
 * number of characters in `*nr_read` */
extern enum bshell_status line_source_get_name(
struct line_source *src,
char *buf,
size_t count,
size_t *nr_read);
/* reads the next line from the source into `out` */
extern enum bshell_status line_source_readline(
struct line_source *src,
fx_stringstream *out);
/* copies row number `row` into `buf` (capacity `count`) and stores the
 * number of characters in `*nr_read` */
extern enum bshell_status line_source_get_row(
struct line_source *src,
size_t row,
char *buf,
size_t count,
size_t *nr_read);
#endif
+38
View File
@@ -0,0 +1,38 @@
#include "file.h"
#include "line-ed/line-ed.h"
#include <stdio.h>
/* Program entry point.
 *
 * Prints the version banner, then echoes every line read from the chosen
 * line source until a read fails:
 *   - with a command-line argument, lines come from the file argv[1];
 *   - otherwise, lines come from an interactive line editor.
 *
 * Returns 0 once the read loop ends, or 1 if the input source or the line
 * buffer could not be set up. */
int main(int argc, const char **argv)
{
	printf("B Shell " BSHELL_VERSION "\n");

	struct line_source *linesrc = NULL;

	if (argc > 1) {
		struct file *file = NULL;
		enum bshell_status status = file_open(argv[1], &file);
		/* without this check a failed open would leave `file` NULL
		 * and the read loop below would dereference it. */
		if (status != BSHELL_SUCCESS || !file) {
			fprintf(stderr,
				"bshell: cannot open '%s'\n",
				argv[1]);
			return 1;
		}

		linesrc = &file->f_base;
	} else {
		struct line_ed *ed = line_ed_create();
		if (!ed) {
			fprintf(stderr,
				"bshell: cannot create line editor\n");
			return 1;
		}

		linesrc = line_ed_to_line_source(ed);
	}

	if (!linesrc) {
		fprintf(stderr, "bshell: no input source available\n");
		return 1;
	}

	fx_stringstream *linebuf = fx_stringstream_create();
	if (!linebuf) {
		fprintf(stderr, "bshell: cannot allocate line buffer\n");
		return 1;
	}

	/* NOTE(review): the file / line editor / string stream are never
	 * released; add the matching cleanup calls once their names are
	 * confirmed. */
	while (line_source_readline(linesrc, linebuf) == BSHELL_SUCCESS) {
		printf("%s", fx_stringstream_ptr(linebuf));
		fx_stringstream_reset(linebuf);
	}

	return 0;
}
+7
View File
@@ -0,0 +1,7 @@
#ifndef MISC_H_
#define MISC_H_
/* Smaller / larger of two values.
 * These are plain macros: each argument appears twice in the expansion, so
 * an argument with side effects (e.g. `i++`, a function call) may be
 * evaluated twice. */
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif
+264
View File
@@ -0,0 +1,264 @@
#include "operator.h"
#include "parse/token.h"
/* OP(id, p, a, l, u): designated initialiser for the operator_info slot
 * indexed by OP_<id>, filling in its id, PRECEDENCE_<p>, ASSOCIATIVITY_<a>,
 * OPL_<l> (location) and OPA_<u> (arity). */
#define OP(id, p, a, l, u) \
[OP_##id] = { \
.op_id = (OP_##id), \
.op_precedence = (PRECEDENCE_##p), \
.op_associativity = (ASSOCIATIVITY_##a), \
.op_location = (OPL_##l), \
.op_arity = (OPA_##u), \
}
/* The *_OP macros below build entries for token -> operator lookup tables.
 * Each entry is stored at (token value - the index base of that token class)
 * and points at the operators[] entry for OP_<id>. */
#define TOK_OP(id, tok) [TOK_##tok - __TOK_INDEX_BASE] = &operators[OP_##id]
#define SYM_OP(id, sym) [SYM_##sym - __SYM_INDEX_BASE] = &operators[OP_##id]
#define KW_OP(id, kw) [KW_##kw - __KW_INDEX_BASE] = &operators[OP_##id]
#define TKOP_OP(id, kw) [TKOP_##kw - __TKOP_INDEX_BASE] = &operators[OP_##id]
/* clang-format off */
static const struct operator_info operators[] = {
OP(SUBEXPR, PARENTHESIS, LEFT, PREFIX, UNARY),
OP(ARRAY_START, PARENTHESIS, LEFT, PREFIX, UNARY),
OP(PAREN, PARENTHESIS, LEFT, PREFIX, UNARY),
OP(HASHTABLE_START, PARENTHESIS, LEFT, PREFIX, UNARY),
OP(ACCESS, MEMBER_ACCESS, LEFT, INFIX, BINARY),
OP(CONDITIONAL_ACCESS, MEMBER_ACCESS, LEFT, INFIX, BINARY),
OP(STATIC_ACCESS, STATIC_ACCESS, LEFT, INFIX, BINARY),
OP(SUBSCRIPT, SUBSCRIPT, LEFT, INFIX, BINARY),
OP(CONDITIONAL_SUBSCRIPT, SUBSCRIPT, LEFT, INFIX, BINARY),
OP(CAST, CAST, LEFT, PREFIX, UNARY),
OP(USPLIT, SPLIT, LEFT, PREFIX, UNARY),
OP(UJOIN, JOIN, LEFT, PREFIX, UNARY),
OP(ARRAY_DELIMITER, ARRAY, LEFT, INFIX, BINARY),
OP(INCREMENT, INCREMENT, LEFT, INFIX, BINARY),
OP(LOGICAL_NOT, NOT, LEFT, PREFIX, UNARY),
OP(RANGE, RANGE, LEFT, INFIX, BINARY),
OP(FORMAT, FORMAT, LEFT, INFIX, BINARY),
OP(MULTIPLY, MULTIPLICATION, LEFT, INFIX, BINARY),
OP(DIVIDE, MULTIPLICATION, LEFT, INFIX, BINARY),
OP(MODULO, MULTIPLICATION, LEFT, INFIX, BINARY),
OP(ADD, ADDITION, LEFT, INFIX, BINARY),
OP(SUBTRACT, ADDITION, LEFT, INFIX, BINARY),
OP(BSPLIT, COMPARISON, LEFT, INFIX, BINARY),
OP(BJOIN, COMPARISON, LEFT, INFIX, BINARY),
OP(IS, COMPARISON, LEFT, INFIX, BINARY),
OP(ISNOT, COMPARISON, LEFT, INFIX, BINARY),
OP(AS, COMPARISON, LEFT, INFIX, BINARY),
OP(EQUAL, COMPARISON, LEFT, INFIX, BINARY),
OP(NOT_EQUAL, COMPARISON, LEFT, INFIX, BINARY),
OP(GREATER_THAN, COMPARISON, LEFT, INFIX, BINARY),
OP(LESS_THAN, COMPARISON, LEFT, INFIX, BINARY),
OP(GREATER_EQUAL, COMPARISON, LEFT, INFIX, BINARY),
OP(LESS_EQUAL, COMPARISON, LEFT, INFIX, BINARY),
OP(LIKE, COMPARISON, LEFT, INFIX, BINARY),
OP(NOTLIKE, COMPARISON, LEFT, INFIX, BINARY),
OP(MATCH, COMPARISON, LEFT, INFIX, BINARY),
OP(NOTMATCH, COMPARISON, LEFT, INFIX, BINARY),
OP(IN, COMPARISON, LEFT, INFIX, BINARY),
OP(NOTIN, COMPARISON, LEFT, INFIX, BINARY),
OP(CONTAINS, COMPARISON, LEFT, INFIX, BINARY),
OP(NOTCONTAINS, COMPARISON, LEFT, INFIX, BINARY),
OP(REPLACE, COMPARISON, LEFT, INFIX, BINARY),
OP(LOGICAL_AND, LOGICAL, LEFT, INFIX, BINARY),
OP(LOGICAL_OR, LOGICAL, LEFT, INFIX, BINARY),
OP(LOGICAL_XOR, LOGICAL, LEFT, INFIX, BINARY),
OP(BINARY_AND, BITWISE, LEFT, INFIX, BINARY),
OP(BINARY_OR, BITWISE, LEFT, INFIX, BINARY),
OP(BINARY_NOT, BITWISE, LEFT, INFIX, BINARY),
OP(BINARY_XOR, BITWISE, LEFT, INFIX, BINARY),
OP(LEFT_SHIFT, BITWISE, LEFT, INFIX, BINARY),
OP(RIGHT_SHIFT, BITWISE, LEFT, INFIX, BINARY),
OP(ASSIGN, ASSIGN, LEFT, INFIX, BINARY),
OP(ADD_ASSIGN, ASSIGN, LEFT, INFIX, BINARY),
OP(SUBTRACT_ASSIGN, ASSIGN, LEFT, INFIX, BINARY),
OP(MULTIPLY_ASSIGN, ASSIGN, LEFT, INFIX, BINARY),
OP(DIVIDE_ASSIGN, ASSIGN, LEFT, INFIX, BINARY),
OP(MODULO_ASSIGN, ASSIGN, LEFT, INFIX, BINARY),
};
static const size_t nr_operators = sizeof operators / sizeof operators[0];
/* Symbol token -> operator lookup table, indexed by
 * (SYM_<sym> - __SYM_INDEX_BASE). Symbols that have no entry are left as
 * NULL pointers. */
static const struct operator_info *operator_symbols[] = {
SYM_OP(LOGICAL_NOT, BANG),
SYM_OP(ASSIGN, EQUAL),
SYM_OP(ADD, PLUS),
SYM_OP(SUBTRACT, HYPHEN),
SYM_OP(MULTIPLY, ASTERISK),
SYM_OP(DIVIDE, FORWARD_SLASH),
SYM_OP(MODULO, PERCENT),
SYM_OP(ADD_ASSIGN, PLUS_EQUAL),
SYM_OP(SUBTRACT_ASSIGN, HYPHEN_EQUAL),
SYM_OP(MULTIPLY_ASSIGN, ASTERISK_EQUAL),
SYM_OP(DIVIDE_ASSIGN, FORWARD_SLASH_EQUAL),
SYM_OP(MODULO_ASSIGN, PERCENT_EQUAL),
SYM_OP(RANGE, DOT_DOT),
SYM_OP(SUBSCRIPT, LEFT_BRACKET),
SYM_OP(CONDITIONAL_SUBSCRIPT, QUESTION_LEFT_BRACKET),
SYM_OP(ACCESS, DOT),
SYM_OP(CONDITIONAL_ACCESS, QUESTION_DOT),
SYM_OP(STATIC_ACCESS, COLON_COLON),
/* parser-internal pseudo-operators. */
/* CAST uses the same symbol as SUBSCRIPT */
/* SYM_OP(CAST, LEFT_BRACKET), */
SYM_OP(SUBEXPR, DOLLAR_LEFT_PAREN),
SYM_OP(PAREN, LEFT_PAREN),
SYM_OP(ARRAY_START, AT_LEFT_PAREN),
SYM_OP(HASHTABLE_START, AT_LEFT_BRACE),
};
/* number of slots in operator_symbols[]; used as the bounds check in
 * operator_get_by_token() */
static const size_t nr_operator_symbols = sizeof operator_symbols / sizeof operator_symbols[0];
/* Operator token (TKOP_*) -> operator lookup table, indexed by
 * (TKOP_<name> - __TKOP_INDEX_BASE). Tokens that have no entry are left as
 * NULL pointers. */
static const struct operator_info *operator_token_ops[] = {
TKOP_OP(FORMAT, F),
TKOP_OP(BINARY_AND, BAND),
TKOP_OP(BINARY_OR, BOR),
TKOP_OP(BINARY_XOR, BXOR),
TKOP_OP(BINARY_NOT, BNOT),
TKOP_OP(LEFT_SHIFT, SHL),
TKOP_OP(RIGHT_SHIFT, SHR),
TKOP_OP(EQUAL, EQ),
TKOP_OP(NOT_EQUAL, NE),
TKOP_OP(GREATER_THAN, GT),
TKOP_OP(LESS_THAN, LT),
TKOP_OP(GREATER_EQUAL, GE),
TKOP_OP(LESS_EQUAL, LE),
TKOP_OP(MATCH, MATCH),
TKOP_OP(NOTMATCH, NOTMATCH),
TKOP_OP(REPLACE, REPLACE),
TKOP_OP(LIKE, LIKE),
TKOP_OP(NOTLIKE, NOTLIKE),
TKOP_OP(IN, IN),
TKOP_OP(NOTIN, NOTIN),
TKOP_OP(CONTAINS, CONTAINS),
TKOP_OP(NOTCONTAINS, NOTCONTAINS),
TKOP_OP(LOGICAL_AND, AND),
TKOP_OP(LOGICAL_OR, OR),
TKOP_OP(LOGICAL_XOR, XOR),
TKOP_OP(LOGICAL_NOT, NOT),
/* there are also unary versions of these operators */
/* (the table maps SPLIT/JOIN to the binary forms OP_BSPLIT/OP_BJOIN; the
 * unary forms OP_USPLIT/OP_UJOIN are not reachable through this table) */
TKOP_OP(BSPLIT, SPLIT),
TKOP_OP(BJOIN, JOIN),
TKOP_OP(IS, IS),
TKOP_OP(ISNOT, ISNOT),
TKOP_OP(AS, AS),
};
/* number of slots in operator_token_ops[]; used as the bounds check in
 * operator_get_by_token() */
static const size_t nr_operator_token_ops = sizeof operator_token_ops / sizeof operator_token_ops[0];
/* clang-format on */
/* Look up the operator associated with a lexer token.
 *
 * `token` may be an operator token (strictly between __TKOP_INDEX_BASE and
 * __TKOP_INDEX_LIMIT) or a symbol token (strictly between __SYM_INDEX_BASE
 * and __SYM_INDEX_LIMIT).
 *
 * Returns the matching operator_info, or NULL when the token belongs to
 * neither class, lies beyond the end of the lookup table, or has no
 * operator assigned to it. */
const struct operator_info *operator_get_by_token(unsigned int token)
{
	const struct operator_info **table;
	size_t nr_entries;
	size_t first;

	if (token > __TKOP_INDEX_BASE && token < __TKOP_INDEX_LIMIT) {
		table = operator_token_ops;
		nr_entries = nr_operator_token_ops;
		first = __TKOP_INDEX_BASE;
	} else if (token > __SYM_INDEX_BASE && token < __SYM_INDEX_LIMIT) {
		table = operator_symbols;
		nr_entries = nr_operator_symbols;
		first = __SYM_INDEX_BASE;
	} else {
		return NULL;
	}

	/* the tables only extend as far as their last populated entry */
	const size_t slot = token - first;
	return slot < nr_entries ? table[slot] : NULL;
}
const struct operator_info *operator_get_by_id(enum operator_id id)
{
if (id >= nr_operators) {
return NULL;
}
const struct operator_info *op = &operators[id];
if (op->op_id != id) {
return NULL;
}
return op;
}
#define ENUM_STR(x) \
case x: \
return #x
const char *operator_id_to_string(enum operator_id op)
{
switch (op) {
ENUM_STR(OP_NONE);
ENUM_STR(OP_ADD);
ENUM_STR(OP_SUBTRACT);
ENUM_STR(OP_MULTIPLY);
ENUM_STR(OP_DIVIDE);
ENUM_STR(OP_MODULO);
ENUM_STR(OP_INCREMENT);
ENUM_STR(OP_DECREMENT);
ENUM_STR(OP_LEFT_SHIFT);
ENUM_STR(OP_RIGHT_SHIFT);
ENUM_STR(OP_BINARY_AND);
ENUM_STR(OP_BINARY_OR);
ENUM_STR(OP_BINARY_XOR);
ENUM_STR(OP_BINARY_NOT);
ENUM_STR(OP_LESS_THAN);
ENUM_STR(OP_GREATER_THAN);
ENUM_STR(OP_EQUAL);
ENUM_STR(OP_NOT_EQUAL);
ENUM_STR(OP_LESS_EQUAL);
ENUM_STR(OP_GREATER_EQUAL);
ENUM_STR(OP_ASSIGN);
ENUM_STR(OP_ADD_ASSIGN);
ENUM_STR(OP_SUBTRACT_ASSIGN);
ENUM_STR(OP_MULTIPLY_ASSIGN);
ENUM_STR(OP_DIVIDE_ASSIGN);
ENUM_STR(OP_MODULO_ASSIGN);
ENUM_STR(OP_LOGICAL_AND);
ENUM_STR(OP_LOGICAL_OR);
ENUM_STR(OP_LOGICAL_XOR);
ENUM_STR(OP_LOGICAL_NOT);
ENUM_STR(OP_RANGE);
ENUM_STR(OP_MATCH);
ENUM_STR(OP_NOTMATCH);
ENUM_STR(OP_REPLACE);
ENUM_STR(OP_LIKE);
ENUM_STR(OP_NOTLIKE);
ENUM_STR(OP_IN);
ENUM_STR(OP_NOTIN);
ENUM_STR(OP_FORMAT);
ENUM_STR(OP_CONTAINS);
ENUM_STR(OP_NOTCONTAINS);
ENUM_STR(OP_USPLIT);
ENUM_STR(OP_BSPLIT);
ENUM_STR(OP_UJOIN);
ENUM_STR(OP_BJOIN);
ENUM_STR(OP_IS);
ENUM_STR(OP_ISNOT);
ENUM_STR(OP_AS);
ENUM_STR(OP_SUBSCRIPT);
ENUM_STR(OP_CONDITIONAL_SUBSCRIPT);
ENUM_STR(OP_ARRAY_DELIMITER);
ENUM_STR(OP_ACCESS);
ENUM_STR(OP_STATIC_ACCESS);
ENUM_STR(OP_CONDITIONAL_ACCESS);
ENUM_STR(OP_CAST);
ENUM_STR(OP_SUBEXPR);
ENUM_STR(OP_PAREN);
ENUM_STR(OP_ARRAY_START);
ENUM_STR(OP_HASHTABLE_START);
default:
return "";
}
}
+123
View File
@@ -0,0 +1,123 @@
#ifndef OPERATOR_H_
#define OPERATOR_H_
/* Precedence levels, numbered upward from PRECEDENCE_MINIMUM (0).
 * NOTE(review): presumably a larger value means the operator binds more
 * tightly -- confirm against the expression parser. */
enum operator_precedence {
PRECEDENCE_MINIMUM = 0,
PRECEDENCE_ASSIGN,
PRECEDENCE_PIPELINE,
PRECEDENCE_LOGICAL,
PRECEDENCE_BITWISE,
PRECEDENCE_COMPARISON,
PRECEDENCE_ADDITION,
PRECEDENCE_MULTIPLICATION,
PRECEDENCE_NEGATE,
PRECEDENCE_FORMAT,
PRECEDENCE_RANGE,
PRECEDENCE_NOT,
PRECEDENCE_INCREMENT,
PRECEDENCE_ARRAY,
PRECEDENCE_JOIN,
PRECEDENCE_SPLIT,
PRECEDENCE_CAST,
PRECEDENCE_SUBSCRIPT,
PRECEDENCE_STATIC_ACCESS,
PRECEDENCE_MEMBER_ACCESS,
PRECEDENCE_PARENTHESIS,
};
/* Grouping direction of an operator (struct operator_info.op_associativity). */
enum operator_associativity {
ASSOCIATIVITY_LEFT,
ASSOCIATIVITY_RIGHT,
};
/* Position of an operator relative to its operand(s)
 * (struct operator_info.op_location). */
enum operator_location {
OPL_PREFIX,
OPL_INFIX,
OPL_POSTFIX,
};
/* Number of operands an operator takes (struct operator_info.op_arity). */
enum operator_arity {
OPA_UNARY,
OPA_BINARY,
};
/* Identifies an operator. The values are used directly as indices into the
 * operator description table in operator.c, so they must stay small and
 * contiguous. OP_NONE is 0. */
enum operator_id {
OP_NONE = 0,
OP_ADD,
OP_SUBTRACT,
OP_MULTIPLY,
OP_DIVIDE,
OP_MODULO,
OP_INCREMENT,
OP_DECREMENT,
OP_LEFT_SHIFT,
OP_RIGHT_SHIFT,
OP_BINARY_AND,
OP_BINARY_OR,
OP_BINARY_XOR,
OP_BINARY_NOT,
OP_LESS_THAN,
OP_GREATER_THAN,
OP_EQUAL,
OP_NOT_EQUAL,
OP_LESS_EQUAL,
OP_GREATER_EQUAL,
OP_ASSIGN,
OP_ADD_ASSIGN,
OP_SUBTRACT_ASSIGN,
OP_MULTIPLY_ASSIGN,
OP_DIVIDE_ASSIGN,
OP_MODULO_ASSIGN,
OP_LOGICAL_AND,
OP_LOGICAL_OR,
OP_LOGICAL_XOR,
OP_LOGICAL_NOT,
OP_RANGE,
OP_MATCH,
OP_NOTMATCH,
OP_REPLACE,
OP_LIKE,
OP_NOTLIKE,
OP_IN,
OP_NOTIN,
OP_FORMAT,
OP_CONTAINS,
OP_NOTCONTAINS,
/* split and join each have a unary (U) and a binary (B) form */
OP_USPLIT,
OP_BSPLIT,
OP_UJOIN,
OP_BJOIN,
OP_IS,
OP_ISNOT,
OP_AS,
OP_SUBSCRIPT,
OP_CONDITIONAL_SUBSCRIPT,
OP_ARRAY_DELIMITER,
OP_ACCESS,
OP_STATIC_ACCESS,
OP_CONDITIONAL_ACCESS,
/* these are not real operators, and are just used internally by the
 * parser. */
OP_CAST,
OP_SUBEXPR,
OP_PAREN,
OP_ARRAY_START,
OP_HASHTABLE_START,
};
/* Static description of one operator. */
struct operator_info {
enum operator_id op_id;
enum operator_precedence op_precedence;
enum operator_associativity op_associativity;
enum operator_location op_location;
enum operator_arity op_arity;
};
/* Returns the description of operator `id`, or NULL if `id` is out of range
 * or has no table entry. */
extern const struct operator_info *operator_get_by_id(enum operator_id id);
/* Returns the operator mapped to an operator token (TKOP_*) or symbol token
 * (SYM_*), or NULL if the token is not associated with an operator. */
extern const struct operator_info *operator_get_by_token(unsigned int token);
/* Returns the enumerator name of `op` (e.g. "OP_ADD"), or "" if unknown. */
extern const char *operator_id_to_string(enum operator_id op);
#endif
+94
View File
@@ -0,0 +1,94 @@
#ifndef LEX_H_
#define LEX_H_
#include "../status.h"
#include "token.h"
#include <fx/queue.h>
#include <fx/string.h>
#include <fx/stringstream.h>
#define LEX_STATE_MAX_TERMINATORS 16
struct line_source;
/* Option flags for a lexer context (see the `flags` parameter of
 * lex_ctx_init() and lex_ctx.lex_flags). */
enum lex_flags {
/* NOTE(review): presumably makes the lexer print each token it
 * produces -- confirm against the implementation */
LEX_PRINT_TOKENS = 0x01u,
};
/* Per-token behaviour flags (see lex_token_def.flags). */
enum lex_token_flags {
/* a token with this flag not only interrupts the word currently being
 * scanned, but also stops multi-words */
LEX_TOKEN_TERMINATES_WORD = 0x01u,
/* a token with this flag can appear at the start of an arithmetic
 * expression. a statement that encounters this token as its first char
 * will switch to arithmetic mode */
LEX_TOKEN_UNARY_ARITHMETIC = 0x02u,
/* if a token has this flag defined, the lexer will
 * switch to command mode after encountering it. */
LEX_TOKEN_COMMAND_MODE = 0x08u,
/* if a token has this flag defined, the lexer will
 * switch to statement mode after encountering it. */
LEX_TOKEN_STATEMENT_MODE = 0x10u,
};
/* Identifies a lexer state type. Each value is a distinct bit;
 * lex_token_def.enabled_states has this type, presumably so that several
 * states can be OR'd together there -- confirm. */
enum lex_state_type_id {
LEX_STATE_STATEMENT = 0x01u,
LEX_STATE_COMMAND = 0x02u,
LEX_STATE_ARITHMETIC = 0x04u,
LEX_STATE_STRING = 0x08u,
LEX_STATE_WORD = 0x10u,
LEX_STATE_HASHTABLE = 0x20u,
};
/* Definition of a named token: its id, its text, and the lexer states and
 * flags that apply to it. */
struct lex_token_def {
int id;
const char *name;
/* NOTE(review): presumably a hash of `name`; confirm where it is set */
uint64_t name_hash;
enum lex_state_type_id enabled_states;
enum lex_token_flags flags;
};
/* Node of the symbol tree referenced by lex_ctx.lex_sym_tree. Each node
 * carries one character, an optional token definition and a queue of child
 * nodes. NOTE(review): looks like a per-character trie used to match
 * multi-character symbols -- confirm against the lexer implementation. */
struct lex_symbol_node {
char s_char;
struct lex_token_def *s_def;
fx_queue_entry s_entry;
fx_queue s_children;
};
/* One entry of the lexer's state queue (lex_ctx.lex_state). */
struct lex_state {
const struct lex_state_type *s_type;
/* tokens that terminate this state; at most LEX_STATE_MAX_TERMINATORS,
 * with the number in use tracked by s_nr_terminators */
unsigned int s_terminators[LEX_STATE_MAX_TERMINATORS];
unsigned int s_nr_terminators;
unsigned int s_paren_depth;
fx_queue_entry s_entry;
fx_string *s_tempstr;
/* state-specific flags (e.g. STATEMENT_F_DISABLE_KEYWORDS) */
unsigned int s_flags;
};
/* Lexer context: input source, buffers, queued tokens and current state.
 * Initialise with lex_ctx_init() and release with lex_ctx_cleanup(). */
struct lex_ctx {
enum lex_flags lex_flags;
fx_queue lex_tokens;
struct line_source *lex_src;
fx_stringstream *lex_buf;
fx_string *lex_tmp;
fx_wchar lex_ch;
fx_queue lex_state;
enum token_type lex_prev_token;
struct char_cell lex_cursor, lex_start, lex_end;
struct lex_symbol_node *lex_sym_tree;
enum bshell_status lex_status;
};
extern enum bshell_status lex_ctx_init(
struct lex_ctx *ctx,
enum lex_flags flags,
struct line_source *src);
extern enum bshell_status lex_ctx_cleanup(struct lex_ctx *ctx);
/* NOTE(review): peek/claim/discard presumably inspect, take ownership of, and
 * drop the next queued token respectively -- confirm against lex.c */
extern struct lex_token *lex_ctx_peek(struct lex_ctx *ctx);
extern struct lex_token *lex_ctx_claim(struct lex_ctx *ctx);
extern void lex_ctx_discard(struct lex_ctx *ctx);
#endif
+232
View File
@@ -0,0 +1,232 @@
#include "lex-internal.h"
/* Handle a '-' symbol that has just been read in arithmetic state.
 *
 * If the next character is not alphanumeric, the hyphen is emitted as a
 * plain SYM_HYPHEN symbol. Otherwise the hyphen is treated as the start of a
 * word, which is read (hyphen included) and classified as:
 *   - an integer: a SYM_HYPHEN symbol is emitted first, followed by the
 *     (positive) integer token;
 *   - an operator word (via convert_word_to_operator);
 *   - a plain word.
 *
 * Returns BSHELL_SUCCESS, or the error reported by read_word(). */
static enum bshell_status arithmetic_hyphen(struct lex_ctx *ctx)
{
	fx_wchar c = peek_char(ctx);

	if (!fx_wchar_is_alnum(c)) {
		push_symbol(ctx, SYM_HYPHEN);
		handle_lex_state_transition(ctx, SYM_HYPHEN);
		return BSHELL_SUCCESS;
	}

	struct lex_token *tok = NULL;
	enum bshell_status status = read_word(
		ctx,
		READ_NO_SET_TOKEN_START | READ_APPEND_HYPHEN,
		&tok);
	if (status != BSHELL_SUCCESS) {
		return status;
	}

	unsigned int token_type = TOK_WORD;

	if (convert_word_to_int(tok)) {
		token_type = TOK_INT;
		/* because of APPEND_HYPHEN (which is needed to ensure operator
		 * tokens are detected properly), the resulting number will be
		 * negative.
		 * this token will be preceded by a HYPHEN token, so the number
		 * must be positive */
		tok->tok_int *= -1;
		push_symbol(ctx, SYM_HYPHEN);
	} else if (convert_word_to_operator(ctx, tok)) {
		token_type = TOK_OPERATOR;
	}

	/* apply the state transition for the token that was produced, as the
	 * statement and hashtable hyphen handlers do; previously token_type
	 * was computed here but never used. */
	handle_lex_state_transition(ctx, token_type);
	enqueue_token(ctx, tok);

	return BSHELL_SUCCESS;
}
static enum bshell_status arithmetic_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
handle_lex_state_transition(ctx, sym->id);
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DQUOTE:
return BSHELL_SUCCESS;
case SYM_SQUOTE:
status = read_literal_string(ctx, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
case SYM_HYPHEN:
return arithmetic_hyphen(ctx);
case SYM_HASH:
return read_line_comment(ctx);
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_DOLLAR_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
break;
}
push_symbol(ctx, sym->id);
return BSHELL_SUCCESS;
}
/* Read one word in arithmetic state, classify it and enqueue it.
 *
 * The word is converted to a keyword token if it names a keyword, otherwise
 * to an integer token if it is a valid number; failing both it stays a plain
 * word. The state transition for the resulting token type is applied before
 * the token is enqueued.
 *
 * Returns BSHELL_SUCCESS, or the error reported by read_word(). */
static enum bshell_status arithmetic_word(struct lex_ctx *ctx)
{
	struct lex_token *word = NULL;
	enum bshell_status status = read_word(ctx, 0, &word);
	if (status != BSHELL_SUCCESS) {
		return status;
	}

	unsigned int token_type = TOK_WORD;

	if (convert_word_to_keyword(word)) {
		/* transitions are keyed on the specific keyword */
		token_type = word->tok_keyword;
	} else if (convert_word_to_int(word)) {
		token_type = TOK_INT;
	}

	handle_lex_state_transition(ctx, token_type);
	enqueue_token(ctx, word);

	return BSHELL_SUCCESS;
}
/* Produce the next token(s) while the lexer is in arithmetic state.
 *
 * Leading whitespace is skipped. If it contained a newline, a single
 * TOK_LINEFEED token is emitted (and its state transition applied);
 * otherwise the input is dispatched to the symbol or word reader. */
static enum bshell_status arithmetic_pump_token(struct lex_ctx *ctx)
{
	fx_wchar ch = peek_char(ctx);
	bool saw_newline = false;

	set_token_start(ctx);

	for (; fx_wchar_is_space(ch); ch = peek_char_noread(ctx)) {
		if (ch == '\n') {
			saw_newline = true;
		}

		set_token_end(ctx);
		advance_char_noread(ctx);
	}

	if (saw_newline) {
		enqueue_token(ctx, lex_token_create(TOK_LINEFEED));
		handle_lex_state_transition(ctx, TOK_LINEFEED);
		return BSHELL_SUCCESS;
	}

	return char_can_begin_symbol(ctx, ch) ? arithmetic_symbol(ctx)
					      : arithmetic_word(ctx);
}
/* State transitions triggered by tokens encountered in arithmetic state.
 * The list is terminated by LINK_END. */
static const struct lex_state_link links[] = {
LINK_CHANGE(TOK_WORD, LEX_STATE_COMMAND),
LINK_CHANGE(SYM_EQUAL, LEX_STATE_STATEMENT),
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
LINK_POP(SYM_RIGHT_PAREN),
LINK_CHANGE(SYM_SEMICOLON, LEX_STATE_STATEMENT),
LINK_CHANGE(TOK_LINEFEED, LEX_STATE_STATEMENT),
LINK_CHANGE(SYM_PIPE, LEX_STATE_STATEMENT),
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
LINK_PUSH(
SYM_LEFT_PAREN,
LEX_STATE_STATEMENT,
STATEMENT_F_DISABLE_KEYWORDS),
LINK_END,
};
/* Keywords listed for arithmetic state; terminated by KW_NONE. */
static const unsigned int keywords[] = {
KW_IF,
KW_ELSEIF,
KW_ELSE,
KW_NONE,
};
/* Operator tokens listed for arithmetic state; terminated by TKOP_NONE. */
static const unsigned int operators[] = {
TKOP_F, TKOP_BAND, TKOP_BOR, TKOP_BXOR,
TKOP_BNOT, TKOP_SHL, TKOP_SHR, TKOP_EQ,
TKOP_NE, TKOP_GT, TKOP_LT, TKOP_GE,
TKOP_LE, TKOP_MATCH, TKOP_NOTMATCH, TKOP_REPLACE,
TKOP_LIKE, TKOP_NOTLIKE, TKOP_IN, TKOP_NOTIN,
TKOP_CONTAINS, TKOP_NOTCONTAINS, TKOP_AND, TKOP_OR,
TKOP_XOR, TKOP_NOT, TKOP_SPLIT, TKOP_JOIN,
TKOP_IS, TKOP_ISNOT, TKOP_AS, TKOP_NONE,
};
/* Symbols listed for arithmetic state; terminated by SYM_NONE.
 * NOTE(review): SYM_AT_LEFT_PAREN is listed for command state but not here,
 * even though operator.c maps it to OP_ARRAY_START -- confirm whether that
 * omission is intentional. */
static const unsigned int symbols[] = {
SYM_BANG,
SYM_PLUS,
SYM_HYPHEN,
SYM_FORWARD_SLASH,
SYM_ASTERISK,
SYM_AMPERSAND,
SYM_PERCENT,
SYM_SQUOTE,
SYM_DQUOTE,
SYM_HASH,
SYM_DOLLAR,
SYM_DOLLAR_LEFT_PAREN,
SYM_DOLLAR_LEFT_BRACE,
SYM_AT,
SYM_AT_LEFT_BRACE,
SYM_PIPE,
SYM_COMMA,
SYM_SEMICOLON,
SYM_LEFT_PAREN,
SYM_RIGHT_PAREN,
SYM_LEFT_BRACE,
SYM_RIGHT_BRACE,
SYM_LEFT_BRACKET,
SYM_RIGHT_BRACKET,
SYM_QUESTION_DOT,
SYM_QUESTION_LEFT_BRACKET,
SYM_EQUAL,
SYM_PLUS_EQUAL,
SYM_HYPHEN_EQUAL,
SYM_FORWARD_SLASH_EQUAL,
SYM_ASTERISK_EQUAL,
SYM_PERCENT_EQUAL,
SYM_DOT,
SYM_DOT_DOT,
SYM_COLON_COLON,
SYM_NONE,
};
/* Descriptor of the arithmetic lexer state, tying the token pump to the
 * tables above. */
const struct lex_state_type lex_arithmetic_state = {
.s_id = LEX_STATE_ARITHMETIC,
.s_pump_token = arithmetic_pump_token,
.s_links = links,
.s_keywords = keywords,
.s_operators = operators,
.s_symbols = symbols,
};
+226
View File
@@ -0,0 +1,226 @@
#include "../token.h"
#include "lex-internal.h"
/* Decide whether character `c`, found directly after a variable reference,
 * continues the current word.
 *
 * Alphanumeric characters and '$' continue the word; whitespace never does;
 * any other character continues it unless it can begin a symbol in word
 * state. */
static bool char_can_continue_word(struct lex_ctx *ctx, fx_wchar c)
{
	if (fx_wchar_is_alnum(c)) {
		return true;
	}

	if (fx_wchar_is_space(c)) {
		return false;
	}

	return c == '$'
	       || !char_can_begin_symbol_in_state(ctx, c, LEX_STATE_WORD);
}
static enum bshell_status command_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
handle_lex_state_transition(ctx, sym->id);
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DQUOTE:
return BSHELL_SUCCESS;
case SYM_SQUOTE:
status = read_literal_string(ctx, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
case SYM_HASH:
return read_line_comment(ctx);
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
if (char_can_continue_word(ctx, peek_char(ctx))) {
lex_state_push(ctx, LEX_STATE_WORD, 0);
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_DOLLAR_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
if (char_can_continue_word(ctx, peek_char(ctx))) {
lex_state_push(ctx, LEX_STATE_WORD, 0);
}
enqueue_token(ctx, tok);
return status;
default:
break;
}
push_symbol(ctx, sym->id);
return BSHELL_SUCCESS;
}
/* Check whether `s` spells an output redirection operator.
 *
 * Accepted forms are an optional run of digits followed by one or two '>'
 * characters and nothing else: ">", ">>", "2>", "12>>", ...
 *
 * Returns false for the empty string, for a string without any '>' (a bare
 * number is not a redirection), for more than two '>', for digits after a
 * '>', and for any other character. */
static bool string_is_redirection(const char *s)
{
	unsigned int nr_angles = 0;

	while (*s) {
		fx_wchar c = fx_wchar_utf8_codepoint_decode(s);

		if (fx_wchar_is_number(c)) {
			/* digits are only allowed in front of the '>' */
			if (nr_angles) {
				return false;
			}
		} else if (c == '>') {
			nr_angles++;
			if (nr_angles > 2) {
				return false;
			}
		} else {
			return false;
		}

		s += fx_wchar_utf8_codepoint_stride(s);
	}

	/* at least one '>' is required: "123" on its own is just a word */
	return nr_angles > 0;
}
static enum bshell_status command_word(struct lex_ctx *ctx)
{
struct lex_token *word = NULL;
enum bshell_status status
= read_word(ctx, READ_NO_NUMBER_RECOGNITION, &word);
if (status != BSHELL_SUCCESS) {
return status;
}
bool continue_word = false;
fx_wchar c = peek_char(ctx);
const char *s = word->tok_str;
if (char_can_begin_symbol_in_state(ctx, c, LEX_STATE_WORD)) {
continue_word = true;
}
if (char_has_flags(ctx, c, LEX_TOKEN_TERMINATES_WORD)) {
continue_word = false;
}
if (string_is_redirection(s)) {
continue_word = false;
}
if (continue_word) {
lex_state_push(ctx, LEX_STATE_WORD, 0);
}
enqueue_token(ctx, word);
return BSHELL_SUCCESS;
}
/* Produce the next token(s) while the lexer is in command state.
 *
 * Leading whitespace is skipped. If it contained a newline, a single
 * TOK_LINEFEED token is emitted (and its state transition applied);
 * otherwise the input is dispatched to the symbol or word reader. */
enum bshell_status command_pump_token(struct lex_ctx *ctx)
{
	fx_wchar ch = peek_char(ctx);
	bool saw_newline = false;

	set_token_start(ctx);

	for (; fx_wchar_is_space(ch); ch = peek_char_noread(ctx)) {
		if (ch == '\n') {
			saw_newline = true;
		}

		set_token_end(ctx);
		advance_char_noread(ctx);
	}

	if (saw_newline) {
		enqueue_token(ctx, lex_token_create(TOK_LINEFEED));
		handle_lex_state_transition(ctx, TOK_LINEFEED);
		return BSHELL_SUCCESS;
	}

	return char_can_begin_symbol(ctx, ch) ? command_symbol(ctx)
					      : command_word(ctx);
}
/* State transitions triggered by tokens encountered in command state.
 * The list is terminated by LINK_END.
 *
 * The table is private to this file (it is only reached through
 * lex_command_state.s_links), so it is declared static like the equivalent
 * tables of the other lexer states; this also keeps the generic name `links`
 * out of the global symbol namespace. */
static const struct lex_state_link links[] = {
	LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
	LINK_PUSH(
		SYM_LEFT_PAREN,
		LEX_STATE_STATEMENT,
		STATEMENT_F_DISABLE_KEYWORDS),
	LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
	LINK_POP(SYM_RIGHT_PAREN),
	LINK_POP(SYM_RIGHT_BRACE),
	LINK_CHANGE(SYM_SEMICOLON, LEX_STATE_STATEMENT),
	LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
	LINK_CHANGE(TOK_LINEFEED, LEX_STATE_STATEMENT),
	LINK_END,
};
/* Symbols listed for command state; terminated by SYM_NONE. */
static const unsigned int symbols[] = {
SYM_DQUOTE,
SYM_SQUOTE,
SYM_DOLLAR,
SYM_DOLLAR_LEFT_PAREN,
SYM_DOLLAR_LEFT_BRACE,
SYM_AT,
SYM_AT_LEFT_BRACE,
SYM_AT_LEFT_PAREN,
SYM_AMPERSAND,
SYM_PIPE,
SYM_SEMICOLON,
SYM_RIGHT_PAREN,
SYM_LEFT_PAREN,
SYM_LEFT_BRACE,
SYM_RIGHT_BRACE,
SYM_NONE,
};
/* Descriptor of the command lexer state. No keyword or operator lists are
 * provided, so s_keywords and s_operators are NULL. */
const struct lex_state_type lex_command_state = {
.s_id = LEX_STATE_COMMAND,
.s_pump_token = command_pump_token,
.s_links = links,
.s_symbols = symbols,
};
+184
View File
@@ -0,0 +1,184 @@
#include "lex-internal.h"
static enum bshell_status hashtable_hyphen(struct lex_ctx *ctx)
{
fx_wchar c = peek_char(ctx);
if (!fx_wchar_is_alnum(c)) {
push_symbol(ctx, SYM_HYPHEN);
handle_lex_state_transition(ctx, SYM_HYPHEN);
return BSHELL_SUCCESS;
}
struct lex_token *tok = NULL;
enum bshell_status status = read_word(
ctx,
READ_NO_SET_TOKEN_START | READ_APPEND_HYPHEN,
&tok);
if (status != BSHELL_SUCCESS) {
return status;
}
unsigned int token_type = TOK_WORD;
if (convert_word_to_int(tok)) {
token_type = TOK_INT;
/* because of APPEND_HYPHEN (which is needed to ensure operator
* tokens are detected properly), the resulting number will be
* negative.
* this token will be preceded by a HYPHEN token, so the number
* must be positive */
tok->tok_int *= -1;
push_symbol(ctx, SYM_HYPHEN);
} else if (convert_word_to_operator(ctx, tok)) {
token_type = tok->tok_operator;
}
handle_lex_state_transition(ctx, token_type);
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
static enum bshell_status hashtable_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
handle_lex_state_transition(ctx, sym->id);
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_SQUOTE:
status = read_literal_string(ctx, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
case SYM_HYPHEN:
return hashtable_hyphen(ctx);
case SYM_HASH:
return read_line_comment(ctx);
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_DOLLAR_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
break;
}
push_symbol(ctx, sym->id);
return BSHELL_SUCCESS;
}
static enum bshell_status hashtable_word(struct lex_ctx *ctx)
{
struct lex_token *word = NULL;
enum bshell_status status = read_word(ctx, 0, &word);
if (status != BSHELL_SUCCESS) {
return status;
}
convert_word_to_int(word);
handle_lex_state_transition(ctx, word->tok_type);
enqueue_token(ctx, word);
return BSHELL_SUCCESS;
}
/* Produce the next token(s) while the lexer is in hashtable state.
 *
 * Leading whitespace is skipped. If it contained a newline, a single
 * TOK_LINEFEED token is emitted; otherwise the input is dispatched to the
 * symbol or word reader.
 *
 * NOTE(review): unlike the arithmetic and command states, no state
 * transition is applied for TOK_LINEFEED here, although this state's link
 * table has a TOK_LINEFEED entry -- confirm whether that is intended. */
static enum bshell_status hashtable_pump_token(struct lex_ctx *ctx)
{
	fx_wchar ch = peek_char(ctx);
	bool saw_newline = false;

	set_token_start(ctx);

	for (; fx_wchar_is_space(ch); ch = peek_char_noread(ctx)) {
		if (ch == '\n') {
			saw_newline = true;
		}

		set_token_end(ctx);
		advance_char_noread(ctx);
	}

#if 1
	if (saw_newline) {
		enqueue_token(ctx, lex_token_create(TOK_LINEFEED));
		return BSHELL_SUCCESS;
	}
#endif

	return char_can_begin_symbol(ctx, ch) ? hashtable_symbol(ctx)
					      : hashtable_word(ctx);
}
/* State transitions triggered by tokens encountered in hashtable state.
 * The list is terminated by LINK_END. */
static const struct lex_state_link links[] = {
/* '=' pushes a statement state with terminators '}', ';' and linefeed */
LINK_PUSH_WITH_TERM(
SYM_EQUAL,
LEX_STATE_STATEMENT,
0,
SYM_RIGHT_BRACE,
SYM_SEMICOLON,
TOK_LINEFEED),
LINK_PUSH_WITH_TERM(
TOK_LINEFEED,
LEX_STATE_STATEMENT,
0,
SYM_SEMICOLON,
TOK_LINEFEED),
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
LINK_PUSH(
SYM_LEFT_PAREN,
LEX_STATE_STATEMENT,
STATEMENT_F_DISABLE_KEYWORDS),
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
LINK_POP2(SYM_RIGHT_BRACE, LINK_ALLOW_RECURSION),
LINK_END,
};
/* Symbols listed for hashtable state; terminated by SYM_NONE.
 * NOTE(review): hashtable_symbol() has cases for SYM_HYPHEN, SYM_DOLLAR,
 * SYM_AT and SYM_DOLLAR_LEFT_BRACE, none of which are listed here -- confirm
 * whether those cases are reachable. */
static const unsigned int symbols[] = {
SYM_EQUAL,
SYM_DQUOTE,
SYM_SQUOTE,
SYM_SEMICOLON,
SYM_RIGHT_BRACE,
SYM_DOLLAR_LEFT_PAREN,
SYM_LEFT_PAREN,
SYM_HASH,
SYM_NONE,
};
/* Descriptor of the hashtable lexer state. No keyword or operator lists are
 * provided, so s_keywords and s_operators are NULL. */
const struct lex_state_type lex_hashtable_state = {
.s_id = LEX_STATE_HASHTABLE,
.s_pump_token = hashtable_pump_token,
.s_links = links,
.s_symbols = symbols,
};
+195
View File
@@ -0,0 +1,195 @@
#ifndef PARSE_LEX_INTERNAL_H_
#define PARSE_LEX_INTERNAL_H_
#include "../../status.h"
#include "../lex.h"
#include "../token.h"
struct lex_ctx;
/* Per-state flags, stored in lex_state.s_flags. The meaning of a bit depends
 * on the type of the state it is attached to, which is why the two flags
 * below share the value 0x01. */
enum state_flags {
/* statement: don't convert matching words to keywords */
STATEMENT_F_DISABLE_KEYWORDS = 0x01u,
/* arithmetic: don't switch back to statement mode even when
 * encountering a token that would otherwise require it. */
ARITHMETIC_F_DISABLE_STATEMENTS = 0x01u,
};
/* Options for read_word(). */
enum read_flags {
/* used when the leading '-' has already been consumed: the word is read
 * with the hyphen included (see the *_hyphen handlers) */
READ_APPEND_HYPHEN = 0x01u,
/* NOTE(review): presumably keeps the token start recorded by the caller
 * instead of resetting it -- confirm against read_word() */
READ_NO_SET_TOKEN_START = 0x02u,
/* NOTE(review): presumably disables treating the word as a number --
 * confirm against read_word() */
READ_NO_NUMBER_RECOGNITION = 0x04u,
};
/* Flags for a state link (lex_state_link.l_flags). */
enum link_flags {
LINK_ALLOW_RECURSION = 0x01u,
};
/* Helpers for building the per-state link tables. Each macro expands to a
 * struct lex_state_link describing what happens when token `tok` is
 * encountered. */

/* push a new state of type `target`, created with state flags `flags` */
#define LINK_PUSH(tok, target, flags)                                          \
	((struct lex_state_link) {                                             \
		.l_token = (tok),                                              \
		.l_type = LEX_STATE_LINK_PUSH,                                 \
		.l_target = (target),                                          \
		.l_target_flags = (flags),                                     \
	})
/* like LINK_PUSH, additionally recording the given terminator tokens; the
 * list is closed with TOK_NONE. */
#define LINK_PUSH_WITH_TERM(tok, target, flags, ...)                           \
	((struct lex_state_link) {                                             \
		.l_token = (tok),                                              \
		.l_type = LEX_STATE_LINK_PUSH,                                 \
		.l_target = (target),                                          \
		.l_target_flags = (flags),                                     \
		.l_terminators = {__VA_ARGS__, TOK_NONE},                      \
	})
/* change the current state to type `target` */
#define LINK_CHANGE(tok, target)                                               \
	((struct lex_state_link) {                                             \
		.l_token = (tok),                                              \
		.l_type = LEX_STATE_LINK_CHANGE,                               \
		.l_target = (target),                                          \
	})
/* pop the current state */
#define LINK_POP(tok)                                                          \
	((struct lex_state_link) {                                             \
		.l_token = (tok),                                              \
		.l_type = LEX_STATE_LINK_POP,                                  \
	})
/* pop the current state, with link flags `flags` */
#define LINK_POP2(tok, flags)                                                  \
	((struct lex_state_link) {                                             \
		.l_token = (tok),                                              \
		.l_type = LEX_STATE_LINK_POP,                                  \
		.l_flags = (flags),                                            \
	})
/* link of type LEX_STATE_LINK_NONE for `tok` */
#define LINK_NONE(tok)                                                         \
	((struct lex_state_link) {                                             \
		.l_token = (tok),                                              \
		.l_type = LEX_STATE_LINK_NONE,                                 \
	})
/* all-zero sentinel that terminates a link table. written as {0} because
 * the empty initialiser {} is not valid ISO C before C23. */
#define LINK_END ((struct lex_state_link) {0})
/* One entry of a state's link table: what to do when token `l_token` is
 * encountered. Build entries with the LINK_* macros. */
struct lex_state_link {
unsigned int l_token;
enum {
LEX_STATE_LINK_NONE,
LEX_STATE_LINK_PUSH,
LEX_STATE_LINK_CHANGE,
LEX_STATE_LINK_POP,
} l_type;
enum link_flags l_flags;
/* state type to push / change to (PUSH and CHANGE links) */
enum lex_state_type_id l_target;
/* state flags for the pushed state (PUSH links) */
enum state_flags l_target_flags;
/* terminator tokens set by LINK_PUSH_WITH_TERM, closed with TOK_NONE */
unsigned int l_terminators[LEX_STATE_MAX_TERMINATORS];
};
typedef enum bshell_status (*lex_state_pump_token)(struct lex_ctx *);
typedef enum bshell_status (*lex_state_begin)(struct lex_ctx *);
typedef enum bshell_status (*lex_state_end)(struct lex_ctx *);
/* Static description of a lexer state type: its id, its callbacks, and the
 * tables of keywords, operators, symbols and links that apply to it. The
 * state definitions visible in this changeset leave unused members NULL and
 * terminate the tables with KW_NONE / TKOP_NONE / SYM_NONE / LINK_END. */
struct lex_state_type {
enum lex_state_type_id s_id;
lex_state_pump_token s_pump_token;
lex_state_begin s_begin;
lex_state_end s_end;
const unsigned int *s_keywords;
const unsigned int *s_operators;
const unsigned int *s_symbols;
const struct lex_state_link *s_links;
};
/* Internal lexer API shared between the per-state implementations. */

/* NOTE(review): the state implementations visible in this changeset name
 * their pump functions <state>_pump_token (e.g. arithmetic_pump_token,
 * command_pump_token) and expose them through lex_*_state descriptors;
 * confirm whether the pump_token_* declarations below are still defined
 * anywhere. */
extern enum bshell_status pump_token_statement(struct lex_ctx *ctx);
extern enum bshell_status pump_token_expression(struct lex_ctx *ctx);
extern enum bshell_status pump_token_command(struct lex_ctx *ctx);
extern enum bshell_status pump_token_arithmetic(struct lex_ctx *ctx);
extern enum bshell_status pump_token_string(struct lex_ctx *ctx);
/* token coordinate tracking */
extern void set_token_start(struct lex_ctx *ctx);
extern void set_token_end(struct lex_ctx *ctx);
/* lexer state management */
extern struct lex_state *lex_state_push(
struct lex_ctx *ctx,
enum lex_state_type_id state_type,
enum state_flags flags);
extern void lex_state_pop(struct lex_ctx *ctx);
extern struct lex_state *lex_state_get(struct lex_ctx *ctx);
extern void lex_state_change(struct lex_ctx *ctx, enum lex_state_type_id type);
extern fx_string *lex_state_get_tempstr(struct lex_ctx *ctx);
extern void lex_state_add_terminator(struct lex_state *state, unsigned int tok);
extern bool lex_state_terminates_at_token(
struct lex_ctx *ctx,
unsigned int tok);
/* character-level input.
 * NOTE(review): the *_noread variants presumably do not fetch more input
 * from the line source -- confirm against the implementation. */
extern fx_wchar peek_char(struct lex_ctx *ctx);
extern fx_wchar peek_char_noread(struct lex_ctx *ctx);
extern fx_wchar peek2_char(struct lex_ctx *ctx);
extern fx_wchar peek2_char_noread(struct lex_ctx *ctx);
extern void advance_char(struct lex_ctx *ctx);
extern void advance_char_noread(struct lex_ctx *ctx);
/* word classification. the convert_* functions return true when the word
 * token was converted (callers use the result to pick the token type). */
extern bool string_is_valid_number(const char *s, long long *out);
extern bool convert_word_to_int(struct lex_token *tok);
extern bool convert_word_to_keyword(struct lex_token *tok);
extern bool convert_word_to_operator(
struct lex_ctx *ctx,
struct lex_token *tok);
/* token output */
extern void enqueue_token(struct lex_ctx *ctx, struct lex_token *tok);
extern void enqueue_token_with_coordinates(
struct lex_ctx *ctx,
struct lex_token *tok,
const struct char_cell *start,
const struct char_cell *end);
/* readers: each returns BSHELL_SUCCESS and hands the result back through
 * `out` (where present), or returns an error status. */
extern enum bshell_status read_word(
struct lex_ctx *ctx,
enum read_flags flags,
struct lex_token **out);
extern enum bshell_status read_symbol(
struct lex_ctx *ctx,
const struct lex_token_def **out);
extern enum bshell_status read_literal_string(
struct lex_ctx *ctx,
struct lex_token **out);
extern enum bshell_status read_line_comment(struct lex_ctx *lex);
extern enum bshell_status read_var(
struct lex_ctx *ctx,
enum token_type type,
struct lex_token **out);
extern enum bshell_status read_braced_var(
struct lex_ctx *ctx,
enum token_type type,
struct lex_token **out);
extern enum bshell_status push_symbol(
struct lex_ctx *ctx,
enum token_symbol sym);
/* symbol / keyword queries.
 * NOTE(review): the character parameters below are `char`, but the callers
 * in this changeset pass an fx_wchar (from peek_char), so code points outside
 * the range of char are truncated on the way in -- confirm whether these
 * should take fx_wchar. */
extern bool char_can_begin_symbol(struct lex_ctx *ctx, char c);
extern bool char_can_begin_symbol_in_state(
struct lex_ctx *ctx,
char c,
enum lex_state_type_id state_type);
extern bool char_has_flags(
struct lex_ctx *ctx,
char c,
enum lex_token_flags flags);
extern bool keyword_has_flags(
struct lex_ctx *ctx,
enum token_keyword kw,
enum lex_token_flags flags);
extern enum lex_token_flags keyword_get_flags(
struct lex_ctx *ctx,
enum token_keyword kw);
extern bool symbol_has_flags(
struct lex_ctx *ctx,
enum token_symbol sym,
enum lex_token_flags flags);
extern enum lex_token_flags symbol_get_flags(
struct lex_ctx *ctx,
enum token_symbol sym);
extern enum token_operator get_operator_with_string(
struct lex_ctx *ctx,
const char *s);
/* applies the current state's link (if any) for `token` */
extern void handle_lex_state_transition(
struct lex_ctx *ctx,
unsigned int token);
#endif
File diff suppressed because it is too large Load Diff
+236
View File
@@ -0,0 +1,236 @@
#include "lex-internal.h"
static enum bshell_status statement_hyphen(struct lex_ctx *ctx)
{
fx_wchar c = peek_char(ctx);
if (!fx_wchar_is_alnum(c)) {
push_symbol(ctx, SYM_HYPHEN);
handle_lex_state_transition(ctx, SYM_HYPHEN);
return BSHELL_SUCCESS;
}
struct lex_token *tok = NULL;
enum bshell_status status = read_word(
ctx,
READ_NO_SET_TOKEN_START | READ_APPEND_HYPHEN,
&tok);
if (status != BSHELL_SUCCESS) {
return status;
}
unsigned int token_type = TOK_WORD;
if (convert_word_to_int(tok)) {
token_type = TOK_INT;
/* because of APPEND_HYPHEN (which is needed to ensure operator
* tokens are detected properly), the resulting number will be
* negative.
* this token will be preceded by a HYPHEN token, so the number
* must be positive */
tok->tok_int *= -1;
push_symbol(ctx, SYM_HYPHEN);
} else if (convert_word_to_operator(ctx, tok)) {
token_type = TOK_OPERATOR;
}
handle_lex_state_transition(ctx, token_type);
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
static enum bshell_status statement_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
handle_lex_state_transition(ctx, sym->id);
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DQUOTE:
return BSHELL_SUCCESS;
case SYM_HYPHEN:
return statement_hyphen(ctx);
case SYM_SQUOTE:
status = read_literal_string(ctx, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
case SYM_HASH:
return read_line_comment(ctx);
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_DOLLAR_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
break;
}
push_symbol(ctx, sym->id);
return BSHELL_SUCCESS;
}
static enum bshell_status statement_word(struct lex_ctx *ctx)
{
struct lex_token *word = NULL;
enum bshell_status status = read_word(ctx, 0, &word);
if (status != BSHELL_SUCCESS) {
return status;
}
struct lex_state *state = lex_state_get(ctx);
bool enable_keywords = !(state->s_flags & STATEMENT_F_DISABLE_KEYWORDS);
unsigned int token = TOK_WORD;
if (enable_keywords && convert_word_to_keyword(word)) {
token = word->tok_keyword;
} else if (convert_word_to_int(word)) {
token = TOK_INT;
}
handle_lex_state_transition(ctx, token);
enqueue_token(ctx, word);
return BSHELL_SUCCESS;
}
static enum bshell_status statement_pump_token(struct lex_ctx *ctx)
{
fx_wchar c = peek_char(ctx);
bool newline = false;
set_token_start(ctx);
while (fx_wchar_is_space(c)) {
if (c == '\n') {
newline = true;
}
set_token_end(ctx);
advance_char_noread(ctx);
c = peek_char_noread(ctx);
}
if (newline) {
struct lex_token *tok = lex_token_create(TOK_LINEFEED);
enqueue_token(ctx, tok);
handle_lex_state_transition(ctx, TOK_LINEFEED);
return BSHELL_SUCCESS;
}
if (char_can_begin_symbol(ctx, c)) {
return statement_symbol(ctx);
}
if (char_has_flags(ctx, c, LEX_TOKEN_UNARY_ARITHMETIC)) {
lex_state_change(ctx, LEX_STATE_ARITHMETIC);
return BSHELL_SUCCESS;
}
return statement_word(ctx);
}
/* Transition table for the statement lex state. Each entry associates a
 * token type, symbol or keyword id with a state action built by one of the
 * LINK_* macros; LINK_END terminates the table.
 * NOTE(review): some ids (SYM_AT_LEFT_BRACE, SYM_LEFT_PAREN) appear in more
 * than one entry, so entry order presumably matters to whatever walks this
 * table -- confirm before reordering. */
static const struct lex_state_link links[] = {
LINK_PUSH(SYM_DQUOTE, LEX_STATE_STRING, 0),
/* arithmetic tokens */
LINK_CHANGE(TOK_KEYWORD, LEX_STATE_ARITHMETIC),
LINK_CHANGE(TOK_INT, LEX_STATE_ARITHMETIC),
LINK_PUSH(SYM_DOLLAR, LEX_STATE_ARITHMETIC, 0),
LINK_PUSH(SYM_DOLLAR_LEFT_BRACE, LEX_STATE_ARITHMETIC, 0),
LINK_CHANGE(SYM_AT_LEFT_BRACE, LEX_STATE_ARITHMETIC),
LINK_PUSH(SYM_AT_LEFT_BRACE, LEX_STATE_HASHTABLE, 0),
LINK_PUSH(SYM_AT, LEX_STATE_ARITHMETIC, 0),
LINK_CHANGE(SYM_LEFT_PAREN, LEX_STATE_ARITHMETIC),
LINK_CHANGE(SYM_BANG, LEX_STATE_ARITHMETIC),
/* a parenthesised statement: keywords disabled, ended by `)` */
LINK_PUSH_WITH_TERM(
SYM_LEFT_PAREN,
LEX_STATE_STATEMENT,
STATEMENT_F_DISABLE_KEYWORDS,
SYM_RIGHT_PAREN),
/* statement tokens */
LINK_PUSH(SYM_LEFT_BRACE, LEX_STATE_STATEMENT, 0),
LINK_PUSH_WITH_TERM(
SYM_DOLLAR_LEFT_PAREN,
LEX_STATE_STATEMENT,
0,
SYM_RIGHT_PAREN),
/* command tokens */
LINK_CHANGE(KW_FUNC, LEX_STATE_COMMAND),
LINK_CHANGE(SYM_AMPERSAND, LEX_STATE_COMMAND),
LINK_CHANGE(TOK_WORD, LEX_STATE_COMMAND),
LINK_END,
};
/* Keywords listed for the statement state. Terminated by KW_NONE. */
static const unsigned int keywords[] = {
KW_FUNC,
KW_IF,
KW_ELSEIF,
KW_ELSE,
KW_NONE,
};
/* Operators listed for the statement state. Terminated by TKOP_NONE. */
static const unsigned int operators[] = {
TKOP_BNOT,
TKOP_NOT,
TKOP_NONE,
};
/* Symbols listed for the statement state. Terminated by SYM_NONE.
 * NOTE(review): statement_symbol() has cases for SYM_HYPHEN, SYM_DOLLAR and
 * SYM_DOLLAR_LEFT_BRACE, none of which are listed here -- confirm whether
 * that is intentional. */
static const unsigned int symbols[] = {
SYM_AMPERSAND,
SYM_BANG,
SYM_SQUOTE,
SYM_DQUOTE,
SYM_HASH,
SYM_AT,
SYM_AT_LEFT_BRACE,
SYM_PIPE,
SYM_COMMA,
SYM_SEMICOLON,
SYM_LEFT_BRACE,
SYM_RIGHT_BRACE,
SYM_LEFT_BRACKET,
SYM_RIGHT_BRACKET,
SYM_LEFT_PAREN,
SYM_RIGHT_PAREN,
SYM_NONE,
};
/* Descriptor for the statement lex state: binds the state id to its token
 * pump callback, transition table and keyword/operator/symbol sets. No
 * begin/end callbacks are set for this state. */
const struct lex_state_type lex_statement_state = {
.s_id = LEX_STATE_STATEMENT,
.s_pump_token = statement_pump_token,
.s_links = links,
.s_keywords = keywords,
.s_operators = operators,
.s_symbols = symbols,
};
+141
View File
@@ -0,0 +1,141 @@
#include "lex-internal.h"
static enum bshell_status string_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
handle_lex_state_transition(ctx, sym->id);
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DQUOTE:
return BSHELL_SUCCESS;
case SYM_DOLLAR_LEFT_PAREN:
return push_symbol(ctx, sym->id);
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_DOLLAR_LEFT_BRACE:
status = read_braced_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
break;
}
return BSHELL_ERR_BAD_SYNTAX;
}
static enum bshell_status string_content(struct lex_ctx *ctx)
{
fx_wchar c = FX_WCHAR_INVALID;
fx_string *temp = lex_state_get_tempstr(ctx);
set_token_start(ctx);
fx_string_clear(temp);
while (1) {
c = peek_char(ctx);
if (c == FX_WCHAR_INVALID) {
/* EOF without end of string */
ctx->lex_status = BSHELL_ERR_BAD_SYNTAX;
}
if (char_can_begin_symbol(ctx, c)) {
break;
}
fx_string_append_wc(temp, c);
set_token_end(ctx);
advance_char(ctx);
}
if (fx_string_get_size(temp, FX_STRLEN_NORMAL) == 0) {
return BSHELL_SUCCESS;
}
struct lex_token *tok = lex_token_create_with_string(
TOK_STRING,
fx_string_get_cstr(temp));
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
static enum bshell_status string_begin(struct lex_ctx *ctx)
{
struct lex_token *tok = lex_token_create(TOK_STR_START);
if (!tok) {
return BSHELL_ERR_NO_MEMORY;
}
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
static enum bshell_status string_end(struct lex_ctx *ctx)
{
struct lex_token *tok = lex_token_create(TOK_STR_END);
if (!tok) {
return BSHELL_ERR_NO_MEMORY;
}
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
/* Produce the next token while inside a double-quoted string: a symbol if
 * the next character can begin one, literal string content otherwise. */
static enum bshell_status string_pump_token(struct lex_ctx *ctx)
{
	const fx_wchar next = peek_char(ctx);

	return char_can_begin_symbol(ctx, next) ? string_symbol(ctx)
						: string_content(ctx);
}
/* Transition table for the string state: `$(` pushes a statement state and
 * a double quote pops back out of the string. Terminated by LINK_END. */
static const struct lex_state_link links[] = {
LINK_PUSH(SYM_DOLLAR_LEFT_PAREN, LEX_STATE_STATEMENT, 0),
LINK_POP(SYM_DQUOTE),
LINK_END,
};
/* Symbols listed for the string state. Terminated by SYM_NONE.
 * NOTE(review): string_symbol() also has a case for SYM_AT, which is not
 * listed here -- confirm whether that is intentional. */
static const unsigned int symbols[] = {
SYM_DOLLAR,
SYM_DOLLAR_LEFT_PAREN,
SYM_DOLLAR_LEFT_BRACE,
SYM_DQUOTE,
SYM_NONE,
};
/* Descriptor for the string lex state: binds the state id to its
 * begin/end callbacks (which emit the TOK_STR_START / TOK_STR_END markers),
 * its token pump, its transition table and its symbol set. */
const struct lex_state_type lex_string_state = {
.s_id = LEX_STATE_STRING,
.s_begin = string_begin,
.s_end = string_end,
.s_pump_token = string_pump_token,
.s_links = links,
.s_symbols = symbols,
};
+162
View File
@@ -0,0 +1,162 @@
#include "lex-internal.h"
static enum bshell_status word_symbol(struct lex_ctx *ctx)
{
const struct lex_token_def *sym = NULL;
enum bshell_status status = read_symbol(ctx, &sym);
if (status != BSHELL_SUCCESS) {
return status;
}
struct lex_token *tok = NULL;
switch (sym->id) {
case SYM_DOLLAR_LEFT_PAREN:
status = push_symbol(ctx, sym->id);
if (status != BSHELL_SUCCESS) {
return status;
}
lex_state_push(ctx, LEX_STATE_STATEMENT, 0);
return BSHELL_SUCCESS;
case SYM_RIGHT_PAREN:
lex_state_pop(ctx);
status = push_symbol(ctx, sym->id);
if (status != BSHELL_SUCCESS) {
return status;
}
return BSHELL_SUCCESS;
case SYM_DOLLAR:
status = read_var(ctx, TOK_VAR, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
case SYM_AT:
status = read_var(ctx, TOK_VAR_SPLAT, &tok);
if (status != BSHELL_SUCCESS) {
return status;
}
enqueue_token(ctx, tok);
return status;
default:
break;
}
return BSHELL_ERR_BAD_SYNTAX;
}
static enum bshell_status word_content(struct lex_ctx *ctx)
{
fx_wchar c = FX_WCHAR_INVALID;
fx_string *temp = lex_state_get_tempstr(ctx);
set_token_start(ctx);
fx_string_clear(temp);
while (1) {
c = peek_char(ctx);
if (c == FX_WCHAR_INVALID) {
/* EOF without end of word */
ctx->lex_status = BSHELL_ERR_BAD_SYNTAX;
}
if (fx_wchar_is_space(c)) {
break;
}
if (char_can_begin_symbol(ctx, c)) {
break;
}
fx_string_append_wc(temp, c);
set_token_end(ctx);
advance_char(ctx);
}
if (fx_string_get_size(temp, FX_STRLEN_NORMAL) == 0) {
return BSHELL_SUCCESS;
}
struct lex_token *tok = lex_token_create_with_string(
TOK_WORD,
fx_string_get_cstr(temp));
enqueue_token(ctx, tok);
return BSHELL_SUCCESS;
}
static enum bshell_status word_begin(struct lex_ctx *ctx)
{
struct lex_token *tok = lex_token_create(TOK_WORD_START);
if (!tok) {
return BSHELL_ERR_NO_MEMORY;
}
enqueue_token_with_coordinates(
ctx,
tok,
&ctx->lex_start,
&ctx->lex_start);
return BSHELL_SUCCESS;
}
static enum bshell_status word_end(struct lex_ctx *ctx)
{
struct lex_token *tok = lex_token_create(TOK_WORD_END);
if (!tok) {
return BSHELL_ERR_NO_MEMORY;
}
enqueue_token_with_coordinates(ctx, tok, &ctx->lex_end, &ctx->lex_end);
return BSHELL_SUCCESS;
}
/* Produce the next token while inside a compound word.
 *
 * Whitespace, or a character flagged LEX_TOKEN_TERMINATES_WORD, ends the
 * word by popping this state without consuming the character. Otherwise a
 * symbol or a run of literal word content is read. */
static enum bshell_status word_pump_token(struct lex_ctx *ctx)
{
	const fx_wchar next = peek_char(ctx);

	/* the flag lookup is only made for non-space characters */
	if (fx_wchar_is_space(next)
	    || char_has_flags(ctx, next, LEX_TOKEN_TERMINATES_WORD)) {
		lex_state_pop(ctx);
		return BSHELL_SUCCESS;
	}

	return char_can_begin_symbol(ctx, next) ? word_symbol(ctx)
						: word_content(ctx);
}
/* Symbols listed for the word state. Terminated by SYM_NONE.
 * NOTE(review): word_symbol() only has cases for a few of these; the rest
 * are presumably caught earlier by the LEX_TOKEN_TERMINATES_WORD check in
 * word_pump_token() -- confirm against the symbol flag definitions. */
static const unsigned int symbols[] = {
SYM_AMPERSAND,
SYM_HASH,
SYM_DOLLAR,
SYM_DOLLAR_LEFT_PAREN,
SYM_DOLLAR_LEFT_BRACE,
SYM_PIPE,
SYM_COMMA,
SYM_SEMICOLON,
SYM_LEFT_BRACE,
SYM_RIGHT_BRACE,
SYM_LEFT_PAREN,
SYM_RIGHT_PAREN,
SYM_NONE,
};
/* Descriptor for the word lex state: binds the state id to its begin/end
 * callbacks (which emit the TOK_WORD_START / TOK_WORD_END markers), its
 * token pump and its symbol set. This state has no link table; its state
 * changes are made directly by word_symbol() and word_pump_token(). */
const struct lex_state_type lex_word_state = {
.s_id = LEX_STATE_WORD,
.s_begin = word_begin,
.s_end = word_end,
.s_pump_token = word_pump_token,
.s_symbols = symbols,
};
+55
View File
@@ -0,0 +1,55 @@
#include "parse.h"
#include "../ast/ast.h"
#include "../debug.h"
#include "lex.h"
#include "syntax.h"
#include "token.h"
#include <stdio.h>
#include <string.h>
/* Initialise a parser context so that it reads tokens from `src`.
 * Every field of `ctx` is zeroed first. Always returns BSHELL_SUCCESS. */
enum bshell_status parse_ctx_init(struct parse_ctx *ctx, struct lex_ctx *src)
{
	memset(ctx, 0, sizeof(*ctx));
	ctx->p_src = src;

	return BSHELL_SUCCESS;
}
/* Release resources held by a parser context. The context currently owns
 * nothing, so this is a no-op. */
void parse_ctx_cleanup(struct parse_ctx *ctx)
{
	(void)ctx;
}
/* Parse and return the next statement from the token stream.
 *
 * One optional leading `;` and one optional linefeed are consumed first.
 * Returns the parsed statement node, or NULL if parse_statement() fails. */
struct ast_node *parse_ctx_read_node(struct parse_ctx *ctx)
{
	/* both separators are optional; their results are ignored */
	parse_symbol(ctx, SYM_SEMICOLON);
	parse_linefeed(ctx);

	struct ast_node *stmt = NULL;
	if (!parse_statement(ctx, &stmt)) {
		return NULL;
	}

	return stmt;
}
/* Record a syntax error on the parser context and print a diagnostic.
 *
 * Sets ctx->p_status to BSHELL_ERR_BAD_SYNTAX, writes "PARSE: " followed by
 * the printf-style message to stderr, and then shows the token that is
 * currently at the front of the stream (or "EOF" if there is none). */
void report_error(struct parse_ctx *ctx, const char *format, ...)
{
	ctx->p_status = BSHELL_ERR_BAD_SYNTAX;

	va_list args;
	fprintf(stderr, "PARSE: ");
	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);
	fprintf(stderr, "\n");

	struct lex_token *upcoming = peek_token(ctx);
	fprintf(stderr, " peek_token = ");
	if (!upcoming) {
		fprintf(stderr, " EOF\n");
		return;
	}

	print_lex_token(upcoming);
}
+21
View File
@@ -0,0 +1,21 @@
#ifndef PARSE_H_
#define PARSE_H_
#include "../status.h"
struct lex_ctx;
struct ast_node;
/* Parser state. */
struct parse_ctx {
/* lexer the parser reads its tokens from; set by parse_ctx_init() */
struct lex_ctx *p_src;
/* status of the parser; report_error() sets this to
 * BSHELL_ERR_BAD_SYNTAX */
enum bshell_status p_status;
};
/* Initialise `ctx` to read tokens from `src`. */
extern enum bshell_status parse_ctx_init(
struct parse_ctx *ctx,
struct lex_ctx *src);
/* Release anything held by `ctx`. */
extern void parse_ctx_cleanup(struct parse_ctx *ctx);
/* Parse the next statement; returns NULL if parsing fails. */
extern struct ast_node *parse_ctx_read_node(struct parse_ctx *ctx);
#endif
+66
View File
@@ -0,0 +1,66 @@
#ifndef PARSE_SYNTAX_H_
#define PARSE_SYNTAX_H_
#include "../ast/ast.h"
#include "../operator.h"
#include "lex.h"
#include "parse.h"
#include "token.h"
#include <stdbool.h>
#include <stdio.h>
/* Internal parser interface shared by the syntax/ source files.
 *
 * Naming convention used by the callers in this directory: peek_* functions
 * inspect the upcoming token, parse_* functions return true when they
 * matched (and, where an `out` parameter exists, store their result
 * through it).
 * NOTE(review): that peek_* never consumes input is inferred from usage;
 * confirm against the definitions. */

/* Record a syntax error on `ctx` and print a printf-style diagnostic. */
extern void report_error(struct parse_ctx *ctx, const char *format, ...);
/* --- raw token access --- */
/* May return NULL; report_error() treats that as end of input. */
extern struct lex_token *peek_token(struct parse_ctx *ctx);
extern enum token_type peek_token_type(struct parse_ctx *ctx);
extern enum token_keyword peek_unknown_keyword(struct parse_ctx *ctx);
extern enum token_symbol peek_unknown_symbol(struct parse_ctx *ctx);
/* Callers store the returned token in an AST node. */
extern struct lex_token *claim_token(struct parse_ctx *ctx);
extern void discard_token(struct parse_ctx *ctx);
/* --- single-token helpers --- */
extern bool peek_linefeed(struct parse_ctx *ctx);
extern bool peek_symbol(struct parse_ctx *ctx, enum token_symbol sym);
extern bool peek_word(struct parse_ctx *ctx, struct lex_token **out);
extern bool peek_int(struct parse_ctx *ctx);
extern bool parse_linefeed(struct parse_ctx *ctx);
extern bool parse_symbol(struct parse_ctx *ctx, enum token_symbol sym);
extern bool parse_keyword(struct parse_ctx *ctx, enum token_keyword kw);
extern bool parse_word(struct parse_ctx *ctx, struct lex_token **out);
extern bool parse_var(struct parse_ctx *ctx, struct lex_token **out);
extern bool parse_flag(struct parse_ctx *ctx, struct lex_token **out);
/* --- arithmetic expressions --- */
extern bool peek_arith_expr(struct parse_ctx *ctx);
extern bool parse_arith_value(struct parse_ctx *ctx, struct ast_node **out);
/* Operators whose precedence is below `minimum_precedence` end the
 * expression instead of being consumed. */
extern bool parse_arith_expr(
struct parse_ctx *ctx,
enum operator_precedence minimum_precedence,
struct ast_node **out);
/* --- keyword constructs --- */
extern bool peek_keyword_expr(struct parse_ctx *ctx);
extern bool parse_keyword_expr(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_if(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_func(struct parse_ctx *ctx, struct ast_node **out);
/* --- strings and blocks --- */
extern bool parse_fstring(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_block(struct parse_ctx *ctx, struct ast_node **out);
/* --- commands and pipelines --- */
extern bool peek_command(struct parse_ctx *ctx);
/* `first_item` is the already-parsed left-most element of the pipeline. */
extern bool parse_pipeline(
struct parse_ctx *ctx,
struct ast_node *first_item,
struct ast_node **out);
extern bool parse_command(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_cmdcall(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_redirect(struct parse_ctx *ctx, struct ast_node **out);
/* --- statements --- */
extern bool parse_expr(struct parse_ctx *ctx, struct ast_node **out);
extern bool peek_statement(struct parse_ctx *ctx);
extern bool parse_statement(struct parse_ctx *ctx, struct ast_node **out);
extern bool parse_statement_list(struct parse_ctx *ctx, struct ast_node **out);
#endif
+902
View File
@@ -0,0 +1,902 @@
#include "../../debug.h"
#include "../../operator.h"
#include "../syntax.h"
#include <fx/queue.h>
/* Kind of the most recently parsed expression component. The expression
 * parser uses this to tell whether an operand was the last thing seen. */
enum expr_component {
/* nothing parsed yet (the zero-initialised value) */
EXPR_C_NONE = 0,
EXPR_C_OPERAND,
EXPR_C_BINARY_OP,
EXPR_C_UNARY_OP,
};
/* Working state for parsing a single arithmetic expression. */
struct expr_parse_ctx {
/* expr_operator_stack: operator nodes waiting for their operands (used
 * as a stack via the back of the queue).
 * expr_out_queue: operands and operators in output order. */
fx_queue expr_operator_stack, expr_out_queue;
/* kind of the previously parsed component */
enum expr_component expr_prev;
/* token type / operator id / symbol id of the previous token, as
 * recorded by parse_arith_expr(); read by can_use_command() */
unsigned int expr_prev_symbol;
/* operators below this precedence terminate the expression */
enum operator_precedence expr_minimum_precedence;
/* expr_done: the expression ended normally; expr_fail: a parse error
 * occurred */
bool expr_done, expr_fail;
};
/* Report whether an operator node already has every operand its arity
 * requires: the right operand for a unary operator, both operands for a
 * binary one. A node without an operator, or with any other arity, is never
 * complete. */
static bool op_node_is_complete(struct op_ast_node *node)
{
	if (node->n_op == NULL) {
		return false;
	}

	if (node->n_op->op_arity == OPA_UNARY) {
		return node->n_right != NULL;
	}

	if (node->n_op->op_arity == OPA_BINARY) {
		return node->n_left != NULL && node->n_right != NULL;
	}

	return false;
}
/* Turn the linear output queue / operator stack of an expression into a
 * tree and return its root through `out`.
 *
 * Steps:
 *  1. pending operators whose precedence is at least `minimum_precedence`
 *     are moved from the operator stack to the end of the output queue;
 *  2. the output queue is drained front to back, attaching operands to
 *     each incomplete operator node;
 *  3. one operand per operator left on the stack is moved back to the
 *     output queue;
 *  4. the last remaining node becomes the result.
 *
 * Returns false if an operator stack entry cannot be unboxed, or if step 3
 * runs out of operands; true otherwise.
 * NOTE(review): the code relies on fx_unbox() of a NULL entry yielding NULL
 * (see the n_right / n_left checks and the final assignment to *out) --
 * confirm against the fx library. */
static bool finalise_expr(
struct expr_parse_ctx *ctx,
struct ast_node **out,
enum operator_precedence minimum_precedence)
{
fx_queue_entry *entry = NULL;
/* step 1: flush eligible operators, most recently pushed first */
while (true) {
entry = fx_queue_pop_back(&ctx->expr_operator_stack);
if (!entry) {
break;
}
struct op_ast_node *node
= fx_unbox(struct op_ast_node, entry, n_base.n_entry);
if (!node) {
/* this should never happen */
return false;
}
const struct operator_info *op = node->n_op;
/* if we aren't processing operators below a certain precedence
 * then leave them on the stack and stop here. */
if (op->op_precedence < minimum_precedence) {
fx_queue_push_back(&ctx->expr_operator_stack, entry);
break;
}
fx_queue_push_back(&ctx->expr_out_queue, entry);
}
/* step 2: `q` is a temporary operand stack */
fx_queue q = FX_QUEUE_INIT;
fx_queue_entry *tmp = NULL;
entry = fx_queue_first(&ctx->expr_out_queue);
/* NOTE(review): `i` is never used */
int i = 0;
while (entry) {
struct ast_node *item
= fx_unbox(struct ast_node, entry, n_entry);
/* fetch the successor before the entry is unlinked */
fx_queue_entry *next = fx_queue_next(entry);
fx_queue_delete(&ctx->expr_out_queue, entry);
/* if the node is an operand, just push it to a
 * temporary queue and come back to it later. */
if (item->n_type != AST_OP) {
/* operand */
fx_queue_push_back(&q, &item->n_entry);
goto next;
}
const struct operator_info *op = NULL;
struct op_ast_node *op_node = (struct op_ast_node *)item;
/* if an operator node is already complete (i.e. it
 * already has all the operands it needs), it can be
 * pushed to the operand queue as-is */
if (op_node_is_complete(op_node)) {
fx_queue_push_back(&q, &item->n_entry);
goto next;
}
/* otherwise, pop the relevant operands from the operand
 * queue... (the right operand was pushed last, so it is popped
 * first) */
op = op_node->n_op;
tmp = fx_queue_pop_back(&q);
op_node->n_right = fx_unbox(struct ast_node, tmp, n_entry);
if (op_node->n_right) {
op_node->n_right->n_parent = (struct ast_node *)op_node;
#if 0
ast_node_extend_bounds_recursive(
(struct ivy_ast_node *)op_node,
(struct ivy_ast_node *)tmp);
#endif
}
if (op->op_arity == OPA_BINARY) {
tmp = fx_queue_pop_back(&q);
op_node->n_left
= fx_unbox(struct ast_node, tmp, n_entry);
if (op_node->n_left) {
op_node->n_left->n_parent
= (struct ast_node *)op_node;
#if 0
ast_node_extend_bounds_recursive(
(struct ivy_ast_node *)op_node,
(struct ivy_ast_node *)tmp);
#endif
}
}
/* ...and push the newly-completed operator node to the
 * operand queue */
fx_queue_push_back(&q, &op_node->n_base.n_entry);
next:
entry = next;
}
#if 0
debug_printf("** after hierarchisation:\n");
print_expr_queues(state);
#endif
/* step 3: if we are not processing operators below a certain
 * precedence, those operators will still be on the parser state's
 * operator stack, but their operands have just been moved to the
 * temporary operand stack used above. move one operand per remaining
 * operator back to the parser state's output queue here so they can be
 * used later. */
entry = fx_queue_first(&ctx->expr_operator_stack);
while (entry) {
fx_queue_entry *entry2 = fx_queue_pop_front(&q);
if (!entry2) {
return false;
}
fx_queue_push_back(&ctx->expr_out_queue, entry2);
entry = fx_queue_next(entry);
}
#if 0
debug_printf("** after de-linearisation:\n");
print_expr_queues(state);
ivy_ast_node_print(*expr_tree);
debug_printf("------\n");
#endif
/* step 4: the final node remaining on the temp operand stack is the
 * root node of the new expression tree */
tmp = fx_queue_pop_back(&q);
*out = fx_unbox(struct ast_node, tmp, n_entry);
return true;
}
bool peek_arith_expr(struct parse_ctx *ctx)
{
switch (peek_token_type(ctx)) {
case TOK_SYMBOL:
return operator_get_by_token(peek_unknown_symbol(ctx));
case TOK_INT:
case TOK_DOUBLE:
case TOK_STRING:
case TOK_VAR:
case TOK_STR_START:
case TOK_OPERATOR:
return true;
default:
return false;
}
}
/* Parse a parenthesised expression: `(` expr `)`.
 *
 * On success the inner expression is stored in `*out` and true is returned.
 * On failure an error is reported and false is returned; `*out` is left
 * untouched. */
static bool parse_subexpr(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!parse_symbol(ctx, SYM_LEFT_PAREN)) {
		report_error(ctx, "expected `(`");
		/* previously execution carried on and tried to parse an
		 * expression even though the opening paren was missing */
		return false;
	}

	struct ast_node *v = NULL;
	if (!parse_expr(ctx, &v)) {
		report_error(ctx, "error while parsing parenthesis expression");
		return false;
	}

	if (!parse_symbol(ctx, SYM_RIGHT_PAREN)) {
		report_error(ctx, "expected `)` after parenthesis expression");
		return false;
	}

	*out = v;
	return true;
}
static bool parse_stmt_block(struct parse_ctx *ctx, struct ast_node **out)
{
if (!parse_symbol(ctx, SYM_DOLLAR_LEFT_PAREN)) {
report_error(ctx, "expected `$(`");
return false;
}
if (parse_symbol(ctx, SYM_RIGHT_PAREN)) {
*out = ast_node_create(AST_NULL);
return true;
}
struct ast_node *v = NULL;
if (!parse_statement_list(ctx, &v)) {
return false;
}
if (!parse_symbol(ctx, SYM_RIGHT_PAREN)) {
report_error(ctx, "expected ')' after subexpression");
ast_node_destroy(v);
return false;
}
*out = v;
return true;
}
/* Parse a hashtable literal: `@{` key `=` value ... `}`.
 *
 * Items are separated by `;` or a linefeed. A key is either a bare word
 * (stored as a string node) or an arithmetic value; a value is any
 * expression.
 *
 * On success the new AST_HASHTABLE node is stored in `*out` and true is
 * returned. On failure everything built so far is destroyed, false is
 * returned and `*out` is left untouched; allocation failures additionally
 * set ctx->p_status to BSHELL_ERR_NO_MEMORY. */
static bool parse_hashtable(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!parse_symbol(ctx, SYM_AT_LEFT_BRACE)) {
		report_error(ctx, "expected `@{`");
		return false;
	}

	parse_linefeed(ctx);

	struct hashtable_ast_node *table
		= (struct hashtable_ast_node *)ast_node_create(AST_HASHTABLE);
	if (!table) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		return false;
	}

	bool ok = true;
	while (ok) {
		if (parse_symbol(ctx, SYM_RIGHT_BRACE)) {
			break;
		}

		parse_linefeed(ctx);

		struct hashtable_item_ast_node *item
			= (struct hashtable_item_ast_node *)ast_node_create(
				AST_HASHTABLE_ITEM);
		if (!item) {
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			ok = false;
			break;
		}

		struct lex_token *tok = NULL;
		if (parse_word(ctx, &tok)) {
			struct string_ast_node *v
				= (struct string_ast_node *)ast_node_create(
					AST_STRING);
			if (!v) {
				/* NOTE(review): `tok` is not released here
				 * because no token-release function is
				 * visible from this file -- confirm who
				 * owns it on this path. */
				ctx->p_status = BSHELL_ERR_NO_MEMORY;
				ast_node_destroy((struct ast_node *)item);
				ok = false;
				break;
			}

			v->n_value = tok;
			item->n_key = (struct ast_node *)v;
		} else if (!parse_arith_value(ctx, &item->n_key)) {
			report_error(ctx, "failed to parse hashtable key");
			ast_node_destroy((struct ast_node *)item);
			ok = false;
			break;
		}

		if (!parse_symbol(ctx, SYM_EQUAL)) {
			report_error(ctx, "expected `=` after hashtable key");
			ast_node_destroy((struct ast_node *)item);
			ok = false;
			break;
		}

		if (!parse_expr(ctx, &item->n_value)) {
			report_error(ctx, "failed to parse hashtable value");
			ast_node_destroy((struct ast_node *)item);
			ok = false;
			break;
		}

		/* from here on the item is reachable through the table */
		fx_queue_push_back(&table->n_items, &item->n_base.n_entry);

		if (parse_symbol(ctx, SYM_RIGHT_BRACE)) {
			break;
		}

		if (!parse_linefeed(ctx) && !parse_symbol(ctx, SYM_SEMICOLON)) {
			report_error(
				ctx,
				"expected `;`, `}`, or linefeed after "
				"hashtable value");
			ok = false;
			break;
		}
	}

	if (!ok) {
		ast_node_destroy((struct ast_node *)table);
		return false;
	}

	*out = (struct ast_node *)table;
	return true;
}
/* Parse an array literal: `@(` value [`,` value]... `)`.
 *
 * On success the new AST_ARRAY node is stored in `*out` and true is
 * returned. On failure the partially built array is destroyed, false is
 * returned and `*out` is left untouched. */
static bool parse_array(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!parse_symbol(ctx, SYM_AT_LEFT_PAREN)) {
		report_error(ctx, "expected `@(`");
		return false;
	}

	struct array_ast_node *array
		= (struct array_ast_node *)ast_node_create(AST_ARRAY);
	if (!array) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		return false;
	}

	size_t nr_items = 0;
	bool ok = true;

	while (ok) {
		if (parse_symbol(ctx, SYM_RIGHT_PAREN)) {
			break;
		}

		/* every item after the first must be preceded by a comma */
		if (nr_items && !parse_symbol(ctx, SYM_COMMA)) {
			report_error(
				ctx,
				"expected `,` or `)` after array value");
			ok = false;
			/* stop immediately; previously parsing carried on
			 * and tried to read another item after the error */
			break;
		}

		struct ast_node *item = NULL;
		if (!parse_arith_value(ctx, &item)) {
			report_error(ctx, "failed to parse array item");
			ok = false;
			break;
		}

		fx_queue_push_back(&array->n_items, &item->n_entry);
		nr_items++;
	}

	if (!ok) {
		ast_node_destroy((struct ast_node *)array);
		return false;
	}

	*out = (struct ast_node *)array;
	return true;
}
/* Parse a formatted (double-quoted) string: TOK_STR_START, any number of
 * arithmetic values, TOK_STR_END.
 *
 * Returns false without touching `*out` if the upcoming token is not
 * TOK_STR_START or the node cannot be allocated. If an element fails to
 * parse, the node is destroyed, `*out` is set to NULL and false is returned.
 * Otherwise `*out` receives the AST_FSTRING node and true is returned. */
bool parse_fstring(struct parse_ctx *ctx, struct ast_node **out)
{
	if (peek_token_type(ctx) != TOK_STR_START) {
		return false;
	}

	discard_token(ctx);

	struct fstring_ast_node *node
		= (struct fstring_ast_node *)ast_node_create(AST_FSTRING);
	if (node == NULL) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		return false;
	}

	for (;;) {
		if (peek_token_type(ctx) == TOK_STR_END) {
			discard_token(ctx);
			break;
		}

		struct ast_node *element = NULL;
		if (!parse_arith_value(ctx, &element)) {
			ast_node_destroy((struct ast_node *)node);
			*out = NULL;
			return false;
		}

		fx_queue_push_back(&node->n_elements, &element->n_entry);
	}

	*out = (struct ast_node *)node;
	return true;
}
bool parse_arith_value(struct parse_ctx *ctx, struct ast_node **out)
{
struct lex_token *tok = peek_token(ctx);
switch (tok->tok_type) {
case TOK_INT: {
struct int_ast_node *v
= (struct int_ast_node *)ast_node_create(AST_INT);
v->n_value = claim_token(ctx);
*out = (struct ast_node *)v;
return true;
}
case TOK_DOUBLE: {
struct double_ast_node *v
= (struct double_ast_node *)ast_node_create(AST_DOUBLE);
v->n_value = claim_token(ctx);
*out = (struct ast_node *)v;
return true;
}
case TOK_STRING: {
struct string_ast_node *v
= (struct string_ast_node *)ast_node_create(AST_STRING);
v->n_value = claim_token(ctx);
*out = (struct ast_node *)v;
return true;
}
case TOK_VAR: {
struct var_ast_node *v
= (struct var_ast_node *)ast_node_create(AST_VAR);
v->n_ident = claim_token(ctx);
*out = (struct ast_node *)v;
return true;
}
case TOK_STR_START:
return parse_fstring(ctx, out);
case TOK_SYMBOL:
switch (tok->tok_symbol) {
case SYM_LEFT_PAREN:
return parse_subexpr(ctx, out);
case SYM_DOLLAR_LEFT_PAREN:
return parse_stmt_block(ctx, out);
case SYM_AT_LEFT_BRACE:
return parse_hashtable(ctx, out);
case SYM_AT_LEFT_PAREN:
return parse_array(ctx, out);
case SYM_LEFT_BRACE:
return parse_block(ctx, out);
default:
report_error(ctx, "token is not a valid operand");
return false;
}
break;
default:
report_error(ctx, "token is not a valid operand");
return false;
}
}
/* Parse one operand and append it to the expression's output queue.
 * Two operands in a row are a syntax error. Note that the expression is
 * marked as "last component was an operand" before the value is parsed, so
 * the mark remains even if parsing the value fails. */
static bool parse_operand(struct parse_ctx *ctx, struct expr_parse_ctx *expr)
{
	const bool follows_operand = (expr->expr_prev == EXPR_C_OPERAND);
	if (follows_operand) {
		report_error(ctx, "encountered two operands in a row");
		return false;
	}

	expr->expr_prev = EXPR_C_OPERAND;

	struct ast_node *operand = NULL;
	const bool parsed = parse_arith_value(ctx, &operand);
	if (parsed) {
		fx_queue_push_back(&expr->expr_out_queue, &operand->n_entry);
	}

	return parsed;
}
void arith_push_operator(struct expr_parse_ctx *state, struct op_ast_node *node)
{
const struct operator_info *op = node->n_op;
if (!op) {
return;
}
while (true) {
fx_queue_entry *top
= fx_queue_last(&state->expr_operator_stack);
if (!top) {
break;
}
struct ast_node *top_node
= fx_unbox(struct ast_node, top, n_entry);
const struct operator_info *top_op = NULL;
switch (top_node->n_type) {
case AST_OP: {
struct op_ast_node *op_node
= (struct op_ast_node *)top_node;
top_op = op_node->n_op;
break;
}
default:
return;
}
if (top_op->op_precedence < op->op_precedence
|| (top_op->op_precedence == op->op_precedence
&& op->op_associativity != ASSOCIATIVITY_LEFT)) {
break;
}
fx_queue_delete(&state->expr_operator_stack, top);
fx_queue_push_back(&state->expr_out_queue, top);
}
fx_queue_push_back(&state->expr_operator_stack, &node->n_base.n_entry);
}
static bool parse_unary_operator(
struct parse_ctx *ctx,
struct expr_parse_ctx *expr)
{
struct lex_token *tok = peek_token(ctx);
const struct operator_info *op = NULL;
switch (tok->tok_type) {
case TOK_SYMBOL:
op = operator_get_by_token(tok->tok_symbol);
break;
case TOK_OPERATOR:
switch (tok->tok_operator) {
case TKOP_SPLIT:
op = operator_get_by_id(OP_USPLIT);
break;
case TKOP_JOIN:
op = operator_get_by_id(OP_USPLIT);
break;
default:
op = operator_get_by_token(tok->tok_operator);
break;
}
break;
default:
break;
}
if (expr->expr_prev == EXPR_C_OPERAND
&& op->op_location == OPL_PREFIX) {
report_error(
ctx,
"unexpected operand before unary "
"operator");
return false;
}
if (!op) {
report_error(ctx, "unknown unary operator");
return false;
}
if (op->op_precedence < expr->expr_minimum_precedence) {
expr->expr_done = true;
return true;
}
expr->expr_prev = EXPR_C_BINARY_OP;
struct op_ast_node *op_node
= (struct op_ast_node *)ast_node_create(AST_OP);
if (!op_node) {
return false;
}
op_node->n_op = op;
discard_token(ctx);
arith_push_operator(expr, op_node);
return true;
}
/* Parse the upcoming token as a binary operator and push it onto the
 * expression's operator stack.
 *
 * If the operator's precedence is below the expression's minimum, the
 * expression is marked done and the token is left unconsumed. Except for
 * OP_PAREN, a binary operator must directly follow an operand.
 *
 * Returns false if the token is not a known operator, if the operand is
 * missing (both reported), or if the operator node cannot be allocated. */
static bool parse_binary_operator(
	struct parse_ctx *ctx,
	struct expr_parse_ctx *expr)
{
	struct lex_token *tok = peek_token(ctx);
	const struct operator_info *info = NULL;

	if (tok->tok_type == TOK_SYMBOL) {
		info = operator_get_by_token(tok->tok_symbol);
	} else if (tok->tok_type == TOK_OPERATOR) {
		/* split/join exist in unary and binary form; select the
		 * binary one explicitly */
		switch (tok->tok_operator) {
		case TKOP_SPLIT:
			info = operator_get_by_id(OP_BSPLIT);
			break;
		case TKOP_JOIN:
			info = operator_get_by_id(OP_BJOIN);
			break;
		default:
			info = operator_get_by_token(tok->tok_operator);
			break;
		}
	}

	if (info == NULL) {
		report_error(ctx, "unknown binary operator");
		return false;
	}

	if (info->op_precedence < expr->expr_minimum_precedence) {
		expr->expr_done = true;
		return true;
	}

	if (expr->expr_prev != EXPR_C_OPERAND && info->op_id != OP_PAREN) {
		report_error(
			ctx,
			"expected operand before binary "
			"operator");
		return false;
	}

	expr->expr_prev = EXPR_C_BINARY_OP;

	struct op_ast_node *fresh
		= (struct op_ast_node *)ast_node_create(AST_OP);
	if (fresh == NULL) {
		return false;
	}

	fresh->n_op = info;
	discard_token(ctx);
	arith_push_operator(expr, fresh);
	return true;
}
/* Placeholder for parsing a call expression (an operand followed by `(`).
 * Not implemented: always returns false and consumes nothing. */
static bool parse_call(struct parse_ctx *ctx, struct expr_parse_ctx *expr)
{
return false;
}
/* Handle a `,` inside an expression by collecting a comma-separated list
 * into an AST_ARRAY node.
 *
 * If PRECEDENCE_ARRAY is below the expression's minimum precedence, the
 * expression is marked done and nothing is consumed. Otherwise whatever has
 * been parsed so far is finalised into the first item, each following
 * `, expr` adds another item, and the array is appended to the output queue
 * as an operand. */
static bool parse_comma(struct parse_ctx *ctx, struct expr_parse_ctx *expr)
{
	if (expr->expr_minimum_precedence > PRECEDENCE_ARRAY) {
		expr->expr_done = true;
		return true;
	}

	struct ast_node *element = NULL;
	if (!finalise_expr(expr, &element, PRECEDENCE_ARRAY)) {
		report_error(ctx, "failed to collect first array item.");
		return false;
	}

	struct array_ast_node *list
		= (struct array_ast_node *)ast_node_create(AST_ARRAY);
	if (list == NULL) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		ast_node_destroy(element);
		return false;
	}

	if (element != NULL) {
		fx_queue_push_back(&list->n_items, &element->n_entry);
	}

	/* each further item is introduced by its own comma */
	while (parse_symbol(ctx, SYM_COMMA)) {
		if (!parse_arith_expr(ctx, PRECEDENCE_ARRAY + 1, &element)) {
			report_error(ctx, "failed to parse array item.");
			ast_node_destroy((struct ast_node *)list);
			return false;
		}

		fx_queue_push_back(&list->n_items, &element->n_entry);
	}

	fx_queue_push_back(&expr->expr_out_queue, &list->n_base.n_entry);
	expr->expr_prev = EXPR_C_OPERAND;
	return true;
}
/* Debug helper: print every node on the operator stack, then every node in
 * the output queue, front to back. */
static void dump_expr_ctx(struct expr_parse_ctx *expr)
{
	fx_queue_entry *cursor;

	printf("op stack:\n");
	for (cursor = fx_queue_first(&expr->expr_operator_stack);
	     cursor != NULL;
	     cursor = fx_queue_next(cursor)) {
		print_ast_node(fx_unbox(struct ast_node, cursor, n_entry));
	}

	printf("out queue:\n");
	for (cursor = fx_queue_first(&expr->expr_out_queue); cursor != NULL;
	     cursor = fx_queue_next(cursor)) {
		print_ast_node(fx_unbox(struct ast_node, cursor, n_entry));
	}
}
/* Report whether a command may appear at the current position of the
 * expression: only at the very start (nothing recorded yet) or directly
 * after an assignment operator. */
static bool can_use_command(struct expr_parse_ctx *ctx)
{
	const unsigned int prev = ctx->expr_prev_symbol;

	return prev == TOK_NONE || prev == SYM_EQUAL || prev == SYM_PLUS_EQUAL
	       || prev == SYM_HYPHEN_EQUAL || prev == SYM_ASTERISK_EQUAL
	       || prev == SYM_FORWARD_SLASH_EQUAL || prev == SYM_PERCENT_EQUAL;
}
/* Parse an arithmetic expression.
 *
 * Tokens are consumed one at a time: operands go to an output queue,
 * operators go through arith_push_operator(), and the queues are converted
 * into a tree by finalise_expr() once the expression ends. The expression
 * ends at a linefeed, at end of input, at one of the terminating symbols
 * (`;`, `&`, `|`, `)`, `}`, `]`), or at an operator whose precedence is
 * below `minimum_precedence`; the terminating token is not consumed.
 *
 * If the expression is followed by `|` and PRECEDENCE_PIPELINE is not below
 * `minimum_precedence`, the result becomes the first element of a pipeline.
 *
 * Returns true with the result in `*out`, or false on a parse error. */
bool parse_arith_expr(
	struct parse_ctx *ctx,
	enum operator_precedence minimum_precedence,
	struct ast_node **out)
{
	struct expr_parse_ctx expr = {
		.expr_minimum_precedence = minimum_precedence,
	};

	while (!expr.expr_fail && !expr.expr_done) {
		struct lex_token *tok = peek_token(ctx);
		if (!tok) {
			break;
		}

		/* NOTE: every value needed from `tok` is copied into a local
		 * before a sub-parser runs. The sub-parsers claim or discard
		 * the token, and whether it is still valid after a discard
		 * cannot be determined from this file. */
		switch (tok->tok_type) {
		case TOK_LINEFEED:
			expr.expr_done = true;
			break;
		case TOK_WORD: {
			if (!can_use_command(&expr)) {
				report_error(
					ctx,
					"expected a value expression");
				expr.expr_fail = true;
				break;
			}

			struct ast_node *value = NULL;
			if (!parse_command(ctx, &value)) {
				expr.expr_fail = true;
				break;
			}

			fx_queue_push_back(
				&expr.expr_out_queue,
				&value->n_entry);
			break;
		}
		case TOK_VAR:
		case TOK_INT:
		case TOK_DOUBLE:
		case TOK_STRING:
		case TOK_STR_START: {
			const unsigned int type = tok->tok_type;

			expr.expr_fail = !parse_operand(ctx, &expr);
			expr.expr_prev_symbol = type;
			break;
		}
		case TOK_OPERATOR: {
			const unsigned int oper = tok->tok_operator;

			switch (oper) {
			/* these two are special cases, as they are both
			 * unary AND binary operators */
			case TKOP_SPLIT:
			case TKOP_JOIN:
				if (expr.expr_prev == EXPR_C_OPERAND) {
					expr.expr_fail = !parse_binary_operator(
						ctx,
						&expr);
				} else {
					expr.expr_fail = !parse_unary_operator(
						ctx,
						&expr);
				}
				break;
			case TKOP_BNOT:
			case TKOP_NOT:
				expr.expr_fail
					= !parse_unary_operator(ctx, &expr);
				break;
			default:
				expr.expr_fail
					= !parse_binary_operator(ctx, &expr);
				break;
			}

			expr.expr_prev_symbol = oper;
			break;
		}
		case TOK_SYMBOL: {
			const unsigned int sym = tok->tok_symbol;

			switch (sym) {
			case SYM_SEMICOLON:
			case SYM_AMPERSAND:
			case SYM_PIPE:
			case SYM_RIGHT_PAREN:
			case SYM_RIGHT_BRACE:
			case SYM_RIGHT_BRACKET:
				expr.expr_done = true;
				break;
			case SYM_COMMA:
				expr.expr_fail = !parse_comma(ctx, &expr);
				break;
			case SYM_LEFT_PAREN: {
				if (expr.expr_prev == EXPR_C_OPERAND) {
					/* `operand(`: a call expression */
					return parse_call(ctx, &expr);
				}

				struct ast_node *v = NULL;
				expr.expr_fail = !parse_subexpr(ctx, &v);
				if (expr.expr_fail) {
					break;
				}

				fx_queue_push_back(
					&expr.expr_out_queue,
					&v->n_entry);
				expr.expr_prev = EXPR_C_OPERAND;
				break;
			}
			case SYM_DOLLAR_LEFT_PAREN:
			case SYM_AT_LEFT_PAREN:
			case SYM_AT_LEFT_BRACE:
				expr.expr_fail = !parse_operand(ctx, &expr);
				break;
			default: {
				const struct operator_info *op
					= operator_get_by_token(sym);
				if (!op) {
					/* the symbol is not an operator; the
					 * lookup result used to be
					 * dereferenced unconditionally */
					report_error(
						ctx,
						"unexpected symbol in "
						"arithmetic expression");
					expr.expr_fail = true;
					break;
				}

				if (op->op_arity == OPA_BINARY) {
					expr.expr_fail = !parse_binary_operator(
						ctx,
						&expr);
				} else {
					expr.expr_fail = !parse_unary_operator(
						ctx,
						&expr);
				}
				break;
			}
			}

			expr.expr_prev_symbol = sym;
			break;
		}
		default:
			report_error(
				ctx,
				"unexpected token in arithmetic "
				"expression");
			expr.expr_fail = true;
			break;
		}
	}

	if (expr.expr_fail) {
		/* TODO cleanup */
		return false;
	}

	struct ast_node *value = NULL;
	if (!finalise_expr(&expr, &value, PRECEDENCE_ASSIGN)) {
		report_error(ctx, "failed to convert expression to AST");
		/* TODO cleanup */
		return false;
	}

	if (PRECEDENCE_PIPELINE >= expr.expr_minimum_precedence) {
		if (peek_symbol(ctx, SYM_PIPE)) {
			return parse_pipeline(ctx, value, out);
		}
	}

	*out = value;
	return true;
}
+30
View File
@@ -0,0 +1,30 @@
#include "../syntax.h"
bool parse_block(struct parse_ctx *ctx, struct ast_node **out)
{
if (!parse_symbol(ctx, SYM_LEFT_BRACE)) {
return false;
}
struct block_ast_node *block
= (struct block_ast_node *)ast_node_create(AST_BLOCK);
while (1) {
parse_linefeed(ctx);
if (parse_symbol(ctx, SYM_RIGHT_BRACE)) {
break;
}
struct ast_node *stmt = NULL;
if (!parse_statement(ctx, &stmt)) {
ast_node_destroy((struct ast_node *)block);
return false;
}
fx_queue_push_back(&block->n_statements, &stmt->n_entry);
}
*out = (struct ast_node *)block;
return true;
}
+515
View File
@@ -0,0 +1,515 @@
#include "../../debug.h"
#include "../syntax.h"
#include <fx/encoding.h>
/*
 * Parse a compound word: TOK_WORD_START, a sequence of items, TOK_WORD_END.
 *
 * Each TOK_WORD item becomes an AST_WORD node; any other item is handed to
 * parse_arith_value(). The items are collected, in order, in the n_elements
 * queue of a new AST_FSTRING node.
 *
 * Returns false without consuming anything if the next token is not
 * TOK_WORD_START. On success *out receives the AST_FSTRING node; on failure
 * the partially built node is destroyed and *out is set to NULL.
 */
static bool parse_fword(struct parse_ctx *ctx, struct ast_node **out)
{
	if (peek_token_type(ctx) != TOK_WORD_START) {
		return false;
	}

	discard_token(ctx);

	struct fstring_ast_node *fstring
		= (struct fstring_ast_node *)ast_node_create(AST_FSTRING);
	if (!fstring) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		return false;
	}

	bool ok = true;
	while (ok) {
		if (peek_token_type(ctx) == TOK_WORD_END) {
			discard_token(ctx);
			break;
		}

		struct ast_node *item = NULL;
		if (peek_token_type(ctx) == TOK_WORD) {
			struct word_ast_node *n
				= (struct word_ast_node *)ast_node_create(
					AST_WORD);
			if (!n) {
				ctx->p_status = BSHELL_ERR_NO_MEMORY;
				ok = false;
				break;
			}

			n->n_value = claim_token(ctx);
			item = (struct ast_node *)n;
		} else if (!parse_arith_value(ctx, &item)) {
			ok = false;
			break;
		}

		fx_queue_push_back(&fstring->n_elements, &item->n_entry);
	}

	if (!ok) {
		ast_node_destroy((struct ast_node *)fstring);
		fstring = NULL;
	}

	*out = (struct ast_node *)fstring;
	return ok;
}
/*
 * Parse a single command-call argument and store the resulting node in *out.
 *
 * Accepted forms, selected by the next token:
 *   - TOK_WORD_START          -> parse_fword()
 *   - TOK_STR_START           -> parse_fstring()
 *   - TOK_WORD / TOK_STRING   -> AST_WORD / AST_STRING holding the token
 *   - TOK_VAR / TOK_VAR_SPLAT -> AST_VAR / AST_VAR_SPLAT holding the token
 *   - `(`, `{`, `$(`, `@{`, `@(` -> parse_arith_value()
 *
 * Returns false immediately if the parser already carries an error status or
 * no token is available. Any other token is reported as an unsupported
 * command argument.
 */
static bool parse_cmdcall_arg(struct parse_ctx *ctx, struct ast_node **out)
{
	if (ctx->p_status != BSHELL_SUCCESS) {
		return false;
	}

	struct lex_token *tok = peek_token(ctx);
	if (!tok) {
		return false;
	}

	switch (tok->tok_type) {
	case TOK_WORD_START:
		return parse_fword(ctx, out);
	case TOK_STR_START:
		return parse_fstring(ctx, out);
	case TOK_WORD: {
		struct word_ast_node *n
			= (struct word_ast_node *)ast_node_create(AST_WORD);
		if (!n) {
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			return false;
		}

		n->n_value = claim_token(ctx);
		*out = (struct ast_node *)n;
		return true;
	}
	case TOK_VAR: {
		struct var_ast_node *n
			= (struct var_ast_node *)ast_node_create(AST_VAR);
		if (!n) {
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			return false;
		}

		n->n_ident = claim_token(ctx);
		*out = (struct ast_node *)n;
		return true;
	}
	case TOK_VAR_SPLAT: {
		struct var_splat_ast_node *n
			= (struct var_splat_ast_node *)ast_node_create(
				AST_VAR_SPLAT);
		if (!n) {
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			return false;
		}

		n->n_ident = claim_token(ctx);
		*out = (struct ast_node *)n;
		return true;
	}
	case TOK_STRING: {
		struct string_ast_node *n
			= (struct string_ast_node *)ast_node_create(AST_STRING);
		if (!n) {
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			return false;
		}

		n->n_value = claim_token(ctx);
		*out = (struct ast_node *)n;
		return true;
	}
	case TOK_SYMBOL:
		switch (tok->tok_symbol) {
		case SYM_LEFT_PAREN:
		case SYM_LEFT_BRACE:
		case SYM_DOLLAR_LEFT_PAREN:
		case SYM_AT_LEFT_BRACE:
		case SYM_AT_LEFT_PAREN:
			return parse_arith_value(ctx, out);
		default:
			break;
		}
		break;
	default:
		break;
	}

	/* every supported form has returned above */
	report_error(ctx, "encountered unsupported command arg");
	return false;
}
static bool parse_redirect_to_fd(
struct parse_ctx *ctx,
unsigned int in_fd,
bool append,
struct ast_node **out)
{
if (ctx->p_status != BSHELL_SUCCESS) {
return false;
}
struct redirection_ast_node *redirect
= (struct redirection_ast_node *)ast_node_create(
AST_REDIRECTION);
redirect->n_in = in_fd;
redirect->n_append = append;
if (!parse_symbol(ctx, SYM_AMPERSAND)) {
ast_node_destroy((struct ast_node *)redirect);
return false;
}
struct lex_token *out_tok = NULL;
struct ast_node *out_expr = NULL;
long long out_fd = -1;
if (peek_word(ctx, &out_tok)) {
const char *s = out_tok->tok_str;
char *ep;
out_fd = strtoll(s, &ep, 10);
if (*ep == '\0') {
discard_token(ctx);
out_tok = NULL;
} else {
out_fd = -1;
}
} else if (!parse_cmdcall_arg(ctx, &out_expr)) {
return false;
}
redirect->n_out_is_fd = (out_fd >= 0) || out_expr;
redirect->n_out_is_expr = out_expr != NULL;
redirect->n_out = (unsigned int)out_fd;
redirect->n_out_path_expr = out_expr;
if (out_tok) {
redirect->n_out_tok = claim_token(ctx);
redirect->n_out_path = out_tok->tok_str;
}
*out = (struct ast_node *)redirect;
return true;
}
static bool parse_redirect_to_file_squashed(
struct parse_ctx *ctx,
unsigned int in_fd,
bool append,
const char *str,
struct ast_node **out)
{
if (ctx->p_status != BSHELL_SUCCESS) {
return false;
}
struct lex_token *tok = peek_token(ctx);
if (*str == '\0') {
return false;
}
struct redirection_ast_node *redirect
= (struct redirection_ast_node *)ast_node_create(
AST_REDIRECTION);
redirect->n_in = in_fd;
redirect->n_append = append;
redirect->n_out_is_fd = false;
redirect->n_out_is_expr = false;
redirect->n_out_path = str;
redirect->n_out_tok = claim_token(ctx);
*out = (struct ast_node *)redirect;
return true;
}
static bool parse_redirect_to_file_separate(
struct parse_ctx *ctx,
unsigned int in_fd,
bool append,
struct ast_node **out)
{
if (ctx->p_status != BSHELL_SUCCESS) {
return false;
}
struct ast_node *out_path = NULL;
if (!parse_cmdcall_arg(ctx, &out_path)) {
ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
return false;
}
struct redirection_ast_node *redirect
= (struct redirection_ast_node *)ast_node_create(
AST_REDIRECTION);
redirect->n_in = in_fd;
redirect->n_append = append;
redirect->n_out_is_fd = false;
redirect->n_out_is_expr = true;
redirect->n_out_path_expr = out_path;
*out = (struct ast_node *)redirect;
return true;
}
/*
 * Try to parse a redirection starting at the current token.
 *
 * The token must be a TOK_WORD of the form `[digits]>[>][path]`:
 *   - optional leading digits select the descriptor being redirected
 *     (descriptor 1 when absent),
 *   - a second `>` selects append mode,
 *   - any text left in the word is the target path.
 *
 * When the word ends right after the operator, the word is discarded and the
 * target is read from the following tokens: first as `&target`, then as a
 * separate command argument.
 *
 * Returns false, consuming nothing, if the current token is not a word or
 * does not contain a `>` in the expected position.
 */
bool parse_redirect(struct parse_ctx *ctx, struct ast_node **out)
{
	struct lex_token *word = peek_token(ctx);
	if (!word) {
		return false;
	}

	if (word->tok_type != TOK_WORD) {
		return false;
	}

	const char *cursor = word->tok_str;
	unsigned int source_fd = 1;

	if (fx_wchar_is_number(*cursor)) {
		/* explicit descriptor: accumulate the decimal digits */
		source_fd = 0;
		do {
			source_fd = source_fd * 10 + (*cursor - '0');
			cursor++;
		} while (fx_wchar_is_number(*cursor));
	}

	if (*cursor != '>') {
		return false;
	}
	cursor++;

	bool appending = (*cursor == '>');
	if (appending) {
		cursor++;
	}

	if (*cursor != '\0') {
		/* the rest of the word is the target path */
		return parse_redirect_to_file_squashed(
			ctx,
			source_fd,
			appending,
			cursor,
			out);
	}

	/* operator only: the target comes from the following tokens */
	discard_token(ctx);

	return parse_redirect_to_fd(ctx, source_fd, appending, out)
	       || parse_redirect_to_file_separate(
		       ctx,
		       source_fd,
		       appending,
		       out);
}
/*
 * Report whether the next token may appear in a command call.
 *
 * Tokens fall into three groups:
 *   - terminators (no token, linefeed, `|`, `&`, `;`, `)`, `}`, `]`):
 *     never part of a command call; seeing one ends the call being parsed.
 *   - restricted items (keywords, numbers, variables, strings, compound
 *     words, `+`, `-`): usable inside a call, but not as its first item
 *     unless the call operator was used. For these the result is the
 *     `unrestricted` argument.
 *   - everything else: always a command call item.
 */
static bool peek_cmdcall_item(struct parse_ctx *ctx, bool unrestricted)
{
	switch (peek_token_type(ctx)) {
	/* terminators */
	case TOK_NONE:
	case TOK_LINEFEED:
		return false;

	/* restricted items */
	case TOK_WORD_START:
	case TOK_STRING:
	case TOK_VAR_SPLAT:
	case TOK_VAR:
	case TOK_DOUBLE:
	case TOK_INT:
	case TOK_KEYWORD:
		return unrestricted;

	case TOK_SYMBOL:
		switch (peek_unknown_symbol(ctx)) {
		case SYM_RIGHT_BRACKET:
		case SYM_RIGHT_BRACE:
		case SYM_RIGHT_PAREN:
		case SYM_SEMICOLON:
		case SYM_AMPERSAND:
		case SYM_PIPE:
			return false;
		case SYM_HYPHEN:
		case SYM_PLUS:
			return unrestricted;
		default:
			return true;
		}

	default:
		return true;
	}
}
bool parse_cmdcall(struct parse_ctx *ctx, struct ast_node **out)
{
struct cmdcall_ast_node *node
= (struct cmdcall_ast_node *)ast_node_create(AST_CMDCALL);
if (!node) {
ctx->p_status = BSHELL_ERR_NO_MEMORY;
return false;
}
struct ast_node *child = NULL;
bool unrestricted = false;
bool ok = true;
bool stop = false;
if (parse_symbol(ctx, SYM_AMPERSAND)) {
unrestricted = true;
}
if (!peek_cmdcall_item(ctx, unrestricted)) {
return false;
}
struct lex_token *tok = peek_token(ctx);
if (!tok) {
return false;
}
if (!parse_cmdcall_arg(ctx, &child)) {
return false;
}
fx_queue_push_back(&node->n_args, &child->n_entry);
while (ok && !stop) {
if (!peek_cmdcall_item(ctx, true)) {
break;
}
struct lex_token *tok = peek_token(ctx);
if (!tok) {
break;
}
if (parse_redirect(ctx, &child)) {
fx_queue_push_back(&node->n_redirect, &child->n_entry);
} else if (parse_cmdcall_arg(ctx, &child)) {
fx_queue_push_back(&node->n_args, &child->n_entry);
} else {
ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
ok = false;
break;
}
}
if (!ok) {
ast_node_destroy((struct ast_node *)node);
node = NULL;
}
*out = (struct ast_node *)node;
return ok;
}
/*
 * Report whether a command can start at the current token: either the call
 * operator `&`, or any token accepted as the first item of a command call.
 */
bool peek_command(struct parse_ctx *ctx)
{
	return peek_symbol(ctx, SYM_AMPERSAND) || peek_cmdcall_item(ctx, false);
}
/*
 * Parse a command call, optionally followed by `| command` stages.
 *
 * A single command is returned as its AST_CMDCALL node. When at least one
 * `|` follows, the commands are collected, in order, in the n_stages queue
 * of an AST_PIPELINE node, which is returned instead.
 *
 * Returns false if the first command cannot be parsed, or (with
 * BSHELL_ERR_BAD_SYNTAX) if a `|` is not followed by a valid command.
 */
bool parse_command(struct parse_ctx *ctx, struct ast_node **out)
{
	struct ast_node *cmdcall = NULL;
	if (!parse_cmdcall(ctx, &cmdcall)) {
		return false;
	}

	struct pipeline_ast_node *pipeline = NULL;

	/* a `;` or linefeed is not a `|`, so this loop also stops on them */
	while (parse_symbol(ctx, SYM_PIPE)) {
		if (!pipeline) {
			pipeline = (struct pipeline_ast_node *)ast_node_create(
				AST_PIPELINE);
			if (!pipeline) {
				ctx->p_status = BSHELL_ERR_NO_MEMORY;
				ast_node_destroy(cmdcall);
				return false;
			}

			fx_queue_push_back(
				&pipeline->n_stages,
				&cmdcall->n_entry);
		}

		if (!parse_cmdcall(ctx, &cmdcall)) {
			ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
			/* every stage parsed so far is queued in the pipeline.
			 * NOTE(review): assumes ast_node_destroy() also
			 * releases queued child nodes - confirm. */
			ast_node_destroy((struct ast_node *)pipeline);
			return false;
		}

		fx_queue_push_back(&pipeline->n_stages, &cmdcall->n_entry);
	}

	*out = pipeline ? (struct ast_node *)pipeline : cmdcall;
	return true;
}
bool parse_pipeline(
struct parse_ctx *ctx,
struct ast_node *first_item,
struct ast_node **out)
{
struct pipeline_ast_node *pipeline
= (struct pipeline_ast_node *)ast_node_create(AST_PIPELINE);
fx_queue_push_back(&pipeline->n_stages, &first_item->n_entry);
while (1) {
if (!parse_symbol(ctx, SYM_PIPE)) {
break;
}
struct ast_node *cmdcall = NULL;
if (!parse_cmdcall(ctx, &cmdcall)) {
ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
return false;
}
fx_queue_push_back(&pipeline->n_stages, &cmdcall->n_entry);
}
*out = (struct ast_node *)pipeline;
return true;
}
+15
View File
@@ -0,0 +1,15 @@
#include "../syntax.h"
/*
 * Parse an expression: an arithmetic expression if one can start at the
 * current token, otherwise (or if that attempt fails) a command.
 */
bool parse_expr(struct parse_ctx *ctx, struct ast_node **out)
{
	if (peek_arith_expr(ctx)
	    && parse_arith_expr(ctx, PRECEDENCE_MINIMUM, out)) {
		return true;
	}

	if (peek_command(ctx)) {
		return parse_command(ctx, out);
	}

	return false;
}
+85
View File
@@ -0,0 +1,85 @@
#include "../syntax.h"
/*
 * Parse a function definition:
 *
 *     func NAME ( [$param {, $param}] ) { body }
 *
 * Returns false without consuming anything if the next token is not the
 * `func` keyword. On success *out receives an AST_FUNC node holding the
 * name token, one AST_VAR node per parameter (n_params) and the body block
 * (n_body). On failure an error is reported and the partially built node is
 * destroyed.
 */
bool parse_func(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!parse_keyword(ctx, KW_FUNC)) {
		return false;
	}

	struct lex_token *name = NULL;
	if (!parse_word(ctx, &name)) {
		report_error(ctx, "expected function identifier");
		return false;
	}

	struct func_ast_node *func
		= (struct func_ast_node *)ast_node_create(AST_FUNC);
	if (!func) {
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		lex_token_destroy(name);
		return false;
	}

	func->n_name = name;

	if (!parse_symbol(ctx, SYM_LEFT_PAREN)) {
		report_error(ctx, "expected `(` after function identifier");
		ast_node_destroy((struct ast_node *)func);
		return false;
	}

	size_t nr_args = 0;
	bool ok = true;

	while (1) {
		if (parse_symbol(ctx, SYM_RIGHT_PAREN)) {
			break;
		}

		/* every parameter after the first must follow a comma */
		if (nr_args > 0 && !parse_symbol(ctx, SYM_COMMA)) {
			report_error(
				ctx,
				"expected `,` or `)` after parameter name");
			ok = false;
			break;
		}

		struct lex_token *param_token = NULL;
		struct var_ast_node *param_node = NULL;

		if (!parse_var(ctx, &param_token)) {
			report_error(ctx, "expected parameter variable");
			ok = false;
			break;
		}

		param_node = (struct var_ast_node *)ast_node_create(AST_VAR);
		if (!param_node) {
			ok = false;
			ctx->p_status = BSHELL_ERR_NO_MEMORY;
			lex_token_destroy(param_token);
			break;
		}

		param_node->n_ident = param_token;
		fx_queue_push_back(
			&func->n_params,
			&param_node->n_base.n_entry);

		/* count the parameter so the comma check above applies to the
		 * next one; without this `,` was never accepted */
		nr_args++;
	}

	if (!ok) {
		if (ctx->p_status == BSHELL_SUCCESS) {
			ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
		}

		ast_node_destroy((struct ast_node *)func);
		return false;
	}

	if (!parse_block(ctx, &func->n_body)) {
		report_error(ctx, "failed to parse function body");
		ast_node_destroy((struct ast_node *)func);
		return false;
	}

	*out = (struct ast_node *)func;
	return true;
}
+173
View File
@@ -0,0 +1,173 @@
#include "../lex.h"
#include "../parse.h"
#include "../syntax.h"
#include "../token.h"
/*
 * Take the current token from the parser's lexer via lex_ctx_claim().
 * Callers in this parser store the result or release it with
 * lex_token_destroy(), i.e. they treat it as owned.
 */
struct lex_token *claim_token(struct parse_ctx *ctx)
{
	struct lex_token *claimed = lex_ctx_claim(ctx->p_src);
	return claimed;
}
/*
 * Drop the current token from the parser's lexer via lex_ctx_discard().
 */
void discard_token(struct parse_ctx *ctx)
{
	/* a `return expr;` is not permitted in a function returning void */
	lex_ctx_discard(ctx->p_src);
}
/*
 * Look at the current token of the parser's lexer via lex_ctx_peek().
 * Callers check the result for NULL before using it.
 */
struct lex_token *peek_token(struct parse_ctx *ctx)
{
	struct lex_token *current = lex_ctx_peek(ctx->p_src);
	return current;
}
/*
 * Return the type of the current token, or TOK_NONE when no token is
 * available.
 */
enum token_type peek_token_type(struct parse_ctx *ctx)
{
	struct lex_token *current = peek_token(ctx);
	if (!current) {
		return TOK_NONE;
	}

	return current->tok_type;
}
/*
 * Return the symbol id of the current token, or SYM_NONE when there is no
 * token or it is not a TOK_SYMBOL.
 */
enum token_symbol peek_unknown_symbol(struct parse_ctx *ctx)
{
	struct lex_token *current = peek_token(ctx);
	if (!current || current->tok_type != TOK_SYMBOL) {
		return SYM_NONE;
	}

	return current->tok_symbol;
}
/*
 * Return the keyword id of the current token, or KW_NONE when there is no
 * token or it is not a TOK_KEYWORD.
 */
enum token_keyword peek_unknown_keyword(struct parse_ctx *ctx)
{
	struct lex_token *current = peek_token(ctx);
	if (!current || current->tok_type != TOK_KEYWORD) {
		return KW_NONE;
	}

	return current->tok_keyword;
}
/*
 * If the current token is a TOK_WORD, store it in *out (without consuming
 * it) and return true. Otherwise return false and leave *out untouched.
 */
bool peek_word(struct parse_ctx *ctx, struct lex_token **out)
{
	struct lex_token *current = peek_token(ctx);
	if (!current || current->tok_type != TOK_WORD) {
		return false;
	}

	*out = current;
	return true;
}
/*
 * Report whether the current token is a TOK_LINEFEED, without consuming it.
 */
bool peek_linefeed(struct parse_ctx *ctx)
{
	struct lex_token *current = peek_token(ctx);
	return current && current->tok_type == TOK_LINEFEED;
}
/*
 * Report whether the current token is the symbol `sym`, without consuming
 * it.
 */
bool peek_symbol(struct parse_ctx *ctx, enum token_symbol sym)
{
	struct lex_token *current = peek_token(ctx);

	return current
	       && current->tok_type == TOK_SYMBOL
	       && current->tok_symbol == sym;
}
/*
 * Consume the current token if it is a TOK_LINEFEED.
 * Returns true when a linefeed was consumed, false otherwise.
 */
bool parse_linefeed(struct parse_ctx *ctx)
{
	struct lex_token *current = peek_token(ctx);
	if (!current || current->tok_type != TOK_LINEFEED) {
		return false;
	}

	discard_token(ctx);
	return true;
}
/*
 * Consume the current token if it is the symbol `sym`.
 * Returns true when the symbol was consumed; otherwise nothing is consumed
 * and false is returned.
 */
bool parse_symbol(struct parse_ctx *ctx, enum token_symbol sym)
{
	struct lex_token *current = peek_token(ctx);

	bool matches = current
		       && current->tok_type == TOK_SYMBOL
		       && current->tok_symbol == sym;
	if (matches) {
		discard_token(ctx);
	}

	return matches;
}
/*
 * Consume the current token if it is the keyword `kw`.
 * Returns true when the keyword was consumed; otherwise nothing is consumed
 * and false is returned.
 */
bool parse_keyword(struct parse_ctx *ctx, enum token_keyword kw)
{
	struct lex_token *current = peek_token(ctx);

	bool matches = current
		       && current->tok_type == TOK_KEYWORD
		       && current->tok_keyword == kw;
	if (matches) {
		discard_token(ctx);
	}

	return matches;
}
/*
 * If the current token is a TOK_WORD, claim it into *out and return true.
 * Otherwise return false, consume nothing and leave *out untouched.
 */
bool parse_word(struct parse_ctx *ctx, struct lex_token **out)
{
	struct lex_token *current = peek_token(ctx);
	if (!current || current->tok_type != TOK_WORD) {
		return false;
	}

	*out = claim_token(ctx);
	return true;
}
/*
 * If the current token is a TOK_VAR, claim it into *out and return true.
 * Otherwise return false, consume nothing and leave *out untouched.
 */
bool parse_var(struct parse_ctx *ctx, struct lex_token **out)
{
	struct lex_token *current = peek_token(ctx);
	if (!current || current->tok_type != TOK_VAR) {
		return false;
	}

	*out = claim_token(ctx);
	return true;
}
/*
 * If the current token is a TOK_INT, copy its value into *out, consume the
 * token and return true. Otherwise return false and consume nothing.
 */
bool parse_int(struct parse_ctx *ctx, long long *out)
{
	struct lex_token *current = peek_token(ctx);
	if (!current || current->tok_type != TOK_INT) {
		return false;
	}

	/* read the value before the token is discarded */
	long long value = current->tok_int;
	*out = value;
	discard_token(ctx);
	return true;
}
+110
View File
@@ -0,0 +1,110 @@
#include "../syntax.h"
/*
 * Append a new AST_IF_BRANCH node holding `cond` and `body` to the branch
 * queue of `group`. `cond` may be NULL (parse_if() passes NULL for `else`).
 *
 * Returns false if the branch node cannot be allocated; `cond` and `body`
 * are not touched in that case.
 */
static bool add_branch(
	struct if_ast_node *group,
	struct ast_node *cond,
	struct ast_node *body)
{
	struct if_branch_ast_node *entry
		= (struct if_branch_ast_node *)ast_node_create(AST_IF_BRANCH);

	if (entry) {
		entry->n_cond = cond;
		entry->n_body = body;
		fx_queue_push_back(&group->n_branches, &entry->n_base.n_entry);
		return true;
	}

	return false;
}
/*
 * Parse an if-statement:
 *
 *     if ( expr ) { ... } { elseif expr { ... } } [ else { ... } ]
 *
 * Returns false without consuming anything if the next token is not the
 * `if` keyword. On success *out receives an AST_IF node whose n_branches
 * queue holds one AST_IF_BRANCH per branch, in source order; the `else`
 * branch has a NULL condition. On failure an error is reported and the
 * nodes built so far are destroyed.
 */
bool parse_if(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!parse_keyword(ctx, KW_IF)) {
		return false;
	}

	if (!parse_symbol(ctx, SYM_LEFT_PAREN)) {
		report_error(ctx, "expected `(` after `if`");
		return false;
	}

	struct ast_node *first_cond = NULL;
	if (!parse_expr(ctx, &first_cond)) {
		report_error(ctx, "invalid if condition");
		return false;
	}

	if (!parse_symbol(ctx, SYM_RIGHT_PAREN)) {
		report_error(ctx, "expected `)` after if-condition");
		ast_node_destroy(first_cond);
		return false;
	}

	struct ast_node *first_body = NULL;
	if (!parse_block(ctx, &first_body)) {
		report_error(ctx, "invalid if body");
		ast_node_destroy(first_cond);
		return false;
	}

	struct if_ast_node *chain
		= (struct if_ast_node *)ast_node_create(AST_IF);
	bool linked = chain && add_branch(chain, first_cond, first_body);
	if (!linked) {
		/* the condition and body are not attached to the chain yet,
		 * so they are released individually */
		ctx->p_status = BSHELL_ERR_NO_MEMORY;
		ast_node_destroy(first_cond);
		ast_node_destroy(first_body);
		if (chain) {
			ast_node_destroy((struct ast_node *)chain);
		}
		return false;
	}

	/* trailing `elseif` / `else` branches; `else` is always the last */
	bool is_else = false;
	do {
		struct ast_node *branch_cond = NULL;
		struct ast_node *branch_body = NULL;

		is_else = parse_keyword(ctx, KW_ELSE);
		if (!is_else) {
			if (!parse_keyword(ctx, KW_ELSEIF)) {
				/* no further branches */
				break;
			}

			if (!parse_expr(ctx, &branch_cond)) {
				report_error(
					ctx,
					"invalid conditional expression");
				ast_node_destroy((struct ast_node *)chain);
				return false;
			}
		}

		if (!parse_block(ctx, &branch_body)) {
			report_error(ctx, "invalid conditional body");
			if (branch_cond) {
				ast_node_destroy(branch_cond);
			}
			ast_node_destroy((struct ast_node *)chain);
			return false;
		}

		if (!add_branch(chain, branch_cond, branch_body)) {
			report_error(ctx, "failed to add branch to if-group");
			if (branch_cond) {
				ast_node_destroy(branch_cond);
			}
			ast_node_destroy(branch_body);
			ast_node_destroy((struct ast_node *)chain);
			return false;
		}
	} while (!is_else);

	*out = (struct ast_node *)chain;
	return true;
}
+21
View File
@@ -0,0 +1,21 @@
#include "../syntax.h"
/*
 * Report whether the current token is a keyword (of any kind).
 */
bool peek_keyword_expr(struct parse_ctx *ctx)
{
	if (peek_unknown_keyword(ctx) == KW_NONE) {
		return false;
	}

	return true;
}
/*
 * Parse the construct introduced by the keyword at the current token.
 *
 * `if` is handled by parse_if() and `func` by parse_func(). Returns false
 * if the current token is not a keyword; any other keyword sets
 * BSHELL_ERR_BAD_SYNTAX and returns false.
 */
bool parse_keyword_expr(struct parse_ctx *ctx, struct ast_node **out)
{
	switch (peek_unknown_keyword(ctx)) {
	case KW_FUNC:
		return parse_func(ctx, out);
	case KW_IF:
		return parse_if(ctx, out);
	case KW_NONE:
		/* not a keyword at all: not an error, just not ours */
		return false;
	default:
		break;
	}

	/* a keyword that cannot start a statement */
	ctx->p_status = BSHELL_ERR_BAD_SYNTAX;
	return false;
}
+99
View File
@@ -0,0 +1,99 @@
#include "../syntax.h"
/*
 * Report whether a statement can start at the current token: a keyword
 * construct, an arithmetic expression, or a command.
 */
bool peek_statement(struct parse_ctx *ctx)
{
	return peek_keyword_expr(ctx)
	       || peek_arith_expr(ctx)
	       || peek_command(ctx);
}
/*
 * Parse one statement and store it in *out.
 *
 * The forms are tried in order: keyword construct, arithmetic expression,
 * command. A form is attempted only when its peek function accepts the
 * current token, and the next form is tried when an attempt fails.
 *
 * Returns false when no token is available (error or end of input), when
 * every attempted form fails, or - after reporting an error - when the
 * current token cannot start any form.
 */
bool parse_statement(struct parse_ctx *ctx, struct ast_node **out)
{
	if (!peek_token(ctx)) {
		/* error, or EOF */
		return false;
	}

	bool recognised = false;

	if (peek_keyword_expr(ctx)) {
		recognised = true;
		if (parse_keyword_expr(ctx, out)) {
			return true;
		}
	}

	if (peek_arith_expr(ctx)) {
		recognised = true;
		if (parse_arith_expr(ctx, PRECEDENCE_MINIMUM, out)) {
			return true;
		}
	}

	if (peek_command(ctx)) {
		recognised = true;
		if (parse_command(ctx, out)) {
			return true;
		}
	}

	if (!recognised) {
		report_error(
			ctx,
			"encountered unknown token while parsing statement");
	}

	return false;
}
/*
 * Collapse a statement list that holds exactly one statement.
 *
 * If the list has exactly one entry, that statement is removed from the
 * list, the list node is destroyed and the statement is returned. An empty
 * list, or one with two or more statements, is returned unchanged.
 */
static struct ast_node *convert_single_statement(
	struct stmt_list_ast_node *list)
{
	fx_queue_entry *only = fx_queue_first(&list->n_statements);
	bool exactly_one = only && !fx_queue_next(only);

	if (!exactly_one) {
		return (struct ast_node *)list;
	}

	/* unlink the statement first so destroying the list leaves it alone */
	fx_queue_delete(&list->n_statements, only);
	struct ast_node *stmt = fx_unbox(struct ast_node, only, n_entry);

	ast_node_destroy((struct ast_node *)list);
	return stmt;
}
bool parse_statement_list(struct parse_ctx *ctx, struct ast_node **out)
{
struct stmt_list_ast_node *stmt_list
= (struct stmt_list_ast_node *)ast_node_create(AST_STMT_LIST);
bool ok = true;
while (ok) {
parse_linefeed(ctx);
struct ast_node *stmt = NULL;
if (!parse_statement(ctx, &stmt)) {
ok = false;
break;
}
fx_queue_push_back(&stmt_list->n_statements, &stmt->n_entry);
if (!parse_symbol(ctx, SYM_SEMICOLON)) {
break;
}
}
if (!ok) {
ast_node_destroy((struct ast_node *)stmt_list);
return false;
}
*out = convert_single_statement(stmt_list);
return true;
}
+213
View File
@@ -0,0 +1,213 @@
#include "token.h"
#include <fx/string.h>
#include <stdlib.h>
#include <string.h>
/*
 * Allocate a zero-initialised token of the given type.
 * Returns NULL if the allocation fails.
 */
struct lex_token *lex_token_create(enum token_type type)
{
	struct lex_token *tok = calloc(1, sizeof *tok);
	if (tok) {
		tok->tok_type = type;
	}

	return tok;
}
/*
 * Allocate a token of the given type whose tok_str is a private copy of
 * `s` (made with fx_strdup()).
 * Returns NULL if either the token or the string copy cannot be allocated.
 */
struct lex_token *lex_token_create_with_string(
	enum token_type type,
	const char *s)
{
	struct lex_token *result = lex_token_create(type);
	if (!result) {
		return NULL;
	}

	char *copy = fx_strdup(s);
	result->tok_str = copy;
	if (copy) {
		return result;
	}

	/* string copy failed: release the half-built token */
	free(result);
	return NULL;
}
void lex_token_destroy(struct lex_token *tok)
{
switch (tok->tok_type) {
case TOK_WORD:
case TOK_FLAG:
case TOK_STRING:
if (tok->tok_str) {
free(tok->tok_str);
}
break;
default:
break;
}
free(tok);
}
/*
 * Change the type of a token in place and return it.
 *
 * If the token is currently a word, flag or string, its string is freed and
 * tok_str is reset to NULL before the type changes.
 *
 * NOTE(review): TOK_VAR / TOK_VAR_SPLAT tokens also carry a string value
 * but are not released here - confirm whether that is intentional.
 */
struct lex_token *lex_token_change_type(
	struct lex_token *tok,
	enum token_type new_type)
{
	const bool releases_string = tok->tok_type == TOK_WORD
				     || tok->tok_type == TOK_FLAG
				     || tok->tok_type == TOK_STRING;

	if (releases_string && tok->tok_str) {
		free(tok->tok_str);
		tok->tok_str = NULL;
	}

	tok->tok_type = new_type;
	return tok;
}
/*
 * Replace the string of a string-carrying token with a copy of `s`.
 *
 * Tokens whose type has no string value are left untouched. The previous
 * string is freed; if the copy fails, tok_str ends up NULL.
 */
void lex_token_change_string(struct lex_token *tok, const char *s)
{
	if (lex_token_has_string_value(tok)) {
		/* free(NULL) is a no-op, so an unset string needs no check */
		free(tok->tok_str);
		tok->tok_str = fx_strdup(s);
	}
}
/*
 * Expands to a `case` label for enumerator `x` that returns the
 * enumerator's own name as a string literal. Intended for use inside the
 * switch statements of the *_to_string() functions below.
 */
#define ENUM_STR(x) \
case x: \
return #x
/*
 * Return the enumerator name of a token type (e.g. "TOK_WORD"), or
 * "<unknown>" for a value with no entry here.
 */
const char *token_type_to_string(enum token_type type)
{
	switch (type) {
	case TOK_NONE:
		return "TOK_NONE";
	case TOK_KEYWORD:
		return "TOK_KEYWORD";
	case TOK_SYMBOL:
		return "TOK_SYMBOL";
	case TOK_INT:
		return "TOK_INT";
	case TOK_DOUBLE:
		return "TOK_DOUBLE";
	case TOK_WORD:
		return "TOK_WORD";
	case TOK_WORD_START:
		return "TOK_WORD_START";
	case TOK_WORD_END:
		return "TOK_WORD_END";
	case TOK_OPERATOR:
		return "TOK_OPERATOR";
	case TOK_VAR:
		return "TOK_VAR";
	case TOK_VAR_SPLAT:
		return "TOK_VAR_SPLAT";
	case TOK_FLAG:
		return "TOK_FLAG";
	case TOK_STRING:
		return "TOK_STRING";
	case TOK_STR_START:
		return "TOK_STR_START";
	case TOK_STR_END:
		return "TOK_STR_END";
	case TOK_LINEFEED:
		return "TOK_LINEFEED";
	default:
		return "<unknown>";
	}
}
/*
 * Return the enumerator name of a keyword (e.g. "KW_IF"), or "<unknown>"
 * for a value with no entry here.
 */
const char *token_keyword_to_string(enum token_keyword keyword)
{
	switch (keyword) {
	case KW_NONE:
		return "KW_NONE";
	case KW_FUNC:
		return "KW_FUNC";
	case KW_IF:
		return "KW_IF";
	case KW_ELSEIF:
		return "KW_ELSEIF";
	case KW_ELSE:
		return "KW_ELSE";
	default:
		return "<unknown>";
	}
}
const char *token_symbol_to_string(enum token_symbol sym)
{
switch (sym) {
ENUM_STR(SYM_NONE);
ENUM_STR(SYM_PLUS);
ENUM_STR(SYM_HYPHEN);
ENUM_STR(SYM_FORWARD_SLASH);
ENUM_STR(SYM_ASTERISK);
ENUM_STR(SYM_AMPERSAND);
ENUM_STR(SYM_PERCENT);
ENUM_STR(SYM_SQUOTE);
ENUM_STR(SYM_DQUOTE);
ENUM_STR(SYM_HASH);
ENUM_STR(SYM_COLON_COLON);
ENUM_STR(SYM_SEMICOLON);
ENUM_STR(SYM_COMMA);
ENUM_STR(SYM_DOLLAR);
ENUM_STR(SYM_DOLLAR_LEFT_PAREN);
ENUM_STR(SYM_DOLLAR_LEFT_BRACE);
ENUM_STR(SYM_DOT);
ENUM_STR(SYM_DOT_DOT);
ENUM_STR(SYM_PIPE);
ENUM_STR(SYM_AT);
ENUM_STR(SYM_AT_LEFT_PAREN);
ENUM_STR(SYM_AT_LEFT_BRACE);
ENUM_STR(SYM_LEFT_BRACE);
ENUM_STR(SYM_RIGHT_BRACE);
ENUM_STR(SYM_LEFT_BRACKET);
ENUM_STR(SYM_RIGHT_BRACKET);
ENUM_STR(SYM_LEFT_PAREN);
ENUM_STR(SYM_RIGHT_PAREN);
ENUM_STR(SYM_EQUAL);
ENUM_STR(SYM_PLUS_EQUAL);
ENUM_STR(SYM_HYPHEN_EQUAL);
ENUM_STR(SYM_ASTERISK_EQUAL);
ENUM_STR(SYM_FORWARD_SLASH_EQUAL);
ENUM_STR(SYM_PERCENT_EQUAL);
ENUM_STR(SYM_QUESTION_DOT);
ENUM_STR(SYM_QUESTION_LEFT_BRACKET);
default:
return "<unknown>";
}
}
const char *token_operator_to_string(enum token_operator op)
{
switch (op) {
ENUM_STR(TKOP_BAND);
ENUM_STR(TKOP_BOR);
ENUM_STR(TKOP_BXOR);
ENUM_STR(TKOP_BNOT);
ENUM_STR(TKOP_SHL);
ENUM_STR(TKOP_SHR);
ENUM_STR(TKOP_EQ);
ENUM_STR(TKOP_NE);
ENUM_STR(TKOP_GT);
ENUM_STR(TKOP_LT);
ENUM_STR(TKOP_GE);
ENUM_STR(TKOP_LE);
ENUM_STR(TKOP_MATCH);
ENUM_STR(TKOP_NOTMATCH);
ENUM_STR(TKOP_REPLACE);
ENUM_STR(TKOP_LIKE);
ENUM_STR(TKOP_NOTLIKE);
ENUM_STR(TKOP_IN);
ENUM_STR(TKOP_F);
ENUM_STR(TKOP_NOTIN);
ENUM_STR(TKOP_CONTAINS);
ENUM_STR(TKOP_NOTCONTAINS);
ENUM_STR(TKOP_AND);
ENUM_STR(TKOP_OR);
ENUM_STR(TKOP_XOR);
ENUM_STR(TKOP_NOT);
ENUM_STR(TKOP_SPLIT);
ENUM_STR(TKOP_JOIN);
ENUM_STR(TKOP_IS);
ENUM_STR(TKOP_ISNOT);
ENUM_STR(TKOP_AS);
default:
return "<unknown>";
}
}
+184
View File
@@ -0,0 +1,184 @@
#ifndef IVY_LANG_LEX_H_
#define IVY_LANG_LEX_H_
#include <fx/queue.h>
#include <stdbool.h>
/* A row/column position; struct lex_token uses two of these to record where
 * a token starts and ends. */
struct char_cell {
unsigned long c_row, c_col;
};
/*
 * Kind of a lexer token (struct lex_token.tok_type).
 *
 * TOK_NONE is 0 and is what peek_token_type() reports when no token is
 * available. The real kinds are numbered from __TOK_INDEX_BASE + 1 (101)
 * up to, but not including, __TOK_INDEX_LIMIT.
 */
enum token_type {
TOK_NONE = 0,
__TOK_INDEX_BASE = 100,
TOK_KEYWORD,
TOK_SYMBOL,
TOK_INT,
TOK_DOUBLE,
TOK_WORD,
TOK_WORD_START,
TOK_WORD_END,
TOK_FLAG,
TOK_OPERATOR,
TOK_VAR,
TOK_VAR_SPLAT,
TOK_STRING,
TOK_STR_START,
TOK_STR_END,
TOK_LINEFEED,
__TOK_INDEX_LIMIT,
};
/*
 * Keyword carried by a TOK_KEYWORD token (struct lex_token.tok_keyword).
 *
 * KW_NONE is 0; the real keywords are numbered from __KW_INDEX_BASE + 1
 * (201) up to, but not including, __KW_INDEX_LIMIT.
 */
enum token_keyword {
KW_NONE = 0,
__KW_INDEX_BASE = 200,
KW_FUNC,
KW_IF,
KW_ELSEIF,
KW_ELSE,
__KW_INDEX_LIMIT,
};
/*
 * Operator carried by a TOK_OPERATOR token (struct lex_token.tok_operator).
 *
 * TKOP_NONE is 0; the real operators are numbered from
 * __TKOP_INDEX_BASE + 1 (301) up to, but not including,
 * __TKOP_INDEX_LIMIT.
 */
enum token_operator {
TKOP_NONE = 0,
__TKOP_INDEX_BASE = 300,
TKOP_F,
TKOP_BAND,
TKOP_BOR,
TKOP_BXOR,
TKOP_BNOT,
TKOP_SHL,
TKOP_SHR,
TKOP_EQ,
TKOP_NE,
TKOP_GT,
TKOP_LT,
TKOP_GE,
TKOP_LE,
TKOP_MATCH,
TKOP_NOTMATCH,
TKOP_REPLACE,
TKOP_LIKE,
TKOP_NOTLIKE,
TKOP_IN,
TKOP_NOTIN,
TKOP_CONTAINS,
TKOP_NOTCONTAINS,
TKOP_AND,
TKOP_OR,
TKOP_XOR,
TKOP_NOT,
TKOP_SPLIT,
TKOP_JOIN,
TKOP_IS,
TKOP_ISNOT,
TKOP_AS,
__TKOP_INDEX_LIMIT,
};
/*
 * Symbol carried by a TOK_SYMBOL token (struct lex_token.tok_symbol).
 *
 * SYM_NONE is 0; the real symbols are numbered from __SYM_INDEX_BASE + 1
 * (401) up to, but not including, __SYM_INDEX_LIMIT. Names describe the
 * punctuation, with multi-character symbols spelled out left to right
 * (e.g. SYM_DOLLAR_LEFT_PAREN, SYM_PLUS_EQUAL).
 */
enum token_symbol {
SYM_NONE = 0,
__SYM_INDEX_BASE = 400,
SYM_BANG,
SYM_PLUS,
SYM_HYPHEN,
SYM_FORWARD_SLASH,
SYM_ASTERISK,
SYM_AMPERSAND,
SYM_PERCENT,
SYM_SQUOTE,
SYM_DQUOTE,
SYM_HASH,
SYM_COLON_COLON,
SYM_SEMICOLON,
SYM_COMMA,
SYM_DOLLAR,
SYM_DOLLAR_LEFT_PAREN,
SYM_DOLLAR_LEFT_BRACE,
SYM_DOT,
SYM_DOT_DOT,
SYM_PIPE,
SYM_AT,
SYM_AT_LEFT_PAREN,
SYM_AT_LEFT_BRACE,
SYM_LEFT_BRACE,
SYM_RIGHT_BRACE,
SYM_LEFT_BRACKET,
SYM_RIGHT_BRACKET,
SYM_LEFT_PAREN,
SYM_RIGHT_PAREN,
SYM_EQUAL,
SYM_PLUS_EQUAL,
SYM_HYPHEN_EQUAL,
SYM_ASTERISK_EQUAL,
SYM_FORWARD_SLASH_EQUAL,
SYM_PERCENT_EQUAL,
SYM_QUESTION_DOT,
SYM_QUESTION_LEFT_BRACKET,
__SYM_INDEX_LIMIT,
};
/*
 * A single lexer token.
 *
 * tok_type selects which member of the anonymous union is meaningful:
 * tok_keyword / tok_symbol / tok_operator for TOK_KEYWORD / TOK_SYMBOL /
 * TOK_OPERATOR, tok_int for TOK_INT, and tok_str for the types accepted by
 * lex_token_type_has_string_value().
 * NOTE(review): tok_double is presumably the value of TOK_DOUBLE tokens -
 * confirm.
 *
 * tok_start / tok_end hold the token's position.
 * tok_entry is a queue link; NOTE(review): presumably used by the lexer to
 * queue tokens - confirm.
 */
struct lex_token {
enum token_type tok_type;
struct char_cell tok_start, tok_end;
fx_queue_entry tok_entry;
union {
enum token_keyword tok_keyword;
enum token_symbol tok_symbol;
enum token_operator tok_operator;
long long tok_int;
double tok_double;
char *tok_str;
};
};
extern struct lex_token *lex_token_create(enum token_type type);
extern struct lex_token *lex_token_create_with_string(
enum token_type type,
const char *s);
extern void lex_token_destroy(struct lex_token *tok);
extern struct lex_token *lex_token_change_type(
struct lex_token *tok,
enum token_type new_type);
extern void lex_token_change_string(struct lex_token *tok, const char *s);
/* True when `tok` is a TOK_SYMBOL token carrying the symbol `sym`.
 * `tok` must not be NULL. */
static inline bool lex_token_is_symbol(
	struct lex_token *tok,
	enum token_symbol sym)
{
	if (tok->tok_type != TOK_SYMBOL) {
		return false;
	}

	return tok->tok_symbol == sym;
}
/* True when `tok` is a TOK_KEYWORD token carrying the keyword `kw`.
 * `tok` must not be NULL. */
static inline bool lex_token_is_keyword(
	struct lex_token *tok,
	enum token_keyword kw)
{
	if (tok->tok_type != TOK_KEYWORD) {
		return false;
	}

	return tok->tok_keyword == kw;
}
/* True for the token types whose value is held in tok_str: words, strings,
 * flags, variables and variable splats. */
static inline bool lex_token_type_has_string_value(enum token_type type)
{
	return type == TOK_WORD
	       || type == TOK_STRING
	       || type == TOK_FLAG
	       || type == TOK_VAR
	       || type == TOK_VAR_SPLAT;
}
static inline bool lex_token_has_string_value(const struct lex_token *tok)
{
return lex_token_type_has_string_value(tok->tok_type);
}
extern const char *token_type_to_string(enum token_type type);
extern const char *token_keyword_to_string(enum token_keyword keyword);
extern const char *token_symbol_to_string(enum token_symbol sym);
extern const char *token_operator_to_string(enum token_operator op);
#endif
+20
View File
@@ -0,0 +1,20 @@
#include "status.h"
#include <errno.h>
/*
 * Translate an errno value into a bshell status code.
 *
 *   0              -> BSHELL_SUCCESS
 *   EIO            -> BSHELL_ERR_IO_FAILURE
 *   ENOENT         -> BSHELL_ERR_NO_ENTRY
 *   EPERM, EACCES  -> BSHELL_ERR_ACCESS_DENIED
 *   anything else  -> BSHELL_ERR_INTERNAL_FAILURE
 */
enum bshell_status bshell_status_from_errno(int err)
{
	if (err == 0) {
		return BSHELL_SUCCESS;
	}

	if (err == EIO) {
		return BSHELL_ERR_IO_FAILURE;
	}

	if (err == ENOENT) {
		return BSHELL_ERR_NO_ENTRY;
	}

	if (err == EPERM || err == EACCES) {
		return BSHELL_ERR_ACCESS_DENIED;
	}

	return BSHELL_ERR_INTERNAL_FAILURE;
}
+24
View File
@@ -0,0 +1,24 @@
#ifndef STATUS_H_
#define STATUS_H_
/*
 * Status codes used throughout the shell. BSHELL_SUCCESS is 0; every other
 * value names an error condition. bshell_status_from_errno() maps errno
 * values onto this set.
 */
enum bshell_status {
BSHELL_SUCCESS = 0,
BSHELL_ERR_EOF,
BSHELL_ERR_BAD_SYNTAX,
BSHELL_ERR_BAD_FORMAT,
BSHELL_ERR_BAD_STATE,
BSHELL_ERR_INVALID_VALUE,
BSHELL_ERR_INVALID_ARGUMENT,
BSHELL_ERR_NO_MEMORY,
BSHELL_ERR_NO_ENTRY,
BSHELL_ERR_NO_DATA,
BSHELL_ERR_NAME_EXISTS,
BSHELL_ERR_NOT_SUPPORTED,
BSHELL_ERR_IO_FAILURE,
BSHELL_ERR_ACCESS_DENIED,
BSHELL_ERR_INTERNAL_FAILURE,
};
extern enum bshell_status bshell_status_from_errno(int err);
#endif
+124
View File
@@ -0,0 +1,124 @@
#[=======================================================================[.rst:
FindFX
------------
Find the FX library and header directories
Imported Targets
^^^^^^^^^^^^^^^^
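This module defines one :prop_tgt:`IMPORTED` target per requested component:
``FX::<Component>``
The library for that component, if found. The target name is the component name with any ``fx.`` prefix removed and its first letter capitalised (for example, component ``fx.core`` yields ``FX::Core``).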
Result Variables
^^^^^^^^^^^^^^^^
This module will set the following variables in your project:
``FX_FOUND``
true if the FX C headers and libraries were found
``FX_INCLUDE_DIR``
directories containing the FX C headers.
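``<COMPONENT>_LIBRARY``
the library to link against for each requested component, where ``<COMPONENT>`` is the component name upper-cased with ``.`` replaced by ``_`` (for example, ``FX_CORE_LIBRARY`` for component ``fx.core``)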
Hints
^^^^^
The user may set the environment variable ``FX_PREFIX`` to the root
directory of a FX library installation.
#]=======================================================================]
# Directories probed for an FX installation, in addition to CMake's default
# search locations. FX_PREFIX may be given as a CMake variable or through the
# environment.
set(FX_SEARCH_PATHS
  ~/Library/Frameworks
  /Library/Frameworks
  /usr/local
  /usr/local/share
  /usr
  /sw # Fink
  /opt/local # DarwinPorts
  /opt/csw # Blastwave
  /opt
  ${FX_PREFIX}
  $ENV{FX_PREFIX})

# Static builds of the libraries carry a "-s" suffix. Always initialise the
# suffix so a value left over in the including scope cannot leak in.
set(_lib_suffix "")
if(FX_STATIC)
  set(_lib_suffix "-s")
endif()

# The requested components; each one names a library to locate.
set(assemblies ${FX_FIND_COMPONENTS})

if(NOT FX_INCLUDE_DIR)
  find_path(FX_INCLUDE_DIR
    NAMES fx/misc.h ${FX_FIND_ARGS}
    PATH_SUFFIXES include
    PATHS ${FX_SEARCH_PATHS})
endif()

# Variables that must all be set for the package to count as found.
set(required_vars FX_INCLUDE_DIR)

foreach(assembly ${assemblies})
  # Component "fx.core" is located through the variable FX_CORE_LIBRARY.
  string(REPLACE "." "_" macro_name "${assembly}")
  string(TOUPPER "${macro_name}" macro_name)
  set(lib_name "${assembly}${_lib_suffix}")

  if(NOT ${macro_name}_LIBRARY)
    find_library(${macro_name}_LIBRARY
      NAMES ${lib_name} ${FX_FIND_ARGS}
      PATH_SUFFIXES lib
      PATHS ${FX_SEARCH_PATHS})
  else()
    # on Windows, ensure paths are in canonical format (forward slashes):
    file(TO_CMAKE_PATH "${${macro_name}_LIBRARY}" ${macro_name}_LIBRARY)
  endif()

  list(APPEND required_vars ${macro_name}_LIBRARY)
endforeach()

unset(FX_FIND_ARGS)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(FX
  REQUIRED_VARS ${required_vars})

if(FX_FOUND)
  set(created_targets)

  foreach(assembly ${assemblies})
    # Target name: drop the "fx." prefix and capitalise the first letter,
    # so component "fx.core" becomes FX::Core.
    string(REPLACE "fx." "" target_name "${assembly}")
    string(SUBSTRING "${target_name}" 0 1 target_name_prefix)
    string(TOUPPER "${target_name_prefix}" target_name_prefix)
    string(SUBSTRING "${target_name}" 1 -1 target_name_suffix)
    set(target_name "${target_name_prefix}${target_name_suffix}")

    # Same variable name as used by the lookup loop above.
    string(REPLACE "." "_" macro_name "${assembly}")
    string(TOUPPER "${macro_name}" macro_name)

    if(NOT TARGET FX::${target_name})
      add_library(FX::${target_name} UNKNOWN IMPORTED)
      set_target_properties(FX::${target_name} PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES "${FX_INCLUDE_DIR}")
      target_compile_definitions(FX::${target_name} INTERFACE _CRT_SECURE_NO_WARNINGS=1)

      if(FX_STATIC)
        target_compile_definitions(FX::${target_name} INTERFACE FX_STATIC=1)
      endif()

      set_target_properties(FX::${target_name} PROPERTIES
        IMPORTED_LINK_INTERFACE_LANGUAGES "C"
        IMPORTED_LOCATION "${${macro_name}_LIBRARY}")
      list(APPEND created_targets ${assembly})
    endif()
  endforeach()
endif()
+1
View File
@@ -0,0 +1 @@
echo hello 2> error.txt | ls -la | echo done; exit -1
+7 -1
View File
@@ -1,8 +1,14 @@
func test-function($name) {
echo "Hello, $name!"
echo "Hello, $name! $(2 + 4 + 2) wow"
}
# Example of instantiating an FX runtime object.
$obj = new-object -type-name fx.string -arguments "John Doe"
$hash = @{
1 = 'one'
2 = 'two'
'three' = 3
}
test-function -name $obj
+173
View File
@@ -0,0 +1,173 @@
# The lexer has three modes: ARITHMETIC, COMMAND, and STRING
# ARITHMETIC mode is operand-based, all symbols, keywords, and constant parsing
# is enabled.
# COMMAND mode is word-based, only a subset of symbols are enabled, no keyword
# or constant parsing is performed, and more liberal word formations and
# substitutions are allowed
# STRING mode is used to read string literals (i.e. those strings that DON'T
# support variable substitutions). All chars read are appended to the resulting
# string, with no further parsing performed.
# Initially, the lexer mode is unspecified, until:
# a) The lexer reads a character, from which the correct mode is deduced.
# b) The parser manually switches the lexer's mode
# Lexer state supports nesting.
# ARITHMETIC
# both of these are equivalent
$a = 2
# VAR(a)
# SYMBOL(=)
# INT(2)
$b=4
# VAR(b)
# SYMBOL(=)
# INT(4)
# ARITHMETIC
# this is a syntax error (there should be an operator between the two vars)
$a$b
# VAR(a)
# VAR(b)
# When the parser encounters SYMBOL(%) it should switch the lexer to COMMAND
# mode, which will allow the following word construction to be used.
# this executes the command whose name is equal to concatenating the values
# of $a and $b (in this case, '24')
% $a$b
# SYMBOL(%)
# WORD_START
# VAR(a)
# VAR(b)
# WORD_END
# executes the command with the name 'a+2b'. because the first char encountered
# by the lexer is alphabetic, it reads a regular word in COMMAND mode.
a+2b
# WORD(a+2b)
# executes the command with the name '-no$a' ($a is not substituted).
# the first char encountered is a symbol, which is read as a word in COMMAND
# mode
-no$a
# WORD(-no$a)
# returns the result of applying the NOT operator to the value of $a.
# the first char encountered is a symbol, which is read as a word in COMMAND
# mode. as characters are read, they are compared against registered operators.
# if a match is found, the operator is emitted, and the parser will switch
# the lexer to ARITHMETIC mode
-not$a
# OP(not)
# VAR(a)
# executes the command with the name '-not$a' ($a is NOT substituted)
# because of the preceding hyphen, variable substitution is not performed.
% -not$a
# SYMBOL(%)
# WORD(-not$a)
# executes the command with the name '-not2' ($a IS substituted)
# variable substitution IS performed in dquote strings regardless of the hyphen.
% "-not$a"
# SYMBOL(%)
# STR_START
# STRING(-not)
# VAR(a)
# STR_END
# interpreted as a command with args ['a', '+b', '/c']
# the first char encountered is alphabetic, so the expression is parsed in
# COMMAND mode
a +b /c
# WORD(a)
# WORD(+b)
# WORD(/c)
# interpreted as an arithmetic expression (but not a well-formed one)
+b /c
# SYMBOL(+)
# WORD(b)
# SYMBOL(/)
# WORD(c)
# interpreted as a command with name '%+'
%+
# WORD(%+)
# interpreted as a command with args ['%', '+']
% +
# WORD(%)
# WORD(+)
# interpreted as a command with name '%'
%;
# WORD(%)
# SYMBOL(;)
# interpreted as a command with name '+'
&+
# SYMBOL(&)
# WORD(+)
# interpreted as a string, which triggers the parser to enter ARITHMETIC mode
'hello world'
# STRING(hello world)
# interpreted as a command with args ['echo', 'hello world']
echo 'hello world'
# WORD(echo)
# STRING(hello world)
# interpreted as an interpolated string
"Hello $(if ($x -lt 5) { echo 'yes' } else {echo 'no'})"
###############################################################################
# The lexer operates as a state machine, moving between different states as
# different characters are encountered
# The states are stored in a stack, to allow recursive parsing.
# The lexer has the following states:
# STATEMENT: A generic statement, could be a command, keyword, arithmetic
# expression, etc. The next char or symbol encountered will cause the
# lexer to switch to the appropriate state type:
# letters, word-symbols -> COMMAND
# squote -> ARITHMETIC
# dquote -> ARITHMETIC, FSTRING
# Digits, vars, var-splats, keywords, all other symbols -> ARITHMETIC
# EXPRESSION: Similar to STATEMENT, but only allows a single command or
# arithmetic expression. CANNOT use keywords or statement terminators.
# Letters, word-symbols -> COMMAND
# squote -> ARITHMETIC
# dquote -> ARITHMETIC, FSTRING
# Digits, vars, var-splats, keywords, all other symbols -> ARITHMETIC
# COMMAND: Only words, (f)strings, vars, var-splats, and a subset of symbols are
# parsed.
# ARITHMETIC: Words, strings, vars, var-splats, all symbols, keywords are parsed.
# STRING: Only a subset of symbols are parsed, all other characters are appended
# to the resulting string.
#
# Once a state has changed from EXPRESSION to one of the other three state
# types, certain characters will result in the current state either changing
# type or being popped from the stack:
# STATEMENT: semicolon -> STATEMENT
# left-paren, left-brace -> POP
# EXPRESSION: semicolon -> POP
# left-paren, left-brace -> POP
# COMMAND: semicolon -> STATEMENT
# left-paren, left-brace -> POP
# ARITHMETIC: semicolon -> STATEMENT
# left-paren, left-brace -> POP
#
# Certain symbols require recursive parsing:
# - dquote strings allow string interpolation, so expressions within the string
# may be parsed in a different state. Once the expression is complete, the
# lexer returns to the previous state.
# - in most cases, $(...) can be used to delimit sub-expressions (including in
# strings). When '$(' is encountered, a new state entry of type EXPRESSION is
# pushed onto the stack. When the corresponding ')' is encountered, that state
# entry is popped from the stack.
# - similarly to $(...), (...) can be used to group expressions, just like in
# mathematical expressions.