From b2190dd4d008f090b89263fb0e56f36f946a52a8 Mon Sep 17 00:00:00 2001
From: Max Wash
Date: Tue, 12 May 2026 22:48:57 +0100
Subject: [PATCH] parse: lex: improve scanning of more complex redirection expressions

---
 bshell/parse/lex/command.c | 43 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/bshell/parse/lex/command.c b/bshell/parse/lex/command.c
index 2bd6122..402e3a8 100644
--- a/bshell/parse/lex/command.c
+++ b/bshell/parse/lex/command.c
@@ -1,3 +1,4 @@
+#include "../token.h"
 #include "lex-internal.h"
 
 static bool char_can_continue_word(struct lex_ctx *ctx, fx_wchar c)
@@ -34,6 +35,8 @@ static enum bshell_status command_symbol(struct lex_ctx *ctx)
 	struct lex_token *tok = NULL;
 
 	switch (sym->id) {
+	case SYM_DQUOTE:
+		return BSHELL_SUCCESS;
 	case SYM_SQUOTE:
 		status = read_literal_string(ctx, &tok);
 		if (status != BSHELL_SUCCESS) {
@@ -85,6 +88,39 @@ static enum bshell_status command_symbol(struct lex_ctx *ctx)
 	return BSHELL_SUCCESS;
 }
 
+/*
+ * Report whether the word `s` is a complete redirection operator: an
+ * optional run of characters accepted by fx_wchar_is_number() followed
+ * by one or two '>' characters, e.g. ">", ">>", "2>" or "2>>".
+ *
+ * Returns false for the empty string, for digit-only words such as "12"
+ * (command_word() must still be able to continue those), and for any
+ * word holding another character or a digit after the first '>'.
+ */
+static bool string_is_redirection(const char *s)
+{
+	long nr_angles = 0;
+
+	while (*s) {
+		fx_wchar c = fx_wchar_utf8_codepoint_decode(s);
+
+		if (c == '>') {
+			nr_angles++;
+			if (nr_angles > 2) {
+				return false;
+			}
+		} else if (!fx_wchar_is_number(c) || nr_angles) {
+			/* Only digits may appear, and only before the first '>'. */
+			return false;
+		}
+
+		s += fx_wchar_utf8_codepoint_stride(s);
+	}
+
+	/* At least one '>' is required; digits alone are not a redirection. */
+	return nr_angles > 0;
+}
+
 static enum bshell_status command_word(struct lex_ctx *ctx)
 {
 	struct lex_token *word = NULL;
@@ -97,6 +133,7 @@ static enum bshell_status command_word(struct lex_ctx *ctx)
 
 	bool continue_word = false;
 	fx_wchar c = peek_char(ctx);
+	const char *s = word->tok_str;
 	if (char_can_begin_symbol_in_state(ctx, c, LEX_STATE_WORD)) {
 		continue_word = true;
 	}
@@ -105,6 +142,10 @@ static enum bshell_status command_word(struct lex_ctx *ctx)
 		continue_word = false;
 	}
 
+	if (string_is_redirection(s)) {
+		continue_word = false;
+	}
+
 	if (continue_word) {
 		lex_state_push(ctx, LEX_STATE_WORD, 0);
 	}
@@ -171,6 +212,8 @@ static const unsigned int symbols[] = {
 	SYM_PIPE,
 	SYM_SEMICOLON,
 	SYM_RIGHT_PAREN,
+	SYM_LEFT_PAREN,
+	SYM_LEFT_BRACE,
 	SYM_RIGHT_BRACE,
 	SYM_NONE,
 };