// tokenize — the fsh command tokenizer, the first FlashOS module ported to
// Flash from its hand-written Zig.
//
// A whitespace splitter with an optional single `|` split. Pure: no syscalls,
// no allocator. A caller-owned argv array and scratch buffer are filled in
// place (fixed-size, no realloc); the tokenizer reports how the line
// decomposes. Tokens are maximal runs of non-whitespace, non-`|` bytes; the
// first `|` splits the line into a left and a right command. Each token is
// copied NUL-terminated into `buf` and its argv slot points there, with a
// `null` slot marking the pipe boundary and the line end so each side is an
// execve-ready NULL-terminated vector. Overflow truncates; a second `|`, or a
// `|` with an empty side, is a hard error.
//
// First port to exercise tagged-union result types end to end: `union(enum)`
// with mixed void and payload variants, union literals including a nested
// `.{ .piped = .{ … } }`, and bare enum-literal returns (`return .empty`). It
// also drives the composite signature `argv *mut [MAX_ARGS]?[*:0]mut u8`, the
// sentinel slice `buf[buf_pos .. buf_pos + tok.len :0].ptr`, an open-ended
// chained slice `buf[buf_pos..][0..tok.len]`, and compound-condition `while`
// scans. Doc comments are carried through verbatim. The host tests that
// accompany the reference are not part of this core port.

/// argv capacity, including the interleaved `null` separators (the pipe
/// boundary and the trailing terminator). 16 covers a command plus a
/// generous argument list for demoware; longer lines truncate. The tokenizer
/// stops filling at `MAX_ARGS - 1` entries so the final slot is always free
/// for the trailing `null`.
pub const MAX_ARGS usize = 16

/// Why a line containing `|` was rejected: either a second `|` appeared, or
/// the two sides of the `|` cannot both be commands.
pub const Err = enum {
    /// A second `|` was encountered; only one pipe stage is supported.
    too_many_pipes,
    /// The left or the right side of the `|` produced no tokens.
    empty_side,
}

/// A single-pipe decomposition. The right command's argv begins at
/// `argv[left_argc + 1]` (the `+ 1` skips the `null` the tokenizer wrote
/// at the pipe boundary); both vectors are NULL-terminated in place.
pub const Piped = struct {
    /// Number of tokens stored before the pipe's `null` slot.
    left_argc usize,
    /// Number of tokens stored after the pipe's `null` slot.
    right_argc usize,
}

/// Outcome of tokenizing one line.
pub const Result = union(enum) {
    /// No tokens at all (blank or whitespace-only input) — fsh redraws the
    /// prompt.
    empty,
    /// A single command of `argc` tokens: `argv[0..argc]` is populated and
    /// `argv[argc]` is `null`.
    single usize,
    /// A left and a right command separated by one `|`; see `Piped`.
    piped Piped,
    /// The line used `|` in a way the tokenizer rejects; see `Err`.
    err Err,
}

/// True for the four bytes treated as token separators: newline, carriage
/// return, tab and space.
inline fn is_space(c u8) bool {
    return c == '\n' || c == '\r' || c == '\t' || c == ' '
}

/// Tokenize `line` into `argv`, copying every token NUL-terminated into
/// `buf`.
///
/// - `line`: the raw input bytes; only read.
/// - `argv`: caller-owned vector of `MAX_ARGS` slots. Token slots receive
///   pointers into `buf`; the pipe boundary and the end of the vector
///   receive `null`.
/// - `buf`: caller-owned scratch storage for the token bytes.
///
/// Because the argv entries point into `buf`, they stay meaningful only
/// until `argv` or `buf` is reused for another line.
///
/// Scanning stops early (truncation) once `MAX_ARGS - 1` slots are used or
/// the next token would not fit in `buf` together with its NUL. A second `|`
/// returns `.too_many_pipes` immediately. A `|` with no tokens on one side
/// returns `.empty_side` — note that this also happens when truncation
/// kicks in directly after the `|`, since the right side then holds zero
/// tokens.
pub fn tokenize(line []u8, argv *mut [MAX_ARGS]?[*:0]mut u8, buf []mut u8) Result {
    var count usize = 0
    var used usize = 0
    var boundary ?usize = null
    var bars usize = 0
    var pos usize = 0

    while pos < line.len {
        // Step over separator bytes one at a time.
        if is_space(line[pos]) {
            pos += 1
            continue
        }

        // Keep the last slot free for the trailing `null`.
        if count >= MAX_ARGS - 1 { break }

        if line[pos] == '|' {
            bars += 1
            if bars > 1 { return .{ .err = .too_many_pipes } }
            // The pipe occupies one argv slot, written as `null`, so the
            // left command is already a terminated vector.
            argv[count] = null
            boundary = count
            count += 1
            pos += 1
            continue
        }

        // Take the maximal run of bytes that are neither separators nor `|`.
        begin := pos
        while pos < line.len && !is_space(line[pos]) && line[pos] != '|' { pos += 1 }
        word := line[begin..pos]

        // The token needs `word.len` bytes plus one for the NUL.
        stop := used + word.len
        if stop + 1 > buf.len { break }

        #memcpy(buf[used..][0..word.len], word)
        buf[stop] = 0
        argv[count] = buf[used .. stop :0].ptr
        count += 1
        used = stop + 1
    }

    // Terminate the (right-hand or only) vector.
    if count < MAX_ARGS { argv[count] = null }

    // A pipe always consumes a slot, so zero slots means no pipe either.
    if count == 0 { return .empty }

    if boundary |split| {
        // `split` is the index of the pipe's `null`, which equals the
        // number of tokens to its left.
        after := count - split - 1
        if split == 0 || after == 0 { return .{ .err = .empty_side } }
        return .{ .piped = .{ .left_argc = split, .right_argc = after } }
    }

    return .{ .single = count }
}