// grep match core — the pure substring matcher /bin/grep drives. // // A tool-local seam in the spirit of fsh's tokenize.flash and flibc's pager: // the one piece of grep worth host-testing in isolation, kept apart from the // driver half (argv parsing, open/read, line assembly) that lives in // tools/grep.flash. Pure by construction — no allocator, no module state, no // SVC, no flibc dependency — so it compiles and runs on the host under // `zig build test` exactly as it does on the device. // // The match is a plain windowed substring scan, optionally case-folded over // ASCII: grep needs no regex for its first cut (the product vision asks only // for literal-pattern line search). An empty pattern matches every line, the // GNU grep convention a `grep '' FILE` relies on. /// True when `line` contains `pat` as a contiguous substring. With /// `ignore_case`, A–Z fold to a–z on both sides before comparing (ASCII only; /// bytes >= 0x80 are matched verbatim). An empty `pat` matches every line; a /// `pat` longer than `line` never matches. O(line.len * pat.len) — fine for the /// short lines a serial-console grep sees. pub fn lineContains(line []u8, pat []u8, ignore_case bool) bool { if pat.len == 0 { return true } if pat.len > line.len { return false } var i usize = 0 // Last start offset that still leaves room for the whole pattern. while i + pat.len <= line.len { if windowEq(line[i .. i + pat.len], pat, ignore_case) { return true } i += 1 } return false } /// Byte-equality over two equal-length slices, folding case when asked. The /// caller guarantees `a.len == pat.len`, so the scan walks `pat.len` bytes. fn windowEq(a []u8, pat []u8, ignore_case bool) bool { var j usize = 0 while j < pat.len { var x = a[j] var y = pat[j] if ignore_case { x = lower(x) y = lower(y) } if x != y { return false } j += 1 } return true } /// ASCII lowercase fold: A–Z (0x41–0x5A) map to a–z by +0x20; every other byte /// is returned unchanged. The guard keeps the add in range (max 'Z'+32 == 'z'), /// so no overflow even with ReleaseSmall's traps compiled out. fn lower(c u8) u8 { return if (c >= 'A' && c <= 'Z') c + 32 else c } /// First offset >= `from` at which `needle` occurs in `hay`, or null if there is /// none. The offset-returning sibling of lineContains, for /bin/edit's ctrl-W /// search-from-cursor (grep itself stays on the bool form). Case-sensitive — /// this search does not fold case. An empty `needle` returns null so a blank search /// is an inert no-op rather than a match that never advances the cursor. Same /// windowed scan as lineContains; O((hay.len - from) * needle.len). pub fn find(hay []u8, needle []u8, from usize) ?usize { if needle.len == 0 || from >= hay.len { return null } var i usize = from while i + needle.len <= hay.len { if windowEq(hay[i .. i + needle.len], needle, false) { return i } i += 1 } return null } // ---- host tests ------------------------------------------------------------ const std = #import("std") const testing = std.testing test "substring hit anywhere in the line" { try testing.expect(lineContains("hello world", "world", false)) try testing.expect(lineContains("hello world", "hello", false)) try testing.expect(lineContains("hello world", "lo wo", false)) } test "no match returns false" { try testing.expect(!lineContains("hello world", "xyz", false)) try testing.expect(!lineContains("abc", "abcd", false)) // pat longer than line } test "empty pattern matches every line, even empty" { try testing.expect(lineContains("anything", "", false)) try testing.expect(lineContains("", "", false)) } test "empty line matches only the empty pattern" { try testing.expect(!lineContains("", "a", false)) } test "case-sensitive by default" { try testing.expect(!lineContains("Hello", "hello", false)) try testing.expect(lineContains("Hello", "Hello", false)) } test "ignore_case folds both sides over ASCII" { try testing.expect(lineContains("Hello World", "hello", true)) try testing.expect(lineContains("hello world", "WORLD", true)) try testing.expect(lineContains("MiXeD", "mixed", true)) } test "ignore_case leaves non-letters and high bytes alone" { try testing.expect(lineContains("a1b2c3", "1B2", true)) // 0x80 has no case; it must match itself, not fold. const hi = [_]u8{ 'x', 0x80, 'y' } try testing.expect(lineContains(&hi, &hi, true)) } test "match at the very start and very end" { try testing.expect(lineContains("abcdef", "ab", false)) try testing.expect(lineContains("abcdef", "ef", false)) try testing.expect(lineContains("abcdef", "abcdef", false)) } test "find returns the first offset at or after from" { try testing.expectEqual(#as(?usize, 0), find("abcabc", "abc", 0)) try testing.expectEqual(#as(?usize, 3), find("abcabc", "abc", 1)) // skip the first hit try testing.expectEqual(#as(?usize, 2), find("xxneedle", "needle", 0)) } test "find reports no match" { try testing.expectEqual(#as(?usize, null), find("abc", "xyz", 0)) try testing.expectEqual(#as(?usize, null), find("abc", "abcd", 0)) // needle longer than hay try testing.expectEqual(#as(?usize, null), find("abcabc", "abc", 4)) // no hit past offset 4 } test "find with from past the end or an empty needle yields null" { try testing.expectEqual(#as(?usize, null), find("abc", "a", 3)) try testing.expectEqual(#as(?usize, null), find("abc", "a", 99)) try testing.expectEqual(#as(?usize, null), find("abc", "", 0)) // blank search is inert }