use std::borrow::{Borrow, Cow}; use std::ops::Deref; use std::str::Utf8Error; /// OsStr, but specifically for Linux (since we aren't always processing native dumps). #[derive(Debug, PartialOrd, Ord, Eq, PartialEq)] pub struct LinuxOsStr([u8]); /// OsString, but specifically for Linux (since we aren't always processing native dumps). #[derive(Default, Debug, PartialOrd, Ord, Eq, PartialEq, Clone)] pub struct LinuxOsString(Vec); impl LinuxOsStr { pub fn new() -> &'static Self { Self::from_bytes(b"") } pub fn from_bytes(inner: &[u8]) -> &Self { // This is the idiom std uses for creating a type that wraps a slice. // Yes, there really isn't a way to do this without unsafe. No, it's // not at all a safety concern. unsafe { &*(inner as *const [u8] as *const LinuxOsStr) } } pub fn as_bytes(&self) -> &[u8] { &self.0 } /// Tries to interpret the LinuxOsStr as a utf8 str. /// /// While linux OsStrs are "arbitrary bytes" in general, there are often /// parts that are known to be utf8 (ascii even). /// /// For instance, when parsing /proc/self/mem, most of the line is ascii /// like "r-xp" or "1a23-4fe2". However the "path" at the end of each line /// is a true LinuxOsStr and may not be proper utf8. pub fn to_str(&self) -> Result<&str, Utf8Error> { std::str::from_utf8(self) } /// Converts to a utf8 string lossily (uses the usual std lossy algorithm). pub fn to_string_lossy(&self) -> Cow { // Ok so this is the end of the line for dancing around and acting // like we can just be fine with Linux OS strings being arbitrary bags // of bytes. We need some way to print this value in a reasonable way, // and the best precedent I can find for that is std::Path::display. // This wraps the a Path (which is just an OsStr) and provides a // Display impl. // // What does this Display impl do..? // // It just calls from_utf8_lossy. // // Whelp. Ok. // // (Strictly speaking it wraps it up in the internal/unstable // Utf8Lossy iterator so it avoids the allocation, but we don't // have that luxury, so we might as well make the allocation/conversion // explicit.) String::from_utf8_lossy(self.as_bytes()) } // ~Copies of a bunch of string APIs since [u8] doesn't have them (reasonably) pub fn split_once(&self, separator: u8) -> Option<(&LinuxOsStr, &LinuxOsStr)> { self.iter().position(|&b| b == separator).map(|idx| { ( Self::from_bytes(&self[..idx]), Self::from_bytes(&self[idx + 1..]), ) }) } pub fn rsplit_once(&self, separator: u8) -> Option<(&LinuxOsStr, &LinuxOsStr)> { self.iter().rposition(|&b| b == separator).map(|idx| { ( Self::from_bytes(&self[..idx]), Self::from_bytes(&self[idx + 1..]), ) }) } pub fn split(&self, separator: u8) -> impl Iterator { self.as_bytes() .split(move |&b| b == separator) .map(LinuxOsStr::from_bytes) } pub fn split_ascii_whitespace(&self) -> impl Iterator { // Quick and dirty impl: just split on every individual whitespace // char but discard all the empty substrings. self.as_bytes() .split(|b| b.is_ascii_whitespace()) .filter(|slice| !slice.is_empty()) .map(LinuxOsStr::from_bytes) } pub fn lines(&self) -> impl Iterator { // Intentionally doesn't mess around with stuff like \r // since we're processing files generated by the OS, but maybe // this will be a problem later? self.split(b'\n') } pub fn trim_ascii_whitespace(&self) -> &LinuxOsStr { let input = self.as_bytes(); let mut first = None; let mut last = None; // Find first non-whitespace index for (i, &c) in input.iter().enumerate() { if !c.is_ascii_whitespace() { first = Some(i); break; } } // Find last non-whitespace index for (i, &c) in input.iter().enumerate().rev() { if !c.is_ascii_whitespace() { last = Some(i); break; } } if let (Some(first), Some(last)) = (first, last) { Self::from_bytes(&input[first..=last]) } else { // string was entirely whitespace, return an empty string starting // at its position (so that it's still strictly a substring). Self::from_bytes(&input[0..0]) } } } impl LinuxOsString { /// Create a new LinuxOsString from an array of bytes. pub fn from_vec(vec: Vec) -> Self { Self(vec) } pub fn new() -> Self { Self(Vec::new()) } pub fn as_os_str(&self) -> &LinuxOsStr { self } } impl Borrow for LinuxOsString { fn borrow(&self) -> &LinuxOsStr { LinuxOsStr::from_bytes(&self.0) } } impl ToOwned for LinuxOsStr { type Owned = LinuxOsString; fn to_owned(&self) -> LinuxOsString { LinuxOsString::from_vec(self.0.to_owned()) } } impl Deref for LinuxOsString { type Target = LinuxOsStr; fn deref(&self) -> &LinuxOsStr { LinuxOsStr::from_bytes(&self.0) } } impl Deref for LinuxOsStr { type Target = [u8]; fn deref(&self) -> &[u8] { &self.0 } }