src/bytes.rs
src/bytes.rs, line 206-230
/// Quotes `in_bytes` so that it forms a single shell word.
///
/// # Returns
///
/// * `''` (borrowed) when the input is empty.
/// * The input itself (borrowed, no copy) when `quoting_strategy` reports that the whole
///   input can be left unquoted.
/// * Otherwise an owned buffer built by quoting the input chunk by chunk.
///
/// # Errors
///
/// Returns `QuoteError::Nul` when the input contains a NUL byte and `self.allow_nul` is
/// false.
///
/// # Panics
///
/// Panics if `quoting_strategy` ever returns a zero-length chunk, since the loop could
/// then make no progress.
pub fn quote<'a>(&self, mut in_bytes: &'a [u8]) -> Result<Cow<'a, [u8]>, QuoteError> {
    // The empty string needs explicit quotes; as a fragment of a larger word it would
    // have no meaning at all.
    if in_bytes.is_empty() {
        return Ok(Cow::Borrowed(&b"''"[..]));
    }

    if !self.allow_nul && in_bytes.contains(&b'\0') {
        return Err(QuoteError::Nul);
    }

    let mut out: Vec<u8> = Vec::new();
    while !in_bytes.is_empty() {
        // Ask for a strategy covering some prefix of what is left. Usually the prefix is
        // the whole remainder, but in some cases the input has to be cut into several
        // chunks that are each quoted differently.
        let (cur_len, strategy) = quoting_strategy(in_bytes);

        let covers_all = cur_len == in_bytes.len();
        if covers_all && out.is_empty() && strategy == QuotingStrategy::Unquoted {
            // Nothing has been emitted and nothing needs quoting: hand the caller's
            // slice straight back instead of copying it.
            return Ok(Cow::Borrowed(in_bytes));
        }

        let (cur_chunk, rest) = in_bytes.split_at(cur_len);
        assert!(rest.len() < in_bytes.len()); // no infinite loop
        in_bytes = rest;
        append_quoted_chunk(&mut out, cur_chunk, strategy);
    }

    Ok(Cow::Owned(out))
}
`quote` operates on bytes and only inserts ASCII delimiters (single quote, double quote, backslash) around the existing bytes. Multi-byte UTF-8 code points are preserved because the byte-by-byte logic only gives special handling to bytes < 0x80. This function implements quoting; its tests cover empty, ASCII, and invalid UTF-8 inputs.
src/bytes.rs, line 60-156
/// Parses one word whose first byte, `ch`, has already been read.
///
/// Bytes are consumed until unquoted whitespace (space, tab, newline) or the end of the
/// input. Double- and single-quoted sections are delegated to `parse_double` and
/// `parse_single`; an unquoted backslash makes the following byte literal, except that
/// backslash-newline is dropped entirely.
///
/// Returns `None` and sets `self.had_error` when a quoted section is not terminated or
/// the input ends right after a backslash.
fn parse_word(&mut self, mut ch: u8) -> Option<Vec<u8>> {
    let mut word: Vec<u8> = Vec::new();
    loop {
        match ch {
            b'"' => {
                if self.parse_double(&mut word).is_err() {
                    self.had_error = true;
                    return None;
                }
            }
            b'\'' => {
                if self.parse_single(&mut word).is_err() {
                    self.had_error = true;
                    return None;
                }
            }
            b'\\' => match self.next_char() {
                // Backslash-newline is a line continuation: emit nothing.
                Some(b'\n') => {}
                Some(escaped) => word.push(escaped),
                None => {
                    // Input ended right after the backslash.
                    self.had_error = true;
                    return None;
                }
            },
            b' ' | b'\t' | b'\n' => break,
            plain => word.push(plain),
        }

        match self.next_char() {
            Some(following) => ch = following,
            None => break,
        }
    }
    Some(word)
}
/// Reads the body of a double-quoted section, appending its bytes to `result`.
///
/// The opening `"` must already have been consumed. Returns `Ok(())` once the closing
/// `"` is read, or `Err(())` if the input ends first (including directly after a
/// backslash).
fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
    loop {
        let byte = self.next_char().ok_or(())?;
        match byte {
            b'"' => return Ok(()),
            b'\\' => {
                let escaped = self.next_char().ok_or(())?;
                match escaped {
                    // `\$`, `` \` ``, `\"` and `\\` yield the escaped byte alone.
                    b'$' | b'`' | b'"' | b'\\' => result.push(escaped),
                    // Backslash-newline yields nothing.
                    b'\n' => {}
                    // Before any other byte the backslash is kept as-is.
                    _ => result.extend_from_slice(&[b'\\', escaped]),
                }
            }
            _ => result.push(byte),
        }
    }
}
/// Reads the body of a single-quoted section, appending every byte verbatim to `result`.
///
/// The opening `'` must already have been consumed. Returns `Ok(())` once the closing
/// `'` is read, or `Err(())` if the input ends first.
fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
    while let Some(byte) = self.next_char() {
        if byte == b'\'' {
            return Ok(());
        }
        result.push(byte);
    }
    // Ran out of input before the closing quote.
    Err(())
}
/// Pulls the next byte from `self.in_iter`, incrementing `self.line_no` whenever that
/// byte is a newline. Returns `None` once the input is exhausted.
fn next_char(&mut self) -> Option<u8> {
    let byte = self.in_iter.next().copied()?;
    if byte == b'\n' {
        self.line_no += 1;
    }
    Some(byte)
}
}
impl<'a> Iterator for Shlex<'a> {
    type Item = Vec<u8>;

    /// Yields the next word of the input.
    ///
    /// Leading whitespace and `#` comments (which run through the end of the line) are
    /// skipped first. Returns `None` when the input runs out before a word starts, or
    /// when `parse_word` itself returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        let mut current = self.next_char()?;
        loop {
            match current {
                // Whitespace between words carries no meaning.
                b' ' | b'\t' | b'\n' => {}
                // Discard the comment up to and including its terminating newline.
                b'#' => {
                    while let Some(skipped) = self.next_char() {
                        if skipped == b'\n' {
                            break;
                        }
                    }
                }
                // First byte of a real word.
                _ => return self.parse_word(current),
            }
            current = self.next_char()?;
        }
    }
}
The `Shlex` iterator implements POSIX-shell word splitting with single-quote, double-quote, and backslash escapes. Errors (an unclosed quote or a trailing backslash) set `had_error` and end iteration. The implementation works byte by byte and uses no indexing arithmetic that could overflow.