src/escape.rs
src/escape.rs, line 255-305
/// Unescapes entity and character references in `raw`, resolving named
/// entities through the caller-supplied `resolve_entity`.
///
/// Every `&...;` span is replaced as follows:
/// - if the text between `&` and `;` starts with `#`, the remainder is handed
///   to `parse_number` and the resulting character is emitted;
/// - otherwise the text is passed to `resolve_entity` and the string it
///   returns is emitted verbatim (it is not scanned again for references).
///
/// Returns `Cow::Borrowed(raw)` when `raw` contains no `&`, so nothing is
/// allocated in that case; otherwise the result is `Cow::Owned`.
///
/// # Errors
///
/// - `EscapeError::UnterminatedEntity` when the next `&`/`;` delimiter after
///   a `&` is not a `;` (another `&`, or the end of input). The range runs
///   from that `&` to the end of `raw`.
/// - `EscapeError::InvalidCharRef` when `parse_number` rejects the text
///   following `&#`.
/// - `EscapeError::UnrecognizedEntity` when `resolve_entity` returns `None`;
///   it carries the byte range of the name and the name itself.
pub fn unescape_with<'input, 'entity, F>(
    raw: &'input str,
    mut resolve_entity: F,
) -> Result<Cow<'input, str>, EscapeError>
where
    // the lifetime of the output comes from a capture or is `'static`
    F: FnMut(&str) -> Option<&'entity str>,
{
    let bytes = raw.as_bytes();
    // Created lazily, on the first reference, so reference-free input stays borrowed.
    let mut output: Option<String> = None;
    // Byte offset just past the `;` of the most recently replaced reference.
    let mut copied_up_to = 0;
    let mut delimiters = memchr2_iter(b'&', b';', bytes);

    // `find` skips any `;` that is not closing a reference.
    while let Some(amp) = delimiters.by_ref().find(|&pos| bytes[pos] == b'&') {
        // The very next delimiter has to be the closing `;`.
        let semi = match delimiters.next() {
            Some(pos) if bytes[pos] == b';' => pos,
            _ => return Err(EscapeError::UnterminatedEntity(amp..raw.len())),
        };

        let buf = output.get_or_insert_with(|| String::with_capacity(raw.len()));
        // Copy the literal text that sits between the previous reference and this one.
        buf.push_str(&raw[copied_up_to..amp]);

        let name = &raw[amp + 1..semi];
        match name.strip_prefix('#') {
            // Numeric character reference.
            Some(number) => {
                let ch = parse_number(number).map_err(EscapeError::InvalidCharRef)?;
                buf.push(ch);
            }
            // Named entity: defer to the caller's resolver.
            None => match resolve_entity(name) {
                Some(expansion) => buf.push_str(expansion),
                None => {
                    return Err(EscapeError::UnrecognizedEntity(
                        amp + 1..semi,
                        name.to_string(),
                    ));
                }
            },
        }

        copied_up_to = semi + 1;
    }

    match output {
        Some(mut buf) => {
            // Append whatever follows the last replaced reference.
            if let Some(tail) = raw.get(copied_up_to..) {
                buf.push_str(tail);
            }
            Ok(Cow::Owned(buf))
        }
        None => Ok(Cow::Borrowed(raw)),
    }
}
The `unescape_with` function resolves entity references one level deep only; it does not recursively expand entities. Unrecognised entity names produce an `EscapeError::UnrecognizedEntity` error rather than silently passing through. The five predefined XML entities (`lt`, `gt`, `amp`, `apos`, `quot`) and numeric character references (`&#N;` and `&#xN;`) are always handled. The optional `escape-html` feature extends recognition to the full HTML5 entity table. There is no "billion laughs" (exponential entity expansion) risk: the crate never interprets entity values as XML or expands them recursively. This evidence supports parser-impl-safe and impl-parser.