pub fn parse_lit_str(s: &str) -> (Box<str>, Box<str>) {
match byte(s, 0) {
b'"' => parse_lit_str_cooked(s),
b'r' => parse_lit_str_raw(s),
_ => unreachable!(),
}
}
// Clippy false positive
// https://github.com/rust-lang-nursery/rust-clippy/issues/2329
#[allow(clippy::needless_continue)]
fn parse_lit_str_cooked(mut s: &str) -> (Box<str>, Box<str>) {
assert_eq!(byte(s, 0), b'"');
s = &s[1..];
let mut content = String::new();
'outer: loop {
let ch = match byte(s, 0) {
b'"' => break,
b'\\' => {
let b = byte(s, 1);
s = &s[2..];
match b {
b'x' => {
let (byte, rest) = backslash_x(s);
s = rest;
assert!(byte <= 0x80, "Invalid \\x byte in string literal");
char::from_u32(u32::from(byte)).unwrap()
}
b'u' => {
let (chr, rest) = backslash_u(s);
s = rest;
chr
}
b'n' => '\n',
b'r' => '\r',
b't' => '\t',
b'\\' => '\\',
b'0' => '\0',
b'\'' => '\'',
b'"' => '"',
b'\r' | b'\n' => loop {
let b = byte(s, 0);
match b {
b' ' | b'\t' | b'\n' | b'\r' => s = &s[1..],
_ => continue 'outer,
}
},
b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
}
}
b'\r' => {
assert_eq!(byte(s, 1), b'\n', "Bare CR not allowed in string");
s = &s[2..];
'\n'
}
_ => {
let ch = next_chr(s);
s = &s[ch.len_utf8()..];
ch
}
};
content.push(ch);
}
assert!(s.starts_with('"'));
let content = content.into_boxed_str();
let suffix = s[1..].to_owned().into_boxed_str();
(content, suffix)
}
fn parse_lit_str_raw(mut s: &str) -> (Box<str>, Box<str>) {
assert_eq!(byte(s, 0), b'r');
s = &s[1..];
let mut pounds = 0;
while byte(s, pounds) == b'#' {
pounds += 1;
}
assert_eq!(byte(s, pounds), b'"');
let close = s.rfind('"').unwrap();
for end in s[close + 1..close + 1 + pounds].bytes() {
assert_eq!(end, b'#');
}
let content = s[pounds + 1..close].to_owned().into_boxed_str();
let suffix = s[close + 1 + pounds..].to_owned().into_boxed_str();
(content, suffix)
}
// Returns (content, suffix).
pub fn parse_lit_byte_str(s: &str) -> (Vec<u8>, Box<str>) {
assert_eq!(byte(s, 0), b'b');
match byte(s, 1) {
b'"' => parse_lit_byte_str_cooked(s),
b'r' => parse_lit_byte_str_raw(s),
_ => unreachable!(),
}
}
// Clippy false positive
// https://github.com/rust-lang-nursery/rust-clippy/issues/2329
#[allow(clippy::needless_continue)]
fn parse_lit_byte_str_cooked(mut s: &str) -> (Vec<u8>, Box<str>) {
assert_eq!(byte(s, 0), b'b');
assert_eq!(byte(s, 1), b'"');
s = &s[2..];
// We're going to want to have slices which don't respect codepoint boundaries.
let mut v = s.as_bytes();
let mut out = Vec::new();
'outer: loop {
let byte = match byte(v, 0) {
b'"' => break,
b'\\' => {
let b = byte(v, 1);
v = &v[2..];
match b {
b'x' => {
let (b, rest) = backslash_x(v);
v = rest;
b
}
b'n' => b'\n',
b'r' => b'\r',
b't' => b'\t',
b'\\' => b'\\',
b'0' => b'\0',
b'\'' => b'\'',
b'"' => b'"',
b'\r' | b'\n' => loop {
let byte = byte(v, 0);
let ch = char::from_u32(u32::from(byte)).unwrap();
if ch.is_whitespace() {
v = &v[1..];
} else {
continue 'outer;
}
},
b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
}
}
b'\r' => {
assert_eq!(byte(v, 1), b'\n', "Bare CR not allowed in string");
v = &v[2..];
b'\n'
}
b => {
v = &v[1..];
b
}
};
out.push(byte);
}
assert_eq!(byte(v, 0), b'"');
let suffix = s[s.len() - v.len() + 1..].to_owned().into_boxed_str();
(out, suffix)
}
fn parse_lit_byte_str_raw(s: &str) -> (Vec<u8>, Box<str>) {
assert_eq!(byte(s, 0), b'b');
let (value, suffix) = parse_lit_str_raw(&s[1..]);
(String::from(value).into_bytes(), suffix)
}
// Returns (value, suffix).
pub fn parse_lit_byte(s: &str) -> (u8, Box<str>) {
assert_eq!(byte(s, 0), b'b');
assert_eq!(byte(s, 1), b'\'');
// We're going to want to have slices which don't respect codepoint boundaries.
let mut v = s[2..].as_bytes();
let b = match byte(v, 0) {
b'\\' => {
let b = byte(v, 1);
v = &v[2..];
match b {
b'x' => {
let (b, rest) = backslash_x(v);
v = rest;
b
}
b'n' => b'\n',
b'r' => b'\r',
b't' => b'\t',
b'\\' => b'\\',
b'0' => b'\0',
b'\'' => b'\'',
b'"' => b'"',
b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
}
}
b => {
v = &v[1..];
b
}
};
assert_eq!(byte(v, 0), b'\'');
let suffix = s[s.len() - v.len() + 1..].to_owned().into_boxed_str();
(b, suffix)
}
// Returns (value, suffix).
pub fn parse_lit_char(mut s: &str) -> (char, Box<str>) {
assert_eq!(byte(s, 0), b'\'');
s = &s[1..];
let ch = match byte(s, 0) {
b'\\' => {
let b = byte(s, 1);
s = &s[2..];
match b {
b'x' => {
let (byte, rest) = backslash_x(s);
s = rest;
assert!(byte <= 0x80, "Invalid \\x byte in string literal");
char::from_u32(u32::from(byte)).unwrap()
}
b'u' => {
let (chr, rest) = backslash_u(s);
s = rest;
chr
}
b'n' => '\n',
b'r' => '\r',
b't' => '\t',
b'\\' => '\\',
b'0' => '\0',
b'\'' => '\'',
b'"' => '"',
b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
}
}
_ => {
let ch = next_chr(s);
s = &s[ch.len_utf8()..];
ch
}
};
assert_eq!(byte(s, 0), b'\'');
let suffix = s[1..].to_owned().into_boxed_str();
(ch, suffix)
}
fn backslash_x<S>(s: &S) -> (u8, &S)
where
S: Index<RangeFrom<usize>, Output = S> + AsRef<[u8]> + ?Sized,
{
let mut ch = 0;
let b0 = byte(s, 0);
let b1 = byte(s, 1);
ch += 0x10
* match b0 {
b'0'..=b'9' => b0 - b'0',
b'a'..=b'f' => 10 + (b0 - b'a'),
b'A'..=b'F' => 10 + (b0 - b'A'),
_ => panic!("unexpected non-hex character after \\x"),
};
ch += match b1 {
b'0'..=b'9' => b1 - b'0',
b'a'..=b'f' => 10 + (b1 - b'a'),
b'A'..=b'F' => 10 + (b1 - b'A'),
_ => panic!("unexpected non-hex character after \\x"),
};
(ch, &s[2..])
}
fn backslash_u(mut s: &str) -> (char, &str) {
if byte(s, 0) != b'{' {
panic!("{}", "expected { after \\u");
}
s = &s[1..];
let mut ch = 0;
let mut digits = 0;
loop {
let b = byte(s, 0);
let digit = match b {
b'0'..=b'9' => b - b'0',
b'a'..=b'f' => 10 + b - b'a',
b'A'..=b'F' => 10 + b - b'A',
b'_' if digits > 0 => {
s = &s[1..];
continue;
}
b'}' if digits == 0 => panic!("invalid empty unicode escape"),
b'}' => break,
_ => panic!("unexpected non-hex character after \\u"),
};
if digits == 6 {
panic!("overlong unicode escape (must have at most 6 hex digits)");
}
ch *= 0x10;
ch += u32::from(digit);
digits += 1;
s = &s[1..];
}
assert!(byte(s, 0) == b'}');
s = &s[1..];
if let Some(ch) = char::from_u32(ch) {
(ch, s)
} else {
panic!("character code {:x} is not a valid unicode character", ch);
}
}
// Returns base 10 digits and suffix.
pub fn parse_lit_int(mut s: &str) -> Option<(Box<str>, Box<str>)> {
let negative = byte(s, 0) == b'-';
if negative {
s = &s[1..];
}
let base = match (byte(s, 0), byte(s, 1)) {
(b'0', b'x') => {
s = &s[2..];
16
}
(b'0', b'o') => {
s = &s[2..];