Fix UTF-8 parsing from title of song

This commit is contained in:
Stephen Seo 2021-09-16 15:21:33 +09:00
parent 079477b282
commit 6f5f25503c

View file

@ -76,61 +76,78 @@ fn check_next_chars(
return Err((String::from("idx out of bounds"), 0u8)); return Err((String::from("idx out of bounds"), 0u8));
} }
if buf[idx] & 0b10000000 == 0 { if buf[idx] & 0b10000000 == 0 {
Ok(( let result_str = String::from_utf8(vec![buf[idx]]);
char::from_u32(buf[idx] as u32) if let Ok(mut s) = result_str {
.ok_or_else(|| (String::from("Not one-byte UTF-8"), 0u8))?, let popped_char = s.pop();
1u8, if s.is_empty() {
)) Ok((popped_char.unwrap(), 1u8))
} else {
Err((String::from("Not one-byte UTF-8 char"), 0u8))
}
} else {
Err((String::from("Not one-byte UTF-8 char"), 0u8))
}
} else if buf[idx] & 0b11100000 == 0b11000000 { } else if buf[idx] & 0b11100000 == 0b11000000 {
if idx + 1 >= buf.len() { if idx + 1 >= buf.len() {
saved.push(buf[idx]); saved.push(buf[idx]);
return Err(( return Err((
String::from("Is two byte UTF-8, but not enough bytes provided"), String::from("Is two-byte UTF-8, but not enough bytes provided"),
1u8, 1u8,
)); ));
} }
Ok(( let result_str = String::from_utf8(vec![buf[idx], buf[idx + 1]]);
char::from_u32((buf[idx] as u32) | ((buf[idx + 1] as u32) << 8)) if let Ok(mut s) = result_str {
.ok_or_else(|| (String::from("Not two-byte UTF-8"), 0u8))?, let popped_char = s.pop();
2u8, if s.is_empty() {
)) Ok((popped_char.unwrap(), 2u8))
} else {
Err((String::from("Not two-byte UTF-8 char"), 0u8))
}
} else {
Err((String::from("Not two-byte UTF-8 char"), 0u8))
}
} else if buf[idx] & 0b11110000 == 0b11100000 { } else if buf[idx] & 0b11110000 == 0b11100000 {
if idx + 2 >= buf.len() { if idx + 2 >= buf.len() {
for c in buf.iter().skip(idx) { for c in buf.iter().skip(idx) {
saved.push(*c); saved.push(*c);
} }
return Err(( return Err((
String::from("Is three byte UTF-8, but not enough bytes provided"), String::from("Is three-byte UTF-8, but not enough bytes provided"),
(idx + 3 - buf.len()) as u8, (idx + 3 - buf.len()) as u8,
)); ));
} }
Ok(( let result_str = String::from_utf8(vec![buf[idx], buf[idx + 1], buf[idx + 2]]);
char::from_u32( if let Ok(mut s) = result_str {
(buf[idx] as u32) | ((buf[idx + 1] as u32) << 8) | ((buf[idx + 2] as u32) << 16), let popped_char = s.pop();
) if s.is_empty() {
.ok_or_else(|| (String::from("Not three-byte UTF-8"), 0u8))?, Ok((popped_char.unwrap(), 3u8))
3u8, } else {
)) Err((String::from("Not three-byte UTF-8 char"), 0u8))
}
} else {
Err((String::from("Not three-byte UTF-8 char"), 0u8))
}
} else if buf[idx] & 0b11111000 == 0b11110000 { } else if buf[idx] & 0b11111000 == 0b11110000 {
if idx + 2 >= buf.len() { if idx + 3 >= buf.len() {
for c in buf.iter().skip(idx) { for c in buf.iter().skip(idx) {
saved.push(*c); saved.push(*c);
} }
return Err(( return Err((
String::from("Is four byte UTF-8, but not enough bytes provided"), String::from("Is four-byte UTF-8, but not enough bytes provided"),
(idx + 4 - buf.len()) as u8, (idx + 4 - buf.len()) as u8,
)); ));
} }
Ok(( let result_str = String::from_utf8(vec![buf[idx], buf[idx + 1], buf[idx + 2]]);
char::from_u32( if let Ok(mut s) = result_str {
(buf[idx] as u32) let popped_char = s.pop();
| ((buf[idx + 1] as u32) << 8) if s.is_empty() {
| ((buf[idx + 2] as u32) << 16) Ok((popped_char.unwrap(), 4u8))
| ((buf[idx + 3] as u32) << 24), } else {
) Err((String::from("Not four-byte UTF-8 char"), 0u8))
.ok_or_else(|| (String::from("Not four-byte UTF-8"), 0u8))?, }
4u8, } else {
)) Err((String::from("Not four-byte UTF-8 char"), 0u8))
}
} else { } else {
Err((String::from("Invalid UTF-8 char"), 0u8)) Err((String::from("Invalid UTF-8 char"), 0u8))
} }