Fix UTF-8 parsing from title of song
This commit is contained in:
parent
079477b282
commit
6f5f25503c
1 changed files with 48 additions and 31 deletions
79
src/main.rs
79
src/main.rs
|
@ -76,61 +76,78 @@ fn check_next_chars(
|
||||||
return Err((String::from("idx out of bounds"), 0u8));
|
return Err((String::from("idx out of bounds"), 0u8));
|
||||||
}
|
}
|
||||||
if buf[idx] & 0b10000000 == 0 {
|
if buf[idx] & 0b10000000 == 0 {
|
||||||
Ok((
|
let result_str = String::from_utf8(vec![buf[idx]]);
|
||||||
char::from_u32(buf[idx] as u32)
|
if let Ok(mut s) = result_str {
|
||||||
.ok_or_else(|| (String::from("Not one-byte UTF-8"), 0u8))?,
|
let popped_char = s.pop();
|
||||||
1u8,
|
if s.is_empty() {
|
||||||
))
|
Ok((popped_char.unwrap(), 1u8))
|
||||||
|
} else {
|
||||||
|
Err((String::from("Not one-byte UTF-8 char"), 0u8))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Err((String::from("Not one-byte UTF-8 char"), 0u8))
|
||||||
|
}
|
||||||
} else if buf[idx] & 0b11100000 == 0b11000000 {
|
} else if buf[idx] & 0b11100000 == 0b11000000 {
|
||||||
if idx + 1 >= buf.len() {
|
if idx + 1 >= buf.len() {
|
||||||
saved.push(buf[idx]);
|
saved.push(buf[idx]);
|
||||||
return Err((
|
return Err((
|
||||||
String::from("Is two byte UTF-8, but not enough bytes provided"),
|
String::from("Is two-byte UTF-8, but not enough bytes provided"),
|
||||||
1u8,
|
1u8,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
Ok((
|
let result_str = String::from_utf8(vec![buf[idx], buf[idx + 1]]);
|
||||||
char::from_u32((buf[idx] as u32) | ((buf[idx + 1] as u32) << 8))
|
if let Ok(mut s) = result_str {
|
||||||
.ok_or_else(|| (String::from("Not two-byte UTF-8"), 0u8))?,
|
let popped_char = s.pop();
|
||||||
2u8,
|
if s.is_empty() {
|
||||||
))
|
Ok((popped_char.unwrap(), 2u8))
|
||||||
|
} else {
|
||||||
|
Err((String::from("Not two-byte UTF-8 char"), 0u8))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Err((String::from("Not two-byte UTF-8 char"), 0u8))
|
||||||
|
}
|
||||||
} else if buf[idx] & 0b11110000 == 0b11100000 {
|
} else if buf[idx] & 0b11110000 == 0b11100000 {
|
||||||
if idx + 2 >= buf.len() {
|
if idx + 2 >= buf.len() {
|
||||||
for c in buf.iter().skip(idx) {
|
for c in buf.iter().skip(idx) {
|
||||||
saved.push(*c);
|
saved.push(*c);
|
||||||
}
|
}
|
||||||
return Err((
|
return Err((
|
||||||
String::from("Is three byte UTF-8, but not enough bytes provided"),
|
String::from("Is three-byte UTF-8, but not enough bytes provided"),
|
||||||
(idx + 3 - buf.len()) as u8,
|
(idx + 3 - buf.len()) as u8,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
Ok((
|
let result_str = String::from_utf8(vec![buf[idx], buf[idx + 1], buf[idx + 2]]);
|
||||||
char::from_u32(
|
if let Ok(mut s) = result_str {
|
||||||
(buf[idx] as u32) | ((buf[idx + 1] as u32) << 8) | ((buf[idx + 2] as u32) << 16),
|
let popped_char = s.pop();
|
||||||
)
|
if s.is_empty() {
|
||||||
.ok_or_else(|| (String::from("Not three-byte UTF-8"), 0u8))?,
|
Ok((popped_char.unwrap(), 3u8))
|
||||||
3u8,
|
} else {
|
||||||
))
|
Err((String::from("Not three-byte UTF-8 char"), 0u8))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Err((String::from("Not three-byte UTF-8 char"), 0u8))
|
||||||
|
}
|
||||||
} else if buf[idx] & 0b11111000 == 0b11110000 {
|
} else if buf[idx] & 0b11111000 == 0b11110000 {
|
||||||
if idx + 2 >= buf.len() {
|
if idx + 3 >= buf.len() {
|
||||||
for c in buf.iter().skip(idx) {
|
for c in buf.iter().skip(idx) {
|
||||||
saved.push(*c);
|
saved.push(*c);
|
||||||
}
|
}
|
||||||
return Err((
|
return Err((
|
||||||
String::from("Is four byte UTF-8, but not enough bytes provided"),
|
String::from("Is four-byte UTF-8, but not enough bytes provided"),
|
||||||
(idx + 4 - buf.len()) as u8,
|
(idx + 4 - buf.len()) as u8,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
Ok((
|
let result_str = String::from_utf8(vec![buf[idx], buf[idx + 1], buf[idx + 2]]);
|
||||||
char::from_u32(
|
if let Ok(mut s) = result_str {
|
||||||
(buf[idx] as u32)
|
let popped_char = s.pop();
|
||||||
| ((buf[idx + 1] as u32) << 8)
|
if s.is_empty() {
|
||||||
| ((buf[idx + 2] as u32) << 16)
|
Ok((popped_char.unwrap(), 4u8))
|
||||||
| ((buf[idx + 3] as u32) << 24),
|
} else {
|
||||||
)
|
Err((String::from("Not four-byte UTF-8 char"), 0u8))
|
||||||
.ok_or_else(|| (String::from("Not four-byte UTF-8"), 0u8))?,
|
}
|
||||||
4u8,
|
} else {
|
||||||
))
|
Err((String::from("Not four-byte UTF-8 char"), 0u8))
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
Err((String::from("Invalid UTF-8 char"), 0u8))
|
Err((String::from("Invalid UTF-8 char"), 0u8))
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue