diff options
author | James Campos <james.r.campos@gmail.com> | 2020-07-16 23:18:29 -0700 |
---|---|---|
committer | James Campos <james.r.campos@gmail.com> | 2020-07-16 23:18:29 -0700 |
commit | 66954d88e8296254adc11b6535f04966404f74a7 (patch) | |
tree | fe8c27f3409a91d2987b2696644464a1ca81bf82 /src | |
parent | 4267d38a53bfa82ab5c8bcdfdc3eb8f3a4691e38 (diff) | |
download | bk-66954d88e8296254adc11b6535f04966404f74a7.tar.gz |
unicode width
Diffstat (limited to 'src')
-rw-r--r-- | src/main.rs | 42 |
1 files changed, 23 insertions, 19 deletions
diff --git a/src/main.rs b/src/main.rs index 50e7a4c..bf2be55 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,56 +15,60 @@ use std::{ iter, process::exit, }; +use unicode_width::UnicodeWidthChar; mod epub; use epub::Chapter; -// XXX assumes a char is i unit wide -fn wrap(text: &str, width: usize) -> Vec<(usize, usize)> { +fn wrap(text: &str, max_cols: usize) -> Vec<(usize, usize)> { let mut lines = Vec::new(); // bytes let mut start = 0; let mut end = 0; - // chars after the break + // cols after the break let mut after = 0; - // chars in unbroken line - let mut len = 0; + // cols of unbroken line + let mut cols = 0; // are we breaking on whitespace? - let mut skip = false; + let mut space = false; + // should probably use unicode_segmentation grapheme_indices for (i, c) in text.char_indices() { - len += 1; + // https://github.com/unicode-rs/unicode-width/issues/6 + let char_cols = c.width().unwrap_or(0); + cols += char_cols; match c { '\n' => { after = 0; end = i; - skip = true; - len = width + 1; + space = true; + cols = max_cols + 1; } ' ' => { after = 0; end = i; - skip = true; + space = true; } - '-' | '—' if len <= width => { + '-' | '—' if cols <= max_cols => { after = 0; end = i + c.len_utf8(); - skip = false; + space = false; } - _ => after += 1, + _ => after += char_cols, } - if len > width { - if len == after { - after = 1; + if cols > max_cols { + // break a single long word + if cols == after { + after = char_cols; end = i; - skip = false; + space = false; } lines.push((start, end)); start = end; - if skip { + if space { start += 1; } - len = after; + cols = after; } } |