diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/main.rs | 42 | 
1 file changed, 23 insertions, 19 deletions
| diff --git a/src/main.rs b/src/main.rs index 50e7a4c..bf2be55 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,56 +15,60 @@ use std::{      iter,      process::exit,  }; +use unicode_width::UnicodeWidthChar;  mod epub;  use epub::Chapter; -// XXX assumes a char is i unit wide -fn wrap(text: &str, width: usize) -> Vec<(usize, usize)> { +fn wrap(text: &str, max_cols: usize) -> Vec<(usize, usize)> {      let mut lines = Vec::new();      // bytes      let mut start = 0;      let mut end = 0; -    // chars after the break +    // cols after the break      let mut after = 0; -    // chars in unbroken line -    let mut len = 0; +    // cols of unbroken line +    let mut cols = 0;      // are we breaking on whitespace? -    let mut skip = false; +    let mut space = false; +    // should probably use unicode_segmentation grapheme_indices      for (i, c) in text.char_indices() { -        len += 1; +        // https://github.com/unicode-rs/unicode-width/issues/6 +        let char_cols = c.width().unwrap_or(0); +        cols += char_cols;          match c {              '\n' => {                  after = 0;                  end = i; -                skip = true; -                len = width + 1; +                space = true; +                cols = max_cols + 1;              }              ' ' => {                  after = 0;                  end = i; -                skip = true; +                space = true;              } -            '-' | '—' if len <= width => { +            '-' | '—' if cols <= max_cols => {                  after = 0;                  end = i + c.len_utf8(); -                skip = false; +                space = false;              } -            _ => after += 1, +            _ => after += char_cols,          } -        if len > width { -            if len == after { -                after = 1; +        if cols > max_cols { +            // break a single long word +            if cols == after { +                after = char_cols;                
  end = i; -                skip = false; +                space = false;              }              lines.push((start, end));              start = end; -            if skip { +            if space {                  start += 1;              } -            len = after; +            cols = after;          }      } | 
