From 5249113c64fa35912ef649e4a03e7cd9b5dde846 Mon Sep 17 00:00:00 2001 From: James Campos Date: Fri, 15 May 2020 18:37:33 -0700 Subject: search --- src/epub.rs | 35 +++++----- src/main.rs | 207 +++++++++++++++++++++++++++++++++++------------------------- 2 files changed, 140 insertions(+), 102 deletions(-) diff --git a/src/epub.rs b/src/epub.rs index 508e6b1..f80e2bc 100644 --- a/src/epub.rs +++ b/src/epub.rs @@ -7,7 +7,7 @@ use roxmltree::{Document, Node}; pub struct Epub { container: zip::ZipArchive, pub nav: Vec, - pub pages: Vec>, + pub chapters: Vec, } impl Epub { @@ -16,55 +16,57 @@ impl Epub { let mut epub = Epub { container: zip::ZipArchive::new(file)?, nav: Vec::new(), - pages: Vec::new(), + chapters: Vec::new(), }; let nav = epub.get_nav(); epub.nav.reserve_exact(nav.len()); - epub.pages.reserve_exact(nav.len()); + epub.chapters.reserve_exact(nav.len()); for (path, label) in nav { epub.nav.push(label); let xml = epub.get_text(&path); + // https://github.com/RazrFalcon/roxmltree/issues/12 + // UnknownEntityReference for HTML entities let doc = Document::parse(&xml).unwrap(); let body = doc.root_element().last_element_child().unwrap(); - let mut page = Vec::new(); - Epub::render(&mut page, body); - epub.pages.push(page); + // XXX no initial string, buf.last is none + let mut chapter = String::new(); + Epub::render(&mut chapter, body); + epub.chapters.push(chapter); } Ok(epub) } - fn render(buf: &mut Vec, n: Node) { + fn render(buf: &mut String, n: Node) { if n.is_text() { let text = n.text().unwrap(); if !text.trim().is_empty() { - let last = buf.last_mut().unwrap(); - last.push_str(text); + buf.push_str(text); } return; } match n.tag_name().name() { "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => { - buf.push(String::from("\x1b\x5b1m")); + buf.push_str("\n\x1b\x5b1m"); for c in n.children() { Self::render(buf, c); } - buf.push(String::from("\x1b\x5b0m")); + buf.push_str("\x1b\x5b0m\n"); } "blockquote" | "p" => { - buf.push(String::new()); + buf.push('\n'); for c in n.children() { Self::render(buf, c); } - buf.push(String::new()); + buf.push('\n'); } "li" => { - buf.push(String::from("- ")); + buf.push_str("\n- "); for c in n.children() { Self::render(buf, c); } - buf.push(String::new()); + buf.push('\n'); } - "br" => buf.push(String::new()), + "br" => buf.push('\n'), _ => { for c in n.children() { Self::render(buf, c); @@ -106,7 +108,6 @@ impl Epub { manifest.insert(n.attribute("id").unwrap(), n.attribute("href").unwrap()); }); - // TODO check if epub3 nav is reliable w/o spine let mut nav = HashMap::new(); if doc.root_element().attribute("version") == Some("3.0") { let path = doc diff --git a/src/main.rs b/src/main.rs index 6a87411..c050d1b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,47 +11,55 @@ use crossterm::{ mod epub; use epub::Epub; -fn wrap(text: &String, width: u16) -> Vec { +fn wrap(text: &str, width: usize) -> Vec<(usize, String)> { // XXX assumes a char is 1 unit wide - let mut wrapped = Vec::new(); + let mut lines = Vec::new(); let mut start = 0; - let mut brk = 0; - let mut line = 0; + let mut end = 0; + let mut len = 0; let mut word = 0; let mut skip = 0; for (i, c) in text.char_indices() { + len += 1; match c { ' ' => { - brk = i; + end = i; skip = 1; word = 0; } - // https://www.unicode.org/reports/tr14/ - // https://en.wikipedia.org/wiki/Line_wrap_and_word_wrap - // currently only break at hyphen and em-dash :shrug: '-' | '—' => { - brk = i + c.len_utf8(); - skip = 0; - word = 0; + if len > width { + // `end = i + 1` will extend over the margin + word += 1; + } else { + end = i + c.len_utf8(); + skip = 0; + word = 0; + } } _ => { word += 1; } } - - if line == width { - wrapped.push(String::from(&text[start..brk])); - start = brk + skip; - line = word; - } else { - line += 1; + if c == '\n' { + lines.push((start, String::from(&text[start..i]))); + start = i + 1; + len = 0; + } else if len > width { + let line = if word == len { + &text[start..i] + } else { + &text[start..end] + }; + lines.push((start, String::from(line))); + start = end + skip; + len = word; } } - wrapped.push(String::from(&text[start..])); - wrapped + lines } struct Position(String, usize, usize); @@ -104,8 +112,8 @@ impl View for Nav { bk.view = Some(&Page); } KeyCode::Enter | KeyCode::Tab | KeyCode::Right | KeyCode::Char('l') => { - bk.get_chapter(bk.nav_idx); - bk.pos = 0; + bk.chapter = bk.nav_idx; + bk.line = 0; bk.view = Some(&Page); } KeyCode::Down | KeyCode::Char('j') => { @@ -158,25 +166,28 @@ impl View for Page { match kc { KeyCode::Esc | KeyCode::Char('q') => bk.view = None, KeyCode::Tab => { - bk.nav_idx = bk.chapter_idx; + bk.nav_idx = bk.chapter; bk.nav_top = bk.nav_idx.saturating_sub(bk.rows - 1); bk.view = Some(&Nav); } KeyCode::F(1) | KeyCode::Char('?') => bk.view = Some(&Help), KeyCode::Char('/') => { bk.search = String::new(); + bk.jump = bk.line; bk.view = Some(&Search); } KeyCode::Char('N') => { bk.search(Direction::Backward); } KeyCode::Char('n') => { + // FIXME + bk.scroll_down(1); bk.search(Direction::Forward); } KeyCode::End | KeyCode::Char('G') => { - bk.pos = (bk.chapter.len() / bk.rows) * bk.rows; + bk.line = bk.lines().len().saturating_sub(bk.rows); } - KeyCode::Home | KeyCode::Char('g') => bk.pos = 0, + KeyCode::Home | KeyCode::Char('g') => bk.line = 0, KeyCode::Char('d') => { bk.scroll_down(bk.rows / 2); } @@ -205,8 +216,8 @@ impl View for Page { } } fn render(&self, bk: &Bk) -> Vec { - let end = std::cmp::min(bk.pos + bk.rows, bk.chapter.len()); - bk.chapter[bk.pos..end].iter().map(String::from).collect() + let end = std::cmp::min(bk.line + bk.rows, bk.lines().len()); + bk.lines()[bk.line..end].iter().map(String::from).collect() } } @@ -215,7 +226,7 @@ impl View for Search { fn run(&self, bk: &mut Bk, kc: KeyCode) { match kc { KeyCode::Esc => { - bk.search = String::new(); + bk.line = bk.jump; bk.view = Some(&Page); } KeyCode::Enter => { @@ -223,6 +234,7 @@ impl View for Search { } KeyCode::Backspace => { bk.search.pop(); + bk.search(Direction::Forward); } KeyCode::Char(c) => { bk.search.push(c); @@ -232,10 +244,10 @@ impl View for Search { } } fn render(&self, bk: &Bk) -> Vec { - let end = std::cmp::min(bk.pos + bk.rows - 1, bk.chapter.len()); + let end = std::cmp::min(bk.line + bk.rows - 1, bk.lines().len()); let mut buf = Vec::with_capacity(bk.rows); - for line in bk.chapter[bk.pos..end].iter() { + for line in bk.lines()[bk.line..end].iter() { if let Some(i) = line.find(&bk.search) { buf.push(format!( "{}{}{}{}{}", @@ -258,40 +270,62 @@ impl View for Search { } } +struct Chapter { + text: String, + lines: Vec, + bytes: Vec, +} + struct Bk<'a> { view: Option<&'a dyn View>, - pages: Vec>, + chapter: usize, cols: u16, - chapter: Vec, - chapter_idx: usize, + // ideally we could use string slices as pointers, but self referential structs are hard + chapters: Vec, nav_idx: usize, nav_top: usize, - pos: usize, + line: usize, + jump: usize, rows: usize, toc: Vec, - pad: u16, + max_width: u16, search: String, } impl Bk<'_> { - fn new(epub: Epub, pos: &Position, pad: u16) -> Self { + fn new(epub: Epub, line: &Position, max_width: u16) -> Self { let (cols, rows) = terminal::size().unwrap(); - let mut bk = Bk { + let width = std::cmp::min(cols, max_width) as usize; + let mut chapters = Vec::with_capacity(epub.chapters.len()); + for text in epub.chapters { + let wrap = wrap(&text, width); + let mut lines = Vec::with_capacity(wrap.len()); + let mut bytes = Vec::with_capacity(wrap.len()); + + for (byte, line) in wrap { + lines.push(line); + bytes.push(byte); + } + chapters.push(Chapter {text, lines, bytes}); + } + + Bk { + jump: 0, view: Some(&Page), - chapter: Vec::new(), - chapter_idx: 0, + chapter: line.1, nav_idx: 0, nav_top: 0, toc: epub.nav, - pages: epub.pages, - pos: pos.2, - pad, + chapters, + line: line.2, + max_width, cols, rows: rows as usize, search: String::new(), - }; - bk.get_chapter(pos.1); - bk + } + } + fn lines(&self) -> &Vec { + &self.chapters[self.chapter].lines } fn run(&mut self) -> crossterm::Result<()> { let mut stdout = stdout(); @@ -299,20 +333,18 @@ impl Bk<'_> { terminal::enable_raw_mode()?; while let Some(view) = self.view { + let pad = self.cols.saturating_sub(self.max_width) / 2; + queue!(stdout, terminal::Clear(terminal::ClearType::All))?; for (i, line) in view.render(self).iter().enumerate() { - queue!(stdout, cursor::MoveTo(self.pad, i as u16), Print(line))?; + queue!(stdout, cursor::MoveTo(pad, i as u16), Print(line))?; } stdout.flush().unwrap(); match event::read()? { Event::Key(e) => view.run(self, e.code), - Event::Resize(cols, rows) => { - self.cols = cols; - self.rows = rows as usize; - self.get_chapter(self.chapter_idx); - } // TODO + Event::Resize(_, _) => (), Event::Mouse(_) => (), } } @@ -320,58 +352,63 @@ impl Bk<'_> { queue!(stdout, terminal::LeaveAlternateScreen, cursor::Show)?; terminal::disable_raw_mode() } - fn get_chapter(&mut self, idx: usize) { - let width = self.cols - (self.pad * 2); - let page = &self.pages[idx]; - self.chapter = Vec::with_capacity(page.len() * 2); - for line in page { - self.chapter.append(&mut wrap(line, width)) - } - self.chapter_idx = idx; - } fn next_chapter(&mut self) { - if self.chapter_idx < self.toc.len() - 1 { - self.get_chapter(self.chapter_idx + 1); - self.pos = 0; + if self.chapter < self.toc.len() - 1 { + self.chapter += 1; + self.line = 0; } } fn prev_chapter(&mut self) { - if self.chapter_idx > 0 { - self.get_chapter(self.chapter_idx - 1); - self.pos = 0; + if self.chapter > 0 { + self.chapter -= 1; + self.line = 0; } } fn scroll_down(&mut self, n: usize) { - if self.rows < self.chapter.len() - self.pos { - self.pos += n; + if self.line + self.rows < self.lines().len() { + self.line += n; } else { self.next_chapter(); } } fn scroll_up(&mut self, n: usize) { - if self.pos > 0 { - self.pos = self.pos.saturating_sub(n); + if self.line > 0 { + self.line = self.line.saturating_sub(n); } else { self.prev_chapter(); - self.pos = (self.chapter.len() / self.rows) * self.rows; + self.line = self.lines().len().saturating_sub(self.rows); } } fn search(&mut self, dir: Direction) { + // https://doc.rust-lang.org/std/vec/struct.Vec.html#method.binary_search + // If the value is not found then Result::Err is returned, containing the index where a matching element + // could be inserted while maintaining sorted order. + let head = (self.chapter, self.chapters[self.chapter].bytes[self.line]); match dir { Direction::Forward => { - if let Some(i) = self.chapter[self.pos..] - .iter() - .position(|s| s.contains(&self.search)) - { - self.pos += i; + let rest = (self.chapter+1..self.chapters.len()-1).map(|n| (n, 0)); + for (c, byte) in std::iter::once(head).chain(rest) { + if let Some(index) = self.chapters[c].text[byte..].find(&self.search) { + self.line = match self.chapters[c].bytes.binary_search(&(byte + index)) { + Ok(n) => n, + Err(n) => n-1, + }; + self.chapter = c; + break; + } } } Direction::Backward => { - if let Some(i) = self.chapter[..self.pos] - .iter() - .rposition(|s| s.contains(&self.search)) - { - self.pos = i; + let rest = (0..self.chapter-1).rev().map(|c| (c, self.chapters[c].text.len())); + for (c, byte) in std::iter::once(head).chain(rest) { + if let Some(index) = self.chapters[c].text[..byte].rfind(&self.search) { + self.line = match self.chapters[c].bytes.binary_search(&index) { + Ok(n) => n, + Err(n) => n-1, + }; + self.chapter = c; + break; + } } } } @@ -408,23 +445,23 @@ fn restore() -> Option { } fn main() { - let pos = restore().unwrap_or_else(|| { + let line = restore().unwrap_or_else(|| { println!("usage: bk path"); std::process::exit(1); }); - let epub = Epub::new(&pos.0).unwrap_or_else(|e| { + let epub = Epub::new(&line.0).unwrap_or_else(|e| { println!("error reading epub: {}", e); std::process::exit(1); }); - let mut bk = Bk::new(epub, &pos, 3); + let mut bk = Bk::new(epub, &line, 75); // crossterm really shouldn't error bk.run().unwrap(); std::fs::write( format!("{}/.local/share/bk", std::env::var("HOME").unwrap()), - format!("{}\n{}\n{}", pos.0, bk.chapter_idx, bk.pos), + format!("{}\n{}\n{}", line.0, bk.chapter, bk.line), ) .unwrap_or_else(|e| { println!("error saving position: {}", e); -- cgit v1.2.3