From 5006bce04f59a07187e6890f733d6e38c63e45f2 Mon Sep 17 00:00:00 2001 From: James Campos Date: Mon, 11 May 2020 16:05:24 -0700 Subject: epub.rs --- src/main.rs | 198 +++--------------------------------------------------------- 1 file changed, 7 insertions(+), 191 deletions(-) (limited to 'src/main.rs') diff --git a/src/main.rs b/src/main.rs index e5aa752..6a87411 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,4 @@ -use std::collections::HashMap; -use std::fs::File; -use std::io::{stdout, Read, Write}; +use std::io::{stdout, Write}; use crossterm::{ cursor, @@ -10,190 +8,8 @@ use crossterm::{ terminal, }; -use roxmltree::{Document, Node}; - -struct Link { - path: String, - label: String, -} - -struct Epub { - container: zip::ZipArchive, - nav: Vec, - pages: Vec>, -} - -impl Epub { - fn new(path: &str) -> std::io::Result { - let file = File::open(path)?; - let mut epub = Epub { - container: zip::ZipArchive::new(file)?, - nav: Vec::new(), - pages: Vec::new(), - }; - let nav = epub.get_nav(); - epub.pages = Vec::with_capacity(nav.len()); - for link in &nav { - let xml = epub.get_text(&link.path); - let doc = Document::parse(&xml).unwrap(); - let body = doc.root_element().last_element_child().unwrap(); - let mut page = Vec::new(); - Epub::render(&mut page, body); - epub.pages.push(page); - } - epub.nav = nav; - Ok(epub) - } - fn render(buf: &mut Vec, n: Node) { - if n.is_text() { - let text = n.text().unwrap(); - if !text.trim().is_empty() { - let last = buf.last_mut().unwrap(); - last.push_str(text); - } - return; - } - - match n.tag_name().name() { - "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => { - buf.push(String::from("\x1b\x5b1m")); - for c in n.children() { - Self::render(buf, c); - } - buf.push(String::from("\x1b\x5b0m")); - } - "blockquote" | "p" => { - buf.push(String::new()); - for c in n.children() { - Self::render(buf, c); - } - buf.push(String::new()); - } - "li" => { - buf.push(String::from("- ")); - for c in n.children() { - Self::render(buf, c); - } - buf.push(String::new()); - } - "br" => buf.push(String::new()), - _ => { - for c in n.children() { - Self::render(buf, c); - } - } - } - } - fn get_text(&mut self, name: &str) -> String { - let mut text = String::new(); - self.container - .by_name(name) - .unwrap() - .read_to_string(&mut text) - .unwrap(); - text - } - fn get_nav(&mut self) -> Vec { - let xml = self.get_text("META-INF/container.xml"); - let doc = Document::parse(&xml).unwrap(); - let path = doc - .descendants() - .find(|n| n.has_tag_name("rootfile")) - .unwrap() - .attribute("full-path") - .unwrap(); - - let xml = self.get_text(path); - let doc = Document::parse(&xml).unwrap(); - let rootdir = std::path::Path::new(&path).parent().unwrap(); - - let mut manifest = HashMap::new(); - doc.root_element() - .children() - .find(|n| n.has_tag_name("manifest")) - .unwrap() - .children() - .filter(Node::is_element) - .for_each(|n| { - manifest.insert(n.attribute("id").unwrap(), n.attribute("href").unwrap()); - }); - - // TODO check if epub3 nav is reliable w/o spine - let mut nav = HashMap::new(); - if doc.root_element().attribute("version") == Some("3.0") { - let path = doc - .root_element() - .children() - .find(|n| n.has_tag_name("manifest")) - .unwrap() - .children() - .find(|n| n.attribute("properties") == Some("nav")) - .unwrap() - .attribute("href") - .unwrap(); - let xml = self.get_text(rootdir.join(path).to_str().unwrap()); - let doc = Document::parse(&xml).unwrap(); - - doc.descendants() - .find(|n| n.has_tag_name("nav")) - .unwrap() - .descendants() - .filter(|n| n.has_tag_name("a")) - .for_each(|n| { - let path = n.attribute("href").unwrap().to_string(); - let text = n - .descendants() - .filter(Node::is_text) - .map(|n| n.text().unwrap()) - .collect(); - nav.insert(path, text); - }) - } else { - let path = manifest.get("ncx").unwrap(); - let xml = self.get_text(rootdir.join(path).to_str().unwrap()); - let doc = Document::parse(&xml).unwrap(); - - doc.descendants() - .find(|n| n.has_tag_name("navMap")) - .unwrap() - .descendants() - .filter(|n| n.has_tag_name("navPoint")) - .for_each(|n| { - let path = n - .descendants() - .find(|n| n.has_tag_name("content")) - .unwrap() - .attribute("src") - .unwrap() - .to_string(); - let text = n - .descendants() - .find(|n| n.has_tag_name("text")) - .unwrap() - .text() - .unwrap() - .to_string(); - nav.insert(path, text); - }) - } - - doc.root_element() - .children() - .find(|n| n.has_tag_name("spine")) - .unwrap() - .children() - .filter(Node::is_element) - .enumerate() - .map(|(i, n)| { - let id = n.attribute("idref").unwrap(); - let path = manifest.remove(id).unwrap(); - let label = nav.remove(path).unwrap_or_else(|| i.to_string()); - let path = rootdir.join(path).to_str().unwrap().to_string(); - Link { path, label } - }) - .collect() - } -} +mod epub; +use epub::Epub; fn wrap(text: &String, width: u16) -> Vec { // XXX assumes a char is 1 unit wide @@ -325,11 +141,11 @@ impl View for Nav { bk.toc[bk.nav_top..end] .iter() .enumerate() - .map(|(i, link)| { + .map(|(i, label)| { if bk.nav_idx == bk.nav_top + i { - format!("{}{}{}", Attribute::Reverse, link.label, Attribute::Reset) + format!("{}{}{}", Attribute::Reverse, label, Attribute::Reset) } else { - link.label.to_string() + label.to_string() } }) .collect() @@ -452,7 +268,7 @@ struct Bk<'a> { nav_top: usize, pos: usize, rows: usize, - toc: Vec, + toc: Vec, pad: u16, search: String, } -- cgit v1.2.3