From 7d7fe1ad2c1232811c7b365847aa415ff6a4d90d Mon Sep 17 00:00:00 2001 From: James Campos Date: Tue, 7 Jul 2020 05:11:18 -0700 Subject: internal --- src/epub.rs | 204 +++++++++++++++++++++++++++++------------------------------- src/main.rs | 3 +- 2 files changed, 101 insertions(+), 106 deletions(-) diff --git a/src/epub.rs b/src/epub.rs index c4f79e3..882175e 100644 --- a/src/epub.rs +++ b/src/epub.rs @@ -8,18 +8,30 @@ pub struct Epub { } impl Epub { - pub fn new(path: &str) -> std::io::Result { + pub fn new(path: &str, meta: bool) -> std::io::Result { let file = File::open(path)?; let mut epub = Epub { container: zip::ZipArchive::new(file)?, chapters: Vec::new(), meta: String::new(), }; - epub.get_rootfile(); + let chapters = epub.get_rootfile(); + if !meta { + epub.get_chapters(chapters); + } Ok(epub) } - pub fn get_chapters(&mut self) { - self.chapters = std::mem::take(&mut self.chapters) + fn get_text(&mut self, name: &str) -> String { + let mut text = String::new(); + self.container + .by_name(name) + .unwrap() + .read_to_string(&mut text) + .unwrap(); + text + } + fn get_chapters(&mut self, chapters: Vec<(String, String)>) { + self.chapters = chapters .into_iter() .filter_map(|(path, title)| { let xml = self.get_text(&path); @@ -28,7 +40,7 @@ impl Epub { let doc = Document::parse(&xml).unwrap(); let body = doc.root_element().last_element_child().unwrap(); let mut chapter = String::new(); - Epub::render(&mut chapter, body); + render(body, &mut chapter); if chapter.is_empty() { None } else { @@ -37,55 +49,7 @@ impl Epub { }) .collect(); } - fn render(buf: &mut String, n: Node) { - if n.is_text() { - let text = n.text().unwrap(); - if !text.trim().is_empty() { - buf.push_str(text); - } - return; - } - - match n.tag_name().name() { - "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => { - buf.push_str("\n\x1b[1m"); - for c in n.children() { - Self::render(buf, c); - } - buf.push_str("\x1b[0m\n"); - } - "blockquote" | "p" | "tr" => { - buf.push('\n'); - for c in n.children() { - Self::render(buf, c); - } - buf.push('\n'); - } - "li" => { - buf.push_str("\n- "); - for c in n.children() { - Self::render(buf, c); - } - buf.push('\n'); - } - "br" => buf.push('\n'), - _ => { - for c in n.children() { - Self::render(buf, c); - } - } - } - } - fn get_text(&mut self, name: &str) -> String { - let mut text = String::new(); - self.container - .by_name(name) - .unwrap() - .read_to_string(&mut text) - .unwrap(); - text - } - fn get_rootfile(&mut self) { + fn get_rootfile(&mut self) -> Vec<(String, String)> { let xml = self.get_text("META-INF/container.xml"); let doc = Document::parse(&xml).unwrap(); let path = doc @@ -123,7 +87,6 @@ impl Epub { .for_each(|n| { manifest.insert(n.attribute("id").unwrap(), n.attribute("href").unwrap()); }); - if doc.root_element().attribute("version") == Some("3.0") { let path = manifest_node .children() @@ -133,52 +96,15 @@ impl Epub { .unwrap(); let xml = self.get_text(&format!("{}{}", rootdir, path)); let doc = Document::parse(&xml).unwrap(); - - doc.descendants() - .find(|n| n.has_tag_name("nav")) - .unwrap() - .descendants() - .filter(|n| n.has_tag_name("a")) - .for_each(|n| { - let path = n.attribute("href").unwrap().to_string(); - let text = n - .descendants() - .filter(Node::is_text) - .map(|n| n.text().unwrap()) - .collect(); - nav.insert(path, text); - }) + epub3(doc, &mut nav); } else { let toc = spine_node.attribute("toc").unwrap_or("ncx"); let path = manifest.get(toc).unwrap(); let xml = self.get_text(&format!("{}{}", rootdir, path)); let doc = Document::parse(&xml).unwrap(); - - doc.descendants() - .find(|n| n.has_tag_name("navMap")) - .unwrap() - .descendants() - .filter(|n| n.has_tag_name("navPoint")) - .for_each(|n| { - let path = n - .descendants() - .find(|n| n.has_tag_name("content")) - .unwrap() - .attribute("src") - .unwrap() - .to_string(); - let text = n - .descendants() - .find(|n| n.has_tag_name("text")) - .unwrap() - .text() - .unwrap() - .to_string(); - nav.insert(path, text); - }) + epub2(doc, &mut nav); } - - self.chapters = spine_node + spine_node .children() .filter(Node::is_element) .enumerate() @@ -189,17 +115,87 @@ impl Epub { let path = format!("{}{}", rootdir, path); (path, label) }) - .collect(); + .collect() } } -#[test] -fn test_dir() { - let path = "/mnt/lit/read"; - for entry in std::fs::read_dir(path).unwrap() { - let path = entry.unwrap().path(); - let s = path.to_str().unwrap(); - println!("testing: {}", s); - Epub::new(s).unwrap(); +fn render(n: Node, buf: &mut String) { + if n.is_text() { + let text = n.text().unwrap(); + if !text.trim().is_empty() { + buf.push_str(text); + } + return; } + + match n.tag_name().name() { + "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => { + buf.push_str("\n\x1b[1m"); + for c in n.children() { + render(c, buf); + } + buf.push_str("\x1b[0m\n"); + } + "blockquote" | "p" | "tr" => { + buf.push('\n'); + for c in n.children() { + render(c, buf); + } + buf.push('\n'); + } + "li" => { + buf.push_str("\n- "); + for c in n.children() { + render(c, buf); + } + buf.push('\n'); + } + "br" => buf.push('\n'), + _ => { + for c in n.children() { + render(c, buf); + } + } + } +} + +fn epub2(doc: Document, nav: &mut HashMap) { + doc.descendants() + .find(|n| n.has_tag_name("navMap")) + .unwrap() + .descendants() + .filter(|n| n.has_tag_name("navPoint")) + .for_each(|n| { + let path = n + .descendants() + .find(|n| n.has_tag_name("content")) + .unwrap() + .attribute("src") + .unwrap() + .to_string(); + let text = n + .descendants() + .find(|n| n.has_tag_name("text")) + .unwrap() + .text() + .unwrap() + .to_string(); + nav.insert(path, text); + }); +} +fn epub3(doc: Document, nav: &mut HashMap) { + doc.descendants() + .find(|n| n.has_tag_name("nav")) + .unwrap() + .descendants() + .filter(|n| n.has_tag_name("a")) + .for_each(|n| { + let path = n.attribute("href").unwrap().to_string(); + let text = n + .descendants() + .filter(Node::is_text) + .map(|n| n.text().unwrap()) + .collect(); + nav.insert(path, text); + }); } diff --git a/src/main.rs b/src/main.rs index 7e16c74..8626541 100644 --- a/src/main.rs +++ b/src/main.rs @@ -656,7 +656,7 @@ fn main() { exit(1); }); - let mut epub = epub::Epub::new(&path).unwrap_or_else(|e| { + let epub = epub::Epub::new(&path, meta).unwrap_or_else(|e| { println!("error reading epub: {}", e); exit(1); }); @@ -666,7 +666,6 @@ fn main() { exit(0); } - epub.get_chapters(); let mut bk = Bk::new(epub, args); // i have never seen crossterm error bk.run().unwrap(); -- cgit v1.2.3