Move vault_contents out of Context and into Exporter

This reduces the need to pass vault_contents around in various places
and restricts Context to dealing with the actual note which is being
processed, instead of also carrying program state information.

This will help with future feature development as note parsing functions
can now access Exporter directly.
This commit is contained in:
Nick Groenen 2020-12-22 11:10:45 +01:00
parent e9d5e69e24
commit 207ca1124e
No known key found for this signature in database
GPG Key ID: 4F0AD019928AE098
1 changed file with 233 additions and 217 deletions

View File

@ -84,36 +84,35 @@ pub struct Exporter<'a> {
root: PathBuf, root: PathBuf,
destination: PathBuf, destination: PathBuf,
frontmatter_strategy: FrontmatterStrategy, frontmatter_strategy: FrontmatterStrategy,
vault_contents: Option<Vec<PathBuf>>,
walk_options: WalkOptions<'a>, walk_options: WalkOptions<'a>,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
/// Context holds parser metadata for the file/note currently being parsed. /// Context holds parser metadata for the file/note currently being parsed.
struct Context<'a> { struct Context {
file_tree: Vec<PathBuf>, file_tree: Vec<PathBuf>,
vault_contents: &'a [PathBuf],
frontmatter_strategy: FrontmatterStrategy, frontmatter_strategy: FrontmatterStrategy,
} }
impl<'a> Context<'a> { impl Context {
/// Create a new `Context` /// Create a new `Context`
fn new(file: PathBuf, vault_contents: &'a [PathBuf]) -> Context<'a> { fn new(file: PathBuf) -> Context {
Context { Context {
file_tree: vec![file.clone()], file_tree: vec![file.clone()],
vault_contents,
frontmatter_strategy: FrontmatterStrategy::Auto, frontmatter_strategy: FrontmatterStrategy::Auto,
} }
} }
/// Create a new `Context` which inherits from a parent Context. /// Create a new `Context` which inherits from a parent Context.
fn from_parent(context: &Context<'a>, child: &PathBuf) -> Context<'a> { fn from_parent(context: &Context, child: &PathBuf) -> Context {
let mut context = context.clone(); let mut context = context.clone();
context.file_tree.push(child.to_path_buf()); context.file_tree.push(child.to_path_buf());
context context
} }
/// Associate a new `FrontmatterStrategy` with this context. /// Associate a new `FrontmatterStrategy` with this context.
fn set_frontmatter_strategy(&mut self, strategy: FrontmatterStrategy) -> &mut Context<'a> { fn set_frontmatter_strategy(&mut self, strategy: FrontmatterStrategy) -> &mut Context {
self.frontmatter_strategy = strategy; self.frontmatter_strategy = strategy;
self self
} }
@ -146,6 +145,7 @@ impl<'a> Exporter<'a> {
destination, destination,
frontmatter_strategy: FrontmatterStrategy::Auto, frontmatter_strategy: FrontmatterStrategy::Auto,
walk_options: WalkOptions::default(), walk_options: WalkOptions::default(),
vault_contents: None,
} }
} }
@ -159,15 +159,18 @@ impl<'a> Exporter<'a> {
self self
} }
pub fn run(self) -> Result<()> { pub fn run(&mut self) -> Result<()> {
if !self.root.exists() { if !self.root.exists() {
return Err(ExportError::PathDoesNotExist { path: self.root }); return Err(ExportError::PathDoesNotExist {
path: self.root.clone(),
});
} }
// When a single file is specified, we can short-circuit construction of walk and associated // When a single file is specified, we can short-circuit construction of walk and associated
// directory traversal. This also allows us to accept destination as either a file or a // directory traversal. This also allows us to accept destination as either a file or a
// directory name. // directory name.
if self.root.is_file() { if self.root.is_file() {
self.vault_contents = Some(vec![self.root.clone()]);
let source_filename = self let source_filename = self
.root .root
.file_name() .file_name()
@ -188,265 +191,278 @@ impl<'a> Exporter<'a> {
self.destination.clone() self.destination.clone()
} }
}; };
return Ok(self.export_note(&self.root, &destination, &[self.root.clone()])?); return Ok(self.export_note(&self.root, &destination)?);
} }
if !self.destination.exists() { if !self.destination.exists() {
return Err(ExportError::PathDoesNotExist { return Err(ExportError::PathDoesNotExist {
path: self.destination, path: self.destination.clone(),
}); });
} }
let vault = vault_contents(self.root.as_path(), self.walk_options.clone())?; self.vault_contents = Some(vault_contents(
vault.clone().into_par_iter().try_for_each(|file| { self.root.as_path(),
let relative_path = file self.walk_options.clone(),
.strip_prefix(&self.root.clone()) )?);
.expect("file should always be nested under root") self.vault_contents
.to_path_buf(); .as_ref()
let destination = &self.destination.join(&relative_path); .unwrap()
self.export_note(&file, destination, &vault) .clone()
})?; .into_par_iter()
.try_for_each(|file| {
let relative_path = file
.strip_prefix(&self.root.clone())
.expect("file should always be nested under root")
.to_path_buf();
let destination = &self.destination.join(&relative_path);
self.export_note(&file, destination)
})?;
Ok(()) Ok(())
} }
fn export_note(&self, src: &Path, dest: &Path, vault_contents: &[PathBuf]) -> Result<()> { fn export_note(&self, src: &Path, dest: &Path) -> Result<()> {
match is_markdown_file(src) { match is_markdown_file(src) {
true => { true => self.parse_and_export_obsidian_note(src, dest, self.frontmatter_strategy),
parse_and_export_obsidian_note(src, dest, vault_contents, self.frontmatter_strategy)
}
false => copy_file(src, dest), false => copy_file(src, dest),
} }
.context(FileExportError { path: src }) .context(FileExportError { path: src })
} }
}
fn parse_and_export_obsidian_note( fn parse_and_export_obsidian_note(
src: &Path, &self,
dest: &Path, src: &Path,
vault_contents: &[PathBuf], dest: &Path,
frontmatter_strategy: FrontmatterStrategy, frontmatter_strategy: FrontmatterStrategy,
) -> Result<()> { ) -> Result<()> {
let content = fs::read_to_string(&src).context(ReadError { path: src })?; let content = fs::read_to_string(&src).context(ReadError { path: src })?;
let (mut frontmatter, _content) = let (mut frontmatter, _content) =
matter::matter(&content).unwrap_or(("".to_string(), content.to_string())); matter::matter(&content).unwrap_or(("".to_string(), content.to_string()));
frontmatter = frontmatter.trim().to_string(); frontmatter = frontmatter.trim().to_string();
//let mut outfile = create_file(&dest).context(FileIOError { filename: dest })?; //let mut outfile = create_file(&dest).context(FileIOError { filename: dest })?;
let mut outfile = create_file(&dest)?; let mut outfile = create_file(&dest)?;
let write_frontmatter = match frontmatter_strategy { let write_frontmatter = match frontmatter_strategy {
FrontmatterStrategy::Always => true, FrontmatterStrategy::Always => true,
FrontmatterStrategy::Never => false, FrontmatterStrategy::Never => false,
FrontmatterStrategy::Auto => frontmatter != "", FrontmatterStrategy::Auto => frontmatter != "",
}; };
if write_frontmatter { if write_frontmatter {
if frontmatter != "" && !frontmatter.ends_with('\n') { if frontmatter != "" && !frontmatter.ends_with('\n') {
frontmatter.push('\n'); frontmatter.push('\n');
}
outfile
.write_all(format!("---\n{}---\n\n", frontmatter).as_bytes())
.context(WriteError { path: &dest })?;
} }
let mut context = Context::new(src.to_path_buf());
context.set_frontmatter_strategy(frontmatter_strategy);
let markdown_tree = self.parse_obsidian_note(&src, &context)?;
outfile outfile
.write_all(format!("---\n{}---\n\n", frontmatter).as_bytes()) .write_all(render_mdtree_to_mdtext(markdown_tree).as_bytes())
.context(WriteError { path: &dest })?; .context(WriteError { path: &dest })?;
Ok(())
} }
let mut context = Context::new(src.to_path_buf(), vault_contents); fn parse_obsidian_note<'b>(&self, path: &Path, context: &Context) -> Result<MarkdownTree<'b>> {
context.set_frontmatter_strategy(frontmatter_strategy); if context.note_depth() > NOTE_RECURSION_LIMIT {
let markdown_tree = parse_obsidian_note(&src, &context)?; return Err(ExportError::RecursionLimitExceeded {
outfile file_tree: context.file_tree(),
.write_all(render_mdtree_to_mdtext(markdown_tree).as_bytes()) });
.context(WriteError { path: &dest })?; }
Ok(()) let content = fs::read_to_string(&path).context(ReadError { path })?;
} let (_frontmatter, content) =
matter::matter(&content).unwrap_or(("".to_string(), content.to_string()));
fn parse_obsidian_note<'a>(path: &Path, context: &Context) -> Result<MarkdownTree<'a>> { let mut parser_options = Options::empty();
if context.note_depth() > NOTE_RECURSION_LIMIT { parser_options.insert(Options::ENABLE_TABLES);
return Err(ExportError::RecursionLimitExceeded { parser_options.insert(Options::ENABLE_FOOTNOTES);
file_tree: context.file_tree(), parser_options.insert(Options::ENABLE_STRIKETHROUGH);
}); parser_options.insert(Options::ENABLE_TASKLISTS);
}
let content = fs::read_to_string(&path).context(ReadError { path })?;
let (_frontmatter, content) =
matter::matter(&content).unwrap_or(("".to_string(), content.to_string()));
let mut parser_options = Options::empty(); // Use of ENABLE_SMART_PUNCTUATION causes character replacements which breaks up the single
parser_options.insert(Options::ENABLE_TABLES); // Event::Text element that is emitted between `[[` and `]]` into an unpredictable number of
parser_options.insert(Options::ENABLE_FOOTNOTES); // additional elements. This completely throws off the current parsing strategy and is
parser_options.insert(Options::ENABLE_STRIKETHROUGH); // unsupported. If a user were to want this, a strategy would be to do a second-stage pass over
parser_options.insert(Options::ENABLE_TASKLISTS); // the rewritten markdown just before feeding to pulldown_cmark_to_cmark.
//parser_options.insert(Options::ENABLE_SMART_PUNCTUATION);
// Use of ENABLE_SMART_PUNCTUATION causes character replacements which breaks up the single let mut parser = Parser::new_ext(&content, parser_options);
// Event::Text element that is emitted between `[[` and `]]` into an unpredictable number of let mut tree = vec![];
// additional elements. This completely throws off the current parsing strategy and is let mut buffer = Vec::with_capacity(5);
// unsupported. If a user were to want this, a strategy would be to do a second-stage pass over
// the rewritten markdown just before feeding to pulldown_cmark_to_cmark.
//parser_options.insert(Options::ENABLE_SMART_PUNCTUATION);
let mut parser = Parser::new_ext(&content, parser_options); while let Some(event) = parser.next() {
let mut tree = vec![]; match event {
let mut buffer = Vec::with_capacity(5); Event::Text(CowStr::Borrowed("[")) | Event::Text(CowStr::Borrowed("![")) => {
buffer.push(event);
while let Some(event) = parser.next() { // A lone '[' or a '![' Text event signifies the possible start of a linked or
match event { // embedded note. However, we cannot be sure unless it's also followed by another
Event::Text(CowStr::Borrowed("[")) | Event::Text(CowStr::Borrowed("![")) => { // '[', the note reference itself and closed by a double ']'. To determine whether
buffer.push(event); // that's the case, we need to read ahead so we can backtrack later if needed.
// A lone '[' or a '![' Text event signifies the possible start of a linked or for _ in 1..5 {
// embedded note. However, we cannot be sure unless it's also followed by another if let Some(event) = parser.next() {
// '[', the note reference itself and closed by a double ']'. To determine whether buffer.push(event);
// that's the case, we need to read ahead so we can backtrack later if needed. }
for _ in 1..5 {
if let Some(event) = parser.next() {
buffer.push(event);
} }
} if buffer.len() != 5
if buffer.len() != 5
// buffer[0] has '[' or '![', but we already know this // buffer[0] has '[' or '![', but we already know this
|| buffer[1] != Event::Text(CowStr::Borrowed("[")) || buffer[1] != Event::Text(CowStr::Borrowed("["))
|| buffer[3] != Event::Text(CowStr::Borrowed("]")) || buffer[3] != Event::Text(CowStr::Borrowed("]"))
|| buffer[4] != Event::Text(CowStr::Borrowed("]")) || buffer[4] != Event::Text(CowStr::Borrowed("]"))
{ {
tree.append(&mut buffer); tree.append(&mut buffer);
buffer.clear(); buffer.clear();
continue; continue;
} }
if let Event::Text(CowStr::Borrowed(text)) = buffer[2] { if let Event::Text(CowStr::Borrowed(text)) = buffer[2] {
match buffer[0] { match buffer[0] {
Event::Text(CowStr::Borrowed("[")) => { Event::Text(CowStr::Borrowed("[")) => {
let mut link_events = obsidian_note_link_to_markdown(&text, context); let mut link_events =
tree.append(&mut link_events); self.obsidian_note_link_to_markdown(&text, context);
buffer.clear(); tree.append(&mut link_events);
continue; buffer.clear();
continue;
}
Event::Text(CowStr::Borrowed("![")) => {
let mut elements = self.embed_file(&text, &context)?;
tree.append(&mut elements);
buffer.clear();
continue;
}
// This arm can never be reached due to the outer match against event, but
// the compiler (currently) cannot infer this.
_ => {}
} }
Event::Text(CowStr::Borrowed("![")) => {
let mut elements = embed_file(&text, &context)?;
tree.append(&mut elements);
buffer.clear();
continue;
}
// This arm can never be reached due to the outer match against event, but
// the compiler (currently) cannot infer this.
_ => {}
} }
} }
_ => tree.push(event),
}
if !buffer.is_empty() {
tree.append(&mut buffer);
buffer.clear();
} }
_ => tree.push(event),
}
if !buffer.is_empty() {
tree.append(&mut buffer);
buffer.clear();
} }
tree.append(&mut buffer);
Ok(tree.into_iter().map(event_to_owned).collect())
} }
tree.append(&mut buffer);
Ok(tree.into_iter().map(event_to_owned).collect())
}
// Generate markdown elements for a file that is embedded within another note. // Generate markdown elements for a file that is embedded within another note.
// //
// - If the file being embedded is a note, its content is included at the point of embed. // - If the file being embedded is a note, its content is included at the point of embed.
// - If the file is an image, an image tag is generated. // - If the file is an image, an image tag is generated.
// - For other types of file, a regular link is created instead. // - For other types of file, a regular link is created instead.
fn embed_file<'a, 'b>(note_name: &'a str, context: &'b Context) -> Result<MarkdownTree<'a>> { fn embed_file<'b>(&self, note_name: &'a str, context: &'a Context) -> Result<MarkdownTree<'a>> {
// TODO: If a #section is specified, reduce returned MarkdownTree to just // TODO: If a #section is specified, reduce returned MarkdownTree to just
// that section. // that section.
let note_name = note_name.split('#').collect::<Vec<&str>>()[0]; let note_name = note_name.split('#').collect::<Vec<&str>>()[0];
let tree = match lookup_filename_in_vault(note_name, context.vault_contents) { let tree = match lookup_filename_in_vault(note_name, &self.vault_contents.as_ref().unwrap())
Some(path) => { {
let context = Context::from_parent(context, path); Some(path) => {
let no_ext = OsString::new(); let context = Context::from_parent(context, path);
match path.extension().unwrap_or(&no_ext).to_str() { let no_ext = OsString::new();
Some("md") => parse_obsidian_note(&path, &context)?, match path.extension().unwrap_or(&no_ext).to_str() {
Some("png") | Some("jpg") | Some("jpeg") | Some("gif") | Some("webp") => { Some("md") => self.parse_obsidian_note(&path, &context)?,
make_link_to_file(&note_name, &note_name, &context) Some("png") | Some("jpg") | Some("jpeg") | Some("gif") | Some("webp") => {
.into_iter() self.make_link_to_file(&note_name, &note_name, &context)
.map(|event| match event { .into_iter()
// make_link_to_file returns a link to a file. With this we turn the link .map(|event| match event {
// into an image reference instead. Slightly hacky, but avoids needing // make_link_to_file returns a link to a file. With this we turn the link
// to keep another utility function around for this, or introducing an // into an image reference instead. Slightly hacky, but avoids needing
// extra parameter on make_link_to_file. // to keep another utility function around for this, or introducing an
Event::Start(Tag::Link(linktype, cowstr1, cowstr2)) => { // extra parameter on make_link_to_file.
Event::Start(Tag::Image( Event::Start(Tag::Link(linktype, cowstr1, cowstr2)) => {
linktype, Event::Start(Tag::Image(
CowStr::from(cowstr1.into_string()), linktype,
CowStr::from(cowstr2.into_string()), CowStr::from(cowstr1.into_string()),
)) CowStr::from(cowstr2.into_string()),
} ))
Event::End(Tag::Link(linktype, cowstr1, cowstr2)) => { }
Event::End(Tag::Image( Event::End(Tag::Link(linktype, cowstr1, cowstr2)) => {
linktype, Event::End(Tag::Image(
CowStr::from(cowstr1.into_string()), linktype,
CowStr::from(cowstr2.into_string()), CowStr::from(cowstr1.into_string()),
)) CowStr::from(cowstr2.into_string()),
} ))
_ => event, }
}) _ => event,
.collect() })
.collect()
}
_ => self.make_link_to_file(&note_name, &note_name, &context),
} }
_ => make_link_to_file(&note_name, &note_name, &context),
} }
} None => {
None => { // TODO: Extract into configurable function.
println!(
"Warning: Unable to find embedded note\n\tReference: '{}'\n\tSource: '{}'\n",
note_name,
context.current_file().display(),
);
vec![]
}
};
Ok(tree)
}
fn obsidian_note_link_to_markdown(&self, content: &'a str, context: &Context) -> MarkdownTree {
let captures = OBSIDIAN_NOTE_LINK_RE
.captures(&content)
.expect("note link regex didn't match - bad input?");
let notename = captures
.name("file")
.expect("Obsidian links should always reference a file");
let label = captures.name("label").unwrap_or(notename);
self.make_link_to_file(notename.as_str(), label.as_str(), context)
}
fn make_link_to_file<'b>(
&self,
file: &'b str,
label: &'b str,
context: &Context,
) -> MarkdownTree<'b> {
let target_file = lookup_filename_in_vault(file, &self.vault_contents.as_ref().unwrap());
if target_file.is_none() {
// TODO: Extract into configurable function. // TODO: Extract into configurable function.
println!( println!(
"Warning: Unable to find embedded note\n\tReference: '{}'\n\tSource: '{}'\n", "Warning: Unable to find referenced note\n\tReference: '{}'\n\tSource: '{}'\n",
note_name, file,
context.current_file().display(), context.current_file().display(),
); );
vec![] return vec![
Event::Start(Tag::Emphasis),
Event::Text(CowStr::from(String::from(label))),
Event::End(Tag::Emphasis),
];
} }
}; let target_file = target_file.unwrap();
Ok(tree) let rel_link = diff_paths(
} target_file,
&context
.current_file()
.parent()
.expect("obsidian content files should always have a parent"),
)
.expect("should be able to build relative path when target file is found in vault");
let rel_link = rel_link.to_string_lossy();
let encoded_link = utf8_percent_encode(&rel_link, PERCENTENCODE_CHARS);
fn obsidian_note_link_to_markdown<'a>(content: &'a str, context: &Context) -> MarkdownTree<'a> { let link = pulldown_cmark::Tag::Link(
let captures = OBSIDIAN_NOTE_LINK_RE pulldown_cmark::LinkType::Inline,
.captures(&content) CowStr::from(encoded_link.to_string()),
.expect("note link regex didn't match - bad input?"); CowStr::from(""),
let notename = captures
.name("file")
.expect("Obsidian links should always reference a file");
let label = captures.name("label").unwrap_or(notename);
make_link_to_file(notename.as_str(), label.as_str(), context)
}
fn make_link_to_file<'a>(file: &'a str, label: &'a str, context: &Context) -> MarkdownTree<'a> {
let target_file = lookup_filename_in_vault(file, context.vault_contents);
if target_file.is_none() {
// TODO: Extract into configurable function.
println!(
"Warning: Unable to find referenced note\n\tReference: '{}'\n\tSource: '{}'\n",
file,
context.current_file().display(),
); );
return vec![
Event::Start(Tag::Emphasis), vec![
Event::Text(CowStr::from(String::from(label))), Event::Start(link.clone()),
Event::End(Tag::Emphasis), Event::Text(CowStr::from(label)),
]; Event::End(link.clone()),
]
} }
let target_file = target_file.unwrap();
let rel_link = diff_paths(
target_file,
&context
.current_file()
.parent()
.expect("obsidian content files should always have a parent"),
)
.expect("should be able to build relative path when target file is found in vault");
let rel_link = rel_link.to_string_lossy();
let encoded_link = utf8_percent_encode(&rel_link, PERCENTENCODE_CHARS);
let link = pulldown_cmark::Tag::Link(
pulldown_cmark::LinkType::Inline,
CowStr::from(encoded_link.to_string()),
CowStr::from(""),
);
vec![
Event::Start(link.clone()),
Event::Text(CowStr::from(label)),
Event::End(link.clone()),
]
} }
fn lookup_filename_in_vault<'a>( fn lookup_filename_in_vault<'a>(