From 58eb79e53d04d8757adeb8931d37bb74084d0fdc Mon Sep 17 00:00:00 2001 From: Nick Groenen Date: Sat, 20 Feb 2021 21:35:45 +0100 Subject: [PATCH] new: postprocessing support Add support for postprocessing of Markdown prior to writing converted notes to disk. Postprocessors may be used when making use of Obsidian export as a Rust library to do the following: 1. Modify a note's `Context`, for example to change the destination filename or update its Frontmatter. 2. Change a note's contents by altering `MarkdownEvents`. 3. Prevent later postprocessors from running or cause a note to be skipped entirely. Future releases of Obsidian export may come with built-in postprocessors for users of the command-line tool to use, if general use-cases can be identified. For example, a future release might include functionality to make notes more suitable for the Hugo static site generator. This functionality would be implemented as a postprocessor that could be enabled through command-line flags. --- Cargo.lock | 40 ++ Cargo.toml | 1 + src/context.rs | 93 +++ src/frontmatter.rs | 92 +++ src/lib.rs | 555 +++++++----------- src/references.rs | 204 +++++++ tests/export_test.rs | 113 +++- .../testdata/expected/postprocessors/Note.md | 8 + tests/testdata/input/postprocessors/Note.md | 7 + 9 files changed, 779 insertions(+), 334 deletions(-) create mode 100644 src/context.rs create mode 100644 src/frontmatter.rs create mode 100644 src/references.rs create mode 100644 tests/testdata/expected/postprocessors/Note.md create mode 100644 tests/testdata/input/postprocessors/Note.md diff --git a/Cargo.lock b/Cargo.lock index eb79a4b..8cab0cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,6 +125,12 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "dtoa" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"88d7ed2934d741c6b37e33e3832298e8850b53fd2d2bea03873375596c7cea4e" + [[package]] name = "either" version = "1.6.1" @@ -245,6 +251,12 @@ version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7282d924be3275cec7f6756ff4121987bc6481325397dde6ba3e7802b1a8b1c" +[[package]] +name = "linked-hash-map" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" + [[package]] name = "log" version = "0.4.14" @@ -305,6 +317,7 @@ dependencies = [ "pulldown-cmark-to-cmark", "rayon", "regex", + "serde_yaml", "slug", "snafu", "tempfile", @@ -511,6 +524,24 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "serde" +version = "1.0.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae" + +[[package]] +name = "serde_yaml" +version = "0.8.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15654ed4ab61726bf918a39cb8d98a2e2995b002387807fa6ba58fdf7f59bb23" +dependencies = [ + "dtoa", + "linked-hash-map", + "serde", + "yaml-rust", +] + [[package]] name = "slug" version = "0.1.4" @@ -649,3 +680,12 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] diff --git a/Cargo.toml b/Cargo.toml index 6d421f7..ab67a54 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,7 @@ pulldown-cmark = "0.8.0" 
pulldown-cmark-to-cmark = "6.0.0" rayon = "1.5.0" regex = "1.4.3" +serde_yaml = "0.8.17" slug = "0.1.4" snafu = "0.6.10" diff --git a/src/context.rs b/src/context.rs new file mode 100644 index 0000000..1b72abd --- /dev/null +++ b/src/context.rs @@ -0,0 +1,93 @@ +use crate::Frontmatter; +use std::path::PathBuf; + +#[derive(Debug, Clone)] +/// Context holds metadata about a note which is being parsed. +/// +/// This is used internally to keep track of nesting and help with constructing proper references +/// to other notes. +/// +/// It is also passed to [postprocessors][crate::Postprocessor] to provide contextual information +/// and allow modification of a note's frontmatter. +pub struct Context { + file_tree: Vec, + + /// The path where this note will be written to when exported. + /// + /// Changing this path will result in the note being written to that new path instead, but + /// beware: links will not be updated automatically. If this is changed by a + /// [postprocessor][crate::Postprocessor], it's up to that postprocessor to rewrite any + /// existing links to this new path. + pub destination: PathBuf, + + /// The [Frontmatter] for this note. Frontmatter may be modified in-place (see + /// [serde_yaml::Mapping] for available methods) or replaced entirely. 
+ /// + /// # Example + /// + /// Insert `foo: bar` into a note's frontmatter: + /// + /// ``` + /// # use obsidian_export::Frontmatter; + /// # use obsidian_export::Context; + /// # use std::path::PathBuf; + /// use obsidian_export::serde_yaml::Value; + /// + /// # let mut context = Context::new(PathBuf::from("source"), PathBuf::from("destination")); + /// let key = Value::String("foo".to_string()); + /// + /// context.frontmatter.insert( + /// key.clone(), + /// Value::String("bar".to_string()), + /// ); + /// ``` + pub frontmatter: Frontmatter, +} + +impl Context { + /// Create a new `Context` + pub fn new(src: PathBuf, dest: PathBuf) -> Context { + Context { + file_tree: vec![src], + destination: dest, + frontmatter: Frontmatter::new(), + } + } + + /// Create a new `Context` which inherits from a parent Context. + pub fn from_parent(context: &Context, child: &PathBuf) -> Context { + let mut context = context.clone(); + context.file_tree.push(child.to_path_buf()); + context + } + + /// Return the path of the file currently being parsed. + pub fn current_file(&self) -> &PathBuf { + self.file_tree + .last() + .expect("Context not initialized properly, file_tree is empty") + } + + /// Return the path of the root file. + /// + /// Typically this will yield the same element as `current_file`, but when a note is embedded + /// within another note, this will return the outer-most note. + pub fn root_file(&self) -> &PathBuf { + self.file_tree + .first() + .expect("Context not initialized properly, file_tree is empty") + } + + /// Return the note depth (nesting level) for this context. + pub fn note_depth(&self) -> usize { + self.file_tree.len() + } + + /// Return the list of files associated with this context. + /// + /// The first element corresponds to the root file, the final element corresponds to the file + /// which is currently being processed (see also `current_file`). 
+ pub fn file_tree(&self) -> Vec { + self.file_tree.clone() + } +} diff --git a/src/frontmatter.rs b/src/frontmatter.rs new file mode 100644 index 0000000..2637815 --- /dev/null +++ b/src/frontmatter.rs @@ -0,0 +1,92 @@ +use serde_yaml::Result; + +/// YAML front matter from an Obsidian note. +/// +/// This is essentially an alias of [serde_yaml::Mapping] so all the methods available on that type +/// are available with `Frontmatter` as well. +/// +/// # Examples +/// +/// ``` +/// # use obsidian_export::Frontmatter; +/// use serde_yaml::Value; +/// +/// let mut frontmatter = Frontmatter::new(); +/// let key = Value::String("foo".to_string()); +/// +/// frontmatter.insert( +/// key.clone(), +/// Value::String("bar".to_string()), +/// ); +/// +/// assert_eq!( +/// frontmatter.get(&key), +/// Some(&Value::String("bar".to_string())), +/// ) +/// ``` +pub type Frontmatter = serde_yaml::Mapping; + +pub fn frontmatter_from_str(mut s: &str) -> Result { + if s.is_empty() { + s = "{}"; + } + let frontmatter: Frontmatter = serde_yaml::from_str(s)?; + Ok(frontmatter) +} + +pub fn frontmatter_to_str(frontmatter: Frontmatter) -> Result { + if frontmatter.is_empty() { + return Ok("---\n---\n".to_string()); + } + + let mut buffer = String::new(); + buffer.push_str(&serde_yaml::to_string(&frontmatter)?); + buffer.push_str("---\n"); + Ok(buffer) +} + +#[derive(Debug, Clone, Copy)] +/// Available strategies for the inclusion of frontmatter in notes. +pub enum FrontmatterStrategy { + /// Copy frontmatter when a note has frontmatter defined. + Auto, + /// Always add frontmatter header, including empty frontmatter when none was originally + /// specified. + Always, + /// Never add any frontmatter to notes. 
+ Never, +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + use serde_yaml::Value; + + #[test] + fn empty_string_should_yield_empty_frontmatter() { + assert_eq!(frontmatter_from_str("").unwrap(), Frontmatter::new()) + } + + #[test] + fn empty_frontmatter_to_str() { + let frontmatter = Frontmatter::new(); + assert_eq!( + frontmatter_to_str(frontmatter).unwrap(), + format!("---\n---\n") + ) + } + + #[test] + fn nonempty_frontmatter_to_str() { + let mut frontmatter = Frontmatter::new(); + frontmatter.insert( + Value::String("foo".to_string()), + Value::String("bar".to_string()), + ); + assert_eq!( + frontmatter_to_str(frontmatter).unwrap(), + format!("---\nfoo: bar\n---\n") + ) + } +} diff --git a/src/lib.rs b/src/lib.rs index 691949b..fe6616e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,16 +1,25 @@ +pub extern crate pulldown_cmark; +pub extern crate serde_yaml; + #[macro_use] extern crate lazy_static; +mod context; +mod frontmatter; +mod references; mod walker; +pub use context::Context; +pub use frontmatter::{Frontmatter, FrontmatterStrategy}; pub use walker::{vault_contents, WalkOptions}; +use frontmatter::{frontmatter_from_str, frontmatter_to_str}; use pathdiff::diff_paths; use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag}; use pulldown_cmark_to_cmark::cmark_with_options; use rayon::prelude::*; -use regex::Regex; +use references::*; use slug::slugify; use snafu::{ResultExt, Snafu}; use std::ffi::OsString; @@ -21,13 +30,98 @@ use std::io::ErrorKind; use std::path::{Path, PathBuf}; use std::str; -type Result = std::result::Result; -type MarkdownTree<'a> = Vec>; +/// A series of markdown [Event]s that are generated while traversing an Obsidian markdown note. +pub type MarkdownEvents<'a> = Vec>; + +/// A post-processing function that is to be called after an Obsidian note has been fully parsed and +/// converted to regular markdown syntax. 
+/// +/// Postprocessors are called in the order they've been added through [Exporter::add_postprocessor] +/// just before notes are written out to their final destination. +/// They may be used to achieve the following: +/// +/// 1. Modify a note's [Context], for example to change the destination filename or update its [Frontmatter] (see [Context::frontmatter]). +/// 2. Change a note's contents by altering [MarkdownEvents]. +/// 3. Prevent later postprocessors from running ([PostprocessorResult::StopHere]) or cause a note +/// to be skipped entirely ([PostprocessorResult::StopAndSkipNote]). +/// +/// # Examples +/// +/// ## Update frontmatter +/// +/// This example shows how to make changes to a note's frontmatter. In this case, the postprocessor is +/// defined inline as a closure. +/// +/// ``` +/// use obsidian_export::{Context, Exporter, MarkdownEvents, PostprocessorResult}; +/// use obsidian_export::pulldown_cmark::{CowStr, Event}; +/// use obsidian_export::serde_yaml::Value; +/// # use std::path::PathBuf; +/// # use tempfile::TempDir; +/// +/// # let tmp_dir = TempDir::new().expect("failed to make tempdir"); +/// # let source = PathBuf::from("tests/testdata/input/postprocessors"); +/// # let destination = tmp_dir.path().to_path_buf(); +/// let mut exporter = Exporter::new(source, destination); +/// +/// // add_postprocessor registers a new postprocessor. In this example we use a closure. +/// exporter.add_postprocessor(&|mut context, events| { +/// // This is the key we'll insert into the frontmatter. In this case, the string "foo". +/// let key = Value::String("foo".to_string()); +/// // This is the value we'll insert into the frontmatter. In this case, the string "baz". +/// let value = Value::String("baz".to_string()); +/// +/// // Frontmatter can be updated in-place, so we can call insert on it directly. 
+/// context.frontmatter.insert(key, value); +/// +/// // Postprocessors must return their (modified) context, the markdown events that make +/// // up the note and a next action to take. +/// (context, events, PostprocessorResult::Continue) +/// }); +/// +/// exporter.run().unwrap(); +/// ``` +/// +/// ## Change note contents +/// +/// In this example a note's markdown content is changed by iterating over the [MarkdownEvents] and +/// changing the text when we encounter a [text element][Event::Text]. +/// +/// Instead of using a closure like above, this example shows how to use a separate function +/// definition. +/// ``` +/// # use obsidian_export::{Context, Exporter, MarkdownEvents, PostprocessorResult}; +/// # use pulldown_cmark::{CowStr, Event}; +/// # use std::path::PathBuf; +/// # use tempfile::TempDir; +/// # +/// /// This postprocessor replaces any instance of "foo" with "bar" in the note body. +/// fn foo_to_bar( +/// context: Context, +/// events: MarkdownEvents, +/// ) -> (Context, MarkdownEvents, PostprocessorResult) { +/// let events = events +/// .into_iter() +/// .map(|event| match event { +/// Event::Text(text) => Event::Text(CowStr::from(text.replace("foo", "bar"))), +/// event => event, +/// }) +/// .collect(); +/// (context, events, PostprocessorResult::Continue) +/// } +/// +/// # let tmp_dir = TempDir::new().expect("failed to make tempdir"); +/// # let source = PathBuf::from("tests/testdata/input/postprocessors"); +/// # let destination = tmp_dir.path().to_path_buf(); +/// # let mut exporter = Exporter::new(source, destination); +/// exporter.add_postprocessor(&foo_to_bar); +/// # exporter.run().unwrap(); +/// ``` + +pub type Postprocessor = + dyn Fn(Context, MarkdownEvents) -> (Context, MarkdownEvents, PostprocessorResult) + Send + Sync; +type Result = std::result::Result; -lazy_static! { - static ref OBSIDIAN_NOTE_LINK_RE: Regex = - Regex::new(r"^(?P[^#|]+)??(#(?P
.+?))??(\|(?P