Got recursive export working. Added Test, updated readme
This commit is contained in:
parent
4b1a2918a4
commit
5e840f0eb9
10
README.md
10
README.md
|
@ -114,6 +114,16 @@ obsidian-export my-obsidian-vault --start-at my-obsidian-vault/Books exported-no
|
|||
|
||||
In this mode, all notes under the source (the first argument) are considered part of the vault so any references to these files will remain intact, even if they're not part of the exported notes.
|
||||
|
||||
#### Recursive export
|
||||
|
||||
If you would like to use `--start-at` but also export notes that are linked from the notes under that subdirectory, you can use the `--link-depth` argument:
|
||||
|
||||
````sh
|
||||
obsidian-export my-obsidian-vault --start-at my-obsidian-vault/Books --link-depth 1 exported-notes
|
||||
````
|
||||
|
||||
This will export all notes under `my-obsidian-vault/Books` as well as any notes that are linked from those notes. The level of recursion can be controlled by changing the value of `--link-depth`.
|
||||
|
||||
## Character encodings
|
||||
|
||||
At present, UTF-8 character encoding is assumed for all note text as well as filenames.
|
||||
|
|
|
@ -58,6 +58,16 @@ obsidian-export my-obsidian-vault --start-at my-obsidian-vault/Books exported-no
|
|||
|
||||
In this mode, all notes under the source (the first argument) are considered part of the vault so any references to these files will remain intact, even if they're not part of the exported notes.
|
||||
|
||||
#### Recursive export
|
||||
|
||||
If you would like to use `--start-at` but also export notes that are linked from the notes under that subdirectory, you can use the `--link-depth` argument:
|
||||
|
||||
```sh
|
||||
obsidian-export my-obsidian-vault --start-at my-obsidian-vault/Books --link-depth 1 exported-notes
|
||||
```
|
||||
|
||||
This will export all notes under `my-obsidian-vault/Books` as well as any notes that are linked from those notes. The level of recursion can be controlled by changing the value of `--link-depth`.
|
||||
|
||||
## Character encodings
|
||||
|
||||
At present, UTF-8 character encoding is assumed for all note text as well as filenames.
|
||||
|
|
|
@ -119,7 +119,7 @@ fn main() {
|
|||
if args.link_depth > 0 {
|
||||
dont_recurse = false;
|
||||
recursive_resolver =
|
||||
RecursiveResolver::new(root, path, destination, shared_state.clone());
|
||||
RecursiveResolver::new(root.clone(), path, destination, shared_state.clone());
|
||||
callback = |ctx: &mut obsidian_export::Context,
|
||||
events: &mut Vec<pulldown_cmark::Event<'_>>| {
|
||||
recursive_resolver.postprocess(ctx, events)
|
||||
|
@ -159,6 +159,8 @@ fn main() {
|
|||
}
|
||||
if dont_recurse || shared_state.update_and_check_should_continue() {
|
||||
break;
|
||||
} else if shared_state.get_current_depth() == 1 {
|
||||
exporter.start_at(root.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,15 +2,14 @@
|
|||
|
||||
use std::{
|
||||
collections::BTreeSet,
|
||||
fmt::DebugStruct,
|
||||
path::{Path, PathBuf},
|
||||
sync::{Arc, Mutex, RwLock},
|
||||
};
|
||||
|
||||
use super::{Context, MarkdownEvents, PostprocessorResult, PERCENTENCODE_CHARS};
|
||||
use percent_encoding::{percent_decode_str, utf8_percent_encode, AsciiSet};
|
||||
use percent_encoding::{percent_decode_str, utf8_percent_encode};
|
||||
use pulldown_cmark::{CowStr, Event, Tag};
|
||||
use rayon::iter::{ParallelDrainRange, ParallelIterator};
|
||||
use rayon::iter::{ParallelDrainRange, ParallelExtend};
|
||||
use serde_yaml::Value;
|
||||
|
||||
/// This postprocessor converts all soft line breaks to hard line breaks. Enabling this mimics
|
||||
|
@ -59,12 +58,13 @@ fn filter_by_tags_(
|
|||
PostprocessorResult::Continue
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SharedResolverState {
|
||||
depth: usize,
|
||||
current_depth: RwLock<usize>,
|
||||
files_to_parse: RwLock<BTreeSet<PathBuf>>,
|
||||
linked_files: Mutex<Vec<PathBuf>>,
|
||||
parsed_files: RwLock<BTreeSet<PathBuf>>,
|
||||
}
|
||||
|
||||
impl SharedResolverState {
|
||||
|
@ -74,6 +74,7 @@ impl SharedResolverState {
|
|||
current_depth: RwLock::new(0),
|
||||
files_to_parse: RwLock::new(BTreeSet::new()),
|
||||
linked_files: Mutex::new(Vec::new()),
|
||||
parsed_files: RwLock::new(BTreeSet::new()),
|
||||
})
|
||||
}
|
||||
pub fn update_and_check_should_continue(&self) -> bool {
|
||||
|
@ -81,25 +82,36 @@ impl SharedResolverState {
|
|||
|
||||
if *current_depth < self.depth {
|
||||
*current_depth += 1;
|
||||
let mut files_to_parse = self.files_to_parse.write().unwrap();
|
||||
*files_to_parse = self
|
||||
.linked_files
|
||||
.lock()
|
||||
.unwrap()
|
||||
.par_drain(..)
|
||||
.collect::<BTreeSet<PathBuf>>();
|
||||
|
||||
let parsed_files = &mut *self.parsed_files.write().unwrap();
|
||||
|
||||
let files_to_parse = &mut *self.files_to_parse.write().unwrap();
|
||||
parsed_files.append(files_to_parse);
|
||||
files_to_parse.par_extend(self.linked_files.lock().unwrap().par_drain(..));
|
||||
|
||||
if !files_to_parse.is_empty() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
true
|
||||
}
|
||||
pub fn get_current_depth(&self) -> usize {
|
||||
*self.current_depth.read().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
/// This stores the state for recursively including linked files when
|
||||
/// using the `--start-at` option with a `--link-depth` greater than 0.
|
||||
/// Note that the paths need to be canonicalized because canonicalization is used to
|
||||
/// resolve relative paths outside of start_at
|
||||
pub struct RecursiveResolver {
|
||||
/// the canonicalized root of the vault
|
||||
root: PathBuf,
|
||||
/// the canonicalized path to start at
|
||||
start_at: PathBuf,
|
||||
destination: PathBuf,
|
||||
//the shared state between this and the caller
|
||||
//used to tell caller when to stop recursing
|
||||
shared_state: Arc<SharedResolverState>,
|
||||
}
|
||||
|
||||
|
@ -110,11 +122,12 @@ impl<'a: 'url, 'url> RecursiveResolver {
|
|||
destination: PathBuf,
|
||||
shared_state: Arc<SharedResolverState>,
|
||||
) -> RecursiveResolver {
|
||||
let root = root.canonicalize().unwrap();
|
||||
let start_at = start_at.canonicalize().unwrap();
|
||||
RecursiveResolver {
|
||||
root,
|
||||
start_at,
|
||||
destination,
|
||||
|
||||
shared_state: shared_state.clone(),
|
||||
}
|
||||
}
|
||||
|
@ -122,24 +135,27 @@ impl<'a: 'url, 'url> RecursiveResolver {
|
|||
pub fn start_at(&mut self, start_at: PathBuf) {
|
||||
self.start_at = start_at;
|
||||
}
|
||||
|
||||
/// postprocess function for recursively resolving links to files outside of start_at
|
||||
/// If this is the first iteration, links to files outside of start_at are changed so
|
||||
/// that they point into the root of the destination
|
||||
/// if this is any other iteration, links to files outside of start_at are changed so
|
||||
/// they strip the difference between root and start_at
|
||||
pub fn postprocess(
|
||||
&self,
|
||||
context: &'a mut Context,
|
||||
events: &'url mut MarkdownEvents,
|
||||
) -> PostprocessorResult {
|
||||
println!("postprocess: recursive_resolver");
|
||||
match *self.shared_state.current_depth.read().unwrap() == 0 {
|
||||
true => self.first_run(context, events),
|
||||
false => {
|
||||
//files to parse should contain only files that have
|
||||
//not been parsed in a previous iteration
|
||||
if !self
|
||||
.shared_state
|
||||
.files_to_parse
|
||||
.read()
|
||||
.unwrap()
|
||||
.contains(context.current_file())
|
||||
.contains(&context.current_file().canonicalize().unwrap())
|
||||
{
|
||||
return PostprocessorResult::StopAndSkipNote;
|
||||
}
|
||||
|
@ -148,21 +164,22 @@ impl<'a: 'url, 'url> RecursiveResolver {
|
|||
}
|
||||
}
|
||||
|
||||
/// First run of the postprocessor: changes links to files outside of start_at
|
||||
/// and aggregates the filepaths to export in the next iteration
|
||||
fn first_run(
|
||||
&self,
|
||||
context: &'a mut Context,
|
||||
_context: &'a mut Context,
|
||||
events: &'url mut MarkdownEvents,
|
||||
) -> PostprocessorResult {
|
||||
//let path_changed = context.current_file() != &self.start_at;
|
||||
for event in events.iter_mut() {
|
||||
if let Event::Start(Tag::Link(_, url, _)) = event {
|
||||
println!("url: {}", url);
|
||||
if let Event::End(Tag::Link(_, url, _)) = event {
|
||||
if url.starts_with("https://") || url.starts_with("http://") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let vault_path: PathBuf = get_vault_path(url, &self.start_at.as_path());
|
||||
println!("vault_path: {}", vault_path.to_string_lossy());
|
||||
let vault_path: PathBuf = get_vault_path(url, self.start_at.as_path());
|
||||
|
||||
// may still be within start_at
|
||||
if vault_path.starts_with(&self.start_at) {
|
||||
continue;
|
||||
|
@ -170,7 +187,9 @@ impl<'a: 'url, 'url> RecursiveResolver {
|
|||
|
||||
if vault_path.exists() {
|
||||
let vaultless_path = vault_path.strip_prefix(self.root.as_path()).unwrap();
|
||||
set_url(url, self.destination.join(vaultless_path));
|
||||
|
||||
set_url(url, vaultless_path.to_path_buf());
|
||||
|
||||
self.shared_state
|
||||
.linked_files
|
||||
.lock()
|
||||
|
@ -184,13 +203,12 @@ impl<'a: 'url, 'url> RecursiveResolver {
|
|||
|
||||
fn other_runs(
|
||||
&self,
|
||||
context: &'a mut Context,
|
||||
_context: &'a mut Context,
|
||||
events: &'url mut MarkdownEvents,
|
||||
) -> PostprocessorResult {
|
||||
//let path_changed = context.current_file() != self.start_at;
|
||||
for event in events.iter_mut() {
|
||||
let relative_start = self.start_at.clone().strip_prefix(&self.root).unwrap();
|
||||
if let Event::Start(Tag::Link(_, url, _)) = event {
|
||||
if let Event::End(Tag::Link(_, url, _)) = event {
|
||||
if url.starts_with("https://") || url.starts_with("http://") {
|
||||
continue;
|
||||
}
|
||||
|
@ -205,13 +223,25 @@ impl<'a: 'url, 'url> RecursiveResolver {
|
|||
.destination
|
||||
.join(vault_path.strip_prefix(&self.start_at).unwrap());
|
||||
set_url(url, link_destination);
|
||||
//don't need to add to linked_files, because it was parsed in the first iteration
|
||||
continue;
|
||||
}
|
||||
//only add if this is not the last iteration
|
||||
if *self.shared_state.current_depth.read().unwrap() < self.shared_state.depth {
|
||||
self.shared_state
|
||||
.linked_files
|
||||
.lock()
|
||||
//only add if it hasn't been parsed in a previous iteration
|
||||
if !self
|
||||
.shared_state
|
||||
.parsed_files
|
||||
.read()
|
||||
.unwrap()
|
||||
.push(vault_path);
|
||||
.contains(&vault_path)
|
||||
{
|
||||
self.shared_state
|
||||
.linked_files
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(vault_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -229,6 +259,16 @@ fn get_vault_path(url: &mut CowStr<'_>, root: &Path) -> PathBuf {
|
|||
root.join(path_stub).canonicalize().unwrap()
|
||||
}
|
||||
fn set_url(url: &mut CowStr<'_>, link_destination: PathBuf) {
|
||||
// let _=std::mem::replace(
|
||||
// url,
|
||||
// CowStr::from(
|
||||
// utf8_percent_encode(
|
||||
// &format!("{}", link_destination.to_string_lossy()),
|
||||
// PERCENTENCODE_CHARS,
|
||||
// )
|
||||
// .to_string(),
|
||||
// ),
|
||||
// );
|
||||
*url = CowStr::from(
|
||||
utf8_percent_encode(
|
||||
&format!("{}", link_destination.to_string_lossy()),
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
use obsidian_export::postprocessors::{filter_by_tags, softbreaks_to_hardbreaks};
|
||||
use obsidian_export::postprocessors::{
|
||||
filter_by_tags, softbreaks_to_hardbreaks, RecursiveResolver, SharedResolverState,
|
||||
};
|
||||
use obsidian_export::{Context, Exporter, MarkdownEvents, PostprocessorResult};
|
||||
use pretty_assertions::assert_eq;
|
||||
use pulldown_cmark::{CowStr, Event};
|
||||
|
@ -6,7 +8,7 @@ use serde_yaml::Value;
|
|||
use std::collections::HashSet;
|
||||
use std::fs::{read_to_string, remove_file};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::{Mutex};
|
||||
use tempfile::TempDir;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
|
@ -290,3 +292,47 @@ fn test_filter_by_tags() {
|
|||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_start_at_subdir_recursive() {
|
||||
let tmp_dir = TempDir::new().expect("failed to make tempdir");
|
||||
let root = PathBuf::from("tests/testdata/input/start-at/");
|
||||
let start_at = PathBuf::from("tests/testdata/input/start-at/subdir/");
|
||||
|
||||
let mut exporter = Exporter::new(root.clone(), tmp_dir.path().to_path_buf());
|
||||
exporter.start_at(start_at.clone());
|
||||
|
||||
let shared_state = SharedResolverState::new(1);
|
||||
let recursive_resolver = RecursiveResolver::new(
|
||||
root.clone(),
|
||||
start_at,
|
||||
tmp_dir.path().to_path_buf(),
|
||||
shared_state.clone(),
|
||||
);
|
||||
let recursive_start_at = |ctx: &mut Context, events: &mut Vec<pulldown_cmark::Event<'_>>| {
|
||||
recursive_resolver.postprocess(ctx, events)
|
||||
};
|
||||
exporter.add_postprocessor(&recursive_start_at);
|
||||
|
||||
for _i in 0..2 {
|
||||
println!("running exporter");
|
||||
exporter.run().unwrap();
|
||||
exporter.start_at(root.clone());
|
||||
shared_state.update_and_check_should_continue();
|
||||
println!("{:?}", shared_state.clone());
|
||||
}
|
||||
|
||||
let expected = if cfg!(windows) {
|
||||
read_to_string("tests/testdata/expected/start-at/recursive/Note B.md")
|
||||
.unwrap()
|
||||
.replace('/', "\\")
|
||||
} else {
|
||||
read_to_string("tests/testdata/expected/start-at/recursive/Note B.md").unwrap()
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
expected,
|
||||
read_to_string(tmp_dir.path().join(PathBuf::from("Note B.md"))).unwrap(),
|
||||
);
|
||||
assert!(tmp_dir.path().join(PathBuf::from("Note A.md")).exists());
|
||||
}
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
This is note A.
|
|
@ -0,0 +1,4 @@
|
|||
This is note B. It links to:
|
||||
|
||||
* [Note A](Note%20A.md)
|
||||
* [Note C](Note%20C.md)
|
|
@ -0,0 +1 @@
|
|||
This is note C.
|
Loading…
Reference in New Issue