diff --git a/website/scripts/docsmg/Cargo.lock b/website/scripts/docsmg/Cargo.lock index f3168bd0346e..0ea7aef5e9c8 100644 --- a/website/scripts/docsmg/Cargo.lock +++ b/website/scripts/docsmg/Cargo.lock @@ -17,6 +17,15 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.6.14" @@ -163,6 +172,7 @@ dependencies = [ "clap", "colored", "dotenv", + "regex", "tokio", ] @@ -250,6 +260,35 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex" +version = "1.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + [[package]] name = "rustc-demangle" version = "0.1.24" diff --git a/website/scripts/docsmg/Cargo.toml b/website/scripts/docsmg/Cargo.toml index c444efc53046..9a7e18c86119 100644 --- a/website/scripts/docsmg/Cargo.toml +++ b/website/scripts/docsmg/Cargo.toml @@ -10,4 +10,5 @@ anyhow = "1.0.86" clap = { version = "4.5.9", features = ["derive", "env"] } colored = "2.1.0" dotenv = "0.15.0" +regex = "1.10.6" tokio = "1.38.0" diff --git a/website/scripts/docsmg/README.md b/website/scripts/docsmg/README.md index 2fb48672c361..7a3ad9817e93 100644 --- a/website/scripts/docsmg/README.md +++ b/website/scripts/docsmg/README.md @@ -19,6 +19,12 @@ Use this migration tool to: ## Steps to use +1. Generate a migratefile with `docsmg generate >> migratefile` +2. Find the files you want to move in `migratefile` and insert the path you want to move them to after the arrow; ex `path/to/move/from/file.md -> path/to/move/to/file.md` Note: make sure to put spaces on either side of the arrow or that line won't be recognized +3. Once you have entered all the paths you want to move, migrate the files with `docsmg migrate` +4. To revert the migration, use `docsmg unmigrate`; Note: DO NOT edit the migrate file in between steps 3 and 4 +5. Repeat steps 2-4 until you are satisfied with the result + ### Create the mapping file (`migratefile`) 1. Navigate to the `authentik/website` dir. diff --git a/website/scripts/docsmg/src/hackyfixes.rs b/website/scripts/docsmg/src/hackyfixes.rs new file mode 100644 index 000000000000..f7c3ab02044d --- /dev/null +++ b/website/scripts/docsmg/src/hackyfixes.rs @@ -0,0 +1,24 @@ +use std::{ffi::OsStr, fs::{read_to_string, write}, path::PathBuf}; + +use crate::recurse_directory; + +pub fn add_extra_dot_dot_to_expression_mdx(migrate_path: PathBuf) { + let binding = recurse_directory(migrate_path); + let files = binding.iter().filter(|x| if let Some(i) = x.file_name() { + if Some("expression.mdx") == i.to_str() || Some("expressions.md") == i.to_str() { + true + } else { + false + } + } else { + false + }); + + for file in files { + let content = match read_to_string(file) { + Ok(i) => i, + _ => continue, + }; + let _ = write(file, content.replace("../expressions", "../../expressions")); + } +} diff --git a/website/scripts/docsmg/src/links.rs b/website/scripts/docsmg/src/links.rs new file mode 100644 index 000000000000..26d61d9468b2 --- /dev/null +++ b/website/scripts/docsmg/src/links.rs @@ -0,0 +1,34 @@ +use std::{fs::read_to_string, path::PathBuf}; + +use regex::{Captures, Regex}; + +use crate::recurse_directory; + +pub fn shorten_all_external_links(migrate_path: PathBuf) { + let files = recurse_directory(migrate_path.clone()); + for file in files { + let file = migrate_path.join(file); + let absolute_file = file.clone().canonicalize().unwrap(); + let contents = if let Ok(x) = read_to_string(file) { + x + } else { + continue; + }; + let re = Regex::new(r"\[(?.*)\]\((?.*)\)").unwrap(); + let captures: Vec = re.captures_iter(&contents).collect(); + for capture in captures { + let link = &capture["link"]; + let link = PathBuf::from(link); + let absolute_link = absolute_file + .clone() + .parent() + .unwrap() + .join(link) + .canonicalize() + .unwrap(); + shorten_link_relative_to(absolute_link.clone(), absolute_file.clone()); + } + } +} + +fn shorten_link_relative_to(link_to_shorten: PathBuf, relative_to: PathBuf) {} diff --git a/website/scripts/docsmg/src/main.rs b/website/scripts/docsmg/src/main.rs index 2a2be1101601..b6ce1fedf17c 100644 --- a/website/scripts/docsmg/src/main.rs +++ b/website/scripts/docsmg/src/main.rs @@ -3,9 +3,11 @@ use std::{fs, path::PathBuf}; use clap::{Parser, Subcommand}; mod generate; +mod links; mod migrate; mod migratefile; mod r#move; +mod hackyfixes; #[derive(Parser)] struct Cli { diff --git a/website/scripts/docsmg/src/migrate.rs b/website/scripts/docsmg/src/migrate.rs index e2f0ab806a64..685929902571 100644 --- a/website/scripts/docsmg/src/migrate.rs +++ b/website/scripts/docsmg/src/migrate.rs @@ -1,12 +1,11 @@ use std::{ - ffi::OsStr, - fs::{read_to_string, write}, - path::PathBuf, + collections::{HashMap, HashSet, VecDeque}, env::consts::OS, ffi::OsStr, fs::{create_dir_all, read_to_string, remove_file, write, File}, path::{Component, PathBuf}, process::Command }; use colored::Colorize; +use regex::{Captures, Regex}; -use crate::{migratefile::read_migrate_file, recurse_directory}; +use crate::{hackyfixes::add_extra_dot_dot_to_expression_mdx, migratefile::read_migrate_file, recurse_directory, Cli}; pub fn migrate(quiet: bool, migratefile: PathBuf, migrate_path: PathBuf) { if !quiet { @@ -30,6 +29,13 @@ pub fn migrate(quiet: bool, migratefile: PathBuf, migrate_path: PathBuf) { replace_links(migrate_path.clone(), files.clone()); let successful_moves = move_files(quiet, migrate_path.clone(), files); add_redirects(successful_moves.clone(), migrate_path.clone()); + //shorten_all_external_links(migrate_path); + add_extra_dot_dot_to_expression_mdx(migrate_path.clone()); + let _ = Command::new("sh") + .arg("-c") + .arg("find . -empty -type d -delete") + .current_dir(migrate_path) + .output(); } pub fn unmigrate(quiet: bool, migratefile: PathBuf, migrate_path: PathBuf) { @@ -58,7 +64,8 @@ pub fn unmigrate(quiet: bool, migratefile: PathBuf, migrate_path: PathBuf) { .iter() .map(|x| (x.1.clone(), x.0.clone())) .collect(); //switch files to reverse a migration - remove_redirects(successful_moves, migrate_path); + remove_redirects(successful_moves, migrate_path.clone()); + //shorten_all_external_links(migrate_path); } fn move_files( @@ -95,44 +102,262 @@ fn move_files( successful_moves } -fn replace_links(migrate_path: PathBuf, successful_moves: Vec<(PathBuf, PathBuf)>) { +fn replace_links(migrate_path: PathBuf, moves: Vec<(PathBuf, PathBuf)>) { let files = recurse_directory(migrate_path.clone()); + let mut moved = HashSet::new(); + + let mut absolute_moves = vec![]; + for r#move in &moves { + let r#move = ( + migrate_path.join(r#move.0.clone()), + migrate_path.join(r#move.1.clone()), + ); + let absolute_move_0 = r#move + .0 + .canonicalize() + .expect(&format!("{}", r#move.0.display())); + + let _ = create_dir_all(r#move.1.parent().unwrap()); + let tmp_file = File::create_new(&r#move.1); + let absolute_move_1 = r#move.1.clone().canonicalize().expect(&format!( + "{} {:?}", + r#move.1.display(), + tmp_file + )); + // delete file if it didnt already exist + if let Ok(_) = tmp_file { + let _ = remove_file(&r#move.1); + }; + absolute_moves.push((absolute_move_0, absolute_move_1)); + } + let absolute_moves = absolute_moves + .iter() + .map(|x| x.clone()) + .collect::>(); for file in files { - let relative_file = file - .strip_prefix(migrate_path.clone()) - .unwrap() - .to_path_buf(); + let absolute_file = file.canonicalize().unwrap(); + println!("{}", absolute_file.display()); let mut contents = match read_to_string(file.clone()) { Ok(i) => i, Err(_) => continue, }; - let mut replace = vec![]; - for successful_move in &successful_moves { - if migrate_path - .join(successful_move.0.clone()) + + // replace old absolute file with the new absolute file + let old_absolute_file = absolute_file.clone(); + let absolute_file = match absolute_moves.get(&absolute_file) { + Some(file) => { + println!(" new file: {}", file.display()); + moved.insert(absolute_file); + file.clone() + } + None => absolute_file.clone(), + }; + + // get all links in file and remove web links and link to self + let re = Regex::new(r"\[(?[\w \-\*'`]*)\]\((?[\w\-\\/\\.#]*)\)").unwrap(); + let tmp_contents = contents.clone(); + let captures: Vec = re + .captures_iter(&tmp_contents) + .filter(|x| { + let link = &x["link"]; + + !["http", "#", "/"] + .iter() + .fold(false, |acc, x| acc || link.starts_with(x)) + }) + .collect(); + println!(" captures: {}\n", captures.len()); + + for capture in captures { + let mut capture_log = String::new(); + let link = capture["link"].to_owned(); + let link_path; + + let link_postfix_index = link.find('#'); + + let link_postfix = match link_postfix_index { + Some(i) => { + let link_postfix = link[i..].to_owned(); + link_path = link[..i].to_owned(); + Some(link_postfix) + } + None => { + link_path = link.clone(); + None + }, + }; + + let absolute_link = old_absolute_file.parent().unwrap().join(link_path.clone()); + //let _ = create_dir_all(absolute_link.parent().unwrap()); + //let tmp_file = File::create_new(&absolute_link); + + let absolute_link = match absolute_link .canonicalize() - .unwrap() - == file.clone().canonicalize().unwrap() + .or(absolute_link.with_extension("md").canonicalize()) + .or(absolute_link.with_extension("mdx").canonicalize()) { + Ok(link) => link, + _ => { + println!( + " {}: {} -> {}", + "failed".red(), + absolute_file.to_string_lossy().to_string().red(), + absolute_link.to_string_lossy().to_string().red() + ); + continue; + } + }; + let absolute_link = if absolute_link.is_file() { + absolute_link + } else if absolute_link.join("index.md").is_file() { + absolute_link.join("index.md") + } else if absolute_link.join("index.mdx").is_file() { + absolute_link.join("index.mdx") + } else { + println!( + " {}: {} -> {}", + "failed".red(), + absolute_file.to_string_lossy().to_string().red(), + absolute_link.to_string_lossy().to_string().red() + ); + continue; + }; + // delete file if it didnt already exist + //if let Ok(_) = tmp_file { + // let _ = remove_file(&absolute_link); + //}; + capture_log.push_str(&format!(" oldalink: {}\n", absolute_link.display())); + + // replace old absolute link with the new absolute link + let absolute_link = match absolute_moves.get(&absolute_link) { + Some(link) => link.clone(), + None => absolute_link.clone(), + }; + + capture_log.push_str(&format!(" newalink: {}\n", absolute_link.display())); + + // create tmp absolutes and make them into components + let tmp_absolute_file = absolute_file.clone(); + let mut tmp_absolute_file = tmp_absolute_file.components().collect::>(); + let tmp_absolute_link = absolute_link.clone(); + let mut tmp_absolute_link = tmp_absolute_link.components().collect::>(); + // remove the shared path components + loop { + if tmp_absolute_file.front() != tmp_absolute_link.front() + || tmp_absolute_file.front() == None + { + break; + } + tmp_absolute_file.pop_front(); + tmp_absolute_link.pop_front(); + } + capture_log.push_str(&format!( + " shrtfile: {}\n", + tmp_absolute_file.iter().collect::().display() + )); + capture_log.push_str(&format!( + " shrtlink: {}\n", + tmp_absolute_link.iter().collect::().display() + )); + + if tmp_absolute_file.len() <= 0 { + println!( + " {}: {} -> {}", + "failed".red(), + absolute_file.to_string_lossy().to_string().red(), + absolute_link.to_string_lossy().to_string().red() + ); continue; } - let new_successful_move_from = - make_path_relative(successful_move.0.clone(), relative_file.clone()); - let new_successful_move_to = - make_path_relative(successful_move.1.clone(), relative_file.clone()); - replace.push((new_successful_move_from, new_successful_move_to)); - } - for i in replace { - contents = contents.replace( - &format!("({})", i.0.display()), - &format!("({})", i.1.display()), - ); + let escapes = (0..tmp_absolute_file.len() - 1) + .map(|_| Component::Normal("..".as_ref())) + .collect::(); + + let new_link = escapes.join(tmp_absolute_link.iter().collect::()); + // add a . to the begining if it doesnt already start with . or .. + let new_link = match new_link + .components() + .collect::>() + .first() + .iter() + .collect::() + .to_str() + { + Some(".") => new_link, + Some("..") => new_link, + _ => PathBuf::from(".").join(new_link), + }; + let mut new_link = new_link.to_string_lossy().to_string(); + match link_postfix { + Some(i) => new_link.push_str(&i), + None => {} + } + capture_log.push_str(&format!(" old link: {}\n", link)); + capture_log.push_str(&format!(" new link: {}\n", new_link)); + print!("{}", capture_log); + //println!("{} {} {}", absolute_file.display(), absolute_link.display(), new_link.display()); + let tmp_contents = contents.replace(&format!("({})", link), &format!("({})", new_link)); + if tmp_contents == contents { + println!("{}", " nothing replaced".yellow()); + } else { + contents = tmp_contents; + }; + println!(""); } + write(file, contents).unwrap(); } } +fn fix_internal_links_in_file(migrate_path: PathBuf, move_from: PathBuf, move_to: PathBuf) { + let move_from = migrate_path.join(move_from); + let move_to = migrate_path.join(move_to); + let contents = read_to_string(&move_from); + let mut contents = match contents { + Ok(ok) => ok, + Err(_) => return, + }; + let re = Regex::new(r"\[(?.*)\]\((?.*)\)").unwrap(); + let captures: Vec = re.captures_iter(&contents).collect(); + let mut changes = vec![]; + for capture in captures { + //let name = &capture["name"]; + let link = &capture["link"]; + if link.starts_with('#') || link.starts_with("http") { + continue; + } + let link = PathBuf::from(link); + //println!("{} {}", move_from.display(), link.display()); + let absolute_link = move_from + .parent() + .unwrap() + .canonicalize() + .unwrap() + .join(&link); + if move_to.components().collect::>().len() > 1 { + let _ = create_dir_all(move_to.parent().unwrap()); + } + let tmp_file = File::create_new(move_to.clone()); + //println!("{} {} {} {}", name, link.display(), absolute_link.display(), make_path_relative(absolute_link.clone(), move_to.canonicalize().unwrap().clone()).display()); + let new_link = make_path_relative( + absolute_link.clone(), + move_to.canonicalize().unwrap().clone(), + ); + if let Ok(_) = tmp_file { + remove_file(move_to.clone()).unwrap() + }; + changes.push((link.clone(), new_link.clone())); + } + for i in changes { + contents = contents.replace( + &format!("({})", i.0.display()), + &format!("({})", i.1.display()), + ); + } + write(move_from, contents).unwrap(); +} + fn make_path_relative(path: PathBuf, relative_to: PathBuf) -> PathBuf { let mut subdirs = 0; let path_components = path.components().collect::>(); diff --git a/website/scripts/docsmg/src/migratefile.rs b/website/scripts/docsmg/src/migratefile.rs index e6378b5c55d0..e7b24d65025e 100644 --- a/website/scripts/docsmg/src/migratefile.rs +++ b/website/scripts/docsmg/src/migratefile.rs @@ -10,6 +10,7 @@ pub fn read_migrate_file(file: PathBuf) -> anyhow::Result ")) + .filter(|x| !(x.0 == x.1)) .map(|x| { ( x.0.parse().expect("a valid path"),