From 46164314db0c4989a16514866b670ad32ee590d7 Mon Sep 17 00:00:00 2001
From: tellyworth
Date: Fri, 3 Jun 2022 11:06:38 +1000
Subject: [PATCH] Add a CLI script for scrubbing email addresses etc from export files

---
 env/scrub-export.php | 91 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 env/scrub-export.php

diff --git a/env/scrub-export.php b/env/scrub-export.php
new file mode 100644
index 000000000..8a73df096
--- /dev/null
+++ b/env/scrub-export.php
@@ -0,0 +1,91 @@
+<?php
+/**
+ * CLI script for scrubbing email addresses etc from WXR export files.
+ *
+ * Loads the input export as XML, replaces the content of every non-empty
+ * author / comment-author element in the wp: namespace with the literal
+ * __REDACTED__, and writes the resulting document to the output file.
+ * Progress and errors are reported on STDERR.
+ *
+ * Usage: php scrub-export.php <infile.wxr> [outfile.wxr]
+ *
+ * NOTE(review): the opening lines of this script (argument handling, the
+ * default output name and the source of $namespace) were missing from the
+ * patch text and are reconstructed here; confirm against the original commit.
+ */
+
+$infile = isset( $argv[1] ) ? $argv[1] : null;
+if ( ! $infile ) {
+	fwrite( STDERR, "Usage: php {$argv[0]} <infile.wxr> [outfile.wxr]\n" );
+	die( 1 );
+}
+
+// NOTE(review): assumed default when no output name is given; confirm.
+$outfile = isset( $argv[2] ) ? $argv[2] : $infile . '.scrubbed';
+
+$doc = new DOMDocument();
+if ( ! $doc->load( $infile ) ) {
+	// The input is only read here, so report a read failure.
+	fwrite( STDERR, "Unable to open $infile for reading.\n" );
+	die( 1 );
+}
+
+// Mode 'x' creates the file and fails if it already exists, so an existing
+// file is never overwritten. Opened before scrubbing so a bad path fails early.
+$fp_out = fopen( $outfile, 'x' );
+if ( ! $fp_out ) {
+	fwrite( STDERR, "Unable to open $outfile for writing.\n" );
+	die( 1 );
+}
+
+fwrite( STDERR, "Scrubbing $infile to $outfile\n" );
+
+// Use whatever URI the document binds to the wp: prefix, so any WXR version
+// is handled; fall back to the WXR 1.2 URI if the root does not declare it.
+$namespace = $doc->documentElement->lookupNamespaceURI( 'wp' );
+if ( ! $namespace ) {
+	$namespace = 'http://wordpress.org/export/1.2/';
+}
+
+// These are all in the wp: namespace.
+$wp_elements_to_scrub = [
+	'author_login',
+	'author_email',
+	'author_first_name',
+	'author_last_name',
+	'author_display_name',
+	'comment_author',
+	'comment_author_email',
+	'comment_author_url',
+	'comment_author_IP',
+];
+
+foreach ( $wp_elements_to_scrub as $tag ) {
+	$count_replaced = 0;
+	$nodes          = $doc->getElementsByTagNameNS( $namespace, $tag );
+	foreach ( $nodes as $node ) {
+		// Empty elements hold nothing to scrub, so they are left as they are.
+		if ( ! $node->hasChildNodes() ) {
+			continue;
+		}
+
+		// There should only be one child (a text node) but remove them all just in case.
+		while ( $node->firstChild ) {
+			$node->removeChild( $node->firstChild );
+		}
+
+		$node->appendChild( new DOMText( '__REDACTED__' ) );
+		++$count_replaced;
+	}
+
+	fwrite( STDERR, "Replaced $count_replaced instances of wp:$tag\n" );
+}
+
+$xml = $doc->saveXML();
+if ( false === $xml || false === fwrite( $fp_out, $xml ) ) {
+	fwrite( STDERR, "Unable to write the scrubbed export to $outfile.\n" );
+	fclose( $fp_out );
+	die( 1 );
+}
+
+fclose( $fp_out );