From b29e28d4eb0cfa8ae27e39b4f14c34ecc2807462 Mon Sep 17 00:00:00 2001
From: Pavel Astakhov
Date: Thu, 27 Jun 2024 22:53:28 +0500
Subject: [PATCH] Update robots.txt, add ROBOTS_DISALLOWED and extra-robots.txt

---
 _sources/configs/robots.php | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/_sources/configs/robots.php b/_sources/configs/robots.php
index acf21c6c..e6c63b02 100644
--- a/_sources/configs/robots.php
+++ b/_sources/configs/robots.php
@@ -5,8 +5,15 @@
 ini_set( 'display_errors', 0 );
 error_reporting( 0 );
 
+echo "# It generated by the robots.php file\n";
+
 header( 'Content-Type: text/plain' );
 
+$robotsDisallowed = getenv( 'ROBOTS_DISALLOWED' );
+if ( !empty( $robotsDisallowed ) && in_array( strtolower($robotsDisallowed), [ 'true', '1' ] ) ) {
+	die( "User-agent: *\nDisallow: /\n" );
+}
+
 $enableSitemapEnv = getenv( 'MW_ENABLE_SITEMAP_GENERATOR');
 // match the value check to the isTrue function at _sources/scripts/functions.sh
 if ( !empty( $enableSitemapEnv ) && in_array( $enableSitemapEnv, [ 'true', 'True', 'TRUE', '1' ] ) ) {
@@ -17,11 +24,16 @@
 
 	$siteMapUrl = "$server$script/sitemap$subdir/sitemap-index-$identifier.xml";
 
-	echo "# It generated by the robots.php file\n";
-
 	echo "# Add the sitemap url:\n";
 	echo "Sitemap: $siteMapUrl\n";
-
-	echo "\n# Content of the robots.txt file:\n";
 }
 readfile( 'robots.txt' );
+
+// If the file `extra-robots.txt` is created under the name
+// `/var/www/mediawiki/extra-robots.txt` then its contents get appended to the
+// default `robots.txt` - see MW-312
+if ( is_readable( 'extra-robots.txt' ) ) {
+	// Extra line to separate the files so that rules don't combine
+	echo "\n";
+	readfile( 'extra-robots.txt' );
+}