Commit: Don't group, as it's not supported by all UAs

jdevalk committed Feb 22, 2024
1 parent b3e26da commit 33fed56

Showing 2 changed files with 35 additions and 48 deletions.

README.md (40 changes: 1 addition & 39 deletions)

````diff
@@ -15,45 +15,7 @@ Optimizes your site's robots.txt to reduce server load and CO2 footprint by bloc
 
 ## Default output
 
-The default output of this plugin is as follows:
-
-```txt
-# This site is very specific about who it allows crawling from.
-# Our default is to not allow crawling:
-User-agent: *
-Disallow: /
-
-# Below are the crawlers that are allowed to crawl this site.
-# Below that list, you'll find paths that are blocked, even for them,
-# and then paths within those blocked paths that are allowed.
-User-agent: Applebot
-User-agent: ia_archiver
-User-agent: Baiduspider
-User-agent: Bingbot
-User-agent: DuckDuckBot
-User-agent: Googlebot
-User-agent: AdsBot-Google
-User-agent: MediaPartners-Google
-User-agent: Yandex
-User-agent: Slurp
-User-agent: FacebookExternalHit
-User-agent: LinkedInBot
-User-agent: WhatsApp
-User-agent: Twitterbot
-Allow: /
-Disallow: /wp-json/
-Disallow: /?rest_route=
-Disallow: /wp-admin/
-Disallow: /wp-content/cache/
-Disallow: /wp-content/plugins/
-Disallow: /xmlrpc.php
-Disallow: /wp-includes/
-Allow: /wp-includes/css/
-Allow: /wp-includes/js/
-
-# XML Sitemap:
-Sitemap: https://example.com/sitemap_index.xml
-```
+The default output of this plugin [can be seen here on joost.blog](https://joost.blog/robots.txt) or [here on emilia.capital](https://emilia.capital/robots.txt).
 
 ## Filters
 
````
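The removed example showed grouped records: one stack of `User-agent:` lines sharing a single rule set. RFC 9309 permits that, but, as the commit message notes, not all user agents support it, so the output now repeats the full rule set under each crawler. A sketch of the new shape, abbreviated to two crawlers and a few paths for illustration (the live files linked above are authoritative):

```txt
# This site is very specific about who it allows crawling from.
# Our default is to not allow crawling:
User-agent: *
Disallow: /

User-agent: Googlebot
Allow: /
Disallow: /wp-admin/
Disallow: /wp-includes/
Allow: /wp-includes/css/

User-agent: Bingbot
Allow: /
Disallow: /wp-admin/
Disallow: /wp-includes/
Allow: /wp-includes/css/
```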
src/class-plugin.php (43 changes: 34 additions & 9 deletions)

```diff
@@ -9,6 +9,27 @@ class Plugin {
 
 	const BACKUP_PATH = ABSPATH . 'robots.txt.eco-friendly-backup';
 
+	/**
+	 * The list of allowed crawlers.
+	 *
+	 * @var string[]
+	 */
+	public $allowed_spiders;
+
+	/**
+	 * The list of blocked paths.
+	 *
+	 * @var string[]
+	 */
+	public $blocked_paths;
+
+	/**
+	 * The list of allowed paths.
+	 *
+	 * @var string[]
+	 */
+	public $allowed_paths;
+
 	/**
 	 * Initialize the hooks and filters.
 	 */
@@ -60,6 +81,11 @@ public function modify_robots_txt( $output, $site_public ) {
 			return "User-agent: *\nDisallow: /\n";
 		}
 
+		// We only need to do this when we're actually sending a robots.txt, hence here.
+		$this->allowed_spiders = $this->get_allowed_spiders();
+		$this->blocked_paths   = $this->get_blocked_paths();
+		$this->allowed_paths   = $this->get_allowed_paths();
+
 		$robots_txt  = "# This site is very specific about who it allows crawling from.\n";
 		$robots_txt .= "# Our default is to not allow crawling:\n";
 		$robots_txt .= "User-agent: *\n";
@@ -68,17 +94,16 @@ public function modify_robots_txt( $output, $site_public ) {
 		$robots_txt .= "\n# Below are the crawlers that are allowed to crawl this site.\n";
 		$robots_txt .= "# Below that list, you'll find paths that are blocked, even for them,\n";
 		$robots_txt .= "# and then paths within those blocked paths that are allowed.\n";
-		foreach ( $this->get_allowed_spiders() as $crawler ) {
+		foreach ( $this->allowed_spiders as $crawler ) {
 			$robots_txt .= "User-agent: $crawler\n";
-		}
-		$robots_txt .= "Allow: /\n";
-
-		foreach ( $this->get_blocked_paths() as $path ) {
-			$robots_txt .= "Disallow: $path\n";
-		}
+			$robots_txt .= "Allow: /\n";
+			foreach ( $this->blocked_paths as $path ) {
+				$robots_txt .= "Disallow: $path\n";
+			}
 
-		foreach ( $this->get_allowed_paths() as $path ) {
-			$robots_txt .= "Allow: $path\n";
+			foreach ( $this->allowed_paths as $path ) {
+				$robots_txt .= "Allow: $path\n";
+			}
 		}
 
 		// Keep existing Sitemap references.
```
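Read together, the hunks cache the getter results in properties and move the rule loops inside the crawler loop, so each allowed crawler gets one self-contained record. A minimal standalone sketch of that shape; the wrapper function and sample arguments here are illustrative assumptions, not the plugin's verbatim code, which builds the same output inside `Plugin::modify_robots_txt()`:

```php
<?php
/**
 * Illustrative sketch of the per-crawler records this commit switches to.
 * The function name and sample data below are hypothetical; parameter names
 * mirror the properties added to the Plugin class above.
 *
 * @param string[] $allowed_spiders Crawlers allowed to crawl the site.
 * @param string[] $blocked_paths   Paths disallowed even for allowed crawlers.
 * @param string[] $allowed_paths   Paths re-allowed within the blocked ones.
 */
function build_crawler_records( array $allowed_spiders, array $blocked_paths, array $allowed_paths ): string {
	$robots_txt = '';
	foreach ( $allowed_spiders as $crawler ) {
		// One complete record per crawler; a blank line separates records.
		// No grouping of several User-agent lines above one shared rule set,
		// since not all user agents support grouping.
		$robots_txt .= "\nUser-agent: $crawler\n";
		$robots_txt .= "Allow: /\n";
		foreach ( $blocked_paths as $path ) {
			$robots_txt .= "Disallow: $path\n";
		}
		foreach ( $allowed_paths as $path ) {
			$robots_txt .= "Allow: $path\n";
		}
	}
	return $robots_txt;
}

echo build_crawler_records(
	array( 'Googlebot', 'Bingbot' ),
	array( '/wp-admin/', '/wp-includes/' ),
	array( '/wp-includes/css/', '/wp-includes/js/' )
);
```

Populating the properties inside `modify_robots_txt()` rather than at construction time keeps the getter and filter calls off requests that never serve a robots.txt, which is what the comment in the second hunk says.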
