# Robots.txt for Casablanca Desert Tours
# Crawl configuration for search engines and other well-behaved bots.
# Last updated: January 2025
#
# NOTE: robots.txt is advisory only. It is not an access-control mechanism,
# and every path listed here is publicly readable. Sensitive locations must
# also be protected on the server (authentication / deny rules).
#
# How groups are selected (RFC 9309): a crawler obeys ONLY the group whose
# User-agent line matches it most specifically. "User-agent: *" is used only
# when no named group matches, and named groups do NOT inherit its rules.
# The shared rule list is therefore repeated in every named group that must
# enforce it. When editing the list, update ALL copies (default, Bingbot,
# Slurp/YandexBot, Baiduspider).
#
# Crawl-delay is not part of RFC 9309; crawlers that do not support it
# simply ignore the line.

# =====================================================================
# Default group: every crawler that has no named group below.
# Googlebot, Googlebot-Mobile and DuckDuckBot intentionally have no group
# of their own, so they follow these rules (their intended delay of 1 is
# the same as the default delay).
# =====================================================================
User-agent: *
Allow: /

# Resources needed for page rendering
Allow: /assets/css/
Allow: /assets/js/
Allow: /assets/images/

# Administrative and sensitive areas
Disallow: /admin/
Disallow: /administrator/
Disallow: /wp-admin/
Disallow: /wp-content/
Disallow: /wp-includes/
Disallow: /private/
Disallow: /temp/
Disallow: /tmp/
Disallow: /cache/
Disallow: /cgi-bin/
Disallow: /errors/
Disallow: /logs/
Disallow: /stats/
Disallow: /backup/
Disallow: /include/
Disallow: /divi-builder/

# Configuration, dump and archive files
Disallow: /*.env
Disallow: /*.config
Disallow: /*.conf
Disallow: /*.log
Disallow: /*.sql
Disallow: /*.gz
Disallow: /*.tar
Disallow: /*.zip

# Development and testing areas
Disallow: /test/
Disallow: /tests/
Disallow: /dev/
Disallow: /development/
Disallow: /staging/
Disallow: /_test/
Disallow: /*.test.php

# File types that should not be indexed.
# /index.php stays crawlable: the longer (more specific) Allow wins over
# the shorter /*.php$ pattern.
Disallow: /*.php$
Allow: /index.php
Allow: /amp/
Disallow: /*.pdf$
Disallow: /*.doc$
Disallow: /*.docx$
Disallow: /*.xls$
Disallow: /*.xlsx$

# Version control and server configuration files
Disallow: /.git/
Disallow: /.svn/
Disallow: /.htaccess
Disallow: /.htpasswd

# Important SEO resources
Allow: /sitemap.xml
Allow: /sitemap*.xml
Allow: /robots.txt
Allow: /favicon.ico
Allow: /manifest.json
Allow: /sw.js

# Structured data and feeds
Allow: /feed/
Allow: /rss/
Allow: /*.json$
Allow: /schema/

# Default crawl delay, to be respectful to server resources
Crawl-delay: 1

# =====================================================================
# Bing: same rules as the default group, plus Bing-specific allowances
# and a slower crawl rate.
# =====================================================================
User-agent: Bingbot
Allow: /

# Bing-specific allowances
Allow: /assets/
Allow: /pages/

# Administrative and sensitive areas
Disallow: /admin/
Disallow: /administrator/
Disallow: /wp-admin/
Disallow: /wp-content/
Disallow: /wp-includes/
Disallow: /private/
Disallow: /temp/
Disallow: /tmp/
Disallow: /cache/
Disallow: /cgi-bin/
Disallow: /errors/
Disallow: /logs/
Disallow: /stats/
Disallow: /backup/
Disallow: /include/
Disallow: /divi-builder/

# Configuration, dump and archive files
Disallow: /*.env
Disallow: /*.config
Disallow: /*.conf
Disallow: /*.log
Disallow: /*.sql
Disallow: /*.gz
Disallow: /*.tar
Disallow: /*.zip

# Development and testing areas
Disallow: /test/
Disallow: /tests/
Disallow: /dev/
Disallow: /development/
Disallow: /staging/
Disallow: /_test/
Disallow: /*.test.php

# File types that should not be indexed
Disallow: /*.php$
Allow: /index.php
Allow: /amp/
Disallow: /*.pdf$
Disallow: /*.doc$
Disallow: /*.docx$
Disallow: /*.xls$
Disallow: /*.xlsx$

# Version control and server configuration files
Disallow: /.git/
Disallow: /.svn/
Disallow: /.htaccess
Disallow: /.htpasswd

# Important SEO resources
Allow: /sitemap.xml
Allow: /sitemap*.xml
Allow: /robots.txt
Allow: /favicon.ico
Allow: /manifest.json
Allow: /sw.js

# Structured data and feeds
Allow: /feed/
Allow: /rss/
Allow: /*.json$
Allow: /schema/

Crawl-delay: 2

# =====================================================================
# Yahoo (Slurp) and Yandex (Russian market): same rules as the default
# group, with a crawl delay of 3.
# =====================================================================
User-agent: Slurp
User-agent: YandexBot
Allow: /

# Administrative and sensitive areas
Disallow: /admin/
Disallow: /administrator/
Disallow: /wp-admin/
Disallow: /wp-content/
Disallow: /wp-includes/
Disallow: /private/
Disallow: /temp/
Disallow: /tmp/
Disallow: /cache/
Disallow: /cgi-bin/
Disallow: /errors/
Disallow: /logs/
Disallow: /stats/
Disallow: /backup/
Disallow: /include/
Disallow: /divi-builder/

# Configuration, dump and archive files
Disallow: /*.env
Disallow: /*.config
Disallow: /*.conf
Disallow: /*.log
Disallow: /*.sql
Disallow: /*.gz
Disallow: /*.tar
Disallow: /*.zip

# Development and testing areas
Disallow: /test/
Disallow: /tests/
Disallow: /dev/
Disallow: /development/
Disallow: /staging/
Disallow: /_test/
Disallow: /*.test.php

# File types that should not be indexed
Disallow: /*.php$
Allow: /index.php
Allow: /amp/
Disallow: /*.pdf$
Disallow: /*.doc$
Disallow: /*.docx$
Disallow: /*.xls$
Disallow: /*.xlsx$

# Version control and server configuration files
Disallow: /.git/
Disallow: /.svn/
Disallow: /.htaccess
Disallow: /.htpasswd

# Important SEO resources
Allow: /sitemap.xml
Allow: /sitemap*.xml
Allow: /robots.txt
Allow: /favicon.ico
Allow: /manifest.json
Allow: /sw.js

# Structured data and feeds
Allow: /feed/
Allow: /rss/
Allow: /*.json$
Allow: /schema/

Crawl-delay: 3

# =====================================================================
# Baidu (Chinese market): same rules as the default group, with a crawl
# delay of 5.
# =====================================================================
User-agent: Baiduspider
Allow: /

# Administrative and sensitive areas
Disallow: /admin/
Disallow: /administrator/
Disallow: /wp-admin/
Disallow: /wp-content/
Disallow: /wp-includes/
Disallow: /private/
Disallow: /temp/
Disallow: /tmp/
Disallow: /cache/
Disallow: /cgi-bin/
Disallow: /errors/
Disallow: /logs/
Disallow: /stats/
Disallow: /backup/
Disallow: /include/
Disallow: /divi-builder/

# Configuration, dump and archive files
Disallow: /*.env
Disallow: /*.config
Disallow: /*.conf
Disallow: /*.log
Disallow: /*.sql
Disallow: /*.gz
Disallow: /*.tar
Disallow: /*.zip

# Development and testing areas
Disallow: /test/
Disallow: /tests/
Disallow: /dev/
Disallow: /development/
Disallow: /staging/
Disallow: /_test/
Disallow: /*.test.php

# File types that should not be indexed
Disallow: /*.php$
Allow: /index.php
Allow: /amp/
Disallow: /*.pdf$
Disallow: /*.doc$
Disallow: /*.docx$
Disallow: /*.xls$
Disallow: /*.xlsx$

# Version control and server configuration files
Disallow: /.git/
Disallow: /.svn/
Disallow: /.htaccess
Disallow: /.htpasswd

# Important SEO resources
Allow: /sitemap.xml
Allow: /sitemap*.xml
Allow: /robots.txt
Allow: /favicon.ico
Allow: /manifest.json
Allow: /sw.js

# Structured data and feeds
Allow: /feed/
Allow: /rss/
Allow: /*.json$
Allow: /schema/

Crawl-delay: 5

# =====================================================================
# Problematic or spam bots: blocked from the whole site.
# =====================================================================
User-agent: SemrushBot
User-agent: AhrefsBot
User-agent: MJ12bot
User-agent: DotBot
Disallow: /

# =====================================================================
# Social media bots: full access so that shared links render previews.
# =====================================================================
User-agent: facebookexternalhit
User-agent: Twitterbot
User-agent: LinkedInBot
User-agent: WhatsApp
Allow: /

# =====================================================================
# Image crawler. This group contains no Disallow rules, so nothing is
# blocked for this bot; the Allow lines only document the image paths.
# =====================================================================
User-agent: Googlebot-Image
Allow: /assets/images/
Allow: /images/
Crawl-delay: 1

# =====================================================================
# Archive crawlers
# =====================================================================
User-agent: ia_archiver
Allow: /
Crawl-delay: 10

# =====================================================================
# Performance monitoring bots: full access.
# =====================================================================
User-agent: GTmetrix
User-agent: Pingdom
Allow: /

# =====================================================================
# Sitemaps (independent of any User-agent group)
# =====================================================================
# Comprehensive index for all content types
Sitemap: https://www.casablancadeserttour.com/sitemap-index.xml

# Individual sitemaps (referenced in index)
Sitemap: https://www.casablancadeserttour.com/sitemap.xml
Sitemap: https://www.casablancadeserttour.com/sitemap-images.xml
Sitemap: https://www.casablancadeserttour.com/sitemap-news.xml
Sitemap: https://www.casablancadeserttour.com/sitemap-videos.xml

# Host directive (canonical domain hint). Non-standard: it is not defined by
# RFC 9309, and crawlers that do not recognise it ignore the line.
Host: https://www.casablancadeserttour.com