Technical SEO Analysis

Comprehensive technical SEO analysis workflow with fixed HTTP task handling

Workflow Information

ID: technical_seo_analysis_v3

Namespace: default

Version: 3.0.0

Created: 2025-07-07

Updated: 2025-07-07

Tasks: 8

Inputs
Name Type Required Default
target_url string Required None
analysis_depth string Optional standard
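
For illustration, target_url is checked against the regular expression shown under inputs in the YAML source below, and analysis_depth accepts basic, standard, or comprehensive (default standard). A minimal, self-contained sketch of the URL check; the example URLs are placeholders, not taken from the workflow:

import re

# Pattern copied from the target_url input validation in the YAML source.
TARGET_URL_PATTERN = r"^https?://[\w\-\.]+(\.[\w\-\.]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?$"

print(bool(re.match(TARGET_URL_PATTERN, "https://example.com")))  # True: scheme plus dotted host
print(bool(re.match(TARGET_URL_PATTERN, "example.com")))          # False: the scheme is required
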
Outputs
Name Type Source Description
seo_report string generate_report Complete SEO analysis report with recommendations
recommendations string generate_recommendations AI-generated SEO recommendations
robots_analysis string analyze_robots Robots.txt analysis results
homepage_metrics string analyze_homepage Detailed homepage SEO metrics
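
As a rough guide derived from the generate_report script in the YAML source, the seo_report output is a single JSON document with the top-level keys sketched below (placeholder values only):

# Illustrative shape only; the real values are assembled by the generate_report task.
seo_report_shape = {
    "report_metadata": {},     # generated_at, target_url, analysis_depth
    "summary_metrics": {},     # overall_health_score, page_load_time, total_issues_found, ...
    "executive_summary": "",
    "issues_breakdown": {},    # critical, homepage, robots
    "recommendations": {},     # high_priority, quick_wins, long_term
    "technical_details": {},   # homepage_analysis, robots_analysis
}
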
Tasks
validate_url
script

Validates the target URL, derives the bare domain, and builds the robots.txt and sitemap.xml URLs for the downstream fetch tasks (results are emitted via the __OUTPUTS__ convention sketched after this list).

fetch_robots_txt
http

Fetches ${validate_url.robots_url} with a GET request (10-second timeout, up to 2 attempts); runs only when the URL validated.

fetch_sitemap
http

Fetches ${validate_url.sitemap_url} with a GET request (10-second timeout, up to 2 attempts); runs only when the URL validated.

analyze_homepage
script

Downloads the homepage with requests and BeautifulSoup and extracts on-page SEO signals: title, meta description, heading structure, image alt coverage, internal/external links, schema markup, viewport, and Open Graph tags, plus a list of detected issues.

analyze_robots
script

Parses robots.txt into per-user-agent allow/disallow rules, collects sitemap references, and flags issues; runs only when fetch_robots_txt returned HTTP 200.

analyze_robots_missing
script

Fallback branch that records a "No robots.txt file found" issue when fetch_robots_txt did not return HTTP 200.

generate_recommendations
ai_agent

AI agent (seo_analyzer model client) that turns the homepage and robots.txt findings into a structured JSON set of recommendations and an overall health score.

generate_report
script

Merges homepage metrics, robots.txt analysis, and the AI recommendations into the final JSON report, with a built-in fallback if the AI response cannot be parsed.
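
Every script task in this workflow ends by printing a single line that starts with __OUTPUTS__ followed by a JSON object; the engine is assumed to capture that line and expose its fields to later tasks through ${task_id.field} references such as ${validate_url.robots_url}. A minimal sketch of the emitting side (values are hypothetical):

import json

# Hypothetical result; validate_url actually emits keys such as valid, domain, robots_url, sitemap_url.
result = {"valid": True, "domain": "example.com", "robots_url": "https://example.com/robots.txt"}

# The engine is assumed to parse everything after the __OUTPUTS__ marker as this task's outputs.
print(f"__OUTPUTS__ {json.dumps(result)}")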

YAML Source
id: technical_seo_analysis_v3
name: Technical SEO Analysis
tasks:
- id: validate_url
  type: script
  script: "import json\nimport os\nfrom urllib.parse import urlparse\n\ntarget_url\
    \ = os.environ.get('target_url', '')\n\ntry:\n    parsed = urlparse(target_url)\n\
    \    if not parsed.scheme or not parsed.netloc:\n        raise ValueError(\"Invalid\
    \ URL format\")\n    \n    domain = parsed.netloc\n    if domain.startswith('www.'):\n\
    \        domain = domain[4:]\n    \n    # Build URLs for robots and sitemap\n\
    \    robots_url = f\"{parsed.scheme}://{domain}/robots.txt\"\n    sitemap_url\
    \ = f\"{parsed.scheme}://{domain}/sitemap.xml\"\n    \n    result = {\n      \
    \  \"valid\": True,\n        \"domain\": domain,\n        \"full_url\": target_url,\n\
    \        \"scheme\": parsed.scheme,\n        \"path\": parsed.path or \"/\",\n\
    \        \"robots_url\": robots_url,\n        \"sitemap_url\": sitemap_url\n \
    \   }\nexcept Exception as e:\n    result = {\n        \"valid\": False,\n   \
    \     \"error\": str(e),\n        \"robots_url\": \"\",\n        \"sitemap_url\"\
    : \"\"\n    }\n\nprint(f\"__OUTPUTS__ {json.dumps(result)}\")\n"
- id: fetch_robots_txt
  url: ${validate_url.robots_url}
  type: http
  when: validate_url.valid == True
  method: GET
  depends_on:
  - validate_url
  retry_policy:
    max_attempts: 2
  timeout_seconds: 10
- id: fetch_sitemap
  url: ${validate_url.sitemap_url}
  type: http
  when: validate_url.valid == True
  method: GET
  depends_on:
  - validate_url
  retry_policy:
    max_attempts: 2
  timeout_seconds: 10
- id: analyze_homepage
  type: script
  when: validate_url.valid == True
  script: "import json\nimport requests\nfrom bs4 import BeautifulSoup\nimport time\n\
    import os\n\nurl = os.environ.get('target_url', '')\n\ntry:\n    start_time =\
    \ time.time()\n    response = requests.get(url, timeout=15, headers={\n      \
    \  'User-Agent': 'Mozilla/5.0 (compatible; SEO-Analyzer/1.0)'\n    })\n    load_time\
    \ = time.time() - start_time\n    \n    soup = BeautifulSoup(response.text, 'html.parser')\n\
    \    \n    # Extract SEO elements\n    title = soup.find('title')\n    meta_desc\
    \ = soup.find('meta', attrs={'name': 'description'})\n    meta_keywords = soup.find('meta',\
    \ attrs={'name': 'keywords'})\n    canonical = soup.find('link', attrs={'rel':\
    \ 'canonical'})\n    \n    # Headers\n    h1_tags = soup.find_all('h1')\n    h2_tags\
    \ = soup.find_all('h2')\n    h3_tags = soup.find_all('h3')\n    \n    # Images\n\
    \    images = soup.find_all('img')\n    images_without_alt = [img for img in images\
    \ if not img.get('alt')]\n    \n    # Links\n    links = soup.find_all('a', href=True)\n\
    \    internal_links = 0\n    external_links = 0\n    domain = os.environ.get('validate_url.domain',\
    \ '')\n    \n    for link in links:\n        href = link.get('href', '')\n   \
    \     if href.startswith('http'):\n            if domain in href:\n          \
    \      internal_links += 1\n            else:\n                external_links\
    \ += 1\n        elif href.startswith('/'):\n            internal_links += 1\n\
    \    \n    # Schema markup\n    schema_scripts = soup.find_all('script', type='application/ld+json')\n\
    \    \n    # Mobile viewport\n    viewport = soup.find('meta', attrs={'name':\
    \ 'viewport'})\n    \n    # Open Graph tags\n    og_title = soup.find('meta',\
    \ property='og:title')\n    og_description = soup.find('meta', property='og:description')\n\
    \    og_image = soup.find('meta', property='og:image')\n    \n    # Calculate\
    \ issues\n    issues = []\n    if not title or not title.text.strip():\n     \
    \   issues.append(\"Missing title tag\")\n    elif len(title.text) > 60:\n   \
    \     issues.append(\"Title tag too long (>60 chars)\")\n    elif len(title.text)\
    \ < 30:\n        issues.append(\"Title tag too short (<30 chars)\")\n    \n  \
    \  if not meta_desc:\n        issues.append(\"Missing meta description\")\n  \
    \  elif meta_desc and len(meta_desc.get('content', '')) > 160:\n        issues.append(\"\
    Meta description too long (>160 chars)\")\n    elif meta_desc and len(meta_desc.get('content',\
    \ '')) < 70:\n        issues.append(\"Meta description too short (<70 chars)\"\
    )\n    \n    if len(h1_tags) == 0:\n        issues.append(\"No H1 tag found\"\
    )\n    elif len(h1_tags) > 1:\n        issues.append(f\"Multiple H1 tags found\
    \ ({len(h1_tags)})\")\n    \n    if images_without_alt:\n        issues.append(f\"\
    {len(images_without_alt)} images without alt text\")\n    \n    if not viewport:\n\
    \        issues.append(\"No mobile viewport meta tag\")\n    \n    if load_time\
    \ > 3:\n        issues.append(f\"Slow page load time ({load_time:.2f}s)\")\n \
    \   \n    if not og_title:\n        issues.append(\"Missing Open Graph title\"\
    )\n    if not og_description:\n        issues.append(\"Missing Open Graph description\"\
    )\n    if not og_image:\n        issues.append(\"Missing Open Graph image\")\n\
    \    \n    result = {\n        \"url\": url,\n        \"status_code\": response.status_code,\n\
    \        \"load_time\": round(load_time, 2),\n        \"title\": title.text.strip()\
    \ if title else None,\n        \"title_length\": len(title.text.strip()) if title\
    \ else 0,\n        \"meta_description\": meta_desc.get('content') if meta_desc\
    \ else None,\n        \"meta_description_length\": len(meta_desc.get('content',\
    \ '')) if meta_desc else 0,\n        \"meta_keywords\": meta_keywords.get('content')\
    \ if meta_keywords else None,\n        \"canonical_url\": canonical.get('href')\
    \ if canonical else None,\n        \"h1_count\": len(h1_tags),\n        \"h2_count\"\
    : len(h2_tags),\n        \"h3_count\": len(h3_tags),\n        \"images_total\"\
    : len(images),\n        \"images_without_alt\": len(images_without_alt),\n   \
    \     \"internal_links\": internal_links,\n        \"external_links\": external_links,\n\
    \        \"has_schema_markup\": len(schema_scripts) > 0,\n        \"schema_count\"\
    : len(schema_scripts),\n        \"has_viewport\": viewport is not None,\n    \
    \    \"has_og_tags\": all([og_title, og_description, og_image]),\n        \"issues\"\
    : issues,\n        \"content_length\": len(response.text)\n    }\n    \nexcept\
    \ Exception as e:\n    result = {\n        \"url\": url,\n        \"error\": str(e),\n\
    \        \"status_code\": 0,\n        \"issues\": [f\"Failed to analyze: {str(e)}\"\
    ]\n    }\n\nprint(f\"__OUTPUTS__ {json.dumps(result)}\")\n"
  depends_on:
  - validate_url
  requirements:
  - requests==2.31.0
  - beautifulsoup4==4.12.2
- id: analyze_robots
  type: script
  when: fetch_robots_txt.status_code == 200
  script: "import json\nimport os\n\nrobots_content = os.environ.get('fetch_robots_txt.body',\
    \ '')\n\nissues = []\nrecommendations = []\n\n# Parse robots.txt\nlines = robots_content.strip().split('\\\
    n')\nuser_agents = {}\ncurrent_agent = None\nhas_sitemap = False\nsitemaps = []\n\
    \nfor line in lines:\n    line = line.strip()\n    if line.lower().startswith('sitemap:'):\n\
    \        has_sitemap = True\n        sitemap_url = line.split(':', 1)[1].strip()\n\
    \        sitemaps.append(sitemap_url)\n    elif line.startswith('User-agent:'):\n\
    \        current_agent = line.split(':', 1)[1].strip()\n        user_agents[current_agent]\
    \ = {'allow': [], 'disallow': []}\n    elif line.startswith('Disallow:') and current_agent:\n\
    \        path = line.split(':', 1)[1].strip()\n        if path:\n            user_agents[current_agent]['disallow'].append(path)\n\
    \    elif line.startswith('Allow:') and current_agent:\n        path = line.split(':',\
    \ 1)[1].strip()\n        if path:\n            user_agents[current_agent]['allow'].append(path)\n\
    \n# Check for issues\nif '*' in user_agents and '/' in user_agents['*']['disallow']:\n\
    \    issues.append(\"Site is blocking all crawlers\")\n    recommendations.append(\"\
    Remove 'Disallow: /' for all user agents unless intentional\")\n\nif not has_sitemap:\n\
    \    issues.append(\"No sitemap reference in robots.txt\")\n    recommendations.append(\"\
    Add sitemap URL to robots.txt\")\n\nif 'Googlebot' not in user_agents:\n    recommendations.append(\"\
    Consider adding specific rules for Googlebot\")\n\nresult = {\n    \"user_agents\"\
    : user_agents,\n    \"sitemaps\": sitemaps,\n    \"issues\": issues,\n    \"recommendations\"\
    : recommendations,\n    \"has_robots\": True\n}\n\nprint(f\"__OUTPUTS__ {json.dumps(result)}\"\
    )\n"
  depends_on:
  - fetch_robots_txt
- id: analyze_robots_missing
  type: script
  when: fetch_robots_txt.status_code != 200
  script: "import json\n\nresult = {\n    \"user_agents\": {},\n    \"sitemaps\":\
    \ [],\n    \"issues\": [\"No robots.txt file found\"],\n    \"recommendations\"\
    : [\"Create a robots.txt file with basic rules and sitemap reference\"],\n   \
    \ \"has_robots\": False\n}\n\nprint(f\"__OUTPUTS__ {json.dumps(result)}\")\n"
  depends_on:
  - fetch_robots_txt
- id: generate_recommendations
  type: ai_agent
  prompt: "You are an expert SEO analyst. Analyze the following technical SEO data\
    \ and provide actionable recommendations.\n\nSite: ${target_url}\n\nHomepage Analysis:\n\
    ${analyze_homepage}\n\nRobots.txt Analysis:\n${analyze_robots}${analyze_robots_missing}\n\
    \nPlease provide a comprehensive SEO analysis with:\n\n1. Executive Summary -\
    \ A 2-3 sentence overview of the site's SEO health\n2. Critical Issues - Issues\
    \ that need immediate attention\n3. High Priority Recommendations - Important\
    \ improvements\n4. Quick Wins - Easy fixes that will have immediate impact\n5.\
    \ Long-term Improvements - Strategic changes for better SEO\n6. Overall Health\
    \ Score - A score from 1-10\n\nConsider these factors:\n- Title and meta description\
    \ optimization\n- Header structure (H1, H2, H3)\n- Image optimization (alt text)\n\
    - Mobile readiness\n- Page speed\n- Schema markup\n- Open Graph tags\n- Internal/external\
    \ link balance\n- Robots.txt configuration\n\nReturn ONLY a valid JSON object\
    \ with this structure:\n{\n  \"executive_summary\": \"string\",\n  \"critical_issues\"\
    : [\"issue1\", \"issue2\"],\n  \"high_priority_recommendations\": [\"rec1\", \"\
    rec2\"],\n  \"quick_wins\": [\"win1\", \"win2\"],\n  \"long_term_improvements\"\
    : [\"improvement1\", \"improvement2\"],\n  \"overall_health_score\": 7\n}\n"
  agent_type: analyst
  depends_on:
  - analyze_homepage
  - analyze_robots
  - analyze_robots_missing
  model_client_id: seo_analyzer
- id: generate_report
  type: script
  script: "import json\nfrom datetime import datetime\nimport os\n\n# Get data from\
    \ environment\nhomepage_data = json.loads(os.environ.get('analyze_homepage', '{}'))\n\
    \n# Get robots data (either from successful or missing analysis)\nrobots_data\
    \ = json.loads(os.environ.get('analyze_robots', os.environ.get('analyze_robots_missing',\
    \ '{}')))\n\n# Handle AI recommendations\nai_rec = os.environ.get('generate_recommendations',\
    \ '{}')\ntry:\n    # Try to parse the AI response\n    if ai_rec and isinstance(ai_rec,\
    \ str):\n        # Remove any non-JSON content before/after the JSON object\n\
    \        start = ai_rec.find('{')\n        end = ai_rec.rfind('}') + 1\n     \
    \   if start >= 0 and end > start:\n            json_str = ai_rec[start:end]\n\
    \            recommendations = json.loads(json_str)\n        else:\n         \
    \   raise ValueError(\"No JSON object found\")\n    else:\n        recommendations\
    \ = {}\nexcept Exception as e:\n    # Fallback if AI response can't be parsed\n\
    \    recommendations = {\n        \"executive_summary\": \"Technical SEO analysis\
    \ completed. Review the detailed findings below.\",\n        \"critical_issues\"\
    : homepage_data.get('issues', [])[:3],\n        \"high_priority_recommendations\"\
    : [\"Implement missing SEO elements\", \"Optimize page performance\"],\n     \
    \   \"quick_wins\": [\"Add alt text to images\", \"Create robots.txt file\"],\n\
    \        \"long_term_improvements\": [\"Implement structured data\", \"Improve\
    \ site architecture\"],\n        \"overall_health_score\": 5\n    }\n\n# Calculate\
    \ additional metrics\ntotal_issues = len(homepage_data.get('issues', [])) + len(robots_data.get('issues',\
    \ []))\n\n# Compile the final report\nreport = {\n    \"report_metadata\": {\n\
    \        \"generated_at\": datetime.now().isoformat(),\n        \"target_url\"\
    : os.environ.get('target_url', ''),\n        \"analysis_depth\": os.environ.get('analysis_depth',\
    \ 'standard')\n    },\n    \"summary_metrics\": {\n        \"overall_health_score\"\
    : recommendations.get(\"overall_health_score\", 0),\n        \"page_load_time\"\
    : f\"{homepage_data.get('load_time', 0)}s\",\n        \"total_issues_found\":\
    \ total_issues,\n        \"critical_issues_count\": len(recommendations.get(\"\
    critical_issues\", [])),\n        \"has_mobile_viewport\": homepage_data.get('has_viewport',\
    \ False),\n        \"has_schema_markup\": homepage_data.get('has_schema_markup',\
    \ False),\n        \"has_robots_txt\": robots_data.get('has_robots', False),\n\
    \        \"images_without_alt\": homepage_data.get('images_without_alt', 0)\n\
    \    },\n    \"executive_summary\": recommendations.get(\"executive_summary\"\
    , \"\"),\n    \"issues_breakdown\": {\n        \"critical\": recommendations.get(\"\
    critical_issues\", []),\n        \"homepage\": homepage_data.get('issues', []),\n\
    \        \"robots\": robots_data.get('issues', [])\n    },\n    \"recommendations\"\
    : {\n        \"high_priority\": recommendations.get(\"high_priority_recommendations\"\
    , []),\n        \"quick_wins\": recommendations.get(\"quick_wins\", []),\n   \
    \     \"long_term\": recommendations.get(\"long_term_improvements\", [])\n   \
    \ },\n    \"technical_details\": {\n        \"homepage_analysis\": homepage_data,\n\
    \        \"robots_analysis\": robots_data\n    }\n}\n\nprint(f\"__OUTPUTS__ {json.dumps(report)}\"\
    )\n"
  depends_on:
  - analyze_homepage
  - generate_recommendations
  - analyze_robots
  - analyze_robots_missing
inputs:
- name: target_url
  type: string
  required: true
  validation:
    pattern: ^https?://[\w\-\.]+(\.[\w\-\.]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?$
  description: The target website URL to analyze
- enum:
  - basic
  - standard
  - comprehensive
  name: analysis_depth
  type: string
  default: standard
  description: 'Depth of analysis: basic, standard, comprehensive'
outputs:
  seo_report:
    source: generate_report
    description: Complete SEO analysis report with recommendations
  recommendations:
    source: generate_recommendations
    description: AI-generated SEO recommendations
  robots_analysis:
    when: analyze_robots.completed
    source: analyze_robots
    description: Robots.txt analysis results
  homepage_metrics:
    source: analyze_homepage
    description: Detailed homepage SEO metrics
version: 3.0.0
description: Comprehensive technical SEO analysis workflow with fixed HTTP task handling
model_clients:
  seo_analyzer:
    model: gpt-4o-mini
    api_key: ${env.OPENAI_API_KEY}
    provider: openai
    temperature: 0.3
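
One detail worth pulling out of the escaped script above: generate_report does not assume the model returns pure JSON. It slices the reply from the first "{" to the last "}" before parsing and falls back to a default recommendation set if that fails. A readable sketch of the extraction step (the sample reply string is invented):

import json

def extract_json(ai_reply: str) -> dict:
    # Tolerate prose before/after the JSON object, mirroring the logic in generate_report.
    start = ai_reply.find('{')
    end = ai_reply.rfind('}') + 1
    if start >= 0 and end > start:
        return json.loads(ai_reply[start:end])
    raise ValueError("No JSON object found")

print(extract_json('Sure, here is the analysis: {"overall_health_score": 7}'))
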
Executions
Execution ID Status Started Duration
ce3ce13d... COMPLETED 2025-07-07 08:02:16 N/A