Technical SEO Analysis

A comprehensive technical SEO analysis workflow that crawls websites, analyzes technical factors, and provides actionable recommendations.

Workflow Information

ID: technical_seo_analysis_v2
Namespace: default
Version: 2.0.0
Created: 2025-07-07
Updated: 2025-07-07
Tasks: 7

Inputs

Name            Type     Required  Default
target_url      string   Required  None
analysis_depth  string   Optional  standard
max_pages       integer  Optional  10
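
For illustration, a minimal set of run inputs, shown as a Python dict (the actual submission format depends on the workflow runner and is an assumption here):

inputs = {
    "target_url": "https://example.com",   # required; must match the URL pattern in the YAML source
    "analysis_depth": "standard",          # optional: basic | standard | comprehensive
    "max_pages": 10,                       # optional, 1-100; declared but not yet referenced by any task
}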
Outputs

Name              Type    Source                     Description
seo_report        string  generate_report            Complete SEO analysis report
recommendations   string  generate_recommendations   AI-generated recommendations
homepage_metrics  string  analyze_homepage           Homepage SEO metrics
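
The seo_report and homepage_metrics outputs come from script tasks, which emit their result as a single stdout line of the form __OUTPUTS__ {json}. A minimal sketch of how a runner could recover such a payload, assuming one sentinel line per task (the actual runner's parsing is not shown on this page):

import json

SENTINEL = "__OUTPUTS__ "

def parse_task_output(stdout: str) -> dict:
    # Scan captured stdout for the sentinel line that each script emits via
    # print(f"__OUTPUTS__ {json.dumps(result)}") and decode its JSON payload.
    for line in stdout.splitlines():
        if line.startswith(SENTINEL):
            return json.loads(line[len(SENTINEL):])
    return {}  # no sentinel line found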
Tasks
validate_url
script

Validates and parses target_url, normalizing the domain and emitting scheme, domain, and path.

fetch_robots_txt
http

Fetches /robots.txt from the validated domain; runs only when validate_url reports a valid URL.

fetch_sitemap
http

Fetches /sitemap.xml from the validated domain; runs only when validate_url reports a valid URL.

analyze_homepage
script

Loads the homepage and checks title, meta description, canonical URL, headings, image alt text, schema markup, mobile viewport, and load time.

analyze_robots
script

Parses robots.txt directives per user agent and flags site-wide crawler blocking and a missing sitemap reference; runs only when robots.txt returned HTTP 200.

generate_recommendations
ai_agent

Sends the homepage and robots analyses to the seo_analyzer model client and requests JSON-formatted recommendations.

generate_report
script

Merges the homepage analysis, robots analysis, and AI recommendations into the final JSON report.

Execution order: validate_url gates the two fetch tasks and the homepage analysis; analyze_robots follows fetch_robots_txt; generate_recommendations waits on analyze_homepage and analyze_robots; generate_report runs last.

YAML Source
id: technical_seo_analysis_v2
name: Technical SEO Analysis
tasks:
- id: validate_url
  type: script
  script: "import json\nimport os\nfrom urllib.parse import urlparse\n\ntarget_url\
    \ = os.environ.get('target_url', '')\n\ntry:\n    parsed = urlparse(target_url)\n\
    \    if not parsed.scheme or not parsed.netloc:\n        raise ValueError(\"Invalid\
    \ URL format\")\n    \n    domain = parsed.netloc\n    if domain.startswith('www.'):\n\
    \        domain = domain[4:]\n    \n    result = {\n        \"valid\": True,\n\
    \        \"domain\": domain,\n        \"full_url\": target_url,\n        \"scheme\"\
    : parsed.scheme,\n        \"path\": parsed.path or \"/\"\n    }\nexcept Exception\
    \ as e:\n    result = {\n        \"valid\": False,\n        \"error\": str(e)\n\
    \    }\n\nprint(f\"__OUTPUTS__ {json.dumps(result)}\")\n"
- id: fetch_robots_txt
  type: http
  when: validate_url.valid == True
  request:
    url: ${validate_url.scheme}://${validate_url.domain}/robots.txt
    method: GET
  depends_on:
  - validate_url
  retry_policy:
    max_attempts: 2
  timeout_seconds: 10
- id: fetch_sitemap
  type: http
  when: validate_url.valid == True
  request:
    url: ${validate_url.scheme}://${validate_url.domain}/sitemap.xml
    method: GET
  depends_on:
  - validate_url
  retry_policy:
    max_attempts: 2
  timeout_seconds: 10
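# Note: fetch_sitemap's response is not consumed by any downstream task in this
# version; only fetch_robots_txt feeds a later analysis step (analyze_robots).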
- id: analyze_homepage
  type: script
  when: validate_url.valid == True
  script: "import json\nimport requests\nfrom bs4 import BeautifulSoup\nimport time\n\
    import os\n\nurl = os.environ.get('target_url', '')\n\ntry:\n    start_time =\
    \ time.time()\n    response = requests.get(url, timeout=15, headers={\n      \
    \  'User-Agent': 'Mozilla/5.0 (compatible; SEO-Analyzer/1.0)'\n    })\n    load_time\
    \ = time.time() - start_time\n    \n    soup = BeautifulSoup(response.text, 'html.parser')\n\
    \    \n    # Extract SEO elements\n    title = soup.find('title')\n    meta_desc\
    \ = soup.find('meta', attrs={'name': 'description'})\n    canonical = soup.find('link',\
    \ attrs={'rel': 'canonical'})\n    \n    # Headers\n    h1_tags = soup.find_all('h1')\n\
    \    h2_tags = soup.find_all('h2')\n    \n    # Images\n    images = soup.find_all('img')\n\
    \    images_without_alt = [img for img in images if not img.get('alt')]\n    \n\
    \    # Schema markup\n    schema_scripts = soup.find_all('script', type='application/ld+json')\n\
    \    \n    # Mobile viewport\n    viewport = soup.find('meta', attrs={'name':\
    \ 'viewport'})\n    \n    # Calculate issues\n    issues = []\n    if not title\
    \ or not title.text.strip():\n        issues.append(\"Missing title tag\")\n \
    \   elif len(title.text) > 60:\n        issues.append(\"Title tag too long (>60\
    \ chars)\")\n    \n    if not meta_desc:\n        issues.append(\"Missing meta\
    \ description\")\n    elif meta_desc and len(meta_desc.get('content', '')) > 160:\n\
    \        issues.append(\"Meta description too long (>160 chars)\")\n    \n   \
    \ if len(h1_tags) == 0:\n        issues.append(\"No H1 tag found\")\n    elif\
    \ len(h1_tags) > 1:\n        issues.append(f\"Multiple H1 tags found ({len(h1_tags)})\"\
    )\n    \n    if images_without_alt:\n        issues.append(f\"{len(images_without_alt)}\
    \ images without alt text\")\n    \n    if not viewport:\n        issues.append(\"\
    No mobile viewport meta tag\")\n    \n    if load_time > 3:\n        issues.append(f\"\
    Slow page load time ({load_time:.2f}s)\")\n    \n    result = {\n        \"url\"\
    : url,\n        \"status_code\": response.status_code,\n        \"load_time\"\
    : round(load_time, 2),\n        \"title\": title.text.strip() if title else None,\n\
    \        \"title_length\": len(title.text.strip()) if title else 0,\n        \"\
    meta_description\": meta_desc.get('content') if meta_desc else None,\n       \
    \ \"meta_description_length\": len(meta_desc.get('content', '')) if meta_desc\
    \ else 0,\n        \"canonical_url\": canonical.get('href') if canonical else\
    \ None,\n        \"h1_count\": len(h1_tags),\n        \"h2_count\": len(h2_tags),\n\
    \        \"images_total\": len(images),\n        \"images_without_alt\": len(images_without_alt),\n\
    \        \"has_schema_markup\": len(schema_scripts) > 0,\n        \"has_viewport\"\
    : viewport is not None,\n        \"issues\": issues,\n        \"content_length\"\
    : len(response.text)\n    }\n    \nexcept Exception as e:\n    result = {\n  \
    \      \"url\": url,\n        \"error\": str(e),\n        \"status_code\": 0,\n\
    \        \"issues\": [f\"Failed to analyze: {str(e)}\"]\n    }\n\nprint(f\"__OUTPUTS__\
    \ {json.dumps(result)}\")\n"
  depends_on:
  - validate_url
  requirements:
  - requests==2.31.0
  - beautifulsoup4==4.12.2
- id: analyze_robots
  type: script
  when: fetch_robots_txt.status_code == 200
  script: "import json\nimport os\n\nrobots_content = os.environ.get('fetch_robots_txt.body',\
    \ '')\n\nissues = []\nrecommendations = []\n\n# Parse robots.txt\nlines = robots_content.strip().split('\\\
    n')\nuser_agents = {}\ncurrent_agent = None\nhas_sitemap = False\n\nfor line in\
    \ lines:\n    line = line.strip()\n    if line.lower().startswith('sitemap:'):\n\
    \        has_sitemap = True\n    elif line.startswith('User-agent:'):\n      \
    \  current_agent = line.split(':', 1)[1].strip()\n        user_agents[current_agent]\
    \ = {'allow': [], 'disallow': []}\n    elif line.startswith('Disallow:') and current_agent:\n\
    \        path = line.split(':', 1)[1].strip()\n        if path:\n            user_agents[current_agent]['disallow'].append(path)\n\
    \    elif line.startswith('Allow:') and current_agent:\n        path = line.split(':',\
    \ 1)[1].strip()\n        if path:\n            user_agents[current_agent]['allow'].append(path)\n\
    \n# Check for issues\nif '*' in user_agents and '/' in user_agents['*']['disallow']:\n\
    \    issues.append(\"Site is blocking all crawlers\")\n    recommendations.append(\"\
    Remove 'Disallow: /' for all user agents unless intentional\")\n\nif not has_sitemap:\n\
    \    issues.append(\"No sitemap reference in robots.txt\")\n    recommendations.append(\"\
    Add sitemap URL to robots.txt\")\n\nresult = {\n    \"user_agents\": user_agents,\n\
    \    \"issues\": issues,\n    \"recommendations\": recommendations,\n    \"has_robots\"\
    : True\n}\n\nprint(f\"__OUTPUTS__ {json.dumps(result)}\")\n"
  depends_on:
  - fetch_robots_txt
- id: generate_recommendations
  type: ai_agent
  prompt: |
    Analyze the following technical SEO data and provide actionable recommendations:

    Site: ${target_url}

    Homepage Analysis:
    ${analyze_homepage}

    Robots.txt Analysis:
    ${analyze_robots}

    Please provide:
    1. Executive Summary (2-3 sentences about the site's SEO health)
    2. Critical Issues (must fix immediately)
    3. High Priority Recommendations
    4. Quick Wins (easy fixes with high impact)
    5. Long-term Improvements
    6. Overall Health Score (1-10)

    Format as a JSON object with these keys:
    - executive_summary
    - critical_issues (array)
    - high_priority_recommendations (array)
    - quick_wins (array)
    - long_term_improvements (array)
    - overall_health_score (number 1-10)
  agent_type: analyst
  depends_on:
  - analyze_homepage
  - analyze_robots
  model_client_id: seo_analyzer
- id: generate_report
  type: script
  script: "import json\nfrom datetime import datetime\nimport os\n\n# Get data from\
    \ environment\nhomepage_data = json.loads(os.environ.get('analyze_homepage', '{}'))\n\
    \n# Handle AI recommendations - it might be a string that needs parsing\nai_rec\
    \ = os.environ.get('generate_recommendations', '{}')\ntry:\n    if isinstance(ai_rec,\
    \ str) and ai_rec.strip():\n        recommendations = json.loads(ai_rec)\n   \
    \ else:\n        recommendations = {}\nexcept:\n    recommendations = {\n    \
    \    \"executive_summary\": \"Unable to generate AI recommendations\",\n     \
    \   \"critical_issues\": [],\n        \"high_priority_recommendations\": [],\n\
    \        \"quick_wins\": [],\n        \"long_term_improvements\": [],\n      \
    \  \"overall_health_score\": 0\n    }\n\nrobots_data = json.loads(os.environ.get('analyze_robots',\
    \ '{}'))\n\n# Compile the final report\nreport = {\n    \"report_metadata\": {\n\
    \        \"generated_at\": datetime.now().isoformat(),\n        \"target_url\"\
    : os.environ.get('target_url', ''),\n        \"analysis_depth\": os.environ.get('analysis_depth',\
    \ 'standard')\n    },\n    \"summary_metrics\": {\n        \"overall_health_score\"\
    : recommendations.get(\"overall_health_score\", 0),\n        \"page_load_time\"\
    : f\"{homepage_data.get('load_time', 0)}s\",\n        \"issues_found\": len(homepage_data.get('issues',\
    \ [])) + len(robots_data.get('issues', [])),\n        \"has_mobile_viewport\"\
    : homepage_data.get('has_viewport', False),\n        \"has_schema_markup\": homepage_data.get('has_schema_markup',\
    \ False)\n    },\n    \"executive_summary\": recommendations.get(\"executive_summary\"\
    , \"\"),\n    \"critical_issues\": recommendations.get(\"critical_issues\", []),\n\
    \    \"recommendations\": {\n        \"high_priority\": recommendations.get(\"\
    high_priority_recommendations\", []),\n        \"quick_wins\": recommendations.get(\"\
    quick_wins\", []),\n        \"long_term\": recommendations.get(\"long_term_improvements\"\
    , [])\n    },\n    \"technical_details\": {\n        \"homepage_analysis\": homepage_data,\n\
    \        \"robots_analysis\": robots_data\n    }\n}\n\nprint(f\"__OUTPUTS__ {json.dumps(report)}\"\
    )\n"
  depends_on:
  - analyze_homepage
  - generate_recommendations
  - analyze_robots
inputs:
- name: target_url
  type: string
  required: true
  validation:
    pattern: ^https?://[\w\-\.]+(\.[\w\-\.]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?$
  description: The target website URL to analyze
- name: analysis_depth
  type: string
  enum:
  - basic
  - standard
  - comprehensive
  default: standard
  description: 'Depth of analysis: basic, standard, comprehensive'
- name: max_pages
  type: integer
  default: 10
  validation:
    min: 1
    max: 100
  description: Maximum number of pages to analyze
outputs:
  seo_report:
    source: generate_report
    description: Complete SEO analysis report
  recommendations:
    source: generate_recommendations
    description: AI-generated recommendations
  homepage_metrics:
    source: analyze_homepage
    description: Homepage SEO metrics
version: 2.0.0
description: Comprehensive technical SEO analysis workflow that crawls websites, analyzes
  technical factors, and provides actionable recommendations
model_clients:
  seo_analyzer:
    model: gpt-4o-mini
    api_key: ${env.OPENAI_API_KEY}
    provider: openai
    temperature: 0.3
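
Because generate_recommendations returns raw model output, generate_report parses it defensively and substitutes an empty skeleton when the JSON is invalid. A standalone sketch of the same defensive check against the keys the prompt requests (the validate_recommendations helper is illustrative, not part of the workflow):

import json

EXPECTED_KEYS = {
    "executive_summary",
    "critical_issues",
    "high_priority_recommendations",
    "quick_wins",
    "long_term_improvements",
    "overall_health_score",
}

def validate_recommendations(raw: str) -> dict:
    # Parse the model reply and report any required keys that are missing,
    # mirroring the fallback behavior of the generate_report task.
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        data = None
    if not isinstance(data, dict):
        return {"valid": False, "missing_keys": sorted(EXPECTED_KEYS)}
    missing = sorted(EXPECTED_KEYS - data.keys())
    return {"valid": not missing, "missing_keys": missing, "data": data}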
Executions

Execution ID  Status     Started              Duration
26e0d0e6...   COMPLETED  2025-07-07 08:00:17  N/A