Technical SEO Analysis

Comprehensive technical SEO analysis workflow that crawls websites, analyzes technical factors, and provides actionable recommendations

Workflow Information

ID: technical_seo_analysis_v1

Namespace: default

Version: 1.0.0

Created: 2025-07-07

Updated: 2025-07-07

Tasks: 16

Inputs
Name                 Type     Required  Default
target_url           string   Required  None
analysis_depth       string   Optional  standard
max_pages            integer  Optional  10
include_competitors  boolean  Optional  false
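A minimal example payload for these inputs (illustrative only; the exact submission format depends on the workflow engine hosting this definition):

inputs = {
    "target_url": "https://example.com",   # required; must match the URL pattern in the YAML below
    "analysis_depth": "standard",          # one of: basic, standard, comprehensive
    "max_pages": 10,                       # integer between 1 and 100
    "include_competitors": False,          # optional; defaults to false
}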
Outputs
Name             Type    Source                     Description
seo_report       string  generate_report            Complete SEO analysis report
recommendations  string  generate_recommendations   AI-generated recommendations
summary_metrics  string  aggregate_results          High-level metrics summary
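The seo_report output is the dict assembled by the generate_report task (see the YAML source below). Its shape is roughly the following; field names come from that script, all values here are placeholders:

seo_report = {
    "report_metadata": {
        "generated_at": "2025-07-07T07:57:38",
        "target_url": "https://example.com",
        "analysis_depth": "standard",
        "pages_analyzed": 10,
    },
    "summary_metrics": {
        "overall_health_score": 7,
        "total_issues_found": 12,
        "average_page_speed": "1.4s",
        "mobile_readiness": "90.0%",
        "schema_implementation": "40.0%",
    },
    "executive_summary": "...",
    "critical_issues": [],
    "recommendations": {"high_priority": [], "quick_wins": [], "long_term": []},
    "detailed_findings": {"top_issues": [], "page_analysis": []},
}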
Tasks
validate_url
script

Validates the target URL and extracts its scheme, domain (with any leading www stripped), and path.

fetch_robots_txt
http

Fetches /robots.txt from the target domain (GET, 10-second timeout, up to 2 attempts).

fetch_sitemap
http

Fetches /sitemap.xml from the target domain (GET, 10-second timeout, up to 2 attempts).

analyze_robots
script

Parses robots.txt into per-user-agent Allow/Disallow rules and flags issues such as a site-wide "Disallow: /" or a missing Sitemap reference.

parse_sitemap
script

Parses the sitemap XML and collects up to max_pages page URLs.

discover_urls
script

Runs only when the sitemap yields no URLs; builds a fallback list from the homepage and common paths (/about, /services, /products, /blog, /contact).

prepare_urls
conditional_router

Routes to one of three URL-selection branches depending on whether sitemap or discovered URLs are available.

Conditional Router
Router Type: condition
Default Route: use_homepage_only
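The router evaluates its conditions in order and falls back to the default route. In plain Python the selection logic looks roughly like this (an illustrative sketch, not the engine's implementation):

def pick_route(parse_sitemap, discover_urls):
    # First matching condition wins; mirrors the conditions list under
    # prepare_urls in the YAML source below.
    if parse_sitemap.get("total_urls", 0) > 0:
        return "use_sitemap_urls"
    if discover_urls and discover_urls.get("total_urls", 0) > 0:
        return "use_discovered_urls"
    return "use_homepage_only"  # default_route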
use_sitemap_urls
script

Passes the sitemap URLs along as the list of pages to analyze.

use_discovered_urls
script

Passes the discovered common-path URLs along as the list of pages to analyze.

use_homepage_only
script

Falls back to analyzing only the target URL itself.

merge_url_sources
script

Collects the output of whichever selection branch actually ran into a single URL list, recording its source.

analyze_pages
loop

Iterates analyze_single_page over the merged URL list.

Loop Configuration
Type: while
Max Iterations: 100
Concurrency: 3
Items Source: ${merge_url_sources.urls}
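Conceptually, this step fans analyze_single_page out over merge_url_sources.urls with up to three pages in flight at once. A sketch under the assumption that the engine behaves like a bounded-concurrency map:

from concurrent.futures import ThreadPoolExecutor

def analyze_pages(urls, analyze_single_page, concurrency=3, max_iterations=100):
    # Bounded-concurrency map over the merged URL list, capped at max_iterations.
    with ThreadPoolExecutor(max_workers=concurrency) as pool:
        return list(pool.map(analyze_single_page, urls[:max_iterations]))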
analyze_single_page
script

Fetches a single page and audits its title, meta description, canonical link, H1/H2 counts, image alt text, internal/external links, schema markup, viewport tag, and load time.

aggregate_results
script

Aggregates per-page results into site-wide metrics (issue counts, average load time, mobile and schema coverage) and ranks the most frequent issues.

generate_recommendations
ai_agent

Sends the aggregated metrics and robots.txt findings to an AI analyst agent and asks for structured, prioritized recommendations; the expected response shape is sketched below.
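Because generate_report parses this agent's output with json.loads and reads specific keys, the response is expected to look roughly like the following (key names are taken from the generate_report script; the values are placeholders):

expected_agent_response = {
    "executive_summary": "Two to three sentence overview of site health.",
    "critical_issues": ["Site is blocking all crawlers"],
    "high_priority_recommendations": ["Add missing meta descriptions"],
    "quick_wins": ["Add alt text to images"],
    "long_term_improvements": ["Implement schema markup site-wide"],
    "estimated_impact_score": 7,
}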

generate_report
script

Assembles the final JSON report from the aggregated metrics and the AI-generated recommendations.

YAML Source
id: technical_seo_analysis_v1
name: Technical SEO Analysis
tasks:
- id: validate_url
  type: script
  script: "import json\nfrom urllib.parse import urlparse\n\ntarget_url = \"\"\"${target_url}\"\
    \"\"\n\ntry:\n    parsed = urlparse(target_url)\n    if not parsed.scheme or not\
    \ parsed.netloc:\n        raise ValueError(\"Invalid URL format\")\n    \n   \
    \ domain = parsed.netloc\n    if domain.startswith('www.'):\n        domain =\
    \ domain[4:]\n    \n    result = {\n        \"valid\": True,\n        \"domain\"\
    : domain,\n        \"full_url\": target_url,\n        \"scheme\": parsed.scheme,\n\
    \        \"path\": parsed.path or \"/\"\n    }\nexcept Exception as e:\n    result\
    \ = {\n        \"valid\": False,\n        \"error\": str(e)\n    }\n\nprint(f\"\
    __OUTPUTS__ {json.dumps(result)}\")\n"
- id: fetch_robots_txt
  type: http
  when: validate_url.valid == True
  request:
    url: ${validate_url.scheme}://${validate_url.domain}/robots.txt
    method: GET
  depends_on:
  - validate_url
  retry_policy:
    max_attempts: 2
  timeout_seconds: 10
- id: fetch_sitemap
  type: http
  when: validate_url.valid == True
  request:
    url: ${validate_url.scheme}://${validate_url.domain}/sitemap.xml
    method: GET
  depends_on:
  - validate_url
  retry_policy:
    max_attempts: 2
  timeout_seconds: 10
- id: analyze_robots
  type: script
  when: fetch_robots_txt.status_code == 200
  script: "import json\n\nrobots_content = \"\"\"${fetch_robots_txt.body}\"\"\"\n\n\
    issues = []\nrecommendations = []\n\n# Parse robots.txt\nlines = robots_content.strip().split('\\\
    n')\nuser_agents = {}\ncurrent_agent = None\n\nfor line in lines:\n    line =\
    \ line.strip()\n    if line.startswith('User-agent:'):\n        current_agent\
    \ = line.split(':', 1)[1].strip()\n        user_agents[current_agent] = {'allow':\
    \ [], 'disallow': []}\n    elif line.startswith('Disallow:') and current_agent:\n\
    \        path = line.split(':', 1)[1].strip()\n        if path:\n            user_agents[current_agent]['disallow'].append(path)\n\
    \    elif line.startswith('Allow:') and current_agent:\n        path = line.split(':',\
    \ 1)[1].strip()\n        if path:\n            user_agents[current_agent]['allow'].append(path)\n\
    \n# Check for issues\nif '*' in user_agents and '/' in user_agents['*']['disallow']:\n\
    \    issues.append(\"Site is blocking all crawlers\")\n    recommendations.append(\"\
    Remove 'Disallow: /' for all user agents unless intentional\")\n\nif 'Sitemap:'\
    \ not in robots_content:\n    issues.append(\"No sitemap reference in robots.txt\"\
    )\n    recommendations.append(\"Add sitemap URL to robots.txt\")\n\nresult = {\n\
    \    \"user_agents\": user_agents,\n    \"issues\": issues,\n    \"recommendations\"\
    : recommendations,\n    \"has_robots\": True\n}\n\nprint(f\"__OUTPUTS__ {json.dumps(result)}\"\
    )\n"
  depends_on:
  - fetch_robots_txt
- id: parse_sitemap
  type: script
  when: fetch_sitemap.status_code == 200
  script: "import json\nimport xml.etree.ElementTree as ET\n\nsitemap_content = \"\
    \"\"${fetch_sitemap.body}\"\"\"\nmax_pages = int(\"\"\"${max_pages}\"\"\")\n\n\
    urls = []\ntry:\n    root = ET.fromstring(sitemap_content)\n    \n    # Handle\
    \ different sitemap namespaces\n    ns = {'sm': 'http://www.sitemaps.org/schemas/sitemap/0.9'}\n\
    \    \n    for url in root.findall('.//sm:url', ns):\n        loc = url.find('sm:loc',\
    \ ns)\n        if loc is not None and loc.text:\n            urls.append(loc.text)\n\
    \    \n    # If no namespace, try without\n    if not urls:\n        for url in\
    \ root.findall('.//url'):\n            loc = url.find('loc')\n            if loc\
    \ is not None and loc.text:\n                urls.append(loc.text)\nexcept:\n\
    \    pass\n\n# Limit URLs based on max_pages\nurls = urls[:max_pages] if urls\
    \ else []\n\nresult = {\n    \"urls\": urls,\n    \"total_urls\": len(urls),\n\
    \    \"has_sitemap\": len(urls) > 0\n}\n\nprint(f\"__OUTPUTS__ {json.dumps(result)}\"\
    )\n"
  depends_on:
  - fetch_sitemap
  requirements:
  - lxml==4.9.3
- id: discover_urls
  type: script
  when: parse_sitemap.total_urls == 0
  script: "import json\n\n# If no sitemap, start with homepage and common pages\n\
    base_url = \"\"\"${validate_url.full_url}\"\"\"\n\ncommon_paths = [\n    \"/\"\
    ,\n    \"/about\",\n    \"/services\",\n    \"/products\",\n    \"/blog\",\n \
    \   \"/contact\"\n]\n\nurls = []\nfor path in common_paths[:int(\"\"\"${max_pages}\"\
    \"\")]:\n    if not base_url.endswith('/') and not path.startswith('/'):\n   \
    \     url = base_url + '/' + path\n    else:\n        url = base_url.rstrip('/')\
    \ + path\n    urls.append(url)\n\nresult = {\n    \"urls\": urls,\n    \"total_urls\"\
    : len(urls),\n    \"source\": \"common_paths\"\n}\n\nprint(f\"__OUTPUTS__ {json.dumps(result)}\"\
    )\n"
  depends_on:
  - parse_sitemap
  - validate_url
- id: prepare_urls
  type: conditional_router
  conditions:
  - route: use_sitemap_urls
    expression: parse_sitemap.total_urls > 0
  - route: use_discovered_urls
    expression: discover_urls.total_urls > 0
  depends_on:
  - parse_sitemap
  - discover_urls
  default_route: use_homepage_only
- id: use_sitemap_urls
  type: script
  when: prepare_urls == 'use_sitemap_urls'
  script: 'import json

    urls = ${parse_sitemap.urls}

    result = {"urls_to_analyze": urls, "source": "sitemap"}

    print(f"__OUTPUTS__ {json.dumps(result)}")

    '
  depends_on:
  - prepare_urls
- id: use_discovered_urls
  type: script
  when: prepare_urls == 'use_discovered_urls'
  script: 'import json

    urls = ${discover_urls.urls}

    result = {"urls_to_analyze": urls, "source": "discovery"}

    print(f"__OUTPUTS__ {json.dumps(result)}")

    '
  depends_on:
  - prepare_urls
- id: use_homepage_only
  type: script
  when: prepare_urls == 'use_homepage_only'
  script: 'import json

    result = {"urls_to_analyze": ["""${target_url}"""], "source": "homepage_only"}

    print(f"__OUTPUTS__ {json.dumps(result)}")

    '
  depends_on:
  - prepare_urls
- id: merge_url_sources
  type: script
  script: "import json\n\n# Get URLs from whichever source was used\nurls = []\nsource\
    \ = \"unknown\"\n\nif \"\"\"${use_sitemap_urls}\"\"\" != \"null\":\n    data =\
    \ json.loads(\"\"\"${use_sitemap_urls}\"\"\")\n    urls = data.get(\"urls_to_analyze\"\
    , [])\n    source = data.get(\"source\", \"sitemap\")\nelif \"\"\"${use_discovered_urls}\"\
    \"\" != \"null\":\n    data = json.loads(\"\"\"${use_discovered_urls}\"\"\")\n\
    \    urls = data.get(\"urls_to_analyze\", [])\n    source = data.get(\"source\"\
    , \"discovery\")\nelif \"\"\"${use_homepage_only}\"\"\" != \"null\":\n    data\
    \ = json.loads(\"\"\"${use_homepage_only}\"\"\")\n    urls = data.get(\"urls_to_analyze\"\
    , [])\n    source = data.get(\"source\", \"homepage_only\")\n\nresult = {\n  \
    \  \"urls\": urls,\n    \"count\": len(urls),\n    \"source\": source\n}\n\nprint(f\"\
    __OUTPUTS__ {json.dumps(result)}\")\n"
  depends_on:
  - use_sitemap_urls
  - use_discovered_urls
  - use_homepage_only
- id: analyze_pages
  type: loop
  task_id: analyze_single_page
  depends_on:
  - merge_url_sources
  concurrency: 3
  items_source: ${merge_url_sources.urls}
  max_iterations: 100
- id: analyze_single_page
  type: script
  script: "import json\nimport requests\nfrom bs4 import BeautifulSoup\nfrom urllib.parse\
    \ import urlparse, urljoin\nimport time\n\nurl = \"\"\"${item}\"\"\"\n\ntry:\n\
    \    start_time = time.time()\n    response = requests.get(url, timeout=15, headers={\n\
    \        'User-Agent': 'Mozilla/5.0 (compatible; SEO-Analyzer/1.0)'\n    })\n\
    \    load_time = time.time() - start_time\n    \n    soup = BeautifulSoup(response.text,\
    \ 'html.parser')\n    \n    # Extract SEO elements\n    title = soup.find('title')\n\
    \    meta_desc = soup.find('meta', attrs={'name': 'description'})\n    meta_keywords\
    \ = soup.find('meta', attrs={'name': 'keywords'})\n    canonical = soup.find('link',\
    \ attrs={'rel': 'canonical'})\n    \n    # Headers\n    h1_tags = soup.find_all('h1')\n\
    \    h2_tags = soup.find_all('h2')\n    \n    # Images\n    images = soup.find_all('img')\n\
    \    images_without_alt = [img for img in images if not img.get('alt')]\n    \n\
    \    # Links\n    links = soup.find_all('a', href=True)\n    internal_links =\
    \ []\n    external_links = []\n    \n    parsed_url = urlparse(url)\n    for link\
    \ in links:\n        href = link['href']\n        if href.startswith('http'):\n\
    \            if parsed_url.netloc in href:\n                internal_links.append(href)\n\
    \            else:\n                external_links.append(href)\n        elif\
    \ href.startswith('/'):\n            internal_links.append(urljoin(url, href))\n\
    \    \n    # Schema markup\n    schema_scripts = soup.find_all('script', type='application/ld+json')\n\
    \    \n    # Mobile viewport\n    viewport = soup.find('meta', attrs={'name':\
    \ 'viewport'})\n    \n    # Calculate issues\n    issues = []\n    if not title\
    \ or not title.text:\n        issues.append(\"Missing title tag\")\n    elif len(title.text)\
    \ > 60:\n        issues.append(\"Title tag too long (>60 chars)\")\n    \n   \
    \ if not meta_desc:\n        issues.append(\"Missing meta description\")\n   \
    \ elif meta_desc and len(meta_desc.get('content', '')) > 160:\n        issues.append(\"\
    Meta description too long (>160 chars)\")\n    \n    if len(h1_tags) == 0:\n \
    \       issues.append(\"No H1 tag found\")\n    elif len(h1_tags) > 1:\n     \
    \   issues.append(f\"Multiple H1 tags found ({len(h1_tags)})\")\n    \n    if\
    \ images_without_alt:\n        issues.append(f\"{len(images_without_alt)} images\
    \ without alt text\")\n    \n    if not viewport:\n        issues.append(\"No\
    \ mobile viewport meta tag\")\n    \n    if load_time > 3:\n        issues.append(f\"\
    Slow page load time ({load_time:.2f}s)\")\n    \n    result = {\n        \"url\"\
    : url,\n        \"status_code\": response.status_code,\n        \"load_time\"\
    : round(load_time, 2),\n        \"title\": title.text if title else None,\n  \
    \      \"title_length\": len(title.text) if title else 0,\n        \"meta_description\"\
    : meta_desc.get('content') if meta_desc else None,\n        \"meta_description_length\"\
    : len(meta_desc.get('content', '')) if meta_desc else 0,\n        \"canonical_url\"\
    : canonical.get('href') if canonical else None,\n        \"h1_count\": len(h1_tags),\n\
    \        \"h2_count\": len(h2_tags),\n        \"images_total\": len(images),\n\
    \        \"images_without_alt\": len(images_without_alt),\n        \"internal_links\"\
    : len(internal_links),\n        \"external_links\": len(external_links),\n   \
    \     \"has_schema_markup\": len(schema_scripts) > 0,\n        \"has_viewport\"\
    : viewport is not None,\n        \"issues\": issues,\n        \"content_length\"\
    : len(response.text)\n    }\n    \nexcept Exception as e:\n    result = {\n  \
    \      \"url\": url,\n        \"error\": str(e),\n        \"status_code\": 0\n\
    \    }\n\nprint(f\"__OUTPUTS__ {json.dumps(result)}\")\n"
  requirements:
  - requests==2.31.0
  - beautifulsoup4==4.12.2
  - lxml==4.9.3
- id: aggregate_results
  type: script
  script: "import json\n\npages_data = ${analyze_pages}\n\n# Aggregate metrics\ntotal_pages\
    \ = len(pages_data)\nsuccessful_pages = [p for p in pages_data if p.get('status_code')\
    \ == 200]\n\ntotal_issues = 0\nall_issues = {}\navg_load_time = 0\npages_with_schema\
    \ = 0\npages_with_viewport = 0\n\nfor page in successful_pages:\n    issues =\
    \ page.get('issues', [])\n    total_issues += len(issues)\n    \n    for issue\
    \ in issues:\n        all_issues[issue] = all_issues.get(issue, 0) + 1\n    \n\
    \    avg_load_time += page.get('load_time', 0)\n    if page.get('has_schema_markup'):\n\
    \        pages_with_schema += 1\n    if page.get('has_viewport'):\n        pages_with_viewport\
    \ += 1\n\nif successful_pages:\n    avg_load_time /= len(successful_pages)\n\n\
    # Sort issues by frequency\nsorted_issues = sorted(all_issues.items(), key=lambda\
    \ x: x[1], reverse=True)\n\nresult = {\n    \"total_pages_analyzed\": total_pages,\n\
    \    \"successful_pages\": len(successful_pages),\n    \"failed_pages\": total_pages\
    \ - len(successful_pages),\n    \"total_issues\": total_issues,\n    \"average_load_time\"\
    : round(avg_load_time, 2),\n    \"pages_with_schema\": pages_with_schema,\n  \
    \  \"pages_with_viewport\": pages_with_viewport,\n    \"mobile_ready_percentage\"\
    : round((pages_with_viewport / len(successful_pages) * 100) if successful_pages\
    \ else 0, 1),\n    \"schema_markup_percentage\": round((pages_with_schema / len(successful_pages)\
    \ * 100) if successful_pages else 0, 1),\n    \"top_issues\": sorted_issues[:10],\n\
    \    \"pages_data\": pages_data\n}\n\nprint(f\"__OUTPUTS__ {json.dumps(result)}\"\
    )\n"
  depends_on:
  - analyze_pages
- id: generate_recommendations
  type: ai_agent
  prompt: 'Analyze the following technical SEO data and provide actionable recommendations:


    Site: ${target_url}

    Analysis Type: ${analysis_depth}


    Aggregate Results:

    ${aggregate_results}


    Robots.txt Analysis:

    ${analyze_robots}


    Please provide:

    1. Executive Summary (2-3 sentences)

    2. Critical Issues (must fix immediately)

    3. High Priority Recommendations

    4. Quick Wins (easy fixes with high impact)

    5. Long-term Improvements

    6. Estimated Impact Score (1-10) for overall site health


    Format as structured JSON with these sections.

    '
  agent_type: analyst
  depends_on:
  - aggregate_results
  - analyze_robots
  model_client_id: seo_analyzer
- id: generate_report
  type: script
  script: "import json\nfrom datetime import datetime\n\n# Parse inputs\naggregate\
    \ = json.loads(\"\"\"${aggregate_results}\"\"\")\nrecommendations = json.loads(\"\
    \"\"${generate_recommendations}\"\"\")\n\nreport = {\n    \"report_metadata\"\
    : {\n        \"generated_at\": datetime.now().isoformat(),\n        \"target_url\"\
    : \"\"\"${target_url}\"\"\",\n        \"analysis_depth\": \"\"\"${analysis_depth}\"\
    \"\",\n        \"pages_analyzed\": aggregate[\"total_pages_analyzed\"]\n    },\n\
    \    \"summary_metrics\": {\n        \"overall_health_score\": recommendations.get(\"\
    estimated_impact_score\", 0),\n        \"total_issues_found\": aggregate[\"total_issues\"\
    ],\n        \"average_page_speed\": f\"{aggregate['average_load_time']}s\",\n\
    \        \"mobile_readiness\": f\"{aggregate['mobile_ready_percentage']}%\",\n\
    \        \"schema_implementation\": f\"{aggregate['schema_markup_percentage']}%\"\
    \n    },\n    \"executive_summary\": recommendations.get(\"executive_summary\"\
    , \"\"),\n    \"critical_issues\": recommendations.get(\"critical_issues\", []),\n\
    \    \"recommendations\": {\n        \"high_priority\": recommendations.get(\"\
    high_priority_recommendations\", []),\n        \"quick_wins\": recommendations.get(\"\
    quick_wins\", []),\n        \"long_term\": recommendations.get(\"long_term_improvements\"\
    , [])\n    },\n    \"detailed_findings\": {\n        \"top_issues\": aggregate[\"\
    top_issues\"],\n        \"page_analysis\": aggregate[\"pages_data\"]\n    }\n\
    }\n\nprint(f\"__OUTPUTS__ {json.dumps(report)}\")\n"
  depends_on:
  - aggregate_results
  - generate_recommendations
inputs:
- name: target_url
  type: string
  required: true
  validation:
    pattern: ^https?://[\w\-\.]+(\.[\w\-\.]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?$
  description: The target website URL to analyze
- enum:
  - basic
  - standard
  - comprehensive
  name: analysis_depth
  type: string
  default: standard
  description: 'Depth of analysis: basic, standard, comprehensive'
- name: max_pages
  type: integer
  default: 10
  validation:
    max: 100
    min: 1
  description: Maximum number of pages to analyze
- name: include_competitors
  type: boolean
  default: false
  description: Include competitor analysis
outputs:
  seo_report:
    source: generate_report
    description: Complete SEO analysis report
  recommendations:
    source: generate_recommendations
    description: AI-generated recommendations
  summary_metrics:
    source: aggregate_results
    description: High-level metrics summary
version: 1.0.0
description: Comprehensive technical SEO analysis workflow that crawls websites, analyzes
  technical factors, and provides actionable recommendations
model_clients:
  seo_analyzer:
    model: gpt-4o-mini
    api_key: ${env.OPENAI_API_KEY}
    provider: openai
    temperature: 0.3
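Every script task above reports its result by printing a single stdout line of the form "__OUTPUTS__ {json}". How the engine consumes that line is not shown in this definition; a minimal sketch of the assumed convention:

import json

def parse_outputs(stdout: str):
    # Scan stdout for the __OUTPUTS__ marker and decode the JSON payload
    # that follows it (engine-specific behavior; assumed here).
    for line in reversed(stdout.splitlines()):
        if line.startswith("__OUTPUTS__ "):
            return json.loads(line[len("__OUTPUTS__ "):])
    return None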
Executions
Execution ID  Status     Started              Duration
683a9fba...   COMPLETED  2025-07-07 07:57:38  N/A