SEO Technical Audit Workflow

Comprehensive technical SEO audit for websites, covering crawling, analysis, and reporting

Workflow Information

ID: seo_technical_audit_v1

Namespace: default

Version: 1.0.1

Created: 2025-07-08

Updated: 2025-07-08

Tasks: 13

Inputs
Name         Type     Required  Default        Description
target_url   string   Required  None           The URL of the website to audit (http:// or https://)
crawl_depth  integer  Optional  3              Maximum crawl depth (1-10)
max_pages    integer  Optional  100            Maximum number of pages to crawl (10-1000)
audit_type   string   Optional  comprehensive  Type of audit: basic, comprehensive, or focused
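
The constraints shown above (URL pattern, numeric ranges, and the audit_type values) come from the input definitions in the YAML source further down. As a rough pre-flight check, a caller could validate a candidate inputs payload before starting a run; the sketch below is illustrative only, with made-up example values, and is not part of the workflow itself.

import re

# Constraints mirrored from the workflow's input definitions (see YAML Source).
URL_PATTERN = re.compile(r"^https?://.*")
AUDIT_TYPES = {"basic", "comprehensive", "focused"}

def check_inputs(inputs):
    """Return a list of problems with a candidate inputs payload (empty if valid)."""
    problems = []
    if not URL_PATTERN.match(inputs.get("target_url", "")):
        problems.append("target_url must start with http:// or https://")
    if not 1 <= inputs.get("crawl_depth", 3) <= 10:
        problems.append("crawl_depth must be between 1 and 10")
    if not 10 <= inputs.get("max_pages", 100) <= 1000:
        problems.append("max_pages must be between 10 and 1000")
    if inputs.get("audit_type", "comprehensive") not in AUDIT_TYPES:
        problems.append("audit_type must be basic, comprehensive, or focused")
    return problems

# Example payload with illustrative values
print(check_inputs({"target_url": "https://example.com", "crawl_depth": 3,
                    "max_pages": 100, "audit_type": "comprehensive"}))
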
Outputs
Name               Type    Source                     Description
ai_insights        string  ai_seo_analysis            AI-generated SEO insights
audit_report       string  compile_report             Complete SEO technical audit report
recommendations    string  generate_recommendations  Prioritized optimization recommendations
technical_summary  string  technical_seo_analysis     Technical issues summary
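
How these outputs are surfaced depends on the platform's execution API, which this page does not show. Assuming audit_report is returned as the JSON string printed by compile_report, a consumer could pull out the headline figures along these lines (a sketch only; the field names match the compile_report script in the YAML source):

import json

def summarize(audit_report):
    """Condense the compile_report JSON into a one-line summary."""
    report = json.loads(audit_report)
    summary = report.get("executive_summary", {})
    return (f"{report.get('target_url', '?')}: "
            f"health={summary.get('overall_health_score', 'n/a')}, "
            f"critical_issues={summary.get('critical_issues_found', 0)}")

# Illustrative string shaped like compile_report's output
example = json.dumps({
    "target_url": "https://example.com",
    "executive_summary": {"overall_health_score": "Good (70-89)",
                          "critical_issues_found": 3},
})
print(summarize(example))
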
Tasks
validate_url
script

Parses the target URL, validates its format, and derives the domain, base URL, robots.txt URL, and sitemap URL.

check_robots_txt
http

Fetches the site's robots.txt via HTTP GET; continues on error so a missing file does not stop the audit.

analyze_robots
script

Parses the robots.txt response for disallowed paths, crawl delay, and sitemap declarations.

crawl_website
mcp

Crawls the site with the MCP crawl tool, bounded by max_pages and crawl_depth, keeping main content only.

process_crawl_data
script

Normalizes the crawl results into per-page records (title, description, status code, word count, content length).

technical_seo_analysis
script

Analyzes meta tags, status codes, thin content, URL depth, and page size, and flags duplicate titles and descriptions.

get_serp_data
mcp

Retrieves the top Google organic results for the domain (United States, English) via the DataForSEO MCP tool; continues on error.

get_domain_metrics
mcp

Retrieves a Google domain rank overview for the domain from DataForSEO Labs; continues on error.

get_backlinks
mcp

Retrieves a backlink summary for the domain (backlinks, referring domains, rank) via DataForSEO; continues on error.

ai_seo_analysis
ai_agent

Analyzes the technical findings, SERP data, and domain metrics with the seo_analyzer model client and returns structured SEO insights.

generate_recommendations
ai_agent

Turns the analysis into a prioritized action plan (immediate fixes, short-term improvements, long-term strategy) using the content_optimizer model client.

compile_report
script

Merges the technical analysis, AI insights, recommendations, and backlink data into the final report and computes an overall health score.

store_audit_results
storage

Inserts the compiled report and run metadata into the seo_audit_results table; continues on error.
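
Each script task above returns its result by printing a single line of the form "__OUTPUTS__ {json}", and later tasks read those values through environment variables such as check_robots_txt.status_code or template references such as ${validate_url.domain}. The engine handles that wiring; purely as an illustration of the convention, the stdout of a script task could be parsed like this (a sketch, not the platform's actual implementation):

import json

def extract_outputs(stdout):
    """Return the JSON payload from the last __OUTPUTS__ line a script printed."""
    outputs = {}
    for line in stdout.splitlines():
        if line.startswith("__OUTPUTS__ "):
            outputs = json.loads(line[len("__OUTPUTS__ "):])
    return outputs

# Example stdout captured from a script task (illustrative)
stdout = 'fetching...\n__OUTPUTS__ {"validation_status": "valid", "domain": "example.com"}\n'
print(extract_outputs(stdout))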

YAML Source
id: seo_technical_audit_v1
name: SEO Technical Audit Workflow
tasks:
- id: validate_url
  type: script
  script: |
    import json
    import urllib.parse
    from urllib.parse import urlparse

    target_url = "${target_url}"

    # Parse and validate URL
    try:
        parsed = urlparse(target_url)
        if not parsed.scheme or not parsed.netloc:
            raise ValueError("Invalid URL format")

        domain = parsed.netloc
        base_url = f"{parsed.scheme}://{parsed.netloc}"

        output = {
            "url": target_url,
            "domain": domain,
            "base_url": base_url,
            "robots_txt_url": f"{base_url}/robots.txt",
            "sitemap_url": f"{base_url}/sitemap.xml",
            "validation_status": "valid"
        }
    except Exception as e:
        output = {
            "validation_status": "invalid",
            "error": str(e)
        }

    print(f"__OUTPUTS__ {json.dumps(output)}")
  timeout_seconds: 30
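# Illustrative validate_url output for target_url=https://example.com/blog
# (a single __OUTPUTS__ line printed by the script above; values follow from urlparse):
#   __OUTPUTS__ {"url": "https://example.com/blog", "domain": "example.com",
#     "base_url": "https://example.com",
#     "robots_txt_url": "https://example.com/robots.txt",
#     "sitemap_url": "https://example.com/sitemap.xml",
#     "validation_status": "valid"}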
- id: check_robots_txt
  type: http
  when: validation_status == 'valid'
  request:
    url: ${validate_url.robots_txt_url}
    method: GET
  depends_on:
  - validate_url
  retry_policy:
    max_attempts: 2
    initial_interval: 1
  timeout_seconds: 30
  continue_on_error: true
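# check_robots_txt is marked continue_on_error, so a missing or unreachable robots.txt
# does not abort the audit; analyze_robots then reports has_robots_txt: false and keeps
# the default crawl settings (no disallowed paths, no crawl delay).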
- id: analyze_robots
  type: script
  script: |
    import json
    import os

    robots_data = os.environ.get('check_robots_txt.response', '{}')
    status_code = os.environ.get('check_robots_txt.status_code', '404')

    analysis = {
        "has_robots_txt": status_code == "200",
        "allows_crawling": True,
        "disallowed_paths": [],
        "crawl_delay": 0,
        "sitemaps": []
    }

    if status_code == "200":
        try:
            response = json.loads(robots_data)
            content = response.get('body', '')

            # Parse robots.txt content
            for line in content.split('\n'):
                line = line.strip().lower()
                if line.startswith('disallow:'):
                    path = line.replace('disallow:', '').strip()
                    if path and path != '/':
                        analysis['disallowed_paths'].append(path)
                elif line.startswith('crawl-delay:'):
                    try:
                        analysis['crawl_delay'] = int(line.replace('crawl-delay:', '').strip())
                    except:
                        pass
                elif line.startswith('sitemap:'):
                    sitemap = line.replace('sitemap:', '').strip()
                    analysis['sitemaps'].append(sitemap)
        except:
            pass

    print(f"__OUTPUTS__ {json.dumps(analysis)}")
  depends_on:
  - check_robots_txt
  timeout_seconds: 30
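# Illustrative analyze_robots result for a robots.txt that contains
#   Disallow: /admin/
#   Crawl-delay: 5
#   Sitemap: https://example.com/sitemap.xml
# __OUTPUTS__ {"has_robots_txt": true, "allows_crawling": true,
#   "disallowed_paths": ["/admin/"], "crawl_delay": 5,
#   "sitemaps": ["https://example.com/sitemap.xml"]}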
- id: crawl_website
  type: mcp
  tool_name: crawl
  depends_on:
  - analyze_robots
  retry_policy:
    max_attempts: 2
    initial_interval: 5
  deployment_id: pod-9qgkf5zz
  tool_arguments:
    url: ${target_url}
    limit: ${max_pages}
    max_depth: ${crawl_depth}
    include_raw_html: false
    only_main_content: true
  timeout_seconds: 300
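# The process_crawl_data script below expects the crawl result to expose a "data" list
# of pages, each carrying "url", "content", and a "metadata" object with "title",
# "description", and "statusCode". Pages beyond the first 50 are counted but not
# analyzed in detail.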
- id: process_crawl_data
  type: script
  script: |
    import json
    import os
    from urllib.parse import urlparse

    # Try different access patterns for crawl data
    crawl_data = None
    for var_name in ['crawl_website.result', 'crawl_website.output', 'crawl_website']:
        var_value = os.environ.get(var_name, '')
        if var_value and var_value not in ['{}', '[]']:
            try:
                crawl_data = json.loads(var_value)
                break
            except:
                continue

    if not crawl_data:
        crawl_data = {"data": []}

    pages_data = crawl_data.get('data', [])

    # Process each page
    processed_pages = []
    for page in pages_data[:50]:  # Limit to 50 pages for processing
        page_info = {
            "url": page.get('url', ''),
            "title": page.get('metadata', {}).get('title', ''),
            "description": page.get('metadata', {}).get('description', ''),
            "status_code": page.get('metadata', {}).get('statusCode', 200),
            "content_length": len(page.get('content', '')),
            "has_title": bool(page.get('metadata', {}).get('title')),
            "has_description": bool(page.get('metadata', {}).get('description')),
            "word_count": len(page.get('content', '').split()),
            "headers": page.get('metadata', {}).get('headers', {})
        }
        processed_pages.append(page_info)

    summary = {
        "total_pages_crawled": len(pages_data),
        "pages_analyzed": len(processed_pages),
        "pages_data": processed_pages,
        "crawl_status": "success" if pages_data else "no_data"
    }

    print(f"__OUTPUTS__ {json.dumps(summary)}")
  depends_on:
  - crawl_website
  requirements:
  - beautifulsoup4==4.12.2
  - lxml==4.9.3
  timeout_seconds: 120
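# Illustrative per-page record emitted by process_crawl_data (these are the fields
# technical_seo_analysis relies on; the values here are made up):
#   {"url": "https://example.com/blog/post", "title": "Example post",
#    "description": "A short example.", "status_code": 200, "content_length": 5421,
#    "has_title": true, "has_description": true, "word_count": 830, "headers": {}}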
- id: technical_seo_analysis
  type: script
  script: |
    import json
    import os
    from collections import Counter

    # Get processed data
    process_data = json.loads(os.environ.get('process_crawl_data', '{}'))
    pages_data = process_data.get('pages_data', [])

    # Initialize analysis results
    analysis = {
        "meta_tags": {
            "missing_titles": [],
            "duplicate_titles": [],
            "missing_descriptions": [],
            "duplicate_descriptions": [],
            "title_length_issues": []
        },
        "status_codes": {
            "200": 0,
            "301": 0,
            "302": 0,
            "404": 0,
            "500": 0,
            "other": 0
        },
        "content_analysis": {
            "thin_content_pages": [],
            "average_word_count": 0,
            "pages_under_300_words": 0
        },
        "url_structure": {
            "deep_urls": [],
            "non_friendly_urls": []
        },
        "performance_indicators": {
            "large_pages": []
        }
    }

    # Analyze pages
    titles = []
    descriptions = []
    word_counts = []

    for page in pages_data:
        url = page['url']

        # Title analysis
        if not page['has_title']:
            analysis['meta_tags']['missing_titles'].append(url)
        else:
            title = page['title']
            titles.append(title)
            if len(title) < 30 or len(title) > 60:
                analysis['meta_tags']['title_length_issues'].append({
                    "url": url,
                    "title": title,
                    "length": len(title)
                })

        # Description analysis
        if not page['has_description']:
            analysis['meta_tags']['missing_descriptions'].append(url)
        else:
            descriptions.append(page['description'])

        # Status code analysis
        status = str(page.get('status_code', 200))
        if status in analysis['status_codes']:
            analysis['status_codes'][status] += 1
        else:
            analysis['status_codes']['other'] += 1

        # Content analysis
        word_count = page['word_count']
        word_counts.append(word_count)
        if word_count < 300:
            analysis['content_analysis']['thin_content_pages'].append({
                "url": url,
                "word_count": word_count
            })
            analysis['content_analysis']['pages_under_300_words'] += 1

        # URL structure
        url_depth = url.count('/') - 2
        if url_depth > 3:
            analysis['url_structure']['deep_urls'].append({
                "url": url,
                "depth": url_depth
            })

        # Performance
        if page['content_length'] > 100000:
            analysis['performance_indicators']['large_pages'].append({
                "url": url,
                "size_bytes": page['content_length']
            })

    # Find duplicates
    title_counts = Counter(titles)
    desc_counts = Counter(descriptions)

    for title, count in title_counts.items():
        if count > 1:
            analysis['meta_tags']['duplicate_titles'].append({
                "title": title,
                "count": count
            })

    for desc, count in desc_counts.items():
        if count > 1:
            analysis['meta_tags']['duplicate_descriptions'].append({
                "description": desc[:100] + "...",
                "count": count
            })

    # Calculate averages
    if word_counts:
        analysis['content_analysis']['average_word_count'] = sum(word_counts) // len(word_counts)

    print(f"__OUTPUTS__ {json.dumps(analysis)}")
  depends_on:
  - process_crawl_data
  timeout_seconds: 120
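# Thresholds applied above: titles are flagged outside 30-60 characters, thin content
# below 300 words, deep URLs beyond roughly three path levels, and large pages above
# 100,000 characters of extracted content.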
- id: get_serp_data
  type: mcp
  tool_name: mcp__dataforseo__serp_organic_live_advanced
  depends_on:
  - validate_url
  deployment_id: pod-ow0pvk2h
  tool_arguments:
    depth: 10
    keyword: ${validate_url.domain}
    language_code: en
    location_name: United States
    search_engine: google
  timeout_seconds: 60
  continue_on_error: true
- id: get_domain_metrics
  type: mcp
  tool_name: mcp__dataforseo__dataforseo_labs_google_domain_rank_overview
  depends_on:
  - validate_url
  deployment_id: pod-ow0pvk2h
  tool_arguments:
    target: ${validate_url.domain}
    language_code: en
    location_name: United States
  timeout_seconds: 60
  continue_on_error: true
- id: get_backlinks
  type: mcp
  tool_name: mcp__dataforseo__backlinks_summary
  depends_on:
  - validate_url
  deployment_id: pod-ow0pvk2h
  tool_arguments:
    target: ${validate_url.domain}
  timeout_seconds: 60
  continue_on_error: true
- id: ai_seo_analysis
  type: ai_agent
  prompt: 'Analyze the SEO technical audit results and provide comprehensive insights:


    Technical Analysis: ${technical_seo_analysis}

    SERP Data: ${get_serp_data}

    Domain Metrics: ${get_domain_metrics}


    Provide:

    1. Critical SEO issues that need immediate attention

    2. Medium priority improvements

    3. Quick wins for SEO improvement

    4. Technical debt assessment

    5. Competitive positioning analysis


    Format as structured JSON with actionable recommendations.

    '
  agent_type: analyst
  depends_on:
  - technical_seo_analysis
  - get_serp_data
  - get_domain_metrics
  model_client_id: seo_analyzer
- id: generate_recommendations
  type: ai_agent
  prompt: 'Based on the SEO analysis, generate specific optimization recommendations:


    Analysis Results: ${ai_seo_analysis}

    Technical Issues: ${technical_seo_analysis}


    Create a prioritized action plan with:

    1. Immediate fixes (0-7 days)

    2. Short-term improvements (1-4 weeks)

    3. Long-term strategy (1-3 months)


    Include specific implementation steps for each recommendation.

    Focus on ROI and impact on search rankings.

    '
  agent_type: optimizer
  depends_on:
  - ai_seo_analysis
  - technical_seo_analysis
  model_client_id: content_optimizer
- id: compile_report
  type: script
  script: |
    import json
    import os
    from datetime import datetime

    # Gather all analysis data
    technical_analysis = json.loads(os.environ.get('technical_seo_analysis', '{}'))
    ai_analysis = os.environ.get('ai_seo_analysis', '{}')
    recommendations = os.environ.get('generate_recommendations', '{}')
    backlinks = os.environ.get('get_backlinks', '{}')

    # Parse backlink data if available
    backlink_summary = {}
    try:
        backlink_data = json.loads(backlinks)
        if isinstance(backlink_data, dict):
            backlink_summary = {
                "total_backlinks": backlink_data.get('backlinks', 0),
                "referring_domains": backlink_data.get('referring_domains', 0),
                "domain_rank": backlink_data.get('rank', 0)
            }
    except:
        backlink_summary = {"status": "unavailable"}

    # Compile comprehensive report
    report = {
        "audit_date": datetime.utcnow().isoformat(),
        "target_url": "${target_url}",
        "audit_type": "${audit_type}",
        "executive_summary": {
            "total_pages_analyzed": technical_analysis.get('content_analysis', {}).get('pages_under_300_words', 0),
            "critical_issues_found": len(technical_analysis.get('meta_tags', {}).get('missing_titles', [])) +
                                     len(technical_analysis.get('meta_tags', {}).get('missing_descriptions', [])),
            "overall_health_score": "calculated_below"
        },
        "technical_issues": technical_analysis,
        "backlink_profile": backlink_summary,
        "ai_insights": ai_analysis,
        "recommendations": recommendations,
        "metrics_summary": {
            "meta_tag_issues": {
                "missing_titles": len(technical_analysis.get('meta_tags', {}).get('missing_titles', [])),
                "missing_descriptions": len(technical_analysis.get('meta_tags', {}).get('missing_descriptions', [])),
                "duplicate_titles": len(technical_analysis.get('meta_tags', {}).get('duplicate_titles', [])),
                "title_length_issues": len(technical_analysis.get('meta_tags', {}).get('title_length_issues', []))
            },
            "content_issues": {
                "thin_content_pages": technical_analysis.get('content_analysis', {}).get('pages_under_300_words', 0),
                "average_word_count": technical_analysis.get('content_analysis', {}).get('average_word_count', 0)
            },
            "technical_issues": {
                "404_errors": technical_analysis.get('status_codes', {}).get('404', 0),
                "redirect_chains": technical_analysis.get('status_codes', {}).get('301', 0) +
                                   technical_analysis.get('status_codes', {}).get('302', 0),
                "deep_urls": len(technical_analysis.get('url_structure', {}).get('deep_urls', []))
            }
        }
    }

    # Calculate health score
    total_issues = sum([
        report['metrics_summary']['meta_tag_issues']['missing_titles'],
        report['metrics_summary']['meta_tag_issues']['missing_descriptions'],
        report['metrics_summary']['content_issues']['thin_content_pages'],
        report['metrics_summary']['technical_issues']['404_errors']
    ])

    if total_issues == 0:
        health_score = "Excellent (90-100)"
    elif total_issues < 5:
        health_score = "Good (70-89)"
    elif total_issues < 15:
        health_score = "Fair (50-69)"
    else:
        health_score = "Poor (0-49)"

    report['executive_summary']['overall_health_score'] = health_score

    print(f"__OUTPUTS__ {json.dumps(report)}")
  depends_on:
  - ai_seo_analysis
  - generate_recommendations
  - technical_seo_analysis
  - get_backlinks
  timeout_seconds: 60
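# Health score bands used above, derived from the sum of missing titles, missing
# descriptions, thin-content pages, and 404s: 0 issues -> Excellent (90-100),
# 1-4 -> Good (70-89), 5-14 -> Fair (50-69), 15 or more -> Poor (0-49).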
- id: store_audit_results
  type: storage
  table: seo_audit_results
  operation: insert
  data:
    audit_date: ${execution.started_at}
    audit_type: ${audit_type}
    target_url: ${target_url}
    report_data: ${compile_report}
    workflow_id: ${workflow.id}
    execution_id: ${execution.id}
    health_score: ${compile_report.executive_summary.overall_health_score}
  depends_on:
  - compile_report
  continue_on_error: true
inputs:
- name: target_url
  type: string
  required: true
  validation:
    pattern: ^https?://.*
  description: The URL of the website to audit
- name: crawl_depth
  type: integer
  default: 3
  validation:
    max: 10
    min: 1
  description: Maximum crawl depth
- name: max_pages
  type: integer
  default: 100
  validation:
    max: 1000
    min: 10
  description: Maximum number of pages to crawl
- name: audit_type
  type: string
  default: comprehensive
  enum:
  - basic
  - comprehensive
  - focused
  description: Type of audit to perform
outputs:
  ai_insights:
    source: ai_seo_analysis
    description: AI-generated SEO insights
  audit_report:
    source: compile_report
    description: Complete SEO technical audit report
  recommendations:
    source: generate_recommendations
    description: Prioritized optimization recommendations
  technical_summary:
    source: technical_seo_analysis
    description: Technical issues summary
version: 1.0.1
description: Comprehensive technical SEO audit for websites including crawling, analysis,
  and reporting
model_clients:
- id: seo_analyzer
  config:
    model: gpt-4o-mini
    api_key: ${env.OPENAI_API_KEY}
    max_tokens: 2000
    temperature: 0.3
  provider: openai
- id: content_optimizer
  config:
    model: gpt-4o-mini
    api_key: ${env.OPENAI_API_KEY}
    max_tokens: 1500
    temperature: 0.5
  provider: openai
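
Working with the YAML Source

Both model clients read OPENAI_API_KEY from the environment, so that secret has to be configured before a run. If you keep a copy of the YAML above as a local file, a quick sanity check can confirm that it still parses and that every depends_on entry points at a defined task. The sketch below is a standalone helper using PyYAML with an assumed filename; it is not a feature of the platform itself.

import yaml  # pip install pyyaml

# Assumed local filename for the YAML shown above.
with open("seo_technical_audit_v1.yaml") as fh:
    workflow = yaml.safe_load(fh)

task_ids = {task["id"] for task in workflow.get("tasks", [])}
print(f"{workflow['id']}: {len(task_ids)} tasks")

# Every dependency should point at a task that exists.
for task in workflow.get("tasks", []):
    for dep in task.get("depends_on", []):
        if dep not in task_ids:
            print(f"missing dependency: {task['id']} -> {dep}")
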
Executions
Execution ID  Status     Started              Duration
2ea14260...   COMPLETED  2025-07-08 05:49:01  N/A
6f2c2fe5...   COMPLETED  2025-07-08 05:47:14  N/A