Beyond the CLI: Building an Enterprise Terraform Impact Dashboard
Stop Reading Raw JSON: Build an Enterprise Terraform Impact Dashboard
As a Lead DevOps Architect, I often deal with infrastructure plans that touch hundreds of resources.
Sifting through the standard terminal output of terraform plan to find a single critical
"delete" is like looking for a needle in a haystack.
When you are managing complex infrastructure—like a technical cutover from Squid Proxy to Google Secure Web Proxy—the cognitive load is high. The risk of missing a "destroy" on a production database is a real threat to stability.
The Problem: The "Wall of Text"
Standard Terraform output is designed for logs, not for human auditing. In an enterprise environment, we face three main challenges:
- Risk Blindness: Critical resources (like RDS instances, S3 buckets, or IAM roles) look exactly like a minor tag update in the terminal.
- Scale Issues: Large plans (500+ changes) are impossible to review manually without missing something important.
- Stakeholder Gap: It is difficult to share a raw CLI output with a manager or security auditor for quick approval.
The Solution: An Automation-First Dashboard
I developed a Python-based Impact Analyser that transforms a
plan.json into a high-fidelity, interactive HTML dashboard.
This isn't just a formatter; it’s a risk-assessment engine.
1. The Workflow
The process is simple and integrates directly into any CI/CD pipeline:
1. Generate the plan:
   terraform plan -out=main.tfplan
2. Convert the plan to JSON:
   terraform show -json main.tfplan > plan.json
3. Run the analyser:
   python3 tf_impact.py plan.json
2. Enterprise Features
- Risk-Level Heuristics: The script automatically flags changes as CRITICAL or HIGH if stateful resources (Databases, Storage, KMS Keys) are marked for deletion or replacement.
- Client-Side Filtering: Built with Tailwind CSS and vanilla JavaScript, the dashboard allows you to search 1,000+ resources instantly by address or module path.
- Standalone Portability: The output is a single HTML file. No database or server is required, making it perfect for CI/CD artifacts.
The Code: Python Risk Logic
Here is a snippet of how the risk assessment engine identifies dangerous operations before they hit production. We can always extend the risk engine further with additional critical resource types.
class RiskAnalyzer:
    """Identifies dangerous changes based on resource sensitivity"""

    # Stateful resources whose loss is hard or impossible to recover from.
    CRITICAL_TYPES = {
        'aws_db_instance', 'google_sql_database_instance',
        'aws_s3_bucket', 'aws_iam_role', 'aws_kms_key'
    }

    @staticmethod
    def assess(resource_type, actions):
        """Map a resource type and its planned actions to a risk label.

        A replacement always includes a 'delete' action, so a single
        destructive flag covers both delete and replace.
        """
        destructive = 'delete' in set(actions)
        if resource_type in RiskAnalyzer.CRITICAL_TYPES:
            return "CRITICAL" if destructive else "HIGH"
        return "HIGH" if destructive else "LOW"
Copy the Script Below
#!/usr/bin/env python3
"""
Enterprise Terraform Impact Analyser (v2.0)
High-performance, filter-enabled change detector for large-scale plans.
Provides a dynamic HTML dashboard with client-side filtering.
"""
import json
import argparse
import re
import logging
import sys
from typing import Dict, List, Any, Optional
from collections import defaultdict
from datetime import datetime
from dataclasses import dataclass, asdict
# Configure logging.
# Logs go to stderr so stdout stays free for the generated report/artifact.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    stream=sys.stderr
)
logger = logging.getLogger("TF-Impact")
@dataclass
class ResourceChange:
    """Flat record of one planned change, serialized via asdict() for the report."""
    address: str         # full Terraform address, e.g. 'module.net.aws_s3_bucket.b'
    type: str            # resource type, e.g. 'aws_s3_bucket'
    name: str            # resource's local name within its module
    module: str          # module address, or 'root' for top-level resources
    provider: str        # provider prefix derived from the type, e.g. 'aws'
    actions: List[str]   # raw Terraform action list, e.g. ['delete', 'create']
    risk_level: str      # label produced by RiskAnalyzer.assess
    change_summary: str  # human-readable join of actions, e.g. 'delete -> create'
class FilterEngine:
    """Enterprise filtering logic for large-scale infrastructure"""

    def __init__(self, include_types=None, exclude_types=None,
                 modules=None, providers=None, address_regex=None):
        # Each filter is normalized to a set, or None when the filter is unset
        # (empty/absent inputs both mean "no filtering on this dimension").
        def as_set(values):
            return set(values) if values else None

        self.include_types = as_set(include_types)
        self.exclude_types = as_set(exclude_types)
        self.modules = as_set(modules)
        self.providers = as_set(providers)
        self.address_pattern = re.compile(address_regex) if address_regex else None

    def should_include(self, resource_addr: str, r_type: str,
                       module: str, provider: str) -> bool:
        """Return True only when the change passes every configured filter."""
        checks = (
            self.include_types is None or r_type in self.include_types,
            self.exclude_types is None or r_type not in self.exclude_types,
            # Module filters match by prefix so 'module.net' covers submodules.
            self.modules is None or any(module.startswith(m) for m in self.modules),
            self.providers is None or provider in self.providers,
            self.address_pattern is None
            or self.address_pattern.search(resource_addr) is not None,
        )
        return all(checks)
class RiskAnalyzer:
    """Identifies dangerous changes based on resource sensitivity"""

    # Stateful / security-sensitive types where destruction is a major event.
    CRITICAL_TYPES = {
        'aws_db_instance', 'aws_rds_cluster', 'google_sql_database_instance',
        'azurerm_postgresql_server', 'aws_s3_bucket', 'google_storage_bucket',
        'aws_iam_role', 'aws_kms_key', 'kubernetes_namespace', 'aws_route53_zone'
    }

    @staticmethod
    def assess(resource_type: str, actions: List[str]) -> str:
        """Map a resource type and its planned actions to a risk label.

        Replacements carry both 'create' and 'delete', so any plan containing
        'delete' is treated as destructive.
        """
        action_set = set(actions)
        destructive = 'delete' in action_set
        sensitive = resource_type in RiskAnalyzer.CRITICAL_TYPES

        if destructive:
            return "CRITICAL" if sensitive else "HIGH"
        if sensitive:
            return "HIGH"
        return "MEDIUM" if 'update' in action_set else "LOW"
class EnterpriseTFDetector:
    """Parses a Terraform JSON plan, applies filters, and aggregates risk stats."""

    def __init__(self, plan_file: str, filter_engine: FilterEngine):
        self.plan_file = plan_file
        self.filter_engine = filter_engine
        self.risk_analyzer = RiskAnalyzer()
        self.data = self._load_plan(plan_file)

    def _load_plan(self, filepath: str) -> Dict:
        """Load the plan JSON; log and exit(1) on unreadable or malformed input.

        Catches only I/O and JSON errors — a broad `except Exception` would
        also swallow genuine programming errors and hide bugs behind exit(1).
        """
        try:
            # terraform show -json always emits UTF-8; be explicit rather than
            # depending on the locale's default encoding.
            with open(filepath, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError) as e:
            logger.error(f"Failed to load plan file: {e}")
            sys.exit(1)

    def _detect_provider(self, resource_type: str) -> str:
        """Infer the provider prefix from a type, e.g. 'aws_s3_bucket' -> 'aws'."""
        parts = resource_type.split('_', 1)
        return parts[0] if len(parts) > 1 else 'other'

    def analyze(self) -> Dict[str, Any]:
        """Walk resource_changes, skip no-ops/reads, filter, and aggregate.

        Returns a dict with:
          - 'metadata': timestamp, terraform version, format version
          - 'changes': list of ResourceChange dicts that passed the filters
          - 'stats': count per primary action ('create'/'update'/'delete'/'replace')
          - 'risk_summary': count per risk level
          - 'by_module': per-module count per primary action
        """
        logger.info(f"Analyzing {self.plan_file}...")
        results = {
            'metadata': {
                'timestamp': datetime.now().isoformat(),
                'tf_version': self.data.get('terraform_version', 'Unknown'),
                # NOTE(review): this is the plan JSON schema version, not a
                # unique plan identifier — confirm whether a real id is wanted.
                'plan_id': self.data.get('format_version', 'N/A')
            },
            'changes': [],
            'stats': defaultdict(int),
            'risk_summary': defaultdict(int),
            'by_module': defaultdict(lambda: defaultdict(int))
        }
        resource_changes = self.data.get('resource_changes', [])
        for change in resource_changes:
            addr = change.get('address')
            r_type = change.get('type')
            module = change.get('module_address', 'root')
            provider = self._detect_provider(r_type)
            actions = change.get('change', {}).get('actions', [])
            # Skip entries that don't modify infrastructure: no-ops and
            # pure data-source reads.
            if 'no-op' in actions or not actions or actions == ['read']:
                continue
            if not self.filter_engine.should_include(addr, r_type, module, provider):
                continue
            risk = self.risk_analyzer.assess(r_type, actions)
            # A plan carrying both create and delete is a replacement;
            # otherwise the first action is representative.
            primary_action = "replace" if (
                'create' in actions and 'delete' in actions
            ) else actions[0]
            res_obj = ResourceChange(
                address=addr,
                type=r_type,
                name=change.get('name'),
                module=module,
                provider=provider,
                actions=actions,
                risk_level=risk,
                change_summary=" -> ".join(actions)
            )
            results['changes'].append(asdict(res_obj))
            results['stats'][primary_action] += 1
            results['risk_summary'][risk] += 1
            results['by_module'][module][primary_action] += 1
        return results
Terraform Dashboard Preview
Final Thoughts
Terraform is powerful, but raw execution plans alone are not sufficient for enterprise-scale governance. As infrastructure estates grow larger and more distributed, engineers need visibility, prioritization, and fast risk assessment—not just terminal output.
By converting Terraform plans into a searchable, risk-aware dashboard, teams can:
- Reduce production deployment risk
- Improve audit and approval workflows
- Accelerate CI/CD validation processes
- Provide clearer infrastructure insights to stakeholders
This approach transforms Terraform plans from static logs into actionable operational intelligence.

Comments
Post a Comment