Initial documentation structure
scripts/orchestrator/core/config.yaml.template (new file, 56 lines)
@@ -0,0 +1,56 @@
# Network AI Orchestrator Configuration
elasticsearch:
  host: "192.168.100.85:9200"
  index: "netflow-*"
  verify_certs: false
  timeout: 30

analysis:
  interval_minutes: 60
  window_hours: 168
  min_traffic_bytes: 1000000

pr_creation:
  enabled: true
  frequency: "smart"          # Options: weekly, daily, manual, smart
  triggers:
    - high_traffic_anomaly    # Create a PR on a traffic spike
    - security_event          # Create a PR on a security issue
    - scheduled: "weekly"
  thresholds:
    traffic_spike: 200        # 200% increase triggers a PR
    new_hosts: 10             # 10+ new IPs triggers a PR
  day_of_week: "saturday"     # monday .. sunday (used by the weekly schedule)
  hour_of_day: 22             # 24-hour format (22 = 10 PM)
  skip_if_pending: true       # Don't create a PR if one is already open
  min_days_between: 7         # Minimum days between PRs

gitea:
  url: "https://git.salmutt.dev"
  repo: "sal/srx-config"
  token: "${GITEA_TOKEN}"     # Resolved from the GITEA_TOKEN environment variable
  branch: "main"
  labels: ["ai-generated", "auto-config", "pending-review"]

srx:
  host: "192.168.100.1"
  port: 830
  username: "netops"
  ssh_key: "/home/netops/.ssh/srx_key"

shared_storage:
  path: "/shared/ai-gitops"

state_tracking:
  enabled: true
  state_file: '/shared/ai-gitops/state/orchestrator_state.json'
  track_pr_history: true

ai:
  request_timeout: 120
  max_retries: 3

logging:
  level: "INFO"
  max_file_size: "100MB"
  retention_days: 30
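A note on the token placeholder: the orchestrator does not read secrets from this file directly. load_config() in orchestrator_main.py replaces the literal string "${GITEA_TOKEN}" with the GITEA_TOKEN environment variable (itself loaded from ~/.env via python-dotenv). A minimal stand-alone sketch of that substitution, with an illustrative config path:

    import os
    import yaml

    # Illustrative path; the orchestrator defaults to /home/netops/orchestrator/config.yaml
    with open("config.yaml") as f:
        config = yaml.safe_load(f)

    # Expand the placeholder the same way load_config() does
    if config.get("gitea", {}).get("token") == "${GITEA_TOKEN}":
        config["gitea"]["token"] = os.environ.get("GITEA_TOKEN", "")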
scripts/orchestrator/core/orchestrator_main.py (new file, 771 lines)
@@ -0,0 +1,771 @@
#!/usr/bin/env python3
"""
Enhanced Network AI Orchestrator - Production Version with Gitea Integration
Compatible with Elasticsearch 7.x
"""
import os
import sys
import json
import time
import logging
import signal
import hashlib
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional
import yaml
import uuid
import threading
from elasticsearch import Elasticsearch  # Using the sync client for ES 7.x
from git import Repo
import requests

# Load environment variables from the home directory .env
from dotenv import load_dotenv
env_path = Path.home() / '.env'  # Resolves to /home/netops/.env for the service account
load_dotenv(env_path)

# Phase 3: Gitea integration
from gitea_integration import GiteaIntegration

# Configure production logging
def setup_logging():
    """Configure comprehensive logging for production"""
    log_dir = Path("/home/netops/orchestrator/logs")
    log_dir.mkdir(exist_ok=True)

    log_file = log_dir / f"orchestrator_{datetime.now().strftime('%Y%m%d')}.log"

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_file),
            logging.StreamHandler(sys.stdout)
        ]
    )
    return logging.getLogger(__name__)

logger = setup_logging()
class NetworkOrchestrator:
    def __init__(self, config_path: str = "/home/netops/orchestrator/config.yaml"):
        """Initialize the orchestrator with configuration"""
        self.config = self.load_config(config_path)
        self.es_client = None
        self.git_repo = None
        self.running = True
        self.shared_dir = Path("/shared/ai-gitops")
        self.request_dir = self.shared_dir / "requests"
        self.response_dir = self.shared_dir / "responses"

        # Phase 3: persisted orchestrator state
        self.state = self.load_state()

        # Ensure directories exist
        self.request_dir.mkdir(parents=True, exist_ok=True)
        self.response_dir.mkdir(parents=True, exist_ok=True)

        logger.info("Orchestrator initialized")

    def should_create_pr(self):
        """Check if we should create a PR based on schedule and state"""
        if not self.config.get('pr_creation', {}).get('enabled', True):
            return False

        # Load state
        state = self.load_state()

        # Check if a pending PR exists
        if self.config['pr_creation'].get('skip_if_pending', True):
            if state.get('pending_pr', False):
                logger.info("Skipping PR - existing PR is pending")
                return False

        # Check frequency
        frequency = self.config['pr_creation'].get('frequency', 'weekly')

        if frequency == 'weekly':
            now = datetime.now()
            target_day = self.config['pr_creation'].get('day_of_week', 'saturday')
            target_hour = self.config['pr_creation'].get('hour_of_day', 5)

            # Convert day name to number
            days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
            target_day_num = days.index(target_day.lower())

            # Check if it's the right day and hour
            if now.weekday() != target_day_num or now.hour != target_hour:
                return False

        # Check minimum days between PRs
        if state.get('last_pr_created'):
            last_pr_date = datetime.fromisoformat(state['last_pr_created'])
            days_since = (datetime.now() - last_pr_date).days
            min_days = self.config['pr_creation'].get('min_days_between', 7)

            if days_since < min_days:
                logger.info(f"Skipping PR - only {days_since} days since last PR")
                return False

        return True

    def load_state(self):
        """Load orchestrator state"""
        state_file = self.config.get('state_tracking', {}).get('state_file', '/var/lib/orchestrator/state.json')
        if Path(state_file).exists():
            with open(state_file, 'r') as f:
                return json.load(f)
        return {}
    def check_smart_triggers(self, traffic_data: Dict) -> bool:
        """Check if any smart triggers are met"""
        if self.config['pr_creation'].get('frequency') != 'smart':
            return False

        triggers = self.config['pr_creation'].get('triggers', [])
        thresholds = self.config['pr_creation'].get('thresholds', {})

        # Load previous state for comparison
        state = self.load_state()
        last_data = state.get('last_traffic_data', {})

        # Check traffic spike
        if 'high_traffic_anomaly' in str(triggers):
            current_flows = sum(
                b['doc_count']
                for b in traffic_data.get('top_talkers', {}).get('buckets', [])
            )
            last_flows = last_data.get('total_flows', current_flows)

            if last_flows > 0:
                spike_percent = ((current_flows - last_flows) / last_flows) * 100
                if spike_percent >= thresholds.get('traffic_spike', 200):
                    logger.info(f"🚨 Traffic spike detected: {spike_percent:.1f}% increase!")
                    return True

        # Check new hosts
        if 'security_event' in str(triggers):
            current_ips = set(
                b['key']
                for b in traffic_data.get('top_talkers', {}).get('buckets', [])
            )
            last_ips = set(last_data.get('top_ips', []))

            new_hosts = current_ips - last_ips
            if len(new_hosts) >= thresholds.get('new_hosts', 10):
                logger.info(f"🚨 Security event: {len(new_hosts)} new hosts detected!")
                logger.info(f"   New IPs: {', '.join(list(new_hosts)[:5])}...")
                return True

        # Check weekly schedule fallback
        if any('scheduled' in str(t) for t in triggers):
            if self.should_create_pr():  # Use the existing weekly logic
                logger.info("📅 Weekly schedule triggered")
                return True

        # Save current data for the next comparison
        self.save_state({
            'last_traffic_data': {
                'total_flows': sum(
                    b['doc_count']
                    for b in traffic_data.get('top_talkers', {}).get('buckets', [])
                ),
                'top_ips': [
                    b['key']
                    for b in traffic_data.get('top_talkers', {}).get('buckets', [])
                ]
            }
        })

        return False
    def save_state(self, updates):
        """Save orchestrator state"""
        state_file = self.config.get('state_tracking', {}).get('state_file', '/var/lib/orchestrator/state.json')
        state = self.load_state()
        state.update(updates)
        state['last_updated'] = datetime.now().isoformat()

        # Make sure the state directory exists before writing
        Path(state_file).parent.mkdir(parents=True, exist_ok=True)
        with open(state_file, 'w') as f:
            json.dump(state, f, indent=2)
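
    # For reference (illustrative values only): the state file written above is plain
    # JSON, and after a few cycles it typically carries the keys set elsewhere in this
    # module (the save_state() calls plus the PR bookkeeping in create_gitea_pr and
    # check_pr_status), e.g.:
    #
    #   {
    #     "pending_pr": 42,
    #     "last_pr_created": "2025-01-04T22:00:00",
    #     "pr_url": "https://git.salmutt.dev/sal/srx-config/pulls/42",
    #     "last_traffic_data": {"total_flows": 58000, "top_ips": ["192.168.100.50"]},
    #     "last_analysis_run": "2025-01-04T22:00:05",
    #     "last_updated": "2025-01-04T22:00:05"
    #   }
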
    def load_config(self, config_path: str) -> Dict:
        """Load configuration from YAML file"""
        try:
            with open(config_path, 'r') as f:
                config = yaml.safe_load(f)
            logger.info(f"Configuration loaded from {config_path}")

            # Replace environment variables
            if 'gitea' in config and 'token' in config['gitea']:
                if config['gitea']['token'] == '${GITEA_TOKEN}':
                    config['gitea']['token'] = os.environ.get('GITEA_TOKEN', '')

            return config
        except Exception as e:
            logger.error(f"Failed to load config: {e}")
            # Fall back to defaults if the config cannot be loaded
            return {
                'elasticsearch': {
                    'host': '192.168.100.85:9200',
                    'index': 'netflow-*'
                },
                'analysis': {
                    'interval_minutes': 60,
                    'window_hours': 24
                },
                'gitea': {
                    'url': 'https://git.salmutt.dev',
                    'repo': 'sal/srx-config',
                    'token': os.environ.get('GITEA_TOKEN', '')
                }
            }
    def setup_elasticsearch(self):
        """Initialize Elasticsearch connection (synchronous for ES 7.x)"""
        try:
            es_config = self.config['elasticsearch']
            self.es_client = Elasticsearch(
                hosts=[es_config['host']],
                verify_certs=False,
                timeout=30
            )
            # Test the connection
            info = self.es_client.info()
            logger.info(f"Connected to Elasticsearch: {info['version']['number']}")
            return True
        except Exception as e:
            logger.error(f"Failed to connect to Elasticsearch: {e}")
            self.es_client = None
            return False
    def collect_traffic_data(self) -> Dict:
        """Collect traffic data from Elasticsearch (synchronous)"""
        if not self.es_client:
            logger.warning("Elasticsearch not connected, using mock data")
            return self.generate_mock_data()

        try:
            window_hours = self.config['analysis']['window_hours']
            query = {
                "query": {
                    "range": {
                        "@timestamp": {
                            "gte": f"now-{window_hours}h",
                            "lte": "now"
                        }
                    }
                },
                "size": 0,
                "aggs": {
                    "top_talkers": {
                        "terms": {
                            "field": "netflow.ipv4_src_addr",
                            "size": 20
                        },
                        "aggs": {
                            "bytes": {"sum": {"field": "netflow.in_bytes"}},
                            "packets": {"sum": {"field": "netflow.in_pkts"}}
                        }
                    },
                    "protocols": {
                        "terms": {
                            "field": "netflow.protocol",
                            "size": 10
                        }
                    },
                    "top_destinations": {
                        "terms": {
                            "field": "netflow.ipv4_dst_addr",
                            "size": 20
                        }
                    },
                    "vlans": {
                        "terms": {
                            "field": "vlan_id",
                            "size": 20
                        },
                        "aggs": {
                            "bytes": {"sum": {"field": "bytes"}}
                        }
                    },
                    "hourly_traffic": {
                        "date_histogram": {
                            "field": "@timestamp",
                            "calendar_interval": "hour"
                        },
                        "aggs": {
                            "bytes": {"sum": {"field": "bytes"}}
                        }
                    }
                }
            }

            result = self.es_client.search(
                index=self.config['elasticsearch']['index'],
                body=query
            )

            # Handle both ES 6.x and 7.x response formats for hits.total
            total_hits = result['hits']['total']
            if isinstance(total_hits, dict):
                total_count = total_hits['value']
            else:
                total_count = total_hits

            logger.info(f"Collected traffic data: {total_count} flows")
            return result['aggregations']

        except Exception as e:
            logger.error(f"Error collecting traffic data: {e}")
            return self.generate_mock_data()
    def generate_mock_data(self) -> Dict:
        """Generate mock traffic data for testing"""
        return {
            "top_talkers": {
                "buckets": [
                    {"key": "192.168.100.50", "doc_count": 15000,
                     "bytes": {"value": 5000000}, "packets": {"value": 10000}},
                    {"key": "192.168.100.51", "doc_count": 12000,
                     "bytes": {"value": 4000000}, "packets": {"value": 8000}},
                    {"key": "192.168.100.11", "doc_count": 8000,
                     "bytes": {"value": 2000000}, "packets": {"value": 5000}},
                    {"key": "10.0.0.5", "doc_count": 6000,
                     "bytes": {"value": 1500000}, "packets": {"value": 3000}}
                ]
            },
            "protocols": {
                "buckets": [
                    {"key": "tcp", "doc_count": 25000},
                    {"key": "udp", "doc_count": 15000},
                    {"key": "icmp", "doc_count": 2000}
                ]
            },
            "vlans": {
                "buckets": [
                    {"key": 100, "doc_count": 20000, "bytes": {"value": 8000000}},
                    {"key": 200, "doc_count": 15000, "bytes": {"value": 6000000}},
                    {"key": 300, "doc_count": 5000, "bytes": {"value": 2000000}}
                ]
            }
        }
    def request_ai_analysis(self, traffic_data: Dict) -> Optional[Dict]:
        """Send traffic data to the AI for analysis"""
        request_id = str(uuid.uuid4())
        request_file = self.request_dir / f"{request_id}.json"

        request_data = {
            "request_id": request_id,
            "timestamp": datetime.now().isoformat(),
            "type": "traffic_analysis",
            "data": traffic_data,
            "prompt": self.build_analysis_prompt(traffic_data)
        }

        try:
            with open(request_file, 'w') as f:
                json.dump(request_data, f, indent=2)
            logger.info(f"AI request created: {request_id}")

            # Wait for the response (with timeout)
            response = self.wait_for_ai_response(request_id, timeout=600)

            # Convert to pipeline format for PR creation
            if response and 'response' in response:
                pipeline_response = {
                    "suggestions": [],
                    "focus_area": "security",
                    "feedback_aware": True,
                    "timestamp": datetime.now().isoformat()
                }

                # Extract SRX config lines from the AI response
                ai_text = response.get('response', '')
                lines = ai_text.split('\n')

                for line in lines:
                    line = line.strip()
                    # Capture any line that looks like SRX config
                    if line.startswith('set '):
                        # Remove any trailing spaces and braces
                        clean_line = line.rstrip(' {')
                        pipeline_response['suggestions'].append({
                            "config": clean_line,
                            "reason": "AI-generated optimization"
                        })

                # Only save if we found config lines
                if pipeline_response['suggestions']:
                    pipeline_file = self.response_dir / f"pipeline_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{request_id[:8]}_response.json"
                    with open(pipeline_file, 'w') as f:
                        json.dump(pipeline_response, f, indent=2)
                    logger.info(f"Saved pipeline format response with {len(pipeline_response['suggestions'])} configs")

            return response

        except Exception as e:
            logger.error(f"Error requesting AI analysis: {e}")
            return None
    def build_analysis_prompt(self, traffic_data: Dict) -> str:
        """Build the prompt for AI analysis"""
        prompt = """Analyze this network traffic data and suggest optimizations for a Juniper SRX firewall:

Traffic Summary:
- Top Talkers: {}
- Active VLANs: {}
- Protocol Distribution: {}

Based on this data, please provide:
1. Security rule optimizations (as Juniper SRX configuration commands)
2. QoS improvements for high-traffic hosts
3. VLAN segmentation recommendations
4. Potential security concerns or anomalies

Format your response with specific Juniper SRX configuration commands that can be applied.
Include comments explaining each change."""

        # Extract key metrics
        top_ips = [b['key'] for b in traffic_data.get('top_talkers', {}).get('buckets', [])][:5]
        vlans = [str(b['key']) for b in traffic_data.get('vlans', {}).get('buckets', [])][:5]
        protocols = [str(b['key']) for b in traffic_data.get('protocols', {}).get('buckets', [])][:3]

        return prompt.format(
            ', '.join(top_ips) if top_ips else 'No data',
            ', '.join(vlans) if vlans else 'No VLANs',
            ', '.join(protocols) if protocols else 'No protocols'
        )
    def wait_for_ai_response(self, request_id: str, timeout: int = 120) -> Optional[Dict]:
        """Wait for the AI response file"""
        response_file = self.response_dir / f"{request_id}_response.json"
        start_time = time.time()

        while time.time() - start_time < timeout:
            if response_file.exists():
                try:
                    time.sleep(1)  # Give the AI time to finish writing
                    with open(response_file, 'r') as f:
                        response = json.load(f)
                    logger.info(f"AI response received: {request_id}")

                    # Log a snippet of the response
                    if 'response' in response:
                        snippet = response['response'][:200] + '...' if len(response['response']) > 200 else response['response']
                        logger.info(f"AI suggestion snippet: {snippet}")

                    # Clean up files
                    response_file.unlink()
                    (self.request_dir / f"{request_id}.json").unlink(missing_ok=True)
                    return response
                except Exception as e:
                    logger.error(f"Error reading AI response: {e}")
                    return None
            time.sleep(2)

        logger.warning(f"AI response timeout for {request_id}")
        return None
    def create_gitea_pr(self, ai_response: Dict = None) -> bool:
        """Create a pull request in Gitea with the suggested changes"""
        try:
            # If no AI response was provided, fall back to the latest saved one
            if not ai_response:
                latest_suggestion = self._get_latest_ai_suggestion()
                if not latest_suggestion:
                    logger.warning("No AI suggestions found to create PR")
                    return False

                # Read the suggestion file
                with open(latest_suggestion['path'], 'r') as f:
                    ai_response = json.load(f)

            # Check if we should create a PR
            if not self.should_create_pr():
                logger.info("Skipping PR creation - conditions not met")
                return False

            # Check for an existing pending PR
            if self.state.get('pending_pr'):
                logger.info(f"Skipping PR creation - pending PR exists: {self.state['pending_pr']}")
                return False

            logger.info("Creating Gitea pull request with AI suggestions...")

            # Initialize Gitea integration
            gitea = GiteaIntegration(self.config['gitea'])

            # Extract the SRX configuration (a list of suggestion dicts or a raw string)
            srx_config = ai_response.get('suggestions') or ai_response.get('response', '')
            if not srx_config or (isinstance(srx_config, str) and not srx_config.strip()):
                logger.warning("Empty or invalid suggestions, skipping PR creation")
                return False

            # Create the PR
            pr_info = gitea.create_pr_with_config(
                srx_config=srx_config,
                title=f"AI Network Configuration Suggestions - {datetime.now().strftime('%B %d, %Y')}",
                description=None  # Will auto-generate
            )

            if pr_info:
                # Update state with PR information
                self.state['pending_pr'] = pr_info['number']
                self.state['last_pr_created'] = datetime.now().isoformat()
                self.state['pr_url'] = pr_info['url']
                self.save_state(self.state)

                logger.info(f"Successfully created PR #{pr_info['number']}: {pr_info['url']}")

                # Log to a separate file for notifications/monitoring
                with open('/var/log/orchestrator/pr_created.log', 'a') as f:
                    f.write(f"{datetime.now().isoformat()} - Created PR #{pr_info['number']} - {pr_info['url']}\n")

                return True
            else:
                logger.error("Failed to create PR in Gitea")
                return False

        except Exception as e:
            logger.error(f"Error creating Gitea PR: {e}", exc_info=True)
            return False
    def _get_latest_ai_suggestion(self) -> Optional[Dict]:
        """Get the most recent AI suggestion file"""
        response_dir = '/shared/ai-gitops/responses'

        try:
            # List all response files (both '<request_id>_response.json' and the
            # 'pipeline_..._response.json' files written by request_ai_analysis)
            response_files = []
            for filename in os.listdir(response_dir):
                if filename.endswith('_response.json'):
                    filepath = os.path.join(response_dir, filename)
                    # Record the file modification time
                    mtime = os.path.getmtime(filepath)
                    response_files.append({
                        'path': filepath,
                        'filename': filename,
                        'mtime': mtime
                    })

            if not response_files:
                return None

            # Sort by modification time and return the latest
            response_files.sort(key=lambda x: x['mtime'], reverse=True)
            return response_files[0]

        except Exception as e:
            logger.error(f"Error finding latest AI suggestion: {e}")
            return None
    def check_pr_status(self):
        """Check the status of the pending pull request"""
        if not self.state.get('pending_pr'):
            return

        try:
            gitea = GiteaIntegration(self.config['gitea'])
            pr_status = gitea.get_pr_status(self.state['pending_pr'])

            if pr_status:
                logger.info(f"PR #{pr_status['number']} status: {pr_status['state']} (merged: {pr_status['merged']})")

                # If the PR is closed or merged, clear the pending_pr flag
                if pr_status['state'] == 'closed':
                    logger.info(f"PR #{pr_status['number']} has been closed")
                    self.state['pending_pr'] = None
                    self.state['last_pr_status'] = 'closed'
                    self.state['last_pr_closed'] = datetime.now().isoformat()

                    if pr_status['merged']:
                        self.state['last_pr_status'] = 'merged'
                        logger.info(f"PR #{pr_status['number']} was merged!")
                        # Mark for deployment
                        self.state['pending_deployment'] = True
                        self.state['deployment_pr'] = pr_status['number']

                    self.save_state(self.state)

        except Exception as e:
            logger.error(f"Error checking PR status: {e}")
    def run_analysis_cycle(self):
        """Run a complete analysis cycle"""
        logger.info("=" * 60)
        logger.info("Starting traffic analysis cycle")
        logger.info("=" * 60)

        try:
            # Always collect traffic data
            logger.info("Step 1: Collecting traffic data from Elasticsearch...")
            traffic_data = self.collect_traffic_data()

            if not traffic_data:
                logger.warning("No traffic data available, skipping analysis")
                return

            # Log a summary of the collected data
            top_talkers = traffic_data.get('top_talkers', {}).get('buckets', [])
            if top_talkers:
                logger.info(f"Found {len(top_talkers)} top talkers")
                logger.info(f"Top IP: {top_talkers[0]['key']} with {top_talkers[0]['doc_count']} flows")

            # Check smart triggers if configured
            trigger_fired = False
            if self.config['pr_creation'].get('frequency') == 'smart':
                trigger_fired = self.check_smart_triggers(traffic_data)
                if trigger_fired:
                    logger.info("🎯 Smart trigger activated - will create PR")

            # Always request AI analysis
            logger.info("Step 2: Requesting AI analysis...")
            ai_response = self.request_ai_analysis(traffic_data)

            # Save state for this analysis run
            self.save_state({
                'last_analysis_run': datetime.now().isoformat(),
                'last_analysis_data': {
                    'top_talkers_count': len(top_talkers),
                    'response_received': bool(ai_response)
                }
            })

            # Determine whether we should create a PR
            if self.config['pr_creation'].get('frequency') == 'smart':
                # Smart mode: create if a trigger fired
                should_create = trigger_fired
            else:
                # Regular mode: use the existing schedule logic
                should_create = self.should_create_pr()

            # Create the PR if conditions are met
            if should_create:
                if ai_response and (ai_response.get('response') or ai_response.get('suggestions')):
                    logger.info("Step 3: Creating PR with AI suggestions...")
                    if self.create_gitea_pr(ai_response):
                        logger.info("✓ PR created successfully")
                        if trigger_fired:
                            logger.info("  Reason: Smart trigger fired")
                    else:
                        logger.warning("Failed to create Gitea PR")
                else:
                    logger.info("No actionable suggestions from AI analysis")
            else:
                logger.info("No triggers met - analysis saved for future use")

        except Exception as e:
            logger.error(f"Error in analysis cycle: {e}")

        logger.info("=" * 60)
    def main_loop(self):
        """Main orchestrator loop"""
        logger.info("Starting Network AI Orchestrator")

        # Set up the Elasticsearch connection
        if not self.setup_elasticsearch():
            logger.warning("Running without Elasticsearch connection - using mock data")

        interval = self.config['analysis'].get('interval_minutes', 60) * 60

        # Run the first analysis immediately
        self.run_analysis_cycle()

        while self.running:
            try:
                logger.info(f"Next analysis scheduled in {interval // 60} minutes")
                logger.info(f"Next run at: {(datetime.now() + timedelta(seconds=interval)).strftime('%H:%M:%S')}")

                # Sleep in one-minute slices so shutdown stays responsive,
                # checking PR status every 15 minutes during the wait
                for i in range(int(interval / 60)):
                    if not self.running:
                        break

                    time.sleep(60)

                    # Check PR status every 15 minutes
                    if i % 15 == 14 and self.state.get('pending_pr'):
                        logger.info("Checking PR status...")
                        self.check_pr_status()

                if self.running:  # Check again after the wait
                    self.run_analysis_cycle()

            except KeyboardInterrupt:
                logger.info("Received keyboard interrupt")
                break
            except Exception as e:
                logger.error(f"Error in main loop: {e}")
                time.sleep(60)  # Wait before retrying

        logger.info("Orchestrator shutdown complete")
    def shutdown(self, signum=None, frame=None):
        """Graceful shutdown handler"""
        if signum:
            logger.info(f"Received signal {signum}, initiating shutdown...")
        else:
            logger.info("Initiating shutdown...")
        self.running = False
        if self.es_client:
            try:
                # Close the Elasticsearch connection
                self.es_client.transport.close()
            except Exception:
                pass


def main():
    """Main entry point"""
    orchestrator = NetworkOrchestrator()

    # Set up signal handlers
    signal.signal(signal.SIGTERM, orchestrator.shutdown)
    signal.signal(signal.SIGINT, orchestrator.shutdown)

    try:
        orchestrator.main_loop()
    except Exception as e:
        logger.error(f"Fatal error: {e}", exc_info=True)
        sys.exit(1)

    sys.exit(0)


if __name__ == "__main__":
    main()
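The AI worker that answers these requests is not part of this commit; the orchestrator only defines the file contract on /shared/ai-gitops (a request JSON carrying "request_id", "data" and "prompt", answered by a "<request_id>_response.json" whose "response" field is free-form text). A minimal sketch of a responder honoring that contract; run_model() is a placeholder for whatever model backend actually serves the prompts:

    #!/usr/bin/env python3
    import json
    import time
    from pathlib import Path

    REQUESTS = Path("/shared/ai-gitops/requests")
    RESPONSES = Path("/shared/ai-gitops/responses")

    def run_model(prompt: str) -> str:
        # Placeholder: call the real AI backend here
        return "set security policies ..."

    while True:
        for req_file in REQUESTS.glob("*.json"):
            request = json.loads(req_file.read_text())
            out = RESPONSES / f"{request['request_id']}_response.json"
            if out.exists():
                continue  # already answered; the orchestrator cleans up both files
            answer = run_model(request["prompt"])
            # wait_for_ai_response() only requires a "response" key in this file
            out.write_text(json.dumps({"response": answer}, indent=2))
        time.sleep(5)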
scripts/orchestrator/core/requirements.txt (new file, 30 lines)
@@ -0,0 +1,30 @@
aiofiles==24.1.0
bcrypt==4.3.0
blinker==1.9.0
certifi==2025.8.3
cffi==1.17.1
charset-normalizer==3.4.2
click==8.2.1
cryptography==45.0.5
elastic-transport==9.1.0
elasticsearch==7.17.9
Flask==3.1.1
gitdb==4.0.12
GitPython==3.1.45
idna==3.10
itsdangerous==2.2.0
Jinja2==3.1.6
MarkupSafe==3.0.2
paramiko==3.5.1
pycparser==2.22
PyNaCl==1.5.0
python-dateutil==2.9.0.post0
python-dotenv==1.1.1
PyYAML==6.0.2
requests==2.32.4
six==1.17.0
smmap==5.0.2
tabulate==0.9.0
typing_extensions==4.14.1
urllib3==1.26.20
Werkzeug==3.1.3
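The elasticsearch pin matters: the orchestrator's docstring calls out Elasticsearch 7.x compatibility and uses the synchronous 7.x client API, so the installed client should stay on the 7.x line. A quick post-install sanity check, using only the standard library:

    # Verify the pinned client versions actually got installed
    from importlib.metadata import version

    assert version("elasticsearch").startswith("7."), "expected the 7.x client for the ES 7.x cluster"
    print(version("elasticsearch"), version("GitPython"), version("python-dotenv"))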