Initial documentation structure
scripts/orchestrator/core/config.yaml.template (new file, 56 lines)
@@ -0,0 +1,56 @@
# Network AI Orchestrator Configuration
elasticsearch:
  host: "192.168.100.85:9200"
  index: "netflow-*"
  verify_certs: false
  timeout: 30

analysis:
  interval_minutes: 60
  window_hours: 168
  min_traffic_bytes: 1000000

pr_creation:
  enabled: true
  frequency: "smart"          # Options: weekly, daily, manual, smart
  triggers:
    - high_traffic_anomaly    # Create a PR on a traffic spike
    - security_event          # Create a PR on a security issue
    - scheduled: "weekly"
  thresholds:
    traffic_spike: 200        # 200% increase triggers a PR
    new_hosts: 10             # 10+ new IPs triggers a PR
  day_of_week: "saturday"     # monday .. sunday (used by the weekly schedule)
  hour_of_day: 22             # 24-hour format (22 = 10 PM)
  skip_if_pending: true       # Don't create a PR if one is already open
  min_days_between: 7         # Minimum days between PRs

gitea:
  url: "https://git.salmutt.dev"
  repo: "sal/srx-config"
  token: "${GITEA_TOKEN}"     # Resolved from the GITEA_TOKEN environment variable
  branch: "main"
  labels: ["ai-generated", "auto-config", "pending-review"]

srx:
  host: "192.168.100.1"
  port: 830
  username: "netops"
  ssh_key: "/home/netops/.ssh/srx_key"

shared_storage:
  path: "/shared/ai-gitops"

state_tracking:
  enabled: true
  state_file: '/shared/ai-gitops/state/orchestrator_state.json'
  track_pr_history: true

ai:
  request_timeout: 120
  max_retries: 3

logging:
  level: "INFO"
  max_file_size: "100MB"
  retention_days: 30
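A note on the token placeholder: the orchestrator does not read secrets from this file directly. load_config() in orchestrator_main.py replaces the literal string "${GITEA_TOKEN}" with the GITEA_TOKEN environment variable (itself loaded from ~/.env via python-dotenv). A minimal stand-alone sketch of that substitution, with an illustrative config path:

    import os
    import yaml

    # Illustrative path; the orchestrator defaults to /home/netops/orchestrator/config.yaml
    with open("config.yaml") as f:
        config = yaml.safe_load(f)

    # Expand the placeholder the same way load_config() does
    if config.get("gitea", {}).get("token") == "${GITEA_TOKEN}":
        config["gitea"]["token"] = os.environ.get("GITEA_TOKEN", "")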
scripts/orchestrator/core/orchestrator_main.py (new file, 771 lines)
@@ -0,0 +1,771 @@
#!/usr/bin/env python3
"""
Enhanced Network AI Orchestrator - Production Version with Gitea Integration
Compatible with Elasticsearch 7.x
"""
import os
import sys
import json
import time
import logging
import signal
import hashlib
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional
import yaml
import uuid
import threading
from elasticsearch import Elasticsearch  # Using the sync client for ES 7.x
from git import Repo
import requests

# Load environment variables from the home directory .env
from dotenv import load_dotenv
env_path = Path.home() / '.env'  # Resolves to /home/netops/.env for the service account
load_dotenv(env_path)

# Phase 3: Gitea integration
from gitea_integration import GiteaIntegration

# Configure production logging
def setup_logging():
    """Configure comprehensive logging for production"""
    log_dir = Path("/home/netops/orchestrator/logs")
    log_dir.mkdir(exist_ok=True)

    log_file = log_dir / f"orchestrator_{datetime.now().strftime('%Y%m%d')}.log"

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_file),
            logging.StreamHandler(sys.stdout)
        ]
    )
    return logging.getLogger(__name__)

logger = setup_logging()
class NetworkOrchestrator:
    def __init__(self, config_path: str = "/home/netops/orchestrator/config.yaml"):
        """Initialize the orchestrator with configuration"""
        self.config = self.load_config(config_path)
        self.es_client = None
        self.git_repo = None
        self.running = True
        self.shared_dir = Path("/shared/ai-gitops")
        self.request_dir = self.shared_dir / "requests"
        self.response_dir = self.shared_dir / "responses"

        # Phase 3: persisted orchestrator state
        self.state = self.load_state()

        # Ensure directories exist
        self.request_dir.mkdir(parents=True, exist_ok=True)
        self.response_dir.mkdir(parents=True, exist_ok=True)

        logger.info("Orchestrator initialized")

    def should_create_pr(self):
        """Check if we should create a PR based on schedule and state"""
        if not self.config.get('pr_creation', {}).get('enabled', True):
            return False

        # Load state
        state = self.load_state()

        # Check if a pending PR exists
        if self.config['pr_creation'].get('skip_if_pending', True):
            if state.get('pending_pr', False):
                logger.info("Skipping PR - existing PR is pending")
                return False

        # Check frequency
        frequency = self.config['pr_creation'].get('frequency', 'weekly')

        if frequency == 'weekly':
            now = datetime.now()
            target_day = self.config['pr_creation'].get('day_of_week', 'saturday')
            target_hour = self.config['pr_creation'].get('hour_of_day', 5)

            # Convert day name to number
            days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
            target_day_num = days.index(target_day.lower())

            # Check if it's the right day and hour
            if now.weekday() != target_day_num or now.hour != target_hour:
                return False

        # Check minimum days between PRs
        if state.get('last_pr_created'):
            last_pr_date = datetime.fromisoformat(state['last_pr_created'])
            days_since = (datetime.now() - last_pr_date).days
            min_days = self.config['pr_creation'].get('min_days_between', 7)

            if days_since < min_days:
                logger.info(f"Skipping PR - only {days_since} days since last PR")
                return False

        return True

    def load_state(self):
        """Load orchestrator state"""
        state_file = self.config.get('state_tracking', {}).get('state_file', '/var/lib/orchestrator/state.json')
        if Path(state_file).exists():
            with open(state_file, 'r') as f:
                return json.load(f)
        return {}
    def check_smart_triggers(self, traffic_data: Dict) -> bool:
        """Check if any smart triggers are met"""
        if self.config['pr_creation'].get('frequency') != 'smart':
            return False

        triggers = self.config['pr_creation'].get('triggers', [])
        thresholds = self.config['pr_creation'].get('thresholds', {})

        # Load previous state for comparison
        state = self.load_state()
        last_data = state.get('last_traffic_data', {})

        # Check traffic spike
        if 'high_traffic_anomaly' in str(triggers):
            current_flows = sum(
                b['doc_count']
                for b in traffic_data.get('top_talkers', {}).get('buckets', [])
            )
            last_flows = last_data.get('total_flows', current_flows)

            if last_flows > 0:
                spike_percent = ((current_flows - last_flows) / last_flows) * 100
                if spike_percent >= thresholds.get('traffic_spike', 200):
                    logger.info(f"🚨 Traffic spike detected: {spike_percent:.1f}% increase!")
                    return True

        # Check new hosts
        if 'security_event' in str(triggers):
            current_ips = set(
                b['key']
                for b in traffic_data.get('top_talkers', {}).get('buckets', [])
            )
            last_ips = set(last_data.get('top_ips', []))

            new_hosts = current_ips - last_ips
            if len(new_hosts) >= thresholds.get('new_hosts', 10):
                logger.info(f"🚨 Security event: {len(new_hosts)} new hosts detected!")
                logger.info(f"   New IPs: {', '.join(list(new_hosts)[:5])}...")
                return True

        # Check weekly schedule fallback
        if any('scheduled' in str(t) for t in triggers):
            if self.should_create_pr():  # Use the existing weekly logic
                logger.info("📅 Weekly schedule triggered")
                return True

        # Save current data for the next comparison
        self.save_state({
            'last_traffic_data': {
                'total_flows': sum(
                    b['doc_count']
                    for b in traffic_data.get('top_talkers', {}).get('buckets', [])
                ),
                'top_ips': [
                    b['key']
                    for b in traffic_data.get('top_talkers', {}).get('buckets', [])
                ]
            }
        })

        return False
    def save_state(self, updates):
        """Save orchestrator state"""
        state_file = self.config.get('state_tracking', {}).get('state_file', '/var/lib/orchestrator/state.json')
        state = self.load_state()
        state.update(updates)
        state['last_updated'] = datetime.now().isoformat()

        # Make sure the state directory exists before writing
        Path(state_file).parent.mkdir(parents=True, exist_ok=True)
        with open(state_file, 'w') as f:
            json.dump(state, f, indent=2)
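
    # For reference (illustrative values only): the state file written above is plain
    # JSON, and after a few cycles it typically carries the keys set elsewhere in this
    # module (the save_state() calls plus the PR bookkeeping in create_gitea_pr and
    # check_pr_status), e.g.:
    #
    #   {
    #     "pending_pr": 42,
    #     "last_pr_created": "2025-01-04T22:00:00",
    #     "pr_url": "https://git.salmutt.dev/sal/srx-config/pulls/42",
    #     "last_traffic_data": {"total_flows": 58000, "top_ips": ["192.168.100.50"]},
    #     "last_analysis_run": "2025-01-04T22:00:05",
    #     "last_updated": "2025-01-04T22:00:05"
    #   }
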
    def load_config(self, config_path: str) -> Dict:
        """Load configuration from YAML file"""
        try:
            with open(config_path, 'r') as f:
                config = yaml.safe_load(f)
            logger.info(f"Configuration loaded from {config_path}")

            # Replace environment variables
            if 'gitea' in config and 'token' in config['gitea']:
                if config['gitea']['token'] == '${GITEA_TOKEN}':
                    config['gitea']['token'] = os.environ.get('GITEA_TOKEN', '')

            return config
        except Exception as e:
            logger.error(f"Failed to load config: {e}")
            # Fall back to defaults if the config cannot be loaded
            return {
                'elasticsearch': {
                    'host': '192.168.100.85:9200',
                    'index': 'netflow-*'
                },
                'analysis': {
                    'interval_minutes': 60,
                    'window_hours': 24
                },
                'gitea': {
                    'url': 'https://git.salmutt.dev',
                    'repo': 'sal/srx-config',
                    'token': os.environ.get('GITEA_TOKEN', '')
                }
            }
    def setup_elasticsearch(self):
        """Initialize Elasticsearch connection (synchronous for ES 7.x)"""
        try:
            es_config = self.config['elasticsearch']
            self.es_client = Elasticsearch(
                hosts=[es_config['host']],
                verify_certs=False,
                timeout=30
            )
            # Test the connection
            info = self.es_client.info()
            logger.info(f"Connected to Elasticsearch: {info['version']['number']}")
            return True
        except Exception as e:
            logger.error(f"Failed to connect to Elasticsearch: {e}")
            self.es_client = None
            return False
    def collect_traffic_data(self) -> Dict:
        """Collect traffic data from Elasticsearch (synchronous)"""
        if not self.es_client:
            logger.warning("Elasticsearch not connected, using mock data")
            return self.generate_mock_data()

        try:
            window_hours = self.config['analysis']['window_hours']
            query = {
                "query": {
                    "range": {
                        "@timestamp": {
                            "gte": f"now-{window_hours}h",
                            "lte": "now"
                        }
                    }
                },
                "size": 0,
                "aggs": {
                    "top_talkers": {
                        "terms": {
                            "field": "netflow.ipv4_src_addr",
                            "size": 20
                        },
                        "aggs": {
                            "bytes": {"sum": {"field": "netflow.in_bytes"}},
                            "packets": {"sum": {"field": "netflow.in_pkts"}}
                        }
                    },
                    "protocols": {
                        "terms": {
                            "field": "netflow.protocol",
                            "size": 10
                        }
                    },
                    "top_destinations": {
                        "terms": {
                            "field": "netflow.ipv4_dst_addr",
                            "size": 20
                        }
                    },
                    "vlans": {
                        "terms": {
                            "field": "vlan_id",
                            "size": 20
                        },
                        "aggs": {
                            "bytes": {"sum": {"field": "bytes"}}
                        }
                    },
                    "hourly_traffic": {
                        "date_histogram": {
                            "field": "@timestamp",
                            "calendar_interval": "hour"
                        },
                        "aggs": {
                            "bytes": {"sum": {"field": "bytes"}}
                        }
                    }
                }
            }

            result = self.es_client.search(
                index=self.config['elasticsearch']['index'],
                body=query
            )

            # Handle both ES 6.x and 7.x response formats for hits.total
            total_hits = result['hits']['total']
            if isinstance(total_hits, dict):
                total_count = total_hits['value']
            else:
                total_count = total_hits

            logger.info(f"Collected traffic data: {total_count} flows")
            return result['aggregations']

        except Exception as e:
            logger.error(f"Error collecting traffic data: {e}")
            return self.generate_mock_data()
    def generate_mock_data(self) -> Dict:
        """Generate mock traffic data for testing"""
        return {
            "top_talkers": {
                "buckets": [
                    {"key": "192.168.100.50", "doc_count": 15000,
                     "bytes": {"value": 5000000}, "packets": {"value": 10000}},
                    {"key": "192.168.100.51", "doc_count": 12000,
                     "bytes": {"value": 4000000}, "packets": {"value": 8000}},
                    {"key": "192.168.100.11", "doc_count": 8000,
                     "bytes": {"value": 2000000}, "packets": {"value": 5000}},
                    {"key": "10.0.0.5", "doc_count": 6000,
                     "bytes": {"value": 1500000}, "packets": {"value": 3000}}
                ]
            },
            "protocols": {
                "buckets": [
                    {"key": "tcp", "doc_count": 25000},
                    {"key": "udp", "doc_count": 15000},
                    {"key": "icmp", "doc_count": 2000}
                ]
            },
            "vlans": {
                "buckets": [
                    {"key": 100, "doc_count": 20000, "bytes": {"value": 8000000}},
                    {"key": 200, "doc_count": 15000, "bytes": {"value": 6000000}},
                    {"key": 300, "doc_count": 5000, "bytes": {"value": 2000000}}
                ]
            }
        }
    def request_ai_analysis(self, traffic_data: Dict) -> Optional[Dict]:
        """Send traffic data to the AI for analysis"""
        request_id = str(uuid.uuid4())
        request_file = self.request_dir / f"{request_id}.json"

        request_data = {
            "request_id": request_id,
            "timestamp": datetime.now().isoformat(),
            "type": "traffic_analysis",
            "data": traffic_data,
            "prompt": self.build_analysis_prompt(traffic_data)
        }

        try:
            with open(request_file, 'w') as f:
                json.dump(request_data, f, indent=2)
            logger.info(f"AI request created: {request_id}")

            # Wait for the response (with timeout)
            response = self.wait_for_ai_response(request_id, timeout=600)

            # Convert to pipeline format for PR creation
            if response and 'response' in response:
                pipeline_response = {
                    "suggestions": [],
                    "focus_area": "security",
                    "feedback_aware": True,
                    "timestamp": datetime.now().isoformat()
                }

                # Extract SRX config lines from the AI response
                ai_text = response.get('response', '')
                lines = ai_text.split('\n')

                for line in lines:
                    line = line.strip()
                    # Capture any line that looks like SRX config
                    if line.startswith('set '):
                        # Remove any trailing spaces and braces
                        clean_line = line.rstrip(' {')
                        pipeline_response['suggestions'].append({
                            "config": clean_line,
                            "reason": "AI-generated optimization"
                        })

                # Only save if we found config lines
                if pipeline_response['suggestions']:
                    pipeline_file = self.response_dir / f"pipeline_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{request_id[:8]}_response.json"
                    with open(pipeline_file, 'w') as f:
                        json.dump(pipeline_response, f, indent=2)
                    logger.info(f"Saved pipeline format response with {len(pipeline_response['suggestions'])} configs")

            return response

        except Exception as e:
            logger.error(f"Error requesting AI analysis: {e}")
            return None
    def build_analysis_prompt(self, traffic_data: Dict) -> str:
        """Build the prompt for AI analysis"""
        prompt = """Analyze this network traffic data and suggest optimizations for a Juniper SRX firewall:

Traffic Summary:
- Top Talkers: {}
- Active VLANs: {}
- Protocol Distribution: {}

Based on this data, please provide:
1. Security rule optimizations (as Juniper SRX configuration commands)
2. QoS improvements for high-traffic hosts
3. VLAN segmentation recommendations
4. Potential security concerns or anomalies

Format your response with specific Juniper SRX configuration commands that can be applied.
Include comments explaining each change."""

        # Extract key metrics
        top_ips = [b['key'] for b in traffic_data.get('top_talkers', {}).get('buckets', [])][:5]
        vlans = [str(b['key']) for b in traffic_data.get('vlans', {}).get('buckets', [])][:5]
        protocols = [str(b['key']) for b in traffic_data.get('protocols', {}).get('buckets', [])][:3]

        return prompt.format(
            ', '.join(top_ips) if top_ips else 'No data',
            ', '.join(vlans) if vlans else 'No VLANs',
            ', '.join(protocols) if protocols else 'No protocols'
        )
    def wait_for_ai_response(self, request_id: str, timeout: int = 120) -> Optional[Dict]:
        """Wait for the AI response file"""
        response_file = self.response_dir / f"{request_id}_response.json"
        start_time = time.time()

        while time.time() - start_time < timeout:
            if response_file.exists():
                try:
                    time.sleep(1)  # Give the AI time to finish writing
                    with open(response_file, 'r') as f:
                        response = json.load(f)
                    logger.info(f"AI response received: {request_id}")

                    # Log a snippet of the response
                    if 'response' in response:
                        snippet = response['response'][:200] + '...' if len(response['response']) > 200 else response['response']
                        logger.info(f"AI suggestion snippet: {snippet}")

                    # Clean up files
                    response_file.unlink()
                    (self.request_dir / f"{request_id}.json").unlink(missing_ok=True)
                    return response
                except Exception as e:
                    logger.error(f"Error reading AI response: {e}")
                    return None
            time.sleep(2)

        logger.warning(f"AI response timeout for {request_id}")
        return None
    def create_gitea_pr(self, ai_response: Dict = None) -> bool:
        """Create a pull request in Gitea with the suggested changes"""
        try:
            # If no AI response was provided, fall back to the latest saved one
            if not ai_response:
                latest_suggestion = self._get_latest_ai_suggestion()
                if not latest_suggestion:
                    logger.warning("No AI suggestions found to create PR")
                    return False

                # Read the suggestion file
                with open(latest_suggestion['path'], 'r') as f:
                    ai_response = json.load(f)

            # Check if we should create a PR
            if not self.should_create_pr():
                logger.info("Skipping PR creation - conditions not met")
                return False

            # Check for an existing pending PR
            if self.state.get('pending_pr'):
                logger.info(f"Skipping PR creation - pending PR exists: {self.state['pending_pr']}")
                return False

            logger.info("Creating Gitea pull request with AI suggestions...")

            # Initialize Gitea integration
            gitea = GiteaIntegration(self.config['gitea'])

            # Extract the SRX configuration (a list of suggestion dicts or a raw string)
            srx_config = ai_response.get('suggestions') or ai_response.get('response', '')
            if not srx_config or (isinstance(srx_config, str) and not srx_config.strip()):
                logger.warning("Empty or invalid suggestions, skipping PR creation")
                return False

            # Create the PR
            pr_info = gitea.create_pr_with_config(
                srx_config=srx_config,
                title=f"AI Network Configuration Suggestions - {datetime.now().strftime('%B %d, %Y')}",
                description=None  # Will auto-generate
            )

            if pr_info:
                # Update state with PR information
                self.state['pending_pr'] = pr_info['number']
                self.state['last_pr_created'] = datetime.now().isoformat()
                self.state['pr_url'] = pr_info['url']
                self.save_state(self.state)

                logger.info(f"Successfully created PR #{pr_info['number']}: {pr_info['url']}")

                # Log to a separate file for notifications/monitoring
                with open('/var/log/orchestrator/pr_created.log', 'a') as f:
                    f.write(f"{datetime.now().isoformat()} - Created PR #{pr_info['number']} - {pr_info['url']}\n")

                return True
            else:
                logger.error("Failed to create PR in Gitea")
                return False

        except Exception as e:
            logger.error(f"Error creating Gitea PR: {e}", exc_info=True)
            return False
    def _get_latest_ai_suggestion(self) -> Optional[Dict]:
        """Get the most recent AI suggestion file"""
        response_dir = '/shared/ai-gitops/responses'

        try:
            # List all response files (both '<request_id>_response.json' and the
            # 'pipeline_..._response.json' files written by request_ai_analysis)
            response_files = []
            for filename in os.listdir(response_dir):
                if filename.endswith('_response.json'):
                    filepath = os.path.join(response_dir, filename)
                    # Record the file modification time
                    mtime = os.path.getmtime(filepath)
                    response_files.append({
                        'path': filepath,
                        'filename': filename,
                        'mtime': mtime
                    })

            if not response_files:
                return None

            # Sort by modification time and return the latest
            response_files.sort(key=lambda x: x['mtime'], reverse=True)
            return response_files[0]

        except Exception as e:
            logger.error(f"Error finding latest AI suggestion: {e}")
            return None
    def check_pr_status(self):
        """Check the status of the pending pull request"""
        if not self.state.get('pending_pr'):
            return

        try:
            gitea = GiteaIntegration(self.config['gitea'])
            pr_status = gitea.get_pr_status(self.state['pending_pr'])

            if pr_status:
                logger.info(f"PR #{pr_status['number']} status: {pr_status['state']} (merged: {pr_status['merged']})")

                # If the PR is closed or merged, clear the pending_pr flag
                if pr_status['state'] == 'closed':
                    logger.info(f"PR #{pr_status['number']} has been closed")
                    self.state['pending_pr'] = None
                    self.state['last_pr_status'] = 'closed'
                    self.state['last_pr_closed'] = datetime.now().isoformat()

                    if pr_status['merged']:
                        self.state['last_pr_status'] = 'merged'
                        logger.info(f"PR #{pr_status['number']} was merged!")
                        # Mark for deployment
                        self.state['pending_deployment'] = True
                        self.state['deployment_pr'] = pr_status['number']

                    self.save_state(self.state)

        except Exception as e:
            logger.error(f"Error checking PR status: {e}")
    def run_analysis_cycle(self):
        """Run a complete analysis cycle"""
        logger.info("=" * 60)
        logger.info("Starting traffic analysis cycle")
        logger.info("=" * 60)

        try:
            # Always collect traffic data
            logger.info("Step 1: Collecting traffic data from Elasticsearch...")
            traffic_data = self.collect_traffic_data()

            if not traffic_data:
                logger.warning("No traffic data available, skipping analysis")
                return

            # Log a summary of the collected data
            top_talkers = traffic_data.get('top_talkers', {}).get('buckets', [])
            if top_talkers:
                logger.info(f"Found {len(top_talkers)} top talkers")
                logger.info(f"Top IP: {top_talkers[0]['key']} with {top_talkers[0]['doc_count']} flows")

            # Check smart triggers if configured
            trigger_fired = False
            if self.config['pr_creation'].get('frequency') == 'smart':
                trigger_fired = self.check_smart_triggers(traffic_data)
                if trigger_fired:
                    logger.info("🎯 Smart trigger activated - will create PR")

            # Always request AI analysis
            logger.info("Step 2: Requesting AI analysis...")
            ai_response = self.request_ai_analysis(traffic_data)

            # Save state for this analysis run
            self.save_state({
                'last_analysis_run': datetime.now().isoformat(),
                'last_analysis_data': {
                    'top_talkers_count': len(top_talkers),
                    'response_received': bool(ai_response)
                }
            })

            # Determine whether we should create a PR
            if self.config['pr_creation'].get('frequency') == 'smart':
                # Smart mode: create if a trigger fired
                should_create = trigger_fired
            else:
                # Regular mode: use the existing schedule logic
                should_create = self.should_create_pr()

            # Create the PR if conditions are met
            if should_create:
                if ai_response and (ai_response.get('response') or ai_response.get('suggestions')):
                    logger.info("Step 3: Creating PR with AI suggestions...")
                    if self.create_gitea_pr(ai_response):
                        logger.info("✓ PR created successfully")
                        if trigger_fired:
                            logger.info("  Reason: Smart trigger fired")
                    else:
                        logger.warning("Failed to create Gitea PR")
                else:
                    logger.info("No actionable suggestions from AI analysis")
            else:
                logger.info("No triggers met - analysis saved for future use")

        except Exception as e:
            logger.error(f"Error in analysis cycle: {e}")

        logger.info("=" * 60)
    def main_loop(self):
        """Main orchestrator loop"""
        logger.info("Starting Network AI Orchestrator")

        # Set up the Elasticsearch connection
        if not self.setup_elasticsearch():
            logger.warning("Running without Elasticsearch connection - using mock data")

        interval = self.config['analysis'].get('interval_minutes', 60) * 60

        # Run the first analysis immediately
        self.run_analysis_cycle()

        while self.running:
            try:
                logger.info(f"Next analysis scheduled in {interval // 60} minutes")
                logger.info(f"Next run at: {(datetime.now() + timedelta(seconds=interval)).strftime('%H:%M:%S')}")

                # Sleep in one-minute slices so shutdown stays responsive,
                # checking PR status every 15 minutes during the wait
                for i in range(int(interval / 60)):
                    if not self.running:
                        break

                    time.sleep(60)

                    # Check PR status every 15 minutes
                    if i % 15 == 14 and self.state.get('pending_pr'):
                        logger.info("Checking PR status...")
                        self.check_pr_status()

                if self.running:  # Check again after the wait
                    self.run_analysis_cycle()

            except KeyboardInterrupt:
                logger.info("Received keyboard interrupt")
                break
            except Exception as e:
                logger.error(f"Error in main loop: {e}")
                time.sleep(60)  # Wait before retrying

        logger.info("Orchestrator shutdown complete")
    def shutdown(self, signum=None, frame=None):
        """Graceful shutdown handler"""
        if signum:
            logger.info(f"Received signal {signum}, initiating shutdown...")
        else:
            logger.info("Initiating shutdown...")
        self.running = False
        if self.es_client:
            try:
                # Close the Elasticsearch connection
                self.es_client.transport.close()
            except Exception:
                pass


def main():
    """Main entry point"""
    orchestrator = NetworkOrchestrator()

    # Set up signal handlers
    signal.signal(signal.SIGTERM, orchestrator.shutdown)
    signal.signal(signal.SIGINT, orchestrator.shutdown)

    try:
        orchestrator.main_loop()
    except Exception as e:
        logger.error(f"Fatal error: {e}", exc_info=True)
        sys.exit(1)

    sys.exit(0)


if __name__ == "__main__":
    main()
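The AI worker that answers these requests is not part of this commit; the orchestrator only defines the file contract on /shared/ai-gitops (a request JSON carrying "request_id", "data" and "prompt", answered by a "<request_id>_response.json" whose "response" field is free-form text). A minimal sketch of a responder honoring that contract; run_model() is a placeholder for whatever model backend actually serves the prompts:

    #!/usr/bin/env python3
    import json
    import time
    from pathlib import Path

    REQUESTS = Path("/shared/ai-gitops/requests")
    RESPONSES = Path("/shared/ai-gitops/responses")

    def run_model(prompt: str) -> str:
        # Placeholder: call the real AI backend here
        return "set security policies ..."

    while True:
        for req_file in REQUESTS.glob("*.json"):
            request = json.loads(req_file.read_text())
            out = RESPONSES / f"{request['request_id']}_response.json"
            if out.exists():
                continue  # already answered; the orchestrator cleans up both files
            answer = run_model(request["prompt"])
            # wait_for_ai_response() only requires a "response" key in this file
            out.write_text(json.dumps({"response": answer}, indent=2))
        time.sleep(5)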
scripts/orchestrator/core/requirements.txt (new file, 30 lines)
@@ -0,0 +1,30 @@
aiofiles==24.1.0
bcrypt==4.3.0
blinker==1.9.0
certifi==2025.8.3
cffi==1.17.1
charset-normalizer==3.4.2
click==8.2.1
cryptography==45.0.5
elastic-transport==9.1.0
elasticsearch==7.17.9
Flask==3.1.1
gitdb==4.0.12
GitPython==3.1.45
idna==3.10
itsdangerous==2.2.0
Jinja2==3.1.6
MarkupSafe==3.0.2
paramiko==3.5.1
pycparser==2.22
PyNaCl==1.5.0
python-dateutil==2.9.0.post0
python-dotenv==1.1.1
PyYAML==6.0.2
requests==2.32.4
six==1.17.0
smmap==5.0.2
tabulate==0.9.0
typing_extensions==4.14.1
urllib3==1.26.20
Werkzeug==3.1.3
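The elasticsearch pin matters: the orchestrator's docstring calls out Elasticsearch 7.x compatibility and uses the synchronous 7.x client API, so the installed client should stay on the 7.x line. A quick post-install sanity check, using only the standard library:

    # Verify the pinned client versions actually got installed
    from importlib.metadata import version

    assert version("elasticsearch").startswith("7."), "expected the 7.x client for the ES 7.x cluster"
    print(version("elasticsearch"), version("GitPython"), version("python-dotenv"))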