==============================================
Data Directory Contents Dump
Generated on: Mon Jul 14 03:04:50 UTC 2025
Data Directory: /home/ubuntu/repos/sre-agent/backend/data
==============================================

Files to be processed:
=====================
  - data/k8s_data/deployments.json
  - data/k8s_data/events.json
  - data/k8s_data/nodes.json
  - data/k8s_data/pods.json
  - data/k8s_data/resource_usage.json
  - data/k8s_data/services.json
  - data/logs_data/access.log
  - data/logs_data/application.log
  - data/logs_data/error.log
  - data/logs_data/log_counts.json
  - data/logs_data/log_patterns.json
  - data/logs_data/performance.log
  - data/logs_data/security.log
  - data/metrics_data/availability.json
  - data/metrics_data/error_rates.json
  - data/metrics_data/resource_usage.json
  - data/metrics_data/response_times.json
  - data/metrics_data/throughput.json
  - data/metrics_data/trends.json
  - data/runbooks_data/common_resolutions.json
  - data/runbooks_data/escalation_procedures.json
  - data/runbooks_data/incident_playbooks.json
  - data/runbooks_data/service_recovery.json
  - data/runbooks_data/troubleshooting_guides.json


Processing: data/k8s_data/deployments.json
==================================================
FILE: data/k8s_data/deployments.json
==================================================
{
  "deployments": [
    {
      "name": "web-app-deployment",
      "namespace": "production",
      "replicas": 3,
      "available_replicas": 2,
      "unavailable_replicas": 1,
      "status": "Degraded",
      "created_at": "2024-01-10T00:00:00Z",
      "updated_at": "2024-01-15T10:00:00Z",
      "strategy": "RollingUpdate",
      "conditions": [
        {
          "type": "Progressing",
          "status": "True",
          "reason": "ReplicaSetUpdated",
          "message": "ReplicaSet 'web-app-deployment-5c8d7f9b6d' is progressing."
        },
        {
          "type": "Available",
          "status": "False",
          "reason": "MinimumReplicasUnavailable",
          "message": "Deployment does not have minimum availability."
        }
      ]
    },
    {
      "name": "database-deployment",
      "namespace": "production",
      "replicas": 1,
      "available_replicas": 0,
      "unavailable_replicas": 1,
      "status": "Failed",
      "created_at": "2024-01-01T00:00:00Z",
      "updated_at": "2024-01-15T09:00:00Z",
      "strategy": "Recreate",
      "conditions": [
        {
          "type": "Progressing",
          "status": "False",
          "reason": "ProgressDeadlineExceeded",
          "message": "Deployment 'database-deployment' has exceeded the progression deadline."
        }
      ]
    },
    {
      "name": "api-service-deployment",
      "namespace": "production",
      "replicas": 5,
      "available_replicas": 5,
      "unavailable_replicas": 0,
      "status": "Healthy",
      "created_at": "2024-01-05T00:00:00Z",
      "updated_at": "2024-01-15T08:00:00Z",
      "strategy": "RollingUpdate",
      "conditions": [
        {
          "type": "Available",
          "status": "True",
          "reason": "MinimumReplicasAvailable",
          "message": "Deployment has minimum availability."
        },
        {
          "type": "Progressing",
          "status": "True",
          "reason": "NewReplicaSetAvailable",
          "message": "ReplicaSet 'api-service-deployment-8d9e2f1b3c' has successfully progressed."
        }
      ]
    }
  ]
}

Processing: data/k8s_data/events.json
==================================================
FILE: data/k8s_data/events.json
==================================================
{
  "events": [
    {
      "type": "Warning",
      "reason": "FailedScheduling",
      "object": "pod/web-app-deployment-5c8d7f9b6d-k2n8p",
      "message": "0/3 nodes are available: 3 Insufficient memory",
      "timestamp": "2024-01-15T14:20:00Z",
      "namespace": "production",
      "count": 5
    },
    {
      "type": "Warning",
      "reason": "BackOff",
      "object": "pod/database-pod-7b9c4d8f2a-x5m1q",
      "message": "Back-off restarting failed container",
      "timestamp": "2024-01-15T14:25:00Z",
      "namespace": "production",
      "count": 15
    },
    {
      "type": "Warning",
      "reason": "Unhealthy",
      "object": "pod/web-app-deployment-5c8d7f9b6d-k2n8p",
      "message": "Readiness probe failed: HTTP probe failed with statuscode: 503",
      "timestamp": "2024-01-15T14:22:00Z",
      "namespace": "production",
      "count": 3
    },
    {
      "type": "Normal",
      "reason": "ScalingReplicaSet",
      "object": "deployment/api-service-deployment",
      "message": "Scaled up replica set api-service-deployment-8d9e2f1b3c to 5",
      "timestamp": "2024-01-15T08:00:00Z",
      "namespace": "production",
      "count": 1
    },
    {
      "type": "Warning",
      "reason": "FailedMount",
      "object": "pod/database-pod-7b9c4d8f2a-x5m1q",
      "message": "Unable to attach or mount volumes: unmounted volumes=[data], unattached volumes=[data]",
      "timestamp": "2024-01-15T14:24:00Z",
      "namespace": "production",
      "count": 10
    },
    {
      "type": "Warning",
      "reason": "CrashLoopBackOff",
      "object": "pod/database-pod-7b9c4d8f2a-x5m1q",
      "message": "Back-off 5m0s restarting failed container=database pod=database-pod-7b9c4d8f2a-x5m1q_production",
      "timestamp": "2024-01-15T14:26:00Z",
      "namespace": "production",
      "count": 15
    },
    {
      "type": "Warning",
      "reason": "Failed",
      "object": "pod/database-pod-7b9c4d8f2a-x5m1q",
      "message": "Error: couldn't find key config/database.conf in ConfigMap production/database-config",
      "timestamp": "2024-01-15T14:23:00Z",
      "namespace": "production",
      "count": 15
    },
    {
      "type": "Warning",
      "reason": "FailedPreStopHook",
      "object": "pod/database-pod-7b9c4d8f2a-x5m1q",
      "message": "PreStopHook failed: exec [/bin/sh -c pg_ctl stop -D /var/lib/postgresql/data -m fast] failed: exit status 1",
      "timestamp": "2024-01-15T14:22:30Z",
      "namespace": "production",
      "count": 12
    }
  ]
}

Processing: data/k8s_data/nodes.json
==================================================
FILE: data/k8s_data/nodes.json
==================================================
{
  "nodes": [
    {
      "name": "node-1",
      "status": "Ready",
      "roles": ["master", "worker"],
      "created_at": "2023-12-01T00:00:00Z",
      "capacity": {
        "cpu": "4",
        "memory": "8Gi",
        "pods": "110"
      },
      "allocatable": {
        "cpu": "3.5",
        "memory": "7Gi",
        "pods": "100"
      },
      "usage": {
        "cpu": "2.5",
        "memory": "6Gi",
        "pods": "45"
      },
      "conditions": [
        {
          "type": "Ready",
          "status": "True",
          "message": "kubelet is posting ready status"
        },
        {
          "type": "MemoryPressure",
          "status": "False"
        },
        {
          "type": "DiskPressure",
          "status": "False"
        }
      ]
    },
    {
      "name": "node-2",
      "status": "Ready",
      "roles": ["worker"],
      "created_at": "2023-12-01T00:00:00Z",
      "capacity": {
        "cpu": "4",
        "memory": "8Gi",
        "pods": "110"
      },
      "allocatable": {
        "cpu": "3.5",
        "memory": "7Gi",
        "pods": "100"
      },
      "usage": {
        "cpu": "1.0",
        "memory": "2Gi",
        "pods": "20"
      },
      "conditions": [
        {
          "type": "Ready",
          "status": "True",
          "message": "kubelet is posting ready status"
        },
        {
          "type": "MemoryPressure",
          "status": "False"
        },
        {
          "type": "DiskPressure",
          "status": "False"
        }
      ]
    },
    {
      "name": "node-3",
      "status": "Ready",
      "roles": ["worker"],
      "created_at": "2023-12-01T00:00:00Z",
      "capacity": {
        "cpu": "4",
        "memory": "8Gi",
        "pods": "110"
      },
      "allocatable": {
        "cpu": "3.5",
        "memory": "7Gi",
        "pods": "100"
      },
      "usage": {
        "cpu": "3.0",
        "memory": "5Gi",
        "pods": "60"
      },
      "conditions": [
        {
          "type": "Ready",
          "status": "True",
          "message": "kubelet is posting ready status"
        },
        {
          "type": "MemoryPressure",
          "status": "True",
          "message": "Node memory usage is above 85%"
        },
        {
          "type": "DiskPressure",
          "status": "False"
        }
      ]
    }
  ]
}

Processing: data/k8s_data/pods.json
==================================================
FILE: data/k8s_data/pods.json
==================================================
{
  "pods": [
    {
      "name": "web-app-deployment-5c8d7f9b6d-k2n8p",
      "namespace": "production",
      "status": "Running",
      "phase": "Running",
      "node": "node-1",
      "created_at": "2024-01-15T10:30:00Z",
      "resource_usage": {
        "cpu": "250m",
        "memory": "512Mi",
        "cpu_utilization": "75%",
        "memory_utilization": "85%"
      },
      "conditions": [
        {
          "type": "Ready",
          "status": "True",
          "last_transition_time": "2024-01-15T10:31:00Z"
        }
      ],
      "events": [
        {
          "type": "Warning",
          "reason": "MemoryPressure",
          "message": "Pod memory usage is approaching limits",
          "timestamp": "2024-01-15T14:25:00Z"
        }
      ]
    },
    {
      "name": "database-pod-7b9c4d8f2a-x5m1q",
      "namespace": "production",
      "status": "CrashLoopBackOff",
      "phase": "Failed",
      "node": "node-2",
      "created_at": "2024-01-15T09:15:00Z",
      "resource_usage": {
        "cpu": "0",
        "memory": "0",
        "cpu_utilization": "0%",
        "memory_utilization": "0%"
      },
      "restart_count": 15,
      "conditions": [
        {
          "type": "Ready",
          "status": "False",
          "last_transition_time": "2024-01-15T14:20:00Z"
        }
      ]
    },
    {
      "name": "api-service-deployment-8d9e2f1b3c-p7q2r",
      "namespace": "production",
      "status": "Running",
      "phase": "Running",
      "node": "node-3",
      "created_at": "2024-01-15T08:00:00Z",
      "resource_usage": {
        "cpu": "150m",
        "memory": "256Mi",
        "cpu_utilization": "45%",
        "memory_utilization": "60%"
      },
      "conditions": [
        {
          "type": "Ready",
          "status": "True",
          "last_transition_time": "2024-01-15T08:01:00Z"
        }
      ]
    },
    {
      "name": "product-catalog-service-6f7a8b9c2d-h4k3m",
      "namespace": "production",
      "status": "Running",
      "phase": "Running",
      "node": "node-1",
      "created_at": "2024-01-14T06:00:00Z",
      "resource_usage": {
        "cpu": "100m",
        "memory": "256Mi",
        "cpu_utilization": "25%",
        "memory_utilization": "40%"
      },
      "conditions": [
        {
          "type": "Ready",
          "status": "True",
          "last_transition_time": "2024-01-14T06:01:00Z"
        },
        {
          "type": "ContainersReady",
          "status": "True",
          "last_transition_time": "2024-01-14T06:01:00Z"
        },
        {
          "type": "PodScheduled",
          "status": "True",
          "last_transition_time": "2024-01-14T06:00:30Z"
        }
      ],
      "events": [
        {
          "type": "Normal",
          "reason": "Started",
          "message": "Container started successfully",
          "timestamp": "2024-01-14T06:01:00Z"
        },
        {
          "type": "Normal",
          "reason": "Pulled",
          "message": "Container image pulled successfully",
          "timestamp": "2024-01-14T06:00:45Z"
        }
      ]
    },
    {
      "name": "product-catalog-service-6f7a8b9c2d-m8n2p",
      "namespace": "production",
      "status": "Running",
      "phase": "Running",
      "node": "node-2",
      "created_at": "2024-01-14T06:00:00Z",
      "resource_usage": {
        "cpu": "95m",
        "memory": "248Mi",
        "cpu_utilization": "24%",
        "memory_utilization": "39%"
      },
      "conditions": [
        {
          "type": "Ready",
          "status": "True",
          "last_transition_time": "2024-01-14T06:01:00Z"
        },
        {
          "type": "ContainersReady",
          "status": "True",
          "last_transition_time": "2024-01-14T06:01:00Z"
        },
        {
          "type": "PodScheduled",
          "status": "True",
          "last_transition_time": "2024-01-14T06:00:30Z"
        }
      ],
      "events": [
        {
          "type": "Normal",
          "reason": "Started",
          "message": "Container started successfully",
          "timestamp": "2024-01-14T06:01:00Z"
        }
      ]
    }
  ]
}

Processing: data/k8s_data/resource_usage.json
==================================================
FILE: data/k8s_data/resource_usage.json
==================================================
{
  "resource_usage": {
    "timestamp": "2024-01-15T14:30:00Z",
    "cluster": {
      "cpu": {
        "total": "12",
        "used": "6.5",
        "percentage": 54.2
      },
      "memory": {
        "total": "24Gi",
        "used": "13Gi",
        "percentage": 54.2
      },
      "pods": {
        "total": 300,
        "used": 125,
        "percentage": 41.7
      }
    },
    "namespace_usage": {
      "production": {
        "cpu": "4.5",
        "memory": "10Gi",
        "pods": 85
      },
      "staging": {
        "cpu": "1.5",
        "memory": "2.5Gi",
        "pods": 30
      },
      "default": {
        "cpu": "0.5",
        "memory": "0.5Gi",
        "pods": 10
      }
    },
    "top_consumers": [
      {
        "pod": "web-app-deployment-5c8d7f9b6d-k2n8p",
        "namespace": "production",
        "cpu": "250m",
        "memory": "512Mi"
      },
      {
        "pod": "api-service-deployment-8d9e2f1b3c-p7q2r",
        "namespace": "production",
        "cpu": "150m",
        "memory": "256Mi"
      }
    ]
  }
}

Processing: data/k8s_data/services.json
==================================================
FILE: data/k8s_data/services.json
==================================================
{
  "services": [
    {
      "name": "web-app-service",
      "namespace": "production",
      "type": "LoadBalancer",
      "cluster_ip": "10.0.0.100",
      "external_ip": "203.0.113.10",
      "ports": [
        {
          "name": "http",
          "port": 80,
          "target_port": 8080,
          "protocol": "TCP"
        }
      ],
      "selector": {
        "app": "web-app"
      },
      "endpoints": [
        "10.1.1.10:8080",
        "10.1.1.11:8080"
      ],
      "status": "Active"
    },
    {
      "name": "database-service",
      "namespace": "production",
      "type": "ClusterIP",
      "cluster_ip": "10.0.0.101",
      "ports": [
        {
          "name": "postgres",
          "port": 5432,
          "target_port": 5432,
          "protocol": "TCP"
        }
      ],
      "selector": {
        "app": "database"
      },
      "endpoints": [],
      "status": "No endpoints"
    },
    {
      "name": "api-service",
      "namespace": "production",
      "type": "NodePort",
      "cluster_ip": "10.0.0.102",
      "ports": [
        {
          "name": "api",
          "port": 8000,
          "target_port": 8000,
          "node_port": 30080,
          "protocol": "TCP"
        }
      ],
      "selector": {
        "app": "api-service"
      },
      "endpoints": [
        "10.1.1.20:8000",
        "10.1.1.21:8000",
        "10.1.1.22:8000",
        "10.1.1.23:8000",
        "10.1.1.24:8000"
      ],
      "status": "Active"
    },
    {
      "name": "product-catalog-service",
      "namespace": "production",
      "type": "ClusterIP",
      "cluster_ip": "10.0.0.150",
      "ports": [
        {
          "name": "http",
          "port": 8080,
          "target_port": 8080,
          "protocol": "TCP"
        },
        {
          "name": "grpc",
          "port": 9090,
          "target_port": 9090,
          "protocol": "TCP"
        }
      ],
      "selector": {
        "app": "product-catalog-service"
      },
      "endpoints": [
        "10.1.1.50:8080",
        "10.1.1.51:8080"
      ],
      "status": "Active",
      "load_balancer": {
        "algorithm": "round_robin",
        "health_check": {
          "path": "/health",
          "interval": "10s",
          "timeout": "5s",
          "healthy_threshold": 2,
          "unhealthy_threshold": 3
        }
      }
    }
  ]
}

Processing: data/logs_data/access.log
==================================================
FILE: data/logs_data/access.log
==================================================
192.168.1.100 - - [15/Jan/2024:14:21:00 +0000] "GET /api/users HTTP/1.1" 200 1542 333
192.168.1.101 - - [15/Jan/2024:14:21:15 +0000] "POST /api/users HTTP/1.1" 201 245 125
192.168.1.102 - - [15/Jan/2024:14:21:30 +0000] "GET /api/products HTTP/1.1" 200 3456 445
192.168.1.103 - - [15/Jan/2024:14:22:00 +0000] "GET /api/users/123 HTTP/1.1" 200 512 1250
192.168.1.104 - - [15/Jan/2024:14:22:15 +0000] "PUT /api/users/123 HTTP/1.1" 200 512 1350
192.168.1.105 - - [15/Jan/2024:14:22:30 +0000] "GET /api/orders HTTP/1.1" 200 4567 1450
192.168.1.106 - - [15/Jan/2024:14:23:00 +0000] "GET /api/users/bulk HTTP/1.1" 500 145 2500
192.168.1.107 - - [15/Jan/2024:14:23:15 +0000] "GET /api/health HTTP/1.1" 503 89 50
192.168.1.108 - - [15/Jan/2024:14:23:30 +0000] "GET /api/users/789 HTTP/1.1" 500 145 5000
192.168.1.109 - - [15/Jan/2024:14:23:45 +0000] "GET /api/users/790 HTTP/1.1" 500 145 5000
192.168.1.110 - - [15/Jan/2024:14:24:00 +0000] "GET /api/status HTTP/1.1" 503 89 25
192.168.1.111 - - [15/Jan/2024:14:24:15 +0000] "GET /api/health HTTP/1.1" 503 89 20
192.168.1.112 - - [15/Jan/2024:14:24:30 +0000] "GET /api/users/bulk HTTP/1.1" 500 145 100
192.168.1.113 - - [15/Jan/2024:14:25:00 +0000] "GET /api/health HTTP/1.1" 503 89 15

Processing: data/logs_data/application.log
==================================================
FILE: data/logs_data/application.log
==================================================
2024-01-15T14:20:15.123Z [INFO] web-service Starting application server on port 8080
2024-01-15T14:20:16.456Z [INFO] web-service Database connection pool initialized with 10 connections
2024-01-15T14:20:17.789Z [INFO] web-service Health check endpoint registered at /health
2024-01-15T14:21:00.123Z [INFO] web-service Processing request from 192.168.1.100 - GET /api/users
2024-01-15T14:21:00.456Z [INFO] web-service Request completed in 333ms - Status: 200
2024-01-15T14:22:15.789Z [WARN] web-service Slow query detected: SELECT * FROM users WHERE status='active' - Duration: 1250ms
2024-01-15T14:23:45.234Z [WARN] web-service Database connection pool exhausted, waiting for available connection
2024-01-15T14:23:46.567Z [ERROR] web-service Database connection timeout after 5000ms
2024-01-15T14:23:47.890Z [ERROR] web-service Failed to process request: java.sql.SQLException: Connection timed out
2024-01-15T14:24:00.123Z [INFO] web-service Circuit breaker activated for database service
2024-01-15T14:24:15.456Z [WARN] web-service Memory usage at 82% - consider scaling up
2024-01-15T14:24:30.789Z [ERROR] web-service java.lang.OutOfMemoryError: Java heap space at UserService.loadAllUsers(UserService.java:45)
2024-01-15T14:25:10.123Z [ERROR] web-service OutOfMemoryError: Java heap space
2024-01-15T14:25:11.456Z [ERROR] web-service Application shutting down due to critical error
2024-01-15T14:25:12.789Z [INFO] web-service Graceful shutdown initiated
2024-01-15T14:25:13.123Z [INFO] web-service All active connections closed
2024-01-15T14:25:14.456Z [INFO] web-service Application terminated
2024-01-15T14:22:00.111Z [ERROR] api-service Failed to connect to database: Connection refused (Connection refused)
2024-01-15T14:22:00.222Z [ERROR] api-service Database pod database-pod-7b9c4d8f2a-x5m1q is not responding
2024-01-15T14:22:00.333Z [WARN] api-service Attempting database connection retry 1 of 3
2024-01-15T14:22:05.444Z [ERROR] api-service Database connection failed after all retries
2024-01-15T14:22:05.555Z [ERROR] api-service Service degraded: Unable to serve data requests
2024-01-15T14:23:00.666Z [ERROR] web-service Database health check failed: no response from database-pod-7b9c4d8f2a-x5m1q
2024-01-15T14:23:00.777Z [WARN] web-service Switching to cache-only mode due to database unavailability
2024-01-15T14:23:00.888Z [ERROR] web-service User authentication failed: Cannot verify credentials without database
2024-01-15T14:24:00.999Z [CRITICAL] web-service Database has been unavailable for 120 seconds
2024-01-15T14:24:01.111Z [INFO] web-service Triggering database incident alert
2024-01-15T14:25:00.222Z [ERROR] api-service All database-dependent endpoints returning 503 Service Unavailable
2024-01-15T14:18:00.123Z [INFO] product-catalog-service Starting product catalog service on port 8080
2024-01-15T14:18:00.234Z [INFO] product-catalog-service Cache layer initialized successfully with Redis cluster
2024-01-15T14:18:00.345Z [INFO] product-catalog-service Database connection pool initialized with 20 connections
2024-01-15T14:18:00.456Z [INFO] product-catalog-service Health check endpoint registered at /health
2024-01-15T14:18:00.567Z [INFO] product-catalog-service gRPC server started on port 9090
2024-01-15T14:18:00.678Z [INFO] product-catalog-service Product index loaded successfully - 2,500,000 products
2024-01-15T14:18:00.789Z [INFO] product-catalog-service Service ready to accept traffic
2024-01-15T14:19:00.123Z [INFO] product-catalog-service Processing request: GET /products/search?q=laptop - 25ms
2024-01-15T14:19:05.234Z [INFO] product-catalog-service Processing request: GET /products/category/electronics - 18ms
2024-01-15T14:19:10.345Z [INFO] product-catalog-service Processing request: GET /products/12345 - 12ms
2024-01-15T14:19:15.456Z [INFO] product-catalog-service Cache hit rate: 94.2% - excellent performance
2024-01-15T14:19:20.567Z [INFO] product-catalog-service Processing request: GET /products/recommendations/user/67890 - 22ms
2024-01-15T14:19:25.678Z [INFO] product-catalog-service Health check passed - all systems operational
2024-01-15T14:19:30.789Z [INFO] product-catalog-service Processing request: GET /products/search?q=shoes&category=footwear - 16ms
2024-01-15T14:19:35.123Z [INFO] product-catalog-service Product metadata sync completed successfully
2024-01-15T14:19:40.234Z [INFO] product-catalog-service Processing request: GET /products/deals/daily - 20ms
2024-01-15T14:19:45.345Z [INFO] product-catalog-service Background indexing completed - 50,000 new products indexed
2024-01-15T14:19:50.456Z [INFO] product-catalog-service Processing request: GET /products/brands/nike - 14ms
2024-01-15T14:19:55.567Z [INFO] product-catalog-service Memory usage optimal at 38% - well within limits
2024-01-15T14:20:00.678Z [INFO] product-catalog-service Processing request: GET /products/reviews/98765 - 19ms

Processing: data/logs_data/error.log
==================================================
FILE: data/logs_data/error.log
==================================================
[
  {
    "timestamp": "2024-01-15T14:23:46.567Z",
    "level": "ERROR",
    "service": "web-service",
    "message": "Database connection timeout after 5000ms",
    "stack_trace": "java.sql.SQLException: Connection timed out\n\tat com.zaxxer.hikari.pool.HikariPool.getConnection(HikariPool.java:183)\n\tat com.example.DatabasePool.getConnection(DatabasePool.java:45)\n\tat com.example.UserService.getUser(UserService.java:23)\n\tat com.example.UserController.getUser(UserController.java:67)",
    "correlation_id": "req-123456",
    "user_id": "user-789",
    "endpoint": "/api/users/789"
  },
  {
    "timestamp": "2024-01-15T14:23:47.890Z",
    "level": "ERROR",
    "service": "web-service",
    "message": "Failed to process request: java.sql.SQLException: Connection timed out",
    "stack_trace": "java.sql.SQLException: Connection timed out\n\tat com.example.UserService.getUser(UserService.java:25)\n\tat com.example.UserController.getUser(UserController.java:67)",
    "correlation_id": "req-123457",
    "user_id": "user-790",
    "endpoint": "/api/users/790"
  },
  {
    "timestamp": "2024-01-15T14:24:30.789Z",
    "level": "ERROR",
    "service": "web-service",
    "message": "java.lang.OutOfMemoryError: Java heap space",
    "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n\tat java.util.Arrays.copyOf(Arrays.java:3210)\n\tat java.util.Arrays.copyOf(Arrays.java:3181)\n\tat java.util.ArrayList.grow(ArrayList.java:237)\n\tat java.util.ArrayList.ensureCapacity(ArrayList.java:218)\n\tat com.example.UserService.loadAllUsers(UserService.java:45)",
    "correlation_id": "req-123458",
    "endpoint": "/api/users/bulk"
  },
  {
    "timestamp": "2024-01-15T14:25:10.123Z",
    "level": "ERROR",
    "service": "web-service",
    "message": "OutOfMemoryError: Java heap space",
    "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n\tat java.util.ArrayList.grow(ArrayList.java:237)\n\tat java.util.ArrayList.ensureCapacity(ArrayList.java:218)",
    "correlation_id": "req-789012"
  },
  {
    "timestamp": "2024-01-15T14:25:11.456Z",
    "level": "ERROR",
    "service": "web-service",
    "message": "Application shutting down due to critical error",
    "error_type": "CRITICAL_FAILURE",
    "shutdown_reason": "OutOfMemoryError"
  },
  {
    "timestamp": "2024-01-15T14:22:30.123Z",
    "level": "ERROR",
    "service": "database",
    "pod": "database-pod-7b9c4d8f2a-x5m1q",
    "message": "FATAL: could not open configuration file '/etc/postgresql/database.conf': No such file or directory",
    "container": "postgres",
    "namespace": "production"
  },
  {
    "timestamp": "2024-01-15T14:22:31.456Z",
    "level": "ERROR",
    "service": "database",
    "pod": "database-pod-7b9c4d8f2a-x5m1q",
    "message": "PostgreSQL init process failed: missing required configuration",
    "container": "postgres",
    "namespace": "production",
    "exit_code": 1
  },
  {
    "timestamp": "2024-01-15T14:23:00.789Z",
    "level": "ERROR",
    "service": "database",
    "pod": "database-pod-7b9c4d8f2a-x5m1q",
    "message": "FATAL: data directory '/var/lib/postgresql/data' has invalid permissions",
    "details": "Permission denied: data directory must be owned by the postgres user",
    "container": "postgres",
    "namespace": "production"
  },
  {
    "timestamp": "2024-01-15T14:23:30.012Z",
    "level": "ERROR",
    "service": "database",
    "pod": "database-pod-7b9c4d8f2a-x5m1q",
    "message": "ERROR: ConfigMap 'database-config' not found in namespace 'production'",
    "container": "postgres",
    "namespace": "production",
    "config_path": "/etc/postgresql/database.conf"
  },
  {
    "timestamp": "2024-01-15T14:24:30.345Z",
    "level": "ERROR",
    "service": "database",
    "pod": "database-pod-7b9c4d8f2a-x5m1q",
    "message": "Container postgres failed liveness probe, will be restarted",
    "probe_type": "liveness",
    "probe_result": "connection refused on port 5432",
    "container": "postgres",
    "namespace": "production"
  }
]

Processing: data/logs_data/log_counts.json
==================================================
FILE: data/logs_data/log_counts.json
==================================================
{
  "error_counts": {
    "total_count": 45,
    "by_service": [
      {"group": "web-service", "count": 35, "percentage": 77.8},
      {"group": "api-service", "count": 8, "percentage": 17.8},
      {"group": "database-service", "count": 2, "percentage": 4.4}
    ],
    "by_level": [
      {"group": "ERROR", "count": 30, "percentage": 66.7},
      {"group": "CRITICAL", "count": 15, "percentage": 33.3}
    ]
  },
  "all_counts": {
    "total_count": 150,
    "by_level": [
      {"group": "INFO", "count": 100, "percentage": 66.7},
      {"group": "WARN", "count": 30, "percentage": 20.0},
      {"group": "ERROR", "count": 20, "percentage": 13.3}
    ]
  }
}

Processing: data/logs_data/log_patterns.json
==================================================
FILE: data/logs_data/log_patterns.json
==================================================
{
  "patterns": [
    {
      "pattern": "Database connection timeout",
      "count": 15,
      "first_seen": "2024-01-15T14:23:46.567Z",
      "last_seen": "2024-01-15T14:24:30.789Z",
      "severity": "ERROR",
      "occurrences": [
        {
          "timestamp": "2024-01-15T14:23:46.567Z",
          "service": "web-service",
          "message": "Database connection timeout after 5000ms"
        },
        {
          "timestamp": "2024-01-15T14:23:47.890Z",
          "service": "web-service",
          "message": "Failed to process request: java.sql.SQLException: Connection timed out"
        }
      ]
    },
    {
      "pattern": "OutOfMemoryError",
      "count": 8,
      "first_seen": "2024-01-15T14:24:30.789Z",
      "last_seen": "2024-01-15T14:25:10.123Z",
      "severity": "CRITICAL",
      "occurrences": [
        {
          "timestamp": "2024-01-15T14:24:30.789Z",
          "service": "web-service",
          "message": "java.lang.OutOfMemoryError: Java heap space at UserService.loadAllUsers(UserService.java:45)"
        },
        {
          "timestamp": "2024-01-15T14:25:10.123Z",
          "service": "web-service",
          "message": "OutOfMemoryError: Java heap space"
        }
      ]
    },
    {
      "pattern": "Slow query detected",
      "count": 25,
      "first_seen": "2024-01-15T14:22:15.789Z",
      "last_seen": "2024-01-15T14:23:45.234Z",
      "severity": "WARN",
      "occurrences": [
        {
          "timestamp": "2024-01-15T14:22:15.789Z",
          "service": "web-service",
          "message": "Slow query detected: SELECT * FROM users WHERE status='active' - Duration: 1250ms"
        }
      ]
    }
  ]
}

Processing: data/logs_data/performance.log
==================================================
FILE: data/logs_data/performance.log
==================================================
{
  "performance_logs": [
    {
      "timestamp": "2024-01-15T14:20:00Z",
      "service": "web-service",
      "endpoint": "/api/users",
      "method": "GET",
      "response_time_ms": 150,
      "db_query_time_ms": 50,
      "cache_hit": true,
      "cpu_usage_percent": 25,
      "memory_usage_mb": 512
    },
    {
      "timestamp": "2024-01-15T14:21:00Z",
      "service": "web-service",
      "endpoint": "/api/users",
      "method": "GET",
      "response_time_ms": 333,
      "db_query_time_ms": 250,
      "cache_hit": false,
      "cpu_usage_percent": 35,
      "memory_usage_mb": 650
    },
    {
      "timestamp": "2024-01-15T14:22:00Z",
      "service": "web-service",
      "endpoint": "/api/users/123",
      "method": "GET",
      "response_time_ms": 1250,
      "db_query_time_ms": 1200,
      "cache_hit": false,
      "cpu_usage_percent": 55,
      "memory_usage_mb": 780,
      "slow_query": true
    },
    {
      "timestamp": "2024-01-15T14:23:00Z",
      "service": "web-service",
      "endpoint": "/api/users/bulk",
      "method": "GET",
      "response_time_ms": 2500,
      "db_query_time_ms": 2400,
      "cache_hit": false,
      "cpu_usage_percent": 75,
      "memory_usage_mb": 920,
      "slow_query": true,
      "warning": "High memory usage detected"
    },
    {
      "timestamp": "2024-01-15T14:24:00Z",
      "service": "web-service",
      "endpoint": "/api/users/bulk",
      "method": "GET",
      "response_time_ms": 100,
      "error": "OutOfMemoryError",
      "cpu_usage_percent": 95,
      "memory_usage_mb": 1024,
      "critical": true
    }
  ]
}

Processing: data/logs_data/security.log
==================================================
FILE: data/logs_data/security.log
==================================================
{
  "security_events": [
    {
      "timestamp": "2024-01-15T14:15:00Z",
      "event_type": "AUTH_SUCCESS",
      "user_id": "user-123",
      "ip_address": "192.168.1.100",
      "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
      "endpoint": "/api/login"
    },
    {
      "timestamp": "2024-01-15T14:16:00Z",
      "event_type": "AUTH_FAILURE",
      "user_id": "user-456",
      "ip_address": "192.168.1.200",
      "user_agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
      "endpoint": "/api/login",
      "reason": "Invalid credentials",
      "attempts": 3
    },
    {
      "timestamp": "2024-01-15T14:17:00Z",
      "event_type": "RATE_LIMIT_EXCEEDED",
      "ip_address": "192.168.1.30",
      "endpoint": "/api/users",
      "requests_per_minute": 150,
      "limit": 100
    },
    {
      "timestamp": "2024-01-15T14:18:00Z",
      "event_type": "SUSPICIOUS_ACTIVITY",
      "ip_address": "192.168.1.40",
      "pattern": "SQL_INJECTION_ATTEMPT",
      "payload": "'; DROP TABLE users; --",
      "endpoint": "/api/search",
      "blocked": true
    },
    {
      "timestamp": "2024-01-15T14:19:00Z",
      "event_type": "API_KEY_EXPIRED",
      "api_key_id": "key-789",
      "client_id": "client-xyz",
      "endpoint": "/api/data"
    }
  ]
}

Processing: data/metrics_data/availability.json
==================================================
FILE: data/metrics_data/availability.json
==================================================
{
  "availability_metrics": [
    {
      "timestamp": "2024-01-15T14:20:00Z",
      "service": "web-service",
      "uptime_seconds": 86400,
      "availability_percentage": 99.99,
      "health_check_success": 1440,
      "health_check_total": 1440,
      "last_downtime": "2024-01-14T02:00:00Z",
      "downtime_duration_seconds": 300
    },
    {
      "timestamp": "2024-01-15T14:21:00Z",
      "service": "web-service",
      "uptime_seconds": 86460,
      "availability_percentage": 99.95,
      "health_check_success": 1439,
      "health_check_total": 1441,
      "last_downtime": "2024-01-14T02:00:00Z",
      "downtime_duration_seconds": 300
    },
    {
      "timestamp": "2024-01-15T14:22:00Z",
      "service": "web-service",
      "uptime_seconds": 86520,
      "availability_percentage": 99.90,
      "health_check_success": 1438,
      "health_check_total": 1442,
      "last_downtime": "2024-01-14T02:00:00Z",
      "downtime_duration_seconds": 300
    },
    {
      "timestamp": "2024-01-15T14:23:00Z",
      "service": "web-service",
      "uptime_seconds": 86580,
      "availability_percentage": 99.50,
      "health_check_success": 1435,
      "health_check_total": 1443,
      "last_downtime": "2024-01-15T14:23:00Z",
      "downtime_duration_seconds": 0,
      "status": "degraded"
    },
    {
      "timestamp": "2024-01-15T14:24:00Z",
      "service": "web-service",
      "uptime_seconds": 86640,
      "availability_percentage": 99.00,
      "health_check_success": 1430,
      "health_check_total": 1444,
      "last_downtime": "2024-01-15T14:24:00Z",
      "downtime_duration_seconds": 60,
      "status": "critical"
    },
    {
      "timestamp": "2024-01-15T14:20:00Z",
      "service": "database",
      "uptime_seconds": 0,
      "availability_percentage": 0.0,
      "health_check_success": 0,
      "health_check_total": 1440,
      "last_downtime": "2024-01-15T09:15:00Z",
      "downtime_duration_seconds": 18300,
      "status": "critical",
      "pod": "database-pod-7b9c4d8f2a-x5m1q"
    },
    {
      "timestamp": "2024-01-15T14:21:00Z",
      "service": "database",
      "uptime_seconds": 0,
      "availability_percentage": 0.0,
      "health_check_success": 0,
      "health_check_total": 1441,
      "last_downtime": "2024-01-15T09:15:00Z",
      "downtime_duration_seconds": 18360,
      "status": "critical",
      "pod": "database-pod-7b9c4d8f2a-x5m1q"
    },
    {
      "timestamp": "2024-01-15T14:22:00Z",
      "service": "database",
      "uptime_seconds": 0,
      "availability_percentage": 0.0,
      "health_check_success": 0,
      "health_check_total": 1442,
      "last_downtime": "2024-01-15T09:15:00Z",
      "downtime_duration_seconds": 18420,
      "status": "critical",
      "restart_count": 15,
      "pod": "database-pod-7b9c4d8f2a-x5m1q"
    },
    {
      "timestamp": "2024-01-15T14:18:00Z",
      "service": "product-catalog-service",
      "uptime_seconds": 867600,
      "availability_percentage": 99.99,
      "health_check_success": 8676,
      "health_check_total": 8676,
      "last_downtime": "2024-01-05T03:00:00Z",
      "downtime_duration_seconds": 120,
      "status": "healthy",
      "pod": "product-catalog-service-6f7a8b9c2d-h4k3m"
    },
    {
      "timestamp": "2024-01-15T14:19:00Z",
      "service": "product-catalog-service",
      "uptime_seconds": 867660,
      "availability_percentage": 99.99,
      "health_check_success": 8677,
      "health_check_total": 8677,
      "last_downtime": "2024-01-05T03:00:00Z",
      "downtime_duration_seconds": 120,
      "status": "healthy",
      "pod": "product-catalog-service-6f7a8b9c2d-h4k3m"
    },
    {
      "timestamp": "2024-01-15T14:20:00Z",
      "service": "product-catalog-service",
      "uptime_seconds": 867720,
      "availability_percentage": 99.99,
      "health_check_success": 8678,
      "health_check_total": 8678,
      "last_downtime": "2024-01-05T03:00:00Z",
      "downtime_duration_seconds": 120,
      "status": "healthy",
      "pod": "product-catalog-service-6f7a8b9c2d-m8n2p"
    }
  ]
}

Processing: data/metrics_data/error_rates.json
==================================================
FILE: data/metrics_data/error_rates.json
==================================================
{
  "error_rates": [
    {
      "timestamp": "2024-01-15T14:20:00Z",
      "service": "web-service",
      "total_requests": 1000,
      "error_count": 5,
      "error_rate": 0.5,
      "status_codes": {
        "200": 950,
        "201": 20,
        "400": 20,
        "401": 5,
        "500": 3,
        "503": 2
      },
      "error_types": {
        "client_errors": 25,
        "server_errors": 5
      }
    },
    {
      "timestamp": "2024-01-15T14:21:00Z",
      "service": "web-service",
      "total_requests": 900,
      "error_count": 45,
      "error_rate": 5.0,
      "status_codes": {
        "200": 820,
        "201": 15,
        "400": 20,
        "401": 5,
        "500": 30,
        "503": 10
      },
      "error_types": {
        "client_errors": 25,
        "server_errors": 40
      }
    },
    {
      "timestamp": "2024-01-15T14:22:00Z",
      "service": "web-service",
      "total_requests": 700,
      "error_count": 140,
      "error_rate": 20.0,
      "status_codes": {
        "200": 500,
        "201": 10,
        "400": 50,
        "401": 10,
        "500": 100,
        "503": 30
      },
      "error_types": {
        "client_errors": 60,
        "server_errors": 130
      }
    },
    {
      "timestamp": "2024-01-15T14:23:00Z",
      "service": "web-service",
      "total_requests": 500,
      "error_count": 250,
      "error_rate": 50.0,
      "status_codes": {
        "200": 200,
        "201": 5,
        "400": 45,
        "401": 5,
        "500": 200,
        "503": 45
      },
      "error_types": {
        "client_errors": 50,
        "server_errors": 245
      }
    },
    {
      "timestamp": "2024-01-15T14:24:00Z",
      "service": "web-service",
      "total_requests": 200,
      "error_count": 150,
      "error_rate": 75.0,
      "status_codes": {
        "200": 40,
        "201": 2,
        "400": 8,
        "401": 2,
        "500": 120,
        "503": 28
      },
      "error_types": {
        "client_errors": 10,
        "server_errors": 148
      }
    },
    {
      "timestamp": "2024-01-15T14:20:00Z",
      "service": "database",
      "total_requests": 0,
      "error_count": 0,
      "error_rate": 100.0,
      "status_codes": {},
      "error_types": {
        "connection_refused": 1440,
        "startup_failures": 15
      },
      "pod": "database-pod-7b9c4d8f2a-x5m1q",
      "failure_reason": "ConfigMap not found"
    },
    {
      "timestamp": "2024-01-15T14:21:00Z",
      "service": "database",
      "total_requests": 0,
      "error_count": 0,
      "error_rate": 100.0,
      "status_codes": {},
      "error_types": {
        "connection_refused": 1441,
        "startup_failures": 15
      },
      "pod": "database-pod-7b9c4d8f2a-x5m1q",
      "failure_reason": "ConfigMap not found"
    },
    {
      "timestamp": "2024-01-15T14:22:00Z",
      "service": "database",
      "total_requests": 0,
      "error_count": 0,
      "error_rate": 100.0,
      "status_codes": {},
      "error_types": {
        "connection_refused": 1442,
        "startup_failures": 15
      },
      "pod": "database-pod-7b9c4d8f2a-x5m1q",
      "failure_reason": "Permission denied on data directory"
    },
    {
      "timestamp": "2024-01-15T14:18:00Z",
      "service": "product-catalog-service",
      "total_requests": 15000,
      "error_count": 3,
      "error_rate": 0.02,
      "status_codes": {
        "200": 14800,
        "201": 120,
        "202": 75,
        "400": 2,
        "404": 1,
        "500": 0,
        "503": 0
      },
      "error_types": {
        "client_errors": 3,
        "server_errors": 0
      }
    },
    {
      "timestamp": "2024-01-15T14:19:00Z",
      "service": "product-catalog-service",
      "total_requests": 16500,
      "error_count": 2,
      "error_rate": 0.01,
      "status_codes": {
        "200": 16200,
        "201": 180,
        "202": 115,
        "400": 1,
        "404": 1,
        "500": 0,
        "503": 0
      },
      "error_types": {
        "client_errors": 2,
        "server_errors": 0
      }
    },
    {
      "timestamp": "2024-01-15T14:20:00Z",
      "service": "product-catalog-service",
      "total_requests": 17200,
      "error_count": 1,
      "error_rate": 0.006,
      "status_codes": {
        "200": 16850,
        "201": 210,
        "202": 135,
        "400": 0,
        "404": 1,
        "500": 0,
        "503": 0
      },
      "error_types": {
        "client_errors": 1,
        "server_errors": 0
      }
    }
  ]
}

Processing: data/metrics_data/resource_usage.json
==================================================
FILE: data/metrics_data/resource_usage.json
==================================================
{
  "metrics": [
    {
      "timestamp": "2024-01-15T14:20:00Z",
      "service": "web-service",
      "cpu_usage_percent": 25,
      "memory_usage_mb": 512,
      "memory_usage_percent": 50,
      "disk_io_read_mb": 10,
      "disk_io_write_mb": 5,
      "network_in_mb": 20,
      "network_out_mb": 40,
      "thread_count": 50,
      "connection_pool_active": 5,
      "connection_pool_idle": 5
    },
    {
      "timestamp": "2024-01-15T14:21:00Z",
      "service": "web-service",
      "cpu_usage_percent": 45,
      "memory_usage_mb": 650,
      "memory_usage_percent": 63,
      "disk_io_read_mb": 15,
      "disk_io_write_mb": 8,
      "network_in_mb": 25,
      "network_out_mb": 45,
      "thread_count": 65,
      "connection_pool_active": 8,
      "connection_pool_idle": 2
    },
    {
      "timestamp": "2024-01-15T14:22:00Z",
      "service": "web-service",
      "cpu_usage_percent": 65,
      "memory_usage_mb": 780,
      "memory_usage_percent": 76,
      "disk_io_read_mb": 20,
      "disk_io_write_mb": 12,
      "network_in_mb": 30,
      "network_out_mb": 35,
      "thread_count": 80,
      "connection_pool_active": 10,
      "connection_pool_idle": 0
    },
    {
      "timestamp": "2024-01-15T14:23:00Z",
      "service": "web-service",
      "cpu_usage_percent": 85,
      "memory_usage_mb": 920,
      "memory_usage_percent": 90,
      "disk_io_read_mb": 25,
      "disk_io_write_mb": 15,
      "network_in_mb": 15,
      "network_out_mb": 20,
      "thread_count": 95,
      "connection_pool_active": 10,
      "connection_pool_idle": 0,
      "connection_pool_waiting": 15
    },
    {
      "timestamp": "2024-01-15T14:24:00Z",
      "service": "web-service",
      "cpu_usage_percent": 95,
      "memory_usage_mb": 1024,
      "memory_usage_percent": 100,
      "disk_io_read_mb": 5,
      "disk_io_write_mb": 2,
      "network_in_mb": 5,
      "network_out_mb": 5,
      "thread_count": 100,
      "connection_pool_active": 0,
      "connection_pool_idle": 0,
      "connection_pool_waiting": 50,
      "critical": true
    }
  ]
}

Processing: data/metrics_data/response_times.json
==================================================
FILE: data/metrics_data/response_times.json
==================================================
{
  "metrics": [
    {
      "timestamp": "2024-01-15T14:20:00Z",
      "service": "web-service",
      "endpoint": "/api/users",
      "response_time_ms": 150,
      "percentile_50": 120,
      "percentile_95": 200,
      "percentile_99": 350,
      "sample_count": 100
    },
    {
      "timestamp": "2024-01-15T14:21:00Z",
      "service": "web-service",
      "endpoint": "/api/users",
      "response_time_ms": 1200,
      "percentile_50": 800,
      "percentile_95": 1500,
      "percentile_99": 2000,
      "sample_count": 95
    },
    {
      "timestamp": "2024-01-15T14:22:00Z",
      "service": "web-service",
      "endpoint": "/api/users",
      "response_time_ms": 2500,
      "percentile_50": 2000,
      "percentile_95": 3000,
      "percentile_99": 4500,
      "sample_count": 80
    },
    {
      "timestamp": "2024-01-15T14:23:00Z",
      "service": "web-service",
      "endpoint": "/api/users",
      "response_time_ms": 3500,
      "percentile_50": 3000,
      "percentile_95": 4500,
      "percentile_99": 5000,
      "sample_count": 50
    },
    {
      "timestamp": "2024-01-15T14:24:00Z",
      "service": "web-service",
      "endpoint": "/api/users",
      "response_time_ms": 5000,
      "percentile_50": 5000,
      "percentile_95": 5000,
      "percentile_99": 5000,
      "sample_count": 20,
      "errors": 15
    },
    {
      "timestamp": "2024-01-15T14:18:00Z",
      "service": "product-catalog-service",
      "endpoint": "/products/search",
      "response_time_ms": 25,
      "percentile_50": 20,
      "percentile_95": 35,
      "percentile_99": 50,
      "sample_count": 1500
    },
    {
      "timestamp": "2024-01-15T14:19:00Z",
      "service": "product-catalog-service",
      "endpoint": "/products/search",
      "response_time_ms": 22,
      "percentile_50": 18,
      "percentile_95": 32,
      "percentile_99": 48,
      "sample_count": 1600
    },
    {
      "timestamp": "2024-01-15T14:20:00Z",
      "service": "product-catalog-service",
      "endpoint": "/products/search",
      "response_time_ms": 20,
      "percentile_50": 16,
      "percentile_95": 30,
      "percentile_99": 45,
      "sample_count": 1650
    },
    {
      "timestamp": "2024-01-15T14:18:30Z",
      "service": "product-catalog-service",
      "endpoint": "/products/{id}",
      "response_time_ms": 12,
      "percentile_50": 10,
      "percentile_95": 18,
      "percentile_99": 25,
      "sample_count": 3200
    },
    {
      "timestamp": "2024-01-15T14:19:30Z",
      "service": "product-catalog-service",
      "endpoint": "/products/{id}",
      "response_time_ms": 14,
      "percentile_50": 11,
      "percentile_95": 19,
      "percentile_99": 26,
      "sample_count": 3350
    },
    {
      "timestamp": "2024-01-15T14:20:30Z",
      "service": "product-catalog-service",
      "endpoint": "/health",
      "response_time_ms": 2,
      "percentile_50": 1,
      "percentile_95": 3,
      "percentile_99": 5,
      "sample_count": 1200
    }
  ]
}

Processing: data/metrics_data/throughput.json
==================================================
FILE: data/metrics_data/throughput.json
==================================================
{
  "metrics": [
    {
      "timestamp": "2024-01-15T14:20:00Z",
      "service": "web-service",
      "requests_per_second": 150,
      "successful_requests": 148,
      "failed_requests": 2,
      "average_request_size_bytes": 2048,
      "average_response_size_bytes": 4096
    },
    {
      "timestamp": "2024-01-15T14:21:00Z",
      "service": "web-service",
      "requests_per_second": 120,
      "successful_requests": 115,
      "failed_requests": 5,
      "average_request_size_bytes": 2048,
      "average_response_size_bytes": 4096
    },
    {
      "timestamp": "2024-01-15T14:22:00Z",
      "service": "web-service",
      "requests_per_second": 80,
      "successful_requests": 70,
      "failed_requests": 10,
      "average_request_size_bytes": 2048,
      "average_response_size_bytes": 4096
    },
    {
      "timestamp": "2024-01-15T14:23:00Z",
      "service": "web-service",
      "requests_per_second": 50,
      "successful_requests": 25,
      "failed_requests": 25,
      "average_request_size_bytes": 2048,
      "average_response_size_bytes": 512
    },
    {
      "timestamp": "2024-01-15T14:24:00Z",
      "service": "web-service",
      "requests_per_second": 20,
      "successful_requests": 5,
      "failed_requests": 15,
      "average_request_size_bytes": 2048,
      "average_response_size_bytes": 256
    }
  ]
}

Processing: data/metrics_data/trends.json
==================================================
FILE: data/metrics_data/trends.json
==================================================
{
  "response_time_trends": {
    "trend": "increasing",
    "average_value": 2500,
    "standard_deviation": 1200,
    "anomalies": [
      {
        "timestamp": "2024-01-15T14:23:00Z",
        "value": 3500,
        "deviation_percentage": 40
      },
      {
        "timestamp": "2024-01-15T14:24:00Z",
        "value": 5000,
        "deviation_percentage": 100
      }
    ]
  },
  "error_rate_trends": {
    "trend": "increasing",
    "average_value": 25,
    "standard_deviation": 20,
    "anomalies": [
      {
        "timestamp": "2024-01-15T14:23:00Z",
        "value": 50,
        "deviation_percentage": 100
      },
      {
        "timestamp": "2024-01-15T14:24:00Z",
        "value": 75,
        "deviation_percentage": 200
      }
    ]
  },
  "cpu_trends": {
    "trend": "stable",
    "average_value": 45,
    "standard_deviation": 5,
    "anomalies": []
  },
  "memory_trends": {
    "trend": "increasing",
    "average_value": 70,
    "standard_deviation": 15,
    "anomalies": [
      {
        "timestamp": "2024-01-15T14:24:00Z",
        "value": 95,
        "deviation_percentage": 35
      }
    ]
  }
}

Processing: data/runbooks_data/common_resolutions.json
==================================================
FILE: data/runbooks_data/common_resolutions.json
==================================================
{
  "resolutions": [
    {
      "id": "oom-resolution",
      "issue": "OutOfMemoryError",
      "symptoms": [
        "Java heap space errors",
        "Pod evictions",
        "Application crashes"
      ],
      "quick_fixes": [
        {
          "action": "Restart affected pods",
          "command": "kubectl delete pod <pod-name>",
          "duration": "2 minutes",
          "effectiveness": "Temporary"
        },
        {
          "action": "Increase memory limits",
          "command": "kubectl set resources deployment <name> --limits=memory=2Gi",
          "duration": "5 minutes",
          "effectiveness": "Medium-term"
        }
      ],
      "permanent_solutions": [
        "Optimize memory usage in code",
        "Implement proper caching strategies",
        "Configure JVM heap settings appropriately",
        "Enable horizontal pod autoscaling"
      ]
    },
    {
      "id": "connection-pool-exhaustion",
      "issue": "Database Connection Pool Exhausted",
      "symptoms": [
        "Connection timeout errors",
        "Slow response times",
        "Service unavailability"
      ],
      "quick_fixes": [
        {
          "action": "Increase connection pool size",
          "config": "spring.datasource.hikari.maximum-pool-size=20",
          "duration": "5 minutes",
          "effectiveness": "Immediate"
        },
        {
          "action": "Restart application pods",
          "command": "kubectl rollout restart deployment <name>",
          "duration": "3 minutes",
          "effectiveness": "Temporary"
        }
      ],
      "permanent_solutions": [
        "Optimize database queries",
        "Implement connection pooling best practices",
        "Add read replicas for load distribution",
        "Implement caching layer"
      ]
    },
    {
      "id": "high-cpu-usage",
      "issue": "High CPU Usage",
      "symptoms": [
        "Slow response times",
        "Service timeouts",
        "Pod throttling"
      ],
      "quick_fixes": [
        {
          "action": "Scale horizontally",
          "command": "kubectl scale deployment <name> --replicas=10",
          "duration": "2 minutes",
          "effectiveness": "Immediate"
        },
        {
          "action": "Increase CPU limits",
          "command": "kubectl set resources deployment <name> --limits=cpu=2",
          "duration": "5 minutes",
          "effectiveness": "Medium-term"
        }
      ],
      "permanent_solutions": [
        "Profile and optimize CPU-intensive code",
        "Implement efficient algorithms",
        "Add caching for expensive computations",
        "Consider async processing for heavy tasks"
      ]
    },
    {
      "id": "pod-crashloop",
      "issue": "Pod CrashLoopBackOff",
      "symptoms": [
        "Pods constantly restarting",
        "Service unavailable",
        "Failed health checks"
      ],
      "quick_fixes": [
        {
          "action": "Check pod logs",
          "command": "kubectl logs <pod-name> --previous",
          "duration": "1 minute",
          "effectiveness": "Diagnostic"
        },
        {
          "action": "Describe pod for events",
          "command": "kubectl describe pod <pod-name>",
          "duration": "1 minute",
          "effectiveness": "Diagnostic"
        },
        {
          "action": "Delete and recreate pod",
          "command": "kubectl delete pod <pod-name>",
          "duration": "2 minutes",
          "effectiveness": "Sometimes effective"
        }
      ],
      "common_root_causes": [
        "Missing environment variables or secrets",
        "Incorrect liveness probe configuration",
        "Insufficient resources",
        "Image pull errors",
        "Configuration file issues"
      ]
    },
    {
      "id": "network-timeout",
      "issue": "Network Timeouts",
      "symptoms": [
        "Intermittent connection failures",
        "Slow service responses",
        "Gateway timeouts"
      ],
      "quick_fixes": [
        {
          "action": "Increase timeout values",
          "config": "timeout: 30s",
          "duration": "5 minutes",
          "effectiveness": "Temporary"
        },
        {
          "action": "Check service endpoints",
          "command": "kubectl get endpoints <service-name>",
          "duration": "1 minute",
          "effectiveness": "Diagnostic"
        }
      ],
      "permanent_solutions": [
        "Implement circuit breakers",
        "Add retry logic with exponential backoff",
        "Optimize network routes",
        "Implement service mesh for better control"
      ]
    }
  ]
}

Processing: data/runbooks_data/escalation_procedures.json
==================================================
FILE: data/runbooks_data/escalation_procedures.json
==================================================
{
  "escalation_procedures": [
    {
      "id": "critical-incident-escalation",
      "title": "Critical Incident Escalation",
      "severity": "critical",
      "trigger_conditions": [
        "Complete service outage",
        "Data loss or corruption",
        "Security breach",
        "Customer-facing impact > 50%"
      ],
      "escalation_chain": [
        {
          "level": 1,
          "role": "on-call-engineer",
          "response_time": "5 minutes",
          "actions": ["Initial assessment", "Start incident response", "Notify team"]
        },
        {
          "level": 2,
          "role": "team-lead",
          "response_time": "10 minutes",
          "actions": ["Coordinate response", "Allocate resources", "Update stakeholders"]
        },
        {
          "level": 3,
          "role": "engineering-manager",
          "response_time": "15 minutes",
          "actions": ["Executive communication", "Resource approval", "Strategic decisions"]
        },
        {
          "level": 4,
          "role": "cto",
          "response_time": "30 minutes",
          "actions": ["Company-wide coordination", "External communication", "Business continuity"]
        }
      ],
      "communication_templates": {
        "initial_notification": "CRITICAL: {service} experiencing complete outage. Impact: {impact}. Response initiated.",
        "update": "UPDATE: {service} incident. Status: {status}. ETA: {eta}. Actions: {actions}",
        "resolution": "RESOLVED: {service} incident resolved. Duration: {duration}. Root cause: {root_cause}"
      }
    },
    {
      "id": "high-severity-escalation",
      "title": "High Severity Escalation",
      "severity": "high",
      "trigger_conditions": [
        "Service degradation > 30 minutes",
        "Error rate > 25%",
        "Performance degradation > 50%",
        "Multiple service impacts"
      ],
      "escalation_chain": [
        {
          "level": 1,
          "role": "on-call-engineer",
          "response_time": "10 minutes",
          "actions": ["Investigate issue", "Implement quick fixes", "Document findings"]
        },
        {
          "level": 2,
          "role": "senior-engineer",
          "response_time": "20 minutes",
          "actions": ["Deep dive analysis", "Complex troubleshooting", "Solution implementation"]
        },
        {
          "level": 3,
          "role": "team-lead",
          "response_time": "30 minutes",
          "actions": ["Resource coordination", "Decision making", "Stakeholder updates"]
        }
      ]
    },
    {
      "id": "communication-procedures",
      "title": "Incident Communication Procedures",
      "channels": [
        {
          "name": "Internal Slack",
          "channel": "#incidents",
          "purpose": "Real-time team coordination",
          "update_frequency": "Every 15 minutes"
        },
        {
          "name": "Status Page",
          "url": "https://status.example.com",
          "purpose": "Customer communication",
          "update_frequency": "Every 30 minutes"
        },
        {
          "name": "Executive Email",
          "distribution": "exec-team@example.com",
          "purpose": "Leadership updates",
          "update_frequency": "Hourly or on major changes"
        }
      ],
      "templates": {
        "incident_start": "Investigating reports of {service} issues. More updates to follow.",
        "incident_identified": "We've identified an issue with {service} causing {impact}. Working on resolution.",
        "incident_update": "{service} issue update: {progress}. Current impact: {impact}. ETA: {eta}",
        "incident_resolved": "{service} issue has been resolved. All systems operational."
      }
    }
  ]
}

Processing: data/runbooks_data/incident_playbooks.json
==================================================
FILE: data/runbooks_data/incident_playbooks.json
==================================================
{
  "playbooks": [
    {
      "id": "memory-pressure-playbook",
      "title": "High Memory Usage Incident Response",
      "incident_type": "performance",
      "severity": "high",
      "description": "Procedure for handling high memory usage incidents",
      "triggers": [
        "Memory utilization > 85%",
        "OutOfMemoryError in logs",
        "Pod evictions due to memory pressure"
      ],
      "steps": [
        "1. Identify affected pods using kubectl get pods --field-selector=status.phase=Running",
        "2. Check memory usage: kubectl top pods -n production",
        "3. Review recent memory metrics and trends",
        "4. Scale up deployment if horizontal scaling is possible",
        "5. Increase memory limits in deployment configuration",
        "6. Restart affected pods if necessary",
        "7. Monitor recovery and validate normal operation"
      ],
      "escalation": {
        "primary": "on-call-engineer",
        "secondary": "platform-team",
        "manager": "engineering-manager"
      },
      "estimated_resolution_time": "15-30 minutes",
      "related_runbooks": ["pod-crashloop-troubleshooting", "resource-optimization"]
    },
    {
      "id": "database-connection-failure",
      "title": "Database Connection Failure Response",
      "incident_type": "availability",
      "severity": "critical",
      "description": "Procedure for handling database connectivity issues",
      "triggers": [
        "Database connection timeout errors",
        "Connection pool exhaustion",
        "Database pods in CrashLoopBackOff"
      ],
      "steps": [
        "1. Check database pod status: kubectl get pods -l app=database",
        "2. Review database logs: kubectl logs -f <database-pod-name>",
        "3. Verify database service endpoints",
        "4. Check network connectivity between services",
        "5. Restart database pod if configuration is correct",
        "6. Scale connection pool if needed",
        "7. Verify application can connect to database"
      ],
      "escalation": {
        "primary": "database-admin",
        "secondary": "infrastructure-team",
        "manager": "site-reliability-manager"
      },
      "estimated_resolution_time": "5-15 minutes",
      "related_runbooks": ["database-recovery", "connection-pool-tuning"]
    },
    {
      "id": "high-error-rate-response",
      "title": "High Error Rate Response",
      "incident_type": "availability",
      "severity": "high",
      "description": "Procedure for handling increased error rates",
      "triggers": [
        "Error rate > 10%",
        "5xx errors increasing",
        "Multiple service failures"
      ],
      "steps": [
        "1. Check current error rates across all services",
        "2. Identify the source service causing errors",
        "3. Review application logs for error patterns",
        "4. Check recent deployments or configuration changes",
        "5. Consider rolling back if recent deployment",
        "6. Scale affected services if load-related",
        "7. Enable circuit breakers if cascading failures"
      ],
      "escalation": {
        "primary": "on-call-engineer",
        "secondary": "service-owner",
        "manager": "engineering-manager"
      },
      "estimated_resolution_time": "10-20 minutes",
      "related_runbooks": ["rollback-procedures", "circuit-breaker-configuration"]
    },
    {
      "id": "pod-startup-failure",
      "title": "Pod Startup Failure Resolution",
      "incident_type": "deployment",
      "severity": "medium",
      "description": "Procedure for resolving pod startup issues",
      "triggers": [
        "Pods stuck in Pending state",
        "ImagePullBackOff errors",
        "Init container failures"
      ],
      "steps": [
        "1. Check pod events: kubectl describe pod <pod-name>",
        "2. Verify image availability and pull secrets",
        "3. Check resource quotas and limits",
        "4. Review init container logs if applicable",
        "5. Verify configuration maps and secrets",
        "6. Check node resources and scheduling constraints",
        "7. Recreate pod with corrected configuration"
      ],
      "escalation": {
        "primary": "platform-team",
        "secondary": "infrastructure-team",
        "manager": "platform-manager"
      },
      "estimated_resolution_time": "10-30 minutes",
      "related_runbooks": ["kubernetes-troubleshooting", "deployment-best-practices"]
    },
    {
      "id": "database-pod-crashloop-incident",
      "title": "Database Pod CrashLoopBackOff Incident",
      "incident_type": "availability",
      "severity": "critical",
      "description": "Critical incident response for database pod continuously crashing",
      "specific_pod": "database-pod-7b9c4d8f2a-x5m1q",
      "triggers": [
        "Database pod in CrashLoopBackOff state",
        "ConfigMap 'database-config' not found errors",
        "PostgreSQL initialization failures",
        "Volume mount permission errors"
      ],
      "root_cause": "Missing ConfigMap 'database-config' preventing PostgreSQL initialization",
      "steps": [
        "1. IMMEDIATE: Check pod status: kubectl get pod database-pod-7b9c4d8f2a-x5m1q -n production",
        "2. Review pod logs: kubectl logs database-pod-7b9c4d8f2a-x5m1q --previous -n production",
        "3. Verify ConfigMap existence: kubectl get configmap database-config -n production",
        "4. If ConfigMap missing, create it: kubectl create configmap database-config --from-literal=database.conf='shared_buffers=256MB\\nmax_connections=100\\nlog_destination=stderr' -n production",
        "5. Check volume permissions: kubectl exec -it database-pod-7b9c4d8f2a-x5m1q -n production -- ls -la /var/lib/postgresql/",
        "6. Force pod restart: kubectl delete pod database-pod-7b9c4d8f2a-x5m1q -n production",
        "7. Monitor pod startup (the replacement pod gets a new name, so select by label): kubectl logs -l app=database -f -n production",
        "8. Verify database connectivity once running"
      ],
      "escalation": {
        "primary": "database-oncall@company.com",
        "secondary": "platform-oncall@company.com",
        "manager": "incident-manager@company.com",
        "escalation_time": "5 minutes"
      },
      "estimated_resolution_time": "10-15 minutes",
      "impact": {
        "services_affected": ["web-service", "api-service"],
        "users_affected": "All users - complete database outage",
        "business_impact": "Critical - No data operations possible"
      },
      "related_runbooks": ["database-crashloop-troubleshooting", "configmap-management"],
      "post_incident": [
        "Add ConfigMap to deployment manifest",
        "Implement ConfigMap validation in CI/CD",
        "Add monitoring for ConfigMap existence",
        "Document configuration requirements"
      ]
    }
  ]
}

Processing: data/runbooks_data/service_recovery.json
==================================================
FILE: data/runbooks_data/service_recovery.json
==================================================
{
  "recovery_procedures": [
    {
      "id": "web-service-recovery",
      "service": "web-service",
      "recovery_steps": [
        {
          "step": 1,
          "action": "Check service health",
          "command": "kubectl get pods -l app=web-app",
          "expected_result": "All pods should be in Running state"
        },
        {
          "step": 2,
          "action": "Restart unhealthy pods",
          "command": "kubectl delete pod <unhealthy-pod-name>",
          "expected_result": "New pod should start and become ready"
        },
        {
          "step": 3,
          "action": "Scale deployment if needed",
          "command": "kubectl scale deployment web-app-deployment --replicas=5",
          "expected_result": "Additional pods should start"
        },
        {
          "step": 4,
          "action": "Verify load balancer",
          "command": "kubectl get svc web-app-service",
          "expected_result": "External IP should be assigned"
        },
        {
          "step": 5,
          "action": "Test service endpoints",
          "command": "curl http://<external-ip>/health",
          "expected_result": "Should return 200 OK"
        }
      ],
      "rollback_procedure": {
        "trigger": "If recovery fails after 30 minutes",
        "steps": [
          "1. Get previous deployment revision: kubectl rollout history deployment/web-app-deployment",
          "2. Rollback to previous version: kubectl rollout undo deployment/web-app-deployment",
          "3. Monitor rollback status: kubectl rollout status deployment/web-app-deployment",
          "4. Verify service health after rollback"
        ]
      }
    },
    {
      "id": "database-recovery",
      "service": "database",
      "recovery_steps": [
        {
          "step": 1,
          "action": "Check database pod status",
          "command": "kubectl get pods -l app=database",
          "expected_result": "Pod should be running"
        },
        {
          "step": 2,
          "action": "Verify persistent volume",
          "command": "kubectl get pv,pvc -n production",
          "expected_result": "PVC should be bound"
        },
        {
          "step": 3,
          "action": "Check database logs",
          "command": "kubectl logs -f <database-pod-name>",
          "expected_result": "No critical errors"
        },
        {
          "step": 4,
          "action": "Test database connectivity",
          "command": "kubectl exec -it <database-pod-name> -- psql -U postgres -c 'SELECT 1'",
          "expected_result": "Query should return successfully"
        },
        {
          "step": 5,
          "action": "Verify replication if applicable",
          "command": "kubectl exec -it <database-pod-name> -- psql -U postgres -c 'SELECT * FROM pg_stat_replication'",
          "expected_result": "Replicas should be connected"
        }
      ],
      "data_recovery": {
        "backup_location": "s3://backup-bucket/database/",
        "restore_procedure": [
          "1. Stop application writes",
          "2. Create new database pod with empty volume",
          "3. Restore from latest backup: pg_restore -d dbname backup.dump",
          "4. Verify data integrity",
          "5. Resume application traffic"
        ]
      }
    },
    {
      "id": "full-stack-recovery",
      "title": "Complete Stack Recovery",
      "priority_order": [
        "database",
        "cache-service",
        "api-service",
        "web-service",
        "ingress-controller"
      ],
      "pre_recovery_checks": [
        "Verify cluster health: kubectl get nodes",
        "Check resource availability: kubectl top nodes",
        "Review recent events: kubectl get events --sort-by=.metadata.creationTimestamp"
      ],
      "recovery_phases": [
        {
          "phase": "Infrastructure",
          "steps": [
            "Verify node health",
            "Check network connectivity",
            "Ensure storage availability"
          ]
        },
        {
          "phase": "Data Layer",
          "steps": [
            "Recover database services",
            "Verify data integrity",
            "Restore cache if needed"
          ]
        },
        {
          "phase": "Application Layer",
          "steps": [
            "Start backend services",
            "Verify service discovery",
            "Start frontend services"
          ]
        },
        {
          "phase": "Validation",
          "steps": [
            "Run health checks",
            "Perform smoke tests",
            "Monitor metrics"
          ]
        }
      ]
    },
    {
      "id": "product-catalog-service-recovery",
      "service": "product-catalog-service",
      "recovery_steps": [
        {
          "step": 1,
          "action": "Check service health status",
          "command": "kubectl get pods -l app=product-catalog-service",
          "expected_result": "All pods should be in Running state with 2/2 ready"
        },
        {
          "step": 2,
          "action": "Verify service endpoints",
          "command": "kubectl get svc product-catalog-service",
          "expected_result": "Service should show active endpoints"
        },
        {
          "step": 3,
          "action": "Check application logs",
          "command": "kubectl logs -l app=product-catalog-service --tail=100",
          "expected_result": "No error messages, healthy operation logs"
        },
        {
          "step": 4,
          "action": "Test health endpoint",
          "command": "kubectl exec -it <pod-name> -- curl localhost:8080/health",
          "expected_result": "Should return 200 OK with healthy status"
        },
        {
          "step": 5,
          "action": "Verify cache connectivity",
          "command": "kubectl exec -it <pod-name> -- curl localhost:8080/internal/cache/status",
          "expected_result": "Cache should be connected with high hit rate"
        },
        {
          "step": 6,
          "action": "Test product search functionality",
          "command": "kubectl exec -it <pod-name> -- curl 'localhost:8080/products/search?q=test'",
          "expected_result": "Should return product results within 50ms"
        }
      ],
      "performance_optimization": {
        "trigger": "If response times exceed 100ms",
        "steps": [
          "1. Check cache hit rates: Monitor Redis cluster performance",
          "2. Review database query performance",
          "3. Scale pods if CPU > 70%: kubectl scale deployment product-catalog-service --replicas=4",
          "4. Enable query caching for frequent searches",
          "5. Warm up product index if cold start detected"
        ]
      },
      "scaling_guidelines": {
        "auto_scaling": {
          "min_replicas": 2,
          "max_replicas": 10,
          "target_cpu": "70%",
          "target_memory": "80%"
        },
        "manual_scaling": {
          "peak_hours": "Scale to 6 replicas during 9AM-6PM",
          "promotional_events": "Scale to 10 replicas during flash sales",
          "maintenance": "Scale to minimum 3 replicas during deployment"
        }
      }
    }
  ]
}

Processing: data/runbooks_data/troubleshooting_guides.json
==================================================
FILE: data/runbooks_data/troubleshooting_guides.json
==================================================
{
  "guides": [
    {
      "id": "pod-crashloop-troubleshooting",
      "title": "Pod CrashLoopBackOff Troubleshooting",
      "category": "kubernetes",
      "steps": [
        "1. Get pod details: kubectl describe pod <pod-name>",
        "2. Check pod logs: kubectl logs <pod-name> --previous",
        "3. Look for recent events: kubectl get events --sort-by=.metadata.creationTimestamp",
        "4. Verify resource limits and requests",
        "5. Check liveness and readiness probes",
        "6. Review container image and configuration",
        "7. Validate environment variables and secrets"
      ],
      "common_causes": [
        "Insufficient resources (CPU/Memory)",
        "Incorrect liveness probe configuration",
        "Missing environment variables",
        "Invalid container image or tag",
        "Configuration errors in deployment"
      ],
      "diagnostic_commands": [
        "kubectl get pod <pod-name> -o yaml",
        "kubectl logs <pod-name> --previous",
        "kubectl describe pod <pod-name>",
        "kubectl get events --field-selector involvedObject.name=<pod-name>"
      ]
    },
    {
      "id": "database-crashloop-troubleshooting",
      "title": "Database Pod CrashLoopBackOff Resolution",
      "category": "kubernetes",
      "specific_pod": "database-pod-7b9c4d8f2a-x5m1q",
      "steps": [
        "1. Check pod logs: kubectl logs database-pod-7b9c4d8f2a-x5m1q --previous -n production",
        "2. Verify ConfigMap exists: kubectl get configmap database-config -n production",
        "3. Check volume mounts: kubectl describe pod database-pod-7b9c4d8f2a-x5m1q -n production",
        "4. Verify PVC status: kubectl get pvc -n production",
        "5. Check permissions on data directory",
        "6. Create missing ConfigMap if needed: kubectl create configmap database-config --from-file=database.conf -n production",
        "7. Fix volume permissions: chmod 700 /var/lib/postgresql/data && chown postgres:postgres /var/lib/postgresql/data"
      ],
      "common_causes": [
        "Missing ConfigMap 'database-config'",
        "Incorrect file permissions on data directory",
        "Volume mount failures",
        "Missing database configuration file",
        "PostgreSQL initialization failures"
      ],
      "diagnostic_commands": [
        "kubectl logs database-pod-7b9c4d8f2a-x5m1q -c postgres --previous -n production",
        "kubectl describe pod database-pod-7b9c4d8f2a-x5m1q -n production",
        "kubectl get configmap -n production | grep database",
        "kubectl get pvc -n production",
        "kubectl get events -n production --field-selector involvedObject.name=database-pod-7b9c4d8f2a-x5m1q --sort-by='.lastTimestamp'"
      ],
      "resolution": {
        "immediate_fix": "kubectl create configmap database-config --from-literal=database.conf='shared_buffers=256MB\\nmax_connections=100' -n production",
        "permanent_fix": "Update deployment manifest to include proper ConfigMap and volume permissions"
      }
    },
    {
      "id": "high-response-time-troubleshooting",
      "title": "High Response Time Investigation",
      "category": "performance",
      "steps": [
        "1. Check current response time metrics",
        "2. Identify affected endpoints and services",
        "3. Review CPU and memory utilization",
        "4. Examine database query performance",
        "5. Check for network latency issues",
        "6. Review application logs for bottlenecks",
        "7. Verify external service dependencies"
      ],
      "tools": [
        "kubectl top pods",
        "Application Performance Monitoring (APM)",
        "Database query analysis tools",
        "Network monitoring tools"
      ],
      "common_causes": [
        "Database query optimization needed",
        "Insufficient service resources",
        "Network latency or packet loss",
        "External service degradation",
        "Cache misses or invalidation"
      ]
    },
    {
      "id": "memory-leak-investigation",
      "title": "Memory Leak Investigation Guide",
      "category": "performance",
      "steps": [
        "1. Monitor memory usage trends over time",
        "2. Identify services with increasing memory usage",
        "3. Capture heap dumps if possible",
        "4. Review recent code changes",
        "5. Check for unclosed resources",
        "6. Analyze object allocation patterns",
        "7. Test fixes in staging environment"
      ],
      "diagnostic_commands": [
        "kubectl top pods --containers",
        "kubectl exec <pod> -- jmap -heap <pid>",
        "kubectl exec <pod> -- jstat -gcutil <pid>"
      ],
      "prevention": [
        "Implement proper resource cleanup",
        "Use connection pooling",
        "Set appropriate JVM heap settings",
        "Monitor memory metrics continuously"
      ]
    },
    {
      "id": "service-discovery-issues",
      "title": "Service Discovery Troubleshooting",
      "category": "networking",
      "steps": [
        "1. Verify service endpoints: kubectl get endpoints",
        "2. Check service selector labels",
        "3. Test DNS resolution from pods",
        "4. Verify network policies",
        "5. Check service port configurations",
        "6. Test connectivity between pods",
        "7. Review ingress configurations"
      ],
      "diagnostic_commands": [
        "kubectl get svc <service-name> -o yaml",
        "kubectl get endpoints <service-name>",
        "kubectl exec <pod> -- nslookup <service-name>",
        "kubectl exec <pod> -- curl <service-name>:<port>"
      ],
      "common_issues": [
        "Mismatched selector labels",
        "Incorrect port configurations",
        "Network policy restrictions",
        "DNS configuration issues"
      ]
    }
  ]
}

==============================================
Summary
==============================================
Total files (excluding all_data_dump.txt): 31
Total directories: 7
Total size: 264K

