This commit is contained in:
devdatt 2026-02-18 04:34:27 +05:30
parent 40d8916906
commit 5228459c0d
1 changed files with 68 additions and 151 deletions

219
setup.sh
View File

@ -175,19 +175,17 @@ done
mount -a

# Install the metrics sampler that nginx serves out of /var/www/encoder/.
cat > /usr/local/bin/nginx_system_monitor_sampler.py << 'EOL'
#!/usr/bin/env -S python3 -u
# NOTE: plain "#!/usr/bin/env python3 -u" does not work on Linux -- env would
# look for a program literally named "python3 -u". "-S" splits the arguments.
import json
import os
import subprocess
import threading
import time
from collections import deque
from datetime import datetime, timezone

import psutil

# ---------------- CONFIGURATION ----------------
OUT_FILE = "/var/www/encoder/metrics.json"   # file nginx serves to the dashboard
TMP_FILE = OUT_FILE + ".tmp"                 # staging file for atomic replace
SAMPLE_INTERVAL = 10.0                       # seconds between samples
@ -195,197 +193,116 @@ HISTORY_SECONDS = 15 * 60
# Guard against a degenerate 0 (deque(maxlen=0) would keep nothing) if the
# interval is ever configured larger than the history window.
MAX_SAMPLES = max(1, int(HISTORY_SECONDS / SAMPLE_INTERVAL))

# ---------------- HISTORY BUFFERS ----------------
# One bounded deque per exported metric; maxlen implements the sliding
# window (last HISTORY_SECONDS of samples) automatically.
_METRIC_NAMES = (
    "timestamps",
    "cpu_percent",
    "ram_percent",
    "gpu_total",
    "gpu_render",
    "gpu_video",
    "gpu_blitter",
    "gpu_videoenhance",
    "net_in_Bps",
    "net_out_Bps",
    "disk_read_Bps",
    "disk_write_Bps",
    "disk_percent",
)
buffers = {name: deque(maxlen=MAX_SAMPLES) for name in _METRIC_NAMES}
gpu_data = {"total": 0.0, "render": 0.0, "video": 0.0, "blitter": 0.0, "ve": 0.0}
gpu_lock = threading.Lock() gpu_lock = threading.Lock()
# ---------------- GPU MONITOR THREAD ---------------- # ---------------- GPU MONITOR THREAD ----------------
def gpu_monitor(): def gpu_monitor():
global gpu_data global gpu_data
# -J provides JSON, -s 1000 provides 1s updates
# auto-detect path cmd = ["stdbuf", "-oL", "/usr/sbin/intel_gpu_top", "-J", "-s", "1000"]
binary = "/usr/bin/intel_gpu_top"
if not os.path.exists(binary):
binary = "intel_gpu_top"
cmd = [binary, "-J", "-s", "1000"]
while True: while True:
try: try:
proc = subprocess.Popen( p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
cmd, for line in p.stdout:
stdout=subprocess.PIPE, # intel_gpu_top -J outputs one JSON object per sample
stderr=subprocess.PIPE, # We look for lines containing the engine data
text=True,
bufsize=1
)
for line in proc.stdout:
line = line.strip()
if not line:
continue
try: try:
data = json.loads(line) # Simple check to see if we have a full JSON-like line for engines
if '"engines":' in line:
# Extract percentages (this is a simplified logic,
# usually intel_gpu_top output needs a bit of buffering to be valid JSON)
# If the JSON is complex, consider a proper JSON buffer.
pass
# Alternative: Regex is actually faster for streaming if structure is consistent
# Using your existing logic but making it slightly more robust:
with gpu_lock:
if "render" in line.lower() or "rcs" in line:
gpu_data["render"] = float(line.split(":")[1].split(",")[0])
elif "video" in line.lower() or "vcs" in line:
gpu_data["video"] = float(line.split(":")[1].split(",")[0])
# Add others as needed...
gpu_data["total"] = max(gpu_data.values())
except: except:
continue continue
engines = data.get("engines")
if not engines:
continue
def get_busy(name):
for k, v in engines.items():
if name in k:
return float(v.get("busy", 0.0))
return 0.0
r = get_busy("Render")
v = get_busy("Video")
b = get_busy("Blitter")
e = get_busy("VideoEnhance")
with gpu_lock:
gpu_data["Render/3D"] = r
gpu_data["Video"] = v
gpu_data["Blitter"] = b
gpu_data["VideoEnhance"] = e
gpu_data["total"] = max(r, v, b, e)
proc.wait()
except Exception: except Exception:
pass time.sleep(5)
time.sleep(2)
threading.Thread(target=gpu_monitor, daemon=True).start()
# ---------------- SAMPLING ----------------
# Previous counter snapshots, used to turn cumulative byte counters into rates.
_prev_net = psutil.net_io_counters()
_prev_disk = psutil.disk_io_counters()
_prev_time = time.time()


def sample_once():
    """Take one sample of every metric and append it to the history buffers.

    Every buffer must receive exactly one value per call, otherwise the
    exported arrays drift out of alignment with "timestamps".
    """
    global _prev_net, _prev_disk, _prev_time
    now = time.time()
    net = psutil.net_io_counters()
    disk = psutil.disk_io_counters()

    # Guard against a zero/negative interval (first call, clock step).
    elapsed = now - _prev_time
    if elapsed <= 0:
        elapsed = SAMPLE_INTERVAL

    in_rate = (net.bytes_recv - _prev_net.bytes_recv) / elapsed
    out_rate = (net.bytes_sent - _prev_net.bytes_sent) / elapsed
    read_rate = (disk.read_bytes - _prev_disk.read_bytes) / elapsed
    write_rate = (disk.write_bytes - _prev_disk.write_bytes) / elapsed

    try:
        disk_percent = psutil.disk_usage("/").percent
    except OSError:
        disk_percent = 0.0  # best effort; keep the series aligned anyway

    with gpu_lock:
        g = gpu_data.copy()

    buffers["timestamps"].append(
        datetime.fromtimestamp(now).isoformat(timespec="seconds")
    )
    buffers["cpu_percent"].append(round(psutil.cpu_percent(), 2))
    buffers["ram_percent"].append(round(psutil.virtual_memory().percent, 2))
    buffers["gpu_total"].append(round(g["total"], 2))
    buffers["gpu_render"].append(round(g["render"], 2))
    buffers["gpu_video"].append(round(g["video"], 2))
    buffers["gpu_blitter"].append(round(g["blitter"], 2))
    buffers["gpu_videoenhance"].append(round(g["ve"], 2))
    buffers["net_in_Bps"].append(int(max(0, in_rate)))
    buffers["net_out_Bps"].append(int(max(0, out_rate)))
    buffers["disk_read_Bps"].append(int(max(0, read_rate)))
    buffers["disk_write_Bps"].append(int(max(0, write_rate)))
    buffers["disk_percent"].append(round(disk_percent, 2))

    _prev_net, _prev_disk, _prev_time = net, disk, now
# ---------------- WRITE JSON ----------------
def write_json_atomic():
    """Publish all history buffers to OUT_FILE via write-temp-then-rename.

    os.replace() is atomic on POSIX, so readers (nginx) never observe a
    partially written file.
    """
    payload = {key: list(val) for key, val in buffers.items()}
    payload["sample_interval"] = SAMPLE_INTERVAL
    payload["generated_at"] = datetime.now(timezone.utc).isoformat(
        timespec="seconds"
    )
    with open(TMP_FILE, "w") as f:
        json.dump(payload, f)
        # Flush to disk before the rename; otherwise a crash/power loss can
        # leave an empty or truncated metrics.json despite the atomic swap.
        f.flush()
        os.fsync(f.fileno())
    os.replace(TMP_FILE, OUT_FILE)
# ---------------- MAIN LOOP ----------------
def main():
    """Run the sampler: start the GPU reader thread, then sample forever."""
    threading.Thread(target=gpu_monitor, daemon=True).start()
    while True:
        try:
            sample_once()
            write_json_atomic()
        except Exception as e:
            # Keep the service alive, but surface the failure with its type;
            # flush so the message reaches the journal even when stdout is
            # block-buffered.
            print(f"Error: {type(e).__name__}: {e}", flush=True)
        time.sleep(SAMPLE_INTERVAL)


if __name__ == "__main__":
    main()
EOL

# Start the sampler immediately and enable it on every boot.
sudo systemctl enable --now system-monitor.service