Commit 5228459c0d ("update"), parent 40d8916906 — setup.sh changed (+219/−219 lines).
@ -175,19 +175,17 @@ done
|
|||
mount -a
|
||||
|
||||
cat > /usr/local/bin/nginx_system_monitor_sampler.py<< 'EOL'
|
||||
#!/usr/bin/env python3
|
||||
#!/usr/bin/env python3 -u
|
||||
import time
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import threading
|
||||
import re # Built-in, no installation needed
|
||||
import psutil
|
||||
from collections import deque
|
||||
from datetime import datetime, timezone
|
||||
import psutil
|
||||
|
||||
# ---------------- CONFIGURATION ----------------
|
||||
|
||||
OUT_FILE = "/var/www/encoder/metrics.json"
|
||||
TMP_FILE = OUT_FILE + ".tmp"
|
||||
SAMPLE_INTERVAL = 10.0
|
||||
|
|
@ -195,197 +193,116 @@ HISTORY_SECONDS = 15 * 60
|
|||
# ---------------- HISTORY BUFFERS ----------------

# Rolling window retained in the published JSON (15 minutes of samples).
HISTORY_SECONDS = 15 * 60
MAX_SAMPLES = int(HISTORY_SECONDS / SAMPLE_INTERVAL)

# One bounded deque per published series; maxlen implements the sliding
# window automatically (old samples fall off as new ones are appended).
buffers = {
    "timestamps": deque(maxlen=MAX_SAMPLES),
    "cpu_percent": deque(maxlen=MAX_SAMPLES),
    "ram_percent": deque(maxlen=MAX_SAMPLES),
    "gpu_total": deque(maxlen=MAX_SAMPLES),
    "gpu_render": deque(maxlen=MAX_SAMPLES),
    "gpu_video": deque(maxlen=MAX_SAMPLES),
    "gpu_blitter": deque(maxlen=MAX_SAMPLES),
    "gpu_videoenhance": deque(maxlen=MAX_SAMPLES),
    "net_in_Bps": deque(maxlen=MAX_SAMPLES),
    "net_out_Bps": deque(maxlen=MAX_SAMPLES),
    "disk_read_Bps": deque(maxlen=MAX_SAMPLES),
    "disk_write_Bps": deque(maxlen=MAX_SAMPLES),
    "disk_percent": deque(maxlen=MAX_SAMPLES),
}

# ---------------- SHARED GPU DATA ----------------

# Latest GPU engine utilisation in percent, written by the gpu_monitor()
# thread and read by the sampler. Guard every access with gpu_lock.
gpu_data = {"total": 0.0, "render": 0.0, "video": 0.0, "blitter": 0.0, "ve": 0.0}
gpu_lock = threading.Lock()

# Previous cumulative-counter snapshots, used to derive per-second rates.
# Re-initialised in main() before the sampling loop starts.
_prev_net = psutil.net_io_counters()
_prev_disk = psutil.disk_io_counters()
_prev_time = time.time()
|
||||
|
||||
# ---------------- GPU MONITOR THREAD ----------------

def gpu_monitor():
    """Continuously scrape intel_gpu_top and publish engine busy percentages.

    Runs forever in a daemon thread; restarts intel_gpu_top after any
    failure (binary missing, crash, EOF) with a short back-off.
    """
    # Prefer the absolute path; fall back to a PATH lookup.
    binary = "/usr/bin/intel_gpu_top"
    if not os.path.exists(binary):
        binary = "intel_gpu_top"

    # -J emits a JSON stream, -s 1000 -> one sample per second.
    # stdbuf -oL defeats pipe block-buffering so samples arrive promptly.
    cmd = ["stdbuf", "-oL", binary, "-J", "-s", "1000"]

    while True:
        try:
            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                text=True,
                bufsize=1,
            )

            # intel_gpu_top -J does NOT emit one JSON object per line
            # (output is pretty-printed), so accumulate characters and
            # cut complete objects out by brace balancing.
            buf = ""
            depth = 0
            for line in proc.stdout:
                for ch in line:
                    if ch == "{":
                        if depth == 0:
                            buf = ""
                        depth += 1
                    if depth > 0:
                        buf += ch
                    if ch == "}":
                        depth -= 1
                        if depth == 0 and buf:
                            _update_gpu_from_sample(buf)
                            buf = ""
            proc.wait()
        except Exception:
            # Best-effort: tool missing or crashed; retry below.
            pass
        time.sleep(2)


def _update_gpu_from_sample(text):
    """Parse one intel_gpu_top JSON sample and refresh gpu_data under the lock."""
    try:
        data = json.loads(text)
    except ValueError:
        return  # partial/garbled object — skip this sample
    engines = data.get("engines")
    if not engines:
        return

    def busy(engine):
        # Keys look like "Render/3D/0", "Video/0", "VideoEnhance/0";
        # compare the first path component so "Video" does not also
        # match "VideoEnhance".
        for key, val in engines.items():
            if key.split("/")[0] == engine:
                return float(val.get("busy", 0.0))
        return 0.0

    r = busy("Render")
    v = busy("Video")
    b = busy("Blitter")
    e = busy("VideoEnhance")
    with gpu_lock:
        gpu_data["render"] = r
        gpu_data["video"] = v
        gpu_data["blitter"] = b
        gpu_data["ve"] = e
        gpu_data["total"] = max(r, v, b, e)
|
||||
|
||||
# ---------------- SAMPLING ----------------
# (The GPU monitor thread is started once, in main(); the previous
# module-level Thread(...).start() duplicated that and was removed.)

def sample_once():
    """Take one sample of every metric and append it to the history buffers.

    Mutates the module-level _prev_* snapshots so the next call can turn
    cumulative psutil counters into per-second rates.
    """
    global _prev_net, _prev_disk, _prev_time

    now = time.time()

    cpu = psutil.cpu_percent(interval=None)  # non-blocking: % since last call
    ram = psutil.virtual_memory().percent

    # Snapshot GPU data under the lock; copy so the lock is held briefly.
    with gpu_lock:
        g = gpu_data.copy()

    net = psutil.net_io_counters()
    disk = psutil.disk_io_counters()
    try:
        disk_percent = psutil.disk_usage("/").percent
    except OSError:
        disk_percent = 0.0  # best-effort: keep sampling even if statvfs fails

    # Guard against a zero/negative interval (first sample, clock step).
    elapsed = now - _prev_time
    if elapsed <= 0:
        elapsed = SAMPLE_INTERVAL

    in_rate = (net.bytes_recv - _prev_net.bytes_recv) / elapsed
    out_rate = (net.bytes_sent - _prev_net.bytes_sent) / elapsed
    read_rate = (disk.read_bytes - _prev_disk.read_bytes) / elapsed
    write_rate = (disk.write_bytes - _prev_disk.write_bytes) / elapsed

    buffers["timestamps"].append(
        datetime.fromtimestamp(now).isoformat(timespec='seconds'))
    buffers["cpu_percent"].append(round(cpu, 2))
    buffers["ram_percent"].append(round(ram, 2))
    buffers["gpu_total"].append(round(g["total"], 2))
    buffers["gpu_render"].append(round(g["render"], 2))
    buffers["gpu_video"].append(round(g["video"], 2))
    buffers["gpu_blitter"].append(round(g["blitter"], 2))
    buffers["gpu_videoenhance"].append(round(g["ve"], 2))
    # Rates are clamped at 0: cumulative counters can reset (e.g. on
    # interface restart), which would otherwise yield negative rates.
    buffers["net_in_Bps"].append(int(max(0, in_rate)))
    buffers["net_out_Bps"].append(int(max(0, out_rate)))
    buffers["disk_read_Bps"].append(int(max(0, read_rate)))
    buffers["disk_write_Bps"].append(int(max(0, write_rate)))
    buffers["disk_percent"].append(round(disk_percent, 2))

    _prev_net, _prev_disk, _prev_time = net, disk, now
|
||||
|
||||
# ---------------- WRITE JSON ----------------

def write_json_atomic():
    """Serialise the history buffers and atomically replace OUT_FILE.

    Readers (nginx serving the dashboard) never observe a partial file:
    we write to TMP_FILE and rename, which is atomic on POSIX filesystems.
    """
    payload = {key: list(val) for key, val in buffers.items()}
    payload["sample_interval"] = SAMPLE_INTERVAL
    # UTC timestamp; the "Z" suffix is friendlier to JS Date parsing.
    payload["generated_at"] = (
        datetime.now(timezone.utc)
        .isoformat(timespec='seconds')
        .replace("+00:00", "Z")
    )

    with open(TMP_FILE, "w") as f:
        json.dump(payload, f)
    os.replace(TMP_FILE, OUT_FILE)
|
||||
|
||||
# ---------------- MAIN LOOP ----------------

def main():
    """Initialise counters, start the GPU monitor, then sample forever."""
    global _prev_net, _prev_disk, _prev_time
    _prev_net = psutil.net_io_counters()
    _prev_disk = psutil.disk_io_counters()
    _prev_time = time.time()
    psutil.cpu_percent(interval=None)  # prime: first real reading is meaningful

    threading.Thread(target=gpu_monitor, daemon=True).start()

    while True:
        try:
            sample_once()
            write_json_atomic()
        except Exception as e:
            # Keep the service running, but surface the error in the
            # journal instead of swallowing it silently.
            print(f"Error: {e}", flush=True)
        time.sleep(SAMPLE_INTERVAL)


if __name__ == "__main__":
    main()
|
||||
EOL
|
||||
|
||||
sudo systemctl enable --now system-monitor.service
|
||||
|
|
|
|||
Loading…
Reference in New Issue