This commit is contained in:
devdatt 2026-02-18 04:49:24 +05:30
parent dd71e27e3d
commit de6caee5b6
1 changed files with 83 additions and 37 deletions

104
setup.sh
View File

@ -183,6 +183,7 @@ import subprocess
import threading import threading
import re import re
import psutil import psutil
import shutil
from collections import deque from collections import deque
from datetime import datetime, timezone from datetime import datetime, timezone
@ -193,6 +194,10 @@ SAMPLE_INTERVAL = 10.0
# Keep 15 minutes of history; the buffer length follows from the sampling
# period (SAMPLE_INTERVAL, defined just above).
HISTORY_SECONDS = 15 * 60
MAX_SAMPLES = int(HISTORY_SECONDS / SAMPLE_INTERVAL)

# ---------------- DEPENDENCY CHECK ----------------
# Fail at start-up with a clear message instead of letting the GPU thread
# retry forever against a binary that does not exist.
if not shutil.which("intel_gpu_top"):
    raise RuntimeError("intel_gpu_top not installed or not in PATH")
# ---------------- HISTORY BUFFERS ---------------- # ---------------- HISTORY BUFFERS ----------------
keys = [ keys = [
"timestamps", "cpu_percent", "ram_percent", "gpu_total", "gpu_render", "timestamps", "cpu_percent", "ram_percent", "gpu_total", "gpu_render",
@ -201,58 +206,93 @@ keys = [
] ]
# One bounded deque per metric; once MAX_SAMPLES entries are stored the
# oldest sample is dropped automatically.
hist = {k: deque(maxlen=MAX_SAMPLES) for k in keys}

# State for rates: previous counter snapshots and their timestamp, used by
# sample_once() to turn cumulative byte counters into per-second rates.
_prev_net = psutil.net_io_counters()
_prev_disk = psutil.disk_io_counters()
_prev_time = time.time()

# Prime CPU measurement: psutil documents the first non-blocking
# cpu_percent() call as returning a meaningless value, so discard it here.
psutil.cpu_percent(None)

# Latest GPU engine busy percentages. Written by the gpu_monitor thread and
# read by sample_once(); every access must hold gpu_lock.
gpu_data = {"total": 0.0, "render": 0.0, "video": 0.0, "blitter": 0.0, "ve": 0.0}
gpu_lock = threading.Lock()
# ---------------- GPU MONITOR THREAD ---------------- # ---------------- GPU MONITOR THREAD ----------------
def _iter_json_objects(stream):
    """Yield each complete top-level JSON object read from text *stream*."""
    parts = []
    depth = 0
    in_string = False
    escaped = False
    # Read character-wise rather than line-wise so an object is emitted as
    # soon as its closing brace arrives, even when no newline follows it.
    for ch in iter(lambda: stream.read(1), ""):
        if depth == 0:
            # Between objects: skip separators such as "[", "," and blanks.
            if ch == "{":
                depth = 1
                parts = [ch]
            continue
        parts.append(ch)
        if in_string:
            # Braces inside string values must not affect nesting depth.
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                text = "".join(parts)
                parts = []
                try:
                    obj = json.loads(text)
                except ValueError:
                    # Truncated or garbled sample: drop it, keep streaming.
                    continue
                yield obj


def _classify_engine(name):
    """Map an engine name to a ``gpu_data`` key, or None if unrecognised."""
    n = str(name).lower()
    if "render" in n or "rcs" in n:
        return "render"
    # Test "enhance" before "video": "VideoEnhance" contains both words and
    # would otherwise be counted as a plain video engine.
    if "enhance" in n or "vecs" in n:
        return "ve"
    if "video" in n or "vcs" in n:
        return "video"
    if "blitter" in n or "bcs" in n:
        return "blitter"
    return None


def _summarize_engines(sample):
    """Return peak busy % per engine class (plus "total") for one sample."""
    peaks = {"render": 0.0, "video": 0.0, "blitter": 0.0, "ve": 0.0}
    engines = sample.get("engines") if isinstance(sample, dict) else None
    if isinstance(engines, dict):
        for name, stats in engines.items():
            kind = _classify_engine(name)
            if kind is None or not isinstance(stats, dict):
                continue
            try:
                busy = float(stats.get("busy", 0.0))
            except (TypeError, ValueError):
                continue
            peaks[kind] = max(peaks[kind], busy)
    # "total" is the busiest engine class, not a sum.
    peaks["total"] = max(peaks.values())
    return peaks


def gpu_monitor():
    """Mirror Intel GPU engine utilisation into ``gpu_data`` forever.

    Intended as a daemon-thread target; it never returns. It runs
    ``intel_gpu_top -J -s 1000`` (JSON output, one sample per second),
    parses every JSON object the tool prints and stores the per-engine
    busy percentages in the module-level ``gpu_data`` dict while holding
    ``gpu_lock``.

    Whenever the tool exits or fails, the published values are reset to
    zero so readers never see a stale reading, and the tool is restarted
    after a two-second pause.
    """
    cmd = ["intel_gpu_top", "-J", "-s", "1000"]
    idle = {"total": 0.0, "render": 0.0, "video": 0.0, "blitter": 0.0, "ve": 0.0}
    last_error = None

    while True:
        try:
            with subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                text=True,
                bufsize=1,
            ) as proc:
                try:
                    for sample in _iter_json_objects(proc.stdout):
                        busy = _summarize_engines(sample)
                        with gpu_lock:
                            gpu_data.update(busy)
                finally:
                    # Do not leave the tool running if parsing blew up;
                    # leaving the with-block then closes the pipe and reaps it.
                    if proc.poll() is None:
                        proc.terminate()
        except Exception as exc:
            # Best-effort thread: report, but only when the error changes,
            # so a persistent failure does not flood the output.
            message = f"{type(exc).__name__}: {exc}"
            if message != last_error:
                print(f"GPU monitor error: {message}", flush=True)
                last_error = message

        # The tool is no longer producing data: publish zeros, not old values.
        with gpu_lock:
            gpu_data.update(idle)

        # Always pause before respawning, including after a clean exit,
        # to avoid a tight restart loop.
        time.sleep(2)
# ---------------- SAMPLING ---------------- # ---------------- SAMPLING ----------------
def sample_once(): def sample_once():
global _prev_net, _prev_disk, _prev_time global _prev_net, _prev_disk, _prev_time
now = time.time() now = time.time()
elapsed = max(now - _prev_time, 0.1) elapsed = max(now - _prev_time, 0.1)
# Grab System stats
cpu = psutil.cpu_percent() cpu = psutil.cpu_percent()
ram = psutil.virtual_memory().percent ram = psutil.virtual_memory().percent
net = psutil.net_io_counters() net = psutil.net_io_counters()
disk = psutil.disk_io_counters() disk = psutil.disk_io_counters()
# Calculate rates
in_r = (net.bytes_recv - _prev_net.bytes_recv) / elapsed in_r = (net.bytes_recv - _prev_net.bytes_recv) / elapsed
out_r = (net.bytes_sent - _prev_net.bytes_sent) / elapsed out_r = (net.bytes_sent - _prev_net.bytes_sent) / elapsed
read_r = (disk.read_bytes - _prev_disk.read_bytes) / elapsed read_r = (disk.read_bytes - _prev_disk.read_bytes) / elapsed
@ -261,7 +301,10 @@ def sample_once():
with gpu_lock: with gpu_lock:
g = gpu_data.copy() g = gpu_data.copy()
# Log to deques # stale GPU protection
if time.time() - _prev_time > SAMPLE_INTERVAL * 2:
g = {"total": 0, "render": 0, "video": 0, "blitter": 0, "ve": 0}
hist["timestamps"].append(datetime.now().isoformat(timespec='seconds')) hist["timestamps"].append(datetime.now().isoformat(timespec='seconds'))
hist["cpu_percent"].append(round(cpu, 1)) hist["cpu_percent"].append(round(cpu, 1))
hist["ram_percent"].append(round(ram, 1)) hist["ram_percent"].append(round(ram, 1))
@ -278,28 +321,31 @@ def sample_once():
_prev_net, _prev_disk, _prev_time = net, disk, now _prev_net, _prev_disk, _prev_time = net, disk, now
# ---------------- MAIN LOOP ----------------
def main():
    """Sample system metrics forever and publish the history as JSON.

    Starts ``gpu_monitor`` in a daemon thread, then every
    ``SAMPLE_INTERVAL`` seconds calls ``sample_once()`` and rewrites
    ``OUT_FILE`` with the full history plus ``sample_interval`` and a UTC
    ``generated_at`` timestamp. Never returns.
    """
    if os.geteuid() != 0:
        print("WARNING: Script not running as root. GPU metrics will be 0.")

    threading.Thread(target=gpu_monitor, daemon=True).start()

    while True:
        try:
            sample_once()

            payload = {k: list(v) for k, v in hist.items()}
            payload.update({
                "sample_interval": SAMPLE_INTERVAL,
                "generated_at": datetime.now(timezone.utc).isoformat()
            })

            # Write to a temp file and rename over the target so readers
            # never observe a half-written JSON document.
            with open(TMP_FILE, "w") as f:
                json.dump(payload, f)
            os.replace(TMP_FILE, OUT_FILE)

        except Exception as e:
            # Keep the loop alive on any failure, but report it rather than
            # hiding it; a silently failing sampler is hard to diagnose.
            print(f"Error: {e}", flush=True)

        time.sleep(SAMPLE_INTERVAL)
# ---------------- ENTRY ----------------
# Run the sampler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()