This commit is contained in:
devdatt 2026-02-18 04:45:35 +05:30
parent e5873e7dc3
commit dd71e27e3d
1 changed file with 57 additions and 68 deletions

125
setup.sh
View File

@ -194,7 +194,6 @@ HISTORY_SECONDS = 15 * 60
MAX_SAMPLES = int(HISTORY_SECONDS / SAMPLE_INTERVAL) MAX_SAMPLES = int(HISTORY_SECONDS / SAMPLE_INTERVAL)
# ---------------- HISTORY BUFFERS ---------------- # ---------------- HISTORY BUFFERS ----------------
# Using a dictionary to manage deques more cleanly
keys = [ keys = [
"timestamps", "cpu_percent", "ram_percent", "gpu_total", "gpu_render", "timestamps", "cpu_percent", "ram_percent", "gpu_total", "gpu_render",
"gpu_video", "gpu_blitter", "gpu_videoenhance", "net_in_Bps", "gpu_video", "gpu_blitter", "gpu_videoenhance", "net_in_Bps",
@ -202,118 +201,108 @@ keys = [
] ]
hist = {k: deque(maxlen=MAX_SAMPLES) for k in keys} hist = {k: deque(maxlen=MAX_SAMPLES) for k in keys}
# State for rates: the previous cumulative counter snapshots and the time they
# were taken. sample_once() diffs new readings against these to get per-second
# rates, then overwrites them.
_prev_net = psutil.net_io_counters()
_prev_disk = psutil.disk_io_counters()
_prev_time = time.time()

# Latest GPU engine busy percentages, written by the gpu_monitor() thread and
# read by sample_once(). Always access under gpu_lock.
gpu_data = {"total": 0.0, "render": 0.0, "video": 0.0, "blitter": 0.0, "ve": 0.0}
gpu_lock = threading.Lock()
# ---------------- GPU MONITOR THREAD ---------------- # ---------------- GPU MONITOR THREAD ----------------
# Maps intel_gpu_top engine class names to the keys used in gpu_data.
_GPU_ENGINE_KEYS = {
    "Render/3D": "render",
    "Video": "video",
    "Blitter": "blitter",
    "VideoEnhance": "ve",
}

# One pattern for the four token kinds the stream parser reacts to:
#   section - a lower-case key opening an object ("engines", "clients", ...)
#   engine  - an engine class key, optionally with an instance suffix ("/0")
#   busy    - an unquoted numeric "busy" value
#   close   - a closing brace, which ends the current engine object
_GPU_TOKEN_RE = re.compile(
    r'"(?P<section>[a-z][a-z0-9_-]*)"\s*:\s*\{'
    r'|"(?P<engine>Render/3D|VideoEnhance|Video|Blitter)(?:/\d+)?"\s*:\s*\{'
    r'|"busy"\s*:\s*(?P<busy>\d+(?:\.\d+)?)'
    r'|(?P<close>\})'
)


def _iter_engine_busy(lines):
    """Yield (gpu_data key, busy percent) pairs from intel_gpu_top JSON text.

    The parser keeps its state across lines, so it works whether an engine
    name and its "busy" value share a line or sit on separate lines. Only
    values inside the "engines" section are reported; anything under other
    sections (for example per-client data) is ignored. When one sample lists
    several instances of the same engine class, the highest value seen so far
    in that sample is yielded.
    """
    in_engines = False
    current = None
    peak = {}
    for line in lines:
        for token in _GPU_TOKEN_RE.finditer(line):
            if token.group("section") is not None:
                in_engines = token.group("section") == "engines"
                current = None
                if in_engines:
                    # A new sample starts: forget the previous sample's peaks.
                    peak = {}
            elif token.group("engine") is not None:
                current = _GPU_ENGINE_KEYS[token.group("engine")] if in_engines else None
            elif token.group("busy") is not None:
                if current is not None:
                    value = max(peak.get(current, 0.0), float(token.group("busy")))
                    peak[current] = value
                    yield current, value
            else:
                # Closing brace: whatever engine object was open is finished.
                current = None


def gpu_monitor():
    """Run intel_gpu_top forever and keep gpu_data up to date.

    Intended to run in a daemon thread; it never returns. Each parsed engine
    value is stored in gpu_data under gpu_lock, and gpu_data["total"] is set
    to the highest busy value of the four tracked engines. If the tool cannot
    be started, exits, or its output cannot be processed, it is restarted
    after a short pause.
    """
    # -J gives JSON, -s 1000 gives 1 second samples; stdbuf -oL asks for
    # line-buffered output so values arrive as they are produced.
    cmd = ["stdbuf", "-oL", "intel_gpu_top", "-J", "-s", "1000"]
    last_error = None
    while True:
        try:
            with subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True
            ) as proc:
                try:
                    for key, busy in _iter_engine_busy(proc.stdout):
                        # Hold the lock only for the dictionary update itself.
                        with gpu_lock:
                            gpu_data[key] = busy
                            gpu_data["total"] = max(
                                gpu_data[name] for name in _GPU_ENGINE_KEYS.values()
                            )
                finally:
                    # Make sure the child is gone before the context manager
                    # waits on it; harmless if it has already exited.
                    proc.kill()
            last_error = None
        except Exception as exc:
            # Best effort: report each distinct failure once instead of
            # flooding the log on every retry.
            message = f"GPU monitor error: {exc}"
            if message != last_error:
                print(message, flush=True)
                last_error = message
        # Pause before every restart, including after a clean exit, so a tool
        # that quits immediately is not respawned in a tight loop.
        time.sleep(2)
# ---------------- SAMPLING ---------------- # ---------------- SAMPLING ----------------
def _counter_rate(current, previous, field, elapsed):
    """Return the per-second increase of one cumulative counter field.

    Yields 0 when either snapshot is missing, and never returns a negative
    value (a counter that went backwards is reported as 0).
    """
    if current is None or previous is None:
        return 0
    delta = getattr(current, field) - getattr(previous, field)
    return int(max(0, delta / elapsed))


def sample_once():
    """Take one sample of system metrics and append it to the hist deques.

    Reads CPU, RAM, network, disk and the latest GPU values, converts the
    cumulative network/disk counters into bytes-per-second rates using the
    time since the previous call, and appends one entry to every series in
    hist. Updates _prev_net, _prev_disk and _prev_time for the next call.
    """
    global _prev_net, _prev_disk, _prev_time

    now = time.time()
    # Lower bound on the window so a very short or negative interval (clock
    # step) cannot produce huge or negative rates.
    elapsed = max(now - _prev_time, 0.1)

    # Grab system stats
    cpu = psutil.cpu_percent()
    ram = psutil.virtual_memory().percent
    net = psutil.net_io_counters()
    disk = psutil.disk_io_counters()  # may be None when no disks are reported

    try:
        disk_percent = psutil.disk_usage('/').percent
    except OSError:
        disk_percent = 0.0

    # Calculate rates
    in_rate = _counter_rate(net, _prev_net, "bytes_recv", elapsed)
    out_rate = _counter_rate(net, _prev_net, "bytes_sent", elapsed)
    read_rate = _counter_rate(disk, _prev_disk, "read_bytes", elapsed)
    write_rate = _counter_rate(disk, _prev_disk, "write_bytes", elapsed)

    # Copy under the lock so the GPU thread cannot change values mid-read.
    with gpu_lock:
        g = gpu_data.copy()

    # Log to deques. The timestamp uses the same clock reading as the rates.
    hist["timestamps"].append(datetime.fromtimestamp(now).isoformat(timespec='seconds'))
    hist["cpu_percent"].append(round(cpu, 1))
    hist["ram_percent"].append(round(ram, 1))
    hist["net_in_Bps"].append(in_rate)
    hist["net_out_Bps"].append(out_rate)
    hist["disk_read_Bps"].append(read_rate)
    hist["disk_write_Bps"].append(write_rate)
    hist["disk_percent"].append(round(disk_percent, 1))
    hist["gpu_total"].append(round(g["total"], 1))
    hist["gpu_render"].append(round(g["render"], 1))
    hist["gpu_video"].append(round(g["video"], 1))
    hist["gpu_blitter"].append(round(g["blitter"], 1))
    hist["gpu_videoenhance"].append(round(g["ve"], 1))

    # Save state for next tick
    _prev_net, _prev_disk, _prev_time = net, disk, now
def write_json_atomic():
    """Write the current history to OUT_FILE as JSON via a temp file.

    The payload holds every series in hist as a list, plus "sample_interval"
    and a UTC "generated_at" timestamp ending in "Z". The data is written to
    TMP_FILE first and then moved over OUT_FILE with os.replace, so a reader
    never sees a partially written file. Write and serialization failures are
    reported on stdout and otherwise ignored.
    """
    payload = {k: list(v) for k, v in hist.items()}
    payload["sample_interval"] = SAMPLE_INTERVAL
    payload["generated_at"] = (
        datetime.now(timezone.utc).isoformat(timespec='seconds').replace("+00:00", "Z")
    )
    try:
        with open(TMP_FILE, "w", encoding="utf-8") as f:
            json.dump(payload, f)
        os.replace(TMP_FILE, OUT_FILE)
    except (OSError, TypeError, ValueError) as e:
        # flush so the message is visible promptly when stdout is a pipe
        print(f"File write error: {e}", flush=True)
def main():
    """Start the GPU monitor thread and sample/write metrics forever.

    Warns when not running as root, starts gpu_monitor() in a daemon thread,
    then loops: take one sample, write the JSON snapshot once, and sleep for
    SAMPLE_INTERVAL seconds. An error in one iteration is reported and the
    loop continues.
    """
    if os.geteuid() != 0:
        print("WARNING: Script not running as root. GPU metrics will be 0.", flush=True)

    threading.Thread(target=gpu_monitor, daemon=True).start()
    # flush so messages appear promptly when stdout is a pipe
    print(f"Monitoring started. Writing to {OUT_FILE}...", flush=True)

    while True:
        try:
            sample_once()
            write_json_atomic()  # atomic save: temp file, then replace
        except Exception as e:
            # Top-level boundary: keep the monitor alive across one bad tick.
            print(f"Error: {e}", flush=True)
        time.sleep(SAMPLE_INTERVAL)
if __name__ == "__main__": if __name__ == "__main__":
main() main()
EOL EOL
sudo systemctl enable --now system-monitor.service sudo systemctl enable --now system-monitor.service