diff --git a/.gitignore b/.gitignore index f0606fe..061bbc1 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,7 @@ env/ __pycache__/ *.py[cod] *$py.class +.idea/ +venv/ + +logs/ \ No newline at end of file diff --git a/README.md b/README.md index e29edda..901a1ed 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,32 @@ Make sure you have the following installed on your system: - InfluxDB - [InfluxDB Python Client](https://github.com/influxdata/influxdb-client-python) +## VM Preparation + +Install `kvmtop` +```commandline +wget https://github.com/cha87de/kvmtop/releases/download/2.1.3/kvmtop_2.1.3_linux_amd64.deb +sudo dpkg -i kvmtop_2.1.3_linux_amd64.deb +``` +Try kvmtop it may fail with error saying missing package libncurses5.so +To fix it add below lines to `/etc/apt/sources.list` +```commandline +deb http://security.ubuntu.com/ubuntu focal-security main +deb http://archive.ubuntu.com/ubuntu/ focal main restricted universe multiverse +deb http://archive.ubuntu.com/ubuntu/ focal-updates main restricted universe multiverse +deb http://security.ubuntu.com/ubuntu/ focal-security main restricted universe multiverse +``` +And execute below commands: +```commandline +sudo apt update +sudo apt upgrade +sudo apt-get install libncurses5 libncurses5:i386 +``` +It will resolve this issue, try `kvmtop` command in terminal and verify everything working as expected + +Note: If you got stuck with the above error then, remove `libncurses5:i386` and install only `libncurses5` (Sample Error message: E: Unable to locate package libncurses5:i386) + + ## Usage 1. Set up your InfluxDB configuration by creating a `.env` file with the following variables: @@ -19,10 +45,22 @@ Make sure you have the following installed on your system: INFLUX_TOKEN="your-influxdb-token" INFLUX_ORG="your-influxdb-organization" INFLUX_BUCKET="your-influxdb-bucket" + ROOT_PASS="user-passof-ubuntu-terminal" + DISK_PATH="disk-path-to-be-monitored" ``` 2. Run the script: ```bash - python main.py + python system_monitor.py ``` +## Configure systemd service +1. Pull latest changes to the machine. +2. Update and validate `libmon.service` file in the Libmon directory +3. Copy the directory to `sudo cp libmon.service /etc/systemd/system/libmon.service` +4. Enable the service by `sudo systemctl enable libmon.service` +5. Start the service by `sudo systemctl start libmon.service` +6. Validate the process status by `sudo systemctl status libmon.service and tail -f /home/dev/Libmon/logs/system_monitor.log`. + + + diff --git a/config/modules_config.json b/config/modules_config.json index b21ea72..2bc1a06 100644 --- a/config/modules_config.json +++ b/config/modules_config.json @@ -1,8 +1,10 @@ { "modules": [ - {"name": "uptime", "interval_seconds": 600}, - {"name": "disk", "interval_seconds": 600}, - {"name": "partition", "interval_seconds": 3600}, - {"name": "sensors", "interval_seconds": 60} + {"name": "uptime", "interval_seconds": 30}, + {"name": "disk", "interval_seconds": 30}, + {"name": "partition", "interval_seconds": 30}, + {"name": "sensors", "interval_seconds": 30}, + {"name": "kvm_monitor", "interval_seconds": 30}, + {"name": "network", "interval_seconds": 30} ] } diff --git a/connection.py b/connection.py new file mode 100644 index 0000000..dc70f94 --- /dev/null +++ b/connection.py @@ -0,0 +1,29 @@ +import os +import influxdb_client + +from dotenv import load_dotenv +from influxdb_client.client.write_api import SYNCHRONOUS + +load_dotenv() + +INFLUX_URL = os.getenv("INFLUX_URL") +INFLUX_TOKEN = os.getenv("INFLUX_TOKEN") +INFLUX_ORG = os.getenv("INFLUX_ORG") +INFLUX_BUCKET = os.getenv("INFLUX_BUCKET") + +client = influxdb_client.InfluxDBClient(url=INFLUX_URL, token=INFLUX_TOKEN, org=INFLUX_ORG, verify_ssl=False) +write_api = client.write_api(write_options=SYNCHRONOUS) + + +def create_influxdb_point(measurement, data): + point = influxdb_client.Point(measurement) + for key, value in data.items(): + if key == 'host': + # hotfix for dot in hostname + point = point.tag(key, value.split('.')[0]) + if key == 'vm_name': + point = point.tag(key, value) + else: + point = point.field(key, value) + + return point diff --git a/launch.sh b/launch.sh new file mode 100644 index 0000000..170ad88 --- /dev/null +++ b/launch.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Name of the process to check (without path) +PROCESS_NAME="system_monitor.py" + +# Directory of your Python virtual environment +VENV_DIR="/path/to/your/venv" + +# Directory where your Python script is located +SCRIPT_DIR="/path/to/your/script" + +# Name of your Python script +SCRIPT_NAME="your_script.py" + +# Check if the process is running +if ! pgrep -f "$PROCESS_NAME" > /dev/null +then + echo "Process $PROCESS_NAME is not running. Starting it..." + + # Activate virtual environment + source "$VENV_DIR/bin/activate" + + # Change directory to the script location + cd "$SCRIPT_DIR" + + # Launch the Python script + nohup python "$SCRIPT_NAME" & + + echo "Process $PROCESS_NAME started." +else + echo "Process $PROCESS_NAME is already running." +fi diff --git a/main.py b/main.py deleted file mode 100644 index 254daec..0000000 --- a/main.py +++ /dev/null @@ -1,66 +0,0 @@ -import os -import time -import json -import importlib -import schedule -from dotenv import load_dotenv -import influxdb_client -from influxdb_client.client.write_api import SYNCHRONOUS - -load_dotenv() - -influx_url = os.getenv("INFLUX_URL") -influx_token = os.getenv("INFLUX_TOKEN") -influx_org = os.getenv("INFLUX_ORG") -influx_bucket = os.getenv("INFLUX_BUCKET") - -client = influxdb_client.InfluxDBClient(url=influx_url, token=influx_token, org=influx_org) -write_api = client.write_api(write_options=SYNCHRONOUS) - -def create_influxdb_point(measurement, data): - point = influxdb_client.Point(measurement) - for key, value in data.items(): - if key == 'host': - # hotfix for dot in hostname - point = point.tag(key, value.split('.')[0]) - else: - point = point.field(key, value) - - return point - -def load_config(): - with open('config/modules_config.json', 'r') as f: - config = json.load(f) - return config.get("modules", []) - - -def run_module(module_name): - module = importlib.import_module(f"modules.{module_name}") - data = module.collect_data() - - if isinstance(data, list): - # If collect data return multiple records - for record in data: - point = create_influxdb_point(module_name, record) - write_api.write(bucket=influx_bucket, org=influx_org, record=point) - print(f"writing record for {module_name} finished.") - - else: - point = create_influxdb_point(module_name, data) - write_api.write(bucket=influx_bucket, org=influx_org, record=point) - print(f"writing record for {module_name} finished.") - -def main(): - modules_config = load_config() - for module_config in modules_config: - module_name = module_config["name"] - interval_seconds = module_config.get("interval_seconds", False) - - schedule.every(interval_seconds).seconds.do( - run_module, module_name=module_name) - - while True: - schedule.run_pending() - time.sleep(1) - -main() diff --git a/modules/__init__.py b/modules/__init__.py index e69de29..061028e 100644 --- a/modules/__init__.py +++ b/modules/__init__.py @@ -0,0 +1,20 @@ +import logging +import os + +MONITORING_INTERVAL = 30 + +if not os.path.exists('./logs'): + os.makedirs('logs') + +# Configure logging +logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler("./logs/system_monitor.log"), + logging.StreamHandler() + ] +) + +# Create a logger instance +logger = logging.getLogger(__name__) \ No newline at end of file diff --git a/modules/disk.py b/modules/disk.py index 3ffec00..31fc2b7 100644 --- a/modules/disk.py +++ b/modules/disk.py @@ -1,7 +1,11 @@ import os import socket import subprocess -import json + +import ujson + +from modules import logger + def get_nvme_disk_names(): try: @@ -12,20 +16,37 @@ def get_nvme_disk_names(): return nvme_disks except subprocess.CalledProcessError as e: - print(f"Error: {e}") + logger.debug(f"Error: {e}") return [] +def get_disk_stats(disk_path): + try: + output = subprocess.check_output( + args=['sudo', '-S', 'smartctl', '-j', '-a', disk_path], + input=f'{os.getenv("ROOT_PASS")}\n', + universal_newlines=True + ) + if output and output.startswith('{ "'): + data = ujson.loads(output) + return data + except subprocess.CalledProcessError as e: + logger.debug(f"Command failed with exit status {e.returncode}") + if e.output and e.output.startswith('{\n'): + data = ujson.loads(e.output) + return data + except Exception as e: + logger.debug(str(e)) + return {} + + def get_smartctl_data(disk_path): if disk_path is None: - print("DISK_PATH environment variable is not set.") + logger.debug("DISK_PATH environment variable is not set.") return None - command_output = subprocess.check_output(['smartctl', '-j', '-a', disk_path]).decode('utf-8') - - # Parse JSON output - smart_data = json.loads(command_output) + smart_data = get_disk_stats(disk_path) # Extract relevant values from the JSON data hostname = socket.gethostname() @@ -57,18 +78,23 @@ def get_smartctl_data(disk_path): "media_and_data_integrity_errors": int(media_and_data_integrity_errors) } + def collect_data(): - if os.getenv("STORAGE_SERVER"): - disks = get_nvme_disk_names() - disk_data = [] - for disk in disks: - disk_data.append(get_smartctl_data(f"/dev/{disk}")) + try: + if os.getenv("STORAGE_SERVER"): + disks = get_nvme_disk_names() + disk_data = [] + for disk in disks: + disk_data.append(get_smartctl_data(f"/dev/{disk}")) - return disk_data + return disk_data - disk_path = os.getenv("DISK_PATH") - return get_smartctl_data(disk_path) + disk_path = os.getenv("DISK_PATH") + return get_smartctl_data(disk_path) + except Exception as e: + logger.debug(str(e)) + return {} if __name__=="__main__": - print(collect_data()) + logger.debug(collect_data()) diff --git a/modules/kvm_monitor.py b/modules/kvm_monitor.py new file mode 100644 index 0000000..7b6ea38 --- /dev/null +++ b/modules/kvm_monitor.py @@ -0,0 +1,342 @@ +import os +import subprocess +from threading import Thread +from xml.etree import ElementTree + +import inotify.adapters +import libvirt +import psutil + +import ujson +import time +import traceback + +from connection import create_influxdb_point, write_api, INFLUX_BUCKET, INFLUX_ORG +from modules import MONITORING_INTERVAL, logger + +VM_STATE_DEFINITION = { + libvirt.VIR_DOMAIN_NOSTATE: "no_state", + libvirt.VIR_DOMAIN_RUNNING: "running", + libvirt.VIR_DOMAIN_BLOCKED: "blocked", + libvirt.VIR_DOMAIN_PAUSED: "paused", + libvirt.VIR_DOMAIN_SHUTDOWN: "shutdown", + libvirt.VIR_DOMAIN_SHUTOFF: "shutoff", + libvirt.VIR_DOMAIN_CRASHED: "crashed", + libvirt.VIR_DOMAIN_PMSUSPENDED: "suspended_by_power_manager" +} + +def get_vms_with_state(): + try: + output_lines = subprocess.check_output(['virsh', 'list', '--all']).decode('utf-8').strip().split('\n') + vm_stats = [] + keys = output_lines[0].split(' ') + keys = [key.lower().strip() for key in keys] + for line in output_lines[2:]: + values = line.split(' ') + vm_stat = dict() + vm_stat[keys[0]] = values[0].strip() + vm_stat[keys[1]] = values[1].strip() + vm_stat[keys[2]] = values[2].strip() + vm_stats.append(vm_stat) + return vm_stats + except Exception as e: + logger.debug(traceback.format_exc()) + return [] + + +def get_kvm_stats(): + try: + output = subprocess.check_output( + ['sudo', '-S', 'kvmtop', '--cpu', '--mem', '--disk', '--net', '--io', '--host', + '--printer=json', '--runs=1'], input=f'{os.getenv("ROOT_PASS")}\n', text=True) + if output and output.startswith('{ "'): + data = ujson.loads(output) + return data + except Exception as e: + logger.debug(traceback.format_exc()) + return {} + + +def sync_data_to_influx_db(data): + try: + point = create_influxdb_point('kvm_stats', data) + write_api.write(bucket=INFLUX_BUCKET, org=INFLUX_ORG, record=point) + logger.debug(f"writing record for kvm_stats finished.") + except Exception as e: + logger.debug(traceback.format_exc()) + return + + +def group_data_points(key_prefix, source_data): + return {key: source_data[key] for key in source_data if key.startswith(key_prefix)} + + +def filter_and_group_host_stats(hostname, host_uuid, data): + try: + to_return = {} + host_key_groups = {"cpu_": "cpustat", "ram_": "memory", "disk_": "disk", "net_": "nics", "psi_": "psistat"} + for key in host_key_groups: + data_group = group_data_points(key_prefix=key, source_data=data) + if data_group: + if host_key_groups[key] == "memory": + for k, v in data_group.items(): + data_group[k] = round(v / (1024*1024), 2) + if host_key_groups[key] == "cpustat": + data_group['cpu_usage'] = psutil.cpu_percent(interval=MONITORING_INTERVAL) + data_group.update({"host": hostname, "host_uuid": host_uuid}) + to_return[host_key_groups[key]] = data_group + return to_return + except Exception as e: + logger.debug(traceback.format_exc()) + return {} + + +def filter_and_group_vm_stats(hostname, host_uuid, data): + try: + to_return = {} + vm_name = data.get('name', "") + vm_id = data.get('UUID', "") + host_key_groups = {"cpu_": "cpustat", "ram_": "memory", "disk_": "disk", "net_": "nics", "io_": "iostat", } + for key in host_key_groups.keys(): + data_group = group_data_points(key_prefix=key, source_data=data) + if key == 'cpu_': + data_group.update({'state': data.get('state')}) + if host_key_groups[key] == "memory": + for k, v in data_group.items(): + data_group[k] = round(v / (1024 * 1024), 2) + if data_group: + data_group.update({"host": hostname, "host_uuid": host_uuid, "vm_name": vm_name, "vm_id": vm_id}) + to_return[f"vm_{host_key_groups[key]}"] = data_group + return to_return + except Exception as e: + logger.debug(traceback.format_exc()) + return {} + + +def send_data_to_influxdb(data): + for key, value in data.items(): + if value: + point = create_influxdb_point(key, value) + write_api.write(bucket=INFLUX_BUCKET, org=INFLUX_ORG, record=point) + logger.debug(f"writing record for {key} finished.") + else: + logger.debug('Uploaded all data points from kvm_monitor') + + +def merge_lists_of_dicts(list1, list2, key): + # Create a dictionary to hold merged results + merged_dict = {} + + # Add dictionaries from the first list to the merged_dict + for item in list1: + merged_dict[item[key]] = item + + # Update the merged_dict with dictionaries from the second list + for item in list2: + if item[key] in merged_dict: + # If the key exists, merge the dictionaries + merged_dict[item[key]].update(item) + else: + # If the key does not exist, add the new item + merged_dict[item[key]] = item + + # Convert merged_dict back to a list + return list(merged_dict.values()) + + +def send_data(log): + try: + vms_list = get_vms_with_state() + domains = log.get('domains', []) + if not domains: + for vm in vms_list: + vm_stat = dict() + vm_stat['name'] = vm.get('name') + vm_stat['state'] = vm.get('state') + domains.append(vm_stat) + else: + for domain in domains: + domain.update({'state': 'running'}) + + hostname = log.get("host", {}).get("host_name") + host_uuid = log.get("host", {}).get("host_uuid") + host, vms = get_vms_and_host_stats() + log['host'].update(host) + host_data = filter_and_group_host_stats(hostname, host_uuid, data=log.get('host')) + send_data_to_influxdb(host_data) + combined_vms = merge_lists_of_dicts(domains, vms, 'name') + for vm_stats in combined_vms: + if vm_stats.get('state') == 'running': + send_data_to_influxdb(filter_and_group_vm_stats(hostname, host_uuid, data=vm_stats)) + return True + except Exception as e: + logger.debug(traceback.format_exc()) + return + + +def collect_data_continuously(): + log_file = "/home/vignesh/dev/kvmtop.logs" + i = inotify.adapters.Inotify() + i.add_watch(log_file) + + try: + for event in i.event_gen(yield_nones=False): + (_, type_names, path, filename) = event + + if "IN_MODIFY" in type_names: + with open(log_file, 'r') as file: + lines = file.readlines() + last_line = lines[-1].strip() + t1 = Thread(target=send_data, args=(last_line,)) + t1.run() + + finally: + i.remove_watch(log_file) + + +def get_vm_last_known_cpu_time(vm_name): + try: + with open(f'./{vm_name}.dat', 'r') as f: + time_and_cpu_time = f.read() + if time_and_cpu_time: + timestamp, cpu_time = [float(data) for data in time_and_cpu_time.split(',')] + return timestamp, cpu_time + return time.time(), 0.0 + except FileNotFoundError: + return time.time(), 0.0 + + +def set_vm_last_known_cpu_time(vm_name, cpu_time, timestamp): + try: + with open(f'./{vm_name}.dat', 'w') as f: + return f.write(f"{timestamp}, {cpu_time}") + except FileNotFoundError: + return 0 + + +def get_cpu_usage_percentage(vm): + last_timestamp, prev_cpu_time = get_vm_last_known_cpu_time(vm.name()) + cpu_stats = vm.getCPUStats(True)[0] + user_time = cpu_stats['user_time'] / 1000000000 # Convert from nanoseconds to seconds + system_time = cpu_stats['system_time'] / 1000000000 + total_cpu_time = user_time + system_time + + # Calculate the CPU time used since the last measurement + cpu_time_used = total_cpu_time - prev_cpu_time + + # Get the number of virtual CPUs + v_cpus = vm.vcpus()[0] + no_v_cpus = len(v_cpus) + + # Calculate CPU usage percentage + current_time = time.time() + duration = round(current_time - last_timestamp) + if duration and no_v_cpus: + cpu_usage_percentage = (cpu_time_used / (duration * no_v_cpus)) * 100 # 1 second interval + else: + cpu_usage_percentage = 0.0 + + set_vm_last_known_cpu_time(vm.name(), total_cpu_time, current_time) + + return cpu_usage_percentage + + +def get_vms_and_host_stats(): + conn = libvirt.open("qemu:///system") + try: + stats = conn.getInfo() + host_information = { + "cpu_model": stats[0], + "ram_total": stats[1]/1024, + "cpu_cores": stats[2], + "cpu_max_freq": stats[3], + "cpu_numa_nodes": stats[4], + "cpu_sockets_per_node": stats[5], + "cpu_cores_per_socket": stats[6], + "cpu_max_threads_per_core": stats[7] + } + vms = conn.listAllDomains() + vm_stats = [] + if vms: + for vm in vms: + state, max_mem, mem, no_of_cpu, cpu_time = vm.info() + stats = { + "cpu_cores": no_of_cpu, + "cpu_time": cpu_time / 1000000000, + "state": VM_STATE_DEFINITION.get(state), + "name": vm.name(), + "ram_max": max_mem, + "ram_actual": mem, + } + if state == libvirt.VIR_DOMAIN_RUNNING: + cpu_usage_percentage = get_cpu_usage_percentage(vm) + stats["cpu_usage"] = cpu_usage_percentage + mem_stat = vm.memoryStats() + for key, value in mem_stat.items(): + stats.update({ + f"ram_{key}": value + }) + + tree = ElementTree.fromstring(vm.XMLDesc()) + disks = [path.get('file', '') for path in tree.findall("devices/disk/source")] + total_read_bytes, total_write_bytes = 0, 0 + total_read_req, total_write_req, total_no_errors = 0, 0, 0 + for disk in disks: + (rd_req, rd_bytes, wr_req, wr_bytes, err) = vm.blockStats(disk) + # stats.update({ + # "disk_id": disk, + # 'disk_no_read_req': rd_req, + # 'disk_read_bytes': rd_bytes, + # 'disk_no_write_req': wr_req, + # 'disk_write_bytes': wr_bytes, + # 'disk_no_errors': err, + # }) + total_read_bytes += rd_bytes + total_write_bytes += wr_bytes + total_read_req += rd_req + total_write_req += wr_req + total_no_errors += total_no_errors + stats.update({ + 'io_read_bytes': total_read_bytes, + 'io_write_bytes': total_write_bytes, + 'io_read_req': total_read_req, + 'io_write_req': total_write_req, + 'io_no_errors': total_no_errors, + }) + + interface = tree.find("devices/interface/target").get("dev") + net_stats = vm.interfaceStats(interface) + stats.update({ + 'net_read_bytes': net_stats[0], + 'net_read_packets': net_stats[1], + 'net_read_errors': net_stats[2], + 'net_read_drops': net_stats[3], + 'net_write_bytes': net_stats[4], + 'net_write_packets': net_stats[5], + 'net_write_errors': net_stats[6], + 'net_write_drops': net_stats[7], + }) + + vm_stats.append(stats) + + return host_information, vm_stats + + except Exception as e: + logger.debug(traceback.format_exc()) + finally: + conn.close() + + + + +def collect_data(): + try: + data = get_kvm_stats() + return send_data(data) + except Exception as e: + logger.debug(traceback.format_exc()) + return False + + +if __name__ == "__main__": + collect_data() diff --git a/modules/network.py b/modules/network.py index d3cc65c..00089e7 100644 --- a/modules/network.py +++ b/modules/network.py @@ -1,33 +1,32 @@ -import psutil -from time import sleep - -def get_network_data(interface): - stats_before = psutil.net_io_counters(pernic=True)[interface] - sleep(5) # Adjust the sleep duration as needed - stats_after = psutil.net_io_counters(pernic=True)[interface] +import socket +import time +import traceback - receive_bytes = stats_after.bytes_recv - stats_before.bytes_recv - transmit_bytes = stats_after.bytes_sent - stats_before.bytes_sent +import psutil - return receive_bytes, transmit_bytes +from modules import logger -def main(): - network_interface = "enp6s0" - interval_seconds = 5 +io, last_captured_time = psutil.net_io_counters(pernic=True), time.time() +def collect_data(): try: - while True: - receive_bytes, transmit_bytes = get_network_data(network_interface) - - print(f"Network Interface: {network_interface}") - print(f"Received Data: {receive_bytes} bytes") - print(f"Transmitted Data: {transmit_bytes} bytes") - print("=" * 30) - - sleep(interval_seconds) - - except KeyboardInterrupt: - print("Script terminated by user.") - -if __name__ == "__main__": - main() + global io, last_captured_time + io_2, recent_captured_time = psutil.net_io_counters(pernic=True), time.time() + delay = recent_captured_time - last_captured_time + data = {} + for iface, iface_io in io.items(): + bytes_sent_iface = io_2[iface].bytes_sent if io_2.get(iface) else 0 + bytes_recv_iface = io_2[iface].bytes_recv if io_2.get(iface) else 0 + upload_speed, download_speed = bytes_sent_iface - iface_io.bytes_sent, bytes_recv_iface - iface_io.bytes_recv + data.update({ + f"{iface}_download": bytes_recv_iface, + f"{iface}_upload": bytes_sent_iface, + f"{iface}_upload_speed": round(upload_speed / delay) , + f"{iface}_download_speed": round(download_speed / delay), + }) + io, last_captured_time = io_2, recent_captured_time + data.update({"host": socket.gethostname()}) + return data + except Exception: + logger.debug(f"Failed to capture network stats {traceback.format_exc()}") + return {} \ No newline at end of file diff --git a/modules/partition.py b/modules/partition.py index 16a972e..3d855cc 100644 --- a/modules/partition.py +++ b/modules/partition.py @@ -1,6 +1,7 @@ import socket import os import psutil +from modules import logger def get_disk_space_usage(device): try: @@ -40,12 +41,16 @@ def get_disk_usage_for_mount_points(mnt_directory='/mnt'): return data except Exception as e: - print(f"Error: {e}") + logger.debug(f"Error: {e}") def collect_data(): - mnt_directory = '/mnt' - return get_disk_usage_for_mount_points(mnt_directory) + try: + mnt_directory = '/mnt' + return get_disk_usage_for_mount_points(mnt_directory) + except Exception as e: + logger.debug(str(e)) + return {} if __name__ == "__main__": diff --git a/modules/sensors.py b/modules/sensors.py index f8b769d..ae57afc 100644 --- a/modules/sensors.py +++ b/modules/sensors.py @@ -2,6 +2,8 @@ import json import subprocess +from modules import logger + def get_sensor_data(): # Run the 'sensors -j' command @@ -23,8 +25,13 @@ def get_sensor_data(): "cpu tccd2": sensor_data.get("k10temp-pci-00c3", {}).get("Tccd2", {}).get("temp4_input"), } + def collect_data(): - return get_sensor_data() + try: + return get_sensor_data() + except Exception as e: + logger.debug(str(e)) + return {} if __name__=="__main__": diff --git a/modules/uptime.py b/modules/uptime.py index 3629da0..e406f81 100644 --- a/modules/uptime.py +++ b/modules/uptime.py @@ -1,34 +1,30 @@ -import subprocess -import re import socket +from modules import logger + def get_system_uptime_seconds(): try: - uptime_output = subprocess.check_output(['uptime']).decode('utf-8').strip() - uptime_match = re.search(r"up(?:\s+)?((\d+) days?,)?(?:\s+)?(\d+):(\d+)", uptime_output) - - if uptime_match: - # Corrected line: Convert numeric day value to integer - days = int(uptime_match.group(2)) if uptime_match.group(2) else 0 - hours, minutes = map(int, uptime_match.group(3, 4)) - - uptime_seconds = days * 24 * 60 * 60 + hours * 60 * 60 + minutes * 60 - - return uptime_seconds - else: - raise ValueError("Unable to parse uptime information") + with open('/proc/uptime', 'r') as f: + # Read the first line from /proc/uptime + uptime_seconds = float(f.readline().split()[0]) + return int(uptime_seconds) except Exception as e: - print(f"Error: {e}") - return None + logger.debug(f"Error reading uptime: {e}") + return 0 def collect_data(): - hostname = socket.gethostname() - uptime_seconds = get_system_uptime_seconds() - return { - "host": hostname, - "uptime": uptime_seconds - } + try: + hostname = socket.gethostname() + uptime_seconds = get_system_uptime_seconds() + return { + "host": hostname, + "uptime": uptime_seconds + } + except Exception as e: + logger.debug(str(e)) + return {} + if __name__=="__main__": print(collect_data()) diff --git a/modules/vmstatus.py b/modules/vmstatus.py index 8eba586..dc3998a 100644 --- a/modules/vmstatus.py +++ b/modules/vmstatus.py @@ -1,8 +1,8 @@ -import os import socket +import traceback from threading import Thread import inotify.adapters -import time +from modules import logger def parse_data(log): log_parts = log.split(':') @@ -32,9 +32,11 @@ def collect_data_continuously(): last_line = lines[-1].strip() t1 = Thread(target=parse_data, args=(last_line,)) t1.run() + except Exception: + logger.debug(f"Failed to capture vm_status {traceback.format_exc()}") finally: i.remove_watch(log_file) -if __name__=="__main__": - collect_data() +# if __name__=="__main__": +# collect_data() diff --git a/requirements.txt b/requirements.txt index 546dc0e..f83c191 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,5 @@ schedule==1.2.1 six==1.16.0 typing_extensions==4.8.0 urllib3==2.1.0 +libvirt-python==10.7.0 +ujson==5.10.0 diff --git a/system_monitor.py b/system_monitor.py new file mode 100644 index 0000000..492dc5f --- /dev/null +++ b/system_monitor.py @@ -0,0 +1,54 @@ +import os +import time +import json +import importlib +import traceback + +import schedule + +from connection import create_influxdb_point, write_api, INFLUX_BUCKET, INFLUX_ORG +from modules import MONITORING_INTERVAL, logger + + +def load_config(): + with open('config/modules_config.json', 'r') as f: + config = json.load(f) + return config.get("modules", []) + + +def run_module(module_name): + try: + module = importlib.import_module(f"modules.{module_name}") + data = module.collect_data() + + if module_name == 'kvm_monitor': + return + if data: + if isinstance(data, list): + # If collect data return multiple records + for record in data: + point = create_influxdb_point(module_name, record) + write_api.write(bucket=INFLUX_BUCKET, org=INFLUX_ORG, record=point) + logger.debug(f"writing record for {module_name} finished.") + + else: + point = create_influxdb_point(module_name, data) + write_api.write(bucket=INFLUX_BUCKET, org=INFLUX_ORG, record=point) + logger.debug(f"writing record for {module_name} finished.") + except Exception as e: + logger.debug(traceback.format_exc()) + + + +if __name__ == '__main__': + modules_config = load_config() + for module_config in modules_config: + module_name = module_config["name"] + # interval_seconds = module_config.get("interval_seconds", False) + + schedule.every(MONITORING_INTERVAL).seconds.do( + run_module, module_name=module_name) + + while True: + schedule.run_pending() + time.sleep(1)