selfprivacy-rest-api/selfprivacy_api/utils/monitoring.py

265 lines
7.8 KiB
Python
Raw Normal View History

2024-06-15 18:17:08 +00:00
"""Prometheus monitoring queries."""
# pylint: disable=too-few-public-methods
2024-06-22 12:12:29 +00:00
import requests
2024-07-08 15:00:49 +00:00
import strawberry
2024-07-08 15:00:49 +00:00
from dataclasses import dataclass
from typing import Optional, Annotated, Union, List, Tuple
2024-06-25 17:25:31 +00:00
from datetime import datetime, timedelta
2024-06-15 18:17:08 +00:00
2024-06-17 18:56:58 +00:00
# Base URL of the Prometheus server; the query helper in this module appends
# the "/api/v1/query_range" endpoint path to it.
PROMETHEUS_URL = "http://localhost:9001"
2024-06-15 18:17:08 +00:00
2024-07-08 15:00:49 +00:00
@strawberry.type
@dataclass
class MonitoringValue:
    """One sample of a monitored series: a point in time and its reading."""

    # Moment the sample refers to.
    timestamp: datetime
    # Sample reading, carried as a string rather than a number.
    value: str
@strawberry.type
@dataclass
class MonitoringMetric:
    """A named series of monitoring samples."""

    # Identifier that distinguishes this series from its siblings.
    id: str
    # Samples belonging to the series.
    values: List[MonitoringValue]
2024-06-15 18:17:08 +00:00
2024-07-26 11:50:12 +00:00
@strawberry.type
class MonitoringQueryError:
    """Failure result returned in place of data when a monitoring query fails."""

    # Human-readable description of what went wrong.
    error: str
# Result of a query that yields a single series: either its samples or an error.
# NOTE(review): GraphQL unions are normally limited to object types, while the
# first member here is a list -- confirm the schema builds with this definition.
MonitoringValuesResult = Annotated[
    Union[
        List[MonitoringValue],
        MonitoringQueryError,
    ],
    strawberry.union("MonitoringValuesResult"),
]

# Result of a query that yields several named series: either the series or an
# error.
MonitoringMetricsResult = Annotated[
    Union[
        List[MonitoringMetric],
        MonitoringQueryError,
    ],
    strawberry.union("MonitoringMetricsResult"),
]
2024-07-26 12:20:31 +00:00
class MonitoringQueries:
    """Range queries against Prometheus for host resource usage.

    Every public method returns either the parsed samples or a
    ``MonitoringQueryError`` describing why the query could not be answered.
    Transport and response-format problems are reported through that error
    object instead of being raised.
    """

    # Upper bound, in seconds, on a single HTTP request to Prometheus. Without
    # it ``requests`` waits indefinitely on a stalled server.
    _REQUEST_TIMEOUT = 10

    @staticmethod
    def _resolve_time_range(
        start: Optional[datetime], end: Optional[datetime]
    ) -> Tuple[int, int]:
        """Turn an optional time window into Unix timestamps.

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.

        Returns:
            A ``(start, end)`` pair of whole-second Unix timestamps.
        """
        now = datetime.now()
        if start is None:
            start = now - timedelta(minutes=20)
        if end is None:
            end = now
        return int(start.timestamp()), int(end.timestamp())

    @staticmethod
    def _send_query(
        query: str, start: int, end: int, step: int, result_type: Optional[str] = None
    ) -> Union[dict, MonitoringQueryError]:
        """Run a range query and return the ``data`` part of the response.

        Args:
            query (str): PromQL expression to evaluate.
            start (int): Start of the range as a Unix timestamp.
            end (int): End of the range as a Unix timestamp.
            step (int): Query resolution step in seconds.
            result_type (str, optional): When given, the response's
                ``resultType`` must equal it, otherwise an error is returned.

        Returns:
            The ``data`` object of the response, or a ``MonitoringQueryError``
            on a non-200 status, a result type mismatch, a timeout, or any
            other failure while performing or decoding the request.
        """
        try:
            response = requests.get(
                f"{PROMETHEUS_URL}/api/v1/query_range",
                params={
                    "query": query,
                    "start": start,
                    "end": end,
                    "step": step,
                },
                timeout=MonitoringQueries._REQUEST_TIMEOUT,
            )
            if response.status_code != 200:
                return MonitoringQueryError(
                    error="Prometheus returned unexpected HTTP status code"
                )
            payload = response.json()
            if result_type and payload["data"]["resultType"] != result_type:
                return MonitoringQueryError(
                    error="Unexpected resultType returned from Prometheus, request failed"
                )
            return payload["data"]
        # Deliberately broad: this is the boundary where any transport or
        # decoding failure is converted into an error result for the caller.
        except Exception as error:  # pylint: disable=broad-except
            return MonitoringQueryError(
                error=f"Prometheus request failed! Error: {str(error)}"
            )

    @staticmethod
    def _prometheus_value_to_monitoring_value(
        x: Tuple[float, str],
    ) -> MonitoringValue:
        """Convert one ``[unix_time, "value"]`` sample into a MonitoringValue."""
        return MonitoringValue(timestamp=datetime.fromtimestamp(x[0]), value=x[1])

    @staticmethod
    def _prometheus_series_to_monitoring_values(
        response: dict,
    ) -> List[MonitoringValue]:
        """Convert the first series of a matrix response into sample values.

        Returns an empty list when the response holds no series, so a query
        that matched nothing does not raise ``IndexError``.
        """
        series = response["result"]
        if not series:
            return []
        return [
            MonitoringQueries._prometheus_value_to_monitoring_value(sample)
            for sample in series[0]["values"]
        ]

    @staticmethod
    def _prometheus_response_to_monitoring_metrics(
        response: dict, id_key: str
    ) -> List[MonitoringMetric]:
        """Convert every series of a matrix response into a MonitoringMetric.

        Args:
            response (dict): The ``data`` object of a matrix response.
            id_key (str): Label whose value becomes the metric ``id``.
        """
        return [
            MonitoringMetric(
                id=series["metric"][id_key],
                values=[
                    MonitoringQueries._prometheus_value_to_monitoring_value(sample)
                    for sample in series["values"]
                ],
            )
            for series in response["result"]
        ]

    @staticmethod
    def cpu_usage(
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
        step: int = 60,  # seconds
    ) -> MonitoringValuesResult:
        """
        Get CPU information.

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.
            step (int): Interval in seconds for querying CPU usage data.
        """
        start_timestamp, end_timestamp = MonitoringQueries._resolve_time_range(
            start, end
        )

        query = '100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)'

        data = MonitoringQueries._send_query(
            query, start_timestamp, end_timestamp, step, result_type="matrix"
        )
        if isinstance(data, MonitoringQueryError):
            return data

        return MonitoringQueries._prometheus_series_to_monitoring_values(data)

    @staticmethod
    def memory_usage(
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
        step: int = 60,  # seconds
    ) -> MonitoringValuesResult:
        """
        Get memory usage.

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.
            step (int): Interval in seconds for querying memory usage data.
        """
        start_timestamp, end_timestamp = MonitoringQueries._resolve_time_range(
            start, end
        )

        query = "100 - (100 * (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))"

        data = MonitoringQueries._send_query(
            query, start_timestamp, end_timestamp, step, result_type="matrix"
        )
        if isinstance(data, MonitoringQueryError):
            return data

        return MonitoringQueries._prometheus_series_to_monitoring_values(data)

    @staticmethod
    def disk_usage(
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
        step: int = 60,  # seconds
    ) -> MonitoringMetricsResult:
        """
        Get disk usage information.

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.
            step (int): Interval in seconds for querying disk usage data.
        """
        start_timestamp, end_timestamp = MonitoringQueries._resolve_time_range(
            start, end
        )

        query = """100 - (100 * sum by (device) (node_filesystem_avail_bytes{fstype!="rootfs"}) / sum by (device) (node_filesystem_size_bytes{fstype!="rootfs"}))"""

        data = MonitoringQueries._send_query(
            query, start_timestamp, end_timestamp, step, result_type="matrix"
        )
        if isinstance(data, MonitoringQueryError):
            return data

        return MonitoringQueries._prometheus_response_to_monitoring_metrics(
            data, "device"
        )

    @staticmethod
    def network_usage(
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
        step: int = 60,  # seconds
    ) -> MonitoringMetricsResult:
        """
        Get network usage information for both download and upload.

        The result holds two metrics whose ids are "download" and "upload".

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.
            step (int): Interval in seconds for querying network data.
        """
        start_timestamp, end_timestamp = MonitoringQueries._resolve_time_range(
            start, end
        )

        # PromQL has no "expr as alias" syntax and cannot return two
        # expressions from one parenthesised list. Each total is instead tagged
        # with a constant "direction" label (label_replace with an empty source
        # label and regex always matches) and the two series are unioned.
        query = (
            "label_replace("
            'sum(rate(node_network_receive_bytes_total{device!="lo"}[5m])), '
            '"direction", "download", "", "")'
            " or "
            "label_replace("
            'sum(rate(node_network_transmit_bytes_total{device!="lo"}[5m])), '
            '"direction", "upload", "", "")'
        )

        data = MonitoringQueries._send_query(
            query, start_timestamp, end_timestamp, step, result_type="matrix"
        )
        if isinstance(data, MonitoringQueryError):
            return data

        # sum() drops the per-device labels, so the series are identified by
        # the "direction" label added in the query.
        return MonitoringQueries._prometheus_response_to_monitoring_metrics(
            data, "direction"
        )