2024-06-15 22:17:08 +04:00
""" Prometheus monitoring queries. """
# pylint: disable=too-few-public-methods
2024-06-22 16:12:29 +04:00
import requests
2024-07-10 15:53:56 +03:00
2024-07-08 18:00:49 +03:00
import strawberry
2024-07-10 15:53:56 +03:00
from strawberry . scalars import JSON
2024-07-08 18:00:49 +03:00
from dataclasses import dataclass
2024-07-26 15:50:12 +04:00
from typing import Optional , Annotated , Union
2024-06-25 21:25:31 +04:00
from datetime import datetime , timedelta
2024-06-15 22:17:08 +04:00
2024-06-17 21:56:58 +03:00
# Base URL of the local Prometheus server, without a trailing slash.
# It must not contain surrounding whitespace: API paths are appended to it
# verbatim when request URLs are built.
PROMETHEUS_URL = "http://localhost:9001"
2024-06-15 22:17:08 +04:00
2024-07-08 18:00:49 +03:00
@strawberry.type
@dataclass
class MonitoringQueryResult:
    """Successful outcome of a Prometheus range query."""

    # Value of ``data.resultType`` in the Prometheus response.
    result_type: str
    # Value of ``data.result`` in the Prometheus response, passed through
    # as opaque JSON.
    result: JSON
2024-06-15 22:17:08 +04:00
2024-07-26 15:50:12 +04:00
@strawberry.type
class MonitoringQueryError:
    """Failed outcome of a Prometheus range query."""

    # Human-readable description of what went wrong.
    error: str
2024-07-26 16:20:31 +04:00
# GraphQL union returned by every monitoring query: either the query result
# or an error description. The union name must be a valid GraphQL identifier
# (letters, digits and underscores only), so it carries no whitespace.
MonitoringResponse = Annotated[
    Union[MonitoringQueryResult, MonitoringQueryError],
    strawberry.union("MonitoringQueryResponse"),
]
2024-07-26 16:20:31 +04:00
class MonitoringQueries:
    """Range queries against the Prometheus server at ``PROMETHEUS_URL``.

    Every public method evaluates one fixed PromQL expression over a time
    window and returns a ``MonitoringQueryResult`` on success or a
    ``MonitoringQueryError`` on failure. Request failures are reported
    through the return value, never raised.
    """

    # Seconds to wait for Prometheus before giving up. Without a timeout
    # ``requests`` waits indefinitely, which would hang the caller if the
    # server stalls.
    _REQUEST_TIMEOUT = 10

    @staticmethod
    def _resolve_range(
        start: Optional[datetime],
        end: Optional[datetime],
    ) -> tuple[int, int]:
        """
        Turn an optional time window into integer Unix timestamps.

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.

        Returns:
            A ``(start_timestamp, end_timestamp)`` pair in whole seconds.
        """
        now = datetime.now()
        if start is None:
            start = now - timedelta(minutes=20)
        if end is None:
            end = now
        return int(start.timestamp()), int(end.timestamp())

    @staticmethod
    def _send_query(query: str, start: int, end: int, step: int) -> MonitoringResponse:
        """
        Run a PromQL expression through the ``query_range`` HTTP endpoint.

        Args:
            query (str): The PromQL expression to evaluate.
            start (int): Start of the window as a Unix timestamp in seconds.
            end (int): End of the window as a Unix timestamp in seconds.
            step (int): Query resolution step in seconds.

        Returns:
            ``MonitoringQueryResult`` built from the ``data`` object of the
            response, or ``MonitoringQueryError`` if the request fails, the
            HTTP status is not 200, or the response cannot be decoded.
        """
        # Tolerate stray whitespace or a trailing slash in the configured
        # base URL so the joined URL is always well-formed.
        base_url = PROMETHEUS_URL.strip().rstrip("/")
        try:
            response = requests.get(
                f"{base_url}/api/v1/query_range",
                params={
                    "query": query,
                    "start": start,
                    "end": end,
                    "step": step,
                },
                timeout=MonitoringQueries._REQUEST_TIMEOUT,
            )
            if response.status_code != 200:
                return MonitoringQueryError(
                    error="Prometheus returned unexpected HTTP status code"
                )
            payload = response.json()
            return MonitoringQueryResult(
                result_type=payload["data"]["resultType"],
                result=payload["data"]["result"],
            )
        # Deliberately broad: connection errors, timeouts, malformed JSON and
        # missing keys are all reported to the caller as an error result.
        except Exception as error:  # pylint: disable=broad-except
            return MonitoringQueryError(
                error=f"Prometheus request failed! Error: {str(error)}"
            )

    @staticmethod
    def cpu_usage(
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
        step: int = 60,  # seconds
    ) -> MonitoringResponse:
        """
        Get CPU usage as a percentage per instance.

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.
            step (int): Interval in seconds for querying CPU usage data.
        """
        start_timestamp, end_timestamp = MonitoringQueries._resolve_range(start, end)

        # 100 minus the average idle percentage over the last five minutes.
        query = (
            "100 - (avg by (instance) "
            '(rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)'
        )

        return MonitoringQueries._send_query(
            query,
            start_timestamp,
            end_timestamp,
            step,
        )

    @staticmethod
    def memory_usage(
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
        step: int = 60,  # seconds
    ) -> MonitoringResponse:
        """
        Get memory usage as a percentage.

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.
            step (int): Interval in seconds for querying memory usage data.
        """
        start_timestamp, end_timestamp = MonitoringQueries._resolve_range(start, end)

        query = (
            "100 - (100 * "
            "(node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))"
        )

        return MonitoringQueries._send_query(
            query,
            start_timestamp,
            end_timestamp,
            step,
        )

    @staticmethod
    def disk_usage(
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
        step: int = 60,  # seconds
    ) -> MonitoringResponse:
        """
        Get disk usage as a percentage per device.

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.
            step (int): Interval in seconds for querying disk usage data.
        """
        start_timestamp, end_timestamp = MonitoringQueries._resolve_range(start, end)

        query = (
            "100 - (100 * "
            'sum by (device) (node_filesystem_avail_bytes{fstype!="rootfs"}) / '
            'sum by (device) (node_filesystem_size_bytes{fstype!="rootfs"}))'
        )

        return MonitoringQueries._send_query(
            query,
            start_timestamp,
            end_timestamp,
            step,
        )

    @staticmethod
    def network_usage(
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
        step: int = 60,  # seconds
    ) -> MonitoringResponse:
        """
        Get network usage information for both download and upload.

        The result holds two series of bytes per second, summed over all
        non-loopback devices and told apart by a ``direction`` label whose
        value is ``download`` or ``upload``.

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.
            step (int): Interval in seconds for querying network data.
        """
        start_timestamp, end_timestamp = MonitoringQueries._resolve_range(start, end)

        # PromQL has no "expr as name" aliasing. Each direction is tagged with
        # a static label via label_replace (an empty source label matched
        # against an empty regex always applies), and the two single-series
        # vectors are combined with "or".
        query = (
            "label_replace("
            'sum(rate(node_network_receive_bytes_total{device!="lo"}[5m])), '
            '"direction", "download", "", "") '
            "or "
            "label_replace("
            'sum(rate(node_network_transmit_bytes_total{device!="lo"}[5m])), '
            '"direction", "upload", "", "")'
        )

        return MonitoringQueries._send_query(
            query,
            start_timestamp,
            end_timestamp,
            step,
        )