2024-06-15 18:17:08 +00:00
""" Prometheus monitoring queries. """
# pylint: disable=too-few-public-methods
2024-06-22 12:12:29 +00:00
import requests
2024-07-10 12:53:56 +00:00
2024-07-08 15:00:49 +00:00
import strawberry
2024-07-10 12:53:56 +00:00
from strawberry . scalars import JSON
2024-07-08 15:00:49 +00:00
from dataclasses import dataclass
2024-07-26 11:50:12 +00:00
from typing import Optional , Annotated , Union
2024-06-25 17:25:31 +00:00
from datetime import datetime , timedelta
2024-06-15 18:17:08 +00:00
2024-06-17 18:56:58 +00:00
# Base URL of the Prometheus server; API paths are appended to it.
PROMETHEUS_URL = "http://localhost:9001"
2024-06-15 18:17:08 +00:00
2024-07-08 15:00:49 +00:00
# GraphQL type carrying the payload of a successful Prometheus query.
@strawberry.type
@dataclass
class MonitoringQueryResult:
    # Filled from the "resultType" entry of the Prometheus response data.
    result_type: str
    # Filled from the "result" entry of the Prometheus response data.
    result: JSON
2024-06-15 18:17:08 +00:00
2024-07-26 11:50:12 +00:00
# GraphQL type returned when a Prometheus query could not be completed.
@strawberry.type
class MonitoringQueryError:
    # Human-readable description of what went wrong.
    error: str
2024-07-26 12:20:31 +00:00
# Either a query result or a query error; registered with Strawberry as the
# union named "MonitoringQueryResponse".
MonitoringResponse = Annotated[
    Union[MonitoringQueryResult, MonitoringQueryError],
    strawberry.union("MonitoringQueryResponse"),
]
2024-07-26 12:20:31 +00:00
class MonitoringQueries:
    """Range queries against the Prometheus server at ``PROMETHEUS_URL``.

    Every method returns a ``MonitoringResponse``: a ``MonitoringQueryResult``
    on success or a ``MonitoringQueryError`` on failure. Failures are reported
    through the return value and are not raised to the caller.
    """

    # Upper bound, in seconds, on how long a single HTTP request may take.
    # Without it ``requests`` waits forever on an unresponsive server.
    _REQUEST_TIMEOUT_SECONDS = 30

    @staticmethod
    def _send_query(query: str, start: int, end: int, step: int) -> MonitoringResponse:
        """
        Run a PromQL expression through the ``query_range`` HTTP endpoint.

        Args:
            query (str): The PromQL expression to evaluate.
            start (int): Start of the range as a Unix timestamp in seconds.
            end (int): End of the range as a Unix timestamp in seconds.
            step (int): Query resolution step in seconds.

        Returns:
            MonitoringQueryResult built from the ``data`` entry of the
            response, or MonitoringQueryError if the HTTP status is not 200
            or anything fails while sending the request or reading the reply.
        """
        try:
            response = requests.get(
                f"{PROMETHEUS_URL}/api/v1/query_range",
                params={
                    "query": query,
                    "start": start,
                    "end": end,
                    "step": step,
                },
                timeout=MonitoringQueries._REQUEST_TIMEOUT_SECONDS,
            )
            if response.status_code != 200:
                return MonitoringQueryError(
                    error="Prometheus returned unexpected HTTP status code"
                )
            payload = response.json()
            return MonitoringQueryResult(
                result_type=payload["data"]["resultType"],
                result=payload["data"]["result"],
            )
        # Deliberately broad: connection errors, timeouts, invalid JSON and
        # missing keys must all surface as an error value, not an exception.
        except Exception as error:
            return MonitoringQueryError(
                error=f"Prometheus request failed! Error: {error}"
            )

    @staticmethod
    def _query_range(
        query: str,
        start: Optional[datetime],
        end: Optional[datetime],
        step: int,
    ) -> MonitoringResponse:
        """
        Apply the default time window and send ``query`` over it.

        Args:
            query (str): The PromQL expression to evaluate.
            start (datetime, optional): The start time.
                Falls back to 20 minutes ago when None.
            end (datetime, optional): The end time.
                Falls back to the current time when None.
            step (int): Query resolution step in seconds.
        """
        if start is None:
            start = datetime.now() - timedelta(minutes=20)
        if end is None:
            end = datetime.now()
        # timestamp() interprets naive datetimes as local time, which matches
        # the naive datetime.now() defaults above.
        return MonitoringQueries._send_query(
            query,
            int(start.timestamp()),
            int(end.timestamp()),
            step,
        )

    @staticmethod
    def cpu_usage(
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
        step: int = 60,  # seconds
    ) -> MonitoringResponse:
        """
        Get CPU information.

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.
            step (int): Interval in seconds for querying CPU usage data.
        """
        query = '100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)'
        return MonitoringQueries._query_range(query, start, end, step)

    @staticmethod
    def memory_usage(
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
        step: int = 60,  # seconds
    ) -> MonitoringResponse:
        """
        Get memory usage.

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.
            step (int): Interval in seconds for querying memory usage data.
        """
        query = "100 - (100 * (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))"
        return MonitoringQueries._query_range(query, start, end, step)

    @staticmethod
    def disk_usage(
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
        step: int = 60,  # seconds
    ) -> MonitoringResponse:
        """
        Get disk usage information.

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.
            step (int): Interval in seconds for querying disk usage data.
        """
        query = """100 - (100 * sum by (device) (node_filesystem_avail_bytes{fstype!="rootfs"}) / sum by (device) (node_filesystem_size_bytes{fstype!="rootfs"}))"""
        return MonitoringQueries._query_range(query, start, end, step)

    @staticmethod
    def network_usage(
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
        step: int = 60,  # seconds
    ) -> MonitoringResponse:
        """
        Get network usage information for both download and upload.

        The result holds two series told apart by a ``direction`` label with
        the values ``download`` and ``upload``.

        Args:
            start (datetime, optional): The start time.
                Defaults to 20 minutes ago if not provided.
            end (datetime, optional): The end time.
                Defaults to current time if not provided.
            step (int): Interval in seconds for querying network data.
        """
        # PromQL has no "expr as name" aliasing and cannot return a
        # comma-separated list of expressions. Each total is tagged with a
        # distinguishing label instead and the two vectors are joined with
        # "or" so a single query yields both series.
        query = (
            'label_replace('
            'sum(rate(node_network_receive_bytes_total{device!="lo"}[5m])), '
            '"direction", "download", "", "") '
            'or '
            'label_replace('
            'sum(rate(node_network_transmit_bytes_total{device!="lo"}[5m])), '
            '"direction", "upload", "", "")'
        )
        return MonitoringQueries._query_range(query, start, end, step)