diff --git a/api/operations/approve_access_request.py b/api/operations/approve_access_request.py index 5f33da0c..005203bf 100644 --- a/api/operations/approve_access_request.py +++ b/api/operations/approve_access_request.py @@ -9,6 +9,7 @@ from api.operations.constraints import CheckForReason from api.operations.modify_group_users import ModifyGroupUsers from api.plugins import get_notification_hook +from api.plugins.metrics_reporter import get_metrics_reporter_hook from api.views.schemas import AuditLogSchema, EventType @@ -46,6 +47,7 @@ def __init__( self.notify = notify self.notification_hook = get_notification_hook() + self.metrics_hook = get_metrics_reporter_hook() def execute(self) -> AccessRequest: # Don't allow approving a request that is already resolved @@ -112,6 +114,30 @@ def execute(self) -> AccessRequest: ) ) + # Calculate approval time in seconds + creation_time = self.access_request.created_at + approval_time = datetime.utcnow() + resolution_time_seconds = (approval_time - creation_time).total_seconds() + + # Record metrics for access request approval + group_type = "app_group" if isinstance(self.access_request.active_requested_group, AppGroup) else "role_group" + self.metrics_hook.record_counter( + metric_name="access.request.approved", + value=1.0, + tags={ + "group_type": group_type, + "request_ownership": str(self.access_request.request_ownership).lower(), + }, + ) + self.metrics_hook.record_histogram( + metric_name="access.request.resolution_time", + value=resolution_time_seconds, + tags={ + "resolution_type": "approved", + "group_type": group_type, + }, + ) + if self.access_request.request_ownership: ModifyGroupUsers( group=self.access_request.requested_group_id, diff --git a/api/operations/create_access_request.py b/api/operations/create_access_request.py index 5ea6dae8..93b30d2d 100644 --- a/api/operations/create_access_request.py +++ b/api/operations/create_access_request.py @@ -21,6 +21,7 @@ from api.operations.approve_access_request import ApproveAccessRequest from api.operations.reject_access_request import RejectAccessRequest from api.plugins import get_conditional_access_hook, get_notification_hook +from api.plugins.metrics_reporter import get_metrics_reporter_hook from api.views.schemas import AuditLogSchema, EventType @@ -57,6 +58,7 @@ def __init__( self.conditional_access_hook = get_conditional_access_hook() self.notification_hook = get_notification_hook() + self.metrics_hook = get_metrics_reporter_hook() def execute(self) -> Optional[AccessRequest]: # Don't allow creating a request for an unmanaged group @@ -76,6 +78,17 @@ def execute(self) -> Optional[AccessRequest]: db.session.add(access_request) db.session.commit() + # Record metrics for access request creation + group_type = "app_group" if isinstance(self.requested_group, AppGroup) else "role_group" + self.metrics_hook.record_counter( + metric_name="access.request.created", + value=1.0, + tags={ + "group_type": group_type, + "request_ownership": str(self.request_ownership).lower(), + }, + ) + # Fetch the users to notify approvers = get_group_managers(self.requested_group.id) diff --git a/api/operations/create_group.py b/api/operations/create_group.py index c1af9fc1..fa572c47 100644 --- a/api/operations/create_group.py +++ b/api/operations/create_group.py @@ -6,6 +6,7 @@ from api.extensions import db from api.models import App, AppGroup, AppTagMap, OktaGroup, OktaGroupTagMap, OktaUser, RoleGroup, Tag +from api.plugins.metrics_reporter import get_metrics_reporter_hook from api.services import okta from api.views.schemas import AuditLogSchema, EventType @@ -32,6 +33,8 @@ def __init__(self, *, group: T | GroupDict, tags: list[str] = [], current_user_i None, ) + self.metrics_hook = get_metrics_reporter_hook() + def execute(self, *, _group: Optional[T] = None) -> T: # Do not allow non-deleted groups with the same name (case-insensitive) existing_group = ( @@ -57,6 +60,16 @@ def execute(self, *, _group: Optional[T] = None) -> T: db.session.add(self.group) db.session.commit() + # Record metrics for role creation + if isinstance(self.group, RoleGroup): + self.metrics_hook.record_counter( + metric_name="role.created", + value=1.0, + tags={ + "created_by_user_type": "admin" if self.current_user_id else "system", + }, + ) + # If this is an app group, add any app tags if type(self.group) is AppGroup: app_tag_maps = ( diff --git a/api/operations/modify_group_users.py b/api/operations/modify_group_users.py index 10871818..c67eceea 100644 --- a/api/operations/modify_group_users.py +++ b/api/operations/modify_group_users.py @@ -22,6 +22,7 @@ from api.models.tag import coalesce_ended_at from api.operations.constraints import CheckForReason, CheckForSelfAdd from api.plugins import get_notification_hook +from api.plugins.metrics_reporter import get_metrics_reporter_hook from api.services import okta from api.views.schemas import AuditLogSchema, EventType @@ -122,6 +123,7 @@ def __init__( self.notify = notify self.notification_hook = get_notification_hook() + self.metrics_hook = get_metrics_reporter_hook() def execute(self) -> OktaGroup: # Run asychronously to parallelize Okta API requests @@ -372,6 +374,29 @@ async def _execute(self) -> OktaGroup: # Commit all changes so far db.session.commit() + # Record metrics for group membership removals + group_type = "app_group" if isinstance(self.group, AppGroup) else "role_group" + + for member in self.members_to_remove: + self.metrics_hook.record_counter( + metric_name="group.membership.removed", + value=1.0, + tags={ + "group_type": group_type, + "is_owner": "false", + }, + ) + + for owner in self.owners_to_remove: + self.metrics_hook.record_counter( + metric_name="group.membership.removed", + value=1.0, + tags={ + "group_type": group_type, + "is_owner": "true", + }, + ) + # Mark relevant OktaUserGroupMembers as 'Should expire' # Only relevant for the expiring groups page so not adding checks for this field anywhere else since OK if marked to expire # then manually renewed from group page or with an access request @@ -505,6 +530,66 @@ async def _execute(self) -> OktaGroup: # Commit changes so far, so we can reference OktaUserGroupMember in approved AccessRequests db.session.commit() + # Record metrics for group membership additions + for member in self.members_to_add: + self.metrics_hook.record_counter( + metric_name="group.membership.added", + value=1.0, + tags={ + "group_type": group_type, + "is_owner": "false", + }, + ) + + for owner in self.owners_to_add: + self.metrics_hook.record_counter( + metric_name="group.membership.added", + value=1.0, + tags={ + "group_type": group_type, + "is_owner": "true", + }, + ) + + # Record gauge metrics for total group membership count + # Use queries instead of accessing relationships to avoid SQLAlchemy issues + total_members = ( + db.session.query(OktaUserGroupMember) + .filter( + OktaUserGroupMember.group_id == self.group.id, + OktaUserGroupMember.ended_at.is_(None), + OktaUserGroupMember.is_owner.is_(False), + ) + .count() + ) + total_owners = ( + db.session.query(OktaUserGroupMember) + .filter( + OktaUserGroupMember.group_id == self.group.id, + OktaUserGroupMember.ended_at.is_(None), + OktaUserGroupMember.is_owner.is_(True), + ) + .count() + ) + + self.metrics_hook.record_gauge( + metric_name="groups.total_members", + value=total_members, + tags={ + "group_type": group_type, + "membership_type": "member", + }, + ) + + self.metrics_hook.record_gauge( + metric_name="groups.total_members", + value=total_owners, + tags={ + "group_type": group_type, + "membership_type": "owner", + }, + ) + # Approve any pending access requests for access granted by this operation pending_requests_query = ( AccessRequest.query.options(joinedload(AccessRequest.requested_group)) diff --git a/api/operations/modify_role_groups.py b/api/operations/modify_role_groups.py index 5199e425..b7f0b494 100644 --- a/api/operations/modify_role_groups.py +++ b/api/operations/modify_role_groups.py @@ -22,6 +22,7 @@ from api.models.tag import coalesce_ended_at from api.operations.constraints import CheckForReason, CheckForSelfAdd from api.plugins import get_notification_hook +from api.plugins.metrics_reporter import get_metrics_reporter_hook from api.services import okta from api.views.schemas import AuditLogSchema, EventType @@ -121,6 +122,7 @@ def __init__( self.notify = notify self.notification_hook = get_notification_hook() + self.metrics_hook = get_metrics_reporter_hook() def execute(self) -> RoleGroup: # Run asychronously to parallelize Okta API requests @@ -286,6 +288,17 @@ async def _execute(self) -> RoleGroup: synchronize_session="fetch", ) + # Calculate total active role mappings using a query instead of accessing the relationship + # to avoid SQLAlchemy InvalidRequestError issues + total_active_role_mappings = ( + db.session.query(RoleGroupMap) + .filter( + RoleGroupMap.role_group_id == self.role.id, + RoleGroupMap.ended_at.is_(None), + ) + .count() + ) + # Commit all changes so far db.session.commit() @@ -338,6 +351,25 @@ async def _execute(self) -> RoleGroup: # Commit changes so far so we can reference the ids of the new role group maps in the OktaUserGroupMembers db.session.commit() + # Record metrics for role group mapping additions + for group in self.groups_to_add: + self.metrics_hook.record_counter( + metric_name="role.group_mapping.added", + value=1.0, + tags={ + "is_owner_mapping": "false", + }, + ) + + for owner_group in self.owner_groups_to_add: + self.metrics_hook.record_counter( + metric_name="role.group_mapping.added", + value=1.0, + tags={ + "is_owner_mapping": "true", + }, + ) + # Group members of a role should be added as members to all newly added groups # and owner groups associated with that role active_role_memberships = ( @@ -496,6 +528,18 @@ async def _execute(self) -> RoleGroup: # Commit all changes db.session.commit() + # Record gauge metrics for role statistics after final commit + self.metrics_hook.record_gauge( + metric_name="roles.total_active", + value=1, # This role is active + tags={}, + ) + + # Use the captured count for the histogram + self.metrics_hook.record_histogram( + metric_name="role.membership_count", value=total_active_role_mappings, tags={} + ) + if len(async_tasks) > 0: await asyncio.wait(async_tasks) diff --git a/api/operations/reject_access_request.py b/api/operations/reject_access_request.py index 8d88bce4..76107922 100644 --- a/api/operations/reject_access_request.py +++ b/api/operations/reject_access_request.py @@ -8,6 +8,7 @@ from api.models import AccessRequest, AccessRequestStatus, AppGroup, OktaGroup, OktaUser, RoleGroup from api.models.access_request import get_all_possible_request_approvers from api.plugins import get_notification_hook +from api.plugins.metrics_reporter import get_metrics_reporter_hook from api.views.schemas import AuditLogSchema, EventType @@ -42,12 +43,20 @@ def __init__( self.notify_requester = notify_requester self.notification_hook = get_notification_hook() + self.metrics_hook = get_metrics_reporter_hook() def execute(self) -> AccessRequest: # Don't allow approving a request that is already resolved if self.access_request.status != AccessRequestStatus.PENDING or self.access_request.resolved_at is not None: return self.access_request + # Calculate rejection time in seconds + from datetime import datetime + + creation_time = self.access_request.created_at + rejection_time = datetime.utcnow() + resolution_time_seconds = (rejection_time - creation_time).total_seconds() + self.access_request.status = AccessRequestStatus.REJECTED self.access_request.resolved_at = db.func.now() self.access_request.resolver_user_id = self.rejecter_id @@ -55,6 +64,33 @@ def execute(self) -> AccessRequest: db.session.commit() + # Get group type for metrics + requested_group = ( + db.session.query(OktaGroup) + .options(selectin_polymorphic(OktaGroup, [AppGroup, RoleGroup])) + .filter(OktaGroup.id == self.access_request.requested_group_id) + .first() + ) + + # Record metrics for access request rejection + group_type = "app_group" if isinstance(requested_group, AppGroup) else "role_group" + self.metrics_hook.record_counter( + metric_name="access.request.rejected", + value=1.0, + tags={ + "group_type": group_type, + "request_ownership": str(self.access_request.request_ownership).lower(), + }, + ) + self.metrics_hook.record_histogram( + metric_name="access.request.resolution_time", + value=resolution_time_seconds, + tags={ + "resolution_type": "rejected", + "group_type": group_type, + }, + ) + # Audit logging email = None if self.rejecter_id is not None: diff --git a/api/plugins/metrics_reporter.py b/api/plugins/metrics_reporter.py index 6725db01..5497ca6c 100644 --- a/api/plugins/metrics_reporter.py +++ b/api/plugins/metrics_reporter.py @@ -1,6 +1,7 @@ import logging import sys -from typing import ContextManager, Dict, List, Optional +from contextlib import nullcontext +from typing import ContextManager, Dict, Generator, List, Optional import pluggy @@ -100,6 +101,84 @@ def flush(self) -> None: """Force flush any buffered metrics to the backend.""" +@hookimpl(wrapper=True) +def record_counter( + metric_name: str, + value: float = 1.0, + tags: Optional[Dict[str, str]] = None, + monotonic: bool = True, +) -> Generator[None, None, None]: + try: + return (yield) + except Exception: + logger.exception(f"Failed to record counter metric: {metric_name}") + + +@hookimpl(wrapper=True) +def record_gauge( + metric_name: str, + value: float, + tags: Optional[Dict[str, str]] = None, +) -> Generator[None, None, None]: + try: + return (yield) + except Exception: + logger.exception(f"Failed to record gauge metric: {metric_name}") + + +@hookimpl(wrapper=True) +def record_histogram( + metric_name: str, + value: float, + tags: Optional[Dict[str, str]] = None, + buckets: Optional[List[float]] = None, +) -> Generator[None, None, None]: + try: + return (yield) + except Exception: + logger.exception(f"Failed to record histogram metric: {metric_name}") + + +@hookimpl(wrapper=True) +def record_summary( + metric_name: str, + value: float, + tags: Optional[Dict[str, str]] = None, +) -> Generator[None, None, None]: + try: + return (yield) + except Exception: + logger.exception(f"Failed to record summary metric: {metric_name}") + + +@hookimpl(wrapper=True) +def batch_metrics() -> Generator[None, None, ContextManager[None]]: + try: + result = yield + return result if result is not None else nullcontext() + except Exception: + logger.exception("Failed to create batch metrics context") + return nullcontext() + + +@hookimpl(wrapper=True) +def set_global_tags( + tags: Dict[str, str], +) -> Generator[None, None, None]: + try: + return (yield) + except Exception: + logger.exception("Failed to set global tags") + + +@hookimpl(wrapper=True) +def flush() -> Generator[None, None, None]: + try: + return (yield) + except Exception: + logger.exception("Failed to flush metrics") + + def get_metrics_reporter_hook() -> pluggy.HookRelay: global _cached_metrics_reporter_hook