diff --git a/src/uipath/_cli/_evals/_evaluator_factory.py b/src/uipath/_cli/_evals/_evaluator_factory.py index 87c418b4a..994a52c85 100644 --- a/src/uipath/_cli/_evals/_evaluator_factory.py +++ b/src/uipath/_cli/_evals/_evaluator_factory.py @@ -10,12 +10,12 @@ try_extract_file_and_class_name, ) from uipath._cli._evals._models._evaluator import ( - EqualsEvaluatorParams, EvaluatorConfig, - JsonSimilarityEvaluatorParams, + LegacyEqualsEvaluatorParams, LegacyEvaluator, - LLMEvaluatorParams, - TrajectoryEvaluatorParams, + LegacyJsonSimilarityEvaluatorParams, + LegacyLLMEvaluatorParams, + LegacyTrajectoryEvaluatorParams, ) from uipath._cli._evals._models._evaluator_base_params import EvaluatorBaseParams from uipath._utils.constants import EVALS_FOLDER @@ -401,15 +401,15 @@ def _create_legacy_evaluator_internal( params: EvaluatorBaseParams = TypeAdapter(LegacyEvaluator).validate_python(data) match params: - case EqualsEvaluatorParams(): + case LegacyEqualsEvaluatorParams(): return EvaluatorFactory._create_legacy_exact_match_evaluator(params) - case JsonSimilarityEvaluatorParams(): + case LegacyJsonSimilarityEvaluatorParams(): return EvaluatorFactory._create_legacy_json_similarity_evaluator(params) - case LLMEvaluatorParams(): + case LegacyLLMEvaluatorParams(): return EvaluatorFactory._create_legacy_llm_as_judge_evaluator( params, agent_model ) - case TrajectoryEvaluatorParams(): + case LegacyTrajectoryEvaluatorParams(): return EvaluatorFactory._create_legacy_trajectory_evaluator( params, agent_model ) @@ -418,21 +418,21 @@ def _create_legacy_evaluator_internal( @staticmethod def _create_legacy_exact_match_evaluator( - params: EqualsEvaluatorParams, + params: LegacyEqualsEvaluatorParams, ) -> LegacyExactMatchEvaluator: """Create a deterministic evaluator.""" return LegacyExactMatchEvaluator(**params.model_dump(), config={}) @staticmethod def _create_legacy_json_similarity_evaluator( - params: JsonSimilarityEvaluatorParams, + params: LegacyJsonSimilarityEvaluatorParams, ) 
-> LegacyJsonSimilarityEvaluator: """Create a deterministic evaluator.""" return LegacyJsonSimilarityEvaluator(**params.model_dump(), config={}) @staticmethod def _create_legacy_llm_as_judge_evaluator( - params: LLMEvaluatorParams, + params: LegacyLLMEvaluatorParams, agent_model: str | None = None, ) -> LegacyBaseEvaluator[Any]: """Create an LLM-as-a-judge evaluator or context precision evaluator based on type.""" @@ -465,7 +465,7 @@ def _create_legacy_llm_as_judge_evaluator( @staticmethod def _create_legacy_trajectory_evaluator( - params: TrajectoryEvaluatorParams, + params: LegacyTrajectoryEvaluatorParams, agent_model: str | None = None, ) -> LegacyTrajectoryEvaluator: """Create a trajectory evaluator.""" diff --git a/src/uipath/_cli/_evals/_models/_evaluator.py b/src/uipath/_cli/_evals/_models/_evaluator.py index 8da9c66b8..4aeaf973b 100644 --- a/src/uipath/_cli/_evals/_models/_evaluator.py +++ b/src/uipath/_cli/_evals/_models/_evaluator.py @@ -35,7 +35,7 @@ ) -class EvaluatorBaseParams(BaseModel): +class LegacyEvaluatorBaseParams(BaseModel): """Parameters for initializing the base evaluator.""" id: str @@ -48,7 +48,7 @@ class EvaluatorBaseParams(BaseModel): file_name: str = Field(..., alias="fileName") -class LLMEvaluatorParams(EvaluatorBaseParams): +class LegacyLLMEvaluatorParams(LegacyEvaluatorBaseParams): category: Literal[LegacyEvaluatorCategory.LlmAsAJudge] = Field( ..., alias="category" ) @@ -60,7 +60,7 @@ class LLMEvaluatorParams(EvaluatorBaseParams): ) -class TrajectoryEvaluatorParams(EvaluatorBaseParams): +class LegacyTrajectoryEvaluatorParams(LegacyEvaluatorBaseParams): category: Literal[LegacyEvaluatorCategory.Trajectory] = Field(..., alias="category") prompt: str = Field(..., alias="prompt") model: str = Field(..., alias="model") @@ -70,71 +70,71 @@ class TrajectoryEvaluatorParams(EvaluatorBaseParams): ) -class EqualsEvaluatorParams(EvaluatorBaseParams): +class LegacyEqualsEvaluatorParams(LegacyEvaluatorBaseParams): model_config = ConfigDict( 
validate_by_name=True, validate_by_alias=True, extra="allow" ) -class JsonSimilarityEvaluatorParams(EvaluatorBaseParams): +class LegacyJsonSimilarityEvaluatorParams(LegacyEvaluatorBaseParams): model_config = ConfigDict( validate_by_name=True, validate_by_alias=True, extra="allow" ) -class UnknownEvaluatorParams(EvaluatorBaseParams): +class LegacyUnknownEvaluatorParams(LegacyEvaluatorBaseParams): model_config = ConfigDict( validate_by_name=True, validate_by_alias=True, extra="allow" ) -def evaluator_discriminator(data: Any) -> str: +def legacy_evaluator_discriminator(data: Any) -> str: if isinstance(data, dict): category = data.get("category") evaluator_type = data.get("type") match category: case LegacyEvaluatorCategory.LlmAsAJudge: - return "LLMEvaluatorParams" + return "LegacyLLMEvaluatorParams" case LegacyEvaluatorCategory.Trajectory: - return "TrajectoryEvaluatorParams" + return "LegacyTrajectoryEvaluatorParams" case LegacyEvaluatorCategory.Deterministic: match evaluator_type: case LegacyEvaluatorType.Equals: - return "EqualsEvaluatorParams" + return "LegacyEqualsEvaluatorParams" case LegacyEvaluatorType.JsonSimilarity: - return "JsonSimilarityEvaluatorParams" + return "LegacyJsonSimilarityEvaluatorParams" case _: - return "UnknownEvaluatorParams" + return "LegacyUnknownEvaluatorParams" case _: - return "UnknownEvaluatorParams" + return "LegacyUnknownEvaluatorParams" else: - return "UnknownEvaluatorParams" + return "LegacyUnknownEvaluatorParams" -Evaluator = Annotated[ +LegacyEvaluator = Annotated[ Union[ Annotated[ - LLMEvaluatorParams, - Tag("LLMEvaluatorParams"), + LegacyLLMEvaluatorParams, + Tag("LegacyLLMEvaluatorParams"), ], Annotated[ - TrajectoryEvaluatorParams, - Tag("TrajectoryEvaluatorParams"), + LegacyTrajectoryEvaluatorParams, + Tag("LegacyTrajectoryEvaluatorParams"), ], Annotated[ - EqualsEvaluatorParams, - Tag("EqualsEvaluatorParams"), + LegacyEqualsEvaluatorParams, + Tag("LegacyEqualsEvaluatorParams"), ], Annotated[ - 
JsonSimilarityEvaluatorParams, - Tag("JsonSimilarityEvaluatorParams"), + LegacyJsonSimilarityEvaluatorParams, + Tag("LegacyJsonSimilarityEvaluatorParams"), ], Annotated[ - UnknownEvaluatorParams, - Tag("UnknownEvaluatorParams"), + LegacyUnknownEvaluatorParams, + Tag("LegacyUnknownEvaluatorParams"), ], ], - Field(discriminator=Discriminator(evaluator_discriminator)), + Field(discriminator=Discriminator(legacy_evaluator_discriminator)), ] @@ -144,29 +144,6 @@ class UnknownEvaluatorConfig(BaseEvaluatorConfig[Any]): ) -def legacy_evaluator_discriminator(data: Any) -> str: - if isinstance(data, dict): - category = data.get("category") - evaluator_type = data.get("type") - match category: - case LegacyEvaluatorCategory.LlmAsAJudge: - return "LLMEvaluatorParams" - case LegacyEvaluatorCategory.Trajectory: - return "TrajectoryEvaluatorParams" - case LegacyEvaluatorCategory.Deterministic: - match evaluator_type: - case LegacyEvaluatorType.Equals: - return "EqualsEvaluatorParams" - case LegacyEvaluatorType.JsonSimilarity: - return "JsonSimilarityEvaluatorParams" - case _: - return "UnknownEvaluatorParams" - case _: - return "UnknownEvaluatorParams" - else: - return "UnknownEvaluatorParams" - - def evaluator_config_discriminator(data: Any) -> str: if isinstance(data, dict): evaluator_type_id = data.get("evaluatorTypeId") @@ -199,32 +176,6 @@ def evaluator_config_discriminator(data: Any) -> str: return "UnknownEvaluatorConfig" -LegacyEvaluator = Annotated[ - Union[ - Annotated[ - LLMEvaluatorParams, - Tag("LLMEvaluatorParams"), - ], - Annotated[ - TrajectoryEvaluatorParams, - Tag("TrajectoryEvaluatorParams"), - ], - Annotated[ - EqualsEvaluatorParams, - Tag("EqualsEvaluatorParams"), - ], - Annotated[ - JsonSimilarityEvaluatorParams, - Tag("JsonSimilarityEvaluatorParams"), - ], - Annotated[ - UnknownEvaluatorParams, - Tag("UnknownEvaluatorParams"), - ], - ], - Field(discriminator=Discriminator(legacy_evaluator_discriminator)), -] - EvaluatorConfig = Annotated[ Union[ Annotated[ 
diff --git a/src/uipath/_cli/_evals/_progress_reporter.py b/src/uipath/_cli/_evals/_progress_reporter.py index 677fa1f63..ff7d6a355 100644 --- a/src/uipath/_cli/_evals/_progress_reporter.py +++ b/src/uipath/_cli/_evals/_progress_reporter.py @@ -16,7 +16,6 @@ EvaluationItem, EvaluationStatus, ) -from uipath._cli._evals._models._evaluator import Evaluator from uipath._cli._evals._models._sw_reporting import ( StudioWebAgentSnapshot, StudioWebProgressItem, @@ -453,7 +452,7 @@ async def create_eval_run( async def update_eval_run( self, sw_progress_item: StudioWebProgressItem, - evaluators: dict[str, Evaluator], + evaluators: dict[str, BaseEvaluator[Any, Any, Any]], is_coded: bool = False, spans: list[Any] | None = None, ): diff --git a/src/uipath/agent/models/evals.py b/src/uipath/agent/models/evals.py index accc61f2e..05fd6e141 100644 --- a/src/uipath/agent/models/evals.py +++ b/src/uipath/agent/models/evals.py @@ -8,7 +8,7 @@ from pydantic import Field from uipath._cli._evals._models._evaluation_set import EvaluationSet -from uipath._cli._evals._models._evaluator import Evaluator +from uipath._cli._evals._models._evaluator import LegacyEvaluator from uipath.agent.models.agent import ( AgentDefinition, ) @@ -22,6 +22,6 @@ class AgentEvalsDefinition(AgentDefinition): alias="evaluationSets", description="List of agent evaluation sets", ) - evaluators: Optional[List[Evaluator]] = Field( + evaluators: Optional[List[LegacyEvaluator]] = Field( None, description="List of agent evaluators" )