"""Contains the `LLMEvaluator` class for building LLM-as-a-judge evaluators."""

from typing import Any, Callable, Optional, Union, cast

from pydantic import BaseModel

from langsmith._internal._beta_decorator import warn_beta
from langsmith.evaluation import EvaluationResult, EvaluationResults, RunEvaluator
from langsmith.schemas import Example, Run


class CategoricalScoreConfig(BaseModel):
    """Configuration for a categorical score."""

    key: str
    choices: list[str]
    description: str
    include_explanation: bool = False
    explanation_description: Optional[str] = None


class ContinuousScoreConfig(BaseModel):
    """Configuration for a continuous score."""

    key: str
    min: float = 0
    max: float = 1
    description: str
    include_explanation: bool = False
    explanation_description: Optional[str] = None


def _create_score_json_schema(
    score_config: Union[CategoricalScoreConfig, ContinuousScoreConfig],
) -> dict:
    """Build the JSON schema used for the model's structured score output."""
    properties: dict = {}
    if isinstance(score_config, CategoricalScoreConfig):
        properties["score"] = {
            "type": "string",
            "enum": score_config.choices,
            "description": f"The score for the evaluation, one of {', '.join(score_config.choices)}.",
        }
    elif isinstance(score_config, ContinuousScoreConfig):
        properties["score"] = {
            "type": "number",
            "minimum": score_config.min,
            "maximum": score_config.max,
            "description": f"The score for the evaluation, between {score_config.min} and {score_config.max}, inclusive.",
        }
    else:
        raise ValueError("Invalid score type. Must be 'categorical' or 'continuous'")

    if score_config.include_explanation:
        properties["explanation"] = {
            "type": "string",
            "description": (
                "The explanation for the score."
                if score_config.explanation_description is None
                else score_config.explanation_description
            ),
        }

    return {
        "title": score_config.key,
        "description": score_config.description,
        "type": "object",
        "properties": properties,
        "required": (
            ["score", "explanation"] if score_config.include_explanation else ["score"]
        ),
    }


class LLMEvaluator(RunEvaluator):
    """A class for building LLM-as-a-judge evaluators."""

    def __init__(
        self,
        prompt_template: Union[str, list[tuple[str, str]]],
        score_config: Union[CategoricalScoreConfig, ContinuousScoreConfig],
        *,
        map_variables: Optional[Callable[[Run, Optional[Example]], dict]] = None,
        model_name: str = "gpt-4o",
        model_provider: str = "openai",
        **kwargs: Any,
    ):
        """Initialize the `LLMEvaluator`.

        Args:
            prompt_template (Union[str, list[tuple[str, str]]]): The prompt
                template to use for the evaluation. If a string is provided, it is
                assumed to be a human / user message.
            score_config (Union[CategoricalScoreConfig, ContinuousScoreConfig]):
                The configuration for the score, either categorical or continuous.
            map_variables (Optional[Callable[[Run, Example], dict]], optional):
                A function that maps the run and example to the variables in the
                prompt. If `None`, it is assumed that the prompt only requires
                'input', 'output', and 'expected'.
            model_name (Optional[str], optional): The model to use for the evaluation.
            model_provider (Optional[str], optional): The model provider to use
                for the evaluation.
        """
        try:
            from langchain.chat_models import init_chat_model
        except ImportError as e:
            raise ImportError(
                "LLMEvaluator requires langchain to be installed. "
                "Please install langchain by running `pip install langchain`."
            ) from e

        chat_model = init_chat_model(
            model=model_name, model_provider=model_provider, **kwargs
        )
        self._initialize(prompt_template, score_config, map_variables, chat_model)

    @classmethod
    def from_model(
        cls,
        model: Any,
        prompt_template: Union[str, list[tuple[str, str]]],
        score_config: Union[CategoricalScoreConfig, ContinuousScoreConfig],
        *,
        map_variables: Optional[Callable[[Run, Optional[Example]], dict]] = None,
    ) -> "LLMEvaluator":
        """Create an `LLMEvaluator` instance from a `BaseChatModel` instance.

        Args:
            model (BaseChatModel): The chat model instance to use for the evaluation.
            prompt_template (Union[str, list[tuple[str, str]]]): The prompt
                template to use for the evaluation. If a string is provided, it is
                assumed to be a system message.
            score_config (Union[CategoricalScoreConfig, ContinuousScoreConfig]):
                The configuration for the score, either categorical or continuous.
            map_variables (Optional[Callable[[Run, Example], dict]], optional):
                A function that maps the run and example to the variables in the
                prompt. If `None`, it is assumed that the prompt only requires
                'input', 'output', and 'expected'.

        Returns:
            LLMEvaluator: An instance of `LLMEvaluator`.
        """
        instance = cls.__new__(cls)
        instance._initialize(prompt_template, score_config, map_variables, model)
        return instance

    def _initialize(
        self,
        prompt_template: Union[str, list[tuple[str, str]]],
        score_config: Union[CategoricalScoreConfig, ContinuousScoreConfig],
        map_variables: Optional[Callable[[Run, Optional[Example]], dict]],
        chat_model: Any,
    ) -> None:
        """Shared initialization code for `__init__` and `from_model`.

        Args:
            prompt_template (Union[str, list[tuple[str, str]]]): The prompt template.
            score_config (Union[CategoricalScoreConfig, ContinuousScoreConfig]):
                The score configuration.
            map_variables (Optional[Callable[[Run, Example], dict]]):
                Function to map variables.
            chat_model (BaseChatModel): The chat model instance.
        """
        try:
            from langchain_core.language_models.chat_models import BaseChatModel
            from langchain_core.prompts import ChatPromptTemplate
        except ImportError as e:
            raise ImportError(
                "LLMEvaluator requires langchain-core to be installed. Please "
                "install langchain-core by running `pip install langchain-core`."
            ) from e

        if not (
            isinstance(chat_model, BaseChatModel)
            and hasattr(chat_model, "with_structured_output")
        ):
            raise ValueError(
                "chat_model must be an instance of BaseLanguageModel and "
                "support structured output."
            )

        if isinstance(prompt_template, str):
            self.prompt = ChatPromptTemplate.from_messages([("human", prompt_template)])
        else:
            self.prompt = ChatPromptTemplate.from_messages(prompt_template)

        if (
            set(self.prompt.input_variables) - {"input", "output", "expected"}
            and not map_variables
        ):
            raise ValueError(
                "map_inputs must be provided if the prompt template contains "
                "variables other than 'input', 'output', and 'expected'"
            )
        self.map_variables = map_variables

        self.score_config = score_config
        self.score_schema = _create_score_json_schema(self.score_config)
        chat_model = chat_model.with_structured_output(self.score_schema)
        self.runnable = self.prompt | chat_model

    @warn_beta
    def evaluate_run(
        self, run: Run, example: Optional[Example] = None
    ) -> Union[EvaluationResult, EvaluationResults]:
        """Evaluate a run."""
        variables = self._prepare_variables(run, example)
        output: dict = cast(dict, self.runnable.invoke(variables))
        return self._parse_output(output)

    @warn_beta
    async def aevaluate_run(
        self, run: Run, example: Optional[Example] = None
    ) -> Union[EvaluationResult, EvaluationResults]:
        """Asynchronously evaluate a run."""
        variables = self._prepare_variables(run, example)
        output: dict = cast(dict, await self.runnable.ainvoke(variables))
        return self._parse_output(output)

    def _prepare_variables(self, run: Run, example: Optional[Example]) -> dict:
        """Prepare variables for model invocation."""
        if self.map_variables:
            return self.map_variables(run, example)

        variables = {}
        if "input" in self.prompt.input_variables:
            if len(run.inputs) == 0:
                raise ValueError(
                    "No input keys are present in run.inputs but the prompt "
                    "requires 'input'."
                )
            if len(run.inputs) != 1:
                raise ValueError(
                    "Multiple input keys are present in run.inputs. Please "
                    "provide a map_variables function."
                )
            variables["input"] = list(run.inputs.values())[0]

        if "output" in self.prompt.input_variables:
            if not run.outputs or len(run.outputs) == 0:
                raise ValueError(
                    "No output keys are present in run.outputs but the prompt "
                    "requires 'output'."
                )
            if len(run.outputs) != 1:
                raise ValueError(
                    "Multiple output keys are present in run.outputs. Please "
                    "provide a map_variables function."
                )
            variables["output"] = list(run.outputs.values())[0]

        if "expected" in self.prompt.input_variables:
            if not example or not example.outputs:
                raise ValueError(
                    "No example or example outputs is provided but the prompt "
                    "requires 'expected'."
                )
            if len(example.outputs) != 1:
                raise ValueError(
                    "Multiple output keys are present in example.outputs. Please "
                    "provide a map_variables function."
                )
            variables["expected"] = list(example.outputs.values())[0]

        return variables

    def _parse_output(
        self, output: dict
    ) -> Union[EvaluationResult, EvaluationResults]:
        """Parse the model output into an evaluation result."""
        explanation = output.get("explanation", None)
        if isinstance(self.score_config, CategoricalScoreConfig):
            value = output["score"]
            return EvaluationResult(
                key=self.score_config.key, value=value, comment=explanation
            )
        elif isinstance(self.score_config, ContinuousScoreConfig):
            score = output["score"]
            return EvaluationResult(
                key=self.score_config.key, score=score, comment=explanation
            )