from __future__ import annotations

from typing import TYPE_CHECKING, Any, Callable, Optional, TypedDict, Union

from langsmith.evaluation.evaluator import DynamicRunEvaluator
from langsmith.run_helpers import traceable
from langsmith.schemas import Example, Run

if TYPE_CHECKING:
    from langchain.evaluation.schema import StringEvaluator

    from langsmith.evaluation.evaluator import RunEvaluator


class SingleEvaluatorInput(TypedDict):
    """The input to a `StringEvaluator`."""

    prediction: str
    """The predicted string to evaluate."""
    reference: Optional[Any]
    """The reference value, if the evaluator requires one."""
    input: Optional[str]
    """The input string, if the evaluator requires one."""


class LangChainStringEvaluator:
    r"""A class for wrapping a LangChain `StringEvaluator`.

    Requires the `langchain` package to be installed.

    Attributes:
        evaluator (StringEvaluator): The underlying `StringEvaluator` OR the name
            of the evaluator to load.

    Methods:
        `as_run_evaluator() -> RunEvaluator`:
            Convert the `LangChainStringEvaluator` to a `RunEvaluator`.

    Examples:
        !!! example "Creating a simple `LangChainStringEvaluator`"

            ```python
            evaluator = LangChainStringEvaluator("exact_match")
            ```

        !!! example "Converting a `LangChainStringEvaluator` to a `RunEvaluator`"

            ```python
            from langsmith.evaluation import LangChainStringEvaluator
            from langchain_openai import ChatOpenAI
            evaluator = LangChainStringEvaluator(
                "criteria",
                config={
                    "criteria": {
                        "usefulness": "The prediction is useful if"
                        " it is correct and/or asks a useful followup question."
                    },
                    "llm": ChatOpenAI(model="gpt-4o"),
                },
            )
            run_evaluator = evaluator.as_run_evaluator()
            run_evaluator  # doctest: +ELLIPSIS
            <DynamicRunEvaluator ...>
            ```

        !!! example "Customizing the LLM model used by the evaluator"

            ```python
            from langsmith.evaluation import LangChainStringEvaluator
            from langchain_anthropic import ChatAnthropic
            evaluator = LangChainStringEvaluator(
                "criteria",
                config={
                    "criteria": {
                        "usefulness": "The prediction is useful if"
                        " it is correct and/or asks a useful followup question."
                    },
                    "llm": ChatAnthropic(model="claude-3-opus-20240229"),
                },
            )

            run_evaluator = evaluator.as_run_evaluator()
            run_evaluator  # doctest: +ELLIPSIS
            <DynamicRunEvaluator ...>
            ```

        !!! example "Using the `evaluate` API with different evaluators"

            ```python
            def prepare_data(run: Run, example: Example):
                # Convert the evaluation data into the format expected by the evaluator
                # Only required for datasets with multiple input/output keys
                return {
                    "prediction": run.outputs["prediction"],
                    "reference": example.outputs["answer"],
                    "input": str(example.inputs),
                }
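            # The dict returned by prepare_data mirrors SingleEvaluatorInput:
            # "prediction" is always required, while "reference" and "input" are
            # only used when the wrapped evaluator requires them.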


            import re
            from langchain_anthropic import ChatAnthropic
            import langsmith
            from langsmith.evaluation import LangChainStringEvaluator, evaluate

            criteria_evaluator = LangChainStringEvaluator(
                "criteria",
                config={
                    "criteria": {
                        "usefulness": "The prediction is useful if it is correct"
                        " and/or asks a useful followup question."
                    },
                    "llm": ChatAnthropic(model="claude-3-opus-20240229"),
                },
                prepare_data=prepare_data,
            )

            embedding_evaluator = LangChainStringEvaluator("embedding_distance")
            exact_match_evaluator = LangChainStringEvaluator("exact_match")
            regex_match_evaluator = LangChainStringEvaluator(
                "regex_match", config={"flags": re.IGNORECASE}, prepare_data=prepare_data
            )

            scoring_evaluator = LangChainStringEvaluator(
                "labeled_score_string",
                config={
                    "criteria": {
                        "accuracy": "Score 1: Completely inaccurate\nScore 5: Somewhat accurate\nScore 10: Completely accurate"
                    },
                    "normalize_by": 10,
                    "llm": ChatAnthropic(model="claude-3-opus-20240229"),
                },
                prepare_data=prepare_data,
            )
            string_distance_evaluator = LangChainStringEvaluator(
                "string_distance",
                config={"distance_metric": "levenshtein"},
                prepare_data=prepare_data,
            )

            from langsmith import Client

            client = Client()
            results = evaluate(
                lambda inputs: {"prediction": "foo"},
                data=client.list_examples(dataset_name="Evaluate Examples", limit=1),
                evaluators=[
                    embedding_evaluator,
                    criteria_evaluator,
                    exact_match_evaluator,
                    regex_match_evaluator,
                    scoring_evaluator,
                    string_distance_evaluator,
                ],
            )  # doctest: +ELLIPSIS
            ```
    N)configprepare_data	evaluatorUnion[StringEvaluator, str]r   Optional[dict]r   BOptional[Callable[[Run, Optional[Example]], SingleEvaluatorInput]]c                C  sh   ddl m} t||r|| _n t|tr&ddlm} ||fi |p!i | _n	tdt| || _	dS )zInitialize a `LangChainStringEvaluator`.

        Args:
            evaluator (StringEvaluator): The underlying `StringEvaluator`, or the
                name of a LangChain evaluator to load.
            config (Optional[dict]): Keyword arguments passed to `load_evaluator`
                when `evaluator` is given as a string.
            prepare_data (Optional[Callable]): A function that maps a `Run` and an
                optional `Example` to a `SingleEvaluatorInput`. Defaults to
                extracting the single prediction/reference/input values.
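
        Example:
            A minimal sketch, reusing the `string_distance` configuration from the
            class docstring above:

            ```python
            evaluator = LangChainStringEvaluator(
                "string_distance",
                config={"distance_metric": "levenshtein"},
            )
            ```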
        """
        from langchain.evaluation.schema import StringEvaluator

        if isinstance(evaluator, StringEvaluator):
            self.evaluator = evaluator
        elif isinstance(evaluator, str):
            # Defer to LangChain's registry when given an evaluator name.
            from langchain.evaluation import load_evaluator

            self.evaluator = load_evaluator(evaluator, **(config or {}))
        else:
            raise NotImplementedError(f"Unsupported evaluator type: {type(evaluator)}")

        self._prepare_data = prepare_data

    def as_run_evaluator(
        self,
    ) -> RunEvaluator:
        """Convert the `LangChainStringEvaluator` to a `RunEvaluator`.

        This is the object used in the LangSmith `evaluate` API.

        Returns:
            RunEvaluator: The converted `RunEvaluator`.
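
        Example:
            A minimal sketch, mirroring the `exact_match` example in the class
            docstring:

            ```python
            evaluator = LangChainStringEvaluator("exact_match")
            run_evaluator = evaluator.as_run_evaluator()
            ```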
        """
        input_str = (
            "\n       \"input\": example.inputs['input'], "
            if self.evaluator.requires_input
            else ""
        )
        reference_str = (
            "\n       \"reference\": example.outputs['expected']"
            if self.evaluator.requires_reference
            else ""
        )
        # Shown in error messages to illustrate a custom prepare_data function.
        customization_error_str = f"""
def prepare_data(run, example):
    return {{
        "prediction": run.outputs['my_output'],{reference_str}{input_str}
    }}
evaluator = LangChainStringEvaluator(..., prepare_data=prepare_data)
"""
Nrunr   exampleOptional[Example]r,   r   c                   s   | j rt| j dkrtdj d  jjr0|r0|j r0t|j dkr0tdj d  jjrK|rK|jrKt|jdkrKtdj d  ttt	| j 
 jjrf|rf|j rftt	|j 
 nd jjr{|r{|jr{tt	|j
 dS d dS )N   z
Evaluator z{ only supports a single prediction key. Please ensure that the run has a single output. Or initialize with a prepare_data:
z nly supports a single reference key. Please ensure that the example has a single output. Or create a custom evaluator yourself:
zy only supports a single input key. Please ensure that the example has a single input. Or initialize with a prepare_data:
)r   r   r   )outputslen
ValueErrorr   requires_referencerequires_inputinputsr   nextitervalues)r.   r/   )customization_error_strr*   r   r   prepare_evaluator_inputs   s`   


zKLangChainStringEvaluator.as_run_evaluator.<locals>.prepare_evaluator_inputs)namedictc                   sB   j d u r
 | |n | |}jjdi |}djji|S Nkeyr   )r)   r   evaluate_stringsevaluation_namer.   r/   eval_inputsresultsr<   r*   r   r   evaluate  s   


z;LangChainStringEvaluator.as_run_evaluator.<locals>.evaluatec                   sJ   j d u r | |n | |}jjdi |I d H }djji|S r?   )r)   r   aevaluate_stringsrB   rC   rF   r   r   	aevaluate  s   


z<LangChainStringEvaluator.as_run_evaluator.<locals>.aevaluate)N)r.   r   r/   r0   r,   r   )r.   r   r/   r0   r,   r>   )r   r6   r5   r
   rB   r	   )r*   	input_strreference_strrG   rI   r   )r;   r<   r*   r   as_run_evaluator   s*   5	
	z)LangChainStringEvaluator.as_run_evaluator)r   r    r   r!   r   r"   )r,   r   )r   r   r   r   r+   rL   r   r   r   r   r      s     r   N)