LICENSE.md
MANIFEST.in
README.md
pyproject.toml
setup.py
deepeval/__init__.py
deepeval/_version.py
deepeval/constants.py
deepeval/evaluate.py
deepeval/key_handler.py
deepeval/progress_context.py
deepeval/singleton.py
deepeval/telemetry.py
deepeval/types.py
deepeval/utils.py
deepeval.egg-info/PKG-INFO
deepeval.egg-info/SOURCES.txt
deepeval.egg-info/dependency_links.txt
deepeval.egg-info/entry_points.txt
deepeval.egg-info/requires.txt
deepeval.egg-info/top_level.txt
deepeval/benchmarks/__init__.py
deepeval/benchmarks/base_benchmark.py
deepeval/benchmarks/schema.py
deepeval/benchmarks/utils.py
deepeval/benchmarks/big_bench_hard/__init__.py
deepeval/benchmarks/big_bench_hard/big_bench_hard.py
deepeval/benchmarks/big_bench_hard/task.py
deepeval/benchmarks/big_bench_hard/template.py
deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py
deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt
deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py
deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt
deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt
deepeval/benchmarks/drop/__init__.py
deepeval/benchmarks/drop/drop.py
deepeval/benchmarks/drop/task.py
deepeval/benchmarks/drop/template.py
deepeval/benchmarks/gsm8k/__init__.py
deepeval/benchmarks/gsm8k/gsm8k.py
deepeval/benchmarks/gsm8k/template.py
deepeval/benchmarks/hellaswag/__init__.py
deepeval/benchmarks/hellaswag/hellaswag.py
deepeval/benchmarks/hellaswag/task.py
deepeval/benchmarks/hellaswag/template.py
deepeval/benchmarks/human_eval/__init__.py
deepeval/benchmarks/human_eval/human_eval.py
deepeval/benchmarks/human_eval/task.py
deepeval/benchmarks/human_eval/template.py
deepeval/benchmarks/mmlu/__init__.py
deepeval/benchmarks/mmlu/mmlu.py
deepeval/benchmarks/mmlu/task.py
deepeval/benchmarks/mmlu/template.py
deepeval/benchmarks/modes/__init__.py
deepeval/benchmarks/tasks/__init__.py
deepeval/benchmarks/truthful_qa/__init__.py
deepeval/benchmarks/truthful_qa/mode.py
deepeval/benchmarks/truthful_qa/task.py
deepeval/benchmarks/truthful_qa/template.py
deepeval/benchmarks/truthful_qa/truthful_qa.py
deepeval/cli/__init__.py
deepeval/cli/main.py
deepeval/cli/test.py
deepeval/confident/__init__.py
deepeval/confident/api.py
deepeval/confident/evaluate.py
deepeval/confident/types.py
deepeval/dataset/__init__.py
deepeval/dataset/api.py
deepeval/dataset/dataset.py
deepeval/dataset/golden.py
deepeval/dataset/utils.py
deepeval/event/__init__.py
deepeval/event/api.py
deepeval/event/event.py
deepeval/integrations/__init__.py
deepeval/integrations/integrations.py
deepeval/integrations/harness/__init__.py
deepeval/integrations/harness/callback.py
deepeval/integrations/hugging_face/__init__.py
deepeval/integrations/hugging_face/callback.py
deepeval/integrations/hugging_face/rich_manager.py
deepeval/integrations/hugging_face/utils.py
deepeval/integrations/langchain/__init__.py
deepeval/integrations/langchain/callback.py
deepeval/integrations/llama_index/__init__.py
deepeval/integrations/llama_index/callback.py
deepeval/integrations/llama_index/evaluators.py
deepeval/integrations/llama_index/utils.py
deepeval/metrics/__init__.py
deepeval/metrics/base_metric.py
deepeval/metrics/indicator.py
deepeval/metrics/ragas.py
deepeval/metrics/utils.py
deepeval/metrics/answer_relevancy/__init__.py
deepeval/metrics/answer_relevancy/answer_relevancy.py
deepeval/metrics/answer_relevancy/schema.py
deepeval/metrics/answer_relevancy/template.py
deepeval/metrics/bias/__init__.py
deepeval/metrics/bias/bias.py
deepeval/metrics/bias/schema.py
deepeval/metrics/bias/template.py
deepeval/metrics/contextual_precision/__init__.py
deepeval/metrics/contextual_precision/contextual_precision.py
deepeval/metrics/contextual_precision/schema.py
deepeval/metrics/contextual_precision/template.py
deepeval/metrics/contextual_recall/__init__.py
deepeval/metrics/contextual_recall/contextual_recall.py
deepeval/metrics/contextual_recall/schema.py
deepeval/metrics/contextual_recall/template.py
deepeval/metrics/contextual_relevancy/__init__.py
deepeval/metrics/contextual_relevancy/contextual_relevancy.py
deepeval/metrics/contextual_relevancy/schema.py
deepeval/metrics/contextual_relevancy/template.py
deepeval/metrics/conversation_completeness/__init__.py
deepeval/metrics/conversation_completeness/conversation_completeness.py
deepeval/metrics/conversation_completeness/schema.py
deepeval/metrics/conversation_completeness/template.py
deepeval/metrics/conversation_relevancy/__init__.py
deepeval/metrics/conversation_relevancy/conversation_relevancy.py
deepeval/metrics/conversation_relevancy/schema.py
deepeval/metrics/conversation_relevancy/template.py
deepeval/metrics/faithfulness/__init__.py
deepeval/metrics/faithfulness/faithfulness.py
deepeval/metrics/faithfulness/schema.py
deepeval/metrics/faithfulness/template.py
deepeval/metrics/g_eval/__init__.py
deepeval/metrics/g_eval/g_eval.py
deepeval/metrics/g_eval/schema.py
deepeval/metrics/g_eval/template.py
deepeval/metrics/hallucination/__init__.py
deepeval/metrics/hallucination/hallucination.py
deepeval/metrics/hallucination/schema.py
deepeval/metrics/hallucination/template.py
deepeval/metrics/knowledge_retention/__init__.py
deepeval/metrics/knowledge_retention/knowledge_retention.py
deepeval/metrics/knowledge_retention/schema.py
deepeval/metrics/knowledge_retention/template.py
deepeval/metrics/red_teaming_metrics/__init__.py
deepeval/metrics/red_teaming_metrics/bias/__init__.py
deepeval/metrics/red_teaming_metrics/bias/bias.py
deepeval/metrics/red_teaming_metrics/bias/schema.py
deepeval/metrics/red_teaming_metrics/bias/template.py
deepeval/metrics/red_teaming_metrics/contracts/__init__.py
deepeval/metrics/red_teaming_metrics/contracts/contracts.py
deepeval/metrics/red_teaming_metrics/contracts/schema.py
deepeval/metrics/red_teaming_metrics/contracts/template.py
deepeval/metrics/red_teaming_metrics/debug_access/__init__.py
deepeval/metrics/red_teaming_metrics/debug_access/debug_access.py
deepeval/metrics/red_teaming_metrics/debug_access/schema.py
deepeval/metrics/red_teaming_metrics/debug_access/template.py
deepeval/metrics/red_teaming_metrics/excessive_agency/__init__.py
deepeval/metrics/red_teaming_metrics/excessive_agency/excessive_agency.py
deepeval/metrics/red_teaming_metrics/excessive_agency/schema.py
deepeval/metrics/red_teaming_metrics/excessive_agency/template.py
deepeval/metrics/red_teaming_metrics/hallucination/__init__.py
deepeval/metrics/red_teaming_metrics/hallucination/hallucination.py
deepeval/metrics/red_teaming_metrics/hallucination/schema.py
deepeval/metrics/red_teaming_metrics/hallucination/template.py
deepeval/metrics/red_teaming_metrics/harm/__init__.py
deepeval/metrics/red_teaming_metrics/harm/harm.py
deepeval/metrics/red_teaming_metrics/harm/schema.py
deepeval/metrics/red_teaming_metrics/harm/template.py
deepeval/metrics/red_teaming_metrics/imitation/__init__.py
deepeval/metrics/red_teaming_metrics/imitation/imitation.py
deepeval/metrics/red_teaming_metrics/imitation/schema.py
deepeval/metrics/red_teaming_metrics/imitation/template.py
deepeval/metrics/red_teaming_metrics/pii/__init__.py
deepeval/metrics/red_teaming_metrics/pii/pii.py
deepeval/metrics/red_teaming_metrics/pii/schema.py
deepeval/metrics/red_teaming_metrics/pii/template.py
deepeval/metrics/red_teaming_metrics/politics/__init__.py
deepeval/metrics/red_teaming_metrics/politics/politics.py
deepeval/metrics/red_teaming_metrics/politics/schema.py
deepeval/metrics/red_teaming_metrics/politics/template.py
deepeval/metrics/red_teaming_metrics/rbac/__init__.py
deepeval/metrics/red_teaming_metrics/rbac/rbac.py
deepeval/metrics/red_teaming_metrics/rbac/schema.py
deepeval/metrics/red_teaming_metrics/rbac/template.py
deepeval/metrics/red_teaming_metrics/shell_injection/__init__.py
deepeval/metrics/red_teaming_metrics/shell_injection/schema.py
deepeval/metrics/red_teaming_metrics/shell_injection/shell_injection.py
deepeval/metrics/red_teaming_metrics/shell_injection/template.py
deepeval/metrics/red_teaming_metrics/sql_injection/__init__.py
deepeval/metrics/red_teaming_metrics/sql_injection/schema.py
deepeval/metrics/red_teaming_metrics/sql_injection/sql_injection.py
deepeval/metrics/red_teaming_metrics/sql_injection/template.py
deepeval/metrics/summarization/__init__.py
deepeval/metrics/summarization/schema.py
deepeval/metrics/summarization/summarization.py
deepeval/metrics/summarization/template.py
deepeval/metrics/tool_correctness/__init__.py
deepeval/metrics/tool_correctness/tool_correctness.py
deepeval/metrics/toxicity/__init__.py
deepeval/metrics/toxicity/schema.py
deepeval/metrics/toxicity/template.py
deepeval/metrics/toxicity/toxicity.py
deepeval/models/__init__.py
deepeval/models/_summac_model.py
deepeval/models/answer_relevancy_model.py
deepeval/models/base_model.py
deepeval/models/detoxify_model.py
deepeval/models/gpt_model.py
deepeval/models/gpt_model_schematic.py
deepeval/models/hallucination_model.py
deepeval/models/openai_embedding_model.py
deepeval/models/summac_model.py
deepeval/models/unbias_model.py
deepeval/monitor/__init__.py
deepeval/monitor/api.py
deepeval/monitor/feedback.py
deepeval/monitor/monitor.py
deepeval/plugins/__init__.py
deepeval/plugins/plugin.py
deepeval/red_team/__init__.py
deepeval/red_team/red_team.py
deepeval/scorer/__init__.py
deepeval/scorer/scorer.py
deepeval/synthesizer/__init__.py
deepeval/synthesizer/base_synthesizer.py
deepeval/synthesizer/schema.py
deepeval/synthesizer/synthesizer.py
deepeval/synthesizer/synthesizer_red_team.py
deepeval/synthesizer/types.py
deepeval/synthesizer/utils.py
deepeval/synthesizer/chunking/__init__.py
deepeval/synthesizer/chunking/context_generator.py
deepeval/synthesizer/chunking/doc_chunker.py
deepeval/synthesizer/templates/__init__.py
deepeval/synthesizer/templates/template.py
deepeval/synthesizer/templates/template_prompt.py
deepeval/synthesizer/templates/template_red_team.py
deepeval/test_case/__init__.py
deepeval/test_case/conversational_test_case.py
deepeval/test_case/llm_test_case.py
deepeval/test_run/__init__.py
deepeval/test_run/api.py
deepeval/test_run/cache.py
deepeval/test_run/hooks.py
deepeval/test_run/hyperparameters.py
deepeval/test_run/test_run.py
deepeval/tracing/__init__.py
deepeval/tracing/tracer.py
deepeval/tracing/tracing.py
tests/__init__.py
tests/custom_judge.py
tests/test_answer_relevancy.py
tests/test_benchmarks.py
tests/test_bias.py
tests/test_cache.py
tests/test_cli.py
tests/test_contextual_precision.py
tests/test_contextual_recall.py
tests/test_contextual_relevancy.py
tests/test_custom_metric.py
tests/test_dataset.py
tests/test_deployment.py
tests/test_everything.py
tests/test_faithfulness.py
tests/test_g_eval.py
tests/test_hallucination.py
tests/test_hybrid_tracing.py
tests/test_json_metrics.py
tests/test_rag_metrics.py
tests/test_ragas.py
tests/test_red_team_synthesizer.py
tests/test_scoring.py
tests/test_stateless.py
tests/test_summarization.py
tests/test_synthesizer.py
tests/test_toxic.py
tests/test_utils.py