LICENSE
NOTICE.txt
README.md
pyproject.toml
setup.cfg
astabench/__init__.py
astabench/cli.py
astabench/constants.py
astabench.egg-info/PKG-INFO
astabench.egg-info/SOURCES.txt
astabench.egg-info/dependency_links.txt
astabench.egg-info/entry_points.txt
astabench.egg-info/requires.txt
astabench.egg-info/top_level.txt
astabench/config/v1.0.0-dev1.yml
astabench/config/v1.0.0.yml
astabench/evals/__init__.py
astabench/evals/_registry.py
astabench/evals/utils.py
astabench/evals/arxivdigestables/__init__.py
astabench/evals/arxivdigestables/prompts.py
astabench/evals/arxivdigestables/task.py
astabench/evals/code_diagnostics/__init__.py
astabench/evals/code_diagnostics/_sandbox_eval_code.py
astabench/evals/code_diagnostics/task.py
astabench/evals/demo/__init__.py
astabench/evals/demo/arithmetic/__init__.py
astabench/evals/demo/arithmetic/task.py
astabench/evals/demo/arithmetic/task_with_rubric.py
astabench/evals/demo/arithmetic/task_with_tools.py
astabench/evals/demo/code_execution/compose.yaml
astabench/evals/demo/code_execution/task.py
astabench/evals/discoverybench/__init__.py
astabench/evals/discoverybench/eval_utils.py
astabench/evals/discoverybench/lm_utils.py
astabench/evals/discoverybench/task.py
astabench/evals/discoverybench/task_utils.py
astabench/evals/e2e_discovery/__init__.py
astabench/evals/e2e_discovery/task.py
astabench/evals/e2e_discovery/solvers/autoasta/autoasta_cached.py
astabench/evals/e2e_discovery/solvers/codescientist/codescientist_cached.py
astabench/evals/e2e_discovery/solvers/faker/faker.py
astabench/evals/e2e_discovery/solvers/faker/faker_cached.py
astabench/evals/inspect_eval_wrappers/__init__.py
astabench/evals/inspect_eval_wrappers/core_bench.py
astabench/evals/inspect_eval_wrappers/ds1000.py
astabench/evals/inspect_eval_wrappers/ds1000_splits.json
astabench/evals/labbench/__init__.py
astabench/evals/labbench/litqa2/__init__.py
astabench/evals/labbench/litqa2/task.py
astabench/evals/paper_finder/__init__.py
astabench/evals/paper_finder/datamodel.py
astabench/evals/paper_finder/eval.py
astabench/evals/paper_finder/paper_finder_utils.py
astabench/evals/paper_finder/relevance.py
astabench/evals/paper_finder/task.py
astabench/evals/sqa/__init__.py
astabench/evals/sqa/citation_eval.py
astabench/evals/sqa/precision_eval.py
astabench/evals/sqa/pydantic_models.py
astabench/evals/sqa/retry_utils.py
astabench/evals/sqa/rubric.py
astabench/evals/sqa/split_sentences.py
astabench/evals/sqa/task.py
astabench/evals/super/__init__.py
astabench/evals/super/task.py
astabench/helpers/prompt_logs.py
astabench/scripts/estimate_sqa_cit_eval_variance.py
astabench/scripts/validation_for_half_credit.py
astabench/scripts/validation_for_recall_no_background.py
astabench/solvers/__init__.py
astabench/solvers/lit_tables.py
astabench/solvers/llm.py
astabench/solvers/youcom.py
astabench/solvers/arxivdigestables/asta_table_agent.py
astabench/solvers/asta/v0/asta.py
astabench/solvers/code_agent/__init__.py
astabench/solvers/code_agent/agent.py
astabench/solvers/code_agent/code_agent.py
astabench/solvers/code_agent/environment.py
astabench/solvers/code_agent/llm_agent.py
astabench/solvers/code_agent/prompt.yaml
astabench/solvers/datavoyager/agent.py
astabench/solvers/datavoyager/dv_core/agent.py
astabench/solvers/datavoyager/dv_core/inspect_utils.py
astabench/solvers/datavoyager/dv_core/logger.py
astabench/solvers/datavoyager/dv_core/models.py
astabench/solvers/datavoyager/dv_core/patched.py
astabench/solvers/datavoyager/dv_core/utils.py
astabench/solvers/datavoyager/dv_core/config/datavoyager_modal_deployment_magentic_one_config_20250617.yaml
astabench/solvers/datavoyager/dv_core/config/datavoyager_modal_deployment_magentic_one_config_20250617_gpt4_1.yaml
astabench/solvers/datavoyager/dv_core/config/datavoyager_modal_deployment_magentic_one_config_20250617_o3.yaml
astabench/solvers/datavoyager/dv_core/config/datavoyager_modal_deployment_magentic_one_config_20250617_sonnet_4.yaml
astabench/solvers/datavoyager/dv_core/static/tools.py
astabench/solvers/futurehouse/__init__.py
astabench/solvers/futurehouse/futurehouse_solver.py
astabench/solvers/react/__init__.py
astabench/solvers/react/basic_agent.py
astabench/solvers/search/paper_finder.py
astabench/solvers/search/youcom_search.py
astabench/solvers/smolagents/agent.py
astabench/solvers/smolagents/llm_wrapper.py
astabench/solvers/smolagents/sandbox_wrapper.py
astabench/solvers/sqa/__init__.py
astabench/solvers/sqa/format_solver.py
astabench/solvers/sqa/formatted_fhouse.py
astabench/solvers/sqa/formatted_llm.py
astabench/solvers/sqa/formatted_perplexity.py
astabench/solvers/sqa/formatted_youcom.py
astabench/solvers/sqa/openai_json_output.py
astabench/solvers/sqa/perplexity_base.py
astabench/solvers/sqa/perplexity_json_output.py
astabench/solvers/sqa/run_storm.py
astabench/solvers/sqa/sqa.py
astabench/solvers/sqa/sqa_subprocess.py
astabench/solvers/sqa/storm_solver.py
astabench/solvers/sqa/debug/cached_solver.py
astabench/solvers/sqa/elicit/elicit_to_hf_dataset.py
astabench/solvers/sqa/elicit/memorized_solver.py
astabench/solvers/sqa/general_memorized/memorized_solver.py
astabench/solvers/sqa/general_memorized/push_ds_to_hf.py
astabench/solvers/sqa/openscholar/convert_cache_to_sqa.py
astabench/solvers/sqa/openscholar/memorized_solver.py
astabench/solvers/sqa/openscholar/query_openscholar.py
astabench/solvers/sqa/scispace/scispace.py
astabench/solvers/sqa/scispace/scispace_to_hf_dataset.py
astabench/tools/__init__.py
astabench/tools/calculator.py
astabench/tools/native_provider_tools.py
astabench/tools/paper_finder_ai2i.py
astabench/tools/report.py
astabench/tools/s2_api_tools.py
astabench/tools/search.py
astabench/tools/stateful_python.py
astabench/tools/submission.py
astabench/tools/table.py
astabench/types/code_execution.py
astabench/types/sqa.py
astabench/util/model.py
astabench/util/state.py
astabench/util/sandbox/Dockerfile
astabench/util/sandbox/__init__.py
astabench/util/sandbox/diagnostics_requirements.txt
astabench/util/sandbox/ds1000_requirements.txt
astabench/util/sandbox/sandbox_compose.yaml
astabench/util/sandbox/sandbox_jupyter.py
astabench/util/sandbox/sandbox_tool_manager.py
astabench/util/sandbox/super_requirements.txt
astabench/util/sandbox/static/jupyter_interface.py
astabench/util/sandbox/static/sandbox_client.py
astabench/util/sandbox/static/sandbox_types.py
inspect_evals/src/inspect_evals/__init__.py
inspect_evals/src/inspect_evals/_registry.py
inspect_evals/src/inspect_evals/metadata.py
inspect_evals/src/inspect_evals/py.typed
inspect_evals/src/inspect_evals/core_bench/Dockerfile
inspect_evals/src/inspect_evals/core_bench/README.md
inspect_evals/src/inspect_evals/core_bench/__init__.py
inspect_evals/src/inspect_evals/core_bench/agent_prompts.json
inspect_evals/src/inspect_evals/core_bench/compose.yaml
inspect_evals/src/inspect_evals/core_bench/core_bench.py
inspect_evals/src/inspect_evals/core_bench/dataset.py
inspect_evals/src/inspect_evals/core_bench/scorer.py
inspect_evals/src/inspect_evals/core_bench/tools.py
inspect_evals/src/inspect_evals/core_bench/utils.py
tests/test_basic_mockllm.py
tests/test_cli_logging.py
tests/test_eval_utils.py
tests/test_registry_evals.py
tests/test_suite_init.py