LICENSE
MANIFEST.in
README.md
pyproject.toml
requirements.txt
dataflow/__init__.py
dataflow/cli.py
dataflow/logger.py
dataflow/version.py
dataflow/agent/__init__.py
dataflow/agent/agentrole/__init__.py
dataflow/agent/agentrole/analyst.py
dataflow/agent/agentrole/debugger.py
dataflow/agent/agentrole/executioner.py
dataflow/agent/agentrole/planner.py
dataflow/agent/promptstemplates/__init__.py
dataflow/agent/promptstemplates/prompt_template.py
dataflow/agent/promptstemplates/resources/__init__.py
dataflow/agent/promptstemplates/resources/code_debug_template.json
dataflow/agent/promptstemplates/resources/json_form_template.json
dataflow/agent/promptstemplates/resources/operator_template.json
dataflow/agent/promptstemplates/resources/template.json
dataflow/agent/servicemanager/__init__.py
dataflow/agent/servicemanager/analysis_service.py
dataflow/agent/servicemanager/memory_service.py
dataflow/agent/servicemanager/storage_service.py
dataflow/agent/taskcenter/__init__.py
dataflow/agent/taskcenter/task_definitions.py
dataflow/agent/taskcenter/task_dispatcher.py
dataflow/agent/taskcenter/task_reg.py
dataflow/agent/taskcenter/resources/TaskInfo.yaml
dataflow/agent/toolkits/__init__.py
dataflow/agent/toolkits/minio_tookits.py
dataflow/agent/toolkits/pipeline_processor.py
dataflow/agent/toolkits/post_processor.py
dataflow/agent/toolkits/tool_factory.py
dataflow/agent/toolkits/tools.py
dataflow/agent/toolkits/resources/Operator.json
dataflow/agent/toolkits/resources/Operator_patched.json
dataflow/cli_funcs/__init__.py
dataflow/cli_funcs/cli_env.py
dataflow/cli_funcs/cli_init.py
dataflow/cli_funcs/copy_funcs.py
dataflow/cli_funcs/paths.py
dataflow/core/LLMServing.py
dataflow/core/Operator.py
dataflow/core/__init__.py
dataflow/example/AgenticRAGPipeline/pipeline_small_chunk.json
dataflow/example/GeneralTextPipeline/pt_input.jsonl
dataflow/example/GeneralTextPipeline/sft_input.jsonl
dataflow/example/KBCleaningPipeline/kbc_placeholder.json
dataflow/example/KBCleaningPipeline/test.doc
dataflow/example/KBCleaningPipeline/test.pdf
dataflow/example/ReasoningPipeline/pipeline_math.json
dataflow/example/ReasoningPipeline/pipeline_math_short.json
dataflow/example/Text2SQLPipeline/dev_tables.jsonl
dataflow/example/Text2SQLPipeline/pipeline.json
dataflow/llmserving/APILLMServing_aisuite.py
dataflow/llmserving/APILLMServing_request.py
dataflow/llmserving/LocalModelLLMServing.py
dataflow/llmserving/__init__.py
dataflow/operators/__init__.py
dataflow/operators/db/db_operator.py
dataflow/operators/eval/__init__.py
dataflow/operators/eval/GeneralText/__init__.py
dataflow/operators/eval/GeneralText/APIcaller/alpagasus_scorer.py
dataflow/operators/eval/GeneralText/models/debertav3_scorer.py
dataflow/operators/eval/GeneralText/models/deita_complexity_scorer.py
dataflow/operators/eval/GeneralText/models/deita_quality_scorer.py
dataflow/operators/eval/GeneralText/models/fineweb_edu_scorer.py
dataflow/operators/eval/GeneralText/models/instag_scorer.py
dataflow/operators/eval/GeneralText/models/pair_qual_scorer.py
dataflow/operators/eval/GeneralText/models/qurating_scorer.py
dataflow/operators/eval/GeneralText/models/superfiltering_scorer.py
dataflow/operators/eval/GeneralText/models/textbook_scorer.py
dataflow/operators/eval/GeneralText/models/Kenlm/model.py
dataflow/operators/eval/GeneralText/models/Qurating/qurater_annotate.py
dataflow/operators/eval/GeneralText/models/Qurating/modeling/modeling_flash_llama.py
dataflow/operators/eval/GeneralText/models/Superfiltering/data_analysis.py
dataflow/operators/eval/GeneralText/statistics/__init__.py
dataflow/operators/eval/GeneralText/statistics/langkit_scorer.py
dataflow/operators/eval/GeneralText/statistics/lexical_diversity_scorer.py
dataflow/operators/eval/GeneralText/statistics/ngram_scorer.py
dataflow/operators/generate/__init__.py
dataflow/operators/generate/AgenticRAG/AutoPromptGenerator.py
dataflow/operators/generate/AgenticRAG/QAGenerator.py
dataflow/operators/generate/AgenticRAG/QAScorer.py
dataflow/operators/generate/AgenticRAG/__init__.py
dataflow/operators/generate/GeneralText/PretrainGenerator.py
dataflow/operators/generate/GeneralText/SupervisedFinetuneGenerator.py
dataflow/operators/generate/GeneralText/__init__.py
dataflow/operators/generate/KnowledgeCleaning/CorpusTextSplitter.py
dataflow/operators/generate/KnowledgeCleaning/KnowledgeCleaner.py
dataflow/operators/generate/KnowledgeCleaning/KnowledgeExtractor.py
dataflow/operators/generate/KnowledgeCleaning/MultiHopQAGenerator.py
dataflow/operators/generate/KnowledgeCleaning/__init__.py
dataflow/operators/generate/RARE/BM25HardNeg.py
dataflow/operators/generate/RARE/Doc2Query.py
dataflow/operators/generate/RARE/ReasonDistill.py
dataflow/operators/generate/RARE/__init__.py
dataflow/operators/generate/Reasoning/AnswerExtraction_QwenMathEval.py
dataflow/operators/generate/Reasoning/AnswerGenerator.py
dataflow/operators/generate/Reasoning/PretrainFormatConverter.py
dataflow/operators/generate/Reasoning/PseudoAnswerGenerator.py
dataflow/operators/generate/Reasoning/QuestionCategoryClassifier.py
dataflow/operators/generate/Reasoning/QuestionDifficultyClassifier.py
dataflow/operators/generate/Reasoning/QuestionGenerator.py
dataflow/operators/generate/Reasoning/__init__.py
dataflow/operators/generate/Text2SQL/DatabaseSchemaExtractor.py
dataflow/operators/generate/Text2SQL/ExtraKnowledgeGenerator.py
dataflow/operators/generate/Text2SQL/PromptGenerator.py
dataflow/operators/generate/Text2SQL/QuestionRefiner.py
dataflow/operators/generate/Text2SQL/SQLDifficultyClassifier.py
dataflow/operators/generate/Text2SQL/SQLFilter.py
dataflow/operators/generate/Text2SQL/SchemaLinking.py
dataflow/operators/generate/Text2SQL/Text2SQLDifficultyClassifier.py
dataflow/operators/generate/Text2SQL/__init__.py
dataflow/operators/process/__init__.py
dataflow/operators/process/AgenticRAG/ContentChooser.py
dataflow/operators/process/AgenticRAG/__init__.py
dataflow/operators/process/GeneralText/__init__.py
dataflow/operators/process/GeneralText/deduplicators/minhash_deduplicator.py
dataflow/operators/process/GeneralText/filters/__init__.py
dataflow/operators/process/GeneralText/filters/alpagasus_filter.py
dataflow/operators/process/GeneralText/filters/deita_complexity_filter.py
dataflow/operators/process/GeneralText/filters/deita_quality_filter.py
dataflow/operators/process/GeneralText/filters/fineweb_edu_filter.py
dataflow/operators/process/GeneralText/filters/heuristics.py
dataflow/operators/process/GeneralText/filters/instag_filter.py
dataflow/operators/process/GeneralText/filters/language_filter.py
dataflow/operators/process/GeneralText/filters/ngram_filter.py
dataflow/operators/process/GeneralText/filters/pair_qual_filter.py
dataflow/operators/process/GeneralText/filters/qurating_filter.py
dataflow/operators/process/GeneralText/filters/superfiltering_filter.py
dataflow/operators/process/GeneralText/filters/text_book_filter.py
dataflow/operators/process/GeneralText/filters/blocklist/en.txt
dataflow/operators/process/GeneralText/filters/blocklist/zh.txt
dataflow/operators/process/Reasoning/AnswerFormatterFilter.py
dataflow/operators/process/Reasoning/AnswerGroundTruthFilter.py
dataflow/operators/process/Reasoning/AnswerJudger_MathVerify.py
dataflow/operators/process/Reasoning/AnswerNgramFilter.py
dataflow/operators/process/Reasoning/AnswerPipelineRoot.py
dataflow/operators/process/Reasoning/AnswerTokenLengthFilter.py
dataflow/operators/process/Reasoning/QuestionFilter.py
dataflow/operators/process/Reasoning/__init__.py
dataflow/operators/refine/__init__.py
dataflow/operators/refine/GeneralText/__init__.py
dataflow/operators/refine/GeneralText/html_remove_refiner.py
dataflow/operators/refine/GeneralText/remove_emoji_refiner.py
dataflow/operators/refine/GeneralText/remove_extra_spaces_refiner.py
dataflow/prompts/__init__.py
dataflow/prompts/agenticrag.py
dataflow/prompts/general_text.py
dataflow/prompts/kbcleaning.py
dataflow/prompts/multihopqa.py
dataflow/prompts/reasoning.py
dataflow/prompts/text2sql.py
dataflow/scripts/pipelines/AgenticRAGPipeline/bash/pipeline_full.sh
dataflow/scripts/pipelines/AgenticRAGPipeline/yaml/generate/AutoPromptGenerator.yaml
dataflow/scripts/pipelines/AgenticRAGPipeline/yaml/generate/QAGenerator.yaml
dataflow/scripts/pipelines/AgenticRAGPipeline/yaml/generate/QAScorer.yaml
dataflow/scripts/pipelines/AgenticRAGPipeline/yaml/process/ContentChooser.yaml
dataflow/scripts/pipelines/ReasoningPipeline/bash/pipeline_full.sh
dataflow/scripts/pipelines/ReasoningPipeline/yaml/PT/eval/ReasoningQualityJudger.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/PT/generate/Pretrain_AnswerGenerator.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/PT/generate/Pretrain_QuestionGenerator.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/PT/process/Pretrain_AnswerPipelineRoot.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/PT/process/Pretrain_FormatConvert_sft2pt.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/PT/process/Pretrain_MathProblemFilter.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/PT/process/Pretrain_ReasonerNgramFilter.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/generate/AnswerExtraction_qwenmatheval.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/generate/AnswerGenerator.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/generate/PseudoAnswerGenerator.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/generate/QuestionCategoryClassifier.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/generate/QuestionDifficultyClassifier.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/generate/QuestionGenerator.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/process/AnswerPipelineRoot.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/process/MathProblemFilter.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/process/MathProblemFilter_step2.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/process/ReasonerAnsSelection.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/process/ReasonerFormatFilter.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/process/ReasonerFormatFilter_withoutGT.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/process/ReasonerLengthFilter.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/process/ReasonerLengthFilter_withoutGT.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/process/ReasonerNgramFilter.yaml
dataflow/scripts/pipelines/ReasoningPipeline/yaml/SFT/process/ReasonerNgramFilter_withoutGT.yaml
dataflow/scripts/pipelines/TextPipeline/yaml/eval/ngram_filter.yaml
dataflow/scripts/pipelines/TextPipeline/yaml/eval/ngram_scorer.yaml
dataflow/statics/pipelines/api_pipelines/agenticrag_pipeline.py
dataflow/statics/pipelines/api_pipelines/rare_pipeline.py
dataflow/statics/pipelines/api_pipelines/reasoning_pipeline.py
dataflow/statics/pipelines/api_pipelines/test_dockbcleaning.py
dataflow/statics/pipelines/api_pipelines/test_pdfkbcleaning.py
dataflow/statics/pipelines/api_pipelines/test_urlkbcleaning.py
dataflow/statics/pipelines/api_pipelines/text2sql_pipeline.py
dataflow/statics/pipelines/api_pipelines/text_sft_filter.py
dataflow/statics/pipelines/cpu_pipelines/reasoning_pipeline.py
dataflow/statics/pipelines/cpu_pipelines/test_urlkbcleaning.py
dataflow/statics/pipelines/cpu_pipelines/text2sql_pipeline.py
dataflow/statics/pipelines/cpu_pipelines/text_pt_filter.py
dataflow/statics/pipelines/cpu_pipelines/text_sft_filter.py
dataflow/statics/pipelines/gpu_pipelines/agenticrag_pipeline.py
dataflow/statics/pipelines/gpu_pipelines/rare_pipeline.py
dataflow/statics/pipelines/gpu_pipelines/reasoning_pipeline.py
dataflow/statics/pipelines/gpu_pipelines/test_dockbcleaning.py
dataflow/statics/pipelines/gpu_pipelines/test_pdfkbcleaning.py
dataflow/statics/pipelines/gpu_pipelines/test_urlkbcleaning.py
dataflow/statics/pipelines/gpu_pipelines/text2sql_pipeline.py
dataflow/statics/pipelines/gpu_pipelines/text_pt_filter.py
dataflow/statics/pipelines/gpu_pipelines/text_pt_synthetic.py
dataflow/statics/pipelines/gpu_pipelines/text_sft_filter.py
dataflow/statics/pipelines/gpu_pipelines/text_sft_synthetic.py
dataflow/utils/__init__.py
dataflow/utils/kbcleaning.py
dataflow/utils/registry.py
dataflow/utils/storage.py
dataflow/utils/utils.py
dataflow/utils/reasoning/AnswerExtraction.py
dataflow/utils/reasoning/CategoryFuzz.py
open_dataflow.egg-info/PKG-INFO
open_dataflow.egg-info/SOURCES.txt
open_dataflow.egg-info/dependency_links.txt
open_dataflow.egg-info/entry_points.txt
open_dataflow.egg-info/requires.txt
open_dataflow.egg-info/top_level.txt
test/test_agentic_rag.py
test/test_dataflow_agent.py
test/test_dockbcleaning.py
test/test_general_text.py
test/test_pdfkbcleaning.py
test/test_pipelines.py
test/test_pt_filter.py
test/test_pt_synthetic.py
test/test_reasoning.py
test/test_reasoning_pretrain.py
test/test_sft_filter.py
test/test_sft_synthetic.py
test/test_text2sql.py
test/test_urlkbcleaning.py