Metadata-Version: 2.4
Name: unstructured_ingest
Version: 1.2.2
Summary: Local ETL data pipeline to get data RAG-ready
Author-email: Unstructured Technologies <devops@unstructuredai.io>
License-Expression: Apache-2.0
License-File: LICENSE.md
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Education
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: <3.13,>=3.10
Requires-Dist: certifi>=2025.7.14
Requires-Dist: click
Requires-Dist: opentelemetry-sdk
Requires-Dist: pydantic>=2.7
Requires-Dist: python-dateutil
Requires-Dist: tqdm
Provides-Extra: airtable
Requires-Dist: pandas; extra == 'airtable'
Requires-Dist: pyairtable; extra == 'airtable'
Provides-Extra: astradb
Requires-Dist: astrapy>2.0.0; extra == 'astradb'
Provides-Extra: azure
Requires-Dist: adlfs; extra == 'azure'
Requires-Dist: fsspec; extra == 'azure'
Provides-Extra: azure-ai-search
Requires-Dist: azure-search-documents; extra == 'azure-ai-search'
Provides-Extra: bedrock
Requires-Dist: aioboto3; extra == 'bedrock'
Requires-Dist: boto3; extra == 'bedrock'
Provides-Extra: biomed
Requires-Dist: bs4; extra == 'biomed'
Requires-Dist: requests; extra == 'biomed'
Provides-Extra: box
Requires-Dist: boxfs; extra == 'box'
Requires-Dist: fsspec; extra == 'box'
Provides-Extra: chroma
Requires-Dist: chromadb; extra == 'chroma'
Provides-Extra: clarifai
Requires-Dist: clarifai; extra == 'clarifai'
Provides-Extra: confluence
Requires-Dist: atlassian-python-api; extra == 'confluence'
Requires-Dist: requests; extra == 'confluence'
Provides-Extra: couchbase
Requires-Dist: couchbase; extra == 'couchbase'
Provides-Extra: databricks-delta-tables
Requires-Dist: databricks-sql-connector; extra == 'databricks-delta-tables'
Requires-Dist: pandas; extra == 'databricks-delta-tables'
Provides-Extra: databricks-volumes
Requires-Dist: databricks-sdk; extra == 'databricks-volumes'
Provides-Extra: delta-table
Requires-Dist: boto3; extra == 'delta-table'
Requires-Dist: deltalake; extra == 'delta-table'
Requires-Dist: pandas; extra == 'delta-table'
Requires-Dist: pyarrow; extra == 'delta-table'
Requires-Dist: tenacity; extra == 'delta-table'
Provides-Extra: discord
Requires-Dist: discord-py; extra == 'discord'
Provides-Extra: doc
Requires-Dist: unstructured[doc]; extra == 'doc'
Provides-Extra: docx
Requires-Dist: unstructured[docx]; extra == 'docx'
Provides-Extra: dropbox
Requires-Dist: dropboxdrivefs; extra == 'dropbox'
Requires-Dist: fsspec; extra == 'dropbox'
Provides-Extra: duckdb
Requires-Dist: duckdb; extra == 'duckdb'
Requires-Dist: pandas; extra == 'duckdb'
Provides-Extra: elasticsearch
Requires-Dist: elasticsearch[async]<9.0.0; extra == 'elasticsearch'
Provides-Extra: epub
Requires-Dist: unstructured[epub]; extra == 'epub'
Provides-Extra: gcs
Requires-Dist: bs4; extra == 'gcs'
Requires-Dist: fsspec; extra == 'gcs'
Requires-Dist: gcsfs; extra == 'gcs'
Provides-Extra: github
Requires-Dist: pygithub>1.58.0; extra == 'github'
Requires-Dist: requests; extra == 'github'
Provides-Extra: gitlab
Requires-Dist: python-gitlab; extra == 'gitlab'
Provides-Extra: google-drive
Requires-Dist: google-api-python-client; extra == 'google-drive'
Requires-Dist: tenacity; extra == 'google-drive'
Provides-Extra: hubspot
Requires-Dist: hubspot-api-client; extra == 'hubspot'
Requires-Dist: urllib3; extra == 'hubspot'
Provides-Extra: huggingface
Requires-Dist: sentence-transformers; extra == 'huggingface'
Provides-Extra: ibm-watsonx-s3
Requires-Dist: httpx; extra == 'ibm-watsonx-s3'
Requires-Dist: pandas; extra == 'ibm-watsonx-s3'
Requires-Dist: pyarrow; extra == 'ibm-watsonx-s3'
Requires-Dist: pyiceberg; extra == 'ibm-watsonx-s3'
Requires-Dist: tenacity; extra == 'ibm-watsonx-s3'
Provides-Extra: image
Requires-Dist: unstructured[image]; extra == 'image'
Provides-Extra: jira
Requires-Dist: atlassian-python-api; extra == 'jira'
Provides-Extra: kafka
Requires-Dist: confluent-kafka; extra == 'kafka'
Provides-Extra: kdbai
Requires-Dist: kdbai-client>=1.4.0; extra == 'kdbai'
Requires-Dist: pandas; extra == 'kdbai'
Provides-Extra: lancedb
Requires-Dist: lancedb; extra == 'lancedb'
Provides-Extra: md
Requires-Dist: unstructured[md]; extra == 'md'
Provides-Extra: milvus
Requires-Dist: pymilvus; extra == 'milvus'
Provides-Extra: mixedbreadai
Requires-Dist: mixedbread; extra == 'mixedbreadai'
Provides-Extra: mongodb
Requires-Dist: pymongo; extra == 'mongodb'
Provides-Extra: msg
Requires-Dist: unstructured[msg]; extra == 'msg'
Provides-Extra: neo4j
Requires-Dist: cymple; extra == 'neo4j'
Requires-Dist: neo4j-rust-ext; extra == 'neo4j'
Requires-Dist: networkx; extra == 'neo4j'
Provides-Extra: notion
Requires-Dist: backoff; extra == 'notion'
Requires-Dist: htmlbuilder; extra == 'notion'
Requires-Dist: httpx; extra == 'notion'
Requires-Dist: notion-client; extra == 'notion'
Provides-Extra: octoai
Requires-Dist: openai; extra == 'octoai'
Requires-Dist: tiktoken; extra == 'octoai'
Provides-Extra: odt
Requires-Dist: unstructured[odt]; extra == 'odt'
Provides-Extra: onedrive
Requires-Dist: msal; extra == 'onedrive'
Requires-Dist: office365-rest-python-client; extra == 'onedrive'
Requires-Dist: requests; extra == 'onedrive'
Provides-Extra: openai
Requires-Dist: openai; extra == 'openai'
Requires-Dist: tiktoken; extra == 'openai'
Provides-Extra: opensearch
Requires-Dist: opensearch-py; extra == 'opensearch'
Provides-Extra: org
Requires-Dist: unstructured[org]; extra == 'org'
Provides-Extra: outlook
Requires-Dist: msal; extra == 'outlook'
Requires-Dist: office365-rest-python-client; extra == 'outlook'
Provides-Extra: pdf
Requires-Dist: unstructured[pdf]; extra == 'pdf'
Provides-Extra: pinecone
Requires-Dist: pinecone; extra == 'pinecone'
Provides-Extra: postgres
Requires-Dist: pandas; extra == 'postgres'
Requires-Dist: psycopg2-binary; extra == 'postgres'
Provides-Extra: ppt
Requires-Dist: unstructured[ppt]; extra == 'ppt'
Provides-Extra: pptx
Requires-Dist: unstructured[pptx]; extra == 'pptx'
Provides-Extra: qdrant
Requires-Dist: qdrant-client; extra == 'qdrant'
Provides-Extra: reddit
Requires-Dist: praw; extra == 'reddit'
Provides-Extra: redis
Requires-Dist: redis<=5.3.0; extra == 'redis'
Provides-Extra: remote
Requires-Dist: unstructured-client>=0.30.0; extra == 'remote'
Provides-Extra: rst
Requires-Dist: unstructured[rst]; extra == 'rst'
Provides-Extra: rtf
Requires-Dist: unstructured[rtf]; extra == 'rtf'
Provides-Extra: s3
Requires-Dist: fsspec; extra == 's3'
Requires-Dist: s3fs; extra == 's3'
Provides-Extra: salesforce
Requires-Dist: simple-salesforce; extra == 'salesforce'
Provides-Extra: sftp
Requires-Dist: fsspec; extra == 'sftp'
Requires-Dist: paramiko; extra == 'sftp'
Provides-Extra: sharepoint
Requires-Dist: msal; extra == 'sharepoint'
Requires-Dist: office365-rest-python-client; extra == 'sharepoint'
Requires-Dist: requests; extra == 'sharepoint'
Provides-Extra: singlestore
Requires-Dist: pandas; extra == 'singlestore'
Requires-Dist: singlestoredb; extra == 'singlestore'
Provides-Extra: slack
Requires-Dist: slack-sdk[optional]; extra == 'slack'
Provides-Extra: snowflake
Requires-Dist: pandas; extra == 'snowflake'
Requires-Dist: psycopg2-binary; extra == 'snowflake'
Requires-Dist: snowflake-connector-python; extra == 'snowflake'
Provides-Extra: togetherai
Requires-Dist: together; extra == 'togetherai'
Provides-Extra: tsv
Requires-Dist: unstructured[tsv]; extra == 'tsv'
Provides-Extra: vastdb
Requires-Dist: ibis; extra == 'vastdb'
Requires-Dist: pandas; extra == 'vastdb'
Requires-Dist: pyarrow; extra == 'vastdb'
Requires-Dist: vastdb; extra == 'vastdb'
Provides-Extra: vectara
Requires-Dist: aiofiles; extra == 'vectara'
Requires-Dist: httpx; extra == 'vectara'
Requires-Dist: requests; extra == 'vectara'
Provides-Extra: vertexai
Requires-Dist: vertexai; extra == 'vertexai'
Provides-Extra: voyageai
Requires-Dist: voyageai; extra == 'voyageai'
Provides-Extra: weaviate
Requires-Dist: weaviate-client; extra == 'weaviate'
Provides-Extra: wikipedia
Requires-Dist: wikipedia; extra == 'wikipedia'
Provides-Extra: xlsx
Requires-Dist: unstructured[xlsx]; extra == 'xlsx'
Provides-Extra: zendesk
Requires-Dist: aiofiles; extra == 'zendesk'
Requires-Dist: bs4; extra == 'zendesk'
Requires-Dist: httpx; extra == 'zendesk'
Description-Content-Type: text/markdown

# Unstructured Ingest

For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.
