chardet
filetype
python-magic
lxml
nltk
tabulate
requests
beautifulsoup4
emoji
dataclasses-json
python-iso639
langdetect
numpy<2
rapidfuzz
backoff
typing-extensions
unstructured-client
wrapt
tqdm
psutil

[airtable]
pyairtable

[all-docs]
effdet
pillow_heif
pandas
unstructured-inference==0.7.36
python-oxmsg
openpyxl
google-cloud-vision
markdown
pdfminer.six
pytesseract
python-pptx<=0.6.23
unstructured.pytesseract>=0.3.12
pikepdf
xlrd
pypdf
pdf2image
python-docx>=1.1.2
networkx
onnx
pypandoc

[astra]
astrapy

[azure]
adlfs
fsspec

[azure-cognitive-search]
azure-search-documents

[bedrock]
boto3
langchain-community

[biomed]
bs4

[box]
boxfs
fsspec

[chroma]
chromadb
importlib-metadata>=7.1.0
typer<=0.9.0

[clarifai]
clarifai

[confluence]
atlassian-python-api

[csv]
pandas

[databricks-volumes]
databricks-sdk

[delta-table]
deltalake
fsspec

[discord]
discord-py

[doc]
python-docx>=1.1.2

[docx]
python-docx>=1.1.2

[dropbox]
dropboxdrivefs
fsspec

[elasticsearch]
elasticsearch[async]

[embed-huggingface]
huggingface
langchain-community
sentence_transformers

[embed-octoai]
openai
tiktoken

[embed-vertexai]
langchain
langchain-community
langchain-google-vertexai

[embed-voyageai]
langchain
langchain-voyageai

[epub]
pypandoc

[gcs]
gcsfs
fsspec
bs4

[github]
pygithub>1.58.0

[gitlab]
python-gitlab

[google-drive]
google-api-python-client

[hubspot]
hubspot-api-client
urllib3

[huggingface]
langdetect
sacremoses
sentencepiece
torch
transformers

[image]
onnx
pdf2image
pdfminer.six
pikepdf
pillow_heif
pypdf
pytesseract
google-cloud-vision
effdet
unstructured-inference==0.7.36
unstructured.pytesseract>=0.3.12

[jira]
atlassian-python-api

[kafka]
confluent-kafka

[local-inference]
effdet
pillow_heif
pandas
unstructured-inference==0.7.36
python-oxmsg
openpyxl
google-cloud-vision
markdown
pdfminer.six
pytesseract
python-pptx<=0.6.23
unstructured.pytesseract>=0.3.12
pikepdf
xlrd
pypdf
pdf2image
python-docx>=1.1.2
networkx
onnx
pypandoc

[md]
markdown

[mongodb]
pymongo

[msg]
python-oxmsg

[notion]
notion-client
htmlBuilder

[odt]
python-docx>=1.1.2
pypandoc

[onedrive]
msal
Office365-REST-Python-Client
bs4

[openai]
langchain-community
tiktoken
openai

[opensearch]
opensearch-py

[org]
pypandoc

[outlook]
msal
Office365-REST-Python-Client

[paddleocr]
unstructured.paddleocr==2.6.1.3

[pdf]
onnx
pdf2image
pdfminer.six
pikepdf
pillow_heif
pypdf
pytesseract
google-cloud-vision
effdet
unstructured-inference==0.7.36
unstructured.pytesseract>=0.3.12

[pinecone]
pinecone-client>=3.7.1

[postgres]
psycopg2-binary

[ppt]
python-pptx<=0.6.23

[pptx]
python-pptx<=0.6.23

[qdrant]
qdrant-client

[reddit]
praw

[rst]
pypandoc

[rtf]
pypandoc

[s3]
s3fs
fsspec

[salesforce]
simple-salesforce

[sftp]
fsspec
paramiko

[sharepoint]
msal
Office365-REST-Python-Client

[singlestore]
singlestoredb

[slack]
slack_sdk

[tsv]
pandas

[weaviate]
weaviate-client

[wikipedia]
wikipedia

[xlsx]
openpyxl
pandas
xlrd
networkx
