.gitignore
CONTRIBUTING.md
LICENSE
Makefile
README.md
pyproject.toml
setup.sh
.github/CODEOWNERS
.github/PULL_REQUEST_TEMPLATE.md
.github/workflows/check-commits.yml
.github/workflows/linux-cpu-tests.yml
.github/workflows/linux-cuda-tests.yml
.github/workflows/linux-examples.yml
.github/workflows/python-quality.yml
.github/workflows/security.yml
.github/workflows/stale.yml
bench/generation/README.md
bench/generation/evaluate_configurations.py
bench/generation/evaluate_many_models.sh
bench/generation/evaluate_model.py
bench/generation/gen_barchart.py
bench/generation/charts/google-gemma-2b_bf16_Accuracy.png
bench/generation/charts/google-gemma-2b_bf16_Latency__ms_.png
bench/generation/charts/google-gemma-2b_bf16_Perplexity.png
bench/generation/charts/meta-llama-Meta-Llama-3.1-8B_bf16_Accuracy.png
bench/generation/charts/meta-llama-Meta-Llama-3.1-8B_bf16_Latency__ms_.png
bench/generation/charts/meta-llama-Meta-Llama-3.1-8B_bf16_Perplexity.png
bench/generation/charts/mistralai-Mistral-7B-Instruct-v0.3_bf16_Accuracy.png
bench/generation/charts/mistralai-Mistral-7B-Instruct-v0.3_bf16_Latency__ms_.png
bench/generation/charts/mistralai-Mistral-7B-Instruct-v0.3_bf16_Perplexity.png
bench/generation/metrics/__init__.py
bench/generation/metrics/latency.py
bench/generation/metrics/perplexity.py
bench/generation/metrics/prediction.py
bench/generation/setup/__init__.py
bench/generation/setup/awq.py
bench/generation/setup/bnb.py
bench/generation/setup/hqq.py
bench/generation/setup/quanto.py
bench/kernels/benchmark.py
bench/kernels/benchmark_marlin_fp8.py
bench/kernels/benchmark_w4a16.py
bench/torch_kernels/README.md
bench/torch_kernels/test_int_mm.py
bench/torch_kernels/test_int_mm_inductor.py
bench/torch_kernels/test_weight_int4pack_mm.py
bench/torch_kernels/test_weight_int8pack_mm.py
examples/nlp/text-classification/sst2/quantize_sst2_model.py
examples/nlp/text-generation/quantize_causal_lm_model.py
examples/speech/speech_recognition/quantize_asr_model.py
examples/speech/speech_recognition/requirements.txt
examples/vision/StableDiffusion/README.md
examples/vision/StableDiffusion/quantize_StableDiffusion.py
examples/vision/StableDiffusion/requirements.txt
examples/vision/image-classification/mnist/quantize_mnist_model.py
examples/vision/image-classification/pets/quantize_vit_model.py
examples/vision/object-detection/quantize_owl_model.py
examples/vision/text-to-image/quantize_pixart_sigma.py
external/awq/conftest.py
external/awq/pack_intweight.py
external/awq/packing_utils.py
external/awq/test_awq_kernels.py
external/awq/test_awq_packing.py
external/awq/test_awq_quantize.py
external/smoothquant/README.md
external/smoothquant/smoothquant.py
optimum/quanto/__init__.py
optimum/quanto/calibrate.py
optimum/quanto/quantize.py
optimum/quanto/library/README.md
optimum/quanto/library/__init__.py
optimum/quanto/library/qbytes_mm.py
optimum/quanto/library/quantize.py
optimum/quanto/library/unpack.py
optimum/quanto/library/extensions/README.md
optimum/quanto/library/extensions/__init__.py
optimum/quanto/library/extensions/extension.py
optimum/quanto/library/extensions/cpp/README.md
optimum/quanto/library/extensions/cpp/__init__.py
optimum/quanto/library/extensions/cpp/pybind_module.cpp
optimum/quanto/library/extensions/cpp/unpack.cpp
optimum/quanto/library/extensions/cpp/unpack.h
optimum/quanto/library/extensions/cuda/README.md
optimum/quanto/library/extensions/cuda/__init__.py
optimum/quanto/library/extensions/cuda/pybind_module.cpp
optimum/quanto/library/extensions/cuda/unpack.cu
optimum/quanto/library/extensions/cuda/unpack.h
optimum/quanto/library/extensions/cuda/awq/dequantize.cuh
optimum/quanto/library/extensions/cuda/awq/v2/gemm_cuda.cu
optimum/quanto/library/extensions/cuda/awq/v2/gemm_cuda.h
optimum/quanto/library/extensions/cuda/awq/v2/gemv_cuda.cu
optimum/quanto/library/extensions/cuda/awq/v2/gemv_cuda.h
optimum/quanto/library/extensions/cuda/awq/v2/semaphore.h
optimum/quanto/library/extensions/cuda/marlin/COPYRIGHT
optimum/quanto/library/extensions/cuda/marlin/fp8_marlin.cu
optimum/quanto/library/extensions/cuda/marlin/fp8_marlin.cuh
optimum/quanto/library/extensions/cuda/marlin/gptq_marlin.cuh
optimum/quanto/library/extensions/cuda/marlin/gptq_marlin_dtypes.cuh
optimum/quanto/library/extensions/cuda/marlin/gptq_marlin_repack.cu
optimum/quanto/library/extensions/cuda/marlin/gptq_marlin_repack.cuh
optimum/quanto/library/extensions/cuda/marlin/marlin_cuda.cpp
optimum/quanto/library/extensions/cuda/marlin/marlin_cuda.h
optimum/quanto/library/extensions/cuda/marlin/marlin_cuda_kernel.cu
optimum/quanto/library/extensions/cuda/marlin/marlin_cuda_kernel.cuh
optimum/quanto/library/extensions/hip/__init__.py
optimum/quanto/library/extensions/hip/pybind_module.cpp
optimum/quanto/library/extensions/hip/unpack.cu
optimum/quanto/library/extensions/hip/unpack.h
optimum/quanto/library/extensions/mps/README.md
optimum/quanto/library/extensions/mps/__init__.py
optimum/quanto/library/extensions/mps/pybind_module.cpp
optimum/quanto/library/extensions/mps/unpack.h
optimum/quanto/library/extensions/mps/unpack.mm
optimum/quanto/models/__init__.py
optimum/quanto/models/diffusers_models.py
optimum/quanto/models/shared_dict.py
optimum/quanto/models/transformers_models.py
optimum/quanto/nn/__init__.py
optimum/quanto/nn/qconv2d.py
optimum/quanto/nn/qlayernorm.py
optimum/quanto/nn/qlinear.py
optimum/quanto/nn/qmodule.py
optimum/quanto/subpackage/__init__.py
optimum/quanto/subpackage/commands/__init__.py
optimum/quanto/subpackage/commands/base.py
optimum/quanto/subpackage/commands/quantize.py
optimum/quanto/tensor/__init__.py
optimum/quanto/tensor/core.py
optimum/quanto/tensor/function.py
optimum/quanto/tensor/grouped.py
optimum/quanto/tensor/packed.py
optimum/quanto/tensor/qbits.py
optimum/quanto/tensor/qbytes.py
optimum/quanto/tensor/qtensor.py
optimum/quanto/tensor/qtype.py
optimum/quanto/tensor/activations/__init__.py
optimum/quanto/tensor/activations/qbytes.py
optimum/quanto/tensor/activations/qbytes_ops.py
optimum/quanto/tensor/activations/quantization.py
optimum/quanto/tensor/optimizers/__init__.py
optimum/quanto/tensor/optimizers/absmax_optimizer.py
optimum/quanto/tensor/optimizers/affine_optimizer.py
optimum/quanto/tensor/optimizers/hqq_optimizer.py
optimum/quanto/tensor/optimizers/max_optimizer.py
optimum/quanto/tensor/optimizers/optimizer.py
optimum/quanto/tensor/optimizers/symmetric_optimizer.py
optimum/quanto/tensor/weights/__init__.py
optimum/quanto/tensor/weights/packing.py
optimum/quanto/tensor/weights/qbits.py
optimum/quanto/tensor/weights/qbytes.py
optimum/quanto/tensor/weights/quantization.py
optimum/quanto/tensor/weights/reordering.py
optimum/quanto/tensor/weights/awq/__init__.py
optimum/quanto/tensor/weights/awq/packed.py
optimum/quanto/tensor/weights/awq/qbits.py
optimum/quanto/tensor/weights/marlin/__init__.py
optimum/quanto/tensor/weights/marlin/permutations.py
optimum/quanto/tensor/weights/marlin/fp8/__init__.py
optimum/quanto/tensor/weights/marlin/fp8/packed.py
optimum/quanto/tensor/weights/marlin/fp8/qbits.py
optimum/quanto/tensor/weights/marlin/int4/__init__.py
optimum/quanto/tensor/weights/marlin/int4/packed.py
optimum/quanto/tensor/weights/marlin/int4/qbits.py
optimum/quanto/tensor/weights/tinygemm/__init__.py
optimum/quanto/tensor/weights/tinygemm/packed.py
optimum/quanto/tensor/weights/tinygemm/qbits.py
optimum_quanto.egg-info/PKG-INFO
optimum_quanto.egg-info/SOURCES.txt
optimum_quanto.egg-info/dependency_links.txt
optimum_quanto.egg-info/requires.txt
optimum_quanto.egg-info/top_level.txt
test/conftest.py
test/helpers.py
test/cli/cli_helpers.py
test/cli/test_quantize_cli.py
test/library/test_extensions.py
test/library/test_mm.py
test/library/test_quantize.py
test/library/test_unpack.py
test/models/conftest.py
test/models/test_quantized_model_for_causal_lm.py
test/models/test_quantized_model_for_pixart.py
test/nn/test_calibrate.py
test/nn/test_qattention.py
test/nn/test_qconv2d.py
test/nn/test_qlayernorm.py
test/nn/test_qlinear.py
test/nn/test_qmodule.py
test/quantize/test_quantize_mlp.py
test/quantize/test_quantize_patterns.py
test/quantize/test_requantize.py
test/tensor/test_absmax.py
test/tensor/test_packed_tensor.py
test/tensor/activations/test_activations_compile.py
test/tensor/activations/test_activations_dispatch.py
test/tensor/activations/test_activations_quantize.py
test/tensor/ops/test_linear_dispatch.py
test/tensor/ops/test_mm_dispatch.py
test/tensor/optimizers/test_hqq_optimizer.py
test/tensor/weights/test_weight_qbits_tensor.py
test/tensor/weights/test_weight_qbits_tensor_dispatch.py
test/tensor/weights/test_weight_qbits_tensor_instantiate.py
test/tensor/weights/test_weight_qbits_tensor_quantize.py
test/tensor/weights/test_weight_qbytes_tensor_backward.py
test/tensor/weights/test_weight_qbytes_tensor_dispatch.py
test/tensor/weights/test_weight_qbytes_tensor_instantiate.py
test/tensor/weights/test_weight_qbytes_tensor_quantize.py
test/tensor/weights/test_weight_qbytes_tensor_serialization.py
test/tensor/weights/weight_helpers.py
test/tensor/weights/optimized/test_awq_packed_tensor.py
test/tensor/weights/optimized/test_awq_weight_qbits_tensor.py
test/tensor/weights/optimized/test_marlin_fp8_packed_tensor.py
test/tensor/weights/optimized/test_marlin_int4_packed_tensor.py
test/tensor/weights/optimized/test_marlin_int4_weight_qbits_tensor.py
test/tensor/weights/optimized/test_marlin_qbytes_tensor.py
test/tensor/weights/optimized/test_tinygemm_packed_tensor.py
test/tensor/weights/optimized/test_tinygemm_weight_qbits_tensor.py