LICENSE.txt
README.md
setup.py
geak_eval/__init__.py
geak_eval/constants.py
geak_eval/initializations.py
geak_eval/run.py
geak_eval/run_old.py
geak_eval.egg-info/PKG-INFO
geak_eval.egg-info/SOURCES.txt
geak_eval.egg-info/dependency_links.txt
geak_eval.egg-info/entry_points.txt
geak_eval.egg-info/requires.txt
geak_eval.egg-info/top_level.txt
geak_eval/data/__init__.py
geak_eval/data/ROCm/__init__.py
geak_eval/data/ROCm/data/__init__.py
geak_eval/data/ROCm/data/ROCm_v1/__init__.py
geak_eval/data/ROCm/data/ROCm_v1/gemm.py
geak_eval/data/ROCm/data/ROCm_v1/layernorm.py
geak_eval/data/ROCm/data/ROCm_v1/moe_gemm.py
geak_eval/data/ROCm/data/ROCm_v1/multreduce_matmul_dot_kernel.py
geak_eval/data/ROCm/data/ROCm_v1/naive_softmax.py
geak_eval/data/ROCm/data/ROCm_v1/rmsnorm_bwd.py
geak_eval/data/ROCm/data/ROCm_v1/rmsnorm_fwd.py
geak_eval/data/ROCm/data/ROCm_v1/softmax.py
geak_eval/data/ROCm/data/ROCm_v1/test_add_kernel.py
geak_eval/data/ROCm/data/ROCm_v1/test_batched_vecmat.py
geak_eval/data/ROCm/data/ROCm_v1/test_block_copy.py
geak_eval/data/ROCm/data/ROCm_v1/test_block_pointer_matmul.py
geak_eval/data/ROCm/data/ROCm_v1/test_cast_matmul.py
geak_eval/data/ROCm/data/ROCm_v1/test_chained_dot_fp8.py
geak_eval/data/ROCm/data/ROCm_v1/test_chained_matmul.py
geak_eval/data/ROCm/data/ROCm_v1/test_flashattention_fwd.py
geak_eval/data/ROCm/data/ROCm_v1/test_gemm_fusion.py
geak_eval/data/ROCm/data/ROCm_v1/test_gemm_no_scf.py
geak_eval/data/ROCm/data/ROCm_v1/test_iv_dependent_matmul.py
geak_eval/data/ROCm/data/ROCm_v1/test_kernel_dot.py
geak_eval/data/ROCm/data/ROCm_v1/test_kernel_sub.py
geak_eval/data/ROCm/data/ROCm_v1/test_load_reduce.py
geak_eval/data/ROCm/data/ROCm_v1/test_matmul_MXFP.py
geak_eval/data/ROCm/data/ROCm_v1/test_randn.py
geak_eval/data/ROCm/data/ROCm_v1/test_random_int.py
geak_eval/data/ROCm/data/ROCm_v1/test_reverse_range.py
geak_eval/data/ROCm/data/ROCm_v1/test_tma_store_gemm.py
geak_eval/data/ROCm/data/ROCm_v1/test_triton_flip.py
geak_eval/data/ROCm/data/ROCm_v1/test_triton_sort.py
geak_eval/data/ROCm/data/ROCm_v1/test_triton_swizzle2d.py
geak_eval/data/ROCm/data/ROCm_v1/triton_multreduce_matmul_kernel.py
geak_eval/data/TritonBench/__init__.py
geak_eval/data/TritonBench/data/__init__.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/__init__.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/adam_update_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/add_example.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/add_value.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/apply_penalty.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/attention_forward_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/attention_fwd_triton1.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/attention_fwd_triton2.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/attention_fwd_triton3.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/attention_kernel.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/attention_kernel_aligned.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/attention_llama.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/attention_score.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/attn_fwd_causal.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/attn_fwd_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/batched_vecmat_mult.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/bgmv_expand_slice.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/bgmv_shrink_kernel.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/block_sparse_attn.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/bmm_chunk_bwd.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/bmm_chunk_fwd.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/bmm_optimized.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/cache_transform_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/chunk_bwd_dqkg.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/chunk_cumsum_kernel.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/chunk_cumsum_vector.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/chunk_delta_fwd.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/chunk_gate_recurrence.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/chunk_gated_attention.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/chunk_gla_fwd.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/chunk_gla_simple.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/chunk_linear_attn.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/chunk_retention.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/chunk_retention_ops.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/chunked_cumsum_fwd.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/context_attn_bloom.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/context_attn_fwd.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/context_attn_llama.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/context_attn_mistral.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/context_attn_nopad.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/cosine_compute.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/cross_entropy1.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/cross_entropy2.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/cross_entropy_ops.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/decay_cumsum.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/dequantize_matmul.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/dequantize_rowwise.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/destindex_copy.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/destindex_copy_kv1.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/destindex_copy_kv2.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/diag_ssm_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/dropout_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/embedding_triton_kernel.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/f8_conversion_utils.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fast_ce_loss.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fast_layernorm.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fast_rms_layernorm.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fast_rope_embedding.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fifth_order_sph_harmonics.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/flash_attn.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/flash_decode2_llama.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/flash_decode2_phi.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fp4_to_bf16.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fp4_to_bf16_conversion.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fused_activation.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fused_layernorm_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fused_recurrent_delta.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fused_recurrent_hgrn.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fused_recurrent_retention.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fused_rotary_embedding.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/fused_rwkv6_kernel.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/geglu_tanh_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/index_select_bwd.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/index_select_cat.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/int4_matmul.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/int8_dequant_matmul.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/int8_matmul_kernel.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/int8_matmul_quantization.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/int8_quantization.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/int_scaled_matmul.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/isfinite_kernel.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/iv_dependent_matmul.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/kcache_copy_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/kldiv_compute.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/kldiv_ops.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/kldiv_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/ksoftmax_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/kv_cache_copy.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/kv_cache_filling.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/l2_norm_bwd.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/l2_norm_triton1.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/l2_norm_triton2.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/layer_norm_fwd.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/layer_norm_liger.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/layer_norm_ops.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/layer_norm_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/layer_norm_welfold.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/layernorm_fwd_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/lightning_attention.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/llama_ff_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/log_softmax.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/logsumexp_fwd.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/lora_expand_gemv.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/masked_add_cuda.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/masked_select.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matmul_dequant_int4.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matmul_dequantize.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matmul_dequantize_int4.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matmul_kernel.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matmul_leakyrelu.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matmul_leakyrelu_fp8.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matmul_persistent_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matmul_tma.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matmul_triton1.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matmul_triton2.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matmul_triton_autotune.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matrix_reduction.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matrix_transpose.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/matrix_vector_multip.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/max_reduction.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/mean_reduction.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/mixed_sparse_attention.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/mul_exponent_compensator.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/multinomial_sampling.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/nested_loops_processing.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/parallel_attention.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/parallel_retention_attention.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/pow_scalar_tensor.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/quant_transpose_kernel.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/quantize_copy_kv.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/quantize_global.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/quantize_kv_copy.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/quantize_kv_transform.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rbe_triton_transform.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/relu_strided_buffer.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/relu_triton_kernel.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/reversed_cumsum.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/reversed_cumsum_scalar.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rms_matmul_rbe.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rms_norm_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rms_rbe_matmul.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rmsnorm_fused.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rmsnorm_fused_llama.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rmsnorm_implementation.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rmsnorm_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rope_backward_transform.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rope_embedding.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rope_transform.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rotary_emb.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rotary_emb_nopad.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rotary_transform.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rotary_transform_ops.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/rowwise_quantization_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/seeded_dropout.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/sgmv_expand_slice.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/sin_computation.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/sin_kernel.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/softmax_flaggems.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/softmax_optimize.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/softmax_reducev.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/softmax_triton1.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/softmax_triton2.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/softmax_triton3.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/spinning_lock_reduction.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/square_matrix.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/streamk_matmul.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/swiglu_backward.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/swiglu_fwd.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/swiglu_triton.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/token_attn_llama2.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/token_attn_mistral.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/token_attn_reduceV.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/token_softmax_bloom.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/token_softmax_llama.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/triton_argmax.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/triton_attention.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/triton_conv2d_fwd.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/triton_linear_activation.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/triton_matmul.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/triton_mul2.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/triton_softmax.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/uniform_sampling.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/var_len_copy.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/vector_addition.py
geak_eval/data/TritonBench/data/TritonBench_G_v1/vector_addition_custom.py
geak_eval/data/TritonBench/performance_metrics/__init__.py
geak_eval/data/TritonBench/performance_metrics/perf_G/__init__.py
geak_eval/data/TritonBench/performance_metrics/perf_G/performance_utils.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/__init__.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/adam_update_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/add_example_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/add_value_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/apply_penalty_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/attention_forward_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/attention_fwd_triton1_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/attention_fwd_triton2_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/attention_fwd_triton3_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/attention_kernel_aligned_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/attention_kernel_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/attention_llama_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/attention_score_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/attn_fwd_causal_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/attn_fwd_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/batched_vecmat_mult_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/bgmv_expand_slice_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/bgmv_shrink_kernel_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/block_sparse_attn_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/bmm_chunk_bwd_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/bmm_chunk_fwd_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/bmm_optimized_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/cache_transform_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/chunk_bwd_dqkg_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/chunk_cumsum_kernel_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/chunk_cumsum_vector_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/chunk_delta_fwd_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/chunk_gate_recurrence_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/chunk_gated_attention_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/chunk_gla_fwd_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/chunk_gla_simple_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/chunk_linear_attn_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/chunk_retention_ops_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/chunk_retention_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/chunked_cumsum_fwd_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/context_attn_bloom_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/context_attn_fwd_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/context_attn_llama_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/context_attn_mistral_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/context_attn_nopad_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/cosine_compute_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/cross_entropy1_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/cross_entropy2_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/cross_entropy_ops_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/decay_cumsum_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/dequantize_matmul_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/dequantize_rowwise_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/destindex_copy_kv1_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/destindex_copy_kv2_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/destindex_copy_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/diag_ssm_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/dropout_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/embedding_triton_kernel_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/f8_conversion_utils_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fast_ce_loss_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fast_layernorm_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fast_rms_layernorm_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fast_rope_embedding_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fifth_order_sph_harmonics_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/flash_attn_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/flash_decode2_llama_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/flash_decode2_phi_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fp4_to_bf16_conversion_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fp4_to_bf16_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fused_activation_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fused_layernorm_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fused_recurrent_delta_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fused_recurrent_hgrn_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fused_recurrent_retention_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fused_rotary_embedding_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/fused_rwkv6_kernel_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/geglu_tanh_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/index_select_bwd_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/index_select_cat_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/int4_matmul_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/int8_dequant_matmul_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/int8_matmul_kernel_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/int8_matmul_quantization_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/int8_quantization_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/int_scaled_matmul_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/isfinite_kernel_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/iv_dependent_matmul_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/kcache_copy_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/kldiv_compute_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/kldiv_ops_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/kldiv_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/ksoftmax_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/kv_cache_copy_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/kv_cache_filling_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/l2_norm_bwd_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/l2_norm_triton1_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/l2_norm_triton2_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/layer_norm_fwd_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/layer_norm_liger_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/layer_norm_ops_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/layer_norm_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/layer_norm_welfold_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/layernorm_fwd_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/lightning_attention_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/llama_ff_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/log_softmax_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/logsumexp_fwd_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/lora_expand_gemv_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/masked_add_cuda_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/masked_select_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matmul_dequant_int4_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matmul_dequantize_int4_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matmul_dequantize_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matmul_kernel_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matmul_leakyrelu_fp8_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matmul_leakyrelu_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matmul_persistent_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matmul_tma_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matmul_triton1_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matmul_triton2_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matmul_triton_autotune_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matrix_reduction_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matrix_transpose_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/matrix_vector_multip_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/max_reduction_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/mean_reduction_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/mixed_sparse_attention_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/mul_exponent_compensator_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/multinomial_sampling_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/nested_loops_processing_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/parallel_attention_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/parallel_retention_attention_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/pow_scalar_tensor_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/quant_transpose_kernel_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/quantize_copy_kv_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/quantize_global_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/quantize_kv_copy_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/quantize_kv_transform_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rbe_triton_transform_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/relu_strided_buffer_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/relu_triton_kernel_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/reversed_cumsum_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/reversed_cumsum_scalar_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rms_matmul_rbe_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rms_norm_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rms_rbe_matmul_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rmsnorm_fused_llama_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rmsnorm_fused_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rmsnorm_implementation_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rmsnorm_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rope_backward_transform_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rope_embedding_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rope_transform_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rotary_emb_nopad_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rotary_emb_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rotary_transform_ops_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rotary_transform_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/rowwise_quantization_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/seeded_dropout_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/sgmv_expand_slice_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/sin_computation_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/sin_kernel_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/softmax_flaggems_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/softmax_optimize_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/softmax_reducev_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/softmax_triton1_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/softmax_triton2_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/softmax_triton3_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/spinning_lock_reduction_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/square_matrix_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/streamk_matmul_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/swiglu_backward_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/swiglu_fwd_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/swiglu_triton_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/token_attn_llama2_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/token_attn_mistral_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/token_attn_reduceV_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/token_softmax_bloom_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/token_softmax_llama_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/triton_argmax_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/triton_attention_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/triton_conv2d_fwd_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/triton_linear_activation_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/triton_matmul_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/triton_mul2_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/triton_softmax_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/uniform_sampling_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/var_len_copy_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/vector_addition_custom_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_metrics/vector_addition_perf.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/__init__.py
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/adam_update_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/add_example.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/add_value.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/attention_forward_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/attention_fwd_triton1.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/attention_fwd_triton2.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/attention_fwd_triton3.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/attention_kernel.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/attention_kernel_aligned.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/attention_llama.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/attention_score.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/attn_fwd_causal.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/attn_fwd_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/batched_vecmat_mult.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/bgmv_expand_slice.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/bgmv_shrink_kernel.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/block_sparse_attn.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/bmm_chunk_bwd.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/bmm_chunk_fwd.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/bmm_optimized.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/cache_transform_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/chunk_cumsum_kernel.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/chunk_cumsum_vector.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/chunk_delta_fwd.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/chunk_gate_recurrence.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/chunk_gated_attention.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/chunk_linear_attn.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/chunk_retention_ops.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/chunked_cumsum_fwd.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/context_attn_bloom.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/context_attn_fwd.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/context_attn_mistral.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/context_attn_nopad.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/cosine_compute.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/cross_entropy1.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/cross_entropy2.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/cross_entropy_ops.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/decay_cumsum.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/dequantize_matmul.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/dequantize_rowwise.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/destindex_copy.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/destindex_copy_kv1.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/destindex_copy_kv2.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/diag_ssm_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/dropout_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/embedding_triton_kernel.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/f8_conversion_utils.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/fast_ce_loss.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/fast_layernorm.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/fast_rms_layernorm.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/fast_rope_embedding.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/fifth_order_sph_harmonics.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/flash_attn.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/flash_decode2_llama.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/flash_decode2_phi.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/fp4_to_bf16.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/fused_activation.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/fused_layernorm_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/fused_recurrent_delta.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/fused_recurrent_hgrn.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/fused_recurrent_retention.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/fused_rwkv6_kernel.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/geglu_tanh_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/index_select_bwd.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/index_select_cat.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/int4_matmul.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/int8_matmul_kernel.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/int8_matmul_quantization.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/int8_quantization.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/int_scaled_matmul.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/isfinite_kernel.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/iv_dependent_matmul.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/kcache_copy_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/kldiv_compute.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/kldiv_ops.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/kldiv_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/ksoftmax_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/kv_cache_copy.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/kv_cache_filling.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/l2_norm_bwd.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/l2_norm_triton1.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/l2_norm_triton2.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/layer_norm_fwd.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/layer_norm_liger.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/layer_norm_ops.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/layer_norm_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/layer_norm_welfold.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/layernorm_fwd_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/lightning_attention.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/llama_ff_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/log_softmax.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/logsumexp_fwd.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/lora_expand_gemv.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/masked_add_cuda.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/masked_select.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/matmul_dequant_int4.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/matmul_dequantize.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/matmul_dequantize_int4.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/matmul_leakyrelu.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/matmul_leakyrelu_fp8.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/matmul_persistent_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/matmul_tma.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/matmul_triton1.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/matmul_triton2.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/matmul_triton_autotune.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/matrix_reduction.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/matrix_transpose.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/matrix_vector_multip.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/max_reduction.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/mean_reduction.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/mixed_sparse_attention.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/mul_exponent_compensator.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/multinomial_sampling.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/nested_loops_processing.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/parallel_attention.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/parallel_retention_attention.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/pow_scalar_tensor.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/quant_transpose_kernel.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/quantize_copy_kv.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/quantize_global.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/quantize_kv_copy.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/quantize_kv_transform.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rbe_triton_transform.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/relu_strided_buffer.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/relu_triton_kernel.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/reversed_cumsum.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/reversed_cumsum_scalar.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rms_matmul_rbe.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rms_norm_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rms_rbe_matmul.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rmsnorm_fused.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rmsnorm_fused_llama.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rmsnorm_implementation.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rmsnorm_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rope_backward_transform.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rope_embedding.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rope_transform.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rotary_emb.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rotary_transform.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rotary_transform_ops.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/rowwise_quantization_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/seeded_dropout.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/sgmv_expand_slice.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/sin_computation.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/sin_kernel.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/softmax_flaggems.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/softmax_optimize.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/softmax_reducev.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/softmax_triton1.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/softmax_triton2.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/softmax_triton3.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/spinning_lock_reduction.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/square_matrix.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/streamk_matmul.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/swiglu_backward.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/swiglu_fwd.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/swiglu_triton.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/token_attn_llama2.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/token_attn_mistral.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/token_attn_reduceV.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/token_softmax_bloom.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/token_softmax_llama.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/triton_argmax.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/triton_attention.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/triton_conv2d_fwd.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/triton_linear_activation.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/triton_matmul.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/triton_mul2.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/triton_softmax.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/uniform_sampling.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/var_len_copy.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/vector_addition.json
geak_eval/data/TritonBench/performance_metrics/perf_G/golden_results/vector_addition_custom.json
geak_eval/data/TritonBench/performance_metrics/perf_G/run_bench/__init__.py
geak_eval/data/TritonBench/performance_metrics/perf_G/run_bench/multiprocess_gpu_run.py
geak_eval/data/TritonBench/performance_metrics/perf_G/run_bench/write_file.py
geak_eval/evaluators/ROCm_correctness.py
geak_eval/evaluators/TB_correctness.py
geak_eval/evaluators/__init__.py
geak_eval/evaluators/base.py
geak_eval/evaluators/interface.py
geak_eval/helpers/__init__.py
geak_eval/helpers/generators.py
geak_eval/helpers/helper.py
geak_eval/helpers/time.py
geak_eval/metrics/__init__.py
geak_eval/metrics/accuracy.py
geak_eval/metrics/base.py
geak_eval/metrics/passk.py
geak_eval/perf/2_efficiency.py
geak_eval/perf/__init__.py
geak_eval/perf/base.py
geak_eval/perf/efficiency.py
geak_eval/perf/performance_utils.py
geak_eval/perf/ROCm/__init__.py
geak_eval/perf/ROCm/efficiency.py
geak_eval/perf/ROCm/performance_utils_pytest.py
geak_eval/perf/run_bench/__init__.py
geak_eval/perf/run_bench/multiprocess_gpu_run.py
geak_eval/perf/run_bench/performance_utils.py
geak_eval/perf/run_bench/write_file.py
geak_eval/processors/__init__.py
geak_eval/processors/base.py
geak_eval/processors/llm.py