Permutation importance is a feature evaluation method that measures the decrease in the model score when a single feature's values are randomly shuffled. Shuffling breaks the association between the feature and the true outcome, so the size of the drop indicates how much the model relies on that feature for prediction. Permutation importance is model agnostic and can avoid bias against low cardinality features. It is computed on a validation set during the training procedure and works only with certain partition methods. The example below shows how to calculate permutation feature importance with SQL: it builds the input tables, sets the parameters, and calls the PAL unified classification procedure.
------

-- All unqualified table names in the statements below resolve in the DM_PAL schema.
SET SCHEMA DM_PAL;

-- Metadata table: one row per variable, naming the variable and giving its type.
-- Rebuilt from scratch on every run; the DROP fails if the table does not exist yet.
DROP TABLE PAL_META_DATA_TBL;

CREATE COLUMN TABLE PAL_META_DATA_TBL (
    VARIABLE_NAME NVARCHAR(100),
    VARIABLE_TYPE NVARCHAR(100)
);

-- Two continuous attributes, two categorical attributes, and the target column.
INSERT INTO PAL_META_DATA_TBL (VARIABLE_NAME, VARIABLE_TYPE) VALUES ('ATTR1', 'CONTINUOUS');
INSERT INTO PAL_META_DATA_TBL (VARIABLE_NAME, VARIABLE_TYPE) VALUES ('ATTR2', 'CONTINUOUS');
INSERT INTO PAL_META_DATA_TBL (VARIABLE_NAME, VARIABLE_TYPE) VALUES ('ATTR3', 'CATEGORICAL');
INSERT INTO PAL_META_DATA_TBL (VARIABLE_NAME, VARIABLE_TYPE) VALUES ('ATTR4', 'CATEGORICAL');
INSERT INTO PAL_META_DATA_TBL (VARIABLE_NAME, VARIABLE_TYPE) VALUES ('LABEL', 'TARGET');

-- Pivoted (long-format) data table: one row per (DATA_ID, VARIABLE_NAME) pair,
-- so each sample with DATA_ID 0..14 is spread over five rows
-- (ATTR1, ATTR2, ATTR3, ATTR4, LABEL).
--
-- VAR_VALUE is NVARCHAR, so every value is written as a quoted string literal;
-- this avoids relying on implicit numeric-to-string conversion and keeps all
-- rows consistent with each other.
-- PURPOSE takes the values 1 and 2 here.
-- NOTE(review): presumably 1 = training and 2 = validation - confirm against the PAL docs.
-- Rebuilt from scratch on every run; the DROP fails if the table does not exist yet.
DROP TABLE PAL_DATA_TBL;

CREATE COLUMN TABLE PAL_DATA_TBL (
    DATA_ID       INTEGER,
    VARIABLE_NAME NVARCHAR(100),
    VAR_VALUE     NVARCHAR(100),
    PURPOSE       INTEGER
);

-- Samples 0-4: LABEL '1'
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (0, 'ATTR1', '1', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (0, 'ATTR2', '10', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (0, 'ATTR3', '100', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (0, 'ATTR4', 'A', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (0, 'LABEL', '1', 2);

INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (1, 'ATTR1', '1.1', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (1, 'ATTR2', '10.1', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (1, 'ATTR3', '100.1', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (1, 'ATTR4', 'A', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (1, 'LABEL', '1', 1);

INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (2, 'ATTR1', '1.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (2, 'ATTR2', '10.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (2, 'ATTR3', '100', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (2, 'ATTR4', 'A', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (2, 'LABEL', '1', 1);

INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (3, 'ATTR1', '1.3', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (3, 'ATTR2', '10.4', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (3, 'ATTR3', '100', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (3, 'ATTR4', 'A', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (3, 'LABEL', '1', 1);

INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (4, 'ATTR1', '1.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (4, 'ATTR2', '10.3', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (4, 'ATTR3', '100', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (4, 'ATTR4', 'AB', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (4, 'LABEL', '1', 1);

-- Samples 5-9: LABEL '2'
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (5, 'ATTR1', '4', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (5, 'ATTR2', '40', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (5, 'ATTR3', '400', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (5, 'ATTR4', 'AB', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (5, 'LABEL', '2', 1);

INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (6, 'ATTR1', '4.1', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (6, 'ATTR2', '40', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (6, 'ATTR3', '400', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (6, 'ATTR4', 'AB', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (6, 'LABEL', '2', 2);

INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (7, 'ATTR1', '4.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (7, 'ATTR2', '40.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (7, 'ATTR3', '400', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (7, 'ATTR4', 'AB', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (7, 'LABEL', '2', 1);

INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (8, 'ATTR1', '4.3', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (8, 'ATTR2', '40.4', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (8, 'ATTR3', '400', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (8, 'ATTR4', 'AB', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (8, 'LABEL', '2', 1);

INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (9, 'ATTR1', '4.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (9, 'ATTR2', '40.3', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (9, 'ATTR3', '400', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (9, 'ATTR4', 'AB', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (9, 'LABEL', '2', 1);

-- Samples 10-14: LABEL '3'
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (10, 'ATTR1', '9', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (10, 'ATTR2', '90', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (10, 'ATTR3', '900', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (10, 'ATTR4', 'B', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (10, 'LABEL', '3', 1);

INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (11, 'ATTR1', '9.1', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (11, 'ATTR2', '90.1', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (11, 'ATTR3', '900', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (11, 'ATTR4', 'A', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (11, 'LABEL', '3', 1);

INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (12, 'ATTR1', '9.2', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (12, 'ATTR2', '90.2', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (12, 'ATTR3', '900', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (12, 'ATTR4', 'B', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (12, 'LABEL', '3', 2);

INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (13, 'ATTR1', '9.3', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (13, 'ATTR2', '90.4', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (13, 'ATTR3', '900', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (13, 'ATTR4', 'A', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (13, 'LABEL', '3', 1);

INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (14, 'ATTR1', '9.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (14, 'ATTR2', '90.3', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (14, 'ATTR3', '900', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (14, 'ATTR4', 'A', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (14, 'LABEL', '3', 1);

-- Parameter table: one row per setting. Each row fills exactly one of
-- INT_VALUE / DOUBLE_VALUE / STRING_VALUE and leaves the other two NULL.
-- Rebuilt from scratch on every run; the DROP fails if the table does not exist yet.
DROP TABLE PAL_PARAMETER_TBL;

CREATE COLUMN TABLE PAL_PARAMETER_TBL (
    PARAM_NAME   VARCHAR(100),
    INT_VALUE    INTEGER,
    DOUBLE_VALUE DOUBLE,
    STRING_VALUE VARCHAR(100)
);

-- Algorithm selection.
-- NOTE(review): 'RDT' presumably selects random decision trees - confirm against the PAL docs.
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('FUNCTION', NULL, NULL, 'RDT');

-- Model hyper-parameters and training seed.
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('SPLIT_THRESHOLD', NULL, 0.0000001, NULL);
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('MIN_SAMPLES_LEAF', 1, NULL, NULL);
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('N_ESTIMATORS', 10, NULL, NULL);
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('MAX_DEPTH', 55, NULL, NULL);
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('SEED', 2, NULL, NULL);

-- Train/validation partitioning.
-- NOTE(review): method 2 is presumably a stratified random split, given that a
-- stratification variable and a training percentage follow - confirm against the PAL docs.
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('PARTITION_METHOD', 2, NULL, NULL);
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('PARTITION_RANDOM_SEED', 2, NULL, NULL);
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('PARTITION_STRATIFIED_VARIABLE', NULL, NULL, 'LABEL');
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('PARTITION_TRAINING_PERCENT', NULL, 0.7, NULL);
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('OUTPUT_PARTITION_RESULT', 1, NULL, NULL);

-- Permutation importance settings: switch, scoring metric, repeat count, seed, sample count.
-- NOTE(review): PERMUTATION_N_SAMPLES = 0 presumably means "use all samples" - confirm.
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('PERMUTATION_IMPORTANCE', 1, NULL, NULL);
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('PERMUTATION_EVALUATION_METRIC', NULL, NULL, 'ACCURACY');
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('PERMUTATION_N_REPEATS', 5, NULL, NULL);
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('PERMUTATION_SEED', 2, NULL, NULL);
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE)
    VALUES ('PERMUTATION_N_SAMPLES', 0, NULL, NULL);

-- Anonymous block: load the three input tables into table variables, call the
-- pivot variant of PAL unified classification, and return the importance output.
DO
BEGIN
  -- Inputs, with columns listed in table-definition order.
  lt_meta = SELECT VARIABLE_NAME, VARIABLE_TYPE
            FROM PAL_META_DATA_TBL;
  lt_samples = SELECT DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE
               FROM PAL_DATA_TBL;
  lt_params = SELECT PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE
              FROM PAL_PARAMETER_TBL;

  -- Three inputs followed by eight outputs; only the second output is read afterwards.
  CALL _SYS_AFL.PAL_UNIFIED_CLASSIFICATION_PIVOT (
    :lt_meta,
    :lt_samples,
    :lt_params,
    lt_model_out,
    lt_importance,
    lt_statistics,
    lt_optimal_param,
    lt_confusion,
    lt_metrics_out,
    lt_partition_out,
    lt_placeholder
  );

  -- Result set of the block: the importance table produced by the call.
  SELECT * FROM :lt_importance;
END;
