The content explains different methods for handling missing values in a table. The methods include filling missing values with the mode, mean, median, or a specified value, using a matrix completion model, deleting rows with missing values, or leaving the column untouched. The content also provides a parameter table with optional parameters for customizing the handling of missing values.
------

-- All objects below are created in (and resolved against) the DM_PAL schema.
SET SCHEMA DM_PAL;

-- Metadata table: one row per variable that appears in PAL_DATA_TBL.VARIABLE_NAME,
-- giving the role/type of that variable (CONTINUOUS, CATEGORICAL or TARGET).
-- NOTE: the plain DROP TABLE fails if the table does not exist yet (e.g. on a
-- first run); the script is written to be re-run against an existing setup.
DROP TABLE PAL_META_DATA_TBL;
CREATE COLUMN TABLE PAL_META_DATA_TBL (
    VARIABLE_NAME NVARCHAR(100),
    VARIABLE_TYPE NVARCHAR(100)
);

-- Explicit column lists keep these inserts correct even if the table
-- definition is later reordered or extended.
INSERT INTO PAL_META_DATA_TBL (VARIABLE_NAME, VARIABLE_TYPE) VALUES ('ATTR1', 'CONTINUOUS');
INSERT INTO PAL_META_DATA_TBL (VARIABLE_NAME, VARIABLE_TYPE) VALUES ('ATTR2', 'CONTINUOUS');
INSERT INTO PAL_META_DATA_TBL (VARIABLE_NAME, VARIABLE_TYPE) VALUES ('ATTR3', 'CATEGORICAL');
INSERT INTO PAL_META_DATA_TBL (VARIABLE_NAME, VARIABLE_TYPE) VALUES ('ATTR4', 'CATEGORICAL');
-- LABEL is the variable the classifier is trained to predict.
INSERT INTO PAL_META_DATA_TBL (VARIABLE_NAME, VARIABLE_TYPE) VALUES ('LABEL', 'TARGET');

-- Pivoted (long-format) input data: one row per (DATA_ID, VARIABLE_NAME) pair.
--   DATA_ID       : identifier of the logical record (0..14 here)
--   VARIABLE_NAME : one of the names declared in PAL_META_DATA_TBL
--   VAR_VALUE     : the value, always stored as text (NVARCHAR)
--   PURPOSE       : integer flag, 1 or 2 in this data set
--                   (presumably 1 = training, 2 = testing -- confirm against the PAL docs)
-- NOTE: the plain DROP TABLE fails if the table does not exist yet.
DROP TABLE PAL_DATA_TBL;
CREATE COLUMN TABLE PAL_DATA_TBL (
    DATA_ID       INTEGER,
    VARIABLE_NAME NVARCHAR(100),
    VAR_VALUE     NVARCHAR(100),
    PURPOSE       INTEGER
);

-- Every VAR_VALUE is written as a string literal so that the stored text does
-- not depend on implicit numeric-to-NVARCHAR conversion, and every INSERT
-- names its target columns so it does not depend on column order.

-- DATA_ID 0
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (0, 'ATTR1', '1', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (0, 'ATTR2', '10', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (0, 'ATTR3', '100', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (0, 'ATTR4', 'A', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (0, 'LABEL', '1', 2);
-- DATA_ID 1
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (1, 'ATTR1', '1.1', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (1, 'ATTR2', '10.1', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (1, 'ATTR3', '100.1', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (1, 'ATTR4', 'A', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (1, 'LABEL', '1', 1);
-- DATA_ID 2
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (2, 'ATTR1', '1.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (2, 'ATTR2', '10.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (2, 'ATTR3', '100', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (2, 'ATTR4', 'A', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (2, 'LABEL', '1', 1);
-- DATA_ID 3
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (3, 'ATTR1', '1.3', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (3, 'ATTR2', '10.4', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (3, 'ATTR3', '100', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (3, 'ATTR4', 'A', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (3, 'LABEL', '1', 1);
-- DATA_ID 4
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (4, 'ATTR1', '1.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (4, 'ATTR2', '10.3', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (4, 'ATTR3', '100', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (4, 'ATTR4', 'AB', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (4, 'LABEL', '1', 1);
-- DATA_ID 5
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (5, 'ATTR1', '4', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (5, 'ATTR2', '40', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (5, 'ATTR3', '400', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (5, 'ATTR4', 'AB', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (5, 'LABEL', '2', 1);
-- DATA_ID 6
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (6, 'ATTR1', '4.1', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (6, 'ATTR2', '40', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (6, 'ATTR3', '400', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (6, 'ATTR4', 'AB', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (6, 'LABEL', '2', 2);
-- DATA_ID 7
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (7, 'ATTR1', '4.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (7, 'ATTR2', '40.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (7, 'ATTR3', '400', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (7, 'ATTR4', 'AB', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (7, 'LABEL', '2', 1);
-- DATA_ID 8
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (8, 'ATTR1', '4.3', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (8, 'ATTR2', '40.4', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (8, 'ATTR3', '400', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (8, 'ATTR4', 'AB', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (8, 'LABEL', '2', 1);
-- DATA_ID 9
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (9, 'ATTR1', '4.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (9, 'ATTR2', '40.3', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (9, 'ATTR3', '400', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (9, 'ATTR4', 'AB', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (9, 'LABEL', '2', 1);
-- DATA_ID 10
-- ATTR2 (would have been '90') is intentionally NOT inserted for this record:
-- in the pivoted layout a missing value is simply an absent row.
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (10, 'ATTR1', '9', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (10, 'ATTR3', '900', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (10, 'ATTR4', 'B', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (10, 'LABEL', '3', 1);
-- DATA_ID 11
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (11, 'ATTR1', '9.1', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (11, 'ATTR2', '90.1', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (11, 'ATTR3', '900', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (11, 'ATTR4', 'A', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (11, 'LABEL', '3', 1);
-- DATA_ID 12
-- ATTR2 (would have been '90.2') is intentionally NOT inserted for this record,
-- giving a second record with a missing value.
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (12, 'ATTR1', '9.2', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (12, 'ATTR3', '900', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (12, 'ATTR4', 'B', 2);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (12, 'LABEL', '3', 2);
-- DATA_ID 13
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (13, 'ATTR1', '9.3', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (13, 'ATTR2', '90.4', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (13, 'ATTR3', '900', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (13, 'ATTR4', 'A', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (13, 'LABEL', '3', 1);
-- DATA_ID 14
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (14, 'ATTR1', '9.2', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (14, 'ATTR2', '90.3', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (14, 'ATTR3', '900', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (14, 'ATTR4', 'A', 1);
INSERT INTO PAL_DATA_TBL (DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE) VALUES (14, 'LABEL', '3', 1);


-- Control/parameter table passed to the PAL procedure. Each row sets one
-- parameter; the value goes into exactly one of the three typed columns
-- (INT_VALUE, DOUBLE_VALUE or STRING_VALUE) and the other two stay NULL.
-- NOTE: the plain DROP TABLE fails if the table does not exist yet.
DROP TABLE PAL_PARAMETER_TBL;
CREATE COLUMN TABLE PAL_PARAMETER_TBL (
    "PARAM_NAME"   VARCHAR(100),
    "INT_VALUE"    INTEGER,
    "DOUBLE_VALUE" DOUBLE,
    "STRING_VALUE" VARCHAR(100)
);

-- Each INSERT names only the column that carries the value, so a value can
-- no longer be placed in the wrong typed slot by miscounting NULLs.

-- Classification algorithm to run (M_LOGR; presumably multi-class logistic
-- regression -- confirm against the PAL Unified Classification docs).
INSERT INTO PAL_PARAMETER_TBL ("PARAM_NAME", "STRING_VALUE") VALUES ('FUNCTION', 'M_LOGR');
-- Partitioning mode 2 (NOTE(review): looks like the stratified/random split,
-- given the two PARTITION_* settings below -- confirm the code meaning).
INSERT INTO PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE") VALUES ('PARTITION_METHOD', 2);
-- Enables handling of the records whose ATTR2 row is absent from the data
-- (exact strategy selected by value 1 is algorithm-specific -- TODO confirm).
INSERT INTO PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE") VALUES ('HANDLE_MISSING_VALUE', 1);

-- Partition on the LABEL variable, with 0.7 as the training fraction.
INSERT INTO PAL_PARAMETER_TBL ("PARAM_NAME", "STRING_VALUE") VALUES ('PARTITION_STRATIFIED_VARIABLE', 'LABEL');
INSERT INTO PAL_PARAMETER_TBL ("PARAM_NAME", "DOUBLE_VALUE") VALUES ('PARTITION_TRAINING_PERCENT', 0.7);
-- Ask the procedure to return the partition assignment as an output table.
INSERT INTO PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE") VALUES ('OUTPUT_PARTITION_RESULT', 1);

-- ---------------------------------------------------------------------------
-- Result tables. Each one is dropped and re-created empty so that a re-run of
-- the script starts from a clean state; the anonymous block further down
-- fills them from the output table variables of the classification call.
-- ---------------------------------------------------------------------------

-- Trained model, stored as indexed text rows.
DROP TABLE PAL_MODEL_TBL;
CREATE COLUMN TABLE PAL_MODEL_TBL (
    ROW_INDEX     INTEGER,
    TREE_INDEX    INTEGER,
    MODEL_CONTENT NVARCHAR(5000)
);

-- Per-variable importance values.
DROP TABLE PAL_IMP_TBL;
CREATE COLUMN TABLE PAL_IMP_TBL (
    VARIABLE_NAME NVARCHAR(256),
    IMPORTANCE    DOUBLE
);

-- Named statistics, with a class name column alongside each value.
DROP TABLE PAL_STAT_TBL;
CREATE COLUMN TABLE PAL_STAT_TBL (
    STAT_NAME  NVARCHAR(256),
    STAT_VALUE NVARCHAR(1000),
    CLASS_NAME NVARCHAR(256)
);

-- Parameter rows returned by the procedure; same layout as the input
-- parameter table (name plus one INTEGER, DOUBLE and string value column).
DROP TABLE PAL_OPT_PARAM_TBL;
CREATE COLUMN TABLE PAL_OPT_PARAM_TBL (
    PARAM_NAME   VARCHAR(100),
    INT_VALUE    INTEGER,
    DOUBLE_VALUE DOUBLE,
    STRING_VALUE VARCHAR(100)
);

-- Confusion matrix: a count per (actual class, predicted class) pair.
-- "COUNT" stays quoted because it is also the name of an aggregate function.
DROP TABLE PAL_CMATRIX_TBL;
CREATE COLUMN TABLE PAL_CMATRIX_TBL (
    ACTUAL_CLASS    NVARCHAR(256),
    PREDICTED_CLASS NVARCHAR(256),
    "COUNT"         INTEGER
);

-- Named metric points given as (X, Y) pairs.
DROP TABLE PAL_METRICS_TBL;
CREATE COLUMN TABLE PAL_METRICS_TBL (
    "NAME" NVARCHAR(256),
    "X"    DOUBLE,
    "Y"    DOUBLE
);

-- Partition result: an integer type code per record ID.
DROP TABLE PAL_PARTITION_TBL;
CREATE COLUMN TABLE PAL_PARTITION_TBL (
    "ID"   INTEGER,
    "TYPE" INTEGER
);

-- Anonymous block: trains the classifier on the pivoted data and persists
-- every output of the PAL call into the result tables created above.
DO
BEGIN
	-- The procedure binds its table inputs by column position, so the column
	-- order is spelled out here instead of relying on SELECT *.
	lt_meta_data = SELECT VARIABLE_NAME, VARIABLE_TYPE FROM PAL_META_DATA_TBL;
	lt_data = SELECT DATA_ID, VARIABLE_NAME, VAR_VALUE, PURPOSE FROM PAL_DATA_TBL;
	lt_ctrl = SELECT "PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE" FROM PAL_PARAMETER_TBL;

	-- Three inputs (metadata, data, parameters) followed by eight outputs.
	-- lt_ph1 is the last output; it is received but not stored anywhere.
	CALL _SYS_AFL.PAL_UNIFIED_CLASSIFICATION_PIVOT (:lt_meta_data, :lt_data, :lt_ctrl, lt_model, lt_imp, lt_stat, lt_opt, lt_cm, lt_metrics, lt_partition, lt_ph1);

	-- Output column names are defined by the procedure and are not visible in
	-- this script, so the copies below stay positional (SELECT *).
	INSERT INTO PAL_MODEL_TBL SELECT * FROM :lt_model;
	INSERT INTO PAL_IMP_TBL SELECT * FROM :lt_imp;
	INSERT INTO PAL_STAT_TBL SELECT * FROM :lt_stat;
	INSERT INTO PAL_OPT_PARAM_TBL SELECT * FROM :lt_opt;
	INSERT INTO PAL_CMATRIX_TBL SELECT * FROM :lt_cm;
	INSERT INTO PAL_METRICS_TBL SELECT * FROM :lt_metrics;
	INSERT INTO PAL_PARTITION_TBL SELECT * FROM :lt_partition;
END;

-- Show the stored model. Without ORDER BY the row order is not guaranteed,
-- which matters because the model content is spread over indexed rows.
SELECT "ROW_INDEX", "TREE_INDEX", "MODEL_CONTENT"
FROM PAL_MODEL_TBL
ORDER BY "TREE_INDEX", "ROW_INDEX";
