Principal Component Analysis (PCA) is a technique for reducing the dimensionality of multivariate data while preserving as much of the variation in the original data set as possible. It is particularly useful when the variables in the data set are highly correlated. PCA transforms the original variables into a new set of variables (the principal components) that are linear combinations of the original variables, uncorrelated with each other, and ordered by the amount of variation they explain. The PCA of a data matrix can be computed through singular value decomposition (SVD): the matrix of loadings holds the coefficients of the linear combinations that define each principal component, and the matrix of scores holds the projections of the observations onto the principal components, i.e. the transformed data. Note that if a variable has a constant value across all data items, its standard deviation is zero, so the variables cannot be scaled to unit variance.
------

-- All objects below are created in, and resolved against, the DM_PAL schema.
SET SCHEMA "DM_PAL";

-- Input data for the PCA run: one row per observation, identified by "ID",
-- with six numeric variables "X1".."X6".
-- NOTE: DROP TABLE raises an error if the table does not exist yet (first run);
-- that error can be ignored.
DROP TABLE PAL_PCA_DATA_TBL;
CREATE COLUMN TABLE PAL_PCA_DATA_TBL (
    "ID" INTEGER,
    "X1" DOUBLE,
    "X2" DOUBLE,
    "X3" DOUBLE,
    "X4" DOUBLE,
    "X5" DOUBLE,
    "X6" DOUBLE
);

-- Column lists are spelled out so each value is tied to a named column
-- instead of depending on the column order of the DDL above.
INSERT INTO PAL_PCA_DATA_TBL ("ID", "X1", "X2", "X3", "X4", "X5", "X6") VALUES (1, 12, 52, 20, 44, 48, 16);
INSERT INTO PAL_PCA_DATA_TBL ("ID", "X1", "X2", "X3", "X4", "X5", "X6") VALUES (2, 12, 57, 25, 45, 50, 16);
INSERT INTO PAL_PCA_DATA_TBL ("ID", "X1", "X2", "X3", "X4", "X5", "X6") VALUES (3, 12, 54, 21, 45, 50, 16);
INSERT INTO PAL_PCA_DATA_TBL ("ID", "X1", "X2", "X3", "X4", "X5", "X6") VALUES (4, 13, 52, 21, 46, 51, 17);
INSERT INTO PAL_PCA_DATA_TBL ("ID", "X1", "X2", "X3", "X4", "X5", "X6") VALUES (5, 14, 54, 24, 46, 51, 17);
INSERT INTO PAL_PCA_DATA_TBL ("ID", "X1", "X2", "X3", "X4", "X5", "X6") VALUES (6, 22, 52, 25, 54, 58, 26);
INSERT INTO PAL_PCA_DATA_TBL ("ID", "X1", "X2", "X3", "X4", "X5", "X6") VALUES (7, 22, 56, 26, 55, 58, 27);
INSERT INTO PAL_PCA_DATA_TBL ("ID", "X1", "X2", "X3", "X4", "X5", "X6") VALUES (8, 17, 52, 21, 45, 52, 17);
INSERT INTO PAL_PCA_DATA_TBL ("ID", "X1", "X2", "X3", "X4", "X5", "X6") VALUES (9, 15, 53, 24, 45, 53, 18);
INSERT INTO PAL_PCA_DATA_TBL ("ID", "X1", "X2", "X3", "X4", "X5", "X6") VALUES (10, 23, 54, 23, 53, 57, 24);
INSERT INTO PAL_PCA_DATA_TBL ("ID", "X1", "X2", "X3", "X4", "X5", "X6") VALUES (11, 25, 54, 23, 55, 58, 25);

-- Control parameters handed to PAL_PCA: one row per parameter, with the value
-- stored in whichever of INT_VALUE / DOUBLE_VALUE / STRING_VALUE matches its
-- type and the remaining value columns left NULL.
-- NOTE: DROP TABLE raises an error if the table does not exist yet (first run).
DROP TABLE PAL_PARAMETER_TBL;
CREATE COLUMN TABLE PAL_PARAMETER_TBL (
    "PARAM_NAME"   VARCHAR(256),
    "INT_VALUE"    INTEGER,
    "DOUBLE_VALUE" DOUBLE,
    "STRING_VALUE" VARCHAR(1000)
);

-- Column lists are explicit so each value is bound to a named column rather
-- than to the column order of the DDL above.
-- SCALING = 1: presumably scales the variables before the analysis — confirm
-- against the PAL PCA parameter reference.
INSERT INTO PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('SCALING', 1, NULL, NULL);
-- SCORES = 1: presumably requests the per-observation scores output — confirm
-- against the PAL PCA parameter reference.
INSERT INTO PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('SCORES', 1, NULL, NULL);
-- THREAD_RATIO = 0.5: presumably the fraction of available threads PAL may
-- use — confirm against the PAL PCA parameter reference.
INSERT INTO PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('THREAD_RATIO', NULL, 0.5, NULL);

-- Persistent tables that receive the four result sets of the PCA run.
-- Each table is dropped and immediately recreated so a re-run starts empty.
-- NOTE: a DROP TABLE raises an error if its table does not exist yet (first run).

-- Loadings: one row per component, one loading column per input variable.
DROP TABLE PAL_PCA_LOADINGS_TBL;
CREATE COLUMN TABLE PAL_PCA_LOADINGS_TBL (
    "COMPONENT_ID" NVARCHAR(100),
    "LOADINGS_X1"  DOUBLE,
    "LOADINGS_X2"  DOUBLE,
    "LOADINGS_X3"  DOUBLE,
    "LOADINGS_X4"  DOUBLE,
    "LOADINGS_X5"  DOUBLE,
    "LOADINGS_X6"  DOUBLE
);

-- Loadings information: per-component statistics (SD, VAR_PROP, CUM_VAR_PROP).
DROP TABLE PAL_PCA_LOADINGS_INFORMATION;
CREATE COLUMN TABLE PAL_PCA_LOADINGS_INFORMATION (
    "COMPONENT_ID" NVARCHAR(100),
    "SD"           DOUBLE,
    "VAR_PROP"     DOUBLE,
    "CUM_VAR_PROP" DOUBLE
);

-- Scores: one row per "ID" with one value column per component.
DROP TABLE PAL_PCA_SCORES;
CREATE COLUMN TABLE PAL_PCA_SCORES (
    "ID"          INTEGER,
    "COMPONENT_1" DOUBLE,
    "COMPONENT_2" DOUBLE,
    "COMPONENT_3" DOUBLE,
    "COMPONENT_4" DOUBLE,
    "COMPONENT_5" DOUBLE,
    "COMPONENT_6" DOUBLE
);

-- Scaling information: MEAN and SCALE per "VARIABLE_ID".
DROP TABLE PAL_PCA_SCALING_INFORMATION_TBL;
CREATE COLUMN TABLE PAL_PCA_SCALING_INFORMATION_TBL (
    "VARIABLE_ID" INTEGER,
    "MEAN"        DOUBLE,
    "SCALE"       DOUBLE
);

-- Anonymous block: runs PAL PCA on the prepared input and parameter tables and
-- persists its four result sets into the output tables.
DO BEGIN
    -- Inputs are read with explicit column lists so the column order and set
    -- handed to PAL_PCA do not depend on the physical table layout.
    -- NOTE(review): PAL is understood to read the data table positionally
    -- (ID column first, then the variables) — confirm for your release.
    lt_data = SELECT "ID", "X1", "X2", "X3", "X4", "X5", "X6"
              FROM PAL_PCA_DATA_TBL;
    lt_para = SELECT "PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE"
              FROM PAL_PARAMETER_TBL;

    -- Two input table variables, then four output table variables.
    CALL _SYS_AFL.PAL_PCA(:lt_data, :lt_para, lt_loadings, lt_loadings_info, lt_pca_scores, lt_scaling_info);

    -- The result columns are named by PAL, not by this script, so the source
    -- side stays SELECT * (positional) and only the target columns are
    -- spelled out to document the intended mapping.
    INSERT INTO PAL_PCA_LOADINGS_TBL
        ("COMPONENT_ID", "LOADINGS_X1", "LOADINGS_X2", "LOADINGS_X3", "LOADINGS_X4", "LOADINGS_X5", "LOADINGS_X6")
    SELECT * FROM :lt_loadings;

    INSERT INTO PAL_PCA_LOADINGS_INFORMATION
        ("COMPONENT_ID", "SD", "VAR_PROP", "CUM_VAR_PROP")
    SELECT * FROM :lt_loadings_info;

    INSERT INTO PAL_PCA_SCORES
        ("ID", "COMPONENT_1", "COMPONENT_2", "COMPONENT_3", "COMPONENT_4", "COMPONENT_5", "COMPONENT_6")
    SELECT * FROM :lt_pca_scores;

    INSERT INTO PAL_PCA_SCALING_INFORMATION_TBL
        ("VARIABLE_ID", "MEAN", "SCALE")
    SELECT * FROM :lt_scaling_info;
END;

-- Show the persisted PCA results. Columns are listed explicitly, and each
-- query is ordered by the table's key column so the output is reproducible
-- from run to run (row order without ORDER BY is unspecified).
-- "COMPONENT_ID" is a string column, so its ordering is lexicographic.
SELECT
    "COMPONENT_ID",
    "LOADINGS_X1",
    "LOADINGS_X2",
    "LOADINGS_X3",
    "LOADINGS_X4",
    "LOADINGS_X5",
    "LOADINGS_X6"
FROM PAL_PCA_LOADINGS_TBL
ORDER BY "COMPONENT_ID";

SELECT
    "COMPONENT_ID",
    "SD",
    "VAR_PROP",
    "CUM_VAR_PROP"
FROM PAL_PCA_LOADINGS_INFORMATION
ORDER BY "COMPONENT_ID";

SELECT
    "ID",
    "COMPONENT_1",
    "COMPONENT_2",
    "COMPONENT_3",
    "COMPONENT_4",
    "COMPONENT_5",
    "COMPONENT_6"
FROM PAL_PCA_SCORES
ORDER BY "ID";

SELECT
    "VARIABLE_ID",
    "MEAN",
    "SCALE"
FROM PAL_PCA_SCALING_INFORMATION_TBL
ORDER BY "VARIABLE_ID";

