Power Transform is a family of non-linear transform methods that use the power function to transform continuous data. It can stabilize the variance of data, minimize skewness, and approximate the data's distribution as Gaussian. This is useful because many machine learning algorithms assume Gaussian distributed data, but the data obtained may not conform to this distribution. Power Transform can be used to make the data more Gaussian-like. It supports two types of transformations: Box-Cox transformation and Yeo-Johnson transformation. Both transformations are monotonically increasing functions with a hyper-parameter called λ. The main difference between the two is that Box-Cox transformation is defined only for strictly positive data, while Yeo-Johnson transformation is defined for the entire real domain. Power Transform can be applied using two stored procedures: one for fitting the transformation parameters and another for transforming new data based on the fitted parameters.
------

-- Run every following statement against the DM_PAL schema.
SET SCHEMA DM_PAL;

-- Rebuild the training table from scratch: a row identifier plus three numeric features.
-- NOTE(review): the DROP presumably raises an error when the table does not exist yet
-- (e.g. on a first run) -- confirm how the executing client treats that.
DROP TABLE PAL_POWER_TRANSFORM_TRAIN_DATA_TBL;
CREATE COLUMN TABLE PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (
    "ID" INTEGER,
    "X1" DOUBLE,
    "X2" DOUBLE,
    "X3" DOUBLE
);

-- Twenty sample rows (ID 0 through 19), one INSERT statement per row.
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (0, 5.155951363694313, 4.076506774438141, 6.775578240123138);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (1, 5.016602867279994, 4.298159629915712, 5.972896878286871);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (2, 6.330503142514262, 4.3079927168024446, 4.503660363833093);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (3, 4.418133874557737, 7.572598100863792, 5.582568159406703);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (4, 4.043450306707487, 4.078382082912484, 5.039633969808214);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (5, 6.309902963930603, 5.716670045148155, 5.365876422148958);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (6, 9.459266606686224, 4.456605337786643, 6.528845492078271);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (7, 7.042910662171584, 5.363604409362707, 5.621450381520459);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (8, 4.52491025786601, 4.820377426072063, 4.0940495788733395);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (9, 10.085880972733657, 4.797140870188886, 4.967799257755148);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (10, 6.274808607310309, 4.087428452689988, 4.614905343077156);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (11, 4.416353864586462, 8.60136467772791, 5.295219180385827);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (12, 6.204025193299364, 6.034236800100728, 4.992341419035431);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (13, 5.223059748059121, 7.395916235686572, 4.1072557288544616);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (14, 5.52254174106756, 5.1980190368842845, 7.716796419800201);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (15, 4.8140726282581205, 5.987543315087799, 6.196331328967135);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (16, 4.4429461072686145, 6.240440141610463, 7.042161278878741);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (17, 4.355412962823212, 5.182920672969789, 4.486751411317423);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (18, 7.020273410920357, 3.8736115238569817, 7.3447648834718215);
INSERT INTO PAL_POWER_TRANSFORM_TRAIN_DATA_TBL (ID, X1, X2, X3) VALUES (19, 4.762024626259294, 4.961158730017223, 4.6924661064778315);

-- Rebuild the parameter table that is handed to the fit procedure.
-- Each row configures one parameter: PARAM_NAME identifies it, and its value goes into
-- exactly one of INT_VALUE / DOUBLE_VALUE / STRING_VALUE while the other two stay NULL.
-- NOTE(review): the DROP presumably raises an error when the table does not exist yet
-- (e.g. on a first run) -- confirm how the executing client treats that.
DROP TABLE PAL_PARAMETER_TBL;
CREATE COLUMN TABLE PAL_PARAMETER_TBL ("PARAM_NAME" VARCHAR(256), "INT_VALUE" INTEGER, "DOUBLE_VALUE" DOUBLE, "STRING_VALUE" VARCHAR(1000));

-- Every INSERT names its target columns explicitly so the rows do not depend on the
-- physical column order of the table.

-- Default values are shown in square brackets.
-- [0]: Yeo-Johnson; 1: Box-Cox
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE) VALUES ('METHOD', 0, NULL, NULL);
-- 0: Do not standardize; [1]: Standardize
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE) VALUES ('STANDARDIZE', 1, NULL, NULL);
-- [0]: Do not output the transform result; 1: Output the transform result
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE) VALUES ('OUTPUT_TRANSFORM', 1, NULL, NULL);
-- Maximum number of iterations the optimization algorithm may use to compute the lambda value; valid range is (0, +inf), default is 500
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE) VALUES ('MAX_ITERATION', 500, NULL, NULL);
-- Tolerable absolute accuracy error; valid range is (0, 1), default value is 1.0E-11
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE) VALUES ('TOLERANCE', NULL, 1E-11, NULL);
-- Define an interval for the lambda value; the final result will be restricted to the given range.
-- Any value is allowed, even interval_start > interval_end.
-- Default value for interval_start is -2.0, default value for interval_end is 2.0.
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE) VALUES ('INTERVAL_START', NULL, -4.0, NULL);
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE) VALUES ('INTERVAL_END', NULL, 4.0, NULL);
-- Define thread_ratio; valid range is [0.0, 1.0], default value is 1.0, which means to use all currently available CPU resources
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE) VALUES ('THREAD_RATIO', NULL, 1.0, NULL);
-- Specify the role of the given interval:
--   [0]: enforce that the final fitted hyper-parameter lies within the given interval;
--    1 : the interval only serves as a hint; there is no guarantee that the final fitted hyper-parameter falls within it
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE) VALUES ('INTERVAL_HINT', 0, NULL, NULL);
-- Override INTERVAL_HINT as 1 for feature X1
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE) VALUES ('FEATURE_X1_INTERVAL_HINT', 1, NULL, NULL);
-- Override the interval range for feature X2
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE) VALUES ('FEATURE_X2_INTERVAL_START', NULL, -3.0, NULL);
INSERT INTO PAL_PARAMETER_TBL (PARAM_NAME, INT_VALUE, DOUBLE_VALUE, STRING_VALUE) VALUES ('FEATURE_X2_INTERVAL_END', NULL, 1.0, NULL);


-- Rebuild the two tables that persist the outputs of the fit step.
-- NOTE(review): each DROP presumably raises an error when its table does not exist yet
-- (e.g. on a first run) -- confirm how the executing client treats that.

-- Model output: one text fragment per ROW_ID.
DROP TABLE PAL_POWER_TRANSFORM_MODEL_TBL;
CREATE COLUMN TABLE PAL_POWER_TRANSFORM_MODEL_TBL (
    "ROW_ID" INTEGER,
    "MODEL_CONTENT" NVARCHAR(5000)
);

-- Transformed output: the row ID plus one DOUBLE column per input feature (X1, X2, X3).
DROP TABLE PAL_POWER_TRANSFORM_RESULT;
CREATE COLUMN TABLE PAL_POWER_TRANSFORM_RESULT (
    "ID" INTEGER,
    "TRANSFORMED_X1" DOUBLE,
    "TRANSFORMED_X2" DOUBLE,
    "TRANSFORMED_X3" DOUBLE
);

-- Anonymous block: run the power-transform fit and persist both of its outputs.
DO BEGIN
    -- Load the parameter rows and the training rows into table variables.
    fit_params = SELECT * FROM PAL_PARAMETER_TBL;
    train_rows = SELECT * FROM PAL_POWER_TRANSFORM_TRAIN_DATA_TBL;

    -- Inputs: training data, then parameters. Outputs: model, then transformed data.
    CALL _SYS_AFL.PAL_POWER_TRANSFORMER_FIT (:train_rows, :fit_params, fitted_model, transformed_rows);

    -- Columns are matched by position, so each target table's column order
    -- has to line up with the corresponding procedure output.
    INSERT INTO PAL_POWER_TRANSFORM_MODEL_TBL
        SELECT * FROM :fitted_model;
    INSERT INTO PAL_POWER_TRANSFORM_RESULT
        SELECT * FROM :transformed_rows;
END;

-- Display the persisted model rows. Ordering by ROW_ID makes the output deterministic;
-- without ORDER BY the row order is not guaranteed.
SELECT
    "ROW_ID",
    "MODEL_CONTENT"
FROM PAL_POWER_TRANSFORM_MODEL_TBL
ORDER BY "ROW_ID";

-- Display the transformed training data, ordered by the row identifier so that it can be
-- compared side by side with the input rows.
SELECT
    "ID",
    "TRANSFORMED_X1",
    "TRANSFORMED_X2",
    "TRANSFORMED_X3"
FROM PAL_POWER_TRANSFORM_RESULT
ORDER BY "ID";
