Univariate analysis is a statistical method that provides an overview of a dataset by analyzing each variable individually. It can handle both continuous and categorical variables, even if there are null values in the dataset.

For continuous variables, univariate analysis calculates various statistical quantities such as the count of non-null values, minimum value, lower quartile, median, upper quartile, maximum value, mean, confidence interval for the mean, trimmed mean, variance, standard deviation, skewness, and kurtosis.

For categorical variables, univariate analysis returns the number of occurrences and the percentage of each category, including null values.
------

-- Resolve every unqualified object name below against the DM_PAL schema.
SET SCHEMA "DM_PAL";

-- Recreate the demo input table from scratch so the script can be rerun.
-- NOTE(review): on a schema where the table does not exist yet, this DROP
-- presumably reports an error; that first-run error is expected and harmless.
DROP TABLE PAL_UNIVARIATE_ANALYSIS_DATA_TBL;
CREATE COLUMN TABLE PAL_UNIVARIATE_ANALYSIS_DATA_TBL (
    "X1" DOUBLE,
    "X2" DOUBLE,        -- every sample row below leaves this column NULL
    "X3" INTEGER,
    "X4" VARCHAR(100)
);

-- Sample rows. Columns are listed explicitly so that each value stays bound to
-- its intended column even if the table definition is later reordered or extended.
INSERT INTO PAL_UNIVARIATE_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4") VALUES (1.2,   NULL, 1, 'A');
INSERT INTO PAL_UNIVARIATE_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4") VALUES (2.5,   NULL, 2, 'C');
INSERT INTO PAL_UNIVARIATE_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4") VALUES (5.2,   NULL, 3, 'A');
INSERT INTO PAL_UNIVARIATE_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4") VALUES (-10.2, NULL, 2, 'A');
INSERT INTO PAL_UNIVARIATE_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4") VALUES (8.5,   NULL, 2, 'C');
INSERT INTO PAL_UNIVARIATE_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4") VALUES (100,   NULL, 3, 'B');

-- Rebuild the session-local parameter table that is handed to the PAL call.
-- NOTE(review): if the temporary table does not exist yet in this session,
-- this DROP presumably reports an error; that first-run error is expected.
DROP TABLE #PAL_PARAMETER_TBL;
CREATE LOCAL TEMPORARY COLUMN TABLE #PAL_PARAMETER_TBL (
    "PARAM_NAME"   VARCHAR(256),
    "INT_VALUE"    INTEGER,
    "DOUBLE_VALUE" DOUBLE,
    "STRING_VALUE" VARCHAR(1000)
);

-- One row per parameter. Each row fills exactly one of the three value columns
-- and leaves the others NULL; the explicit column list keeps a value from
-- silently landing in the wrong typed slot.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('SIGNIFICANCE_LEVEL', NULL, 0.05, NULL); -- default value is 0.05
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('TRIMMED_PERCENTAGE', NULL, 0.2, NULL);  -- default value is 0.05; overridden to 0.2 here
-- NOTE(review): 3 is a column index into the data table; confirm the index
-- base (0- or 1-based) against the PAL reference before changing it.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('CATEGORY_COL', 3, NULL, NULL);          -- no default

-- Run the univariate analysis over the sample data with the parameters above.
-- The two trailing ? placeholders take the procedure's outputs; presumably one
-- result for continuous variables and one for categorical variables (confirm
-- the order in the PAL reference).
CALL _SYS_AFL.PAL_UNIVARIATE_ANALYSIS(
    PAL_UNIVARIATE_ANALYSIS_DATA_TBL,
    #PAL_PARAMETER_TBL,
    ?,
    ?
);

