Kernel Density Estimation (KDE) is a popular technique used in unsupervised learning, feature engineering, and data modeling. It is similar to a histogram but overcomes some of the histogram's limitations. KDE uses a kernel function, controlled by a bandwidth parameter, to estimate the probability density underlying a group of data points. The bandwidth determines the tradeoff between bias and variance in the result. PAL supports six kernel functions: Gaussian, Tophat, Epanechnikov, Exponential, Linear, and Cosine. These kernels can be used in both one-dimensional and multidimensional cases; in the multidimensional case, isotropic kernel densities are considered. To speed up the calculation, PAL provides data structures based on KD-tree and Ball-tree.
------

-- Resolve every unqualified object name below against the DM_PAL schema.
SET SCHEMA DM_PAL;

-- First input table of the KDE call: ten two-dimensional sample points
-- (X1, X2), each identified by ID.
-- NOTE: DROP TABLE raises an error when the table does not exist yet
-- (for example on the very first run of this script).
DROP TABLE PAL_KDE_DATA_TBL;
CREATE COLUMN TABLE PAL_KDE_DATA_TBL (
    "ID" INTEGER,
    "X1" DOUBLE,
    "X2" DOUBLE
);
INSERT INTO PAL_KDE_DATA_TBL ("ID", "X1", "X2") VALUES (0, -0.42576979, -1.39613035);
INSERT INTO PAL_KDE_DATA_TBL ("ID", "X1", "X2") VALUES (1,  0.88410039,  1.3814935);
INSERT INTO PAL_KDE_DATA_TBL ("ID", "X1", "X2") VALUES (2,  0.13412623, -0.03222389);
INSERT INTO PAL_KDE_DATA_TBL ("ID", "X1", "X2") VALUES (3,  0.84550359,  2.86792078);
INSERT INTO PAL_KDE_DATA_TBL ("ID", "X1", "X2") VALUES (4,  0.28844078,  1.51333705);
INSERT INTO PAL_KDE_DATA_TBL ("ID", "X1", "X2") VALUES (5, -0.66678474,  1.24498042);
INSERT INTO PAL_KDE_DATA_TBL ("ID", "X1", "X2") VALUES (6, -2.10296835, -1.42832694);
INSERT INTO PAL_KDE_DATA_TBL ("ID", "X1", "X2") VALUES (7,  0.76990237, -0.47300711);
INSERT INTO PAL_KDE_DATA_TBL ("ID", "X1", "X2") VALUES (8,  0.21029135,  0.32843074);
INSERT INTO PAL_KDE_DATA_TBL ("ID", "X1", "X2") VALUES (9,  0.48232251, -0.43796174);

-- Second input table of the KDE call. Its nine rows form a regular 3 x 3 grid:
-- X1 takes the values -2.10296835, -0.60943398, 0.88410039 and
-- X2 takes the values -1.42832694,  0.71979692, 2.86792078.
-- NOTE(review): presumably these are the points at which the density is
-- evaluated -- confirm against the PAL KDE reference.
-- NOTE: DROP TABLE raises an error when the table does not exist yet.
DROP TABLE PAL_KDE_CLASSDATA_TBL;
CREATE COLUMN TABLE PAL_KDE_CLASSDATA_TBL (
    "ID" INTEGER,
    "X1" DOUBLE,
    "X2" DOUBLE
);
INSERT INTO PAL_KDE_CLASSDATA_TBL ("ID", "X1", "X2") VALUES (0, -2.10296835, -1.42832694);
INSERT INTO PAL_KDE_CLASSDATA_TBL ("ID", "X1", "X2") VALUES (1, -2.10296835,  0.71979692);
INSERT INTO PAL_KDE_CLASSDATA_TBL ("ID", "X1", "X2") VALUES (2, -2.10296835,  2.86792078);
INSERT INTO PAL_KDE_CLASSDATA_TBL ("ID", "X1", "X2") VALUES (3, -0.60943398, -1.42832694);
INSERT INTO PAL_KDE_CLASSDATA_TBL ("ID", "X1", "X2") VALUES (4, -0.60943398,  0.71979692);
INSERT INTO PAL_KDE_CLASSDATA_TBL ("ID", "X1", "X2") VALUES (5, -0.60943398,  2.86792078);
INSERT INTO PAL_KDE_CLASSDATA_TBL ("ID", "X1", "X2") VALUES (6,  0.88410039, -1.42832694);
INSERT INTO PAL_KDE_CLASSDATA_TBL ("ID", "X1", "X2") VALUES (7,  0.88410039,  0.71979692);
INSERT INTO PAL_KDE_CLASSDATA_TBL ("ID", "X1", "X2") VALUES (8,  0.88410039,  2.86792078);


-- Session-local parameter table handed to the PAL procedure. Each row carries
-- one parameter: its name plus a value in exactly one of the typed columns
-- (INT_VALUE, DOUBLE_VALUE or STRING_VALUE); the other two stay NULL.
-- NOTE: DROP TABLE raises an error when the temporary table does not exist
-- yet in this session.
DROP TABLE #PAL_PARAMETER_TBL;
CREATE LOCAL TEMPORARY COLUMN TABLE #PAL_PARAMETER_TBL (
    "PARAM_NAME" NVARCHAR(256),
    "INT_VALUE" INTEGER,
    "DOUBLE_VALUE" DOUBLE,
    "STRING_VALUE" NVARCHAR (1000)
);

-- NOTE(review): the meaning of the integer codes below (METHOD, DISTANCE_LEVEL,
-- KERNEL, STAT_INFO) is defined by the PAL KDE reference, not by this script --
-- look them up there before changing any value.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('THREAD_RATIO', NULL, 0, NULL);
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('BUCKET_SIZE', 10, NULL, NULL);
-- Search-structure selection; presumably chooses between brute force and the
-- KD-tree / Ball-tree accelerators -- confirm the code.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('METHOD', 1, NULL, NULL);
-- Kernel bandwidth: governs the bias/variance tradeoff of the estimate.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('BANDWIDTH', NULL, 0.68129, NULL);
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('DISTANCE_LEVEL', 2, NULL, NULL);
-- Kernel function selection (one of the six kernels PAL supports).
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('KERNEL', 0, NULL, NULL);
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('STAT_INFO', 1, NULL, NULL);
-- The parameter name is stored bare, like every other row: double quotes
-- embedded inside the string literal would become part of the name and the
-- value would no longer be matched to ABSOLUTE_RESULT_TOLERANCE.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('ABSOLUTE_RESULT_TOLERANCE', NULL, 0, NULL);
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('RELATIVE_RESULT_TOLERANCE', NULL, 1E-8, NULL);

-- Run KDE with the two data tables and the parameter table as inputs.
-- The two "?" placeholders are output parameters whose result sets are
-- returned to the calling client.
CALL _SYS_AFL.PAL_KDE (PAL_KDE_DATA_TBL, PAL_KDE_CLASSDATA_TBL, "#PAL_PARAMETER_TBL", ?, ?);

