K-medians is a clustering algorithm similar to K-means: it partitions observations into clusters by assigning each observation to its nearest cluster center. However, instead of using the mean of each feature to compute the cluster centers, K-medians uses the median. The algorithm alternates between assigning observations to the closest cluster center and updating the cluster centers, until the assignments no longer change. The PAL implementation of K-medians supports multithreading, data normalization, and several distance measures. It does not handle categorical data directly; instead, categorical data is managed through data transformation: each categorical attribute is converted into a binary vector and then treated as a set of numerical attributes. When the Euclidean distance between observations is calculated, a weight is applied to the transformed categorical attributes to lessen their impact on clustering.
------

-- Sample input for the K-medians example: 20 observations, each with an ID,
-- two numeric features (V000, V002) and one categorical feature (V001).
SET SCHEMA DM_PAL;

-- Recreate the table from scratch so the script can be re-run.
-- NOTE: the DROP fails if the table does not exist yet (e.g. on a first run);
-- skip or ignore that statement in that case.
DROP TABLE PAL_KMEDIANS_DATA_TBL;
CREATE COLUMN TABLE PAL_KMEDIANS_DATA_TBL(
	"ID" INTEGER,
	"V000" DOUBLE,
	"V001" VARCHAR(5),
	"V002" DOUBLE
);

-- Every INSERT names its target columns explicitly, so the statements stay
-- correct even if the table definition is later reordered or extended.

-- Group 1: V000 small, V002 small.
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (0 , 0.5, 'A', 0.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (1 , 1.5, 'A', 0.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (2 , 1.5, 'A', 1.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (3 , 0.5, 'A', 1.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (4 , 1.1, 'B', 1.2);

-- Group 2: V000 small, V002 large.
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (5 , 0.5, 'B', 15.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (6 , 1.5, 'B', 15.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (7 , 1.5, 'B', 16.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (8 , 0.5, 'B', 16.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (9 , 1.2, 'C', 16.1);

-- Group 3: V000 large, V002 large.
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (10, 15.5, 'C', 15.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (11, 16.5, 'C', 15.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (12, 16.5, 'C', 16.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (13, 15.5, 'C', 16.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (14, 15.6, 'D', 16.2);

-- Group 4: V000 large, V002 small.
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (15, 15.5, 'D', 0.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (16, 16.5, 'D', 0.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (17, 16.5, 'D', 1.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (18, 15.5, 'D', 1.5);
INSERT INTO PAL_KMEDIANS_DATA_TBL ("ID", "V000", "V001", "V002") VALUES (19, 15.7, 'A', 1.6);

-- Control parameters passed to the PAL_KMEDIANS call. Each row sets one
-- parameter by name and fills exactly one of the three typed value columns;
-- the other two stay NULL.
-- NOTE: the DROP fails if the temporary table does not exist in the current
-- session (e.g. on a first run); skip or ignore that statement in that case.
DROP TABLE #PAL_PARAMETER_TBL;
CREATE LOCAL TEMPORARY COLUMN TABLE #PAL_PARAMETER_TBL(
	"PARAM_NAME" NVARCHAR(256),
	"INT_VALUE" INTEGER,
	"DOUBLE_VALUE" DOUBLE,
	"STRING_VALUE" NVARCHAR(1000)
);

-- Explicit column lists make it obvious which typed column carries each value
-- and protect the inserts against changes to the table's column order.
-- NOTE(review): the per-parameter remarks below are inferred from the parameter
-- names and the accompanying description; confirm the exact value semantics
-- against the PAL K-medians reference.

-- Presumably the share of available threads the algorithm may use.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('THREAD_RATIO', NULL, 0.3, NULL);
-- Presumably the number of clusters; the sample data forms four groups.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('GROUP_NUMBER', 4, NULL, NULL);
-- Presumably selects how the initial cluster centers are chosen.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('INIT_TYPE', 1, NULL, NULL);
-- Presumably selects the distance measure (the description mentions Euclidean).
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('DISTANCE_LEVEL', 2, NULL, NULL);
-- Presumably an upper bound on the number of assign/update iterations.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('MAX_ITERATION', 100, NULL, NULL);
-- Presumably the convergence threshold at which iteration stops.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('EXIT_THRESHOLD', NULL, 1.0E-6, NULL);
-- Presumably 0 disables data normalization.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('NORMALIZATION', 0, NULL, NULL);
-- Presumably the weight applied to the transformed categorical attributes.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('CATEGORY_WEIGHTS', NULL, 0.5, NULL);

-- Run the PAL K-medians procedure.
-- Arguments, in order: the input data table, the parameter table, and two
-- output placeholders (?) whose result sets are returned to the client.
-- NOTE(review): the outputs are presumably the per-observation cluster
-- assignments and the cluster centers; confirm against the PAL reference.
CALL _SYS_AFL.PAL_KMEDIANS(
	PAL_KMEDIANS_DATA_TBL,
	"#PAL_PARAMETER_TBL",
	?,
	?
);

