-- Goal: predict which customer segment (cluster) a new supermarket customer belongs to.
-- Approach: first segment the existing customers with the K-means function, then use
-- the resulting cluster assignments as training data for the C4.5 Decision Tree
-- function, which predicts the segment of new customers.
------

-- Run everything in the DM_PAL schema; all unqualified table names below
-- are resolved against it.
SET SCHEMA DM_PAL;

-- Input data for the clustering step: one row per existing customer.
-- The table is dropped and recreated so the script can be re-run from scratch.
-- NOTE(review): the DROP is expected to raise an error on the very first run,
-- when the table does not exist yet; that error can be ignored.
DROP TABLE PAL_KMEANS_DATA_TBL;
CREATE COLUMN TABLE PAL_KMEANS_DATA_TBL (
    "ID" INT,         -- customer key; joined back to the cluster assignment later
    "AGE" DOUBLE,     -- clustering feature
    "INCOME" DOUBLE   -- clustering feature
);

-- Sample customers. The values form three visibly separate groups
-- (ages 20-22, 30-32 and 40-42, with correspondingly rising income).
-- Column lists are spelled out so the inserts do not depend on column order.
INSERT INTO PAL_KMEANS_DATA_TBL ("ID", "AGE", "INCOME") VALUES (0, 20, 100000);
INSERT INTO PAL_KMEANS_DATA_TBL ("ID", "AGE", "INCOME") VALUES (1, 21, 101000);
INSERT INTO PAL_KMEANS_DATA_TBL ("ID", "AGE", "INCOME") VALUES (2, 22, 102000);
INSERT INTO PAL_KMEANS_DATA_TBL ("ID", "AGE", "INCOME") VALUES (3, 30, 200000);
INSERT INTO PAL_KMEANS_DATA_TBL ("ID", "AGE", "INCOME") VALUES (4, 31, 201000);
INSERT INTO PAL_KMEANS_DATA_TBL ("ID", "AGE", "INCOME") VALUES (5, 32, 202000);
INSERT INTO PAL_KMEANS_DATA_TBL ("ID", "AGE", "INCOME") VALUES (6, 40, 400000);
INSERT INTO PAL_KMEANS_DATA_TBL ("ID", "AGE", "INCOME") VALUES (7, 41, 401000);
INSERT INTO PAL_KMEANS_DATA_TBL ("ID", "AGE", "INCOME") VALUES (8, 42, 402000);

-- Parameter table handed to the clustering procedure. Each row is one named
-- setting; exactly one of the three value columns is filled, the others stay NULL.
DROP TABLE PAL_KMEANS_PARAMETER_TBL;
CREATE COLUMN TABLE PAL_KMEANS_PARAMETER_TBL (
    "PARAM_NAME" VARCHAR (256),
    "INT_VALUE" INTEGER,
    "DOUBLE_VALUE" DOUBLE,
    "STRING_VALUE" VARCHAR (1000)
);

-- Column lists are spelled out so it is obvious which value column carries
-- each setting and the inserts do not depend on column order.
-- NOTE(review): the meanings given below for the coded values are taken from
-- the PAL K-means documentation as remembered -- confirm against the PAL
-- reference for the installed release.

-- Algorithm to run inside the unified clustering procedure.
INSERT INTO PAL_KMEANS_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('FUNCTION', NULL, NULL, 'KMEANS');
-- Number of clusters to build.
INSERT INTO PAL_KMEANS_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('N_CLUSTERS', 3, NULL, NULL);
-- Initial-center selection method (presumably 1 = first k observations).
INSERT INTO PAL_KMEANS_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('INIT', 1, NULL, NULL);
-- Distance metric (presumably 2 = Euclidean).
INSERT INTO PAL_KMEANS_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('DISTANCE_LEVEL', 2, NULL, NULL);
-- Upper bound on the number of iterations.
INSERT INTO PAL_KMEANS_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('MAX_ITER', 100, NULL, NULL);
-- Convergence tolerance (the only DOUBLE-valued setting here).
INSERT INTO PAL_KMEANS_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('TOL', NULL, 0.000001, NULL);
-- Normalization switch (presumably 0 = none). With no normalization, INCOME
-- (hundreds of thousands) dominates AGE (tens) in the distance computation.
INSERT INTO PAL_KMEANS_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('NORMALIZATION', 0, NULL, NULL);

-- Holds the per-customer output of the clustering call: which cluster each
-- input row was assigned to. Recreated on every run.
DROP TABLE PAL_KMEANS_RESASSIGN_TBL;
CREATE COLUMN TABLE PAL_KMEANS_RESASSIGN_TBL
(
    "ID"                INTEGER,  -- matches PAL_KMEANS_DATA_TBL."ID"
    "CENTER_ASSIGN"     INTEGER,  -- id of the cluster the row was assigned to
    "DISTANCE"          DOUBLE,   -- presumably distance to the assigned center -- confirm
    "SLIGHT_SILHOUETTE" DOUBLE
);

-- Holds the cluster centers returned by the clustering call: one row per
-- cluster, one value column per input feature. Recreated on every run.
-- NOTE(review): V000 / V001 presumably correspond to AGE / INCOME in input
-- column order -- confirm against the procedure's output.
DROP TABLE PAL_KMEANS_CENTERS_TBL;
CREATE COLUMN TABLE PAL_KMEANS_CENTERS_TBL
(
    "CENTER_ID" INT,
    "V000"      NVARCHAR(1000),
    "V001"      NVARCHAR(1000)
);

-- Run K-means through the PAL unified clustering procedure and persist the
-- two outputs this script needs (assignments and centers). The remaining
-- outputs (model, statistics, optimal parameters, placeholders) are received
-- into local table variables and discarded.
DO BEGIN
  -- Name the input columns explicitly so that the procedure's input contract
  -- (key column first, then the feature columns) does not silently change if
  -- columns are added to or reordered in the source tables.
  lt_data = SELECT "ID", "AGE", "INCOME"
            FROM PAL_KMEANS_DATA_TBL;
  lt_param = SELECT "PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE"
             FROM PAL_KMEANS_PARAMETER_TBL;

  CALL "_SYS_AFL"."PAL_UNIFIED_CLUSTERING"(
    :lt_data, :lt_param,
    lt_assignment, lt_center, lt_model, lt_stat, lt_opt, lt_ph1, lt_ph2
  );

  -- The procedure's output columns are mapped by position; the target column
  -- lists document that mapping. SELECT * is kept on the procedure outputs
  -- because their column names are defined by PAL, not by this script.
  INSERT INTO PAL_KMEANS_RESASSIGN_TBL ("ID", "CENTER_ASSIGN", "DISTANCE", "SLIGHT_SILHOUETTE")
    SELECT * FROM :lt_assignment;
  INSERT INTO PAL_KMEANS_CENTERS_TBL ("CENTER_ID", "V000", "V001")
    SELECT * FROM :lt_center;
END;

-- Labelled data set built from the clustering result: each customer's
-- features together with the cluster K-means assigned to that customer.
-- NOTE(review): presumably this is the training input for the follow-up
-- decision-tree step -- confirm against the rest of the script.
DROP TABLE PAL_KMEANS_RESULT_TBL;
CREATE COLUMN TABLE PAL_KMEANS_RESULT_TBL (
    "AGE" DOUBLE,
    "INCOME" DOUBLE,
    "LEVEL" VARCHAR (10)  -- cluster id stored as text
);

-- Join the assignments back to the original features on the customer ID.
-- The integer cluster id is converted to text explicitly instead of relying
-- on implicit INT -> VARCHAR conversion at insert time.
INSERT INTO PAL_KMEANS_RESULT_TBL ("AGE", "INCOME", "LEVEL")
SELECT
    d."AGE",
    d."INCOME",
    CAST(a."CENTER_ASSIGN" AS VARCHAR(10)) AS "LEVEL"
FROM PAL_KMEANS_RESASSIGN_TBL AS a
INNER JOIN PAL_KMEANS_DATA_TBL AS d
    ON a."ID" = d."ID";

-- Show the labelled rows in a stable order.
SELECT
    "AGE",
    "INCOME",
    "LEVEL"
FROM PAL_KMEANS_RESULT_TBL
ORDER BY "AGE", "INCOME";
