SMOTETomek combines two resampling techniques: over-sampling with SMOTE (Synthetic Minority Over-sampling Technique) and under-sampling with Tomek links. It balances an imbalanced dataset by generating synthetic minority-class samples with SMOTE and then using Tomek links to remove majority-class samples that lie close to minority-class samples.
------

-- Unqualified table names in this script resolve against the DM_PAL schema.
SET SCHEMA DM_PAL;

-- Recreate the input table so that every run starts from the same data.
-- NOTE: DROP TABLE reports an error when the table does not exist yet
-- (for example on the very first run in a fresh schema).
DROP TABLE PAL_SMOTETOMEK_DATA_TBL;
CREATE COLUMN TABLE PAL_SMOTETOMEK_DATA_TBL (
    "X1" INTEGER,
    "X2" DOUBLE,
    "X3" DOUBLE,
    "TYPE" INTEGER  -- class label; referenced by the DEPENDENT_VARIABLE parameter further down
);

-- Sample data: 10 rows, 7 with TYPE = 1 and 3 with TYPE = 2 (the imbalanced class).
-- Column lists are spelled out so the inserts do not depend on the DDL column order.
INSERT INTO PAL_SMOTETOMEK_DATA_TBL ("X1", "X2", "X3", "TYPE") VALUES (2, 1, 3.5, 1);
INSERT INTO PAL_SMOTETOMEK_DATA_TBL ("X1", "X2", "X3", "TYPE") VALUES (3, 10, 7.6, 1);
INSERT INTO PAL_SMOTETOMEK_DATA_TBL ("X1", "X2", "X3", "TYPE") VALUES (3, 10, 5.5, 2);
INSERT INTO PAL_SMOTETOMEK_DATA_TBL ("X1", "X2", "X3", "TYPE") VALUES (3, 10, 4.7, 1);
INSERT INTO PAL_SMOTETOMEK_DATA_TBL ("X1", "X2", "X3", "TYPE") VALUES (7, 1000, 8.5, 1);
INSERT INTO PAL_SMOTETOMEK_DATA_TBL ("X1", "X2", "X3", "TYPE") VALUES (8, 1000, 9.4, 2);
INSERT INTO PAL_SMOTETOMEK_DATA_TBL ("X1", "X2", "X3", "TYPE") VALUES (6, 1000, 0.34, 1);
INSERT INTO PAL_SMOTETOMEK_DATA_TBL ("X1", "X2", "X3", "TYPE") VALUES (8, 999, 7.4, 2);
INSERT INTO PAL_SMOTETOMEK_DATA_TBL ("X1", "X2", "X3", "TYPE") VALUES (7, 999, 3.5, 1);
INSERT INTO PAL_SMOTETOMEK_DATA_TBL ("X1", "X2", "X3", "TYPE") VALUES (6, 1000, 7, 1);


-- Recreate the session-local parameter table passed to the PAL procedure.
-- NOTE: DROP TABLE reports an error when the temporary table does not exist yet
-- (for example in a new session).
DROP TABLE #PAL_PARAMETER_TBL;
CREATE LOCAL TEMPORARY COLUMN TABLE #PAL_PARAMETER_TBL (
    "PARAM_NAME" NVARCHAR(256),
    "INT_VALUE" INTEGER,
    "DOUBLE_VALUE" DOUBLE,
    "STRING_VALUE" NVARCHAR (1000)
);

-- One row per parameter. Each value goes into the column matching its type and the
-- other two value columns stay NULL; the explicit column lists make that mapping visible.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('THREAD_RATIO', NULL, 0.1, NULL);
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('RANDOM_SEED', 1, NULL, NULL);
-- Name of the label column in PAL_SMOTETOMEK_DATA_TBL.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('DEPENDENT_VARIABLE', NULL, NULL, 'TYPE');
-- Label 2 is the rarer class in the sample data (3 of 10 rows); it is passed as a
-- string even though the TYPE column is INTEGER.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('MINORITY_CLASS', NULL, NULL, '2');
-- NOTE(review): SMOTE_AMOUNT is presumably a percentage of the minority class size - confirm in the PAL reference.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('SMOTE_AMOUNT', 200, NULL, NULL);
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('K_NEAREST_NEIGHBOURS', 2, NULL, NULL);
-- METHOD and SAMPLING_STRATEGY take integer codes; their meanings are defined in the
-- PAL SMOTETomek reference and are not restated here.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('METHOD', 1, NULL, NULL);
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('SAMPLING_STRATEGY', 3, NULL, NULL);


-- Run SMOTETomek on the sample data using the rows of #PAL_PARAMETER_TBL as settings.
-- The third argument is left as an unbound ? placeholder, so the procedure's
-- result table is returned to the client instead of being stored.
CALL _SYS_AFL.PAL_SMOTETOMEK(
    PAL_SMOTETOMEK_DATA_TBL,
    "#PAL_PARAMETER_TBL",
    ?
);

