This example shows how to partition an input dataset into three subsets: a training set, a testing set, and a validation set. Two partition methods are available: random partition and stratified partition. In a random partition, the rows are assigned to the subsets at random. In a stratified partition, the rows are first grouped by the values of a categorical attribute and each group is then split at random, ensuring that all categories are represented in the subsets.
------

-- Work in the DM_PAL schema; unqualified table names below resolve there.
SET SCHEMA DM_PAL;

-- Rebuild the input table from scratch on every run.
-- NOTE(review): this DROP presumably raises an error when the table does not
-- exist yet (e.g. on a first run) -- confirm the client is set to continue.
DROP TABLE PAL_PARTITION_DATA_TBL;

-- Input data for the partition run: one row per borrower record.
CREATE COLUMN TABLE PAL_PARTITION_DATA_TBL (
    "ID"                INTEGER,         -- row identifier
    "HomeOwner"         VARCHAR(100),    -- categorical attribute
    "MaritalStatus"     VARCHAR(100),    -- categorical attribute
    "AnnualIncome"      DOUBLE,          -- numeric attribute
    "DefaultedBorrower" VARCHAR(100)     -- categorical attribute
);
-- Sample data: the same ten borrower records are loaded three times
-- (IDs 0-9, 10-19 and 20-29), giving 30 rows in total. Within each group of
-- ten, only "ID" differs from the corresponding row of the other groups.
-- Column lists are spelled out so the inserts do not depend on column order.

-- Replica 1 (IDs 0-9)
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (0,  'YES', 'Single',   125, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (1,  'NO',  'Married',  100, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (2,  'NO',  'Single',    70, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (3,  'YES', 'Married',  120, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (4,  'NO',  'Divorced',  95, 'YES');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (5,  'NO',  'Married',   60, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (6,  'YES', 'Divorced', 220, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (7,  'NO',  'Single',    85, 'YES');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (8,  'NO',  'Married',   75, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (9,  'NO',  'Single',    90, 'YES');

-- Replica 2 (IDs 10-19)
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (10, 'YES', 'Single',   125, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (11, 'NO',  'Married',  100, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (12, 'NO',  'Single',    70, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (13, 'YES', 'Married',  120, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (14, 'NO',  'Divorced',  95, 'YES');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (15, 'NO',  'Married',   60, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (16, 'YES', 'Divorced', 220, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (17, 'NO',  'Single',    85, 'YES');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (18, 'NO',  'Married',   75, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (19, 'NO',  'Single',    90, 'YES');

-- Replica 3 (IDs 20-29)
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (20, 'YES', 'Single',   125, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (21, 'NO',  'Married',  100, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (22, 'NO',  'Single',    70, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (23, 'YES', 'Married',  120, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (24, 'NO',  'Divorced',  95, 'YES');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (25, 'NO',  'Married',   60, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (26, 'YES', 'Divorced', 220, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (27, 'NO',  'Single',    85, 'YES');
-- Same record as IDs 8 and 18, so "DefaultedBorrower" must be 'NO' here as well.
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (28, 'NO',  'Married',   75, 'NO');
INSERT INTO PAL_PARTITION_DATA_TBL ("ID", "HomeOwner", "MaritalStatus", "AnnualIncome", "DefaultedBorrower") VALUES (29, 'NO',  'Single',    90, 'YES');

-- Session-local parameter table passed to PAL_PARTITION as its second argument.
-- Each row sets one parameter; only the value column that matches the
-- parameter's type is filled, the remaining value columns stay NULL.
-- NOTE(review): this DROP presumably raises an error when the temporary table
-- does not exist in the current session -- confirm the client continues.
DROP TABLE #PAL_PARAMETER_TBL;

CREATE LOCAL TEMPORARY COLUMN TABLE #PAL_PARAMETER_TBL (
    "PARAM_NAME"   VARCHAR(256),
    "INT_VALUE"    INTEGER,
    "DOUBLE_VALUE" DOUBLE,
    "STRING_VALUE" VARCHAR(1000)
);

-- Integer-valued settings.
-- PARTITION_METHOD: per the PAL documentation 0 selects random partition
-- (1 would select stratified partition).
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('PARTITION_METHOD', 0, NULL, NULL);
-- RANDOM_SEED: fixed seed; presumably makes the random split repeatable -- confirm.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('RANDOM_SEED', 23, NULL, NULL);

-- Fractions of the input assigned to each subset: 0.6 + 0.2 + 0.2 = 1.0.
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('TRAINING_PERCENT', NULL, 0.6, NULL);
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('TESTING_PERCENT', NULL, 0.2, NULL);
INSERT INTO #PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE")
    VALUES ('VALIDATION_PERCENT', NULL, 0.2, NULL);

-- Run the PAL partition procedure on the sample data with the settings above.
-- The trailing ? leaves the output parameter unbound, so the result table is
-- returned to the calling client instead of being stored.
CALL "_SYS_AFL"."PAL_PARTITION"(
    PAL_PARTITION_DATA_TBL,   -- input data
    #PAL_PARAMETER_TBL,       -- partition settings
    ?                         -- result set
);

