Hybrid gradient boosting tree (HGBT) is an ensemble machine learning technique for regression and classification problems. HGBT builds the model in a stage-wise fashion: at each iteration it fits a new classification or regression tree to the negative gradient of the loss function, thereby optimizing the loss. The final score is the weighted sum of the outputs of the regression trees from all iterations. HGBT supports mixed feature types, various loss criteria, regularization, and model evaluation. It also processes categorical features directly instead of requiring one-hot encoding. Prerequisites for using HGBT: the tree model must be stored in a column table, the scoring data must have an ID column, and the scoring data must provide every variable that was used in the training stage.
------

-- Work in the DM_PAL schema; the unqualified table names used in this script
-- resolve against it.
set schema DM_PAL;

-- Training data for the HGBT example: four numeric attributes (ATT1..ATT4)
-- and a string class label that takes the values 'A' and 'B'.
-- NOTE(review): on a first run the table does not exist yet, so expect this
-- drop to report an error; it only matters when re-running the script.
drop table PAL_HGBT_DATA_TAB;
create column table PAL_HGBT_DATA_TAB (
	"ATT1" double,
	"ATT2" double,
	"ATT3" double,
	"ATT4" double,
	"LABEL" nvarchar(50)
);
-- Every insert names its target columns, so the rows keep landing in the
-- intended columns even if the table definition is later reordered or extended.
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (1.0, 10.0, 100, 1.0, 'A');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (1.1, 10.1, 100, 1.0, 'A');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (1.2, 10.2, 100, 1.0, 'A');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (1.3, 10.4, 100, 1.0, 'A');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (1.2, 10.3, 100, 1.0, 'A');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (4.0, 40.0, 400, 4.0, 'B');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (4.1, 40.1, 400, 4.0, 'B');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (4.2, 40.2, 400, 4.0, 'B');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (4.3, 40.4, 400, 4.0, 'B');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (4.2, 40.3, 400, 4.0, 'A');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (9.0, 90.0, 900, 2.0, 'A');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (9.1, 90.1, 900, 1.0, 'B');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (9.2, 90.2, 900, 2.0, 'B');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (9.3, 90.4, 900, 1.0, 'B');
insert into PAL_HGBT_DATA_TAB ("ATT1", "ATT2", "ATT3", "ATT4", "LABEL") values (9.2, 90.3, 900, 1.0, 'B');

-- Control table handed to the PAL_HGBT call: one row per parameter, with the
-- value stored in whichever of the three value columns matches its type and
-- the other two left null.
-- NOTE(review): on a first run the table does not exist yet, so expect this
-- drop to report an error; it only matters when re-running the script.
drop table PAL_PARAMETER_TAB;
create column table PAL_PARAMETER_TAB (
	"PARAM_NAME" nvarchar(100),
	"INT_VALUE" integer,
	"DOUBLE_VALUE" double,
	"STRING_VALUE" nvarchar(100)
);
-- Every insert names its target columns so a value can never slip into the
-- wrong typed column if the table definition changes.

-- Split method, resampling / parameter-search setup, metrics and seed.
-- See the PAL HGBT reference for the exact semantics of each parameter name.
insert into PAL_PARAMETER_TAB ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") values ('SPLIT_METHOD', null, null, 'exact');
insert into PAL_PARAMETER_TAB ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") values ('RESAMPLING_METHOD', null, null, 'cv');
insert into PAL_PARAMETER_TAB ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") values ('FOLD_NUM', 5, null, null);
insert into PAL_PARAMETER_TAB ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") values ('PARAM_SEARCH_STRATEGY', null, null, 'grid');
insert into PAL_PARAMETER_TAB ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") values ('EVALUATION_METRIC', null, null, 'ERROR_RATE');
insert into PAL_PARAMETER_TAB ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") values ('REF_METRIC', null, null, 'AUC');
insert into PAL_PARAMETER_TAB ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") values ('SEED', 1, null, null);

-- Candidate hyper-parameter values for the search, passed as strings:
-- a {..} list for MAX_DEPTH and [..] triples for the other three.
-- NOTE(review): the triples are presumably [start, step, end] - confirm
-- against the PAL reference before changing them.
insert into PAL_PARAMETER_TAB ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") values ('MAX_DEPTH_VALUES', null, null, '{3, 5, 6}');
insert into PAL_PARAMETER_TAB ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") values ('ITER_NUM_RANGE', null, null, '[4, 2, 10]');
insert into PAL_PARAMETER_TAB ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") values ('ETA_RANGE', null, null, '[0.1, 0.2, 1.0]');
insert into PAL_PARAMETER_TAB ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") values ('GAMMA_RANGE', null, null, '[0.1, 0.2, 1.0]');

-- Persistent column table for the trained model, kept so that a prediction
-- step that follows this script can read it.
drop table PAL_HGBT_MODEL_TAB;
create column table PAL_HGBT_MODEL_TAB (
	"ROW_INDEX"     integer,
	"TREE_INDEX"    integer,
	"MODEL_CONTENT" nclob
);

-- Train the HGBT model in an anonymous block: read the two input tables, call
-- the PAL procedure, display each of its five outputs, and persist the model.
do begin
	-- The PAL call receives its inputs positionally, so the columns are listed
	-- explicitly instead of relying on the physical column order of the tables.
	LT_DATA_TAB = select "ATT1", "ATT2", "ATT3", "ATT4", "LABEL" from PAL_HGBT_DATA_TAB;
	LT_PARAM_TAB = select "PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE" from PAL_PARAMETER_TAB;
	call _SYS_AFL.PAL_HGBT(:LT_DATA_TAB, :LT_PARAM_TAB, LT_MODEL_TAB, LT_IMP_TAB, LT_CONFUSE_TAB, LT_STAT_TAB, LT_CV_TAB);
	-- Return every output table as a result set. Judging by the variable names
	-- these are the model, variable importance, confusion matrix, statistics
	-- and cross-validation results - confirm against the PAL reference.
	select * from :LT_MODEL_TAB;
	select * from :LT_IMP_TAB;
	select * from :LT_CONFUSE_TAB;
	select * from :LT_STAT_TAB;
	select * from :LT_CV_TAB;
	-- Store the model; the target columns are named so the insert does not
	-- depend on the column order of PAL_HGBT_MODEL_TAB.
	insert into PAL_HGBT_MODEL_TAB ("ROW_INDEX", "TREE_INDEX", "MODEL_CONTENT") select * from :LT_MODEL_TAB;
end;
