Linear Discriminant Analysis (LDA) is a statistical method used for classification and dimensionality reduction. In the PAL implementation of LDA, there are three procedures: PAL_LINEAR_DISCRIMINANT_ANALYSIS, PAL_LINEAR_DISCRIMINANT_ANALYSIS_CLASSIFY, and PAL_LINEAR_DISCRIMINANT_ANALYSIS_PROJECT.

The main procedure, PAL_LINEAR_DISCRIMINANT_ANALYSIS, performs LDA on a given dataset X with labels Y and returns a classifier that can be used for further classification, a projection model that can reduce the dimension of X, and other basic information.

LDA assumes that the samples within each class follow a normal distribution, with a different mean for each class but a single covariance matrix shared by all classes. PAL_LINEAR_DISCRIMINANT_ANALYSIS estimates these model parameters (the class means and the shared covariance matrix) empirically from the input data.

To classify an unlabeled sample, LDA computes the posterior probability of each class for the sample and assigns it to the class with the highest probability.

The projection matrix V, which can be used to reduce the dimension of the dataset, is also computed in PAL_LINEAR_DISCRIMINANT_ANALYSIS. It maximizes the separation between classes after the data is projected.

In some cases, when the number of features exceeds the number of samples in each class, the covariance estimates may be rank deficient. PAL_LINEAR_DISCRIMINANT_ANALYSIS provides options to handle these cases, such as using regularized covariance or the pseudo inverse.

PAL_LINEAR_DISCRIMINANT_ANALYSIS handles missing data and missing labels by filling in the missing values or ignoring the corresponding samples.

PAL_LINEAR_DISCRIMINANT_ANALYSIS_CLASSIFY and PAL_LINEAR_DISCRIMINANT_ANALYSIS_PROJECT handle missing data by filling in zeros, and they ignore rows whose ID is missing.

Overall, LDA in PAL provides a comprehensive implementation of the method for classification and dimensionality reduction, with options to handle various scenarios.
------

-- Every unqualified table name below resolves against the DM_PAL schema.
SET SCHEMA DM_PAL;

-- Training data for the LDA example: four numeric features (X1..X4) plus the
-- class label of each sample.
-- The table is dropped and recreated so the script starts from a clean state;
-- the DROP raises an error if the table does not exist yet (e.g. on a first run).
DROP TABLE PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL;
CREATE COLUMN TABLE PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL (
    "X1"    DOUBLE,
    "X2"    DOUBLE,
    "X3"    DOUBLE,
    "X4"    DOUBLE,
    "CLASS" NVARCHAR(100)
);

-- Each INSERT names its target columns so the values stay bound to the right
-- columns even if the table definition is later reordered or extended.

-- Class 'Iris-setosa' (10 samples)
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (5.1, 3.5, 1.4, 0.2, 'Iris-setosa');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (4.9, 3.0, 1.4, 0.2, 'Iris-setosa');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (4.7, 3.2, 1.3, 0.2, 'Iris-setosa');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (4.6, 3.1, 1.5, 0.2, 'Iris-setosa');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (5.0, 3.6, 1.4, 0.2, 'Iris-setosa');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (5.4, 3.9, 1.7, 0.4, 'Iris-setosa');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (4.6, 3.4, 1.4, 0.3, 'Iris-setosa');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (5.0, 3.4, 1.5, 0.2, 'Iris-setosa');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (4.4, 2.9, 1.4, 0.2, 'Iris-setosa');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (4.9, 3.1, 1.5, 0.1, 'Iris-setosa');

-- Class 'Iris-versicolor' (10 samples)
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (7.0, 3.2, 4.7, 1.4, 'Iris-versicolor');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (6.4, 3.2, 4.5, 1.5, 'Iris-versicolor');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (6.9, 3.1, 4.9, 1.5, 'Iris-versicolor');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (5.5, 2.3, 4.0, 1.3, 'Iris-versicolor');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (6.5, 2.8, 4.6, 1.5, 'Iris-versicolor');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (5.7, 2.8, 4.5, 1.3, 'Iris-versicolor');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (6.3, 3.3, 4.7, 1.6, 'Iris-versicolor');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (4.9, 2.4, 3.3, 1.0, 'Iris-versicolor');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (6.6, 2.9, 4.6, 1.3, 'Iris-versicolor');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (5.2, 2.7, 3.9, 1.4, 'Iris-versicolor');

-- Class 'Iris-virginica' (10 samples)
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (6.3, 3.3, 6.0, 2.5, 'Iris-virginica');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (5.8, 2.7, 5.1, 1.9, 'Iris-virginica');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (7.1, 3.0, 5.9, 2.1, 'Iris-virginica');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (6.3, 2.9, 5.6, 1.8, 'Iris-virginica');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (6.5, 3.0, 5.8, 2.2, 'Iris-virginica');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (7.6, 3.0, 6.6, 2.1, 'Iris-virginica');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (4.9, 2.5, 4.5, 1.7, 'Iris-virginica');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (7.3, 2.9, 6.3, 1.8, 'Iris-virginica');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (6.7, 2.5, 5.8, 1.8, 'Iris-virginica');
INSERT INTO PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL ("X1", "X2", "X3", "X4", "CLASS") VALUES (7.2, 3.6, 6.1, 2.5, 'Iris-virginica');

-- Control parameters passed to the PAL procedure: one row per parameter, with
-- the value stored in whichever typed column applies (integer, double, string).
-- The DROP raises an error if the table does not exist yet (e.g. on a first run).
DROP TABLE PAL_PARAMETER_TBL;
CREATE COLUMN TABLE PAL_PARAMETER_TBL (
    "PARAM_NAME"   VARCHAR(256),
    "INT_VALUE"    INTEGER,
    "DOUBLE_VALUE" DOUBLE,
    "STRING_VALUE" VARCHAR(1000)
);

-- DO_PROJECTION is set through its integer value; the other value columns stay NULL.
-- NOTE(review): presumably 1 asks the procedure to also compute the projection
-- model; confirm against the PAL parameter reference.
-- The column list keeps the value bound to INT_VALUE by name, not by position.
INSERT INTO PAL_PARAMETER_TBL ("PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE") VALUES ('DO_PROJECTION', 1, NULL, NULL);

-- Result tables that persist the five outputs of the LDA procedure call.
-- Each table is dropped and recreated so that reruns start empty.

-- Name/value pairs of basic information about the fitted model.
DROP TABLE PAL_LDA_BASIC_INFO_TBL;
CREATE COLUMN TABLE PAL_LDA_BASIC_INFO_TBL (
    "NAME"  NVARCHAR(100),
    "VALUE" DOUBLE
);

-- One prior value per class.
DROP TABLE PAL_LDA_PRIORS_TBL;
CREATE COLUMN TABLE PAL_LDA_PRIORS_TBL (
    "CLASS" NVARCHAR(100),
    "PRIOR" DOUBLE
);

-- Classifier: per class, one coefficient for each feature plus an intercept.
DROP TABLE PAL_LDA_CLASSIFIER_TBL;
CREATE COLUMN TABLE PAL_LDA_CLASSIFIER_TBL (
    "CLASS"     NVARCHAR(100),
    "COEFF_X1"  DOUBLE,
    "COEFF_X2"  DOUBLE,
    "COEFF_X3"  DOUBLE,
    "COEFF_X4"  DOUBLE,
    "INTERCEPT" DOUBLE
);

-- Per-discriminant summary columns (SD, VAR_PROP, CUM_VAR_PROP).
DROP TABLE PAL_LDA_PROJECTION_INFO_TBL;
CREATE COLUMN TABLE PAL_LDA_PROJECTION_INFO_TBL (
    "DISCRIMINANT_ID" NVARCHAR(100),
    "SD"              DOUBLE,
    "VAR_PROP"        DOUBLE,
    "CUM_VAR_PROP"    DOUBLE
);

-- Projection model: one named row with a value for each feature X1..X4.
DROP TABLE PAL_LDA_PROJECTION_MODEL_TBL;
CREATE COLUMN TABLE PAL_LDA_PROJECTION_MODEL_TBL (
    "NAME" NVARCHAR(100),
    "X1"   DOUBLE,
    "X2"   DOUBLE,
    "X3"   DOUBLE,
    "X4"   DOUBLE
);

-- Anonymous block: runs LDA on the training data and stores each of the five
-- procedure outputs in its result table.
DO BEGIN
  -- Inputs: the labelled samples and the control parameters.
  in_data = SELECT "X1", "X2", "X3", "X4", "CLASS"
            FROM PAL_LINEAR_DISCRIMINANT_ANALYSIS_DATA_TBL;
  in_params = SELECT "PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE"
              FROM PAL_PARAMETER_TBL;

  -- Two input tables followed by five output table variables, in positional order.
  CALL _SYS_AFL.PAL_LINEAR_DISCRIMINANT_ANALYSIS (
    :in_data,
    :in_params,
    res_basic_info,
    res_priors,
    res_classifier,
    res_projection_info,
    res_projection_model
  );

  -- Persist the outputs. The procedure's output columns are taken as-is
  -- (SELECT *) and mapped by position onto the named target columns.
  INSERT INTO PAL_LDA_BASIC_INFO_TBL ("NAME", "VALUE")
    SELECT * FROM :res_basic_info;
  INSERT INTO PAL_LDA_PRIORS_TBL ("CLASS", "PRIOR")
    SELECT * FROM :res_priors;
  INSERT INTO PAL_LDA_CLASSIFIER_TBL ("CLASS", "COEFF_X1", "COEFF_X2", "COEFF_X3", "COEFF_X4", "INTERCEPT")
    SELECT * FROM :res_classifier;
  INSERT INTO PAL_LDA_PROJECTION_INFO_TBL ("DISCRIMINANT_ID", "SD", "VAR_PROP", "CUM_VAR_PROP")
    SELECT * FROM :res_projection_info;
  INSERT INTO PAL_LDA_PROJECTION_MODEL_TBL ("NAME", "X1", "X2", "X3", "X4")
    SELECT * FROM :res_projection_model;
END;
-- Display the stored results, one query per result table.
-- Columns are listed in table-definition order.
SELECT "NAME", "VALUE"
FROM PAL_LDA_BASIC_INFO_TBL;

SELECT "CLASS", "PRIOR"
FROM PAL_LDA_PRIORS_TBL;

SELECT "CLASS", "COEFF_X1", "COEFF_X2", "COEFF_X3", "COEFF_X4", "INTERCEPT"
FROM PAL_LDA_CLASSIFIER_TBL;

SELECT "DISCRIMINANT_ID", "SD", "VAR_PROP", "CUM_VAR_PROP"
FROM PAL_LDA_PROJECTION_INFO_TBL;

SELECT "NAME", "X1", "X2", "X3", "X4"
FROM PAL_LDA_PROJECTION_MODEL_TBL;

