Term Analysis is a function that calculates TF-IDF values for a set of input documents. TF-IDF (term frequency–inverse document frequency) is a numerical statistic that measures how important a word is to a document within a collection, or corpus, of documents. It combines the frequency of the word in the document (term frequency) with the inverse of the number of documents in the corpus that contain the word (inverse document frequency). The TF-IDF value therefore increases with the frequency of the word in the document, but is offset by how frequently the word appears across the corpus, so that words common to many documents receive a lower weight. This function supports multiple languages, including English, German, Spanish, French, Russian, and Portuguese.
------

-- Make DM_PAL the current schema so the unqualified table names below resolve to it.
SET SCHEMA DM_PAL;

-- Input documents for the term analysis: one row per document holding its
-- identifier, its space-separated terms, and the category it is labelled with.
-- NOTE(review): DROP TABLE raises an error when the table does not exist yet
-- (e.g. on a first run); confirm whether that is acceptable for this script.
DROP TABLE PAL_TFIDF_DATA_TAB;
CREATE COLUMN TABLE PAL_TFIDF_DATA_TAB (
    "ID" NVARCHAR(1000),
    "CONTENT" NVARCHAR(1000),
    "CATEGORY" NVARCHAR(1000)
);

-- Column lists are spelled out so the inserts do not depend on the physical
-- column order of the table. Rows are inserted in the original order
-- (doc5 before doc4).
INSERT INTO PAL_TFIDF_DATA_TAB ("ID", "CONTENT", "CATEGORY") VALUES ('doc1', 'term1 term2 term2 term3 term3 term3', 'CATEGORY_1');
INSERT INTO PAL_TFIDF_DATA_TAB ("ID", "CONTENT", "CATEGORY") VALUES ('doc2', 'term2 term3 term3 term4 term4 term4', 'CATEGORY_1');
INSERT INTO PAL_TFIDF_DATA_TAB ("ID", "CONTENT", "CATEGORY") VALUES ('doc3', 'term3 term4 term4 term5 term5 term5', 'CATEGORY_2');
INSERT INTO PAL_TFIDF_DATA_TAB ("ID", "CONTENT", "CATEGORY") VALUES ('doc5', 'term3 term4 term4 term5 term5 term5 term5 term5 term5', 'CATEGORY_2');
INSERT INTO PAL_TFIDF_DATA_TAB ("ID", "CONTENT", "CATEGORY") VALUES ('doc4', 'term4 term6', 'CATEGORY_3');
INSERT INTO PAL_TFIDF_DATA_TAB ("ID", "CONTENT", "CATEGORY") VALUES ('doc6', 'term4 term6 term6 term6', 'CATEGORY_3');


-- Parameter table handed to the PAL call: a parameter name plus one value
-- column per value type. This script inserts no rows into it, so the
-- procedure receives an empty parameter set.
DROP TABLE PAL_PARAMETER_TAB;
CREATE COLUMN TABLE PAL_PARAMETER_TAB (
    "PARAM_NAME" NVARCHAR(256),
    "INT_VALUE" INTEGER,
    "DOUBLE_VALUE" DOUBLE,
    "STRING_VALUE" NVARCHAR(1000)
);

-- Result table for the first output of the term analysis call
-- (one row per term with its frequency counts and TF / IDF values).
DROP TABLE PAL_TM_TERM_TAB;
CREATE COLUMN TABLE PAL_TM_TERM_TAB (
    "TM_TERMS" NVARCHAR(1000),
    "TM_TERM_FREQUENCY" INTEGER,
    "TM_IDF_FREQUENCY" INTEGER,
    "TF_VALUE" DOUBLE,
    "IDF_VALUE" DOUBLE
);

-- Result table for the second output of the term analysis call
-- (a term frequency per document ID and term).
DROP TABLE PAL_TM_DOC_TERM_FREQ_TAB;
CREATE COLUMN TABLE PAL_TM_DOC_TERM_FREQ_TAB (
    "ID" NVARCHAR(1000),
    "TM_TERMS" NVARCHAR(1000),
    "TM_TERM_FREQUENCY" INTEGER
);

-- Result table for the third output of the term analysis call
-- (a category per document ID).
DROP TABLE PAL_TM_CATE_TAB;
CREATE COLUMN TABLE PAL_TM_CATE_TAB (
    "ID" NVARCHAR(1000),
    "CATEGORY" NVARCHAR(1000)
);

-- Runs the PAL term analysis on the sample documents and persists its three
-- outputs into the result tables created above.
DO BEGIN
	-- Input documents and parameters, read with explicit column lists so the
	-- column order passed to the procedure does not depend on the table layout.
	lt_data = SELECT "ID", "CONTENT", "CATEGORY" FROM PAL_TFIDF_DATA_TAB;
	lt_param = SELECT "PARAM_NAME", "INT_VALUE", "DOUBLE_VALUE", "STRING_VALUE" FROM PAL_PARAMETER_TAB;

	-- Two inputs (documents, parameters) and three outputs (terms,
	-- per-document term frequencies, document categories).
	CALL _SYS_AFL.PAL_TF_ANALYSIS(:lt_data, :lt_param, lt_term, lt_docterm, lt_cat);

	-- The column names of the procedure's output table variables are not
	-- visible in this script, so they are still read with SELECT * and mapped
	-- by position; the target columns are named explicitly.
	INSERT INTO PAL_TM_TERM_TAB ("TM_TERMS", "TM_TERM_FREQUENCY", "TM_IDF_FREQUENCY", "TF_VALUE", "IDF_VALUE")
		SELECT * FROM :lt_term;
	INSERT INTO PAL_TM_DOC_TERM_FREQ_TAB ("ID", "TM_TERMS", "TM_TERM_FREQUENCY")
		SELECT * FROM :lt_docterm;
	INSERT INTO PAL_TM_CATE_TAB ("ID", "CATEGORY")
		SELECT * FROM :lt_cat;
END;

--------------------------------------------

-- Display the three result tables. Columns are listed explicitly and each
-- query is ordered so that repeated runs print the rows in the same order.
SELECT
    "TM_TERMS",
    "TM_TERM_FREQUENCY",
    "TM_IDF_FREQUENCY",
    "TF_VALUE",
    "IDF_VALUE"
FROM PAL_TM_TERM_TAB
ORDER BY "TM_TERMS";

SELECT
    "ID",
    "TM_TERMS",
    "TM_TERM_FREQUENCY"
FROM PAL_TM_DOC_TERM_FREQ_TAB
ORDER BY "ID", "TM_TERMS";

SELECT
    "ID",
    "CATEGORY"
FROM PAL_TM_CATE_TAB
ORDER BY "ID";
