From b16987800b35ba79cf893957b651ef453ba4aa96 Mon Sep 17 00:00:00 2001 From: Florian Schoppmann Date: Fri, 28 Sep 2012 08:47:08 -0700 Subject: [PATCH 1/5] Copied testspec/casespec/kmeans.xml to testspec/casespec/kmeans_new.xml, to track changes compared to old kmeans test cases. --- testspec/casespec/kmeans_new.xml | 3065 ++++++++++++++++++++++++++++++ 1 file changed, 3065 insertions(+) create mode 100755 testspec/casespec/kmeans_new.xml diff --git a/testspec/casespec/kmeans_new.xml b/testspec/casespec/kmeans_new.xml new file mode 100755 index 0000000..4c069f9 --- /dev/null +++ b/testspec/casespec/kmeans_new.xml @@ -0,0 +1,3065 @@ + + + feature + + + kmeans + + + kmeans_cset + + benchmark + TemplateExecutor + + + + kmeans_plusplus + + benchmark + TemplateExecutor + + + + kmeans_random + + benchmark + TemplateExecutor + + + + kmeans_canopy + + benchmark + TemplateExecutor + + + + kmeans_cset_ctas + + benchmark + TemplateExecutor + + + + kmeans_plusplus_ctas + + benchmark + TemplateExecutor + + + + kmeans_random_ctas + + benchmark + TemplateExecutor + + + + kmeans_canopy_ctas + + benchmark + TemplateExecutor + + + + + + kmeans_cset_baseline + It is to get baseline of kmeans using predefined centroids against R. + + 1 + + kmeans_cset + + dataset + km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red + + + dist_metric + l1norml2normcosinetanimoto + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_baseline_out_points + + + out_centroids + madlibtestresult.kmeans_cset_baseline_out_centroids + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_baseline_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_baseline_out_centroids; + + + + + + kmeans_plusplus_baseline + It is to get baseline of kmeans using plus plus against R. + + 1 + + kmeans_plusplus + + dataset + km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red + + + dist_metric + l1norml2normcosinetanimoto + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_baseline_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_baseline_out_centroids + + + k + 6 + + + sample_frac + 0.10 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_baseline_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_baseline_out_centroids; + + + + + + kmeans_random_baseline + It is to get baseline of kmeans using random against R. + + 1 + + kmeans_random + + dataset + km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red + + + dist_metric + l1norml2normcosinetanimoto + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_random_baseline_out_points + + + out_centroids + madlibtestresult.kmeans_random_baseline_out_centroids + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_baseline_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_baseline_out_centroids; + + + + + + kmeans_canopy_baseline + It is to get baseline of kmeans using canopy against R. + + 1 + + kmeans_canopy + + dataset + km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red + + + dist_metric + l1norml2normcosinetanimoto + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_baseline_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_baseline_out_centroids + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_baseline_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_baseline_out_centroids; + + + + + + kmeans_cset_gof_disabled + It is to test kmeans with gof disabled. + + 1 + + kmeans_cset + + dataset + km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_gof_disabled_out_points + + + out_centroids + madlibtestresult.kmeans_cset_gof_disabled_out_centroids + + + evaluate + False + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_gof_disabled_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_gof_disabled_out_centroids; + + + + + + kmeans_plusplus_gof_disabled + It is to test kmeans with gof disabled. + + 1 + + kmeans_plusplus + + dataset + km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_gof_disabled_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_gof_disabled_out_centroids + + + evaluate + False + + + k + 6 + + + sample_frac + 0.10 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_gof_disabled_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_gof_disabled_out_centroids; + + + + + + kmeans_random_gof_disabled + It is to test kmeans with gof disabled. + + 1 + + kmeans_random + + dataset + km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_random_gof_disabled_out_points + + + out_centroids + madlibtestresult.kmeans_random_gof_disabled_out_centroids + + + evaluate + False + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_gof_disabled_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_gof_disabled_out_centroids; + + + + + + kmeans_canopy_gof_disabled + It is to test kmeans with gof disabled. + + 1 + + kmeans_canopy + + dataset + km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_gof_disabled_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_gof_disabled_out_centroids + + + evaluate + False + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_gof_disabled_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_gof_disabled_out_centroids; + + + + + + kmeans_cset_maximum_iteration + It is to test kmeans with maximum iteration. + + 1 + + kmeans_cset + + dataset + km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red + + + max_iter + 5102030 + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_maximum_iteration_out_points + + + out_centroids + madlibtestresult.kmeans_cset_maximum_iteration_out_centroids + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_maximum_iteration_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_maximum_iteration_out_centroids; + + + + + + kmeans_plusplus_maximum_iteration + It is to test kmeans with maximum iteration. + + 1 + + kmeans_plusplus + + dataset + km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red + + + max_iter + 5102030 + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_maximum_iteration_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_maximum_iteration_out_centroids + + + k + 6 + + + sample_frac + 0.10 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_maximum_iteration_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_maximum_iteration_out_centroids; + + + + + + kmeans_random_maximum_iteration + It is to test kmeans with maximum iteration. + + 1 + + kmeans_random + + dataset + km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red + + + max_iter + 5102030 + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_random_maximum_iteration_out_points + + + out_centroids + madlibtestresult.kmeans_random_maximum_iteration_out_centroids + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_maximum_iteration_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_maximum_iteration_out_centroids; + + + + + + kmeans_canopy_maximum_iteration + It is to test kmeans with maximum iteration. + + 1 + + kmeans_canopy + + dataset + km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red + + + max_iter + 5102030 + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_maximum_iteration_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_maximum_iteration_out_centroids + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_maximum_iteration_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_maximum_iteration_out_centroids; + + + + + + kmeans_cset_negative_overwrite + It is to test kmeans with existing output tables. + + 1 + + kmeans_cset + + + dataset + km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_negative_overwrite_out_points + + + out_centroids + madlibtestresult.kmeans_cset_negative_overwrite_out_centroids + + + + + + kmeans_cset + + + dataset + km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_negative_overwrite_out_points + + + out_centroids + madlibtestresult.kmeans_cset_negative_overwrite_out_centroids + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_overwrite_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_overwrite_out_centroids; + + + + + + kmeans_plusplus_negative_overwrite + It is to test kmeans with existing output tables. + + 1 + + kmeans_plusplus + + + dataset + km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_negative_overwrite_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_negative_overwrite_out_centroids + + + k + 6 + + + sample_frac + 0.01 + + + + + + kmeans_plusplus + + + dataset + km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_negative_overwrite_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_negative_overwrite_out_centroids + + + k + 6 + + + sample_frac + 0.01 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_overwrite_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_overwrite_out_centroids; + + + + + + kmeans_random_negative_overwrite + It is to test kmeans with existing output tables. + + 1 + + kmeans_random + + + dataset + km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_random_negative_overwrite_out_points + + + out_centroids + madlibtestresult.kmeans_random_negative_overwrite_out_centroids + + + k + 6 + + + + + + kmeans_random + + + dataset + km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_random_negative_overwrite_out_points + + + out_centroids + madlibtestresult.kmeans_random_negative_overwrite_out_centroids + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_overwrite_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_overwrite_out_centroids; + + + + + + kmeans_canopy_negative_overwrite + It is to test kmeans with existing output tables. + + 1 + + kmeans_canopy + + + dataset + km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_overwrite_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_negative_overwrite_out_centroids + + + t1 + NULL + + + t2 + NULL + + + + + + kmeans_canopy + + + dataset + km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_overwrite_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_negative_overwrite_out_centroids + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_overwrite_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_overwrite_out_centroids; + + + + + + kmeans_cset_negative_src_relation + It is to test kmeans using negative source relation. + + 1 + + kmeans_cset + + src_relation + madlibtestdata.invalid_src_relationNULL + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_negative_src_relation_out_points + + + out_centroids + madlibtestresult.kmeans_cset_negative_src_relation_out_centroids + + + init_cset_rel + madlibtestdata.km_abalone_centroids + + + init_cset_col + position + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_src_relation_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_src_relation_out_centroids; + + + + + + kmeans_plusplus_negative_src_relation + It is to test kmeans using negative source relation. + + 1 + + kmeans_plusplus + + src_relation + madlibtestdata.invalid_src_relationNULL + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_negative_src_relation_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_negative_src_relation_out_centroids + + + k + 6 + + + sample_frac + 0.01 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_src_relation_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_src_relation_out_centroids; + + + + + + kmeans_random_negative_src_relation + It is to test kmeans using negative source relation. + + 1 + + kmeans_random + + src_relation + madlibtestdata.invalid_src_relationNULL + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_random_negative_src_relation_out_points + + + out_centroids + madlibtestresult.kmeans_random_negative_src_relation_out_centroids + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_src_relation_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_src_relation_out_centroids; + + + + + + kmeans_canopy_negative_src_relation + It is to test kmeans using negative source relation. + + 1 + + kmeans_canopy + + src_relation + madlibtestdata.invalid_src_relationNULL + + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_src_relation_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_negative_src_relation_out_centroids + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_src_relation_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_src_relation_out_centroids; + + + + + + kmeans_cset_negative_src_col_data + It is to test kmeans using negative source column data. + + 1 + + kmeans_cset + + src_col_data + invalid_src_col_dataNULL + + + + src_relation + madlibtestdata.km_abalone + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_negative_src_col_data_out_points + + + out_centroids + madlibtestresult.kmeans_cset_negative_src_col_data_out_centroids + + + + init_cset_rel + madlibtestdata.km_abalone_centroids + + + init_cset_col + position + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_src_col_data_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_src_col_data_out_centroids; + + + + + + kmeans_plusplus_negative_src_col_data + It is to test kmeans using negative source column data. + + 1 + + kmeans_plusplus + + src_col_data + invalid_src_col_dataNULL + + + + src_relation + madlibtestdata.km_abalone + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_negative_src_col_data_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_negative_src_col_data_out_centroids + + + k + 6 + + + sample_frac + 0.01 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_src_col_data_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_src_col_data_out_centroids; + + + + + + kmeans_random_negative_src_col_data + It is to test kmeans using negative source column data. + + 1 + + kmeans_random + + src_col_data + invalid_src_col_dataNULL + + + + src_relation + madlibtestdata.km_abalone + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_random_negative_src_col_data_out_points + + + out_centroids + madlibtestresult.kmeans_random_negative_src_col_data_out_centroids + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_src_col_data_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_src_col_data_out_centroids; + + + + + + kmeans_canopy_negative_src_col_data + It is to test kmeans using negative source column data. + + 1 + + kmeans_canopy + + src_col_data + invalid_src_col_dataNULL + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_src_col_data_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_negative_src_col_data_out_centroids + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_src_col_data_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_src_col_data_out_centroids; + + + + + + kmeans_cset_negative_src_col_id + It is to test kmeans using negative source column id. + + 1 + + kmeans_cset + + src_col_id + invalid_src_col_idNULL + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + out_points + madlibtestresult.kmeans_cset_negative_src_col_id_out_points + + + out_centroids + madlibtestresult.kmeans_cset_negative_src_col_id_out_centroids + + + init_cset_rel + madlibtestdata.km_abalone_centroids + + + init_cset_col + position + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_src_col_id_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_src_col_id_out_centroids; + + + + + + kmeans_plusplus_negative_src_col_id + It is to test kmeans using negative source column id. + + 1 + + kmeans_plusplus + + src_col_id + invalid_src_col_idNULL + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + out_points + madlibtestresult.kmeans_plusplus_negative_src_col_id_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_negative_src_col_id_out_centroids + + + k + 6 + + + sample_frac + 0.01 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_src_col_id_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_src_col_id_out_centroids; + + + + + + kmeans_random_negative_src_col_id + It is to test kmeans using negative source column id. + + 1 + + kmeans_random + + src_col_id + invalid_src_col_idNULL + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + out_points + madlibtestresult.kmeans_random_negative_src_col_id_out_points + + + out_centroids + madlibtestresult.kmeans_random_negative_src_col_id_out_centroids + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_src_col_id_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_src_col_id_out_centroids; + + + + + + kmeans_canopy_negative_src_col_id + It is to test kmeans using negative source column id. + + 1 + + kmeans_canopy + + src_col_id + invalid_src_col_idNULL + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + out_points + madlibtestresult.kmeans_canopy_negative_src_col_id_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_negative_src_col_id_out_centroids + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_src_col_id_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_src_col_id_out_centroids; + + + + + + kmeans_cset_negative_dist_metric + It is to test kmeans using negative dist metric. + + 1 + + kmeans_cset + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_negative_dist_metric_out_points + + + out_centroids + madlibtestresult.kmeans_cset_negative_dist_metric_out_centroids + + + dist_metric + invalid_dist_metric + + + init_cset_rel + madlibtestdata.km_abalone_centroids + + + init_cset_col + position + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_dist_metric_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_dist_metric_out_centroids; + + + + + + kmeans_plusplus_negative_dist_metric + It is to test kmeans using negative dist metric. + + 1 + + kmeans_plusplus + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_negative_dist_metric_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_negative_dist_metric_out_centroids + + + dist_metric + invalid_dist_metric + + + k + 6 + + + sample_frac + 0.01 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_dist_metric_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_dist_metric_out_centroids; + + + + + + kmeans_random_negative_dist_metric + It is to test kmeans using negative dist metric. + + 1 + + kmeans_random + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + dist_metric + pid + + + out_points + madlibtestresult.kmeans_random_negative_dist_metric_out_points + + + out_centroids + madlibtestresult.kmeans_random_negative_dist_metric_out_centroids + + + dist_metric + invalid_dist_metric + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_dist_metric_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_dist_metric_out_centroids; + + + + + + kmeans_canopy_negative_dist_metric + It is to test kmeans using negative dist metric. + + 1 + + kmeans_canopy + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_dist_metric_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_negative_dist_metric_out_centroids + + + dist_metric + invalid_dist_metric + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_dist_metric_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_dist_metric_out_centroids; + + + + + + kmeans_plusplus_negative_k + It is to test kmeans using negative k. + + 1 + + kmeans_plusplus + + k + -10 + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_negative_k_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_negative_k_out_centroids + + + sample_frac + 0.01 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_k_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_k_out_centroids; + + + + + + kmeans_random_negative_k + It is to test kmeans using negative k. + + 1 + + kmeans_random + + k + -10 + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + k + pid + + + out_points + madlibtestresult.kmeans_random_negative_k_out_points + + + out_centroids + madlibtestresult.kmeans_random_negative_k_out_centroids + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_k_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_k_out_centroids; + + + + + + kmeans_cset_negative_max_iteration + It is to test kmeans using negative max iteration. + + 1 + + kmeans_cset + + max_iter + -10 + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_negative_max_iteration_out_points + + + out_centroids + madlibtestresult.kmeans_cset_negative_max_iteration_out_centroids + + + init_cset_rel + madlibtestdata.km_abalone_centroids + + + init_cset_col + position + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_max_iteration_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_max_iteration_out_centroids; + + + + + + kmeans_plusplus_negative_max_iteration + It is to test kmeans using negative max iteration. + + 1 + + kmeans_plusplus + + max_iter + -10 + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_negative_max_iteration_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_negative_max_iteration_out_centroids + + + k + 6 + + + sample_frac + 0.01 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_max_iteration_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_max_iteration_out_centroids; + + + + + + kmeans_random_negative_max_iteration + It is to test kmeans using negative max iteration. + + 1 + + kmeans_random + + max_iter + -10 + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_random_negative_max_iteration_out_points + + + out_centroids + madlibtestresult.kmeans_random_negative_max_iteration_out_centroids + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_max_iteration_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_max_iteration_out_centroids; + + + + + + kmeans_canopy_negative_max_iteration + It is to test kmeans using negative max iteration. + + 1 + + kmeans_canopy + + max_iter + -10 + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_max_iteration_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_negative_max_iteration_out_centroids + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_max_iteration_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_max_iteration_out_centroids; + + + + + + kmeans_cset_negative_convergence_threshold + It is to test kmeans using negative convergence threshold. + + 1 + + kmeans_cset + + conv_threshold + -0.0001010000 + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_negative_convergence_threshold_out_points + + + out_centroids + madlibtestresult.kmeans_cset_negative_convergence_threshold_out_centroids + + + init_cset_rel + madlibtestdata.km_abalone_centroids + + + init_cset_col + position + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_convergence_threshold_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_convergence_threshold_out_centroids; + + + + + + kmeans_plusplus_negative_convergence_threshold + It is to test kmeans using negative convergence threshold. + + 1 + + kmeans_plusplus + + conv_threshold + -0.0001010000 + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_negative_convergence_threshold_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_negative_convergence_threshold_out_centroids + + + k + 6 + + + sample_frac + 0.01 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_convergence_threshold_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_convergence_threshold_out_centroids; + + + + + + kmeans_random_negative_convergence_threshold + It is to test kmeans using negative convergence threshold. + + 1 + + kmeans_random + + conv_threshold + -0.0001010000 + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_random_negative_convergence_threshold_out_points + + + out_centroids + madlibtestresult.kmeans_random_negative_convergence_threshold_out_centroids + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_convergence_threshold_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_convergence_threshold_out_centroids; + + + + + + kmeans_canopy_negative_convergence_threshold + It is to test kmeans using negative convergence threshold. + + 1 + + kmeans_canopy + + conv_threshold + -0.0001010000 + + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_convergence_threshold_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_negative_convergence_threshold_out_centroids + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_convergence_threshold_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_convergence_threshold_out_centroids; + + + + + + kmeans_cset_negative_gof_switch + It is to test kmeans using negative gof switch. + + 1 + + kmeans_cset + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_negative_gof_switch_out_points + + + out_centroids + madlibtestresult.kmeans_cset_negative_gof_switch_out_centroids + + + evaluate + invalid_gof_switch + + + init_cset_rel + madlibtestdata.km_abalone_centroids + + + init_cset_col + position + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_gof_switch_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_gof_switch_out_centroids; + + + + + + kmeans_plusplus_negative_gof_switch + It is to test kmeans using negative gof switch. + + 1 + + kmeans_plusplus + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_negative_gof_switch_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_negative_gof_switch_out_centroids + + + evaluate + invalid_gof_switch + + + k + 6 + + + sample_frac + 0.01 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_gof_switch_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_gof_switch_out_centroids; + + + + + + kmeans_random_negative_gof_switch + It is to test kmeans using negative gof switch. + + 1 + + kmeans_random + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_random_negative_gof_switch_out_points + + + out_centroids + madlibtestresult.kmeans_random_negative_gof_switch_out_centroids + + + evaluate + invalid_gof_switch + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_gof_switch_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_gof_switch_out_centroids; + + + + + + kmeans_canopy_negative_gof_switch + It is to test kmeans using negative gof switch. + + 1 + + kmeans_canopy + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_gof_switch_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_negative_gof_switch_out_centroids + + + evaluate + invalid_gof_switch + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_gof_switch_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_gof_switch_out_centroids; + + + + + + kmeans_cset_negative_out_points + It is to test kmeans using negative out points. + + 1 + + kmeans_cset + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + NULL + + + out_centroids + madlibtestresult.kmeans_cset_negative_out_points_out_centroids + + + init_cset_rel + madlibtestdata.km_abalone_centroids + + + init_cset_col + position + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_out_points_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_out_points_out_centroids; + + + + + + kmeans_plusplus_negative_out_points + It is to test kmeans using negative out points. + + 1 + + kmeans_plusplus + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + NULL + + + out_centroids + madlibtestresult.kmeans_plusplus_negative_out_points_out_centroids + + + k + 6 + + + sample_frac + 0.01 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_out_points_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_out_points_out_centroids; + + + + + + kmeans_random_negative_out_points + It is to test kmeans using negative out points. + + 1 + + kmeans_random + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + NULL + + + out_centroids + madlibtestresult.kmeans_random_negative_out_points_out_centroids + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_out_points_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_out_points_out_centroids; + + + + + + kmeans_canopy_negative_out_points + It is to test kmeans using negative out points. + + 1 + + kmeans_canopy + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + NULL + + + out_centroids + madlibtestresult.kmeans_canopy_negative_out_points_out_centroids + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_out_points_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_out_points_out_centroids; + + + + + + kmeans_cset_negative_out_centroids + It is to test kmeans using negative out centroids. + + 1 + + kmeans_cset + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_out_centroids_out_points + + + out_centroids + NULL + + + init_cset_rel + madlibtestdata.km_abalone_centroids + + + init_cset_col + position + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_out_centroids_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_out_centroids_out_centroids; + + + + + + kmeans_plusplus_negative_out_centroids + It is to test kmeans using negative out centroids. + + 1 + + kmeans_plusplus + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_out_centroids_out_points + + + out_centroids + NULL + + + k + 6 + + + sample_frac + 0.01 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_out_centroids_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_out_centroids_out_centroids; + + + + + + kmeans_random_negative_out_centroids + It is to test kmeans using negative out centroids. + + 1 + + kmeans_random + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_out_centroids_out_points + + + out_centroids + NULL + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_out_centroids_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_out_centroids_out_centroids; + + + + + + kmeans_canopy_negative_out_centroids + It is to test kmeans using negative out centroids. + + 1 + + kmeans_canopy + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_out_centroids_out_points + + + out_centroids + NULL + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_out_centroids_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_out_centroids_out_centroids; + + + + + + kmeans_cset_negative_verbosity + It is to test kmeans using negative verbosity. + + 1 + + kmeans_cset + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_negative_verbosity_out_points + + + out_centroids + madlibtestresult.kmeans_cset_negative_verbosity_out_centroids + + + verbosity + invalid_verbosity + + + init_cset_rel + madlibtestdata.km_abalone_centroids + + + init_cset_col + position + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_verbosity_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_verbosity_out_centroids; + + + + + + kmeans_plusplus_negative_verbosity + It is to test kmeans using negative verbosity. + + 1 + + kmeans_plusplus + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_negative_verbosity_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_negative_verbosity_out_centroids + + + verbosity + invalid_verbosity + + + k + 6 + + + sample_frac + 0.01 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_verbosity_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_negative_verbosity_out_centroids; + + + + + + kmeans_random_negative_verbosity + It is to test kmeans using negative verbosity. + + 1 + + kmeans_random + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_random_negative_verbosity_out_points + + + out_centroids + madlibtestresult.kmeans_random_negative_verbosity_out_centroids + + + verbosity + invalid_verbosity + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_verbosity_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_negative_verbosity_out_centroids; + + + + + + kmeans_canopy_negative_verbosity + It is to test kmeans using negative verbosity. + + 1 + + kmeans_canopy + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_verbosity_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_negative_verbosity_out_centroids + + + verbosity + invalid_verbosity + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_verbosity_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_verbosity_out_centroids; + + + + + + kmeans_canopy_negative_t1 + It is to test kmeans using negative t1. + + 1 + + kmeans_canopy + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_t1_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_negative_t1_out_centroids + + + t1 + -1 + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_t1_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_t1_out_centroids; + + + + + + kmeans_canopy_negative_t2 + It is to test kmeans using negative t2. + + 1 + + kmeans_canopy + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_negative_t2_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_negative_t2_out_centroids + + + t1 + NULL + + + t2 + -1 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_t2_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_negative_t2_out_centroids; + + + + + + kmeans_cset_negative_init_cset_rel + It is to test kmeans using negative init cset relation. + + 1 + + kmeans_cset + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_negative_init_cset_rel_out_points + + + out_centroids + madlibtestresult.kmeans_cset_negative_init_cset_rel_out_centroids + + + init_cset_rel + invalid_init_cset_rel + + + init_cset_col + position + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_init_cset_rel_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_init_cset_rel_out_centroids; + + + + + + kmeans_cset_negative_init_cset_col + It is to test kmeans using negative init cset column. + + 1 + + kmeans_cset + + + src_relation + madlibtestdata.km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_negative_init_cset_col_out_points + + + out_centroids + madlibtestresult.kmeans_cset_negative_init_cset_col_out_centroids + + + init_cset_rel + madlibtestdata.km_abalone_precentroids + + + init_cset_col + invalid_init_cset_col + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_init_cset_col_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_negative_init_cset_col_out_centroids; + + + + + + kmeans_cset_ctas + It is to test kmeans using create table as. + + 1 + + kmeans_cset_ctas + + + dataset + km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_cset_ctas_out_points + + + out_centroids + madlibtestresult.kmeans_cset_ctas_out_centroids + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_ctas_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_ctas_out_centroids; + + + + + + kmeans_plusplus_ctas + It is to test kmeans using create table as. + + 1 + + kmeans_plusplus_ctas + + + dataset + km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_plusplus_ctas_out_points + + + out_centroids + madlibtestresult.kmeans_plusplus_ctas_out_centroids + + + k + 6 + + + sample_frac + 0.01 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_ctas_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_ctas_out_centroids; + + + + + + kmeans_random_ctas + It is to test kmeans using create table as. + + 1 + + kmeans_random_ctas + + + dataset + km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_random_ctas_out_points + + + out_centroids + madlibtestresult.kmeans_random_ctas_out_centroids + + + k + 6 + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_random_ctas_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_ctas_out_centroids; + + + + + + kmeans_canopy_ctas + It is to test kmeans using create table as. + + 1 + + kmeans_canopy_ctas + + + dataset + km_abalone + + + src_col_data + position + + + src_col_id + pid + + + out_points + madlibtestresult.kmeans_canopy_ctas_out_points + + + out_centroids + madlibtestresult.kmeans_canopy_ctas_out_centroids + + + t1 + NULL + + + t2 + NULL + + + + DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_ctas_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_ctas_out_centroids; + + + + + + From bfc1c8c70e2be92ba6f952557a447da70a995c13 Mon Sep 17 00:00:00 2001 From: Florian Schoppmann Date: Fri, 28 Sep 2012 08:48:47 -0700 Subject: [PATCH 2/5] Copied (and adapted) existing k-means test cases to new k-means --- testspec/casespec/kmeans_new.xml | 487 ++-------------------------- testspec/metadata/algorithmspec.xml | 464 +++++++++++++++++++++++++- testspec/metadata/dataset.xml | 105 ++++++ 3 files changed, 599 insertions(+), 457 deletions(-) diff --git a/testspec/casespec/kmeans_new.xml b/testspec/casespec/kmeans_new.xml index 4c069f9..91567e9 100755 --- a/testspec/casespec/kmeans_new.xml +++ b/testspec/casespec/kmeans_new.xml @@ -6,56 +6,42 @@ kmeans - kmeans_cset + kmeans_new_cset benchmark TemplateExecutor - kmeans_plusplus + kmeans_new_plusplus benchmark TemplateExecutor - kmeans_random + kmeans_new_random benchmark TemplateExecutor - kmeans_canopy + kmeans_new_cset_ctas benchmark TemplateExecutor - kmeans_cset_ctas + kmeans_new_plusplus_ctas benchmark TemplateExecutor - kmeans_plusplus_ctas - - benchmark - TemplateExecutor - - - - kmeans_random_ctas - - benchmark - TemplateExecutor - - - - kmeans_canopy_ctas + kmeans_new_random_ctas benchmark TemplateExecutor @@ -64,364 +50,81 @@ - kmeans_cset_baseline + kmeans_new_cset_baseline It is to get baseline of kmeans using predefined centroids against R. 1 - kmeans_cset + kmeans_new_cset dataset km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red dist_metric - l1norml2normcosinetanimoto + dist_norm1squared_dist_norm2squared_anglesquared_tanimoto - - - src_col_data - position - - - src_col_id - pid - - - out_points - madlibtestresult.kmeans_cset_baseline_out_points - - - out_centroids - madlibtestresult.kmeans_cset_baseline_out_centroids - - - - DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_baseline_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_baseline_out_centroids; - + + DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_baseline_result; + - - kmeans_plusplus_baseline + + kmeans_new_plusplus_baseline It is to get baseline of kmeans using plus plus against R. 1 - kmeans_plusplus + kmeans_new_plusplus dataset km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red dist_metric - l1norml2normcosinetanimoto + dist_norm1squared_dist_norm2squared_anglesquared_tanimoto - - src_col_data - position - - - src_col_id - pid - - - out_points - madlibtestresult.kmeans_plusplus_baseline_out_points - - - out_centroids - madlibtestresult.kmeans_plusplus_baseline_out_centroids - k 6 - - sample_frac - 0.10 - - - DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_baseline_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_baseline_out_centroids; - - - kmeans_random_baseline + + kmeans_new_random_baseline It is to get baseline of kmeans using random against R. 1 - kmeans_random + kmeans_new_random dataset km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red dist_metric - l1norml2normcosinetanimoto + dist_norm1squared_dist_norm2squared_anglesquared_tanimoto - - src_col_data - position - - - src_col_id - pid - - - out_points - madlibtestresult.kmeans_random_baseline_out_points - - - out_centroids - madlibtestresult.kmeans_random_baseline_out_centroids - k 6 - - DROP TABLE IF EXISTS madlibtestresult.kmeans_random_baseline_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_baseline_out_centroids; - - kmeans_canopy_baseline - It is to get baseline of kmeans using canopy against R. - - 1 - - kmeans_canopy - - dataset - km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red - - - dist_metric - l1norml2normcosinetanimoto - - - - src_col_data - position - - - src_col_id - pid - - - out_points - madlibtestresult.kmeans_canopy_baseline_out_points - - - out_centroids - madlibtestresult.kmeans_canopy_baseline_out_centroids - - - t1 - NULL - - - t2 - NULL - - - - DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_baseline_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_baseline_out_centroids; - - - - - - kmeans_cset_gof_disabled - It is to test kmeans with gof disabled. - - 1 - - kmeans_cset - - dataset - km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red - - - - src_col_data - position - - - src_col_id - pid - - - out_points - madlibtestresult.kmeans_cset_gof_disabled_out_points - - - out_centroids - madlibtestresult.kmeans_cset_gof_disabled_out_centroids - - - evaluate - False - - - - DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_gof_disabled_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_gof_disabled_out_centroids; - - - - - - kmeans_plusplus_gof_disabled - It is to test kmeans with gof disabled. - - 1 - - kmeans_plusplus - - dataset - km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red - - - - src_col_data - position - - - src_col_id - pid - - - out_points - madlibtestresult.kmeans_plusplus_gof_disabled_out_points - - - out_centroids - madlibtestresult.kmeans_plusplus_gof_disabled_out_centroids - - - evaluate - False - - - k - 6 - - - sample_frac - 0.10 - - - - DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_gof_disabled_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_gof_disabled_out_centroids; - - - - - - kmeans_random_gof_disabled - It is to test kmeans with gof disabled. - - 1 - - kmeans_random - - dataset - km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red - - - - src_col_data - position - - - src_col_id - pid - - - out_points - madlibtestresult.kmeans_random_gof_disabled_out_points - - - out_centroids - madlibtestresult.kmeans_random_gof_disabled_out_centroids - - - evaluate - False - - - k - 6 - - - - DROP TABLE IF EXISTS madlibtestresult.kmeans_random_gof_disabled_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_gof_disabled_out_centroids; - - - - - - kmeans_canopy_gof_disabled - It is to test kmeans with gof disabled. - - 1 - - kmeans_canopy - - dataset - km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red - - - - src_col_data - position - - - src_col_id - pid - - - out_points - madlibtestresult.kmeans_canopy_gof_disabled_out_points - - - out_centroids - madlibtestresult.kmeans_canopy_gof_disabled_out_centroids - - - evaluate - False - - - t1 - NULL - - - t2 - NULL - - - - DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_gof_disabled_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_gof_disabled_out_centroids; - - - - - - kmeans_cset_maximum_iteration + kmeans_new_cset_maximum_iteration It is to test kmeans with maximum iteration. 1 - kmeans_cset + kmeans_new_cset dataset km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red @@ -430,37 +133,20 @@ max_iter 5102030 - - - src_col_data - position - - - src_col_id - pid - - - out_points - madlibtestresult.kmeans_cset_maximum_iteration_out_points - - - out_centroids - madlibtestresult.kmeans_cset_maximum_iteration_out_centroids - - - - DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_maximum_iteration_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_maximum_iteration_out_centroids; - + + dist_metric + dist_norm1squared_dist_norm2squared_anglesquared_tanimoto + - kmeans_plusplus_maximum_iteration + kmeans_new_plusplus_maximum_iteration It is to test kmeans with maximum iteration. 1 - kmeans_plusplus + kmeans_new_plusplus dataset km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red @@ -470,44 +156,21 @@ 5102030 - - src_col_data - position - - - src_col_id - pid - - - out_points - madlibtestresult.kmeans_plusplus_maximum_iteration_out_points - - - out_centroids - madlibtestresult.kmeans_plusplus_maximum_iteration_out_centroids - k 6 - - sample_frac - 0.10 - - - DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_maximum_iteration_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_plusplus_maximum_iteration_out_centroids; - - - kmeans_random_maximum_iteration + + kmeans_new_random_maximum_iteration It is to test kmeans with maximum iteration. 1 - kmeans_random + kmeans_new_random dataset km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red @@ -517,77 +180,11 @@ 5102030 - - src_col_data - position - - - src_col_id - pid - - - out_points - madlibtestresult.kmeans_random_maximum_iteration_out_points - - - out_centroids - madlibtestresult.kmeans_random_maximum_iteration_out_centroids - k 6 - - DROP TABLE IF EXISTS madlibtestresult.kmeans_random_maximum_iteration_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_random_maximum_iteration_out_centroids; - - - - - - kmeans_canopy_maximum_iteration - It is to test kmeans with maximum iteration. - - 1 - - kmeans_canopy - - dataset - km_abalonekm_movement_libraskm_us_census_1990km_water_treatmentkm_winekm_winequality_red - - - max_iter - 5102030 - - - - src_col_data - position - - - src_col_id - pid - - - out_points - madlibtestresult.kmeans_canopy_maximum_iteration_out_points - - - out_centroids - madlibtestresult.kmeans_canopy_maximum_iteration_out_centroids - - - t1 - NULL - - - t2 - NULL - - - - DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_maximum_iteration_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_canopy_maximum_iteration_out_centroids; - @@ -2902,36 +2499,20 @@ - kmeans_cset_ctas + kmeans_new_cset_ctas It is to test kmeans using create table as. 1 - kmeans_cset_ctas + kmeans_new_cset_ctas dataset km_abalone - - src_col_data - position - - - src_col_id - pid - - - out_points - madlibtestresult.kmeans_cset_ctas_out_points - - - out_centroids - madlibtestresult.kmeans_cset_ctas_out_centroids - - DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_ctas_out_points;DROP TABLE IF EXISTS madlibtestresult.kmeans_cset_ctas_out_centroids; + DROP TABLE IF EXISTS madlibtestresult.kmeans_new_cset_ctas_abalone; diff --git a/testspec/metadata/algorithmspec.xml b/testspec/metadata/algorithmspec.xml index b1b3b5c..93d0974 100755 --- a/testspec/metadata/algorithmspec.xml +++ b/testspec/metadata/algorithmspec.xml @@ -3317,7 +3317,73 @@ - + + kmeans_new_cset + true + + + src_relation + text + + + src_col_data + text + + + dist_metric + text + squared_dist_norm2 + + + agg_mean + text + avg + + + max_iter + int + 20 + + + conv_threshold + float + 0.001 + + + init_cset_rel + text + + + init_cset_col + text + + + + objective_fn + double precision + + + frac_reassigned + double precision + + + num_iterations + integer + + + + kmeans_plusplus true