diff --git a/.gitignore b/.gitignore index 8d826cfc6..e14a58638 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ /tests/StackName.stack /tests/ChainName.chain /tests/ClusterName.cluster +dist/ .coverage htmlcov/ @@ -23,12 +24,13 @@ temp.chain tests/Example Data (A).cache *.css *.ttf +*.js *.eot *.svg *.woff + *.js -docs/API/_build/ docs/API/_static/ docs/API/_templates/ @@ -36,5 +38,5 @@ docs/API/_templates/ *.egg .eggs/ .cache - -build \ No newline at end of file +.pytest_cache/* +build diff --git a/.pytest_cache/README.md b/.pytest_cache/README.md new file mode 100644 index 000000000..bb78ba07e --- /dev/null +++ b/.pytest_cache/README.md @@ -0,0 +1,8 @@ +# pytest cache directory # + +This directory contains data from the pytest's cache plugin, +which provides the `--lf` and `--ff` options, as well as the `cache` fixture. + +**Do not** commit this to version control. + +See [the docs](https://docs.pytest.org/en/latest/cache.html) for more information. diff --git a/.pytest_cache/v/cache/nodeids b/.pytest_cache/v/cache/nodeids new file mode 100644 index 000000000..203477c9a --- /dev/null +++ b/.pytest_cache/v/cache/nodeids @@ -0,0 +1,278 @@ +[ + "tests/test_banked_chains.py::TestBankedChains::test_banked_chain_structure_unweighted", + "tests/test_banked_chains.py::TestBankedChains::test_banked_chain_structure_weighted", + "tests/test_banked_chains.py::TestBankedChains::test_cluster_add_chain", + "tests/test_banked_chains.py::TestBankedChains::test_verify_banked_chain", + "tests/test_batch.py::TestBatch::test_add_crossbreak", + "tests/test_batch.py::TestBatch::test_add_downbreak", + "tests/test_batch.py::TestBatch::test_add_filter", + "tests/test_batch.py::TestBatch::test_add_open_ends", + "tests/test_batch.py::TestBatch::test_add_y_on_y", + "tests/test_batch.py::TestBatch::test_as_addition", + "tests/test_batch.py::TestBatch::test_copy", + "tests/test_batch.py::TestBatch::test_dataset_add_batch", + "tests/test_batch.py::TestBatch::test_dataset_get_batch", + 
"tests/test_batch.py::TestBatch::test_extend_filter", + "tests/test_batch.py::TestBatch::test_extend_y", + "tests/test_batch.py::TestBatch::test_from_batch", + "tests/test_batch.py::TestBatch::test_hiding", + "tests/test_batch.py::TestBatch::test_level", + "tests/test_batch.py::TestBatch::test_replace_y", + "tests/test_batch.py::TestBatch::test_set_cell_items", + "tests/test_batch.py::TestBatch::test_set_language", + "tests/test_batch.py::TestBatch::test_set_sigtest", + "tests/test_batch.py::TestBatch::test_set_weight", + "tests/test_batch.py::TestBatch::test_slicing", + "tests/test_batch.py::TestBatch::test_sorting", + "tests/test_batch.py::TestBatch::test_transpose", + "tests/test_chain.py::TestChainConstructor::()::test_init", + "tests/test_chain.py::TestChainConstructor::()::test_str", + "tests/test_chain.py::TestChainConstructor::()::test_repr", + "tests/test_chain.py::TestChainConstructor::()::test_len", + "tests/test_chain.py::TestChainExceptions::()::test_get_non_existent_columns", + "tests/test_chain.py::TestChainGet::()::test_get_x_orientation[params_getx0]", + "tests/test_chain.py::TestChainGet::()::test_get_x_orientation[params_getx1]", + "tests/test_chain.py::TestChainGet::()::test_get_x_orientation[params_getx2]", + "tests/test_chain.py::TestChainGet::()::test_get_x_orientation[params_getx3]", + "tests/test_chain.py::TestChainGet::()::test_sig_transformation_simple", + "tests/test_chain.py::TestChainGet::()::test_annotations_fields", + "tests/test_chain.py::TestChainGet::()::test_annotations_populated", + "tests/test_chain.py::TestChainGet::()::test_annotations_list_append", + "tests/test_chain.py::TestChainGet::()::test_sig_transformation_large", + "tests/test_chain.py::TestChainUnnamedAdd::()::test_unnamed", + "tests/test_chain.py::TestChainAdd::()::test_named", + "tests/test_chain.py::TestChainAdd::()::test_str[params_structure0]", + "tests/test_chain.py::TestChainAdd::()::test_str[params_structure1]", + 
"tests/test_chain.py::TestChainAddRepaint::()::test_str[params_structure0]", + "tests/test_chain.py::TestChainAddRepaint::()::test_str[params_structure1]", + "tests/test_chain_old.py::TestChainObject::test_auto_orientation", + "tests/test_chain_old.py::TestChainObject::test_dervie_attributes", + "tests/test_chain_old.py::TestChainObject::test_describe", + "tests/test_chain_old.py::TestChainObject::test_lazy_name", + "tests/test_chain_old.py::TestChainObject::test_save_chain", + "tests/test_cluster.py::TestClusterObject::test_add_chain", + "tests/test_cluster.py::TestClusterObject::test_add_chain_exceptions", + "tests/test_cluster.py::TestClusterObject::test_add_dataframe", + "tests/test_cluster.py::TestClusterObject::test_add_multiple_chains", + "tests/test_cluster.py::TestClusterObject::test_add_multiple_chains_exceptions", + "tests/test_cluster.py::TestClusterObject::test_dataframe_exceptions", + "tests/test_cluster.py::TestClusterObject::test_save_cluster", + "tests/test_complex_logic.py::TestStackObject::test___eq", + "tests/test_complex_logic.py::TestStackObject::test___ge", + "tests/test_complex_logic.py::TestStackObject::test___gt", + "tests/test_complex_logic.py::TestStackObject::test___le", + "tests/test_complex_logic.py::TestStackObject::test___lt", + "tests/test_complex_logic.py::TestStackObject::test___ne", + "tests/test_complex_logic.py::TestStackObject::test__has_not_all", + "tests/test_complex_logic.py::TestStackObject::test__has_not_all_errors", + "tests/test_complex_logic.py::TestStackObject::test__has_not_any", + "tests/test_complex_logic.py::TestStackObject::test__has_not_any_errors", + "tests/test_complex_logic.py::TestStackObject::test__has_not_count", + "tests/test_complex_logic.py::TestStackObject::test__has_not_count_errors", + "tests/test_complex_logic.py::TestStackObject::test_difference", + "tests/test_complex_logic.py::TestStackObject::test_get_logic_key", + "tests/test_complex_logic.py::TestStackObject::test_get_logic_key_chunk", + 
"tests/test_complex_logic.py::TestStackObject::test_has_all", + "tests/test_complex_logic.py::TestStackObject::test_has_all_errors", + "tests/test_complex_logic.py::TestStackObject::test_has_not_any", + "tests/test_complex_logic.py::TestStackObject::test_has_not_any_errors", + "tests/test_complex_logic.py::TestStackObject::test_has_not_count", + "tests/test_complex_logic.py::TestStackObject::test_has_not_count_errors", + "tests/test_complex_logic.py::TestStackObject::test_intersection", + "tests/test_complex_logic.py::TestStackObject::test_is_eq", + "tests/test_complex_logic.py::TestStackObject::test_is_ge", + "tests/test_complex_logic.py::TestStackObject::test_is_gt", + "tests/test_complex_logic.py::TestStackObject::test_is_le", + "tests/test_complex_logic.py::TestStackObject::test_is_lt", + "tests/test_complex_logic.py::TestStackObject::test_is_ne", + "tests/test_complex_logic.py::TestStackObject::test_logic_list", + "tests/test_complex_logic.py::TestStackObject::test_nested_logic", + "tests/test_complex_logic.py::TestStackObject::test_nested_logic_list", + "tests/test_complex_logic.py::TestStackObject::test_symmetric_difference", + "tests/test_complex_logic.py::TestStackObject::test_union", + "tests/test_complex_logic.py::TestStackObject::test_wildcards", + "tests/test_dataset.py::TestDataSet::test_array_metadata", + "tests/test_dataset.py::TestDataSet::test_categorical_metadata_additions", + "tests/test_dataset.py::TestDataSet::test_categorical_to_delimited_set", + "tests/test_dataset.py::TestDataSet::test_compare", + "tests/test_dataset.py::TestDataSet::test_copy_via_masks_full", + "tests/test_dataset.py::TestDataSet::test_copy_via_masks_sliced_and_reduced", + "tests/test_dataset.py::TestDataSet::test_crosstab", + "tests/test_dataset.py::TestDataSet::test_derotate_df", + "tests/test_dataset.py::TestDataSet::test_derotate_freq", + "tests/test_dataset.py::TestDataSet::test_derotate_meta", + "tests/test_dataset.py::TestDataSet::test_dichotomous_to_delimited_set", 
+ "tests/test_dataset.py::TestDataSet::test_extend_values_autocodes", + "tests/test_dataset.py::TestDataSet::test_extend_values_no_texts", + "tests/test_dataset.py::TestDataSet::test_extend_values_raises_on_dupes", + "tests/test_dataset.py::TestDataSet::test_extend_values_usercodes", + "tests/test_dataset.py::TestDataSet::test_fileinfo", + "tests/test_dataset.py::TestDataSet::test_filter", + "tests/test_dataset.py::TestDataSet::test_force_texts", + "tests/test_dataset.py::TestDataSet::test_get_item_texts", + "tests/test_dataset.py::TestDataSet::test_get_value_texts", + "tests/test_dataset.py::TestDataSet::test_get_variable_text", + "tests/test_dataset.py::TestDataSet::test_interlock", + "tests/test_dataset.py::TestDataSet::test_order_full_change", + "tests/test_dataset.py::TestDataSet::test_order_repos_change", + "tests/test_dataset.py::TestDataSet::test_read_quantipy", + "tests/test_dataset.py::TestDataSet::test_remove_values", + "tests/test_dataset.py::TestDataSet::test_rename_via_masks", + "tests/test_dataset.py::TestDataSet::test_reorder_values", + "tests/test_dataset.py::TestDataSet::test_reorder_values_raises_on_incomplete_list", + "tests/test_dataset.py::TestDataSet::test_set_item_texts", + "tests/test_dataset.py::TestDataSet::test_set_missings_flagging", + "tests/test_dataset.py::TestDataSet::test_set_missings_results", + "tests/test_dataset.py::TestDataSet::test_set_value_texts", + "tests/test_dataset.py::TestDataSet::test_set_variable_text", + "tests/test_dataset.py::TestDataSet::test_sorting_rules_meta", + "tests/test_dataset.py::TestDataSet::test_subset_from_varlist", + "tests/test_dataset.py::TestDataSet::test_text_replacements_non_array", + "tests/test_dataset.py::TestDataSet::test_transpose", + "tests/test_dataset.py::TestDataSet::test_uncode", + "tests/test_dataset.py::TestDataSet::test_validate", + "tests/test_excel.py::TestExcel::()::test_structure[params0]", + "tests/test_excel.py::TestExcel::()::test_structure[params1]", + 
"tests/test_excel.py::TestExcel::()::test_structure[params2]", + "tests/test_excel.py::TestExcel::()::test_structure[params3]", + "tests/test_excel.py::TestExcel::()::test_structure[params4]", + "tests/test_io_dimensions.py::TestDimLabels::test_dimlabels", + "tests/test_link.py::TestLinkObject::test_get_data", + "tests/test_link.py::TestLinkObject::test_get_meta", + "tests/test_link.py::TestLinkObject::test_link_behaves_like_a_dict", + "tests/test_link.py::TestLinkObject::test_link_is_a_subclassed_dict", + "tests/test_logic_views.py::TestViewObject::test_simple_or", + "tests/test_merging.py::TestMerging::test_hmerge_basic", + "tests/test_merging.py::TestMerging::test_hmerge_vmerge_basic", + "tests/test_merging.py::TestMerging::test_subset_dataset", + "tests/test_merging.py::TestMerging::test_vmerge_basic", + "tests/test_merging.py::TestMerging::test_vmerge_blind_append", + "tests/test_merging.py::TestMerging::test_vmerge_blind_append_row_id", + "tests/test_merging.py::TestMerging::test_vmerge_row_id", + "tests/test_recode.py::TestRecodes::test_recode_parameters", + "tests/test_rim.py::TestScheme::test_cap", + "tests/test_rim.py::TestScheme::test_constructor", + "tests/test_rim.py::TestScheme::test_groups", + "tests/test_rules.py::TestRules::test_dropx", + "tests/test_rules.py::TestRules::test_rules_coltests", + "tests/test_rules.py::TestRules::test_rules_coltests_flag_bases", + "tests/test_rules.py::TestRules::test_rules_crosstab", + "tests/test_rules.py::TestRules::test_rules_frequency", + "tests/test_rules.py::TestRules::test_rules_get_chain", + "tests/test_rules.py::TestRules::test_rules_get_dataframe", + "tests/test_rules.py::TestRules::test_slicex", + "tests/test_rules.py::TestRules::test_sortx", + "tests/test_rules.py::TestRules::test_sortx_expand_net_between", + "tests/test_rules.py::TestRules::test_sortx_expand_net_within", + "tests/test_rules.py::TestRules::test_sortx_expand_net_within_between", + 
"tests/test_rules.py::TestRules::test_sortx_summaries_items", + "tests/test_rules.py::TestRules::test_sortx_summaries_mean", + "tests/test_rules.py::TestRules::test_sortx_summaries_value", + "tests/test_stack.py::TestStackObject::test_add_data", + "tests/test_stack.py::TestStackObject::test_add_data_as_arg", + "tests/test_stack.py::TestStackObject::test_add_link_exceptions", + "tests/test_stack.py::TestStackObject::test_add_link_generates_links_and_views", + "tests/test_stack.py::TestStackObject::test_add_link_lazy", + "tests/test_stack.py::TestStackObject::test_add_link_x_y_equal", + "tests/test_stack.py::TestStackObject::test_add_nets", + "tests/test_stack.py::TestStackObject::test_add_stats", + "tests/test_stack.py::TestStackObject::test_cache_is_created", + "tests/test_stack.py::TestStackObject::test_cumulative_sum", + "tests/test_stack.py::TestStackObject::test_describe", + "tests/test_stack.py::TestStackObject::test_factor_labels", + "tests/test_stack.py::TestStackObject::test_filters", + "tests/test_stack.py::TestStackObject::test_get_chain_generates_chains", + "tests/test_stack.py::TestStackObject::test_get_chain_lazy", + "tests/test_stack.py::TestStackObject::test_get_chain_orient_on_gives_correct_orientation", + "tests/test_stack.py::TestStackObject::test_get_chain_preserves_link_orientation", + "tests/test_stack.py::TestStackObject::test_getting_1D_views", + "tests/test_stack.py::TestStackObject::test_recode_from_net_def", + "tests/test_stack.py::TestStackObject::test_recode_from_stat_def", + "tests/test_stack.py::TestStackObject::test_reduce", + "tests/test_stack.py::TestStackObject::test_refresh", + "tests/test_stack.py::TestStackObject::test_refresh_remove_weight", + "tests/test_stack.py::TestStackObject::test_save_and_load_stack", + "tests/test_stack.py::TestStackObject::test_save_and_load_stack_path_expectations", + "tests/test_stack.py::TestStackObject::test_save_and_load_with_and_without_cache", + 
"tests/test_stack.py::TestStackObject::test_save_dataset", + "tests/test_stack.py::TestStackObject::test_save_describe", + "tests/test_stack.py::TestStackObject::test_save_load_stack_improved", + "tests/test_stack.py::TestStackObject::test_stack_aggregate", + "tests/test_stack.py::TestStackObject::test_stack_behaves_like_a_dict", + "tests/test_stack.py::TestStackObject::test_stack_is_a_subclassed_dict", + "tests/test_view_manager.py::TestViewManager::test_vm_c_b", + "tests/test_view_manager.py::TestViewManager::test_vm_c_b_w_auto", + "tests/test_view_manager.py::TestViewManager::test_vm_c_b_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_cp_b", + "tests/test_view_manager.py::TestViewManager::test_vm_cp_b_n_s_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_cp_b_n_t_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_cp_b_n_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_cp_b_s_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_cp_b_t_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_cp_b_w_auto", + "tests/test_view_manager.py::TestViewManager::test_vm_cp_b_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_cp_n_s_t_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_cp_s_t_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_p_b", + "tests/test_view_manager.py::TestViewManager::test_vm_p_b_n_s_t_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_p_b_n_s_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_p_b_n_t_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_p_b_n_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_p_b_s_t_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_p_b_s_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_p_b_t_w_both", + "tests/test_view_manager.py::TestViewManager::test_vm_p_b_w_auto", + 
"tests/test_view_manager.py::TestViewManager::test_vm_p_b_w_both", + "tests/test_view_mapper.py::TestViewObject::test__apply_to", + "tests/test_view_mapper.py::TestViewObject::test__custom_methods", + "tests/test_view_mapper.py::TestViewObject::test__get_method_types", + "tests/test_view_mapper.py::TestViewObject::test_add_method", + "tests/test_view_mapper.py::TestViewObject::test_get_view_iterations", + "tests/test_view_mapper.py::TestViewObject::test_iterations_object", + "tests/test_view_maps.py::TestViewObject::test_bases_float_on_single_w", + "tests/test_view_maps.py::TestViewObject::test_combined_codes_delimited_on_single_w", + "tests/test_view_maps.py::TestViewObject::test_cumulative_sum_counts", + "tests/test_view_maps.py::TestViewObject::test_cumulative_sum_cpercent", + "tests/test_view_maps.py::TestViewObject::test_default_delimited_at_no_w", + "tests/test_view_maps.py::TestViewObject::test_default_delimited_at_w", + "tests/test_view_maps.py::TestViewObject::test_default_delimited_on_delimited_w", + "tests/test_view_maps.py::TestViewObject::test_default_float_at_no_w", + "tests/test_view_maps.py::TestViewObject::test_default_float_at_w", + "tests/test_view_maps.py::TestViewObject::test_default_int_at_no_w", + "tests/test_view_maps.py::TestViewObject::test_default_int_at_w", + "tests/test_view_maps.py::TestViewObject::test_default_int_on_int_no_w", + "tests/test_view_maps.py::TestViewObject::test_default_single_at_no_w", + "tests/test_view_maps.py::TestViewObject::test_default_single_at_w", + "tests/test_view_maps.py::TestViewObject::test_ebase", + "tests/test_view_maps.py::TestViewObject::test_exclude_and_rescale_on_means_categorical_w", + "tests/test_view_maps.py::TestViewObject::test_frequencies_delimited_on_delimited_no_w", + "tests/test_view_maps.py::TestViewObject::test_frequencies_single_on_delimited_w", + "tests/test_view_maps.py::TestViewObject::test_frequencies_single_on_single_no_w", + 
"tests/test_view_maps.py::TestViewObject::test_means_test_level_10_unweighted_ovlp_no_missings", + "tests/test_view_maps.py::TestViewObject::test_means_test_level_20_weighted_no_missings", + "tests/test_view_maps.py::TestViewObject::test_means_test_level_5_weighted_all_codes", + "tests/test_view_maps.py::TestViewObject::test_means_test_level_high_askia_unweighted_all_codes", + "tests/test_view_maps.py::TestViewObject::test_means_tests_code_exclusion_base_flags_incl_total", + "tests/test_view_maps.py::TestViewObject::test_means_tests_code_exclusion_incl_total", + "tests/test_view_maps.py::TestViewObject::test_nps_single_on_delimited_no_w", + "tests/test_view_maps.py::TestViewObject::test_props_blocknet_calc_incl_total", + "tests/test_view_maps.py::TestViewObject::test_props_changed_meta_nets_incl_total", + "tests/test_view_maps.py::TestViewObject::test_props_means_tests_incl_total", + "tests/test_view_maps.py::TestViewObject::test_props_test_level_1_ovlp_weighted", + "tests/test_view_maps.py::TestViewObject::test_props_test_level_20_weighted", + "tests/test_view_maps.py::TestViewObject::test_props_test_level_5_ovlp_unweighted", + "tests/test_view_maps.py::TestViewObject::test_props_test_level_low_askia_weighted", + "tests/test_view_maps.py::TestViewObject::test_simple_means_all_types_no_w", + "tests/test_view_maps.py::TestViewObject::test_source_kwarg_descriptives", + "tests/test_view_maps.py::TestViewObject::test_source_kwarg_descriptives_sigtest", + "tests/test_weight_engine.py::TestEngine::test_add_scheme_and_dataframe", + "tests/test_weight_engine.py::TestEngine::test_add_scheme_no_key", + "tests/test_weight_engine.py::TestEngine::test_constructor", + "tests/test_weight_engine.py::TestEngine::test_group_targets", + "tests/test_weight_engine.py::TestEngine::test_vaidate_targets", + "tests/test_weight_engine.py::TestEngine::test_wdf_structure", + "tests/test_weight_engine.py::TestEngine::test_weight_lazy", + 
"tests/test_xlsx_formats.py::TestXlsxFormatsObject::test_create_formats_dict", + "tests/test_xlsx_formats.py::TestXlsxFormatsObject::test_initialisation", + "tests/test_xlsx_formats.py::TestXlsxFormatsObject::test_initialisation_properties" +] \ No newline at end of file diff --git a/README.md b/README.md index 5d1ce7342..c5a328e60 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,10 @@ Quantipy is an open-source data processing, analysis and reporting software proj #### Contributors - Alexander Buchhammer, Alasdair Eaglestone, James Griffiths, Kerstin Müller : https://yougov.co.uk -- Datasmoothie’s Birgir Hrafn Sigurðsson and Geir Freysson: http://datasmoothie.io/ +- Datasmoothie’s Birgir Hrafn Sigurðsson and Geir Freysson: http://datasmoothie.com/ + +### Python 3 compatability +Efforts are underway to port Quantipy to Python 3 in a [seperate repository](https://www.github.com/quantipy/quantipy3). ## Docs [View the documentation at readthedocs.org](http://quantipy.readthedocs.io/) diff --git a/docs/API/_build/doctrees/environment.pickle b/docs/API/_build/doctrees/environment.pickle index fe0765657..64f6f6e1f 100644 Binary files a/docs/API/_build/doctrees/environment.pickle and b/docs/API/_build/doctrees/environment.pickle differ diff --git a/docs/API/_build/doctrees/index.doctree b/docs/API/_build/doctrees/index.doctree index 12e3b17cf..9f4fe9a21 100644 Binary files a/docs/API/_build/doctrees/index.doctree and b/docs/API/_build/doctrees/index.doctree differ diff --git a/docs/API/_build/doctrees/sites/api_ref/00overview.doctree b/docs/API/_build/doctrees/sites/api_ref/00overview.doctree new file mode 100644 index 000000000..4f0fe3ca4 Binary files /dev/null and b/docs/API/_build/doctrees/sites/api_ref/00overview.doctree differ diff --git a/docs/API/_build/doctrees/sites/api_ref/Chain.doctree b/docs/API/_build/doctrees/sites/api_ref/Chain.doctree index 141e03c1d..efe850331 100644 Binary files a/docs/API/_build/doctrees/sites/api_ref/Chain.doctree and 
b/docs/API/_build/doctrees/sites/api_ref/Chain.doctree differ diff --git a/docs/API/_build/doctrees/sites/api_ref/Cluster.doctree b/docs/API/_build/doctrees/sites/api_ref/Cluster.doctree index e1206fb92..1f57483fa 100644 Binary files a/docs/API/_build/doctrees/sites/api_ref/Cluster.doctree and b/docs/API/_build/doctrees/sites/api_ref/Cluster.doctree differ diff --git a/docs/API/_build/doctrees/sites/api_ref/DataSet.doctree b/docs/API/_build/doctrees/sites/api_ref/DataSet.doctree new file mode 100644 index 000000000..3e3d842aa Binary files /dev/null and b/docs/API/_build/doctrees/sites/api_ref/DataSet.doctree differ diff --git a/docs/API/_build/doctrees/sites/api_ref/QuantipyViews.doctree b/docs/API/_build/doctrees/sites/api_ref/QuantipyViews.doctree index f4249e049..a881ddc1f 100644 Binary files a/docs/API/_build/doctrees/sites/api_ref/QuantipyViews.doctree and b/docs/API/_build/doctrees/sites/api_ref/QuantipyViews.doctree differ diff --git a/docs/API/_build/doctrees/sites/api_ref/Rim_scheme.doctree b/docs/API/_build/doctrees/sites/api_ref/Rim_scheme.doctree index f9a074649..8a1d981ef 100644 Binary files a/docs/API/_build/doctrees/sites/api_ref/Rim_scheme.doctree and b/docs/API/_build/doctrees/sites/api_ref/Rim_scheme.doctree differ diff --git a/docs/API/_build/doctrees/sites/api_ref/View.doctree b/docs/API/_build/doctrees/sites/api_ref/View.doctree index c3148af34..a067c5e58 100644 Binary files a/docs/API/_build/doctrees/sites/api_ref/View.doctree and b/docs/API/_build/doctrees/sites/api_ref/View.doctree differ diff --git a/docs/API/_build/doctrees/sites/api_ref/ViewMapper.doctree b/docs/API/_build/doctrees/sites/api_ref/ViewMapper.doctree index e9f4a199b..f19f90a24 100644 Binary files a/docs/API/_build/doctrees/sites/api_ref/ViewMapper.doctree and b/docs/API/_build/doctrees/sites/api_ref/ViewMapper.doctree differ diff --git a/docs/API/_build/doctrees/sites/api_ref/quantify_engine.doctree b/docs/API/_build/doctrees/sites/api_ref/quantify_engine.doctree index 
907215308..5928c5ab8 100644 Binary files a/docs/API/_build/doctrees/sites/api_ref/quantify_engine.doctree and b/docs/API/_build/doctrees/sites/api_ref/quantify_engine.doctree differ diff --git a/docs/API/_build/doctrees/sites/api_ref/stack.doctree b/docs/API/_build/doctrees/sites/api_ref/stack.doctree index ca33de829..f4da88860 100644 Binary files a/docs/API/_build/doctrees/sites/api_ref/stack.doctree and b/docs/API/_build/doctrees/sites/api_ref/stack.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/batch/00_overview.doctree b/docs/API/_build/doctrees/sites/lib_doc/batch/00_overview.doctree new file mode 100644 index 000000000..fdcf28510 Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/batch/00_overview.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/batch/01_create_load.doctree b/docs/API/_build/doctrees/sites/lib_doc/batch/01_create_load.doctree new file mode 100644 index 000000000..aaf7e6ddb Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/batch/01_create_load.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/batch/02_variables.doctree b/docs/API/_build/doctrees/sites/lib_doc/batch/02_variables.doctree new file mode 100644 index 000000000..5ff828149 Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/batch/02_variables.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/batch/03_properties.doctree b/docs/API/_build/doctrees/sites/lib_doc/batch/03_properties.doctree new file mode 100644 index 000000000..949a699ae Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/batch/03_properties.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/batch/04_subclass.doctree b/docs/API/_build/doctrees/sites/lib_doc/batch/04_subclass.doctree new file mode 100644 index 000000000..99e63e0a0 Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/batch/04_subclass.doctree differ diff --git 
a/docs/API/_build/doctrees/sites/lib_doc/builds/00_overview.doctree b/docs/API/_build/doctrees/sites/lib_doc/builds/00_overview.doctree new file mode 100644 index 000000000..8519ce31f Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/builds/00_overview.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/builds/01_chains.doctree b/docs/API/_build/doctrees/sites/lib_doc/builds/01_chains.doctree new file mode 100644 index 000000000..c5437f323 Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/builds/01_chains.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/00_overview.doctree b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/00_overview.doctree new file mode 100644 index 000000000..c58fef7f9 Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/00_overview.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/01_components.doctree b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/01_components.doctree new file mode 100644 index 000000000..6af8447f9 Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/01_components.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/02_io.doctree b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/02_io.doctree new file mode 100644 index 000000000..4826feade Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/02_io.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/02a_management.doctree b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/02a_management.doctree new file mode 100644 index 000000000..903c2776e Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/02a_management.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/03_inspection.doctree 
b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/03_inspection.doctree new file mode 100644 index 000000000..1ce54f715 Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/03_inspection.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/04_editing.doctree b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/04_editing.doctree new file mode 100644 index 000000000..b4f271307 Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/04_editing.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/05_transforming.doctree b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/05_transforming.doctree new file mode 100644 index 000000000..25b73906e Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/05_transforming.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/06_logics.doctree b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/06_logics.doctree new file mode 100644 index 000000000..7fcce1615 Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/06_logics.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/07_custom_recoding.doctree b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/07_custom_recoding.doctree new file mode 100644 index 000000000..7cfdd591e Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/dataprocessing/07_custom_recoding.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/engine/00_overview.doctree b/docs/API/_build/doctrees/sites/lib_doc/engine/00_overview.doctree new file mode 100644 index 000000000..3aa4625fe Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/engine/00_overview.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/engine/01_links_stacks.doctree b/docs/API/_build/doctrees/sites/lib_doc/engine/01_links_stacks.doctree 
new file mode 100644 index 000000000..62ac65e5e Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/engine/01_links_stacks.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/engine/02_quantity.doctree b/docs/API/_build/doctrees/sites/lib_doc/engine/02_quantity.doctree new file mode 100644 index 000000000..304b2ff28 Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/engine/02_quantity.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/engine/03_test.doctree b/docs/API/_build/doctrees/sites/lib_doc/engine/03_test.doctree new file mode 100644 index 000000000..cd96c4826 Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/engine/03_test.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/engine/04_agg_methods.doctree b/docs/API/_build/doctrees/sites/lib_doc/engine/04_agg_methods.doctree new file mode 100644 index 000000000..2f84217e9 Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/engine/04_agg_methods.doctree differ diff --git a/docs/API/_build/doctrees/sites/lib_doc/overview.doctree b/docs/API/_build/doctrees/sites/lib_doc/overview.doctree new file mode 100644 index 000000000..307b00ce9 Binary files /dev/null and b/docs/API/_build/doctrees/sites/lib_doc/overview.doctree differ diff --git a/docs/API/_build/doctrees/sites/release_notes/00_overview.doctree b/docs/API/_build/doctrees/sites/release_notes/00_overview.doctree new file mode 100644 index 000000000..51bffc687 Binary files /dev/null and b/docs/API/_build/doctrees/sites/release_notes/00_overview.doctree differ diff --git a/docs/API/_build/doctrees/sites/release_notes/01_latest.doctree b/docs/API/_build/doctrees/sites/release_notes/01_latest.doctree new file mode 100644 index 000000000..31139a5ef Binary files /dev/null and b/docs/API/_build/doctrees/sites/release_notes/01_latest.doctree differ diff --git a/docs/API/_build/doctrees/sites/release_notes/02_archive.doctree 
b/docs/API/_build/doctrees/sites/release_notes/02_archive.doctree new file mode 100644 index 000000000..89e0682ec Binary files /dev/null and b/docs/API/_build/doctrees/sites/release_notes/02_archive.doctree differ diff --git a/docs/API/_build/doctrees/sites/release_notes/03_how_to_snippets.doctree b/docs/API/_build/doctrees/sites/release_notes/03_how_to_snippets.doctree new file mode 100644 index 000000000..cb1aca326 Binary files /dev/null and b/docs/API/_build/doctrees/sites/release_notes/03_how_to_snippets.doctree differ diff --git a/docs/API/_build/doctrees/sites/release_notes/how_to_snippets/create_categorical_meta.doctree b/docs/API/_build/doctrees/sites/release_notes/how_to_snippets/create_categorical_meta.doctree new file mode 100644 index 000000000..5c9dcd8ea Binary files /dev/null and b/docs/API/_build/doctrees/sites/release_notes/how_to_snippets/create_categorical_meta.doctree differ diff --git a/docs/API/_build/doctrees/sites/release_notes/how_to_snippets/derotate.doctree b/docs/API/_build/doctrees/sites/release_notes/how_to_snippets/derotate.doctree new file mode 100644 index 000000000..43ceaad15 Binary files /dev/null and b/docs/API/_build/doctrees/sites/release_notes/how_to_snippets/derotate.doctree differ diff --git a/docs/API/_build/doctrees/sites/release_notes/how_to_snippets/dimensions_comp.doctree b/docs/API/_build/doctrees/sites/release_notes/how_to_snippets/dimensions_comp.doctree new file mode 100644 index 000000000..1fc4bea95 Binary files /dev/null and b/docs/API/_build/doctrees/sites/release_notes/how_to_snippets/dimensions_comp.doctree differ diff --git a/docs/API/_build/html/.buildinfo b/docs/API/_build/html/.buildinfo index 145d7a01e..959860160 100644 --- a/docs/API/_build/html/.buildinfo +++ b/docs/API/_build/html/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 
-config: 0288e8257f20bf0e318a1e13f4f950ad +config: 3c8d31b3ee2b61e092a93342a4c7e2d0 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/API/_build/html/_sources/sites/api_ref/00overview.rst.txt b/docs/API/_build/html/_sources/sites/api_ref/00overview.rst.txt new file mode 100644 index 000000000..c4882fa5d --- /dev/null +++ b/docs/API/_build/html/_sources/sites/api_ref/00overview.rst.txt @@ -0,0 +1,16 @@ + +API references +============== + +.. toctree:: + :maxdepth: 3 + + Chain + Cluster + DataSet + quantify_engine + QuantipyViews + Rim_scheme + Stack + View + ViewMapper \ No newline at end of file diff --git a/docs/API/_build/html/_sources/sites/api_ref/DataSet.rst.txt b/docs/API/_build/html/_sources/sites/api_ref/DataSet.rst.txt new file mode 100644 index 000000000..f463db588 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/api_ref/DataSet.rst.txt @@ -0,0 +1,8 @@ +.. toctree:: + :maxdepth: 3 + +DataSet +======= + +.. autoclass:: quantipy.DataSet + :members: \ No newline at end of file diff --git a/docs/API/_build/html/_sources/sites/lib_doc/batch/00_overview.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/batch/00_overview.rst.txt new file mode 100644 index 000000000..99ba6909b --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/batch/00_overview.rst.txt @@ -0,0 +1,35 @@ + +========= +``Batch`` +========= + +``qp.Batch`` is a subclass of ``qp.DataSet`` and is a container for +structuring a ``qp.Link`` collection's specifications. + +``qp.Batch`` is not only a subclass of ``qp.DataSet``, it also takes a +DataSet instance as input argument, inheriting a few of its attributes, e.g. +``_meta``, ``_data``, ``valid_tks`` and ``text_key``. +All other ``Batch`` attributes are used as construction plans for populating a +``qp.Stack``, these get stored in the belonging ``DataSet`` meta component in +``_meta['sets']['batches'][batchname]``. 
+ +In general, it does not matter in which order ``Batch`` attributes are set by +methods, the class ensures that all attributes are kept consistent. + +All next sections are working with the following ``qp.DataSet`` instance:: + + import quantipy as qp + + dataset = qp.DataSet('Example Data (A)') + dataset.read_quantipy('Example Data (A).json', 'Example Data (A).csv') + +The json and csv files you can find in ``quantipy/tests``. + +.. toctree:: + :maxdepth: 5 + :includehidden: + + 01_create_load + 02_variables + 03_properties + 04_subclass diff --git a/docs/API/_build/html/_sources/sites/lib_doc/batch/01_create_load.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/batch/01_create_load.rst.txt new file mode 100644 index 000000000..70ba8ef63 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/batch/01_create_load.rst.txt @@ -0,0 +1,27 @@ +.. toctree:: + :maxdepth: 5 + :includehidden: + +========================================= +Creating/ Loading a ``qp.Batch`` instance +========================================= + +As mentioned, a ``Batch`` instance has a close connection to its belonging +``DataSet`` instance and we can easily create a new ``Batch`` from a ``DataSet`` +as per:: + + batch1 = dataset.add_batch(name='batch1') + batch2 = dataset.add_batch(name='batch2', ci=['c'], weights='weight') + +It is also possible to load an already existing instance out of the meta +stored in ``dataset._meta['sets']['batches']``:: + + batch = dataset.get_batch('batch1') + +Both methods, ``.add_batch()`` and ``.get_batch()``, are an easier way to +use the ``__init__()`` method of ``qp.Batch``. 
Another way to get a new
As default for all +arrays in x-keys array summaries are created (array as x-key and ``'@'``-referenced total as +y-key), see the output below (``Array summaries setup: Creating ['q6'].``). +If array summaries are requested only for a selection of variables or for none, +use ``.make_summaries()``: + +>>> batch.make_summaries(None) +Array summaries setup: Creating no summaries! + +Arrays can also be transposed (``'@'``-referenced total as x-key and array name +as y-key). If they are not in the batch summary list before, they are +automatically added and depending on the ``replace`` parameter only the +transposed or both types of summaries are added to the ``qp.Stack``: + +>>> batch.transpose_array('q6', replace=False) +Array summaries setup: Creating ['q6']. + +The construction plan now shows that both summary types are included: + +>>> print batch.x_y_map +OrderedDict([('q1', ['@', 'gender', 'q1', 'locality', 'ethnicity']), + ('q2', ['locality', 'ethnicity']), + ('q6', ['@']), + ('@', ['q6']), + (u'q6_1', ['@', 'gender', 'q1']), + (u'q6_2', ['@', 'gender', 'q1']), + (u'q6_3', ['@', 'gender', 'q1'])]) + +-------------------- +Verbatims/ open ends +-------------------- + +Another special case are verbatims. They will not be aggregated in a ``qp.Stack``, +but they have to be defined in a ``qp.Batch`` to add them later to a ``qp.Cluster``. + +There are two different ways to add verbatims: Either all to one ``qp.Cluster`` +key or each gets its own key. But both options can be done with the same method. 
+ +For splitting the verbatims, set ``split=True`` and insert as many titles as +included verbatims/ open ends: + +>>> batch.add_open_ends(['q8a', 'q9a'], break_by=['record_number', 'age'], + split=True, title=['oe_q8', 'oe_q9']) + +For collecting all verbatims in one Cluster key, set ``split=False`` and add +only one ``title`` or use the default parameters: + +>>> batch.add_open_ends(['q8a', 'q9a'], break_by=['record_number', 'age']) + +-------------------- +Special aggregations +-------------------- + +It is possible to add some special aggregations to a ``qp.Batch``, that are +not stored in the main construction plan ``.x_y_map``. One option is to give a +name for a Cluster key in which all y-keys are cross-tabulated against each +other: + +>>> batch.add_y_on_y('y-keys') + +Another possibility is to add a ``qp.Batch`` instance to an other instance. +The added Batch loses all information about verbatims and ``.y_on_y``, that +means only the main construction plan in ``.x_y_map`` gets adopted. Each of +the two batches is aggregated discretely in the ``qp.Stack``, but the added +instance gets included into the ``qp.Cluster`` of the first ``qp.Batch`` in +a key named by its instance name. + +>>> batch1 = dataset.get_batch('batch1') +>>> batch2 = dataset.get_batch('batch2') +>>> batch2.add_x('q2b') +Array summaries setup: Creating no summaries! +>>> batch2.add_y('gender') +>>> batch2.as_addition('batch1') +Batch 'batch2' specified as addition to Batch 'batch1'. Any open end summaries and 'y_on_y' agg. have been removed! + +The connection between the two ``qp.Batch`` instances you can see in ``.additional`` +for the added instance and in ``._meta['sets']['batches']['batchname']['additions']`` +for the first instance. 
\ No newline at end of file diff --git a/docs/API/_build/html/_sources/sites/lib_doc/batch/03_properties.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/batch/03_properties.rst.txt new file mode 100644 index 000000000..d4c43175e --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/batch/03_properties.rst.txt @@ -0,0 +1,58 @@ +.. toctree:: + :maxdepth: 5 + :includehidden: + +================================ +Set properties of a ``qp.Batch`` +================================ + +The section before explained how the main construction plan (``batch.x_y_map``) +is built, that describes which x-keys and y-keys are used to add ``qp.Link``\s +to a ``qp.Stack``. Now you will get to know how the missing information for the +``Link``\s are defined and which specific views get extracted for the +``qp.Cluster`` by adding some property options the ``qp.Batch`` instance. + +---------------------------------------- +Filter, weights and significance testing +---------------------------------------- + +``qp.Link``\s can be added to a ``qp.Stack`` data_key-level by defining its x +and y-keys, which is already done in ``.x_y_map``, and setting a filter. +This property can be edited in a ``qp.Batch`` instance with the +following methods: + +>>> batch.add_filter('men only', {'gender': 1}) +>>> batch.extend_filter({'q1': {'age': [20, 21, 22, 23, 24, 25]}}) + +Filters can be added globally or for a selection of x-keys only. Out of the +global filter, ``.sample_size`` is automatically calculated for each ``qp.Batch`` +defintion. + +Now all information are collected in the ``qp.Batch`` instance and the ``Stack`` +can be populated with ``Link``\s in form of ``stack[data_key][filter_key][x_key][y_key]``. + +For each ``Link`` ``qp.View``\s can be added, these views depend on a weight +definition, which is also defined in the ``qp.Batch``: + +>>> batch.set_weights(['weight_a']) + +Significance tests are a special ``View``; the sig. 
levels which they are +calculated on can be added to the ``qp.Batch`` like this: + +>>> batch.set_sigtests(levels=[0.05]) + +----------------------- +Cell items and language +----------------------- + +As ``qp.Stack`` is a container for a large amount of aggregations, it will +accommodate various ``qp.View``\s. The ``qp.Batch`` property ``.cell_items`` is +used to define which specfic ``Views`` will be taken to create a ``qp.Cluster``: + +>>> batch.set_cell_items(['c', 'p']) + +The property ``.language`` allows the user to define which ``text`` labels from +the meta data should be used for the extracted ``Views`` by entering a valid +text key: + +>>> batch.set_language('en-GB') diff --git a/docs/API/_build/html/_sources/sites/lib_doc/batch/04_subclass.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/batch/04_subclass.rst.txt new file mode 100644 index 000000000..a47c4d828 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/batch/04_subclass.rst.txt @@ -0,0 +1,26 @@ +.. toctree:: + :maxdepth: 5 + :includehidden: + +================================ +Inherited ``qp.DataSet`` methods +================================ + +Being a ``qp.DataSet`` subclasss, ``qp.Batch`` inherits some of its methods. +The important ones are these which allow the manipulation of the meta component. +That means meta-edits can be applied globally (run methods on ``qp.DataSet``) or +``Batch``-specific (run methods on ``qp.Batch``). Batch meta-edits +always overwrite global meta-edits and while building a ``qp.Cluster`` from a +``qp.Batch``, the modified meta information is taken from ``.meta_edits``. 
+ +The following methods can be used to create meta-edits for a ``qp.Batch``: + +>>> batch.hiding('q1', [2], axis='y') +>>> batch.sorting('q2', fix=[97, 98]) +>>> batch.slicing('q1', [1, 2, 3, 4, 5], axis='x') +>>> batch.set_variable_text('gender', 'Gender???') +>>> batch.set_value_texts('gender', {1: 'Men', 2: 'Women'}) +>>> batch.set_property('q1', 'base_text', 'This var has a second filter.') + +Some methods are not allowed to be used for a ``Batch``. These will raise a +``NotImplementedError`` to prevent inconsistent case and meta data states. diff --git a/docs/API/_build/html/_sources/sites/lib_doc/builds/00_overview.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/builds/00_overview.rst.txt new file mode 100644 index 000000000..365bfbcd5 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/builds/00_overview.rst.txt @@ -0,0 +1,9 @@ +====== +Builds +====== + +.. toctree:: + :maxdepth: 5 + :includehidden: + + 01_chains diff --git a/docs/API/_build/html/_sources/sites/lib_doc/builds/01_chains.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/builds/01_chains.rst.txt new file mode 100644 index 000000000..c7af8e55f --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/builds/01_chains.rst.txt @@ -0,0 +1,21 @@ +.. toctree:: + :maxdepth: 5 + :includehidden: + +================= +Combining results +================= + +-------------------------------- +Organizing ``View`` aggregations +-------------------------------- + +------------------------------- +Creating ``Chain`` aggregations +------------------------------- + +What is a ``Chain``? 
+-------------------- + +Customizing results +------------------- diff --git a/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/00_overview.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/00_overview.rst.txt new file mode 100644 index 000000000..aa4b136a8 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/00_overview.rst.txt @@ -0,0 +1,16 @@ +--------------- +Data processing +--------------- + +.. toctree:: + :maxdepth: 5 + :includehidden: + + 01_components + 02_io + 02a_management + 03_inspection + 04_editing + 05_transforming + 06_logics + 07_custom_recoding diff --git a/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/01_components.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/01_components.rst.txt new file mode 100644 index 000000000..3dc724552 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/01_components.rst.txt @@ -0,0 +1,141 @@ +.. toctree:: + :maxdepth: 5 + :includehidden: + +================== +DataSet components +================== + +------------------ +Case and meta data +------------------ + +``Quantipy`` builds upon the ``pandas`` library to feature the ``DataFrame`` +and ``Series`` objects in the case data component of its ``DataSet`` object. +Additionally, each ``DataSet`` offers a metadata component to describe the +data columns and provide additional information on the characteristics of the +underlying structure. The metadata document is implemented as a nested ``dict`` +and provides the following ``keys`` on its first level: + +============== ============================================================== +element contains +============== ============================================================== +``'type'`` case data type +``'info'`` info on the source data +``'lib'`` shared use references +``'columns'`` info on ``DataFrame`` columns (Quantipy types, labels, etc.) 
+``'sets'`` ordered groups of variables pointing to other parts of the meta +``'masks'`` complex variable type definitions (arrays, dichotomous, etc.) +============== ============================================================== + +--------------------------------- +``columns`` and ``masks`` objects +--------------------------------- + +There are two variable collections inside a ``Quantipy`` metadata document: +``'columns'`` is storing the meta for each accompanying ``pandas.DataFrame`` +column object, while ``'masks'`` are building upon the regular ``'columns'`` +metadata but additionally employ special meta instructions to define +complex data types. An example is the the ``'array'`` type that (in MR speak) maps +multiple "question" variables to one "answer" object. + +"Simple"" data definitons that are supported by ``Quantipy`` can either be numeric +``'float'`` and ``'int'`` types, categorical ``'single'`` and ``'delimited set'`` +variables or of type ``'string'``, ``'date'`` and ``'time'``. + +--------------------------------------------- +Languages: ``text`` and ``text_key`` mappings +--------------------------------------------- +Throughout ``Quantipy`` metadata all label information, e.g. variable question +texts and category descriptions, are stored in ``text`` objects that are mapping +different language (or context) versions of a label to a specific ``text_key``. +That way the metadata can support multi-language and multi-purpose (for example +detailed/extensive vs. 
short question texts) label information in a digestable +format that is easy to query: + +>>> meta['columns']['q1']['text'] +{'de-DE': 'Das ist ein langes deutsches Label', + u'en-GB': u'What is your main fitness activity?', + 'x edits': {'de-DE': 'German build label', 'en-GB': 'English build label'}} + +Valid ``text_key`` settings are: + +============== ============================================================== +``text_key`` Language / context +============== ============================================================== +``'en-GB'`` English +``'de-DE'`` German +``'fr-FR'`` French +``'da-DK'`` Danish +``'sv-SV'`` Swedish +``'nb-NO'`` Norwegian +``'fi-FI'`` Finnish +``'x edits'`` Build label edit for x-axis +``'y edits'`` Build label edit for y-axis +============== ============================================================== + +----------------------------- +Categorical ``values`` object +----------------------------- +``single`` and ``delimited set`` variables restrict the possible case data +entries to a list of ``values`` that consist of numeric answer codes and their +``text`` labels, defining distinct categories: + +>>> meta['columns']['q1']['values'] +[{'value': 1, + 'text': {'en-GB': 'Dog'} + }, + {'value': 2, + 'text': {'en-GB': 'Cat'} + }, + {'value': 3, + 'text': {'en-GB': 'Bird'} + }, + {'value': -9, + 'text': {'en-GB': 'Not an animal'} + }] + +------------------ +The ``array`` type +------------------ +Turning to the ``masks`` collection of the metadata, ``array`` variables +group together a collection of variables that share a common response options +scheme, i.e. different statements (usually referencing a broader topic) that +are answered using the same scale. In the ``Quantipy`` metadata document, an +``array`` variable has a ``subtype`` that describes the type of the +constructing source variables listed in the ``items`` object. 
In contrast to simple variable types, any +categorical ``values`` metadata is stored inside the shared information collection +``lib``, for access from both the ``columns`` and ``masks`` representation of +``array`` elements: + +>>> meta['masks']['q5'] +{u'items': [{u'source': u'columns@q5_1', u'text': {u'en-GB': u'Surfing'}}, + {u'source': u'columns@q5_2', u'text': {u'en-GB': u'Snowboarding'}}, + {u'source': u'columns@q5_3', u'text': {u'en-GB': u'Kite boarding'}}, + {u'source': u'columns@q5_4', u'text': {u'en-GB': u'Parachuting'}}, + {u'source': u'columns@q5_5', u'text': {u'en-GB': u'Cave diving'}}, + {u'source': u'columns@q5_6', u'text': {u'en-GB': u'Windsurfing'}}], + u'name': u'q5', + u'subtype': u'single', + u'text': {u'en-GB': u'How likely are you to do each of the following in the next year?'}, + u'type': u'array', + u'values': 'lib@values@q5'} + +>>> meta['lib']['values']['q5'] +[{u'text': {u'en-GB': u'I would refuse if asked'}, u'value': 1}, + {u'text': {u'en-GB': u'Very unlikely'}, u'value': 2}, + {u'text': {u'en-GB': u"Probably wouldn't"}, u'value': 3}, + {u'text': {u'en-GB': u'Probably would if asked'}, u'value': 4}, + {u'text': {u'en-GB': u'Very likely'}, u'value': 5}, + {u'text': {u'en-GB': u"I'm already planning to"}, u'value': 97}, + {u'text': {u'en-GB': u"Don't know"}, u'value': 98}] + +Exploring the ``columns`` meta of an array item shows the same ``values`` reference pointer and informs about its ``parent`` meta structure, i.e. the +array's ``masks`` defintion: + +>>> meta['columns']['q5_1'] +{u'name': u'q5_1', + u'parent': {u'masks@q5': {u'type': u'array'}}, + u'text': {u'en-GB': u'How likely are you to do each of the following in the next year? 
- Surfing'}, + u'type': u'single', + u'values': u'lib@values@q5'} \ No newline at end of file diff --git a/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/02_io.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/02_io.rst.txt new file mode 100644 index 000000000..4a6205ea8 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/02_io.rst.txt @@ -0,0 +1,207 @@ +.. toctree:: + :maxdepth: 5 + :includehidden: + +=== +I/O +=== + +------------------------------- +Starting from native components +------------------------------- + +Using a standalone ``pd.DataFrame`` +----------------------------------- +``Quantipy`` can create a meta document from a inferring its variable types from +the ``dtypes`` of a ``pd.DataFrame``. In that process, ``ìnt``, ``float`` and +``string`` data types are created inside the meta component of the ``DataSet``. +In this basic form, ``text`` label information is missing. For a example, given +a ``pd.DataFrame`` as per: + +>>> casedata = [[1000, 10, 1.2, 'text1'], +... [1001, 4, 3.4, 'jjda'], +... [1002, 8, np.NaN, 'what?'], +... [1003, 8, 7.81, '---' ], +... [1004, 5, 3.0, 'hello world!']] +>>> df = pd.DataFrame(casedata, columns=['identity', 'q1', 'q2', 'q3']) +>>> df + identity q1 q2 q3 +0 1000 10 1.20 text1 +1 1001 4 3.40 jjda +2 1002 8 NaN what? +3 1003 8 7.81 --- +4 1004 5 3.00 hello world! + +... the conversion is adding matching metadata to the ``DataSet`` instance: + +>>> dataset = qp.DataSet(name='example', dimensions_comp=False) +>>> dataset.from_components(df) +Inferring meta data from pd.DataFrame.columns (4)... 
+identity: dtype: int64 - converted: int +q1: dtype: int64 - converted: int +q2: dtype: float64 - converted: float +q3: dtype: object - converted: string + +>>> dataset.meta()['columns']['q2'] +{'text': {'en-GB': ''}, 'type': 'float', 'name': 'q2', 'parent': {}, 'properties': {'created': True}} + +``.csv`` / ``.json`` pairs +-------------------------- +We can easily read in ``Quantipy`` native data with the ``read_quantipy()`` +method and providing the paths to both the ``.csv`` and ``.json`` file (file +extensions are handled automatically), e.g.: + +>>> folder = './Data/' +>>> file_name = 'Example Data (A)' +>>> path_csv = path_json = folder + file_name + +>>> dataset = qp.DataSet(name='example', dimensions_comp=False) +>>> dataset.read_quantipy(path_json, path_csv) +DataSet: ./Data/example +rows: 8255 - columns: 76 +Dimensions compatibility mode: False + +We can that access the case and metadata components: + +>>> dataset.data()['q4'].head() +0 1 +1 2 +2 2 +3 1 +4 1 +Name: q4, dtype: int64 + +>>> meta = dataset.meta()['columns']['q4'] +>>> json.dumps(meta) +{ + "values": [ + { + "text": { + "en-GB": "Yes" + }, + "value": 1 + }, + { + "text": { + "en-GB": "No" + }, + "value": 2 + } + ], + "text": { + "en-GB": "Do you ever participate in sports activities with people in your household?" 
+ }, + "type": "single", + "name": "q4", + "parent": {} +} + +----------------------- +Third party conversions +----------------------- + +Supported conversions +--------------------- + +In adddition to providing plain ``.csv``/``.json`` data (pairs), source files +can be read into Quantipy using a number of I/O functions to deal with +standard file formats encountered in the market research industry: + ++-------------+-------------+-------------+-------------+ +| Software | Format | Read | Write | ++=============+=============+=============+=============+ +| SPSS | .sav | Yes | Yes | +| Statistics | | | | ++-------------+-------------+-------------+-------------+ +| SPSS | .dff/.mdd | Yes | Yes | +| Dimensions | | | | ++-------------+-------------+-------------+-------------+ +| Decipher |tab-delimited| Yes | No | +| |.json/ .txt | | | ++-------------+-------------+-------------+-------------+ +| Ascribe |tab-delimited| Yes | No | +| |.xml/ .txt | | | ++-------------+-------------+-------------+-------------+ + +The following functions are designed to convert the different file formats' +structures into inputs understood by Quantipy. + +SPSS Statistics +--------------- + +**Reading:** + +>>> from quantipy.core.tools.dp.io import read_spss +>>> meta, data = read_spss(path_sav) + +.. note:: + On a Windows machine you MUST use ``ioLocale=None`` when reading + from SPSS. This means if you are using a Windows machine your base + example for reading from SPSS is + ``meta, data = read_spss(path_sav, ioLocale=None)``. + +When reading from SPSS you have the opportunity to specify a custom +dichotomous values map, that will be used to convert all dichotomous +sets into Quantipy delimited sets, using the ``dichot`` argument. + +The entire read operation will use the same map on all dichotomous +sets so they must be applied uniformly throughout the SAV file. The +default map that will be used if none is provided will be +``{'yes': 1, 'no': 0}``. 
+ +>>> meta, data = read_spss(path_sav, dichot={'yes': 1, 'no': 2}) + +SPSS dates will be converted to pandas dates by default but +if this results in conversion issues or failures you can read +the dates in as Quantipy strings to deal with them later, using the +``dates_as_strings`` argument. + +>>> meta, data = read_spss(path_sav, dates_as_strings=True) + +**Writing:** + +>>> from quantipy.core.tools.dp.io import write_spss +>>> write_spss(path_sav, meta, data) + +By default SPSS files will be generated from the ``'data file'`` +set found in ``meta['sets']``, but a custom set can be named instead +using the ``from_set`` argument. + +>>> write_spss(path_sav_analysis, meta, data, from_set='sav-export') + +The custom set must be well-formed: + +>>> "sets" : { +... "sav-export": { +... "items": [ +... "columns@Q1", +... "columns@Q2", +... "columns@Q3", +... ... +... ] +... } +... } + +Dimensions +---------- + +**Reading:** + +>>> from quantipy.core.tools.dp.io import read_dimensions +>>> meta, data = read_dimensions(path_mdd, path_ddf) + +Decipher +-------- + +**Reading:** + +>>> from quantipy.core.tools.dp.io import read_decipher +>>> meta, data = read_decipher(path_json, path_txt) + +Ascribe +------- + +**Reading:** + +>>> from quantipy.core.tools.dp.io import read_ascribe +>>> meta, data = read_ascribe(path_xml, path_txt) diff --git a/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/02a_management.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/02a_management.rst.txt new file mode 100644 index 000000000..de7824b4e --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/02a_management.rst.txt @@ -0,0 +1,122 @@ +.. 
toctree:: + :maxdepth: 5 + :includehidden: + +================== +DataSet management +================== + +-------------------------- +Setting the variable order +-------------------------- +The global variable order of a ``DataSet`` is dictated by the content of the +``meta['sets']['data file']['items']`` list and reflected in the structure of +the case data component's ``pd.DataFrame.columns``. There are two ways to set +a new order using the ``order(new_order=None, reposition=None)`` method: + +**Define a full order** + +Using this apporach requires that all ``DataSet`` variable names are passed +via the ``new_order`` parameter. Providing only a subset of the variables will +raise a ``ValueError``: + +>>> dataset.order(['q1', 'q8']) +ValueError: 'new_order' must contain all DataSet variables. + +Text... + +**Change positions relatively** + +Often only a few changes to the natural order of the ``DataSet`` are necessary, +e.g. derived variables should be moved alongside their originating ones or specific +sets of variables (demographics, etc.) should be grouped together. We can achieve +this using the ``reposition`` parameter as follows: + +Text... + +--------------------------------- +Cloning, filtering and subsetting +--------------------------------- + +Sometimes you want to cut the data into sections defined by either case/respondent conditions (e.g. a survey wave) or a collection of variables (e.g. +a specific part of the questionnaire). To not permanently change an existing +``DataSet`` by accident, draw a copy of it first: + +>>> copy_ds = dataset.clone() + +Then you can use ``filter()`` to restrict cases (rows) or ``subset()`` to keep +only a selected range of variables (columns). Both methods can be used inplace +but will return a new object by default. 
+ +>>> keep = {'Wave': [1]} +>>> copy_ds.filter(alias='first wave', condition=keep, inplace=True) +>>> copy_ds._data.shape +(1621, 76) + +After the filter has been applied, the ``DataSet`` is only showing cases that contain the value 1 in the ``'Wave'`` variable. The filter alias (a short name +to describe the arbitrarily complex filter ``condition``) is attached to the +instance: + +>>> copy_ds.filtered +only first wave + +We are now further reducing the ``DataSet`` by dropping all variables except the three ``array`` variables ``'q5'``, ``'q6'``, and ``'q7'`` using ``subset()``. + +>>> reduced_ds = copy_ds.subset(variables=['q5', 'q6', 'q7']) + +We can see that only the requested variables (``masks`` defintitions and the +constructing ``array`` items) remain in ``reduced_ds``: + +>>> reduced_ds.by_type() +size: 1621 single delimited set array int float string date time N/A +0 q5_1 q5 +1 q5_2 q7 +2 q5_3 q6 +3 q5_4 +4 q5_5 +5 q5_6 +6 q6_1 +7 q6_2 +8 q6_3 +9 q7_1 +10 q7_2 +11 q7_3 +12 q7_4 +13 q7_5 +14 q7_6 + +------- +Merging +------- + +Intro text... As opposed to reducing an existing file... + +Vertical (cases/rows) merging +----------------------------- + +Text + +Horizontal (variables/columns) merging +-------------------------------------- + +Text + +----------------------------- +Savepoints and state rollback +----------------------------- + +When working with big ``DataSet``\s and needing to perform a lot of data +preparation (deriving large amounts of new variables, lots of meta editing, +complex cleaning, ...) it can be beneficial to quickly store a snapshot of a +clean and consistent state of the ``DataSet``. This is most useful when working +in interactive sessions like **IPython** or **Jupyter notebooks** and might +prevent you from reloading files from disk or waiting for previous processes +to finish. + +Savepoints are stored via ``save()`` and can be restored via ``revert()``. + +.. 
note:: + Savepoints only exists in memory and are not written to disk. Only one + savepoint can exist, so repeated ``save()`` calls will overwrite any previous + versions of the ``DataSet``. To permanently save your data, please use one + of the ``write`` methods, e.g. ``write_quantipy()``. diff --git a/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/03_inspection.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/03_inspection.rst.txt new file mode 100644 index 000000000..5301da6fe --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/03_inspection.rst.txt @@ -0,0 +1,355 @@ +.. toctree:: + :maxdepth: 5 + :includehidden: + +==================== +Inspecting variables +==================== + +------------------------------ +Querying and slicing case data +------------------------------ +A ``qp.DataSet`` is mimicking ``pandas``-like item access, i.e. passing a variable +name into the ``[]``-accessor will return a ``pandas.DataFrame`` view of the +case data component. That means that we can chain any ``pandas.DataFrame`` method to +the query: + +>>> ds['q9'].head() + q9 +0 99; +1 1;4; +2 98; +3 1;4; +4 99; + +There is the same support for selecting multiple variables at once: + +>>> ds[['q9', 'gender']].head() + q9 gender +0 99; 1 +1 1;4; 2 +2 98; 1 +3 1;4; 1 +4 99; 1 + +To integrate ``array`` (``masks``) variables into this behaviour, passing an +``array`` name will automatically call its item list: + +>>> ds['q6'].head() + q6_1 q6_2 q6_3 +0 1 1 1 +1 1 NaN 1 +2 1 NaN 2 +3 2 NaN 2 +4 2 10 10 + +This can be combined with the ``list``-based selection as well: + +>>> ds[['q6', 'q9', 'gender']].head() + q6_1 q6_2 q6_3 q9 gender +0 1 1 1 99; 1 +1 1 NaN 1 1;4; 2 +2 1 NaN 2 98; 1 +3 2 NaN 2 1;4; 1 +4 2 10 10 99; 1 + + +``DataSet`` case data supports row-slicing based on complex logical conditions +to inspect subsets of the data. 
We can use the ``take()`` with a ``Quantipy`` +logic operation naturally for this: + +>>> condition = intersection( +... [{'gender': [1]}, +... {'religion': [3]}, +... {'q9': [1, 4]}]) +>>> take = ds.take(condition) + +>>> ds[take, ['gender', 'religion', 'q9']].head() + gender religion q9 +52 1 3 1;2;4; +357 1 3 1;3;4; +671 1 3 1;3;4; +783 1 3 2;3;4; +802 1 3 4; + +.. seealso:: + Please find an overview of ``Quantipy`` logical operators and data slicing + and masking in the :doc:`docs about complex logical conditions <06_logics>`! + +---------------------------- +Variable and value existence +---------------------------- + +any, all, code_count, is_nan, var_exists, codes_in_data, is_like_numeric +variables + +______________________________________________________________________________ + +We can use ``variables()`` and ``var_exists()`` to generally test the membership +of variables inside ``DataSet``. The former is showing the list of all variables +registered inside the ``'data file'`` ``set``, the latter is checking if a variable's +``name`` is found in either the ``'columns'`` or ``'masks'`` collection. For +our example data, the variables are: + +>>> dataset.variables() + +So a test for the ``array`` ``'q5'`` should be positive: + +>>> dataset.var_exists('q5') +True + +In addition to ``Quantipy``\'s complex logic operators, the ``DataSet`` class +offers some quick case data operations for code existence tests. To return a +``pandas.Series`` of all empty rows inside a variable use ``is_nan()`` as per: + +>>> dataset.is_nan('q8').head() +0 True +1 True +2 True +3 True +4 True +Name: q8, dtype: bool + +Which we can also use to quickly check the number of missing cases... + +>>> dataset.is_nan('q8').value_counts() +True 5888 +False 2367 +Name: q8, dtype: int64 + +... as well as use the result as slicer for the ``DataSet`` case data component, +e.g. 
to show the non-empty rows: + +>>> slicer = dataset.is_nan('q8') +>>> dataset[~slicer, 'q8'].head() +Name: q8, dtype: int64 +7 5; +11 5; +13 1;4; +14 4;5; +23 1;4; +Name: q8, dtype: object + +Especially useful for ``delimited set`` and ``array`` data, the ``code_count()`` +method is creating the ``pandas.Series`` of response values found. If applied on +an ``array``, the result is expressed across all source item variables: + +>>> dataset.code_count('q6').value_counts() +3 5100 +2 3155 +dtype: int64 + +... which means that not all cases contain answers in all three of the array's items. + +With some basic ``pandas`` we can double-check this result: + +>>> pd.concat([dataset['q6'], dataset.code_count('q6')], axis=1).head() + q6_1 q6_2 q6_3 0 +0 1 1.0 1 3 +1 1 NaN 1 2 +2 1 NaN 2 2 +3 2 NaN 2 2 +4 2 10.0 10 3 + +``code_count()`` can optionally ignore certain codes via the ``count_only`` and +``count_not`` parameters: + +>>> q2_count = dataset.code_count('q2', count_only=[1, 2, 3]) +>>> pd.concat([dataset['q2'], q2_count], axis=1).head() + q2 0 +0 1;2;3;5; 3 +1 3;6; 1 +2 NaN 0 +3 NaN 0 +4 NaN 0 + + +Similarly, the ``any()`` and ``all()`` methods yield slicers for cases obeying +the condition that at least one / all of the provided codes are found in the +response. 
       .delimited_sets()
Although it is possible to access a ``DataSet`` meta component via its ``_meta``
attribute directly, the preferred way to inspect and interact with the metadata
is to use ``DataSet`` methods.
It is very easy to add new variable metadata to a ``DataSet`` via ``add_meta()``
which lets you create all supported variable types.
Similar to the usage of the ``categories`` argument, ``items`` is controlling
the creation of an ``array``, i.e. specifying ``items`` is automatically
preparing the ``'masks'`` and ``'columns'`` metadata.
The ``qtype`` argument +in this case always refers to the type of the corresponding ``'columns'``. + +>>> name, qtype, label = 'new_array', 'single', 'My new array variable' +>>> cats = ['Category A', 'Category B', 'Category C'] + +Again, there are two alternatives to construct the ``items`` object: + +**Providing a list of item labels** (item identifiers will be enumerated +starting from ``1``): + +>>> items = ['Item A', 'Item B', 'Item C', 'Item D'] + + +>>> ds.add_meta(name, qtype, label, cats, items=items) +>>> ds.meta('new_array') +single items item texts codes texts missing +new_array: My new array variable +1 new_array_1 Item A 1 Category A None +2 new_array_2 Item B 2 Category B None +3 new_array_3 Item C 3 Category C None +4 new_array_4 Item D + +**Providing a list of tuples pairing item identifiers and labels**: + +>>> items = [(1, 'Item A'), (2, 'Item B'), (97, 'Item C'), (98, 'Item D')] + +>>> ds.add_meta(name, qtype, label, cats, items) +>>> ds.meta('new_array') +single items item texts codes texts missing +new_array: My new array variable +1 new_array_1 Item A 1 Category A None +2 new_array_2 Item B 2 Category B None +3 new_array_97 Item C 3 Category C None +4 new_array_98 Item D + +.. note:: + For every created variable, ``add_meta()`` is also adding the relevant ``columns`` + into the ``pd.DataFrame`` case data component of the ``DataSet`` to keep + it consistent: + + >>> ds['new_array'].head() + new_array_1 new_array_2 new_array_97 new_array_98 + 0 NaN NaN NaN NaN + 1 NaN NaN NaN NaN + 2 NaN NaN NaN NaN + 3 NaN NaN NaN NaN + 4 NaN NaN NaN NaN + +-------- +Renaming +-------- +It is possible to attach new names to ``DataSet`` variables. Using the ``rename()`` +method will replace all former variable ``keys`` and other mentions inside the +metadata document and exchange the ``DataFrame`` column names. 
We can add new category definitions to existing ``values`` meta with the
``extend_values()`` method.
As when adding full metadata for categorical +variables, new ``values`` can be generated by either providing only labels or +tuples of codes and labels. + +>>> + +While the method will never allow adding duplicated numeric values for the +categories, setting ``safe`` to ``False`` will enable you to add duplicated ``text`` +meta, i.e. ``values`` could contain both +``{'text': {'en-GB': 'No answer'}, 'value': 98}`` and +``{'text': {'en-GB': 'No answer'}, 'value': 99}``. By default, however, +the method will strictly prohibit any duplicates in the resulting ``values``. + +>>> + +-------------------------------- +Reordering the ``values`` object +-------------------------------- + + +---------------------------- +Removing ``DataSet`` objects +---------------------------- + + + + + + + + + diff --git a/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/05_transforming.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/05_transforming.rst.txt new file mode 100644 index 000000000..cd44e78e1 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/05_transforming.rst.txt @@ -0,0 +1,373 @@ +====================== +Transforming variables +====================== + +.. toctree:: + :maxdepth: 5 + :includehidden: + +------- +Copying +------- + +It's often recommended to draw a clean copy of a variable before starting to +editing its meta or case data. With ``copy()`` you can add a copy to the +``DataSet`` that is identical to the original in all respects but its name. 
It's often recommended to draw a clean copy of a variable before starting to
edit its meta or case data.
+ +Conversions need to modify both the ``meta`` and ``data`` component of the +``DataSet`` and are limited to transformations that keep the original and new +state of a variable consistent. The following conversions are currently +supported: + ++--------------------------+-----------------+------------------------+--------------+----------------+-----------------+ +| ``name`` (from-``type``) | ``to='single'`` | ``to='delimited set'`` | ``to='int'`` | ``to='float'`` | ``to='string'`` | ++--------------------------+-----------------+------------------------+--------------+----------------+-----------------+ +| ``'single'`` | [X] | X | X | X | X | ++--------------------------+-----------------+------------------------+--------------+----------------+-----------------+ +| ``'delimited set'`` | | [X] | | | | ++--------------------------+-----------------+------------------------+--------------+----------------+-----------------+ +| ``'int'`` | X | | [X] | X | X | ++--------------------------+-----------------+------------------------+--------------+----------------+-----------------+ +| ``'float'`` | | | | [X] | X | ++--------------------------+-----------------+------------------------+--------------+----------------+-----------------+ +| ``'string'`` | X | | X* | X* | [X] | ++--------------------------+-----------------+------------------------+--------------+----------------+-----------------+ +| ``'date'`` | X | | | | X | ++--------------------------+-----------------+------------------------+--------------+----------------+-----------------+ + +\* *If all values of the variable are numerical, i.e.* ``DataSet.is_like_numeric()`` *returns* ``True``. + +Each of these conversions will rebuild the variable meta data to match the ``to`` +type. 
will create a ``values`` object that categorizes the unique numeric codes found in the
case data with their ``str`` representation as ``text`` meta.
Flexible banding of numeric data is provided through ``DataSet.band()``: If a
variable is banded, it will standardly be added to the ``DataSet`` via the
original's name suffixed with ``'banded'``, e.g. ``'age_banded'``, keeping
the originating variable's ``text`` label.
That is because the transposed array is collecting
what former items have been assigned per former value:
+1 q5_1 Surfing 1 I would refuse if asked None +2 q5_2 Snowboarding 2 Very unlikely None +3 q5_3 Kite boarding 3 Probably wouldn't None +4 q5_4 Parachuting 4 Probably would if asked None +5 q5_5 Cave diving 5 Very likely None +6 q5_6 Windsurfing 97 I'm already planning to None +7 98 Don't know None + +*Transposition* + +>>> ds['q5_trans'].head() + q5_trans_1 q5_trans_2 q5_trans_3 q5_trans_4 q5_trans_5 q5_trans_97 q5_trans_98 +0 5; 1;2;3;4;6; NaN NaN NaN NaN NaN +1 NaN NaN 3;4;5; NaN 1;2;6; NaN NaN +2 5; NaN NaN NaN 1;3;4;6; NaN 2; +3 3; NaN 5; NaN 1;2;4;6; NaN NaN +4 NaN NaN NaN NaN NaN NaN 1;2;3;4;5;6; + +>>> ds.meta('q5_trans') +delimited set items item texts codes texts missing +q5_trans: How likely are you to do each of the ... +1 q5_trans_1 I would refuse if asked 1 Surfing None +2 q5_trans_2 Very unlikely 2 Snowboarding None +3 q5_trans_3 Probably wouldn't 3 Kite boarding None +4 q5_trans_4 Probably would if asked 4 Parachuting None +5 q5_trans_5 Very likely 5 Cave diving None +6 q5_trans_97 I'm already planning to 6 Windsurfing None +7 q5_trans_98 Don't know + +The method's ``ignore_items`` and ``ignore_values`` arguments can pick up +``items`` (indicated by their order number) and ``values`` to leave aside +during the transposition. 
+ +*Ignoring items* + +The new ``values`` meta's numerical codes will always be enumerated from 1 to +the number of valid items for the transposition, so ignoring items 2, 3 and 4 +will lead to: + +>>> ds.transpose('q5', ignore_items=[2, 3, 4]) + +>>> ds['q5_trans'].head(1) + q5_trans_1 q5_trans_2 q5_trans_3 q5_trans_4 q5_trans_5 q5_trans_97 q5_trans_98 +0 2; 1;3; NaN NaN NaN NaN NaN + +>>> ds.values('q5_trans') +[(1, 'Surfing'), (2, 'Cave diving'), (3, 'Windsurfing')] + +*Ignoring values* + +>>> ds.transpose('q5', ignore_values=[1, 97]) + +>>> ds['q5_trans'].head(1) + q5_trans_2 q5_trans_3 q5_trans_4 q5_trans_5 q5_trans_98 +0 1;2;3;4;6; NaN NaN NaN NaN + +>>> ds.items('q5_trans') +[('q5_trans_2', u'Very unlikely'), + ('q5_trans_3', u"Probably wouldn't"), + ('q5_trans_4', u'Probably would if asked'), + ('q5_trans_5', u'Very likely'), + ('q5_trans_98', u"Don't know")] + +*Ignoring both items and values* + +>>> ds.transpose('q5', ignore_items=[2, 3, 4], ignore_values=[1, 97]) + +>>> ds['q5_trans'].head(1) + q5_trans_2 q5_trans_3 q5_trans_4 q5_trans_5 q5_trans_98 +0 1;3; NaN NaN NaN NaN + +>>> ds.meta('q5_trans') +delimited set items item texts codes texts missing +q5_trans: How likely are you to do each of the ... +1 q5_trans_2 Very unlikely 1 Surfing None +2 q5_trans_3 Probably wouldn't 2 Cave diving None +3 q5_trans_4 Probably would if asked 3 Windsurfing None +4 q5_trans_5 Very likely +5 q5_trans_98 Don't know + +**Flatten item answers** + +- ``flatten()`` \ No newline at end of file diff --git a/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/06_logics.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/06_logics.rst.txt new file mode 100644 index 000000000..45f601c45 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/06_logics.rst.txt @@ -0,0 +1,165 @@ +.. 
Logic and set operators
+ +``q1_1`` has any of the responses 1, 2 or 3: + +>>> l = {"q1_1": [1, 2, 3]} + + +``has_any`` +----------- +``q1_1`` has any of the responses 1, 2 or 3: + +>>> l = {"q1_1": has_any([1, 2, 3])} + +``q1_1`` has any of the responses 1, 2 or 3 and no others: + +>>> l = {"q1_1": has_any([1, 2, 3], exclusive=True)} + + +``not_any`` +----------- +``q1_1`` doesn't have any of the responses 1, 2 or 3: + +>>> l = {"q1_1": not_any([1, 2, 3])} + +``q1_1`` doesn't have any of the responses 1, 2 or 3 but has some others: + +>>> l = {"q1_1": not_any([1, 2, 3], exclusive=True)} + +``has_all`` +----------- +``q1_1`` has all of the responses 1, 2 and 3: + +>>> l = {"q1_1": has_all([1, 2, 3])} + +``q1_1`` has all of the responses 1, 2 and 3 and no others: + +>>> l = {"q1_1": has_all([1, 2, 3], exclusive=True)} + +``not_all`` +----------- +``q1_1`` doesn't have all of the responses 1, 2 and 3: + +>>> l = {"q1_1": not_all([1, 2, 3])} + +``q1_1`` doesn't have all of the responses 1, 2 and 3 but has some others: + +>>> l = {"q1_1": not_all([1, 2, 3], exclusive=True)} + +``has_count`` +------------- + +``q1_1`` has exactly 2 responses: + +>>> l = {"q1_1": has_count(2)} + +``q1_1`` has 1, 2 or 3 responses: + +>>> l = {"q1_1": has_count([1, 3])} + +``q1_1`` has 1 or more responses: + +>>> l = {"q1_1": has_count([is_ge(1)])} + +``q1_1`` has 1, 2 or 3 responses from the response group 5, 6, 7, 8 or 9: + +>>> l = {"q1_1": has_count([1, 3, [5, 6, 7, 8, 9]])} + +``q1_1`` has 1 or more responses from the response group 5, 6, 7, 8 or 9: + +>>> l = {"q1_1": has_count([is_ge(1), [5, 6, 7, 8, 9]])} + +``not_count`` +------------- +``q1_1`` doesn't have exactly 2 responses: + +>>> l = {"q1_1": not_count(2)} + +``q1_1`` doesn't have 1, 2 or 3 responses: + +>>> l = {"q1_1": not_count([1, 3])} + +``q1_1`` doesn't have 1 or more responses: + +>>> l = {"q1_1": not_count([is_ge(1)])} + +``q1_1`` doesn't have 1, 2 or 3 responses from the response group 5, 6, 7, 8 or 9: + +>>> l = {"q1_1": not_count([1, 3, 
[5, 6, 7, 8, 9]])} + +``q1_1`` doesn't have 1 or more responses from the response group 5, 6, 7, 8 or 9: + +>>> l = {"q1_1": not_count([is_ge(1), [5, 6, 7, 8, 9]])} + +---------------------------------- +Boolean slicers and code existence +---------------------------------- +``any()``, ``all()`` +``code_count()``, ``is_nan()`` diff --git a/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/07_custom_recoding.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/07_custom_recoding.rst.txt new file mode 100644 index 000000000..71f666d18 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/dataprocessing/07_custom_recoding.rst.txt @@ -0,0 +1,632 @@ +.. toctree:: + :maxdepth: 5 + :includehidden: + +==================== +Custom data recoding +==================== + +--------------------------------- +The ``recode()`` method in detail +--------------------------------- +This function takes a mapper of ``{key: logic}`` entries and injects the +key into the target column where its paired logic is True. The logic +may be arbitrarily complex and may refer to any other variable or +variables in data. Where a pre-existing column has been used to +start the recode, the injected values can replace or be appended to +any data found there to begin with. Note that this function does +not edit the target column, it returns a recoded copy of the target +column. The recoded data will always comply with the column type +indicated for the target column according to the meta. + + +:method: ``recode(target, mapper, default=None, append=False, + intersect=None, initialize=None, fillna=None, inplace=True)`` + + +``target`` +---------- + +``target`` controls which column meta should be used to control the +result of the recode operation. This is important because you cannot +recode multiple responses into a 'single'-typed column. + +The ``target`` column **must** already exist in meta. 
+ +The ``recode`` function is effectively a request to return a copy of +the ``target`` column, recoded as instructed. ``recode`` does not +edit the ``target`` column in place, it returns a recoded copy of it. + +If the ``target`` column does not already exist in ``data`` then a new +series, named accordingly and initialized with ``np.NaN``, will begin +the recode. + +Return a recoded version of the column ``radio_stations_xb`` edited +based on the given mapper: + +>>> recoded = recode( +... meta, data, +... target='radio_stations_xb', +... mapper=mapper +... ) + +By default, recoded data resulting from the the mapper will replace any +data already sitting in the target column (on a cell-by-cell basis). + +``mapper`` +---------- + +A mapper is a dict of ``{value: logic}`` entries where value represents +the data that will be injected for cases where the logic is True. + +Here's a simplified example of what a mapper looks like: + +>>> mapper = { +... 1: logic_A, +... 2: logic_B, +... 3: logic_C, +... } + +1 will be generated where ``logic_A`` is ``True``, 2 where ``logic_B`` is +``True`` and 3 where ``logic_C`` is ``True``. + +The recode function, by referencing the type indicated by the meta, +will manage the complications involved in single vs delimited set +data. + +>>> mapper = { +... 901: {'radio_stations': frange('1-13')}, +... 902: {'radio_stations': frange('14-20')}, +... 903: {'radio_stations': frange('21-25')} +... } + +This means: inject 901 if the column ``radio_stations`` has any of the +values 1-13, 902 where ``radio_stations`` has any of the values 14-20 +and 903 where ``radio_stations`` has any of the values 21-25. + +``default`` +----------- + +If you had lots of values to generate from the same reference column +(say most/all of them were based on ``radio_stations``) then we can +omit the wildcard logic format and use recode's default parameter. + +>>> recoded = recode( +... meta, data, +... target='radio_stations_xb', +... mapper={ +... 
901: frange('1-13'), +... 902: frange('14-20'), +... 903: frange('21-25') +... }, +... default='radio_stations' +... ) + +This means, all unkeyed logic will default to be keyed to +``radio_stations``. In this case the three codes 901, 902 and 903 will +be generated based on the data found in ``radio_stations``. + +You can combine this with reference to other columns, but you can only +provide one default column. + +>>> recoded = recode( +... meta, data, +... target='radio_stations_xb', +... mapper={ +... 901: frange('1-13'), +... 902: frange('14-20'), +... 903: frange('21-25'), +... 904: {'age': frange('18-34')} +... }, +... default='radio_stations' +... ) + +Given that logic can be arbitrarily complicated, mappers can be as +well. You'll see an example of a mapper that recodes a segmentation +in **Example 4**, below. + +``append`` +---------- + +If you want the recoded data to be appended to whatever may +already be in the target column (this is only applicable for 'delimited +set'-typed columns), then you should use the append parameter. + +>>> recoded = recode( +... meta, data, +... target='radio_stations_xb', +... mapper=mapper, +... append=True +... ) + +The precise behaviour of the append parameter can be seen in the +following examples. + +Given the following data: + +>>> df['radio_stations_xb'] +1 6;7;9;13; +2 97; +3 97; +4 13;16;18; +5 2;6; +Name: radio_stations_xb, dtype: object + +We generate a recoded value of 901 if any of the values 1-13 are +found. 
With the default ``append=False`` behaviour we will return the
+following:
+
+>>> target = 'radio_stations_xb'
+>>> recode(meta, data, target, mapper)
+1 901;
+2 97;
+3 97;
+4 901;
+5 901;
+Name: radio_stations_xb, dtype: object
+
+However, if we instead use ``append=True``, we will return the following:
+
+>>> target = 'radio_stations_xb'
+>>> recode(meta, data, target, mapper, append=True)
+1 6;7;9;13;901;
+2 97;
+3 97;
+4 13;16;18;901;
+5 2;6;901;
+Name: radio_stations_xb, dtype: object
+
+``intersect``
+-------------
+
+One way to help simplify complex logical conditions, especially when
+they are in some way repetitive, is to use ``intersect``, which
+accepts any logical statement and forces every condition in the mapper
+to become the intersection of both it and the intersect condition.
+
+For example, we could limit our recode to males by giving a logical
+condition to that effect to ``intersect``:
+
+>>> recoded = recode(
+... meta, data,
+... target='radio_stations_xb',
+... mapper={
+... 901: frange('1-13'),
+... 902: frange('14-20'),
+... 903: frange('21-25'),
+... 904: {'age': frange('18-34')}
+... },
+... default='radio_stations',
+... intersect={'gender': [1]}
+... )
+
+``initialize``
+--------------
+
+You may also ``initialize`` your copy of the target column as part of your
+recode operation. You can ``initialize`` with either np.NaN (to overwrite
+anything that may already be there when your recode begins) or by naming
+another column. When you name another column a copy of the data from that
+column is used to initialize your recode.
+
+Initialization occurs **before** your recode.
+
+>>> recoded = recode(
+... meta, data,
+... target='radio_stations_xb',
+... mapper={
+... 901: frange('1-13'),
+... 902: frange('14-20'),
+... 903: frange('21-25'),
+... 904: {'age': frange('18-34')}
+... },
+... default='radio_stations',
+... initialize=np.NaN
+... )
+
+>>> recoded = recode(
+... meta, data,
+... target='radio_stations_xb',
+... mapper={
+... 
901: frange('1-13'), +... 902: frange('14-20'), +... 903: frange('21-25'), +... 904: {'age': frange('18-34')} +... }, +... default='radio_stations', +... initialize='radio_stations' +... ) + +``fillna`` +---------- + +You may also provide a ``fillna`` value that will be used as per +``pd.Series.fillna()`` **after** the recode has been performed. + +>>> recoded = recode( +... meta, data, +... target='radio_stations_xb', +... mapper={ +... 901: frange('1-13'), +... 902: frange('14-20'), +... 903: frange('21-25'), +... 904: {'age': frange('18-34')} +... }, +... default='radio_stations', +... initialize=np.NaN, +... fillna=99 +... ) + +---------------------- +Custom recode examples +---------------------- + +Building a net code +------------------- +Here's an example of copying an existing question and recoding onto it a +net code. + +Create the new metadata: + +>>> meta['columns']['radio_stations_xb'] = copy.copy( +... meta['columns']['radio_stations'] +... ) +>>> meta['columns']['radio_stations_xb']['values'].append( +... { +... "value": 901, +... "text": {"en-GB": "NET: Listened to radio in past 30 days"} +... } +... ) + +Initialize the new column. In this case we're starting with a copy of +the ``radio_stations`` column: + +>>> data['radio_stations_xb'] = data['radio_stations'].copy() + +Recode the new column by appending the code 901 to it as indicated +by the mapper: + +>>> data['radio_stations_xb'] = recode( +... meta, data, +... target='radio_stations_xb', +... mapper={ +... 901: {'radio_stations': frange('1-23, 92, 94, 141')} +... }, +... append=True +... 
) + +Check the result: + +>>> data[['radio_stations', 'radio_stations_xb']].head(20) + radio_stations radio_stations_cb +0 5; 5;901; +1 97; 97; +2 97; 97; +3 97; 97; +4 97; 97; +5 4; 4;901; +6 11; 11;901; +7 4; 4;901; +8 97; 97; +9 97; 97; +10 97; 97; +11 92; 92;901; +12 97; 97; +13 1;13;17; 1;13;17;901; +14 6; 6;901; +15 1;5;6;10; 1;5;6;10;901; +16 6; 6;901; +17 2;4;16; 2;4;16;901; +18 6;10; 6;10;901; +19 6; 6;901; + +Create-and-fill +--------------- + +Here's an example where the value 1 is generated based on some logic +and then all remaining cases are given the value 2 using the +pandas.Series.fillna() method. + +Create the new metadata + +>>> meta['columns']['age_xb'] = { +... 'type': 'single', +... 'text': {'en-GB': 'Age'}, +... 'values': [ +... {'value': 1, 'text': {'en-GB': '16-25'}}, +... {'value': 2, 'text': {'en-GB': 'Others'}} +... ] +... } + +Initialize the new column: + +>>> data['age_xb'] = np.NaN + +Recode the new column: + +>>> data['age_xb'] = recode( +... meta, data, +... target='age_xb', +... mapper={ +... 1: {'age': frange('16-40')} +... } +... ) + +Fill all cases that are still empty with the value 2: + +>>> data['age_xb'].fillna(2, inplace=True) + +Check the result: + +>>> data[['age', 'age_xb']].head(20) + age age_grp_rc +0 22 1 +1 68 2 +2 32 1 +3 44 2 +4 33 1 +5 52 2 +6 54 2 +7 44 2 +8 62 2 +9 49 2 +10 64 2 +11 73 2 +12 43 2 +13 28 1 +14 66 2 +15 39 1 +16 51 2 +17 50 2 +18 77 2 +19 42 2 + +Numerical banding +----------------- + +Here's a typical example of recoding age into custom bands. + +In this case we're using list comprehension to generate the first ten +values objects and then concatenate that with a final '65+' value object +which doesn't folow the same label format. + +Create the new metadata: + +>>> meta['columns']['age_xb_1'] = { +... 'type': 'single', +... 'text': {'en-GB': 'Age'}, +... 'values': [ +... { +... 'value': i, +... 'text': {'en-GB': '{}-{}'.format(r[0], r[1])} +... } +... for i, r in enumerate( +... [ +... 
[18, 20], +... [21, 25], [26, 30], +... [31, 35], [36, 40], +... [41, 45], [46, 50], +... [51, 55], [56, 60], +... [61, 65] +... ], +... start=1 +... ) +... ] + [ +... { +... 'value': 11, +... 'text': {'en-GB': '65+'} +... } +... ] +... } + +Initialize the new column: + +>>> data['age_xb_1'] = np.NaN + +Recode the new column: + +>>> data['age_xb_1'] = recode( +... meta, data, +... target='age_xb_1', +... mapper={ +... 1: frange('18-20'), +... 2: frange('21-25'), +... 3: frange('26-30'), +... 4: frange('31-35'), +... 5: frange('36-40'), +... 6: frange('41-45'), +... 7: frange('46-50'), +... 8: frange('51-55'), +... 9: frange('56-60'), +... 10: frange('61-65'), +... 11: frange('66-99') +... }, +... default='age' +... ) + +Check the result: + +>>> data[['age', 'age_xb_1']].head(20) + age age_cb +0 22 2 +1 68 11 +2 32 4 +3 44 6 +4 33 4 +5 52 8 +6 54 8 +7 44 6 +8 62 10 +9 49 7 +10 64 10 +11 73 11 +12 43 6 +13 28 3 +14 66 11 +15 39 5 +16 51 8 +17 50 7 +18 77 11 +19 42 6 + +Complicated segmentation +------------------------ + +Here's an example of using a complicated, nested series of logic +statements to recode an obscure segmentation. + +The segemenation was given with the following definition: + +**1 - Self-directed:** + +- If q1_1 in [1,2] and q1_2 in [1,2] and q1_3 in [3,4,5] + +**2 - Validators:** + +- If q1_1 in [1,2] and q1_2 in [1,2] and q1_3 in [1,2] + +**3 - Delegators:** + +- If (q1_1 in [3,4,5] and q1_2 in [3,4,5] and q1_3 in [1,2]) +- Or (q1_1 in [3,4,5] and q1_2 in [1,2] and q1_3 in [1,2]) +- Or (q1_1 in [1,2] and q1_2 in [3,4,5] and q1_3 in [1,2]) + +**4 - Avoiders:** + +- If (q1_1 in [3,4,5] and q1_2 in [3,4,5] and q1_3 in [3,4,5]) +- Or (q1_1 in [3,4,5] and q1_2 in [1,2] and q1_3 in [3,4,5]) +- Or (q1_1 in [1,2] and q1_2 in [3,4,5] and q1_3 in [3,4,5]) + +**5 - Others:** + +- Everyone else. + +Create the new metadata: + +>>> meta['columns']['segments'] = { +... 'type': 'single', +... 'text': {'en-GB': 'Segments'}, +... 'values': [ +... 
{'value': 1, 'text': {'en-GB': 'Self-directed'}}, +... {'value': 2, 'text': {'en-GB': 'Validators'}}, +... {'value': 3, 'text': {'en-GB': 'Delegators'}}, +... {'value': 4, 'text': {'en-GB': 'Avoiders'}}, +... {'value': 5, 'text': {'en-GB': 'Other'}}, +... ] +... } + +Initialize the new column? + +>>> data['segments'] = np.NaN + +Create the mapper separately, since it's pretty massive! + +See the **Complex logic** section for more information and examples +related to the use of ``union`` and ``intersection``. + +>>> mapper = { +... 1: intersection([ +... {"q1_1": [1, 2]}, +... {"q1_2": [1, 2]}, +... {"q1_3": [3, 4, 5]} +... ]), +... 2: intersection([ +... {"q1_1": [1, 2]}, +... {"q1_2": [1, 2]}, +... {"q1_3": [1, 2]} +... ]), +... 3: union([ +... intersection([ +... {"q1_1": [3, 4, 5]}, +... {"q1_2": [3, 4, 5]}, +... {"q1_3": [1, 2]} +... ]), +... intersection([ +... {"q1_1": [3, 4, 5]}, +... {"q1_2": [1, 2]}, +... {"q1_3": [1, 2]} +... ]), +... intersection([ +... {"q1_1": [1, 2]}, +... {"q1_2": [3, 4, 5]}, +... {"q1_3": [1, 2]} +... ]), +... ]), +... 4: union([ +... intersection([ +... {"q1_1": [3, 4, 5]}, +... {"q1_2": [3, 4, 5]}, +... {"q1_3": [3, 4, 5]} +... ]), +... intersection([ +... {"q1_1": [3, 4, 5]}, +... {"q1_2": [1, 2]}, +... {"q1_3": [3, 4, 5]} +... ]), +... intersection([ +... {"q1_1": [1, 2]}, +... {"q1_2": [3, 4, 5]}, +... {"q1_3": [3, 4, 5]} +... ]) +... ]) +... } + +Recode the new column: + +>>> data['segments'] = recode( +... meta, data, +... target='segments', +... mapper=mapper +... ) + +.. note:: + Anything not at the top level of the mapper will not benefit from using + the ``default`` parameter of the recode function. In this case, for example, + saying ``default='q1_1'`` would not have helped. Everything in a nested level + of the mapper, including anything in a ``union`` or ``intersection`` list, + must use the explicit dict form ``{"q1_1": [1, 2]}``. 
+ +Fill all cases that are still empty with the value 5: + +>>> data['segments'].fillna(5, inplace=True) + +Check the result: + +>>> data[['q1_1', 'q1_2', 'q1_3', 'segments']].head(20) + q1_1 q1_2 q1_3 segments +0 3 3 3 4 +1 3 3 3 4 +2 1 1 3 1 +3 1 1 2 2 +4 2 2 2 2 +5 1 1 5 1 +6 2 3 2 3 +7 2 2 3 1 +8 1 1 4 1 +9 3 3 3 4 +10 3 3 4 4 +11 2 2 4 1 +12 1 1 5 1 +13 2 2 4 1 +14 1 1 1 2 +15 2 2 4 1 +16 2 2 3 1 +17 1 1 5 1 +18 5 5 1 3 +19 1 1 4 1 + + +Variable creation +----------------- + +Adding derived variables +------------------------ + +Interlocking variables +---------------------- + +Condition-based code removal +---------------------------- + + + diff --git a/docs/API/_build/html/_sources/sites/lib_doc/engine/00_overview.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/engine/00_overview.rst.txt new file mode 100644 index 000000000..ba0714fd0 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/engine/00_overview.rst.txt @@ -0,0 +1,12 @@ +---------------------- +Analysis & aggregation +---------------------- + +.. toctree:: + :maxdepth: 5 + :includehidden: + + 01_links_stacks + 02_quantity + 03_test + 04_agg_methods diff --git a/docs/API/_build/html/_sources/sites/lib_doc/engine/01_links_stacks.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/engine/01_links_stacks.rst.txt new file mode 100644 index 000000000..b29290cb6 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/engine/01_links_stacks.rst.txt @@ -0,0 +1,131 @@ +.. toctree:: + :maxdepth: 5 + :includehidden: + +======================= +Collecting aggregations +======================= + +All computational results are collected in a so-called ``qp.Stack`` object which +acts as a container for large amount of aggregations in form of ``qp.Link``\s. + +---------------------- +What is a ``qp.Link?`` +---------------------- + +A ``qp.Link`` is defined by four attributes that make it unique and set how it is +stored in a ``qp.Stack``. 
These four attributes are ``data_key``, ``filter``,
+``x`` (downbreak) and ``y`` (crossbreak), which are positioned in a ``qp.Stack``
+similar to a tree diagram:
+
+ * Each ``Stack`` can have various ``data_key``\s.
+ * Each ``data_key`` can have various ``filter``\s.
+ * Each ``filter`` can have various ``x``\s.
+ * Each ``x`` can have various ``y``\s.
+
+Consequently ``qp.Stack[dk][filter][x][y]`` is one ``qp.Link`` that can be added
+using ``add_link(self, data_keys=None, filters=['no_filter'], x=None, y=None, ...)``
+
+``qp.Link``\s are storing different ``qp.View``\s (frequencies, statistics,
+etc. - all kinds of computations) that are applied on the same four data attributes.
+
+-------------------------
+Populating a ``qp.Stack``
+-------------------------
+
+A ``qp.Stack`` is able to cope with a large amount of aggregations, so it is
+impractical to add ``Link``\s one by one with repeated ``Stack.add_link()`` calls.
+It is much easier to create a "construction plan" using a ``qp.Batch`` and
+apply the settings saved in ``DataSet._meta['sets']['batches']`` to populate a
+``qp.Stack`` instance. 
In the following, let's assume ``dataset`` is containing +the definitions of two ``qp.Batch``\es, a ``qp.Stack`` can be created running:: + + stack = dataset.populate(batches='all') + +For the ``Batch`` definitions from :doc:`here <../batch/00_overview>`, you +will get the following *construction plans*: + +>>> batch1 = dataset.get_batch('batch1') +>>> batch1.add_y_on_y('y_keys') + +>>> print batch1.x_y_map +OrderedDict([('q1', ['@', 'gender', 'q1', 'locality', 'ethnicity']), + ('q2', ['locality', 'ethnicity']), + ('q6', ['@']), + ('@', ['q6']), + (u'q6_1', ['@', 'gender', 'q1']), + (u'q6_2', ['@', 'gender', 'q1']), + (u'q6_3', ['@', 'gender', 'q1'])]) + +>>> print batch1.x_filter_map +OrderedDict([('q1', {'(men only)+(q1)': (, [{'gender': 1}, {'age': [20, 21, 22, 23, 24, 25]}])}), + ('q2', {'men only': {'gender': 1}}), + ('q6', {'men only': {'gender': 1}}), + ('q6_1', {'men only': {'gender': 1}}), + ('q6_2', {'men only': {'gender': 1}}), + ('q6_3', {'men only': {'gender': 1}})]) + +>>> batch2 = dataset.get_batch('batch2') + +>>> print batch2.x_y_map +OrderedDict([('q2b', ['@', 'gender'])]) + +>>> print batch2.x_filter_map +OrderedDict([('q2b', 'no_filter')]) + +As both ``Batch``\es refer to the same data file, the same ``data_key`` (in this +case the name of ``dataset``) is defining all ``Links``. 
+ +After populating the ``Stack`` content can be viewed using ``.describe()``: + +>>> stack.describe() + data filter x y view # +0 Example Data (A) men only q1 q1 NaN 1 +1 Example Data (A) men only q1 @ NaN 1 +2 Example Data (A) men only q1 gender NaN 1 +3 Example Data (A) men only @ q6 NaN 1 +4 Example Data (A) men only q2 ethnicity NaN 1 +5 Example Data (A) men only q2 locality NaN 1 +6 Example Data (A) men only q6_1 q1 NaN 1 +7 Example Data (A) men only q6_1 @ NaN 1 +8 Example Data (A) men only q6_1 gender NaN 1 +9 Example Data (A) men only q6_2 q1 NaN 1 +10 Example Data (A) men only q6_2 @ NaN 1 +11 Example Data (A) men only q6_2 gender NaN 1 +12 Example Data (A) men only q6_3 q1 NaN 1 +13 Example Data (A) men only q6_3 @ NaN 1 +14 Example Data (A) men only q6_3 gender NaN 1 +15 Example Data (A) men only gender q1 NaN 1 +16 Example Data (A) men only gender @ NaN 1 +17 Example Data (A) men only gender gender NaN 1 +18 Example Data (A) men only q6 @ NaN 1 +19 Example Data (A) (men only)+(q1) q1 q1 NaN 1 +20 Example Data (A) (men only)+(q1) q1 @ NaN 1 +21 Example Data (A) (men only)+(q1) q1 locality NaN 1 +22 Example Data (A) (men only)+(q1) q1 ethnicity NaN 1 +23 Example Data (A) (men only)+(q1) q1 gender NaN 1 +24 Example Data (A) no_filter q2b @ NaN 1 +25 Example Data (A) no_filter q2b gender NaN 1 + +You can find all combinations defined in the ``x_y_map`` in the +``Stack`` structure, but also ``Link``\s like ``Stack['Example Data (A)']['men only']['gender']['gender']`` +are included. These special cases arising from the ``y_on_y`` setting. Sometimes +it is helpful to group a ``describe``-dataframe and create a cross-tabulation +of the four ``Link`` attributes to get a better overview, e.g. to see how many +``Links`` are included for each x-filter combination. 
+: + +>>> stack.describe('x', 'filter') +filter (men only)+(q1) men only no_filter +x +@ NaN 1.0 NaN +gender NaN 3.0 NaN +q1 5.0 3.0 NaN +q2 NaN 2.0 NaN +q2b NaN NaN 2.0 +q6 NaN 1.0 NaN +q6_1 NaN 3.0 NaN +q6_2 NaN 3.0 NaN +q6_3 NaN 3.0 NaN + + diff --git a/docs/API/_build/html/_sources/sites/lib_doc/engine/02_quantity.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/engine/02_quantity.rst.txt new file mode 100644 index 000000000..4f351499d --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/engine/02_quantity.rst.txt @@ -0,0 +1,8 @@ +.. toctree:: + :maxdepth: 5 + :includehidden: + +======================== +The computational engine +======================== + diff --git a/docs/API/_build/html/_sources/sites/lib_doc/engine/03_test.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/engine/03_test.rst.txt new file mode 100644 index 000000000..83341abff --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/engine/03_test.rst.txt @@ -0,0 +1,7 @@ +.. toctree:: + :maxdepth: 5 + :includehidden: + +==================== +Significance testing +==================== diff --git a/docs/API/_build/html/_sources/sites/lib_doc/engine/04_agg_methods.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/engine/04_agg_methods.rst.txt new file mode 100644 index 000000000..4dd5529b8 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/engine/04_agg_methods.rst.txt @@ -0,0 +1,465 @@ +.. 
toctree:: + :maxdepth: 5 + :includehidden: + +================ +View aggregation +================ + +All following examples are working with a ``qp.Stack`` that was populated +from a ``qp.DataSet`` including the following ``qp.Batch`` definitions: + +>>> batch1 = dataset.get_batch('batch1') +>>> batch1.add_y_on_y('y_keys') + +>>> print batch1.x_y_map +OrderedDict([('q1', ['@', 'gender', 'q1', 'locality', 'ethnicity']), + ('q2', ['locality', 'ethnicity']), + ('q6', ['@']), + ('@', ['q6']), + (u'q6_1', ['@', 'gender', 'q1']), + (u'q6_2', ['@', 'gender', 'q1']), + (u'q6_3', ['@', 'gender', 'q1'])]) + +>>> print batch1.x_filter_map +OrderedDict([('q1', {'(men only)+(q1)': (, [{'gender': 1}, {'age': [20, 21, 22, 23, 24, 25]}])}), + ('q2', {'men only': {'gender': 1}}), + ('q6', {'men only': {'gender': 1}}), + ('q6_1', {'men only': {'gender': 1}}), + ('q6_2', {'men only': {'gender': 1}}), + ('q6_3', {'men only': {'gender': 1}})]) + +>>> print batch1.weights +['weight_a'] + +>>> batch2 = dataset.get_batch('batch2') + +>>> print batch2.x_y_map +OrderedDict([('q2b', ['@', 'gender'])]) + +>>> print batch2.x_filter_map +OrderedDict([('q2b', 'no_filter')]) + +>>> print batch2.weights +['weight'] + +----------- +Basic views +----------- + +It is possible to add various ``qp.View``\s to a ``Link``. This can be performed +by running ``Stack.add_link()`` providing ``View`` objects via the ``view`` parameter. +Alternatively, the ``qp.Batch`` definitions that are stored in the meta data +help to add basic ``View``\s (counts, percentages, bases and sums). By simply +running ``Stack.aggregate()`` we can easily add a large amount of aggregations +in one step. + +.. note:: + ``Stack.aggregate()`` can only be used with pre-populated ``Stack``\s! + (see :doc:`DataSet.populate() <01_links_stacks>`). 
+ +For instance, we can add column percentages and (unweighted and weighted) base sizes +to all ``Link``\s of ``batch2`` like this: + +>>> stack.aggregate(views=['c%', 'cbase'], unweighted_base=True, batches='batch2', verbose=False) +>>> stack.describe() + data filter x y view # +0 Example Data (A) men only q1 q1 NaN 1 +1 Example Data (A) men only q1 @ NaN 1 +2 Example Data (A) men only q1 gender NaN 1 +3 Example Data (A) men only @ q6 NaN 1 +4 Example Data (A) men only q2 ethnicity NaN 1 +5 Example Data (A) men only q2 locality NaN 1 +6 Example Data (A) men only q6_1 q1 NaN 1 +7 Example Data (A) men only q6_1 @ NaN 1 +8 Example Data (A) men only q6_1 gender NaN 1 +9 Example Data (A) men only q6_2 q1 NaN 1 +10 Example Data (A) men only q6_2 @ NaN 1 +11 Example Data (A) men only q6_2 gender NaN 1 +12 Example Data (A) men only q6_3 q1 NaN 1 +13 Example Data (A) men only q6_3 @ NaN 1 +14 Example Data (A) men only q6_3 gender NaN 1 +15 Example Data (A) men only gender q1 NaN 1 +16 Example Data (A) men only gender @ NaN 1 +17 Example Data (A) men only gender gender NaN 1 +18 Example Data (A) men only q6 @ NaN 1 +19 Example Data (A) (men only)+(q1) q1 q1 NaN 1 +20 Example Data (A) (men only)+(q1) q1 @ NaN 1 +21 Example Data (A) (men only)+(q1) q1 locality NaN 1 +22 Example Data (A) (men only)+(q1) q1 ethnicity NaN 1 +23 Example Data (A) (men only)+(q1) q1 gender NaN 1 +24 Example Data (A) no_filter q2b @ x|f|:|y|weight|c% 1 +25 Example Data (A) no_filter q2b @ x|f|x:||weight|cbase 1 +26 Example Data (A) no_filter q2b @ x|f|x:|||cbase 1 +27 Example Data (A) no_filter q2b gender x|f|:|y|weight|c% 1 +28 Example Data (A) no_filter q2b gender x|f|x:||weight|cbase 1 +29 Example Data (A) no_filter q2b gender x|f|x:|||cbase 1 + +Obviously ``View``\s are only added to ``Link``\s defined by ``batch2`` and +automatically weighted according to the weight definition of ``batch2``, +which is evident from the view keys (``x|f|:|y|weight|c%``). 
Combining the information +of the four ``Link`` attributes with a view key, leads to a ``pd.DataFrame`` +and its belonging meta information: + +>>> link = stack['Example Data (A)']['no_filter']['q2b']['gender'] +>>> view_key = 'x|f|:|y|weight|c%' + +>>> link[view_key] +Question q2b +Values @ +Question Values +q2b 1 11.992144 + 2 80.802580 + 3 7.205276 + +>>> link[view_key].meta() +{ + "agg": { + "weights": "weight", + "name": "c%", + "grp_text_map": null, + "text": "", + "fullname": "x|f|:|y|weight|c%", + "is_weighted": true, + "method": "frequency", + "is_block": false + }, + "x": { + "is_array": false, + "name": "q2b", + "is_multi": false, + "is_nested": false + }, + "shape": [ + 3, + 1 + ], + "y": { + "is_array": false, + "name": "@", + "is_multi": false, + "is_nested": false + } +} + +Now we are adding ``View``\s to all ``batch1``-defined ``Link``\s as well: + +>>> stack.aggregate(views=['c%', 'counts', 'cbase'], unweighted_base=True, batches='batch1', verbose=False) +>>> stack.describe(['x', 'view'], 'y').loc[['@', 'q6'], ['@', 'q6']] +y @ q6 +x view +@ x|f|:|y|weight_a|c% NaN 1.0 + x|f|:||weight_a|counts NaN 1.0 +q6 x|f|:|y|weight_a|c% 1.0 NaN + x|f|:||weight_a|counts 1.0 NaN + +Even if unweighted bases are requested, they get skipped for array summaries +and transposed arrays. + +Since ``y_on_y`` is requested, for a variable used as cross- and downbreak, with an extended filter (in this +example ``q1``), two ``Link``\s with ``View``\s are created: + +>>> stack.describe(['y', 'filter', 'view'], 'x').loc['q1', 'q1'] +filter view +(men only)+(q1) x|f|:|y|weight_a|c% 1.0 + x|f|:||weight_a|counts 1.0 + x|f|x:||weight_a|cbase 1.0 + x|f|x:|||cbase 1.0 +men only x|f|:|y|weight_a|c% 1.0 + x|f|:||weight_a|counts 1.0 + x|f|x:||weight_a|cbase 1.0 + x|f|x:|||cbase 1.0 + +The first one is the aggregation defined by the ``Batch`` construction plan, +the second one shows the ``y_on_y`` aggregation using only the main +``Batch.filter``. 
+ +------------------------- +Non-categorical variables +------------------------- + +>>> batch3 = dataset.add_batch('batch3') +>>> batch3.add_x('age') +>>> stack = dataset.populate('batch3') +>>> stack.describe() + data filter x y view # +0 Example Data (A) no_filter age @ NaN 1 + +Non-categorical variables (``ìnt`` or ``float``) are handled in a special way. +There are two options: + + * Treat them like categorical variables: + Append them to the parameter ``categorize``, then counts, percentage + and sum aggregations can be added alongside the ``cbase`` ``View``. + + >>> stack.aggregate(views=['c%', 'counts', 'cbase', 'counts_sum', 'c%_sum'], + unweighted_base=True, + categorize=['age'], + batches='batch3', + verbose=False) + + >>> stack.describe() + data filter x y view # + 0 Example Data (A) no_filter age @ x|f|:|||counts 1 + 1 Example Data (A) no_filter age @ x|f.c:f|x:|y||c%_sum 1 + 2 Example Data (A) no_filter age @ x|f|:|y||c% 1 + 3 Example Data (A) no_filter age @ x|f|x:|||cbase 1 + 4 Example Data (A) no_filter age @ x|f.c:f|x:|||counts_sum 1 + + * Do not categorize the variable: + Only ``cbase`` is created and additional descriptive statistics + ``View``\s must be added. The method will raise a warning: + + >>> stack.aggregate(views=['c%', 'counts', 'cbase', 'counts_sum', 'c%_sum'], + unweighted_base=True, + batches='batch3', + verbose=True) + Warning: Found 1 non-categorized numeric variable(s): ['age']. + Descriptive statistics must be added! 
+ + >>> stack.describe() + data filter x y view # + 0 Example Data (A) no_filter age @ x|f|x:|||cbase 1 + +---------------------- +Descriptive statistics +---------------------- + +>>> b_name = 'batch4' +>>> batch4 = dataset.add_batch(b_name) +>>> batch4.add_x(['q2b', 'q6', 'age']) +>>> stack = dataset.populate(b_name) +>>> stack.aggregate(views=['counts', 'cbase'], batches=b_name, verbose=False) + +>>> stack.describe() + data filter x y view # +0 Example Data (A) no_filter q2b @ x|f|:|||counts 1 +1 Example Data (A) no_filter q2b @ x|f|x:|||cbase 1 +2 Example Data (A) no_filter q6_1 @ x|f|:|||counts 1 +3 Example Data (A) no_filter q6_1 @ x|f|x:|||cbase 1 +4 Example Data (A) no_filter q6_2 @ x|f|:|||counts 1 +5 Example Data (A) no_filter q6_2 @ x|f|x:|||cbase 1 +6 Example Data (A) no_filter q6_3 @ x|f|:|||counts 1 +7 Example Data (A) no_filter q6_3 @ x|f|x:|||cbase 1 +8 Example Data (A) no_filter age @ x|f|x:|||cbase 1 +9 Example Data (A) no_filter q6 @ x|f|:|||counts 1 +10 Example Data (A) no_filter q6 @ x|f|x:|||cbase 1 + +Adding descriptive statistics ``View``\s like mean, stddev, min, max, median, etc. +can be added with the method ``stack.add_stats()``. With the parameters +``other_source``, ``rescale`` and ``exclude`` you can specify the calculation. +Again each combination of the parameters refers to a unique view key. Note that +in ``on_vars`` included arrays get unrolled, that means also all belonging +array items get equipped with the added ``View``: + +>>> stack.add_stats(on_vars=['q2b', 'age'], stats='mean', _batches=b_name, verbose=False) +>>> stack.add_stats(on_vars=['q6'], stats='stddev', _batches=b_name, verbose=False) +>>> stack.add_stats(on_vars=['q2b'], stats='mean', rescale={1:100, 2:50, 3:0}, +... 
custom_text='rescale mean', _batches=b_name, verbose=False) + +>>> stack.describe('view', 'x') +x age q2b q6 q6_1 q6_2 q6_3 +view +x|d.mean|x:|||stat 1.0 1.0 NaN NaN NaN NaN +x|d.mean|x[{100,50,0}]:|||stat NaN 1.0 NaN NaN NaN NaN +x|d.stddev|x:|||stat NaN NaN 1.0 1.0 1.0 1.0 +x|f|:|||counts NaN 1.0 1.0 1.0 1.0 1.0 +x|f|x:|||cbase 1.0 1.0 1.0 1.0 1.0 1.0 + +---- +Nets +---- + +>>> b_name = 'batch5' +>>> batch5 = dataset.add_batch(b_name) +>>> batch5.add_x(['q2b', 'q6']) +>>> stack = dataset.populate(b_name) +>>> stack.aggregate(views=['counts', 'c%', 'cbase'], batches=b_name, verbose=False) + +>>> stack.describe('view', 'x') +x q2b q6 q6_1 q6_2 q6_3 +view +x|f|:|y||c% 1 1 1 1 1 +x|f|:|||counts 1 1 1 1 1 +x|f|x:|||cbase 1 1 1 1 1 + +Net-like ``View``\s can be added with the method ``Stack.add_nets()`` by defining +``net_map``\s for selected variables. There is a distinction between two different +types of net ``View``\s: + + * Expanded nets: + The existing counts or percentage ``View``\s are replaced with the new + net ``View``\s, which will the net-defining codes after or before the + computed net groups (i.e. "overcode" nets). + + >>> stack.add_nets('q2b', [{'Top2': [1, 2]}], expand='after', _batches=b_name, verbose=False) + + >>> stack.describe('view', 'x') + x q2b q6 q6_1 q6_2 q6_3 + view + x|f|:|y||c% NaN 1.0 1.0 1.0 1.0 + x|f|:|||counts NaN 1.0 1.0 1.0 1.0 + x|f|x:|||cbase 1.0 1.0 1.0 1.0 1.0 + x|f|x[{1,2}+]*:|y||net 1.0 NaN NaN NaN NaN + x|f|x[{1,2}+]*:|||net 1.0 NaN NaN NaN NaN + + * Not expanded nets: + The new net ``View``\s are added to the stack, which contain only the + computed net groups. 
+ + >>> stack.add_nets('q2b', [{'Top2': [1, 2]}], _batches=b_name, verbose=False) + + >>> stack.describe('view', 'x') + x q2b q6 q6_1 q6_2 q6_3 + view + x|f|:|y||c% NaN 1.0 1.0 1.0 1.0 + x|f|:|||counts NaN 1.0 1.0 1.0 1.0 + x|f|x:|||cbase 1.0 1.0 1.0 1.0 1.0 + x|f|x[{1,2}+]*:|y||net 1.0 NaN NaN NaN NaN + x|f|x[{1,2}+]*:|||net 1.0 NaN NaN NaN NaN + x|f|x[{1,2}]:|y||net 1.0 NaN NaN NaN NaN + x|f|x[{1,2}]:|||net 1.0 NaN NaN NaN NaN + +The difference between the two net types are also visible in the view keys: +``x|f|x[{1,2}+]*:|||net`` versus ``x|f|x[{1,2}]:|||net``. + +~~~~~~~~~~~~~~~ +Net definitions +~~~~~~~~~~~~~~~ + +To create more complex net definitions the method ``quantipy.net()`` can be used, +which generates a well-formatted instruction dict and appends it to the ``net_map``. +It's a helper especially concerning including various texts with different +valid ``text_keys``. The next example shows how to prepare a net for 'q6' +(promoters, detractors): + +>>> q6_net = qp.net([], [1, 2, 3, 4, 5, 6], 'Promotors', ['en-GB', 'sv-SE']) +>>> q6_net = qp.net(q6_net, [9, 10], {'en-GB': 'Detractors', +... 'sv_SE': 'Detractors', +... 'de-DE': 'Kritiker'}) +>>> qp.net(q6_net[0], text='Promoter', text_key='de-DE') + +>>> print q6_net +[ + { + "1": [1, 2, 3, 4, 5, 6], + "text": { + "en-GB": "Promotors", + "sv-SE": "Promotors", + "de-DE": "Promoter" + } + }, + { + "2": [9, 10], + "text": { + "en-GB": "Detractors", + "sv_SE": "Detractors", + "de-DE": "Kritiker" + } + } +] + +~~~~~~~~~~~~ +Calculations +~~~~~~~~~~~~ + +``Stack.add_nets()`` has the parameter ``calc``, which allows adding ``View``\s +that are calculated out of the defined nets. The method ``qp.calc()`` is a +helper to create a well-formatted instruction dict for the calculation. 
+For instance, to calculate the NPS (*promoters* - *detractors*) for ``'q6'``, see the example +above and create the following calculation: + +>>> q6_calc = qp.calc((1, '-', 2), 'NPS', ['en-GB', 'sv-SE', 'de-DE']) + +>>> print q6_calc +OrderedDict([('calc', ('net_1', , 'net_2')), + ('calc_only', False), + ('text', {'en-GB': 'NPS', + 'sv-SE': 'NPS', + 'de-DE': 'NPS'})]) + +>>> stack.add_nets('q6', q6_net, calc=q6_calc, _batches=b_name, verbose=False) + +>>> stack.describe('view', 'x') +x q2b q6 q6_1 q6_2 q6_3 +view +x|f.c:f|x[{1,2,3,4,5,6}],x[{9,10}],x[{1,2,3,4,5... NaN 1.0 1.0 1.0 1.0 +x|f.c:f|x[{1,2,3,4,5,6}],x[{9,10}],x[{1,2,3,4,5... NaN 1.0 1.0 1.0 1.0 +x|f|:|y||c% NaN 1.0 1.0 1.0 1.0 +x|f|:|||counts NaN 1.0 1.0 1.0 1.0 +x|f|x:|||cbase 1.0 1.0 1.0 1.0 1.0 +x|f|x[{1,2}+]*:|y||net 1.0 NaN NaN NaN NaN +x|f|x[{1,2}+]*:|||net 1.0 NaN NaN NaN NaN +x|f|x[{1,2}]:|y||net 1.0 NaN NaN NaN NaN +x|f|x[{1,2}]:|||net 1.0 NaN NaN NaN NaN + +You can see that nets that are added on arrays are also applied for all array items. + +--------------- +Cumulative sums +--------------- + +Cumulative sum ``View``\s can be added to a specified collection of xks of the +``Stack`` using ``stack.cumulative_sum()``. 
These ``View``\s will always +replace the regular counts and percentage ``View``\s: + +>>> b_name = 'batch6' +>>> batch6 = dataset.add_batch(b_name) +>>> batch6.add_x(['q2b', 'q6']) +>>> stack = dataset.populate(b_name) +>>> stack.aggregate(views=['counts', 'c%', 'cbase'], batches=b_name, verbose=False) + +>>> stack.cumulative_sum('q6', verbose=False) + +>>> stack.describe('view', 'x') +x q2b q6 q6_1 q6_2 q6_3 +view +x|f.c:f|x++:|y||c%_cumsum NaN 1.0 1.0 1.0 1.0 +x|f.c:f|x++:|||counts_cumsum NaN 1.0 1.0 1.0 1.0 +x|f|:|y||c% 1.0 NaN NaN NaN NaN +x|f|:|||counts 1.0 NaN NaN NaN NaN +x|f|x:|||cbase 1.0 1.0 1.0 1.0 1.0 + +------------------ +Significance tests +------------------ + +>>> batch2 = dataset.get_batch('batch2') +>>> batch2.set_sigtests([0.05]) +>>> batch5 = dataset.get_batch('batch5') +>>> batch5.set_sigtests([0.01, 0.05]) +>>> stack = dataset.populate(['batch2', 'batch5']) + +>>> stack.aggregate(['counts', 'cbase'], batches=['batch2', 'batch5'], verbose=False) + +>>> stack.describe(['view', 'y'], 'x') +x q2b q6 q6_1 q6_2 q6_3 +view y +x|f|:||weight|counts @ 1.0 NaN NaN NaN NaN + gender 1.0 NaN NaN NaN NaN +x|f|:|||counts @ 1.0 1.0 1.0 1.0 1.0 +x|f|x:||weight|cbase @ 1.0 NaN NaN NaN NaN + gender 1.0 NaN NaN NaN NaN +x|f|x:|||cbase @ 1.0 1.0 1.0 1.0 1.0 + gender 1.0 NaN NaN NaN NaN + +Significance tests can only be added ``Batch``-wise, which also means that +significance levels must be defined for each ``Batch`` before running +``stack.add_tests()``. 
+ +>>> stack.add_tests(['batch2', 'batch5'], verbose=False) + +>>> stack.describe(['view', 'y'], 'x') +x q2b q6 q6_1 q6_2 q6_3 +view y +x|f|:||weight|counts @ 1.0 NaN NaN NaN NaN + gender 1.0 NaN NaN NaN NaN +x|f|:|||counts @ 1.0 1.0 1.0 1.0 1.0 +x|f|x:||weight|cbase @ 1.0 NaN NaN NaN NaN + gender 1.0 NaN NaN NaN NaN +x|f|x:|||cbase @ 1.0 1.0 1.0 1.0 1.0 + gender 1.0 NaN NaN NaN NaN +x|t.props.Dim.01|:|||significance @ 1.0 NaN 1.0 1.0 1.0 +x|t.props.Dim.05|:||weight|significance @ 1.0 NaN NaN NaN NaN + gender 1.0 NaN NaN NaN NaN +x|t.props.Dim.05|:|||significance @ 1.0 NaN 1.0 1.0 1.0 diff --git a/docs/API/_build/html/_sources/sites/lib_doc/overview.rst.txt b/docs/API/_build/html/_sources/sites/lib_doc/overview.rst.txt new file mode 100644 index 000000000..eb91da666 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/lib_doc/overview.rst.txt @@ -0,0 +1,14 @@ +============= +Documentation +============= + +.. toctree:: + :maxdepth: 5 + :hidden: + + dataprocessing/00_overview + batch/00_overview + engine/00_overview + builds/00_overview + + diff --git a/docs/API/_build/html/_sources/sites/release_notes/00_overview.rst.txt b/docs/API/_build/html/_sources/sites/release_notes/00_overview.rst.txt new file mode 100644 index 000000000..984dc1c01 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/release_notes/00_overview.rst.txt @@ -0,0 +1,17 @@ +------------- +Release notes +------------- + +.. toctree:: + :maxdepth: 5 + :includehidden: + + 01_latest + 02_archive + + +.. toctree:: + :maxdepth: 5 + :hidden: + + 03_how_to_snippets \ No newline at end of file diff --git a/docs/API/_build/html/_sources/sites/release_notes/01_latest.rst.txt b/docs/API/_build/html/_sources/sites/release_notes/01_latest.rst.txt new file mode 100644 index 000000000..4eb81c383 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/release_notes/01_latest.rst.txt @@ -0,0 +1,134 @@ +.. 
toctree:: + :maxdepth: 5 + :includehidden: + +=================== +Latest (01/10/2018) +=================== + +**New**: "rewrite" of Rules module (affecting sorting): + +**sorting "normal" columns**: + +* ``sort_on`` always '@' +* ``fix`` any categories +* ``sort_by_weight`` default is unweighted (None), but each weight (included +in data) can be used + +If sort_by_weight and the view-weight differ, a warning is shown. + +**sorting "expanded net" columns**: + +* ``sort_on`` always '@' +* ``fix`` any categories +* sorting ``within`` or ``between`` net groups is available +* ``sort_by_weight``: as default the weight of the first found +expanded-net-view is taken. Only weights of aggregated net-views are possible + +**sorting "array summaries"**: + +* ``sort_on`` can be any desc ('median', 'stddev', 'sem', 'max', 'min', +'mean', 'upper_q', 'lower_q') or nets ('net_1', 'net_2', .... enumerated +by the net_def) +* ``sort_by_weight``: as default the weight of the first found desc/net-view +is taken. Only weights of aggregated desc/net-views are possible +* ``sort_on`` can also be any category, here each weight can be used to sort_on + +"""" + +**New**: ``DataSet.min_value_count()`` + +A new wrapper for ``DataSet.hiding()`` is included. All values are hidden, +that have less counts than the included number ``min``. +The used data can be weighted or filtered using the parameters ``weight`` and +``condition``. + +Usage as Batch method: +``Batch.min_value_count()`` without the parameters ``weight`` and +``condition`` automatically grabs ``Batch.weights[0]`` and ``Batch.filter`` +to calculate low value counts. + +"""" + +**New**: Prevent weak duplicated in data + +As Python is case sensitive it is possible to have two or more variables with +the same name, but in lower- and uppercases. Most other software do not support +that, so a warning is shown if a weak dupe is created. Additionally +``Dataset.write_dimensions()`` performs auto-renaming is weak dupes are detected. 
+
+""""
+
+**New**: Prevent single-cat delimited sets
+
+``DataSet.add_meta(..., qtype='delimited set', categories=[...], ...)``
+automatically switches ``qtype`` to single if only one category is defined.
+``DataSet.convert(name, 'single')`` allows conversion from ``delimited set`` to
+``single`` if the variable has only one category.
+``DataSet.repair()`` and ``DataSet.remove_values()`` convert delimited sets
+automatically to singles if only one category is included.
+
+""""
+
+**Update**: merge warnings + merging delimited sets
+
+Warnings in ``hmerge()`` and ``vmerge()`` are updated. If a column exists in
+the left and the right dataset, the type is compared. Some type inconsistencies
+are allowed, but return a warning, while others end up in a raise.
+
+delimited sets in ``vmerge()``:
+
+If a column is a delimited set in the left dataset, but a single, int or float
+in the right dataset, the data of the right column is converted into a delimited
+set.
+
+delimited sets in ``hmerge(...merge_existing=None)``:
+
+For the hmerge a new parameter ``merge_existing`` is included, which can be
+``None``, a list of variable-names or ``'all'``.
+
+If delimited sets are included in left and right dataset:
+
+* ``merge_existing=None``: Only meta is adjusted. Data is untouched (left data
+is taken).
+* ``merge_existing='all'``: Meta and data are merged for all delimited sets,
+that are included in both datasets.
+* ``merge_existing=[variable-names]``: Meta and data are merged for all
+delimited sets, that are listed and included in both datasets.
+
+""""
+
+**Update**: encoding in ``DataSet.get_batch(name)``
+
+The method is not that encoding sensitive anymore. It returns the depending
+``Batch``, no matter if ``'...'``, ``u'...'`` or ``'...'.decode('utf8')`` is
+included as name.
+
+""""
+
+**Update**: warning in weight engine
+
+Missing codes in the sample are only alerted, if the belonging target is not 0. 
+ +"""" + +**Update**: ``DataSet.to_array(..., variables, ...)`` + +Duplicated vars in ``variables`` are not allowed anymore, these were causing +problems in the ChainManager class. + +"""" + +**Update**: ``Batch.add_open_ends()`` + +Method raises an error if no vars are included in ``oe`` and ``break_by``. The +empty dataframe was causing issues in the ChainManager class. + +"""" + +**Update**: ``Batch.extend_x()`` + +The method automatically checks if the included variables are arrays and adds +them to ``Batch.summaries`` if they are included yet. + +"""" diff --git a/docs/API/_build/html/_sources/sites/release_notes/02_archive.rst.txt b/docs/API/_build/html/_sources/sites/release_notes/02_archive.rst.txt new file mode 100644 index 000000000..d2099d199 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/release_notes/02_archive.rst.txt @@ -0,0 +1,1642 @@ + + +.. toctree:: + :maxdepth: 5 + :hidden: + +====================== +Archived release notes +====================== + +--------------- +sd (04/06/2018) +--------------- + + + +**New**: Additional variable (names) "getter"-like and resolver methods + +* ``DataSet.created()`` +* ``DataSet.find(str_tags=None, suffixed=False)`` +* ``DataSet.names()`` +* ``DataSet.resolve_name()`` + +A bunch of new methods enhancing the options of finding and testing for variable +names have been added. ``created()`` will list all variables that have been added +to a dataset using core functions, i.e. ``add_meta()`` and ``derive()``, resp. +all helper methods that use them internally (as ``band()`` or ``categorize()`` do +for instance). + +The ``find()`` method is returning all variable names that contain any of the +provided substrings in ``str_tags``. To only consider names that end with these +strings, set ``suffixed=True``. If no ``str_tags`` are passed, the method will +use a default list of tags including ``['_rc', '_net', ' (categories', ' (NET', '_rec']``. 
+
+Sometimes a dataset might contain "semi-duplicated" names, variables that differ
+in respect to case sensitivity but have otherwise identical names. Calling
+``names()`` will report such cases in a ``pd.DataFrame`` that lists all name
+variants under the respective ``str.lower()`` version. If no semi-duplicates
+are found, ``names()`` will simply return ``DataSet.variables()``.
+
+Lastly, ``resolve_name()`` can be used to return the "proper", existing representation(s) of a given variable name's spelling.
+
+""""
+
+**New**: ``Batch.remove()``
+
+Not needed batches can be removed from ``meta``, so they are not aggregated
+anymore.
+
+""""
+
+**New**: ``Batch.rename(new_name)``
+
+Sometimes standard batches have long/ complex names. They can now be changed
+into a custom name. Please take into account, that for most hubs the name of
+omnibus batches should look like 'client ~ topic'.
+
+""""
+
+**Update**: Handling verbatims in ``qp.Batch``
+
+Instead of holding the well prepared open-end dataframe in ``batch.verbatims``,
+the attribute is now filled by ``batch.add_open_ends()`` with instructions to
+create the open-end dataframe. It is easier to modify/ overwrite existing
+verbatims. Therefore also a new parameter is included ``overwrite=True``.
+
+""""
+
+**Update**: ``Batch.copy(..., b_filter=None, as_addition=False)``
+
+It is now possible to define an additional filter for a copied batch and also
+to set it as addition to the master batch.
+
+""""
+
+**Update**: Regrouping the variable list using ``DataSet.order(..., regroup=True)``
+
+A new parameter called ``regroup`` will instruct reordering all newly created
+variables into their logical position of the dataset's main variable order, i.e.
+attempting to place *derived* variables after the *originating* ones. 
+ +"""" + +**Bugfix**: ``add_meta()`` and duplicated categorical ``values`` codes + +Providing duplicated numerical codes while attempting to create new metadata +using ``add_meta()`` will now correctly raise a ``ValueError`` to prevent +corrupting the ``DataSet``. + +>>> cats = [(1, 'A'), (2, 'B'), (3, 'C'), (3, 'D'), (2, 'AA')] +>>> dataset.add_meta('test_var', 'single', 'test label', cats) +ValueError: Cannot resolve category definition due to code duplicates: [2, 3] + + + +--------------- +sd (04/04/2018) +--------------- + + +**New**: Emptiness handlers in ``DataSet`` and ``Batch`` classes + +* ``DataSet.empty(name, condition=None)`` +* ``DataSet.empty_items(name, condition=None, by_name=True)`` +* ``DataSet.hide_empty_items(condition=None, arrays=None)`` +* ``Batch.hide_empty(xks=True, summaries=True)`` + +``empty()`` is used to test if regular variables are completely empty, +``empty_items()`` checks the same for the items of an array mask definition. +Both can be run on lists of variables. If a single variable is tested, the former +returns simply boolean, the latter will list all empty items. If lists are checked, +``empty()`` returns the sublist of empty variables, ``empty_items()`` is mapping +the list of empty items per array name. The ``condition`` parameter of these +methods takes a ``Quantipy logic`` expression to restrict the test to a subset +of the data, i.e. to check if variables will be empty if the dataset is filtered +a certain way. A very simple example: + +>>> dataset.add_meta('test_var', 'int', 'Variable is empty') +>>> dataset.empty('test_var') +True + +>>> dataset[dataset.take({'gender': 1}), 'test_var'] = 1 +>>> dataset.empty('test_var') +False + +>>> dataset.empty('test_var', {'gender': 2}) +True + + +The ``DataSet`` method ``hide_empty_items()`` uses the emptiness tests to +automatically apply a **hiding rule** on all empty items found in the dataset. 
+
To restrict this to specific arrays only, their names can be provided via the
+``arrays`` argument. ``Batch.hide_empty()`` takes into account the current
+``Batch.filter`` setup and drops/hides *all* relevant empty variables from the
+``xks`` list and summary aggregations by default. Summaries that would end up without valid
+items because of this are automatically removed from the ``summaries`` collection
+and the user is warned.
+
+""""
+
+**New**: ``qp.set_option('fast_stack_filters', True)``
+
+A new option to enable a more efficient test for already existing filters
+inside the ``qp.Stack`` object has been added. Set the ``'fast_stack_filters'``
+option to ``True`` to use it, the default is ``False`` to ensure compatibility
+in different versions of production DP template workspaces.
+
+""""
+
+**Update**: ``Stack.add_stats(..., factor_labels=True, ...)``
+
+The parameter ``factor_labels`` is now also able to take the string ``'()'``,
+then factors are written in the normal brackets next to the label (instead
+of ``[]``).
+
+In the new version factor_labels are also just added if there are none included
+before, except new scales are used.
+
+""""
+
+**Bugfix**: ``DataSet`` ``np.NaN`` insertion to ``delimited_set`` variables
+
+``np.NaN`` was incorrectly transformed when inserted into ``delimited_set``
+before, leading to either ``numpy`` type conflicts or type casting exceptions.
+This is now fixed.
+
+
+
+---------------
+sd (27/02/2018)
+---------------
+
+**New**: ``DataSet._dimensions_suffix``
+
+``DataSet`` has a new attribute ``_dimensions_suffix``, which is used as mask
+suffix while running ``DataSet.dimensionize()``. The default is ``_grid`` and
+it can be modified with ``DataSet.set_dim_suffix()``.
+
+""""
+
+**Update**: ``Stack._get_chain()`` (old chain)
+
+The method is speeded-up. If a filter is already included in the Stack, it is
+not calculated from scratch anymore. 
Additionally the method has a new parameter +``described``, which takes a describing dataframe of the Stack, so it no longer +needs to be calculated in each loop. + +"""" +**Update**: ``Stack.add_nets()`` (recoded ``Views``) + +Nets that are applied on array variables will now also create a new recoded +array that reflects the net definitions if ``recoded`` is used. The +method has been creating only the item versions before. + +"""" + +**Update**: ``Stack.add_stats()`` + +The method will now create a new metadata property called ``'factor'`` for each +variable it is applied on. You can only have one factor assigned to one +categorical value, so for multiple statistic definitions (exclusions, etc.) +it will get overwritten. + +"""" + +**Update**: ``DataSet.from_batch()`` (``additions`` parameter) + +The ``additions`` parameter has been updated to also be able to create recoded +variables from existing "additional" Batches that are attached to a parent one. +Filter variables will get the new meta ``'properties'`` tag ``'recoded_filter'`` +and only have one category (``1``, ``'active'``). They are named simply +``'filter_1'``, ``'filter_2'`` and so on. The new possible values of the +parameters are now: + + * ``None``: ``as_addition()``-Batches are not considered. + * ``'variables'``: Only cross- and downbreak variables are considered. + * ``'filters'``: Only filters are recoded. + * ``'full'``: ``'variables'`` + ``'filters'`` + +"""" + +**Bugfix**: ``ViewManager._request_views()`` + +Cumulative sums are only requested if they are included in the belonging +``Stack``. Additionally the correct related sig-tests are now taken for +cumulative sums. + +--------------- +sd (12/01/2018) +--------------- + +**New**: ``Audit`` + +``Audit`` is a new class which takes ``DataSet`` instances, compares and aligns +them. 
+ +The class compares/ reports/ aligns the following aspects: + + * datasets are valid (``DataSet.validate()``) + * mismatches (variables are not included in all datasets) + * different types (variables are in more than one dataset, but have different types) + * labels (variables are in more than one dataset, but have different labels for the same text_key) + * value codes (variables are in more than one dataset, but have different value codes) + * value texts (variables are in more than one dataset, but have different value texts) + * array items (arrays are in more than one dataset, but have different items) + * item labels (arrays are in more than one dataset, but their items have different labels) + +This is the first draft of the class, so it will need some testing and probably +adjustments. + +"""" + +**New**: ``DataSet.reorder_items(name, new_order)`` + +The new method reorders the items of the included array. The ints in the +``new_order`` list match up to the number of the items +(``DataSet.item_no('item_name')``), not to the position. + +"""" + +**New**: ``DataSet.valid_tks``, Arabic + +Arabic (``ar-AR``) is included as default valid text-key. + +"""" + +**New**: ``DataSet.extend_items(name, ext_items, text_key=None)`` + +The new method extends the items of an existing array. + +"""" + +**Update**: ``DataSet.set_missings()`` + +The method is now limited to ``DataSet``, ``Batch`` does not inherit it. + +"""" + +**Update**: ``DataSet`` + +The whole class is reordered and cleaned up. Some new deprecation warnings +will appear. + +"""" + +**Update**: ``DataSet.add_meta()`` / ``DataSet.derive()`` + +Both methods will now raise a ``ValueError: Duplicated codes provided. Value codes must be unique!`` +if categorical ``values`` definitions try to apply duplicated codes. + +"""" + +--------------- +sd (18/12/2017) +--------------- + + +**New**: ``Batch.remove_filter()`` + +Removes all defined (global + extended) filters from a Batch instance. 
+ +"""" + +**Update**: ``Batch.add_filter()`` + +It's now possible to extend the global filter of a Batch instance. These options +are possible. + +Add first filter:: + + >>> batch.filter, batch.filter_names + 'no_filter', ['no_filter'] + >>> batch.add_filter('filter1', logic1) + >>> batch.filter, batch.filter_names + {'filter1': logic1}, ['filter1'] + +Extend filter:: + + >>> batch.filter, batch.filter_names + {'filter1': logic}, ['filter1'] + >>> batch.add_filter('filter2', logic2) + >>> batch.filter, batch.filter_names + {'filter1' + 'filter2': intersection([logic1, logic2])}, ['filter1' + 'filter2'] + +Replace filter:: + + >>> batch.filter, batch.filter_names + {'filter1': logic}, ['filter1'] + >>> batch.add_filter('filter1', logic2) + >>> batch.filter, batch.filter_names + {'filter1': logic2}, ['filter1'] + +"""" + +**Update**: ``Stack.add_stats(..., recode)`` + +The new parameter ``recode`` defines if a new numerical variable is created which +satisfies the stat definitions. + +"""" + +**Update**: ``DataSet.populate()`` + +A progress tracker is added to this method. + +"""" + +**Bugfix**: ``Batch.add_open_ends()`` + +``=`` is removed from all responsess in the included variables, as it causes +errors in the Excel-Painter. + +"""" + +**Bugfix**: ``Batch.extend_x()`` and ``Batch.extend_y()`` + +Check if included variables exist and unroll included masks. + +"""" + +**Bugfix**: ``Stack.add_nets(..., calc)`` + +If the operator in calc is ``div``/ ``/``, the calculation is now performed +correctly. + +"""" + +--------------- +sd (28/11/2017) +--------------- + +**New** ``DataSet.from_batch()`` + +Creates a new ``DataSet`` instance out of ``Batch`` definitions (xks, yks, +filter, weight, language, additions, edits). + +"""" + +**New**: ``Batch.add_total()`` + +Defines if total column ``@`` should be included in the downbreaks (yks). 
+ +"""" + +**New**: ``Batch.set_unwgt_counts()`` + +If cellitems are ``cp`` and a weight is provided, it is possible to request +unweighted count views (percentages are still weighted). + +"""" + +**Update**: ``Batch.add_y_on_y(name, y_filter=None, main_filter='extend')`` + +Multiple ``y_on_y`` aggregations can now be added to a ``Batch`` instance +and each can have an own filter. The y_on_y-filter can ``extend`` or ``replace`` +the main_filter of the ``Batch``. + +"""" + +**Update**: ``Stack.add_nets(..., recode)`` + +The new parameter ``recode`` defines if a new variable is created which +satisfies the net definitions. Different options for ``recode`` are: + + * ``'extend_codes'``: The new variable contains all codes of the original + variable and all nets as new categories. + * ``'drop_codes'``: The new variable contains only all nets as new categories. + * ``'collect_codes'`` or ``'collect_codes@cat_name'``: The new variable contains + all nets as new categories and another new category which sums all cases that + are not in any net. The new category text can be defined by adding ``@cat_name`` + to ``collect_codes``. If none is provided ``Other`` is used as default. + +"""" + +**Update**: ``Stack.add_nets()`` + +If a variable in the ``Stack`` already has a net_view, it gets overwritten +if a new net is added. + +"""" + +**Update**: ``DataSet.set_missings(..., missing_map)`` + +The parameter ``missing_map`` can also handle lists now. All included +codes are be flagged as ``'exclude'``. + +"""" + +**Update**: ``request_views(..., sums='mid')`` (``ViewManager``/``query.py``) + +Allow different positions for sums in the view-order. They can be placed in +the middle (``'mid'``) between the basics/ nets and the stats or at the +``'bottom'`` after the stats. + +"""" + +**Update/ New**: ``write_dimensions()`` + +Converting qp data to mdd and ddf files by using ``write_dimensions()`` is +updated now. 
A bug regarding encoding texts is fixed and additionally all +included ``text_keys`` in the meta are transferred into the mdd. Therefore +two new classes are included: ``DimLabels`` and ``DimLabel``. + +--------------- +sd (13/11/2017) +--------------- + +**New** ``DataSet.to_delimited_set(name, label, variables, + from_dichotomous=True, codes_from_name=True)`` + +Creates a new delimited set variable out of other variables. If the input- +variables are dichotomous (``from_dichotomous``), the new value-codes can be +taken from the variable-names or from the order of the variables +(``codes_from_name``). + +"""" + +**Update** ``Stack.aggregate(..., bases={})`` + +A dictionary in form of:: + + bases = { + 'cbase': { + 'wgt': True, + 'unwgt': False}, + 'cbase_gross': { + 'wgt': True, + 'unwgt': True}, + 'ebase': { + 'wgt': False, + 'unwgt': False} + } + +defines what kind of bases will be aggregated. If ``bases`` is provided the +old parameter ``unweighted_base`` and any bases in the parameter ``views`` +will be ignored. If bases is not provided and any base is included in ``views``, +a dictionary is automatically created out of ``views`` and ``unweighted_base``. + +--------------- +sd (17/10/2017) +--------------- + + +**New**: ``del DataSet['var_name']`` and ``'var_name' in DataSet`` syntax support + +It is now possible to test membership of a variable name simply using the ``in`` +operator instead of ``DataSet.var_exists('var_name')`` and delete a variable definition +from ``DataSet`` using the ``del`` keyword inplace of the ``drop('var_name')`` +method. + +"""" + +**New**: ``DataSet.is_single(name)``, ``.is_delimited_set(name)``, ``.is_int(name)``, ``.is_float(name)``, ``.is_string(name)``, ``.is_date(name)``, ``.is_array(name)`` + +These new methods make testing a variable's type easy. 
+ +"""" + +**Update**: ``DataSet.singles(array_items=True)`` and all other non-``array`` type iterators + +It is now possible to exclude ``array`` items from ``singles()``, ``delimited_sets()``, +``ints()`` and ``floats()`` variable lists by setting the new ``array_items`` +parameter to ``False``. + +"""" + +**Update**: ``Batch.set_sigtests(..., flags=None, test_total=None)``, ``Batch.sigproperties`` + +The significancetest-settings for flagging and testing against total, can now +be modified by the two parameters ``flags`` and ``test_total``. The ``Batch`` +attribute ``siglevels`` is removed, instead all sig-settings are stored +in ``Batch.sigproperties``. + +"""" + +**Update**: ``Batch.make_summaries(..., exclusive=False)``, ``Batch.skip_items`` + +The new parameter ``exclusive`` can take a list of arrays or a boolean. If a list +is included, these arrays are added to ``Batch.skip_items``, if it is True all +variables from ``Batch.summaries`` are added to ``Batch.skip_items`` + +"""" + +**Update**: ``quantipy.sandbox.sandbox.Chain.paint(..., totalize=True)`` + +If ``totalize`` is ``True``, ``@``-Total columns of a (x-oriented) ``Chain.dataframe`` +will be painted as ``'Total'`` instead of showing the corresponsing ``x``-variables +question text. + +"""" + +**Update**: ``quantipy.core.weights.Rim.Rake`` + +The weighting algorithm's ``generate_report()`` method can be caught up in a +``MemoryError`` for complex weight schemes run on very large sample sizes. This +is now prevented to ensure the weight factors are computed with priority and +the algorithm is able to terminate correctly. A warning is raised:: + + UserWarning: OOM: Could not finish writing report... + +"""" + +**Update**: ``Batch.replace_y()`` + +Conditional replacements of y-variables of a ``Batch`` will now always also +automatically add the ``@``-Total indicator if not provided. 
+ +"""" + +**Bugfix**: ``DataSet.force_texts(..., overwrite=True)`` + +Forced overwriting of existing ``text_key`` meta data was failing for ``array`` +``mask`` objects. This is now solved. + +"""" + +--------------- +sd (15/09/2017) +--------------- + +**New**: ``DataSet.meta_to_json(key=None, collection=None)`` + +The new method allows saving parts of the metadata as a json file. The parameters +``key`` and ``collection`` define the metaobject which will be saved. + +"""" + +**New**: ``DataSet.save()`` and ``DataSet.revert()`` + +These two new methods are useful in interactive sessions like **Ipython** or +**Jupyter** notebooks. ``save()`` will make a temporary (only im memory, not +written to disk) copy of the ``DataSet`` and store its current state. You can +then use ``revert()`` to rollback to that snapshot of the data at a later +stage (e.g. a complex recode operation went wrong, reloading from the physical files takes +too long...). + +"""" + +**New**: ``DataSet.by_type(types=None)`` + +The ``by_type()`` method is replacing the soon to be deprecated implementation +of ``variables()`` (see below). It provides the same functionality +(``pd.DataFrame`` summary of variable types) as the latter. + +"""" + +**Update**: ``DataSet.variables()`` absorbs ``list_variables()`` and ``variables_from_set()`` + +In conjunction with the addition of ``by_type()``, ``variables()`` is +replacing the related ``list_variables()`` and ``variables_from_set()`` methods in order to offer a unified solution for querying the ``DataSet``\'s (main) variable collection. + +"""" + +**Update**: ``Batch.as_addition()`` + +The possibility to add multiple cell item iterations of one ``Batch`` definition +via that method has been reintroduced (it was working by accident in previous +versions with subtle side effects and then removed). Have fun! 
+ +"""" + +**Update**: ``Batch.add_open_ends()`` + +The method will now raise an ``Exception`` if called on a ``Batch`` that has +been added to a parent one via ``as_addition()`` to warn the user and prevent +errors at the build stage:: + + NotImplementedError: Cannot add open end DataFrames to as_addition()-Batches! + +--------------- +sd (31/08/2017) +--------------- + +**New**: ``DataSet.code_from_label(..., exact=True)`` + +The new parameter ``exact`` is implemented. If ``exact=True`` codes are returned +whose belonging label is equal the included ``text_label``. Otherwise the +method checks if the labels contain the included ``text_label``. + +"""" + +**New**: ``DataSet.order(new_order=None, reposition=None)`` + +This new method can be used to change the global order of the ``DataSet`` +variables. You can either pass a complete ``new_order`` list of variable names to +set the order or provide a list of dictionaries to move (multiple) variables +before a reference variable name. The order is reflected in the case data +``pd.DataFrame.columns`` order and the meta ``'data file'`` ``set`` object's items. + +"""" + +**New**: ``DataSet.dichotomize(name, value_texts=None, keep_variable_text=True, ignore=None, replace=False, text_key=None)`` + +Use this to convert a ``'delimited set'`` variable into a set of binary coded +``'single'`` variables. Variables will have the values 1/0 and by default use +``'Yes'`` / ``'No'`` as the corresponding labels. Use the ``value_texts`` +parameter to apply custom labels. + +"""" + +**New**: ``Batch.extend_x(ext_xks)`` + +The new method enables an easy extension of ``Batch.xks``. In ``ext_xks`` +included ``str`` are added at the end of ``Batch.xks``. Values of included +``dict``\s are positioned in front of the related key. + +"""" + +**Update**: ``Batch.extend_y(ext_yks, ...)`` + +The parameter ``ext_yks`` now also takes ``dict``\s, which define the position +of the additional ``yks``. 
+ +"""" + +**Update**: ``Batch.add_open_ends(..., replacements)`` + +The new parameter ``replacements`` is implemented. The method loops over the +whole pd.DataFrame and replaces all keys of the included ``dict`` +with the belonging value. + +"""" + +**Update**: ``Stack.add_stats(..., other_source)`` + +Statistic views can now be added to delimited sets if ``other_source`` is used. +In this case ``other_source`` must be a single or numerical variable. + +"""" + +**Update**: ``DataSet.validate(..., spss_limits=False)`` + +The new parameter ``spss_limits`` is implemented. If ``spss_limits=True``, the +validate output dataframe is extended by 3 columns which show if the SPSS label +limitations are satisfied. + +"""" + +**Bugfix**: ``DataSet.convert()`` + +A bug that prevented conversions from ``single`` to numeric types has been fixed. + +"""" + +**Bugfix**: ``DataSet.add_meta()`` + +A bug that prevented the creation of numerical arrays outside of ``to.array()`` +has been fixed. It is now possible to create ``array`` metadata without providing +category references. + +"""" + +**Bugfix**: ``Stack.add_stats()`` + +Checking the statistic views is skipped now if no single typed variables are +included even if a checking cluster is provided. + +"""" + +**Bugfix**: ``Batch.copy()`` + +Instead of using a deepcopy of the ``Batch`` instance, a new instance is created +and filled with the attributes of the initial one. Then the copied instance can +be used as additional ``Batch``. + +"""" + +**Bugfix**: ``qp.core.builds.powerpoint`` + +Access to bar-chart series and colour-filling is now working for +different Powerpoint versions. Also a bug is fixed which came up in +``PowerPointpainter()`` for variables which have fixed categories and whose +values are located in ``lib``. 
+ +--------------- +sd (24/07/2017) +--------------- + +**New**: ``qp.set_option()`` + +It is now possible to set library-wide settings registered in ``qp.OPTIONS`` +by providing the setting's name (key) and the desired value. Currently supported +are:: + + OPTIONS = { + 'new_rules': False, + 'new_chains': False, + 'short_item_texts': False + } + +So for example, to work with the currently refactored ``Chain`` interim class +we can use ``qp.set_option('new_chains', True)``. + +"""" + +**New**: ``qp.Batch()`` + +This is a new object aimed at defining and structuring aggregation and build +setups. Please see an :doc:`extensive overview here <../lib_doc/batch/00_overview>`. + +"""" + +**New**: ``Stack.aggregate()`` / ``add_nets()`` / ``add_stats()`` / ``add_tests()`` / ... + +Connected to the new ``Batch`` class, some new ``Stack`` methods to ease up +view creation have been added. You can :doc:`find the docs here <../lib_doc/engine/00_overview>`. + +"""" + +**New**: ``DataSet.populate()`` + +Use this to create a ``qp.Stack`` from ``Batch`` definitions. This connects the +``Batch`` and ``Stack`` objects; check out the :doc:`Batch <../lib_doc/batch/00_overview>` +and :doc:`Analysis & aggregation <../lib_doc/engine/00_overview>` docs. + +"""" + +**New**: ``DataSet.write_dimensions(path_mdd=None, path_ddf=None, text_key=None, mdm_lang='ENG', run=True, clean_up=True)`` + +It is now possible to directly convert a ``DataSet`` into a Dimensions .ddf/.mdd +file pair (given SPSS Data Collection Base Professional is installed on your +machine). By default, files will be saved to the same location in which the +``DataSet`` resides and keep its ``text_key``. + +"""" + +**New**: ``DataSet.repair()`` + +This new method can be used to try to fix common ``DataSet`` metadata problems +stemming from outdated versions, incorrect manual editing of the meta dictionary +or other inconsistencies. 
The method is checking and repairing following issues: + + * ``'name'`` is present for all variable metadata + * ``'source'`` and ``'subtype'`` references for array variables + * correct ``'lib'``-based ``'values'`` object for array variables + * ``text key``-dependent ``'x edits'`` / ``'y edits'`` meta data + * ``['data file']['items']`` set entries exist in ``'columns'`` / ``'masks'`` + +"""" + +**New**: ``DataSet.subset(variables=None, from_set=None, inplace=False)`` + +As a counterpart to ``filter()``, ``subset()`` can be used to create a new +``DataSet`` that contains only a selection of variables. The new variables +collection can be provided either as a list of names or by naming an already +existing set containing the desired variables. + +"""" + +**New**: ``DataSet.variables_from_set(setname)`` + +Get the list of variables belonging to the passed set indicated by +``setname``. + +"""" + +**New**: ``DataSet.is_like_numeric(name)`` + +A new method to test if all of a ``string`` variable's values can be converted +to a numerical (``int`` / ``float``) type. Returns a boolean ``True`` / ``False``. + +"""" + +**Update**: ``DataSet.convert()`` + +It is now possible to convert inplace from ``string`` to ``int`` / ``float`` if +the respective internal ``is_like_numeric()`` check identifies numeric-like values. + +"""" + +**Update**: ``DataSet.from_components(..., reset=True)``, ``DataSet.read_quantipy(..., reset=True)`` + +Loaded ``.json`` metadata dictionaries will get cleaned now by default from any +user-defined, non-native objects inside the ``'lib'`` and ``'sets'`` +collections. Set ``reset=False`` to keep any extra entires (restoring the old +behaviour). + +"""" + +**Update**: ``DataSet.from_components(data_df, meta_dict=None, ...)`` + +It is now possible to create a ``DataSet`` instance by providing a ``pd.DataFrame`` +alone, without any accompanying meta data. 
While reading in the case data, the meta +component will be created by inferring the proper ``Quantipy`` variable types +from the ``pandas`` ``dtype`` information. + +"""" + +**Update**: ``Quantity.swap(var, ..., update_axis_def=True)`` + +It is now possible to ``swap()`` the ``'x'`` variable of an array based ``Quantity``, +as long as the length oh the constructing ``'items'`` collection is identical. +In addition, the new parameter ``update_axis_def`` is now by default enforcing +an update of the axis defintions (``pd.DataFrame`` column names, etc) while +previously the method was keeping the original index and column names. The old +behaviour can be restored by setting the parameter to ``False``. + +*Array example*: + +>>> link = stack[name_data]['no_filter']['q5']['@'] +>>> q = qp.Quantity(link) +>>> q.summarize() +Array q5 +Questions q5_1 q5_2 q5_3 q5_4 q5_5 q5_6 +Question Values +q5 All 8255.000000 8255.000000 8255.000000 8255.000000 8255.000000 8255.000000 + mean 26.410297 22.260569 25.181466 39.842883 24.399758 28.972017 + stddev 40.415559 38.060583 40.018463 46.012205 40.537497 41.903322 + min 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 + 25% 3.000000 3.000000 3.000000 3.000000 1.000000 3.000000 + median 5.000000 3.000000 3.000000 5.000000 3.000000 5.000000 + 75% 5.000000 5.000000 5.000000 98.000000 5.000000 97.000000 + max 98.000000 98.000000 98.000000 98.000000 98.000000 98.000000 + +*Updated axis definiton*: + +>>> q.swap('q7', update_axis_def=True) +>>> q.summarize() +Array q7 +Questions q7_1 q7_2 q7_3 q7_4 q7_5 q7_6 +Question Values +q7 All 1195.000000 1413.000000 3378.000000 35.000000 43.000000 36.000000 + mean 5.782427 5.423213 5.795145 4.228571 4.558140 5.333333 + stddev 2.277894 2.157226 2.366247 2.073442 2.322789 2.552310 + min 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 + 25% 4.000000 4.000000 4.000000 3.000000 3.000000 3.000000 + median 6.000000 6.000000 6.000000 4.000000 4.000000 6.000000 + 75% 8.000000 7.000000 8.000000 
6.000000 6.000000 7.750000 + max 9.000000 9.000000 9.000000 8.000000 9.000000 9.000000 + +*Original axis definiton*: + +>>> q = qp.Quantity(link) +>>> q.swap('q7', update_axis_def=False) +>>> q.summarize() +Array q5 +Questions q5_1 q5_2 q5_3 q5_4 q5_5 q5_6 +Question Values +q5 All 1195.000000 1413.000000 3378.000000 35.000000 43.000000 36.000000 + mean 5.782427 5.423213 5.795145 4.228571 4.558140 5.333333 + stddev 2.277894 2.157226 2.366247 2.073442 2.322789 2.552310 + min 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 + 25% 4.000000 4.000000 4.000000 3.000000 3.000000 3.000000 + median 6.000000 6.000000 6.000000 4.000000 4.000000 6.000000 + 75% 8.000000 7.000000 8.000000 6.000000 6.000000 7.750000 + max 9.000000 9.000000 9.000000 8.000000 9.000000 9.000000 + + +"""" + +**Update**: ``DataSet.merge_texts()`` + +The method will now always overwrite existing ``text_key`` meta, which makes it +possible to merge ``text``\s from meta of the same ``text_key`` as the master +``DataSet``. + +"""" + +**Bugfix**: ``DataSet.band()`` + +``band(new_name=None)``\'s automatic name generation was incorrectly creating +new variables with the name ``None_banded``. This is now fixed. + +"""" + +**Bugfix**: ``DataSet.copy()`` + +The method will now check if the name of the copy already exists in the +``DataSet`` and drop the referenced variable if found to prevent +inconsistencies. Additionally, it is not longer possible to copy isolated +``array`` items: + +>>> dataset.copy('q5_1') +NotImplementedError: Cannot make isolated copy of array item 'q5_1'. Please copy array variable 'q5' instead! + +--------------- +sd (08/06/2017) +--------------- + + + +**New**: ``DataSet.extend_valid_tks()``, ``DataSet.valid_tks`` + +``DataSet`` has a new attribute ``valid_tks`` that contains a list of all valid +textkeys. All methods that take a textkey as parameter are checked against that +list. 
+ +If a datafile contains a special/ unusual textkey (for example ``'id-ID'`` or +``'zh-TW'``), the list can be extended with ``DataSet.extend_valid_tks()``. +This extension can also be used to create a textkey for special conditions, +for example to create texts only for powerpoint outputs:: + + >>> dataset.extend_valid_tks('pptx') + >>> dataset.force_texts('pptx', 'en-GB') + >>> dataset.set_variable_text('gender','Gender label for pptx', text_key='pptx') + +"""" + +**New**: Equal error messages + +All methods that use the parameters ``name``/``var``, ``text_key`` or +``axis_edit``/ ``axis`` now have a decorator that checks the provided values. +The following shows a few examples for the new error messages: + +``name`` & ``var``:: + + 'name' argument for meta() must be in ['columns', 'masks']. + q1 is not in ['columns', 'masks']. + +``text_key``:: + + 'en-gb' is not a valid text_key! Supported are: ['en-GB', 'da-DK', 'fi-FI', 'nb-NO', 'sv-SE', 'de-DE'] + +``axis_edit`` & ``axis``:: + + 'xs' is not a valid axis! Supported are: ['x', 'y'] + +"""" + +**New**: ``DataSet.repair_text_edits(text_key)`` + +This new method can be used in trackers that were drawn up in an older ``Quantipy`` +version. Text objects can be repaired if they are not well prepared, for example if +it looks like this:: + + {'en-GB': 'some English text', + 'sv_SE': 'some Swedish text', + 'x edits': 'new text'} + +``DataSet.repair_text_edits()`` loops over all text objects in the dataset and +matches the ``x edits`` and ``y edits`` texts to all included textkeys:: + + >>> dataset.repair_text_edits(['en-GB', 'sv-SE']) + {'en-GB': 'some English text', + 'sv_SE': 'some Swedish text', + 'x edits': {'en-GB': 'new text', 'sv-SE': 'new text'}} + +"""" + +**Update**: ``DataSet.meta()``/ ``.text()``/ ``.values()``/ ``.value_texts()``/ ``.items()``/ ``.item_texts()`` + +All these methods can now take the parameters ``text_key`` and ``axis_edit``. 
+The related text is taken from the meta information and shown in the output. +If a text key or axis edit is not included the text is returned as None. + +"""" + +**Update**: ``DataSet.compare(dataset, variables=None, strict=False, text_key=None)`` + +The method is totally updated, works more precise and contains a few new +features. Generally variables included in ``dataset`` are compared with +eponymous variables in the main ``DataSet`` instance. You can specify witch +``variables`` should be compared, if question/ value texts should be compared +``strict`` or not and for which ``text_key``. + +"""" + +**Update**: ``DataSet.validate(verbose=True)`` + +A few new features are tested now and the output has changed. Set ``verbose=True`` +to see the definitions of the different error columns:: + + name: column/mask name and meta[collection][var]['name'] are not identical + + q_label: text object is badly formated or has empty text mapping + + values: categorical var does not contain values, value text is badly + formated or has empty text mapping + + textkeys: dataset.text_key is not included or existing tks are not + consistent (also for parents) + + source: parents or items do not exist + + codes: codes in .data are not included in .meta + +"""" + +**Update**: ``DataSet.sorting()`` / ``.slicing()`` / ``.hiding()`` + +These methods will now also work on lists of variable names. + +"""" + +**Update**: ``DataSet.set_variable_text()``, ``Dataset.set_item_texts()`` + +If these methods are applied to an array item, the new variable text is also +included in the meta information of the parent array. The same works also the +other way around, if an array text is set, then the array item texts are modified. + +"""" + +**Update**: ``DataSet.__init__(self, name, dimensions_comp=True)`` + +A few new features are included to handle data coming from Crunch. While +initializing a new ``DataSet`` instance dimensions compatibility can be set to +False. 
In the custom template use ``t.get_qp_dataset(name, dim_comp=False)`` +in the load cells. + +"""" + +**Bugfix**: ``DataSet.hmerge()`` + +If ``right_on`` and ``left_on`` are used and ``right_on`` is also included in +the main file, it is not overwritten any more. + +--------------- +sd (17/05/2017) +--------------- + +**Update**: ``DataSet.set_variable_text(..., axis_edit=None)``, ``DataSet.set_value_texts(..., axis_edit=False)`` + +The new ``axis_edit`` argument can be used with one of ``'x'``, ``'y'`` or ``['x', 'y']`` to instruct a text metadata change that will only be visible in build exports. + +.. warning:: + In a future version ``set_col_text_edit()`` and ``set_val_text_text()`` will + be removed! The identical functionality is provided via this ``axis_edit`` parameter. + +"""" + +**Update**: ``DataSet.replace_texts(..., text_key=None)`` + +The method loops over all meta text objects and replaces unwanted strings. +It is now possible to perform the replacement only for specified ``text_key``\s. +If ``text_key=None`` the method replaces the strings for all ``text_key``\s. + +"""" + +**Update**: ``DataSet.force_texts(copy_to=None, copy_from=None, update_existing=False)`` + +The method is now only able to force texts for all meta text objects (for +single variables use the methods ``set_variable_text()`` and +``set_value_texts()``). + +"""" + +**Bugfix**: ``DataSet.copy()`` + +Copied variables get the tag ``created`` and can be listed with +``t.list_variables(dataset, 'created')``. + +"""" + +**Bugfix**: ``DataSet.hmerge()``, ``DataSet.vmerge()`` + +Array meta information in merged datafiles is now updated correctly. + +--------------- +sd (04/05/2017) +--------------- + +**New**: ``DataSet.var_exists()`` + +Returns True if the input variable/ list of variables are included in the +``DataSet`` instance, otherwise False. 
+ +"""" + +**New**: ``DataSet.remove_html()``, ``DataSet.replace_texts(replace)`` + +The ``DataSet`` method ``clean_texts()`` has been removed and split into two +methods to make usage more clear: ``remove_html()`` will strip all ``text`` +metadata objects from any html and formatting tags. ``replace_texts()`` will +use a ``dict`` mapping of old to new ``str`` terms to change the matching +``text`` throughout the ``DataSet`` metadata. + +"""" + +**New**: ``DataSet.item_no(name)`` + +This method will return the positional index number of an array item, e.g.: + +>>> dataset.item_no('Q4A[{q4a_1}].Q4A_grid') +1 + +"""" + +**New**: ``QuantipyViews``: ``counts_cumsum``, ``c%_cumsum`` + +These two new views contain frequencies with cumulative sums which are computed +over the x-axis. + +"""" + +**Update**: ``DataSet.text(name, shorten=True)`` + +The new parameter ``shorten`` is now controlling if the variable ``text`` metadata +of array masks will be reported in short format, i.e. without the corresponding +mask label text. This is now also the default behaviour. + +"""" + +**Update**: ``DataSet.to_array()`` + +Created mask meta information now also contains keys ``parent`` and ``subtype``. +Variable names are compatible with crunch and dimensions meta: + +Example in Dimensions modus: + +>>> dataset.to_array('Q11', ['Q1', 'Q2', 'Q3', 'Q4', 'Q5'], 'label') + +The new grid is named ``'Q11.Q11_grid'`` and the source/column variables are +``'Q11[{Q1}].-Q11_grid'`` - ``'Q11[{Q5}].-Q11_grid'``. + +"""" + +**Bugfix**: ``DataSet.derotate()`` + +Meta is now Crunch and Dimensions compatible. Also mask meta information are updated. + +--------------- +sd (24/04/2017) +--------------- + +**Update**: ``DataSet.hiding(..., hide_values=True)`` + +The new parameter ``hide_values`` is only necessary if the input variable is a +mask. If ``False``, mask items are hidden, if ``True`` mask values are hidden +for all mask items and for array summary sheets. 
+ +"""" + +**Bugfix**: ``DataSet.set_col_text_edit(name)`` + +If the input variable is an array item, the new column text is also added to +``meta['mask'][name]['items]``. + + +"""" + +**Bugfix**: ``DataSet.drop(name, ignore_items=False)`` + +If a mask is dropped, but the items are kept, all items are handled now as +individual variables and their meta information is not stored in ``meta['lib']`` +anymore. + +--------------- +sd (06/04/2017) +--------------- + +Only small adjustments. + +--------------- +sd (29/03/2017) +--------------- + +**New**: ``DataSet.codes_in_data(name)`` + +This method returns a list of codes that exist in the data of a variable. This +information can be used for more complex recodes, for example copying a variable, +but keeping only all categories with more than 50 ratings, e.g.: + +>>> valid_code = dataset.codes_in_data('varname') +>>> keep_code = [x for x in valid_code if dataset['varname'].value_counts()[x] > 49] +>>> dataset.copy('varname', 'rc', copy_only=keep_code) + +"""" + +**Update**: ``DataSet.copy(..., copy_not=None)`` + +The new parameter ``copy_not`` takes a list of codes that should be ignored +for the copied version of the provided variable. The metadata of the copy will +be reduced as well. + +"""" + +**Update**: ``DataSet.code_count()`` + +This method is now alligned with ``any()`` and ``all()`` in that it can be used +on ``'array'`` variables as well. 
In such a case, the resulting ``pandas.Series`` +is reporting the number of answer codes found across all items per case data +row, i.e.: + +>>> code_count = dataset.code_count('Q4A.Q4A_grid', count_only=[3, 4]) +>>> check = pd.concat([dataset['Q4A.Q4A_grid'], code_count], axis=1) +>>> check.head(10) + Q4A[{q4a_1}].Q4A_grid Q4A[{q4a_2}].Q4A_grid Q4A[{q4a_3}].Q4A_grid 0 +0 3.0 3.0 NaN 2 +1 NaN NaN NaN 0 +2 3.0 3.0 4.0 3 +3 5.0 4.0 2.0 1 +4 4.0 4.0 4.0 3 +5 4.0 5.0 4.0 2 +6 3.0 3.0 3.0 3 +7 4.0 4.0 4.0 3 +8 6.0 6.0 6.0 0 +9 4.0 5.0 5.0 1 + +"""" + +--------------- +sd (20/03/2017) +--------------- + +**New**: ``qp.DataSet(dimensions_comp=True)`` + +The ``DataSet`` class can now be explicitly run in a Dimensions compatibility +mode to control the naming conventions of ``array`` variables ("grids"). This +is also the default behaviour for now. This comes with a few changes related to +meta creation and variable access using ``DataSet`` methods. Please see a brief +case study on this topic :doc:`here `. + +"""" + +**New**: enriched ``items`` / ``masks`` meta data + +``masks`` will now also store the ``subtype`` (``single``, ``delimited set``, etc.) +while ``items`` elements will now contain a reference to the defining ``masks`` +entrie(s) in a new ``parent`` object. + +"""" + +**Update**: ``DataSet.weight(..., subset=None)`` + +Filters the dataset by giving a Quantipy complex logic expression and weights +only the remaining subset. + +"""" + +**Update**: Defining categorical ``values`` meta and ``array`` items + +Both ``values`` and ``items`` can now be created in three different ways when +working with the ``DataSet`` methods ``add_meta()``, ``extend_values()`` and +``derive()``: (1) Tuples that map element code to label, (2) only labels or (3) +only element codes. 
Please see quick guide on that :doc:`here ` + +--------------- +sd (07/03/2017) +--------------- + +**Update**: ``DataSet.code_count(..., count_not=None)`` + +The new parameter ``count_not`` can be used to restrict the set of codes feeding +into the resulting ``pd.Series`` by exclusion (while ``count_only`` restricts +by inclusion). + +"""" + +**Update**: ``DataSet.copy(..., copy_only=None)`` + +The new parameter ``copy_only`` takes a list of codes that should be included +for the copied version of the provided variable, all others will be ignored +and the metadata of the copy will be reduced as well. + +"""" + +**Bugfix**: ``DataSet.band()`` + +There was a bug that was causing the method to crash for negative values. It is +now possible to create negative single value bands, while negative ranges +(lower and/or upper bound < 0) will raise a ``ValueError``. + +"""" + +--------------- +sd (24/02/2017) +--------------- + +* Some minor bugfixes and updates. Please use latest version. + +"""" + +--------------- +sd (16/02/2017) +--------------- + +**New:** ``DataSet.derotate(levels, mapper, other=None, unique_key='identity', dropna=True)`` + +Create a derotated ("levelled", responses-to-cases) ``DataSet`` instance by +defining level variables, looped variables and other (simple) variables that +should be added. + +View more information on the topic :doc:`here `. + +"""" + +**New:** ``DataSet.to_array(name, variables, label)`` + +Combine ``column`` variables with identical ``values`` objects to an ``array`` +incl. all required ``meta['masks']`` information. + +"""" + +**Update:** ``DataSet.interlock(..., variables)`` + +It is now possible to add ``dict``\s to ``variables``. In these ``dict``\s a +``derive()``-like mapper can be included which will then create a temporary +variable for the interlocked result. Example: + +>>> variables = ['gender', +... {'agegrp': [(1, '18-34', {'age': frange('18-34')}), +... (2, '35-54', {'age': frange('35-54')}), +... 
(3, '55+', {'age': is_ge(55)})]}, +... 'region'] +>>> dataset.interlock('new_var', 'label', variables) + +"""" + +--------------- +sd (04/01/2017) +--------------- + +**New:** ``DataSet.flatten(name, codes, new_name=None, text_key=None)`` + +Creates a new ``delimited set`` variable that groups ``grid item`` answers into +categories. The ``items`` become ``values`` of the new variable. If an +``item`` contains one of the ``codes`` it will be counted towards the categorical +case data of the new variable. + +"""" + +**New:** ``DataSet.uncode(target, mapper, default=None, intersect=None, inplace=True)`` + +Remove codes from the ``target`` variable's data component if a logical +condition is satisfied. + +"""" + +**New:** ``DataSet.text(var, text_key=None)`` + +Returns the question text label (per ``text_key``) of a variable. + +"""" + +**New:** ``DataSet.unroll(varlist, keep=None, both=None)`` + +Replaces ``masks`` names inside ``varlist`` with their ``items``. Optionally, +individual ``masks`` can be excluded or kept inside the list. + +"""" + +**New:** ``DataSet.from_stack(stack, datakey=None)`` + +Create a ``quantipy.DataSet`` from the ``meta``, ``data``, ``data_key`` and +``filter`` definition of a ``quantipy.Stack`` instance. + +"""" + +--------------- +sd (8/12/2016) +--------------- + +**New:** + +``DataSet.from_excel(path_xlsx, merge=True, unique_key='identity')`` + +Returns a new ``DataSet`` instance with ``data`` from ``excel``. The ``meta`` +for all variables contains ``type='int'``. + +Example: ``new_ds = dataset.from_excel(path, True, 'identity')`` + +The function is able to modify ``dataset`` inplace by merging ``new_ds`` on +``identity``. + +"""" + +**Update:** + +``DataSet.copy(..., slicer=None)`` + +It is now possible to filter the data that satisfies the logical condition +provided in the ``slicer``. 
+Example: + +>>> dataset.copy('q1', 'rec', True, {'q1': not_any([99])}) + +"""" + +--------------- +sd (23/11/2016) +--------------- + +**Update:** + +``DataSet.rename(name, new_name=None, array_item=None)`` + +The function is able to rename ``columns``, ``masks`` or ``mask items``. +``mask items`` are changed by position. + +"""" + +**Update:** + +``DataSet.categorize(..., categorized_name=None)`` + +Providing a custom name string for ``categorized_name`` will change the default +name of the categorized variable from ``OLD_NAME#`` to the passed string. + + +"""" + +--------------- +sd (16/11/2016) +--------------- + +**New:** + +``DataSet.check_dupe(name='identity')`` + +Returns a list with duplicated values for the variable provided via ``name``. +Identifies for example duplicated identities. + +"""" + +**New:** + +``DataSet.start_meta(text_key=None)`` + +Creates an empty QP meta data document blueprint to add variable definitions to. + +"""" + +**Update:** + +.. code-block:: python + + DataSet.create_set(setname='new_set', based_on='data file', included=None, + ... excluded=None, strings='keep', arrays='both', replace=None, + ... overwrite=False) + +Add a new ``set`` to the ``meta['sets']`` object. Variables from an existing +``set`` (``based_on``) can be ``included`` to ``new_set`` or variables can be +``excluded`` from ``based_on`` with customized lists of variables. +Control ``string`` variables and ``masks`` with the ``kwargs`` ``strings`` and +``arrays``. ``replace`` single variables in ``new_set`` with a ``dict``. + +"""" + +**Update:** + +``DataSet.from_components(..., text_key=None)`` + +Will now accept a ``text_key`` in the method call. If querying a ``text_key`` +from the meta component fails, the method will no longer crash, but raise a +``warning`` and set the ``text_key`` to ``None``. + +"""" + +**Update:** + +.. 
line-block:: + + ``DataSet.as_float()`` + ``DataSet.as_int()`` + ``DataSet.as_single()`` + ``DataSet.as_delimited_set()`` + ``DataSet.as_string()`` + ``DataSet.band_numerical()`` + ``DataSet.derive_categorical()`` + ``DataSet.set_mask_text()`` + ``DataSet.set_column_text()`` + +These methods will now print a ``UserWarning`` to prepare for the soon to +come removal of them. + +"""" + +**Bugfix:** + +``DataSet.__setitem__()`` + +Trying to set ``np.NaN`` was failing the test against meta data for categorical +variables and was raising a ``ValueError`` then. This is fixed now. + +"""" + +--------------- +sd (11/11/2016) +--------------- + +**New:** + +.. line-block:: + + ``DataSet.columns`` + ``DataSet.masks`` + ``DataSet.sets`` + ``DataSet.singles`` + ``DataSet.delimited_sets`` + ``DataSet.ints`` + ``DataSet.floats`` + ``DataSet.dates`` + ``DataSet.strings`` + +New ``DataSet`` instance attributes to quickly return the list of ``columns``, +``masks`` and ``sets`` objects from the meta or query the variables by +``type``. Use this to check for variables, iteration, inspection, ect. + +"""" + +**New:** + +``DataSet.categorize(name)`` + +Create a categorized version of ``int/string/date`` variables. New variables +will be named as per ``OLD_NAME#`` + +"""" + +**New:** + +``DataSet.convert(name, to)`` + +Wraps the individual ``as_TYPE()`` conversion methods. ``to`` must be one of +``'int', 'float', 'string', 'single', 'delimited set'``. + +"""" + +**New:** + +``DataSet.as_string(name)`` + +Only for completeness: Use ``DataSet.convert(name, to='string')`` instead. + +Converts ``int/float/single/date`` typed variables into a ``string`` and +removes all categorical metadata. + +"""" + +**Update:** + +``DataSet.add_meta()`` + +Can now add ``date`` and ``text`` type meta data. 
+ +"""" + +**Bugfix:** + +``DataSet.vmerge()`` + +If ``masks`` in the right ``dataset``, that also exist in the left ``dataset``, +have new ``items`` or ``values``, they are added to ``meta['masks']``, +``meta['lib']`` and ``meta['sets']``. + +"""" + +--------------- +sd (09/11/2016) +--------------- + +**New:** + +``DataSet.as_float(name)`` + +Converts ``int/single`` typed variables into a ``float`` and removes +all categorical metadata. + +"""" + +**New:** + +``DataSet.as_int(name)`` + +Converts ``single`` typed variables into a ``int`` and removes +all categorical metadata. + +"""" + +**New:** + +``DataSet.as_single(name)`` + +Converts ``int`` typed variables into a ``single`` and adds numeric values as +categorical metadata. + +"""" + +**New:** + +``DataSet.create_set(name, variables, blacklist=None)`` + +Adds a new ``set`` to ``meta['sets']`` object. Create easily ``sets`` from +other ``sets`` while using customised ``blacklist``. + +"""" + +**New:** + +``DataSet.drop(name, ignore_items=False)`` + +Removes all metadata and data referenced to the variable. When passing an +``array mask``, ``ignore_items`` can be ste to ``True`` to keep the ``item +columns`` incl. their metadata. + +"""" + +**New:** + +``DataSet.compare(dataset=None, variables=None)`` + +Compare the metadata definition between the current and another ``dataset``, +optionally restricting to a pair of variables. + +"""" + +**Update:** + +``DataSet.__setitem__()`` + +``[..]``-Indexer now checks scalars against categorical meta. + diff --git a/docs/API/_build/html/_sources/sites/release_notes/03_how_to_snippets.rst.txt b/docs/API/_build/html/_sources/sites/release_notes/03_how_to_snippets.rst.txt new file mode 100644 index 000000000..2b4fb423f --- /dev/null +++ b/docs/API/_build/html/_sources/sites/release_notes/03_how_to_snippets.rst.txt @@ -0,0 +1,12 @@ + + +=============== +How-to-snippets +=============== + +.. 
toctree:: + :maxdepth: 5 + + how_to_snippets/dimensions_comp + how_to_snippets/create_categorical_meta + how_to_snippets/derotate \ No newline at end of file diff --git a/docs/API/_build/html/_sources/sites/release_notes/how_to_snippets/create_categorical_meta.rst.txt b/docs/API/_build/html/_sources/sites/release_notes/how_to_snippets/create_categorical_meta.rst.txt new file mode 100644 index 000000000..857337f02 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/release_notes/how_to_snippets/create_categorical_meta.rst.txt @@ -0,0 +1,66 @@ +.. toctree:: + :maxdepth: 5 + :hidden: + +============================================= +Different ways of creating categorical values +============================================= + +The ``DataSet`` methods ``add_meta()``, ``extend_values()`` and ``derive()`` +offer three alternatives for specifying the categorical values of ``'single'`` +and ``'delimited set'`` typed variables. The approaches differ with respect to +how the mapping of numerical value codes to value text labels is handled. + +**(1) Providing a list of text labels** + +By providing the category labels only as a list of ``str``, ``DataSet`` +is going to create the numerical codes by simple enumeration: + +>>> name, qtype, label = 'test_var', 'single', 'The test variable label' + +>>> cats = ['test_cat_1', 'test_cat_2', 'test_cat_3'] +>>> dataset.add_meta(name, qtype, label, cats) + +>>> dataset.meta('test_var') +single codes texts missing +test_var: The test variable label +1 1 test_cat_1 None +2 2 test_cat_2 None +3 3 test_cat_3 None + +**(2) Providing a list of numerical codes** + +If only the desired numerical codes are provided, the label information for all +categories consequently will appear blank. 
In such a case the user will, however, +get reminded to add the ``'text'`` meta in a separate step: + +>>> cats = [1, 2, 98] +>>> dataset.add_meta(name, qtype, label, cats) +...\\quantipy\core\dataset.py:1287: UserWarning: 'text' label information missing, +only numerical codes created for the values object. Remember to add value 'text' metadata manually! + +>>> dataset.meta('test_var') +single codes texts missing +test_var: The test variable label +1 1 None +2 2 None +3 98 None + +**(3) Pairing numerical codes with text labels** + +To explicitly assign codes to corresponding labels, categories can also be +defined as a list of tuples of codes and labels: + +>>> cats = [(1, 'test_cat_1') (2, 'test_cat_2'), (98, 'Don\'t know')] +>>> dataset.add_meta(name, qtype, label, cats) + +>>> dataset.meta('test_var') +single codes texts missing +test_var: The test variable label +1 1 test_cat_1 None +2 2 test_cat_2 None +3 98 Don't know None + +.. note:: + All three approaches are also valid for defining the ``items`` object for + ``array``-typed ``masks``. \ No newline at end of file diff --git a/docs/API/_build/html/_sources/sites/release_notes/how_to_snippets/derotate.rst.txt b/docs/API/_build/html/_sources/sites/release_notes/how_to_snippets/derotate.rst.txt new file mode 100644 index 000000000..7e5abcc17 --- /dev/null +++ b/docs/API/_build/html/_sources/sites/release_notes/how_to_snippets/derotate.rst.txt @@ -0,0 +1,182 @@ +.. toctree:: + :maxdepth: 5 + :hidden: + +========== +Derotation +========== + +------------------ +What is derotation +------------------ + +Derotation of ``data`` is necessary if brands, products or something similar +(**levels**) are assessed and each respondent (case) rates a different +selection of that levels. So each **case** has several **responses**. +Derotation now means, that the ``data`` is switched from case-level to +responses-level. + +**Example**: ``q1_1/q1_2``: On a scale from 1 to 10, how much do you like the +following drinks? 
+ +| 1: water +| 2: cola +| 3: lemonade +| 4: beer +| + +**``data``** + ++-------+---------+---------+------+------+--------+ +| id | drink_1 | drink_2 | q1_1 | q1_2 | gender | ++-------+---------+---------+------+------+--------+ +| case1 | 1 | 3 | 2 | 8 | 1 | ++-------+---------+---------+------+------+--------+ +| case2 | 1 | 4 | 9 | 5 | 2 | ++-------+---------+---------+------+------+--------+ +| case3 | 2 | 4 | 6 | 10 | 1 | ++-------+---------+---------+------+------+--------+ + +**derotated ``data``** + ++-------+-------+----------------+----+--------+ +| | drink | drink_levelled | q1 | gender | ++-------+-------+----------------+----+--------+ +| case1 | 1 | 1 | 2 | 1 | ++-------+-------+----------------+----+--------+ +| case1 | 2 | 3 | 8 | 1 | ++-------+-------+----------------+----+--------+ +| case2 | 1 | 1 | 9 | 2 | ++-------+-------+----------------+----+--------+ +| case2 | 2 | 4 | 5 | 2 | ++-------+-------+----------------+----+--------+ +| case3 | 1 | 2 | 6 | 1 | ++-------+-------+----------------+----+--------+ +| case3 | 2 | 4 | 10 | 1 | ++-------+-------+----------------+----+--------+ + +To identify which case rates which levels, some key-/level-variables are +included in the ``data``, in this example ``drink_1`` and ``drink_2``. +Variables (for example ``gender``) that are not included to this loop can also +be added. + +--------------------------------- +How to use ``DataSet.derotate()`` +--------------------------------- + +The ``DataSet`` method takes a few parameters: + +* ``levels``: ``dict`` of ``list`` + + Contains all key-/level-variables and the name for the new levelled variable. + All key-/level-variables must have the same ``value_map``. + + >>> levels = {'drink': ['drink_1', 'drink_2']} + + +| + +* ``mapper``: ``list`` of ``dicts`` of ``list`` + + Contains the looped questions and the new ``column`` name to which the + looped questions will be combinded. 
+ + >>> mapper = [{'q1': ['q1_1', 'q1_2']}] + +| + +* ``other``: ``str`` or ``list`` of ``str`` + + Contains all variables that should be assumed to the derotated ``data``, but + which are not included in the loop. + + >>> other = 'gender' + +| + +* ``unique_key``: ``str`` + + Name of varibale that identifies cases in the initial ``data``. + + >>> unique_key = 'id' + +| + +* ``dropna``: ``bool``, default ``True`` + + If a case rates less then the possible counts of levels, these responses + will be droped. + +>>> ds = dataset.derotate(levels = {'drink': ['drink_1', 'drink_2']}, +... mapper = [{'q1': ['q1_1', 'q1_2']}], +... other = 'gender', +... unique_key = 'id', +... dropna = True) + +---------------------- +What about ``arrays``? +---------------------- + +It is possible that also ``arrays`` are looped. In this case a mapper can look +like this: + +>>> mapper = [{'q12_1': ['q12a[{q12a_1}].q12a_grid', 'q12b[{q12b_1}].q12b_grid', +... 'q12c[{q12c_1}].q12c_grid', 'q12d[{q12d_1}].q12d_grid']}, +... {'q12_2': ['q12a[{q12a_2}].q12a_grid', 'q12b[{q12b_2}].q12b_grid', +... 'q12c[{q12c_2}].q12c_grid', 'q12d[{q12d_2}].q12d_grid']}, +... {'q12_3': ['q12a[{q12a_3}].q12a_grid', 'q12b[{q12b_3}].q12b_grid', +... 'q12c[{q12c_3}].q12c_grid', 'q12d[{q12d_3}].q12d_grid']}, +... {'q12_4': ['q12a[{q12a_4}].q12a_grid', 'q12b[{q12b_4}].q12b_grid', +... 'q12c[{q12c_4}].q12c_grid', 'q12d[{q12d_4}].q12d_grid']}, +... {'q12_5': ['q12a[{q12a_5}].q12a_grid', 'q12b[{q12b_5}].q12b_grid', +... 'q12c[{q12c_5}].q12c_grid', 'q12d[{q12d_5}].q12d_grid']}, +... {'q12_6': ['q12a[{q12a_6}].q12a_grid', 'q12b[{q12b_6}].q12b_grid', +... 'q12c[{q12c_6}].q12c_grid', 'q12d[{q12d_6}].q12d_grid']}, +... {'q12_7': ['q12a[{q12a_7}].q12a_grid', 'q12b[{q12b_7}].q12b_grid', +... 'q12c[{q12c_7}].q12c_grid', 'q12d[{q12d_7}].q12d_grid']}, +... {'q12_8': ['q12a[{q12a_8}].q12a_grid', 'q12b[{q12b_8}].q12b_grid', +... 'q12c[{q12c_8}].q12c_grid', 'q12d[{q12d_8}].q12d_grid']}, +... 
{'q12_9': ['q12a[{q12a_9}].q12a_grid', 'q12b[{q12b_9}].q12b_grid', +... 'q12c[{q12c_9}].q12c_grid', 'q12d[{q12d_9}].q12d_grid']}, +... {'q12_10': ['q12a[{q12a_10}].q12a_grid', 'q12b[{q12b_10}].q12b_grid', +... 'q12c[{q12c_10}].q12c_grid', 'q12d[{q12d_10}].q12d_grid']}, +... {'q12_11': ['q12a[{q12a_11}].q12a_grid', 'q12b[{q12b_11}].q12b_grid', +... 'q12c[{q12c_11}].q12c_grid', 'q12d[{q12d_11}].q12d_grid']}, +... {'q12_12': ['q12a[{q12a_12}].q12a_grid', 'q12b[{q12b_12}].q12b_grid', +... 'q12c[{q12c_12}].q12c_grid', 'q12d[{q12d_12}].q12d_grid']}, +... {'q12_13': ['q12a[{q12a_13}].q12a_grid', 'q12b[{q12b_13}].q12b_grid', +... 'q12c[{q12c_13}].q12c_grid', 'q12d[{q12d_13}].q12d_grid']}]] + +Can be also writen like this: + +>>> for y in frange('1-13'): +... q_group = [] +... for x in ['a', 'b', 'c', 'd']: +... var = 'q12{}'.format(x) +... var_grid = var + '[{' + var + '_{}'.format(y) + '}].' + var + '_grid' +... q_group.append(var_grid) +... mapper.append({'q12_{}'.format(y): q_group}) + +So the derotated ``dataset`` will lose its ``meta`` information about the +``mask`` and only the ``columns`` ``q12_1`` to ``q12_13`` will be added. To +receive back the ``mask`` structure, use the method ``dataset.to_array()``: + +>>> variables = [{'q12_1': u'label 1'}, +... {'q12_2': u'label 2'}, +... {'q12_3': u'label 3'}, +... {'q12_4': u'label 4'}, +... {'q12_5': u'label 5'}, +... {'q12_6': u'label 6'}, +... {'q12_7': u'label 7'}, +... {'q12_8': u'label 8'}, +... {'q12_9': u'label 9'}, +... {'q12_10': u'label 10'}, +... {'q12_11': u'label 11'}, +... {'q12_12': u'label 12'}, +... {'q12_13': u'label 13'}] +>>> ds.to_array('qTP', variables, 'Var_name') + +``variables`` can also be a list of variable-names, then the ``mask-items`` +will be named by its belonging ``columns``. + +``arrays`` included in ``other`` will keep their ``meta`` structure. 
\ No newline at end of file diff --git a/docs/API/_build/html/_sources/sites/release_notes/how_to_snippets/dimensions_comp.rst.txt b/docs/API/_build/html/_sources/sites/release_notes/how_to_snippets/dimensions_comp.rst.txt new file mode 100644 index 000000000..889f9208b --- /dev/null +++ b/docs/API/_build/html/_sources/sites/release_notes/how_to_snippets/dimensions_comp.rst.txt @@ -0,0 +1,110 @@ +.. toctree:: + :maxdepth: 5 + :hidden: + +==================================== +``DataSet`` Dimensions compatibility +==================================== + +DTO-downloaded and Dimensions converted variable naming conventions are following +specific rules for ``array`` names and corresponding ``ìtems``. ``DataSet`` +offers a compatibility mode for Dimensions scenarios and handles the proper +renaming automatically. Here is what you should know... + +---------------------- +The compatibility mode +---------------------- + +A ``DataSet`` will (by default) support Dimensions-like ``array`` naming for its connected data files when constructed. 
An ``array`` ``masks`` meta defintition +of a variable called ``q5`` looking like this...:: + + {u'items': [{u'source': u'columns@q5_1', u'text': {u'en-GB': u'Surfing'}}, + {u'source': u'columns@q5_2', u'text': {u'en-GB': u'Snowboarding'}}, + {u'source': u'columns@q5_3', u'text': {u'en-GB': u'Kite boarding'}}, + {u'source': u'columns@q5_4', u'text': {u'en-GB': u'Parachuting'}}, + {u'source': u'columns@q5_5', u'text': {u'en-GB': u'Cave diving'}}, + {u'source': u'columns@q5_6', u'text': {u'en-GB': u'Windsurfing'}}], + u'subtype': u'single', + u'text': {u'en-GB': u'How likely are you to do each of the following in the next year?'}, + u'type': u'array', + u'values': u'lib@values@q5'} + +...will be converted into its "Dimensions equivalent" as per: + +>>> dataset = qp.DataSet(name_data, dimensions_comp=True) +>>> dataset.read_quantipy(path_data+name_data, path_data+name_data) +DataSet: ../Data/Quantipy/Example Data (A) +rows: 8255 - columns: 75 +Dimensions compatibilty mode: True + +>>> dataset.masks() +['q5.q5_grid', 'q6.q6_grid', 'q7.q7_grid'] + +>>> dataset._meta['masks']['q5.q5_grid'] +{u'items': [{u'source': 'columns@q5[{q5_1}].q5_grid', + u'text': {u'en-GB': u'Surfing'}}, + {u'source': 'columns@q5[{q5_2}].q5_grid', + u'text': {u'en-GB': u'Snowboarding'}}, + {u'source': 'columns@q5[{q5_3}].q5_grid', + u'text': {u'en-GB': u'Kite boarding'}}, + {u'source': 'columns@q5[{q5_4}].q5_grid', + u'text': {u'en-GB': u'Parachuting'}}, + {u'source': 'columns@q5[{q5_5}].q5_grid', + u'text': {u'en-GB': u'Cave diving'}}, + {u'source': 'columns@q5[{q5_6}].q5_grid', + u'text': {u'en-GB': u'Windsurfing'}}], + 'name': 'q5.q5_grid', + u'subtype': u'single', + u'text': {u'en-GB': u'How likely are you to do each of the following in the next year?'}, + u'type': u'array', + u'values': 'lib@values@q5.q5_grid'} + +------------------------------------- +Accessing and creating ``array`` data +------------------------------------- + +Since new names are converted automatically by ``DataSet`` 
methods, there is +no need to write down the full (DTO-like) Dimensions ``array`` name when adding +new metadata. However, querying variables is always requiring the proper name: + +>>> name, qtype, label = 'array_var', 'single', 'ARRAY LABEL' +>>> cats = ['A', 'B', 'C'] +>>> items = ['1', '2', '3'] +>>> dataset.add_meta(name, qtype, label, cats, items) + +>>> dataset.masks() +['q5.q5_grid', 'array_var.array_var_grid', 'q6.q6_grid', 'q7.q7_grid'] + +>>> dataset.meta('array_var.array_var_grid') +single items item texts codes texts missing +array_var.array_var_grid: ARRAY LABEL +1 array_var[{array_var_1}].array_var_grid 1 1 A None +2 array_var[{array_var_2}].array_var_grid 2 2 B None +3 array_var[{array_var_3}].array_var_grid 3 3 C None + +>>> dataset['array_var.array_var_grid'].head(5) + array_var[{array_var_1}].array_var_grid array_var[{array_var_2}].array_var_grid array_var[{array_var_3}].array_var_grid +0 NaN NaN NaN +1 NaN NaN NaN +2 NaN NaN NaN +3 NaN NaN NaN +4 NaN NaN NaN + +As can been seen above, both the ``masks`` name as well as the ``array`` item +elements are being properly converted to match DTO/Dimensions +conventions. 
+ +When using ``rename()``, ``copy()`` or ``transpose()``, the same behaviour +applies: + +>>> dataset.rename('q6.q6_grid', 'q6new') +>>> dataset.masks() +['q5.q5_grid', 'array_var.array_var_grid', 'q6new.q6new_grid', 'q7.q7_grid'] + +>>> dataset.copy('q6new.q6new_grid', suffix='q6copy') +>>> dataset.masks() +['q5.q5_grid', 'q6new_q6copy.q6new_q6copy_grid', 'array_var.array_var_grid', 'q6new.q6new_grid', 'q7.q7_grid'] + +>>> dataset.transpose('q6new_q6copy.q6new_q6copy_grid') +>>> dataset.masks() +['q5.q5_grid', 'q6new_q6copy_trans.q6new_q6copy_trans_grid', 'q6new_q6copy.q6new_q6copy_grid', 'array_var.array_var_grid', 'q6new.q6new_grid', 'q7.q7_grid'] \ No newline at end of file diff --git a/docs/API/_build/html/_static/basic.css b/docs/API/_build/html/_static/basic.css index 7ed0e58ed..6df76b0a6 100644 --- a/docs/API/_build/html/_static/basic.css +++ b/docs/API/_build/html/_static/basic.css @@ -4,7 +4,7 @@ * * Sphinx stylesheet -- basic theme. * - * :copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ @@ -398,6 +398,13 @@ table.field-list td, table.field-list th { margin: 0; } +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + /* -- other body styles ----------------------------------------------------- */ ol.arabic { diff --git a/docs/API/_build/html/_static/doctools.js b/docs/API/_build/html/_static/doctools.js index 816349563..565497723 100644 --- a/docs/API/_build/html/_static/doctools.js +++ b/docs/API/_build/html/_static/doctools.js @@ -4,7 +4,7 @@ * * Sphinx JavaScript utilities for all documentation. * - * :copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. 
* */ diff --git a/docs/API/_build/html/_static/searchtools.js b/docs/API/_build/html/_static/searchtools.js index bbfb3ac14..c82157380 100644 --- a/docs/API/_build/html/_static/searchtools.js +++ b/docs/API/_build/html/_static/searchtools.js @@ -4,7 +4,7 @@ * * Sphinx JavaScript utilities for the full-text search. * - * :copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ diff --git a/docs/API/_build/html/_static/underscore.js b/docs/API/_build/html/_static/underscore.js deleted file mode 100644 index 5b55f32be..000000000 --- a/docs/API/_build/html/_static/underscore.js +++ /dev/null @@ -1,31 +0,0 @@ -// Underscore.js 1.3.1 -// (c) 2009-2012 Jeremy Ashkenas, DocumentCloud Inc. -// Underscore is freely distributable under the MIT license. -// Portions of Underscore are inspired or borrowed from Prototype, -// Oliver Steele's Functional, and John Resig's Micro-Templating. 
-// For all details and documentation: -// http://documentcloud.github.com/underscore -(function(){function q(a,c,d){if(a===c)return a!==0||1/a==1/c;if(a==null||c==null)return a===c;if(a._chain)a=a._wrapped;if(c._chain)c=c._wrapped;if(a.isEqual&&b.isFunction(a.isEqual))return a.isEqual(c);if(c.isEqual&&b.isFunction(c.isEqual))return c.isEqual(a);var e=l.call(a);if(e!=l.call(c))return false;switch(e){case "[object String]":return a==String(c);case "[object Number]":return a!=+a?c!=+c:a==0?1/a==1/c:a==+c;case "[object Date]":case "[object Boolean]":return+a==+c;case "[object RegExp]":return a.source== -c.source&&a.global==c.global&&a.multiline==c.multiline&&a.ignoreCase==c.ignoreCase}if(typeof a!="object"||typeof c!="object")return false;for(var f=d.length;f--;)if(d[f]==a)return true;d.push(a);var f=0,g=true;if(e=="[object Array]"){if(f=a.length,g=f==c.length)for(;f--;)if(!(g=f in a==f in c&&q(a[f],c[f],d)))break}else{if("constructor"in a!="constructor"in c||a.constructor!=c.constructor)return false;for(var h in a)if(b.has(a,h)&&(f++,!(g=b.has(c,h)&&q(a[h],c[h],d))))break;if(g){for(h in c)if(b.has(c, -h)&&!f--)break;g=!f}}d.pop();return g}var r=this,G=r._,n={},k=Array.prototype,o=Object.prototype,i=k.slice,H=k.unshift,l=o.toString,I=o.hasOwnProperty,w=k.forEach,x=k.map,y=k.reduce,z=k.reduceRight,A=k.filter,B=k.every,C=k.some,p=k.indexOf,D=k.lastIndexOf,o=Array.isArray,J=Object.keys,s=Function.prototype.bind,b=function(a){return new m(a)};if(typeof exports!=="undefined"){if(typeof module!=="undefined"&&module.exports)exports=module.exports=b;exports._=b}else r._=b;b.VERSION="1.3.1";var j=b.each= -b.forEach=function(a,c,d){if(a!=null)if(w&&a.forEach===w)a.forEach(c,d);else if(a.length===+a.length)for(var e=0,f=a.length;e2;a== -null&&(a=[]);if(y&&a.reduce===y)return e&&(c=b.bind(c,e)),f?a.reduce(c,d):a.reduce(c);j(a,function(a,b,i){f?d=c.call(e,d,a,b,i):(d=a,f=true)});if(!f)throw new TypeError("Reduce of empty array with no initial value");return 
d};b.reduceRight=b.foldr=function(a,c,d,e){var f=arguments.length>2;a==null&&(a=[]);if(z&&a.reduceRight===z)return e&&(c=b.bind(c,e)),f?a.reduceRight(c,d):a.reduceRight(c);var g=b.toArray(a).reverse();e&&!f&&(c=b.bind(c,e));return f?b.reduce(g,c,d,e):b.reduce(g,c)};b.find=b.detect= -function(a,c,b){var e;E(a,function(a,g,h){if(c.call(b,a,g,h))return e=a,true});return e};b.filter=b.select=function(a,c,b){var e=[];if(a==null)return e;if(A&&a.filter===A)return a.filter(c,b);j(a,function(a,g,h){c.call(b,a,g,h)&&(e[e.length]=a)});return e};b.reject=function(a,c,b){var e=[];if(a==null)return e;j(a,function(a,g,h){c.call(b,a,g,h)||(e[e.length]=a)});return e};b.every=b.all=function(a,c,b){var e=true;if(a==null)return e;if(B&&a.every===B)return a.every(c,b);j(a,function(a,g,h){if(!(e= -e&&c.call(b,a,g,h)))return n});return e};var E=b.some=b.any=function(a,c,d){c||(c=b.identity);var e=false;if(a==null)return e;if(C&&a.some===C)return a.some(c,d);j(a,function(a,b,h){if(e||(e=c.call(d,a,b,h)))return n});return!!e};b.include=b.contains=function(a,c){var b=false;if(a==null)return b;return p&&a.indexOf===p?a.indexOf(c)!=-1:b=E(a,function(a){return a===c})};b.invoke=function(a,c){var d=i.call(arguments,2);return b.map(a,function(a){return(b.isFunction(c)?c||a:a[c]).apply(a,d)})};b.pluck= -function(a,c){return b.map(a,function(a){return a[c]})};b.max=function(a,c,d){if(!c&&b.isArray(a))return Math.max.apply(Math,a);if(!c&&b.isEmpty(a))return-Infinity;var e={computed:-Infinity};j(a,function(a,b,h){b=c?c.call(d,a,b,h):a;b>=e.computed&&(e={value:a,computed:b})});return e.value};b.min=function(a,c,d){if(!c&&b.isArray(a))return Math.min.apply(Math,a);if(!c&&b.isEmpty(a))return Infinity;var e={computed:Infinity};j(a,function(a,b,h){b=c?c.call(d,a,b,h):a;bd?1:0}),"value")};b.groupBy=function(a,c){var d={},e=b.isFunction(c)?c:function(a){return a[c]};j(a,function(a,b){var c=e(a,b);(d[c]||(d[c]=[])).push(a)});return d};b.sortedIndex=function(a, -c,d){d||(d=b.identity);for(var 
e=0,f=a.length;e>1;d(a[g])=0})})};b.difference=function(a){var c=b.flatten(i.call(arguments,1));return b.filter(a,function(a){return!b.include(c,a)})};b.zip=function(){for(var a=i.call(arguments),c=b.max(b.pluck(a,"length")),d=Array(c),e=0;e=0;d--)b=[a[d].apply(this,b)];return b[0]}}; -b.after=function(a,b){return a<=0?b():function(){if(--a<1)return b.apply(this,arguments)}};b.keys=J||function(a){if(a!==Object(a))throw new TypeError("Invalid object");var c=[],d;for(d in a)b.has(a,d)&&(c[c.length]=d);return c};b.values=function(a){return b.map(a,b.identity)};b.functions=b.methods=function(a){var c=[],d;for(d in a)b.isFunction(a[d])&&c.push(d);return c.sort()};b.extend=function(a){j(i.call(arguments,1),function(b){for(var d in b)a[d]=b[d]});return a};b.defaults=function(a){j(i.call(arguments, -1),function(b){for(var d in b)a[d]==null&&(a[d]=b[d])});return a};b.clone=function(a){return!b.isObject(a)?a:b.isArray(a)?a.slice():b.extend({},a)};b.tap=function(a,b){b(a);return a};b.isEqual=function(a,b){return q(a,b,[])};b.isEmpty=function(a){if(b.isArray(a)||b.isString(a))return a.length===0;for(var c in a)if(b.has(a,c))return false;return true};b.isElement=function(a){return!!(a&&a.nodeType==1)};b.isArray=o||function(a){return l.call(a)=="[object Array]"};b.isObject=function(a){return a===Object(a)}; -b.isArguments=function(a){return l.call(a)=="[object Arguments]"};if(!b.isArguments(arguments))b.isArguments=function(a){return!(!a||!b.has(a,"callee"))};b.isFunction=function(a){return l.call(a)=="[object Function]"};b.isString=function(a){return l.call(a)=="[object String]"};b.isNumber=function(a){return l.call(a)=="[object Number]"};b.isNaN=function(a){return a!==a};b.isBoolean=function(a){return a===true||a===false||l.call(a)=="[object Boolean]"};b.isDate=function(a){return l.call(a)=="[object Date]"}; -b.isRegExp=function(a){return l.call(a)=="[object RegExp]"};b.isNull=function(a){return a===null};b.isUndefined=function(a){return a===void 0};b.has=function(a,b){return 
I.call(a,b)};b.noConflict=function(){r._=G;return this};b.identity=function(a){return a};b.times=function(a,b,d){for(var e=0;e/g,">").replace(/"/g,""").replace(/'/g,"'").replace(/\//g,"/")};b.mixin=function(a){j(b.functions(a), -function(c){K(c,b[c]=a[c])})};var L=0;b.uniqueId=function(a){var b=L++;return a?a+b:b};b.templateSettings={evaluate:/<%([\s\S]+?)%>/g,interpolate:/<%=([\s\S]+?)%>/g,escape:/<%-([\s\S]+?)%>/g};var t=/.^/,u=function(a){return a.replace(/\\\\/g,"\\").replace(/\\'/g,"'")};b.template=function(a,c){var d=b.templateSettings,d="var __p=[],print=function(){__p.push.apply(__p,arguments);};with(obj||{}){__p.push('"+a.replace(/\\/g,"\\\\").replace(/'/g,"\\'").replace(d.escape||t,function(a,b){return"',_.escape("+ -u(b)+"),'"}).replace(d.interpolate||t,function(a,b){return"',"+u(b)+",'"}).replace(d.evaluate||t,function(a,b){return"');"+u(b).replace(/[\r\n\t]/g," ")+";__p.push('"}).replace(/\r/g,"\\r").replace(/\n/g,"\\n").replace(/\t/g,"\\t")+"');}return __p.join('');",e=new Function("obj","_",d);return c?e(c,b):function(a){return e.call(this,a,b)}};b.chain=function(a){return b(a).chain()};var m=function(a){this._wrapped=a};b.prototype=m.prototype;var v=function(a,c){return c?b(a).chain():a},K=function(a,c){m.prototype[a]= -function(){var a=i.call(arguments);H.call(a,this._wrapped);return v(c.apply(b,a),this._chain)}};b.mixin(b);j("pop,push,reverse,shift,sort,splice,unshift".split(","),function(a){var b=k[a];m.prototype[a]=function(){var d=this._wrapped;b.apply(d,arguments);var e=d.length;(a=="shift"||a=="splice")&&e===0&&delete d[0];return v(d,this._chain)}});j(["concat","join","slice"],function(a){var b=k[a];m.prototype[a]=function(){return v(b.apply(this._wrapped,arguments),this._chain)}});m.prototype.chain=function(){this._chain= -true;return this};m.prototype.value=function(){return this._wrapped}}).call(this); diff --git a/docs/API/_build/html/_static/websupport.js b/docs/API/_build/html/_static/websupport.js index 98e7f40b6..53f6a4525 100644 --- 
a/docs/API/_build/html/_static/websupport.js +++ b/docs/API/_build/html/_static/websupport.js @@ -4,7 +4,7 @@ * * sphinx.websupport utilities for all documentation. * - * :copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ diff --git a/docs/API/_build/html/genindex.html b/docs/API/_build/html/genindex.html index 9730954ce..4a59564b2 100644 --- a/docs/API/_build/html/genindex.html +++ b/docs/API/_build/html/genindex.html @@ -90,9 +90,18 @@

Quick search

  • Release notes
      +
    • get_sig() (quantipy.Test method) +
    • get_statistic() (quantipy.Test method)
    • get_std_params() (quantipy.View method) @@ -566,6 +605,8 @@

      H

      @@ -645,14 +686,18 @@

      M

    • merge_texts() (quantipy.DataSet method)
    • - - + @@ -661,6 +706,8 @@

      M

      N

      @@ -672,6 +719,14 @@

      N

      +

      O

      + + +
      +

      P

      - + - + @@ -444,16 +454,16 @@

      QuantipyViews

      @@ -382,13 +392,13 @@

      Rimgroup_targets(group_targets)

      Set inter-group target proportions.

      This will scale the weight factors per group to match the desired group -proportions and thus effectively change each group’s weighted +proportions and thus effectively change each group’s weighted total number of cases.

      • to_array() (quantipy.DataSet method) +
      • +
      • to_delimited_set() (quantipy.DataSet method)
      • transpose() (quantipy.DataSet method)
      • @@ -833,6 +910,8 @@

        T

        U

        - - @@ -421,7 +430,7 @@

        Cluster

        - @@ -487,7 +496,7 @@

        Cluster - + diff --git a/docs/API/_build/html/sites/api_ref/DataSet.html b/docs/API/_build/html/sites/api_ref/DataSet.html new file mode 100644 index 000000000..8b2405e70 --- /dev/null +++ b/docs/API/_build/html/sites/api_ref/DataSet.html @@ -0,0 +1,3403 @@ + + + + + + + + + + + DataSet — Quantipy 0.1.3 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
        + + + + +
        + + + + + + +
        +
        + + + + + + +
        + +
        +
        +
        +
        + +
        +
        +
        +

        DataSet

        +
        +
        +class quantipy.DataSet(name, dimensions_comp=True)
        +

        A set of casedata (required) and meta data (optional).

        +

        DESC.

        +
        +
        +add_meta(name, qtype, label, categories=None, items=None, text_key=None, replace=True)
        +

        Create and insert a well-formed meta object into the existing meta document.

        +

          -
        • variables() (quantipy.DataSet method) +
        • variable_types() (quantipy.Stack method)
        • -
        • variables_from_set() (quantipy.DataSet method) +
        • variables() (quantipy.DataSet method)
        • View (class in quantipy)
        • @@ -947,7 +1024,7 @@

          W

          - + diff --git a/docs/API/_build/html/index.html b/docs/API/_build/html/index.html index e0199f47c..31ebf4ba6 100644 --- a/docs/API/_build/html/index.html +++ b/docs/API/_build/html/index.html @@ -90,9 +90,18 @@

          Quick search

          • Release notes
              -
            • Latest (15/09/2017)
            • +
            • Latest (01/10/2018)
            • Archived release notes
                -
              • Latest (31/08/2017)
              • +
              • sd (04/06/2018)
              • +
              • sd (04/04/2018)
              • +
              • sd (27/02/2018)
              • +
              • sd (12/01/2018)
              • +
              • sd (18/12/2017)
              • +
              • sd (28/11/2017)
              • +
              • sd (13/11/2017)
              • +
              • sd (17/10/2017)
              • +
              • sd (15/09/2017)
              • +
              • sd (31/08/2017)
              • sd (24/07/2017)
              • sd (08/06/2017)
              • sd (17/05/2017)
              • @@ -193,7 +202,7 @@

                Quick search

              • Complex logic @@ -429,7 +439,7 @@

                Key features - + diff --git a/docs/API/_build/html/objects.inv b/docs/API/_build/html/objects.inv index 78cb686ed..09c47acdb 100644 Binary files a/docs/API/_build/html/objects.inv and b/docs/API/_build/html/objects.inv differ diff --git a/docs/API/_build/html/search.html b/docs/API/_build/html/search.html index 7e5631023..c4cf1e99c 100644 --- a/docs/API/_build/html/search.html +++ b/docs/API/_build/html/search.html @@ -86,9 +86,18 @@
                • Release notes
                    -
                  • Latest (15/09/2017)
                  • +
                  • Latest (01/10/2018)
                  • Archived release notes
                      -
                    • Latest (31/08/2017)
                    • +
                    • sd (04/06/2018)
                    • +
                    • sd (04/04/2018)
                    • +
                    • sd (27/02/2018)
                    • +
                    • sd (12/01/2018)
                    • +
                    • sd (18/12/2017)
                    • +
                    • sd (28/11/2017)
                    • +
                    • sd (13/11/2017)
                    • +
                    • sd (17/10/2017)
                    • +
                    • sd (15/09/2017)
                    • +
                    • sd (31/08/2017)
                    • sd (24/07/2017)
                    • sd (08/06/2017)
                    • sd (17/05/2017)
                    • @@ -189,7 +198,7 @@
                    • Complex logic @@ -395,7 +405,7 @@

                      Search

                      - + diff --git a/docs/API/_build/html/searchindex.js b/docs/API/_build/html/searchindex.js index a9211028d..baec5bb34 100644 --- a/docs/API/_build/html/searchindex.js +++ b/docs/API/_build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["index","sites/api_ref/00overview","sites/api_ref/Chain","sites/api_ref/Cluster","sites/api_ref/DataSet","sites/api_ref/QuantipyViews","sites/api_ref/Rim_scheme","sites/api_ref/View","sites/api_ref/ViewMapper","sites/api_ref/quantify_engine","sites/api_ref/stack","sites/lib_doc/batch/00_overview","sites/lib_doc/batch/01_create_load","sites/lib_doc/batch/02_variables","sites/lib_doc/batch/03_properties","sites/lib_doc/batch/04_subclass","sites/lib_doc/builds/00_overview","sites/lib_doc/builds/01_chains","sites/lib_doc/dataprocessing/00_overview","sites/lib_doc/dataprocessing/01_components","sites/lib_doc/dataprocessing/02_io","sites/lib_doc/dataprocessing/02a_management","sites/lib_doc/dataprocessing/03_inspection","sites/lib_doc/dataprocessing/04_editing","sites/lib_doc/dataprocessing/05_transforming","sites/lib_doc/dataprocessing/06_logics","sites/lib_doc/dataprocessing/07_custom_recoding","sites/lib_doc/engine/00_overview","sites/lib_doc/engine/01_links_stacks","sites/lib_doc/engine/02_quantity","sites/lib_doc/engine/03_test","sites/lib_doc/engine/04_agg_methods","sites/lib_doc/overview","sites/release_notes/00_overview","sites/release_notes/01_latest","sites/release_notes/02_archive","sites/release_notes/03_how_to_snippets","sites/release_notes/how_to_snippets/create_categorical_meta","sites/release_notes/how_to_snippets/derotate","sites/release_notes/how_to_snippets/dimensions_comp"],envversion:50,filenames:["index.rst","sites\\api_ref\\00overview.rst","sites\\api_ref\\Chain.rst","sites\\api_ref\\Cluster.rst","sites\\api_ref\\DataSet.rst","sites\\api_ref\\QuantipyViews.rst","sites\\api_ref\\Rim_scheme.rst","sites\\api_ref\\View.rst","sites\\api_ref\\ViewMapper.rst","sites\\api_ref\\quantify_engin
e.rst","sites\\api_ref\\stack.rst","sites\\lib_doc\\batch\\00_overview.rst","sites\\lib_doc\\batch\\01_create_load.rst","sites\\lib_doc\\batch\\02_variables.rst","sites\\lib_doc\\batch\\03_properties.rst","sites\\lib_doc\\batch\\04_subclass.rst","sites\\lib_doc\\builds\\00_overview.rst","sites\\lib_doc\\builds\\01_chains.rst","sites\\lib_doc\\dataprocessing\\00_overview.rst","sites\\lib_doc\\dataprocessing\\01_components.rst","sites\\lib_doc\\dataprocessing\\02_io.rst","sites\\lib_doc\\dataprocessing\\02a_management.rst","sites\\lib_doc\\dataprocessing\\03_inspection.rst","sites\\lib_doc\\dataprocessing\\04_editing.rst","sites\\lib_doc\\dataprocessing\\05_transforming.rst","sites\\lib_doc\\dataprocessing\\06_logics.rst","sites\\lib_doc\\dataprocessing\\07_custom_recoding.rst","sites\\lib_doc\\engine\\00_overview.rst","sites\\lib_doc\\engine\\01_links_stacks.rst","sites\\lib_doc\\engine\\02_quantity.rst","sites\\lib_doc\\engine\\03_test.rst","sites\\lib_doc\\engine\\04_agg_methods.rst","sites\\lib_doc\\overview.rst","sites\\release_notes\\00_overview.rst","sites\\release_notes\\01_latest.rst","sites\\release_notes\\02_archive.rst","sites\\release_notes\\03_how_to_snippets.rst","sites\\release_notes\\how_to_snippets\\create_categorical_meta.rst","sites\\release_notes\\how_to_snippets\\derotate.rst","sites\\release_notes\\how_to_snippets\\dimensions_comp.rst"],objects:{"quantipy.Chain":{concat:[2,2,1,""],copy:[2,2,1,""],describe:[2,2,1,""],load:[2,3,1,""],save:[2,2,1,""]},"quantipy.Cluster":{add_chain:[3,2,1,""],bank_chains:[3,2,1,""],load:[3,3,1,""],merge:[3,2,1,""],save:[3,2,1,""]},"quantipy.DataSet":{add_meta:[4,2,1,""],all:[4,2,1,""],any:[4,2,1,""],band:[4,2,1,""],categorize:[4,2,1,""],clone:[4,2,1,""],code_count:[4,2,1,""],code_from_label:[4,2,1,""],codes:[4,2,1,""],codes_in_data:[4,2,1,""],compare:[4,2,1,""],convert:[4,2,1,""],copy:[4,2,1,""],copy_array_data:[4,2,1,""],create_set:[4,2,1,""],crosstab:[4,2,1,""],cut_item_texts:[4,2,1,""],data:[4,2,1,""],derive:[4,2
,1,""],derotate:[4,2,1,""],describe:[4,2,1,""],dimensionize:[4,2,1,""],dimensionizing_mapper:[4,2,1,""],drop:[4,2,1,""],duplicates:[4,2,1,""],extend_values:[4,2,1,""],filter:[4,2,1,""],find_duplicate_texts:[4,2,1,""],flatten:[4,2,1,""],force_texts:[4,2,1,""],from_components:[4,2,1,""],from_excel:[4,2,1,""],from_stack:[4,2,1,""],get_batch:[4,2,1,""],hiding:[4,2,1,""],hmerge:[4,2,1,""],interlock:[4,2,1,""],is_nan:[4,2,1,""],item_no:[4,2,1,""],item_texts:[4,2,1,""],items:[4,2,1,""],link:[4,2,1,""],list_variables:[4,2,1,""],merge_texts:[4,2,1,""],meta:[4,2,1,""],parents:[4,2,1,""],populate:[4,2,1,""],read_ascribe:[4,2,1,""],read_dimensions:[4,2,1,""],read_quantipy:[4,2,1,""],read_spss:[4,2,1,""],recode:[4,2,1,""],remove_html:[4,2,1,""],remove_items:[4,2,1,""],remove_values:[4,2,1,""],rename:[4,2,1,""],rename_from_mapper:[4,2,1,""],reorder_values:[4,2,1,""],repair:[4,2,1,""],repair_text_edits:[4,2,1,""],replace_texts:[4,2,1,""],set_encoding:[4,4,1,""],set_item_texts:[4,2,1,""],set_missings:[4,2,1,""],set_property:[4,2,1,""],set_text_key:[4,2,1,""],set_value_texts:[4,2,1,""],set_variable_text:[4,2,1,""],set_verbose_errmsg:[4,2,1,""],set_verbose_infomsg:[4,2,1,""],slicing:[4,2,1,""],sorting:[4,2,1,""],sources:[4,2,1,""],split:[4,2,1,""],start_meta:[4,3,1,""],subset:[4,2,1,""],take:[4,2,1,""],text:[4,2,1,""],to_array:[4,2,1,""],transpose:[4,2,1,""],uncode:[4,2,1,""],undimensionize:[4,2,1,""],undimensionizing_mapper:[4,2,1,""],unify_values:[4,2,1,""],unroll:[4,2,1,""],update:[4,2,1,""],validate:[4,2,1,""],value_texts:[4,2,1,""],values:[4,2,1,""],variables:[4,2,1,""],variables_from_set:[4,2,1,""],vmerge:[4,2,1,""],weight:[4,2,1,""],write_dimensions:[4,2,1,""],write_quantipy:[4,2,1,""],write_spss:[4,2,1,""]},"quantipy.QuantipyViews":{"default":[5,2,1,""],coltests:[5,2,1,""],descriptives:[5,2,1,""],frequency:[5,2,1,""]},"quantipy.Quantity":{calc:[9,2,1,""],count:[9,2,1,""],exclude:[9,2,1,""],filter:[9,2,1,""],group:[9,2,1,""],limit:[9,2,1,""],normalize:[9,2,1,""],rebase:[9,2,1,
""],rescale:[9,2,1,""],summarize:[9,2,1,""],swap:[9,2,1,""],unweight:[9,2,1,""],weight:[9,2,1,""]},"quantipy.Rim":{add_group:[6,2,1,""],group_targets:[6,2,1,""],report:[6,2,1,""],set_targets:[6,2,1,""],validate:[6,2,1,""]},"quantipy.Stack":{add_data:[10,2,1,""],add_link:[10,2,1,""],add_nets:[10,2,1,""],add_stats:[10,2,1,""],add_tests:[10,2,1,""],aggregate:[10,2,1,""],cumulative_sum:[10,2,1,""],describe:[10,2,1,""],from_sav:[10,3,1,""],load:[10,3,1,""],reduce:[10,2,1,""],refresh:[10,2,1,""],remove_data:[10,2,1,""],save:[10,2,1,""],variable_types:[10,2,1,""]},"quantipy.Test":{get_se:[9,2,1,""],get_sig:[9,2,1,""],get_statistic:[9,2,1,""],run:[9,2,1,""],set_params:[9,2,1,""]},"quantipy.View":{get_edit_params:[7,2,1,""],get_std_params:[7,2,1,""],has_other_source:[7,2,1,""],is_base:[7,2,1,""],is_counts:[7,2,1,""],is_cumulative:[7,2,1,""],is_meanstest:[7,2,1,""],is_net:[7,2,1,""],is_pct:[7,2,1,""],is_propstest:[7,2,1,""],is_stat:[7,2,1,""],is_sum:[7,2,1,""],is_weighted:[7,2,1,""],meta:[7,2,1,""],missing:[7,2,1,""],nests:[7,2,1,""],notation:[7,2,1,""],rescaling:[7,2,1,""],spec_condition:[7,2,1,""],weights:[7,2,1,""]},"quantipy.ViewMapper":{add_method:[8,2,1,""],make_template:[8,2,1,""],subset:[8,2,1,""]},Chain:{filename:[2,0,1,""]},quantipy:{Chain:[2,1,1,""],Cluster:[3,1,1,""],DataSet:[4,1,1,""],QuantipyViews:[5,1,1,""],Quantity:[9,1,1,""],Rim:[6,1,1,""],Stack:[10,1,1,""],Test:[9,1,1,""],View:[7,1,1,""],ViewMapper:[8,1,1,""]}},objnames:{"0":["py","attribute","Python attribute"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","staticmethod","Python static method"],"4":["py","classmethod","Python class 
method"]},objtypes:{"0":"py:attribute","1":"py:class","2":"py:method","3":"py:staticmethod","4":"py:classmethod"},terms:{"0x0000000019ae06d8":[28,31],"\u00ecnt":[20,31],"\u00ectem":39,"boolean":[4,6,10,18,35],"case":[0,4,5,6,9,10,12,13,15,18,20,23,24,26,28,35,37,38],"class":[2,3,4,5,6,7,8,9,10,11,22,35],"default":[3,4,5,7,8,9,10,13,18,20,21,23,24,35,38,39],"export":[0,3,4,20,35],"final":26,"float":[4,5,9,10,19,20,21,22,23,24,31,35],"function":[4,10,20,25,26,28,31,34,35],"import":[4,9,11,15,20,22,25,26],"int":[4,5,9,10,19,20,21,22,23,24,25,35],"long":[25,34,35],"m\u00fcller":0,"new":[3,4,5,8,9,10,12,21,23,24,26,31,34,35,38,39],"null":31,"return":[3,4,5,6,7,8,9,10,21,22,24,26,35],"short":[8,19,21,24,35],"sigur\u00f0sson":0,"static":[2,3,4,10],"switch":[24,38],"true":[4,5,6,8,9,10,13,20,21,22,24,25,26,31,35,38,39],"try":[4,10,23,35],"var":[4,9,15,35,38],"while":[4,15,19,23,24,35],Adding:[11,18,31],Age:[24,26],Being:15,But:[13,23],Das:[19,23],For:[4,5,13,14,20,22,23,24,25,26,28,31],IDs:[],NPS:31,Not:[19,24,31],One:[4,13,26],That:[15,19,22,24],The:[2,3,4,5,7,8,9,10,11,13,14,15,18,20,21,22,23,24,25,27,31,34,35,36,37,38],Their:5,Then:[21,35],There:[13,19,21,22,31,35],These:[15,28,31,34,35],Use:[4,9,24,35],Uses:4,Using:[18,21,23],Will:[4,5,9,10,35],With:[13,22,23,24,26,31],Yes:[20,35],__init__:[12,35],__setitem__:35,_band:4,_batch:[10,31],_cumsum:[31,35],_data:[4,11,21],_grid:38,_intersect:[28,31],_meta:[4,11,12,13,22,28,39],_missingfi:9,_rec:[4,24],_remove_html:4,_suffix:4,_sum:31,_tran:4,abbrevi:25,abl:[5,28,35],about:[13,19,22,36],abov:[24,31,39],absorb:34,accept:[23,26,35],access:[4,19,20,22,35,36],accessor:22,accid:[21,34],accommod:14,accompani:[19,35],accord:[4,9,10,26,31],accordingli:26,account:[4,5],achiev:21,across:[9,22,35],act:[5,24,28],activ:[19,20,24],add:[3,4,5,8,10,13,14,23,24,28,31,34,35,37],add_batch:[12,31],add_chain:3,add_data:10,add_filt:14,add_group:6,add_i:13,add_link:[10,28,31],add_meta:[4,23,35,37,39],add_method:8,add_net:[10,31,35],add_open_end:[13,
34,35],add_stat:[10,31,35],add_test:[10,31,35],add_x:[13,31],add_y_on_i:[13,28,31],adddit:20,added:[4,10,12,13,14,23,24,28,31,34,35,38],adding:[4,14,18,20,31,39],addit:[4,13,19,22,31,34,35],addition:[19,35],adjust:35,adopt:13,aerob:24,affix:10,after:[4,9,10,21,26,28,31],afternoon:22,again:[22,23,31],against:[5,9,13,35],age:[4,13,14,22,24,26,28,31,35],age_band:24,age_cb:26,age_grp_rc:26,age_rec:4,age_xb:26,age_xb_1:26,agegrp:[4,35],agg:[7,13,31],aggnam:7,aggreg:[0,2,3,4,5,7,8,9,10,11,14,16,35],aim:[0,35],alasdair:0,alexand:0,algorithm:[4,5,9],alia:[4,21],all:[2,3,4,5,9,10,11,12,13,14,19,20,21,22,23,24,25,26,28,31,35,37,38],allign:35,allow:[4,14,15,23,24,31],alon:[22,35],along:[2,9],alongsid:[10,21,31],alphanumer:4,alreadi:[4,12,14,19,24,26,35],also:[4,9,10,11,12,13,14,22,23,24,26,28,31,35,37,38],altern:[4,23,31,37],although:22,alwai:[4,15,23,24,26,31,35,39],amount:[14,21,28,31],analysi:[0,35],ani:[4,5,7,9,10,13,19,21,22,23,25,26,35],anim:[4,19],anoth:[13,23,24,26,35],answer:[4,9,10,19,22,23,24,35],anymor:35,anyth:26,anywher:10,api:4,appear:[4,37],append:[4,5,18,31,38],appli:[4,5,8,9,10,15,20,21,22,24,28,31,35,39],applic:[24,26],apporach:21,approach:37,appropri:[4,5],arbitrari:10,arbitrarili:[4,21,25,26],archiv:33,argument:[5,8,11,20,23,24,35],aris:28,arithmet:9,around:[4,35],arrai:[4,9,10,11,18,21,22,23,31,35,36,37],array_item:35,array_var:39,array_var_1:39,array_var_2:39,array_var_3:39,array_var_grid:39,arriv:9,as_addit:[13,34],as_delimited_set:35,as_df:9,as_float:35,as_int:35,as_singl:35,as_str:35,as_typ:[4,35],ascend:4,ascii:4,ascrib:[0,4,18],asid:24,ask:[19,24],askia:[5,9],assess:[0,38],assign:[4,6,37],assignd:24,associ:[2,10],assum:[4,9,28,38],attach:[4,10,21,23],attempt:4,attribut:[2,4,11,22,28,31,35],auto:4,autom:[0,10],automat:[4,13,14,20,22,23,24,31,35,39],auxiliari:9,avail:4,avoid:26,axes:4,axi:[2,3,4,5,7,9,10,15,19,22,35],axis_edit:[4,35],b_name:31,back:[4,5,38],badli:[4,35],band:[4,10,18,35],band_numer:35,bank:3,bank_chain:3,bar:35,base:[4,5,7,8,9,10,18,2
0,22,31,35],base_al:9,base_text:15,based_on:[4,35],baselin:8,basi:26,basic:[20,22,25,27],basketbal:24,batch1:[12,13,28,31],batch2:[12,13,28,31],batch3:31,batch4:31,batch5:31,batch6:31,batch:[0,4,10,15,28,31,34,35],batchnam:[11,13],bchain:3,becaus:[24,26],becom:[4,9,26,35],been:[4,9,10,13,21,24,26,34,35,39],beer:38,befor:[4,9,10,13,14,24,26,31,35],begin:[4,26],behaviour:[5,7,22,26,35,39],being:[4,24,39],belong:[4,11,12,13,31,35,38],below:[9,13,26,34],benefici:21,benefit:26,better:28,between:[4,9,13,31,35],bia:9,big:21,binari:35,bird:19,birgir:0,birth_dai:22,birth_month:22,birth_year:22,bivari:5,bla:[],blacklist:[4,35],blank:37,blueprint:35,board:[19,24,39],bool:[4,5,8,9,10,22,38],both:[4,10,12,13,19,20,21,23,24,26,28,35,39],bottom3:10,bound:35,brand:38,break_bi:13,breakfast:22,brief:35,broader:19,buchhamm:0,bug:35,bugfix:35,build:[0,3,4,5,9,15,18,19,34,35],built:[14,31],by_typ:[21,22,34],bytestr:2,cach:10,calc:[5,7,9,10,31],calc_onli:[5,31],calcul:[5,9,10,14,27],call:[4,10,21,22,24,28,34,35,39],came:35,can:[2,4,5,9,10,11,12,13,14,15,19,20,21,22,23,24,25,26,28,31,34,35,37,38,39],cannot:[26,34,35],cap:6,carefulli:4,carri:5,case1:38,case2:38,case3:38,casedata:[4,20],cat:[4,19,23,37,39],categor:[4,5,10,18,22,23,27,35,36],categori:[4,5,10,19,22,23,35,37],categorized_nam:[4,35],caus:35,caution:4,cave:[19,24,39],cbase:[10,31],cell:[5,9,11,26,34,35],cell_item:14,central:5,certain:[4,22],chain:[0,1,3,16,22,35],chainnam:2,chang:[4,6,9,10,18,21,24,35],charact:[4,23,25],characterist:[19,24],chart:[0,35],check:[4,5,9,10,22,26,35],check_dup:35,checking_clust:10,choic:0,choos:5,clariti:25,classmethod:4,clean:[0,4,10,21,24,35],clean_text:[23,35],clean_up:[4,35],clear:35,clone:[4,18],close:12,cluster:[0,1,2,10,13,14,15,35],code:[4,5,6,7,9,10,18,19,22,23,24,31,35,37,39],code_count:[4,22,25,35],code_from_label:[4,35],code_map:4,codes_in_data:[4,22,35],cola:38,collect:[4,5,10,11,13,14,19,21,22,24,27,31,34,35],colour:35,coltest:[5,8,10],column:[2,4,5,6,7,9,10,18,20,22,23,24,26,31,35,38,3
9],combin:[4,5,10,16,22,25,26,28,31,35],combind:38,come:35,comma:25,common:[7,19,24,35],commun:10,compabl:4,compar:[4,5,9,35],comparison:9,compat:[4,10,20,35,36],compatibilti:39,complet:[9,35],complex:[4,5,6,18,19,21,22,26,31,34,35],compli:[4,26],complic:18,compon:[4,5,7,10,11,15,18,21,22,23,24,35],compos:10,compound:3,comprehens:26,compress:10,comput:[0,4,5,8,9,10,27,28,31,35],concat:[2,22,35],concaten:[2,26],concern:31,cond_map:4,condit:[4,7,9,18,21,22,25,35],confirm:10,conjunct:34,connect:[4,10,12,13,35,39],consequ:[28,37],consid:[4,5,9,10,24],consist:[4,9,11,19,21,23,24,35],constitut:13,constrcut:10,construct:[4,11,13,14,19,21,23,28,31,35,39],contain:[0,2,3,4,7,8,9,10,11,13,14,19,21,22,23,24,28,31,35,38],content:[21,28],context:[19,23],continu:[],contrast:[19,24],contributor:0,control:[4,5,7,9,10,23,24,26,35],convcrit:6,convent:[4,9,35,39],convers:[4,18,35],convert:[0,4,9,20,24,35,39],coordin:8,cope:28,copi:[2,4,8,12,18,21,23,26,34,35,39],copy_array_data:4,copy_batch:12,copy_d:21,copy_data:[4,24],copy_from:[4,23,35],copy_not:[4,35],copy_of_batch1:12,copy_onli:[4,35],copy_to:[4,23,35],core:[5,20,22,25,35,37],correct:[9,23,35],correctli:35,correspond:[4,5,9,22,23,24,35,37,39],could:[23,24,26],count:[4,5,7,9,10,31,35,38],count_not:[4,22,35],count_onli:[4,22,35],counterpart:35,counts_cumsum:[31,35],counts_sum:31,cpickl:2,crash:35,creat:[2,3,4,6,9,10,11,13,14,15,16,18,20,22,24,28,31,35,36],create_set:[4,35],creation:[9,18,23,35],cross:[13,28,31],crossbreak:28,crosstab:[4,24],crunch:35,csv:[0,4,10,11,18],cum_sum:9,cumul:[7,9,10,27,35],cumulative_sum:[10,31],current:[0,2,4,10,24,34,35],custom:[4,10,16,18,20,24,35],custom_text:[10,31],customis:35,customiz:0,cut:[4,21],cut_item_text:4,cwi_filt:9,cycl:[4,8],dai:[22,26],danish:19,data:[2,4,5,6,7,9,10,11,14,15,20,21,23,24,28,31,34,35,36,38],data_df:[4,35],data_kei:[4,10,14,28,35],datafil:[4,35],datafram:[3,4,6,7,9,10,18,19,21,22,23,28,31,34,35],datakei:35,dataset:[0,1,10,11,12,13,18,20,22,24,28,31,34,35,36,37],dataset_left:
4,dataset_right:4,datasmoothi:0,date:[4,10,19,20,21,22,23,24,35],dates_as_str:20,ddf:[0,4,35],deafult:[5,9],deal:20,decim:4,deciph:[0,18],deck:0,decod:10,decode_str:10,decor:35,deep:4,deepcopi:35,defin:[2,3,4,9,10,13,14,19,21,23,28,31,35,37],definit:[2,4,5,9,10,14,19,24,26,27,28,34,35],definiton:[8,19,35],defint:[0,2,4,5,6,9,10,14,19,35],defintit:[21,39],defintiton:23,deleg:26,delet:[4,10],delimied_set:22,delimit:[4,10,19,20,21,22,23,24,25,26,35,37],delimited_set:[22,24,35],demograph:21,depend:[5,13,14,35],deprec:34,deriv:[4,9,18,21,35,37],derivc:8,derive_categor:35,derot:[4,35,36],desc:4,descend:4,describ:[0,2,3,4,10,14,19,21,28,31],descript:[4,5,8,9,10,19,27],design:20,desir:[4,6,22,35,37],detail:[7,18,19],detect:[4,6],determin:[4,5,10],detractor:31,deutsch:[19,23],dff:20,diagram:28,dicat:24,dice:18,dichot:20,dichotom:[4,19,20,35],dict:[2,3,4,5,6,7,8,9,10,19,23,24,26,31,35,38],dictat:21,dictionari:[4,6,7,8,10,35],differ:[5,7,9,10,13,19,20,23,24,28,31,35,36,38],digest:19,dim:[5,9,31],dim_comp:35,dimens:[0,4,5,9,18,35,36],dimension:4,dimensionizing_mapp:4,dimensions_comp:[4,20,35,39],dinner:22,direct:26,directli:[2,4,22,35],discret:[10,13],disk:[4,21,34],dispers:5,distinct:[19,31],distribut:[5,9],div:10,dive:[19,24,39],divid:9,dk_filter:4,dms:4,dmsrun:4,doc:[22,35],docstr:6,document:[0,4,10,19,20,23,35],doe:[4,11,23,26,35],doesn:[4,25,26],dog:19,don:[19,22,24,37],done:[4,13,14],doubl:22,down:39,downbreak:[28,31],download:39,draw:[21,24],drawn:[4,35],drink:38,drink_1:38,drink_2:38,drink_level:38,driven:0,drop:[4,5,9,10,21,35],drop_delimit:4,drope:[4,38],dropna:[4,6,35,38],dropx:4,dto:39,dtype:[20,22,26,35],dump:20,duplic:[4,23,35],durat:22,dure:[4,9,10,24],each:[2,4,5,6,10,13,14,19,23,24,28,31,38,39],eagleston:0,eas:35,easi:[0,19,22,23,35],easier:[12,22,28],easiest:22,easili:[12,20,31,35],ebas:10,echo:4,ect:35,edit:[0,4,7,9,13,14,15,18,19,21,24,26,35],edit_param:7,eff:9,effect:[6,9,26,34],ein:[19,23],either:[4,5,9,13,19,21,22,23,26,35],element:[4,9,10,19,35,39],eleph
:4,els:26,emploi:19,empti:[4,10,22,24,26,35],en_u:10,enabl:[23,35],encod:4,encount:20,end:[2,4,10,11,12,34,35],end_tim:22,enforc:35,eng:[4,35],engin:[1,27],english:[19,35],enrich:35,ensur:[4,11],enter:[4,14],entir:[4,9,20,35],entri:[4,6,9,10,19,26,35],enumer:[4,10,23,24,26,37],eponym:35,equal:[9,35],equip:31,equival:[4,39],eras:[4,10],error:[4,9,34,35],escap:4,especi:[22,26,31],estim:9,etc:[4,19,21,28,31,35],ethnic:[13,22,28,31],even:[31,35],ever:20,everi:[22,23,26],everyon:26,everyth:26,evid:31,exact:[4,35],exactli:[4,24,25],exampl:[2,4,5,10,11,18,19,20,22,24,25,28,31,35,38,39],excel:[0,3,4,5,35],except:[4,21,34],exchang:[9,23],exclud:[4,5,7,9,10,31,35],exclus:[4,25,35],execut:4,exercis:[22,24],exist:[4,9,10,12,13,18,21,23,24,26,31,35],expand:[4,7,9,10,31],expect:24,experiment:4,explain:14,explicit:26,explicitli:[4,10,35,37],explor:19,expos:23,express:[4,5,6,9,10,22,25,35],ext_valu:4,ext_xk:35,ext_yk:35,extend:[4,5,13,18,22,31,35],extend_filt:14,extend_i:[13,35],extend_valid_tk:35,extend_valu:[4,23,35,37],extend_x:35,extens:[3,4,10,19,20,35],extra:35,extract:14,extrapol:25,factor:[4,6,10],factor_l:10,factor_label:10,fail:[4,35],failur:20,fall:[4,5,9],fallback:7,fals:[4,5,9,10,13,20,22,23,24,26,31,35],favour:9,featur:[19,35],feed:[4,35],feedback:4,femal:[4,22],few:[11,21,22,35,38],figur:9,file:[2,3,4,5,10,11,20,21,22,28,34,35,39],file_nam:20,filenam:[2,10],fill:[4,18,35],fillna:[4,18],filter:[4,6,9,10,11,15,18,28,31,35],filter_def:6,filter_kei:14,find:[4,11,22,28,35],find_duplicate_text:4,finish:[4,21],finnish:19,first:[4,5,9,13,19,21,26,31],fit:[19,23,24],fix:[4,15,35],flag:[4,5],flag_bas:9,flatten:[4,24,35],flexibl:[6,10,24],float64:20,folder:20,follow:[4,7,9,11,14,15,19,20,21,22,23,24,26,28,31,35,38,39],folow:26,footbal:24,forc:[26,35],force_text:[4,23,35],form:[3,4,9,10,14,20,23,24,25,26,28],format:[0,4,6,9,19,20,23,26,31,35,38],former:[4,10,22,23,24],fortnight:22,found:[2,3,4,9,10,20,22,24,26,31,35],four:[28,31],frang:[4,25,26,35,38],french:19,frequenc:[4,5,7,8
,9,28,31,35],freysson:0,from:[0,2,3,4,5,9,10,12,14,15,18,19,21,24,25,26,28,31,34,35,38],from_compon:[4,20,35],from_excel:[4,35],from_sav:10,from_set:[4,20,35],from_stack:[4,35],front:35,fulfil:4,full:[3,4,5,10,21,23,25,39],fullnam:31,fun:34,further:21,futur:35,geir:0,gender:[4,13,14,15,22,24,26,28,31,35,38],gener:[2,4,5,7,8,10,11,20,22,23,26,31,35],german:[19,23],get:[4,7,10,11,12,13,14,22,28,31,35,37],get_batch:[4,12,13,28,31],get_edit_param:7,get_qp_dataset:35,get_s:9,get_sig:9,get_statist:9,get_std_param:7,give:[13,26,35],given:[4,5,6,9,10,20,26,35],global:[4,7,9,14,15,21,35],goe:4,going:37,grid:[4,35],griffith:0,group:[4,5,6,7,9,10,19,21,22,24,25,28,31,35],group_nam:6,group_target:6,grouped_ag:24,grp:9,grp_text_map:31,guid:35,gzip:10,hack:4,had:26,hand:24,handl:[0,6,8,9,10,20,31,35,37,39],happen:[4,24],happend:4,has:[2,4,10,12,15,19,21,25,26,31,34,35,38],has_al:[4,5,18],has_ani:[4,18],has_count:18,has_other_sourc:7,have:[4,9,10,13,20,23,24,25,26,28,34,35,38],head:[20,22,23,24,26,35,39],heirarch:4,hell:4,hello:20,help:[26,28,31],helper:[4,31],here:[4,26,28,35,39],hidden:[4,35],hide:[4,15,35],hide_valu:[4,35],high:[5,9],higher:9,hmerg:[4,35],hockei:24,hold:[2,4,9,10],horizont:[4,18],household:20,how:[3,4,10,14,19,22,24,28,31,37,39],howev:[23,24,26,37,39],hrafn:0,html:[4,23,35],ident:[4,20,24,35],identif:4,identifi:[4,5,23,35,38],ids:4,ignor:[4,9,10,22,24,35],ignore_cod:4,ignore_flag:9,ignore_item:[4,24,35],ignore_valu:[4,24],ill:23,implement:[9,19,22,34,35],impli:4,implicitli:9,impract:28,impute_method:6,incl:[10,35],includ:[2,3,4,5,9,10,13,25,26,28,31,35,38],inclus:[24,35],incom:[4,9,24],inconsist:[4,15,23,35],incorrect:35,incorrectli:35,independ:9,index:[0,2,4,9,10,35],indic:[4,5,9,10,23,24,26,35],individu:[3,4,35],industri:20,infer:[10,20,24,35],info:[4,18,19],inform:[0,4,7,8,13,14,15,19,20,23,26,31,35,37,38],inherit:11,inhomogen:9,init:26,initi:[4,18,35,38],inject:[4,26],innermost:7,inplac:[4,9,10,18,21,26,35],input:[6,9,10,11,20,35],insert:[4,13],insid:[2,4,8
,9,19,20,22,23,35],inspect:[4,18,35],instal:[4,35],instanc:[2,3,4,8,9,10,11,14,20,21,22,23,24,28,31,35],instead:[4,9,20,25,26,35],instruct:[5,9,19,26,31,35],int64:[20,22],integ:9,integr:22,intend:10,inter:[4,6],interact:[2,3,21,22,34],interfac:0,interim:35,interlock:[4,18,35],intern:[2,35],interpret:[4,25],intersect:[4,18,22,35],intro:21,introduc:9,involv:26,iolocal:[10,20],ioutf8:10,ipython:[21,34],is_arrai:31,is_bas:7,is_block:31,is_count:7,is_cumul:7,is_g:[4,25,35],is_like_numer:[22,24,35],is_meanstest:7,is_multi:31,is_nan:[4,22,25],is_nest:31,is_net:7,is_pct:7,is_propstest:7,is_stat:7,is_sum:7,is_weight:[7,31],isol:35,issu:[20,35],ist:[19,23],item:[4,9,11,13,19,20,21,22,23,24,31,34,35,37,38,39],item_no:[4,35],item_text:[4,22,35],iter:[8,22,34,35],its:[2,4,5,7,9,10,11,12,13,14,15,19,20,22,24,25,26,31,34,35,38,39],itself:[4,9],jame:0,jjda:20,jog:24,join:[4,22],json:[4,10,11,18,35],jupyt:[21,34],keep:[4,8,9,21,23,24,35,38],keep_bas:9,keep_cod:[9,35],keep_origin:4,keep_variable_text:35,kei:[2,4,5,7,10,11,14,19,23,24,26,31,35,38],kept:[4,9,11,35],kerstin:0,keyword:[5,8],kind:[22,24,28],kite:[19,24,39],know:[14,19,22,24,37,39],kritik:31,kwarg:[4,5,7,8,35],lab:10,label:[4,5,10,14,19,20,22,23,24,26,35,37,38,39],lack:0,lang:19,languag:[4,11,18,23],larg:[14,21,28,31],last:4,later:[13,20,34],latest:[0,33],latter:[22,34],lead:[24,31],least:[4,22,23],leav:[23,24],left:[4,9,35],left_id:4,left_on:[4,35],legaci:4,lemonad:38,length:35,less:[22,38],let:[23,24,28],letter:[],level:[4,5,9,10,14,19,26,31,35,38],lib:[4,19,35,39],librari:[0,19,35],lift:24,like:[0,4,5,10,13,14,19,21,22,24,26,28,31,34,35,38,39],limit:[9,24,26,35],link:[2,4,5,7,8,9,10,11,14,27,31,35],list:[2,4,5,6,8,9,10,13,18,19,21,22,23,24,26,35,37,38],list_vari:[4,34,35],listen:26,load:[2,3,4,10,11,35],load_cach:10,loc:31,local:[10,13,22,28,31],locat:[2,4,10,35],logic:[4,5,7,9,18,22,24,26,35],logic_a:[25,26],logic_b:[25,26],logic_c:[25,26],london:4,longer:35,look:[13,24,26,35,38,39],loop:[35,38],lose:[13,24,38],lot:[4,
21,22,26],low:[5,9],lower:[4,9,35],lower_q:9,lunch:22,machin:[4,20,35],made:2,mai:[4,25,26],main:[4,6,9,13,14,19,24,31,34,35],mainli:3,major:9,mak:35,make:[4,5,22,24,28,34,35],make_summari:13,make_templ:8,male:[22,26],manag:[0,18,26],mani:[13,28],manipul:[9,13,15],manual:[5,35,37],map:[4,5,6,8,9,10,18,20,23,35,37],mape:4,mapper:[4,18,25,35,38],mapper_to_meta:4,margin:[5,9],mark:9,market:[0,20],mask:[4,18,21,22,23,35,37,38,39],mass:5,massiv:26,master:35,match:[4,6,10,20,24,35,39],matric:9,matrix:9,matrixcach:10,matter:11,max:[9,10,31,35],max_iter:6,mdd:[0,4,20,35],mdm:4,mdm_lang:[4,35],mean:[4,5,6,7,9,10,13,15,20,22,24,25,26,31,35,38],measur:[5,9],median:[6,9,10,31,35],membership:22,memori:[21,34],men:[4,14,15,28,31],mention:[12,23],merg:[3,4,18,35],merge_text:[4,35],messag:35,meta:[4,5,7,10,11,12,14,15,18,20,21,22,24,26,31,35,37,38,39],meta_dict:[4,35],meta_edit:15,metadata:[0,4,10,18,19,20,24,26,35,37,39],method:[2,3,4,5,6,7,8,9,10,11,12,13,14,18,20,21,22,23,24,31,34,35,37,38,39],metric:[5,9,10],mid:[5,9,22],might:[10,21,23,24],mimic:[5,9],mimick:[5,9,22],min:[9,10,31,35],minimum:5,minor:35,miss:[4,6,7,14,20,22,23,24,37,39],missing_map:4,mix:[4,24],mode:[6,10,20,35,36],modifi:[4,9,10,15,24,35],modu:35,modul:[0,10],month:22,more:[2,4,22,25,26,31,35],morn:22,most:[9,21,22,26],mous:4,move:[0,4,21,35],mrs:4,mrset:4,mrset_tag_styl:4,much:[28,38],mul:10,multi:[5,9,19],multiindex:9,multipl:[0,3,4,5,19,22,23,25,26,34,35],multipli:9,multivari:9,must:[4,6,10,20,21,23,26,31,35,38],name:[2,3,4,5,6,7,8,9,10,12,13,19,20,21,22,23,24,26,28,31,35,37,38,39],name_data:[35,39],nan:[4,9,20,22,23,24,26,28,31,35,39],nate:4,nativ:[0,4,18,35],natur:[9,21,22],necessari:[9,21,35,38],need:[8,9,21,23,24,39],neg:35,nest:[7,10,19,25,26],net1:[],net2:[],net:[5,7,9,10,18,27],net_1:[10,31],net_2:[10,31],net_3:10,net_map:[10,31],never:[22,23],new_arrai:23,new_array_1:23,new_array_2:23,new_array_3:23,new_array_4:23,new_array_97:23,new_array_98:23,new_chain:35,new_cod:9,new_d:35,new_data:10,new_data_k
ei:10,new_dataset:4,new_int:23,new_meta:10,new_nam:[4,23,24,35],new_ord:[4,21,35],new_rul:35,new_set:[4,35],new_singl:23,new_stack:2,new_text:[4,23],new_var:35,new_weight:10,next:[4,10,11,19,31,39],no_data:24,no_filt:[4,10,28,31,35],non:[4,10,22,25,27,35],none:[2,3,4,5,6,7,8,9,10,13,20,21,22,23,24,26,28,34,35,37,39],none_band:35,nonea:4,normal:[4,5,9],norwegian:19,not_al:18,not_ani:[4,9,18,35],not_count:18,notat:[5,7,9,10],note:[2,4,5,26,31],notebook:[21,34],notimplementederror:[15,34,35],now:[13,14,21,31,34,35,38],num:5,number:[4,6,9,20,22,23,24,35],numer:[4,5,10,18,19,23,24,31,35,37],numpi:[0,9],obei:22,object:[2,3,4,5,9,10,18,20,21,24,26,28,31,35,37],obscur:26,observ:0,obvious:31,occur:26,oe_q8:13,oe_q9:13,offer:[0,4,19,22,24,34,37,39],often:[21,22,24],old:[4,5,10,35],old_cod:9,old_nam:35,older:35,omit:26,on_var:[10,31],onc:22,one:[2,4,5,12,13,19,21,22,23,24,26,28,31,34,35],ones:[4,9,15,21,22],onli:[4,8,9,10,11,13,14,21,23,24,26,28,31,34,35,37,38],only_men:24,only_typ:[4,10],onto:26,open:[0,11,12,34],oper:[4,5,20,22,24,25,26,34],operat:18,opportun:20,oppos:21,option:[4,5,6,9,10,13,14,19,22,23,31,35],order:[2,4,11,18,19,24,34,35],ordereddict:[3,10,13,28,31],organ:16,orgin:9,orient:2,origi:4,origin:[4,9,10,21,24,35],other:[4,5,9,11,12,13,19,23,24,25,26,35,38],other_sourc:[10,31,35],otherwis:[4,35],our:[0,22,26],out:[5,12,14,24,31,35],outcom:4,outdat:35,output:[4,9,10,13,22,35],outsid:35,over:[8,9,35],overcod:[4,10,31],overlap:9,overview:[4,10,22,28,35],overwrit:[4,15,21,26,35],overwrite_margin:9,overwrite_text:4,overwritten:[4,10,35],ovlp_correc:9,own:13,pack:4,packag:5,paint:3,pair:[4,5,9,18,23,26,35,37],panda:[0,3,4,6,9,10,19,20,22,26,35],pane:4,parachut:[19,24,39],parallel:10,paramet:[3,4,5,6,7,8,9,10,13,21,22,23,24,26,31,35,38],parent:[4,19,20,34,35],pars:5,part:[4,7,19,21,22,26],parti:18,particip:20,particular:4,pass:[4,5,9,10,21,22,24,35],past:26,path:[2,3,4,10,20,35],path_clust:3,path_csv:20,path_data:[4,39],path_ddf:[4,20,35],path_json:20,path_mdd:[4,20,35]
,path_meta:4,path_report:4,path_sav:[4,20],path_sav_analysi:20,path_stack:10,path_txt:20,path_xlsx:[4,35],path_xml:20,pct:4,peopl:20,per:[4,5,6,9,10,12,20,22,23,24,26,35,39],percentag:[7,9,10,31],perform:[4,5,7,9,10,21,26,31,35],perman:21,physic:34,pick:[4,24],pickl:2,pilat:24,pivot:10,place:[4,9,26],plai:22,plain:[0,7,20],plan:[11,13,14,19,24,28,31],pleas:[21,22,35],point:19,pointer:19,pool:9,popul:[4,11,13,14,27,31,35],portion:10,posit:[4,9,21,22,23,28,35],possibl:[3,4,5,10,12,13,19,22,23,25,31,34,35,38],power:0,powerpoint:[5,35],powerpointpaint:35,pptx:35,pre:[4,26,31],precis:[26,35],prefer:22,prefix:[4,10],prep:25,prepar:[3,21,23,31,35],present:[4,9,35],preset:[4,10],pretti:[4,26],prevent:[15,21,23,24,34,35],previou:[21,34],previous:35,print:[13,22,28,31,35],prior:[4,24],probabl:[19,24],problem:35,process:[0,4,8,10,20,21,22],produc:[5,9,13,24],product:38,profession:[4,35],progress:4,prohibit:23,project:0,promot:31,promotor:31,prop:[5,31],prop_nam:4,prop_valu:4,proper:[35,39],properli:39,properti:[4,9,11,20],proport:[5,6,7],protect:4,provid:[3,4,5,7,8,9,10,19,20,21,22,23,24,26,31,34,35,37],proxi:10,purpos:19,put:10,q11:35,q11_grid:35,q12:38,q12_10:38,q12_11:38,q12_12:38,q12_13:38,q12_1:38,q12_2:38,q12_3:38,q12_4:38,q12_5:38,q12_6:38,q12_7:38,q12_8:38,q12_9:38,q12_:38,q12a:38,q12a_10:38,q12a_11:38,q12a_12:38,q12a_13:38,q12a_1:38,q12a_2:38,q12a_3:38,q12a_4:38,q12a_5:38,q12a_6:38,q12a_7:38,q12a_8:38,q12a_9:38,q12a_grid:38,q12b:38,q12b_10:38,q12b_11:38,q12b_12:38,q12b_13:38,q12b_1:38,q12b_2:38,q12b_3:38,q12b_4:38,q12b_5:38,q12b_6:38,q12b_7:38,q12b_8:38,q12b_9:38,q12b_grid:38,q12c:38,q12c_10:38,q12c_11:38,q12c_12:38,q12c_13:38,q12c_1:38,q12c_2:38,q12c_3:38,q12c_4:38,q12c_5:38,q12c_6:38,q12c_7:38,q12c_8:38,q12c_9:38,q12c_grid:38,q12d:38,q12d_10:38,q12d_11:38,q12d_12:38,q12d_13:38,q12d_1:38,q12d_2:38,q12d_3:38,q12d_4:38,q12d_5:38,q12d_6:38,q12d_7:38,q12d_8:38,q12d_9:38,q12d_grid:38,q14_1:4,q14_1_1:4,q14_1_2:4,q14_1_3:4,q14_2:4,q14_2_1:4,q14_2_2:4,q14_2_3:4,q14_3:4,q14_3
_1:4,q14_3_2:4,q14_3_3:4,q1_1:[4,25,26,38],q1_2:[4,26,38],q1_3:[4,26],q1_rec:4,q2_count:22,q2array_tran:4,q2b:[13,22,28,31],q3_no_data:24,q3_only_men:24,q3_rec:24,q3_version2:24,q4a:35,q4a_1:35,q4a_2:35,q4a_3:35,q4a_grid:35,q5_1:[19,21,22,24,35,39],q5_2:[19,21,22,24,35,39],q5_3:[19,21,22,24,35,39],q5_4:[19,21,22,24,35,39],q5_5:[19,21,22,24,35,39],q5_6:[19,21,22,24,35,39],q5_grid:39,q5_tran:24,q5_trans_1:24,q5_trans_2:24,q5_trans_3:24,q5_trans_4:24,q5_trans_5:24,q5_trans_97:24,q5_trans_98:24,q6_1:[13,21,22,28,31],q6_2:[13,21,22,28,31],q6_3:[13,21,22,28,31],q6_calc:31,q6_grid:39,q6_net:31,q6copi:39,q6new:39,q6new_grid:39,q6new_q6copi:39,q6new_q6copy_grid:39,q6new_q6copy_tran:39,q6new_q6copy_trans_grid:39,q7_1:[21,22,35],q7_2:[21,22,35],q7_3:[21,22,35],q7_4:[21,22,35],q7_5:[21,22,35],q7_6:[21,22,35],q7_grid:39,q8_with_a_new_nam:23,q8a:[13,22],q9a:[13,22],q_group:38,q_label:[4,35],qtp:38,qtype:[4,23,37,39],qualifi:[5,9],quantifi:1,quantipi:[2,3,4,5,6,7,8,9,10,11,19,20,22,25,31,35,37,39],quantipyview:[1,10,35],quantiti:[9,35],queri:[2,4,6,10,18,19,24,34,35,39],question:[4,9,19,24,26,31,35,38],questionnair:21,quick:[4,22,35],quickli:[6,21,22,24,35],radio:26,radio_st:26,radio_stations_cb:26,radio_stations_xb:26,rais:[4,15,21,31,34,35],rang:[4,5,18,21,35],rate:[35,38],raw:[5,9],raw_sum:9,rbase:10,read:[0,4,20,35],read_ascrib:[4,20],read_deciph:20,read_dimens:[4,20],read_quantipi:[4,11,20,35,39],read_spss:[4,20],rebas:9,rebuild:24,rec:[4,35],receiv:38,recod:[0,4,18,34,35],recode_seri:4,recommend:24,record_numb:[13,22],reduc:[4,9,10,21,35],reduced_d:21,reduct:4,refactor:35,refer:[4,9,10,19,23,26,28,31,35],referenc:[10,13,19,26,35],reflect:[9,21,35],refresh:10,refus:[19,24],regard:4,region:[4,35],regist:[4,22,35],regular:[4,19,31],regularli:[22,23,24],reindex:4,reintroduc:34,rel:21,rel_to:7,relat:[2,7,23,26,34,35],relation_str:7,relationship:10,relev:23,religion:22,reload:[21,34],remain:[4,5,21,26,35],rememb:37,remind:37,remov:[4,5,9,10,12,13,18,34,35],remove_data:10,remove_ht
ml:[4,35],remove_item:4,remove_valu:4,renam:[4,18,24,35,39],rename_from_mapp:4,renamed_item:4,renamed_v:4,reorder:18,reorder_valu:4,reorgan:0,repair:[4,35],repair_text_edit:[4,35],repeat:[21,28],repetit:26,replac:[4,9,13,23,26,31,34,35],replace_i:13,replace_text:[4,35],report:[0,4,5,6,35],reposit:[21,35],repres:[4,26],represent:[7,9,10,19,24],request:[5,10,13,21,23,26,31],requir:[4,21,23,35,39],rescal:[5,7,9,10,31],research:[0,20],reserach:[],reset:[4,35],reset_index:4,resid:35,respect:[4,9,24,35,37],respond:[9,21,38],respons:[19,22,25,26,35,38],restor:[21,35],restrict:[4,5,9,10,19,21,22,35],result:[3,4,5,8,9,10,16,20,22,23,24,26,28,35],result_onli:9,retain:8,retriev:9,revers:[24,25],revert:[21,34],right:[4,35],right_id:4,right_on:[4,35],rim:[1,4],rollback:[18,34],round:6,row:[4,5,9,18,20,22,35,39],row_id:4,row_id_nam:4,rule:[4,39],run:[4,9,10,15,24,28,31,35],safe:[4,23],safeguard:4,sai:26,same:[3,4,10,13,19,20,22,26,28,34,35,38,39],sampl:[5,7,9],sample_s:14,satisfi:35,sav:[4,10,20],save:[2,3,4,10,21,28,34,35],savepoint:18,scalar:35,scale:[5,6,9,19,38],scenario:39,scheme:[4,6,19],scratch:18,script:4,search:4,second:[4,5,9,15,31],sect:4,section:[7,9,11,14,21,26],see:[13,21,24,26,28,31,34,35],seen:[26,39],segemen:26,segment:18,select:[4,8,10,13,14,21,22,31,35,38],self:[2,4,9,26,28,35],sem:[9,10],separ:[4,26,37],septemb:[],seri:[4,19,22,26,35],serial:2,session:[21,34],set:[3,4,5,6,8,9,10,11,12,13,18,19,20,22,23,24,26,28,35,37],set_cell_item:14,set_col_text_edit:35,set_column_text:35,set_encod:4,set_item_text:[4,23,35],set_languag:14,set_mask_text:35,set_miss:4,set_opt:35,set_param:9,set_properti:[4,15],set_sigtest:[14,31],set_target:6,set_text_kei:4,set_val_text_text:35,set_value_text:[4,15,23,35],set_var:4,set_variable_text:[4,15,23,35],set_verbose_errmsg:4,set_verbose_infomsg:4,set_weight:14,setdefaultencod:4,setnam:[4,35],setup:[4,9,13,35],sever:[5,22,38],shape:[4,21,31],share:[4,19],sheet:[4,35],shop:4,short_item_text:35,shorten:[4,35],shorthand:[4,5],shortnam:[5,7
],should:[3,4,10,14,21,22,26,35,38,39],show:[4,9,13,19,21,22,31,35],shown:[4,9,35],side:[8,34],sig:[5,9,10,14],signific:[5,9,11,27],similar:[28,38],similarli:[22,23],similiar:23,simpl:[5,6,9,19,25,35,37],simpli:[4,22,23,24,25,31],simplifi:[24,26],sinc:[9,26,31,39],singl:[3,4,10,19,20,21,22,23,24,26,35,37,39],sit:26,six:22,size:[5,7,9,21,22,31],skip:[22,23,31,35],slice:[4,7,15,18,35],slicer:[4,18,22,24,35],slicex:4,small:[5,35],snack:22,snapshot:[21,34],snowboard:[19,24,39],soccer:24,social:0,softwar:[0,4,5,9,20],solut:34,some:[13,14,15,22,25,26,35,38],someth:38,sometim:[21,28],soon:[34,35],sorri:0,sort:[4,15,35],sort_by_weight:4,sortx:4,sourc:[0,4,10,19,20,22,23,35,39],source_item:4,space:[4,25],speak:19,spec:3,spec_condit:7,specfic:14,special:[0,11,14,19,28,31,35],specif:[3,4,5,7,9,10,11,13,14,15,19,21,23,39],specifi:[2,4,5,6,9,10,13,20,23,31,35,37],split:[4,10,13,35],split_view_nam:10,sport:[20,22],spreadsheet:0,spss:[0,4,9,18,35],spss_limit:35,stack:[2,3,4,5,7,11,13,14,27,31,35],stage:34,standalon:18,standard:[7,9,20],standardli:24,start:[4,18,23,24,26],start_meta:[4,35],start_tim:22,stat:[5,9,10,31],state:[15,18,24,34],statement:[4,5,19,25,26],statisfi:35,statist:[0,4,5,7,9,10,18,27,28,35],std_paramet:7,stddev:[9,10,31,35],ste:35,stem:35,step:[31,37],still:26,store:[4,5,10,11,12,13,19,21,24,28,31,34,35],store_cach:10,str:[3,4,5,6,7,8,9,10,24,35,37,38],strict:[4,35],strict_select:8,strictli:23,string:[2,4,6,7,9,10,19,20,21,22,23,24,25,35],strip:35,structur:[0,4,6,8,10,11,13,19,20,21,24,28,35,38],studi:35,stuff:[],style:[4,10],sub:[10,31],subclass:[2,11],subclasss:15,subset:[4,8,18,22,24,35],subset_d:4,subtl:34,subtyp:[19,35,39],suffix:[4,5,24,39],sum:[4,5,7,9,10,27,35],summar:[4,5,9,35],summari:[4,5,6,7,9,10,13,22,31,34,35],summat:9,suppli:24,supporintg:10,support:[0,7,10,18,19,22,23,24,35,39],surf:[19,24,39],survei:21,sv_se:[31,35],swap:[7,9,10,35],swedish:[19,35],swim:24,sys:4,tab:20,tabl:[0,10],tabul:[5,13,28],tag:[4,23,35],take:[4,5,11,22,24,25,26,34,35,38],t
aken:[4,10,14,15,24,35],target:[4,6,18,23,35],target_item:4,task:22,team:22,temp:4,templat:[5,8,35],temporari:[4,34,35],ten:26,tend:4,term:[10,23,35],test:[2,4,5,7,9,11,22,27,35,37],test_cat_1:37,test_cat_2:37,test_cat_3:37,test_tot:[5,9],test_var:37,testtyp:9,text1:20,text:[4,5,7,10,14,18,20,21,22,24,26,31,35,37,39],text_kei:[3,4,10,11,18,22,23,31,35],text_label:[4,35],text_prefix:10,textkei:[4,35],than:[4,22,24,35],thei:[4,9,13,14,20,25,26,31,35],them:[4,5,13,20,22,26,31,35],themselv:[4,9],therefor:[4,5,24],thi:[2,3,4,5,6,9,10,13,14,15,20,21,22,23,24,26,28,31,35,38,39],third:18,thorugh:24,those:4,three:[4,21,22,24,26,35,37],threshold:5,through:[2,3,4,8],throughout:[4,19,20,35],thu:6,time:[10,19,21,22],titl:13,tks:35,to_arrai:[4,35,38],to_df:9,to_excel:10,todo:[4,5,6,8,9,10],togeth:[3,4,10,19,21],toggl:10,too:34,tool:[5,20,24,25],top2:31,top3:10,top:26,topic:[19,35],total:[4,5,6,9,13,35],toward:35,tracker:35,transform:[0,4,5,9,18],translat:23,transpos:[4,13,24,31,39],transpose_arrai:13,transposit:24,treat:[4,9,25,31],tree:28,treshhold:9,trigger:4,tstat:9,tupl:[4,7,23,24,35,37],turn:19,two:[4,5,9,13,19,21,23,28,31,34,35],txt:[4,20],type:[0,3,4,5,6,7,8,9,10,13,18,20,23,26,31,34,35,37,39],type_nam:10,typic:26,ultim:4,unabbrevi:25,unattend:4,uncod:[4,35],uncode_seri:4,uncodit:5,unconditi:9,under:[4,5],underli:19,understood:20,undimension:4,undimensionizing_mapp:4,uni:[5,9],unicod:10,unifi:[4,34],uniformli:20,unify_valu:4,union:[18,26],uniqu:[4,5,10,24,28,31],unique_id:22,unique_kei:[4,35,38],uniquify_kei:4,unkei:26,unless:4,unlik:[19,24],unpool:9,unqiu:24,unrol:[4,31,35],untouch:23,unusu:35,unwant:[4,35],unweight:[9,10,31],unweighted_bas:[10,31],upcom:[],updat:[4,7,8,9,23,34,35],update_axis_def:[9,35],update_exist:[4,35],upon:19,upper:[4,35],upper_q:9,usag:[23,35],use:[0,2,4,5,9,12,13,19,20,21,22,23,24,26,34,35,36],use_ebas:9,used:[2,3,4,5,7,8,9,10,11,14,15,20,21,24,26,31,35],useful:[21,22,34],user:[2,4,14,34,35,37],userwarn:[35,37],uses:[4,9],using:[0,2,3,4,6,10,19,20
,21,24,25,26,28,31,35,39],usual:19,utf:10,val:4,val_text_sep:4,valid:[4,6,10,14,19,24,26,31,35,37],valid_cod:35,valid_tk:[11,35],valu:[3,4,5,6,7,8,9,10,18,20,21,24,25,26,31,35,36,39],value_count:[22,35],value_map:38,value_text:[4,22,35],valueerror:[4,21,23,35],var_exist:[22,35],var_grid:38,var_nam:38,varcoeff:9,vari:22,variabl:[0,4,5,6,7,9,10,11,18,19,20,23,27,34,35,37,38,39],variable_typ:10,variables_:[],variables_from_set:[4,34,35],varianc:9,variant:22,varibal:38,varibl:35,variou:[5,14,22,28,31],varlist:[4,35],varnam:[4,35],vector:9,verbatim:11,verbos:[4,10,25,31,35],veri:[19,23,24],versa:9,version2:24,version:[4,5,9,10,19,21,23,24,26,34,35],versu:31,vertic:[4,18],via:[0,4,5,10,21,22,23,24,31,34,35],vice:9,view:[1,2,3,4,5,8,9,10,14,16,22,27,28,35],view_kei:31,view_name_not:9,viewmapp:[1,10],viewmeta:7,visibl:[31,35],vmerg:[4,35],wai:[10,12,13,19,21,22,23,26,31,35,36],wait:21,want:[21,24,26],warn:[4,31,34,35],water:38,wave:21,week:22,weight:[0,4,6,7,9,10,11,12,22,24,31,35],weight_a:[14,22,31],weight_b:22,weight_column_nam:6,weight_nam:4,weight_schem:4,well:[4,9,20,22,25,26,31,35,39],went:34,were:[26,35],what:[16,19,20,24,26,27,36,39],whatev:[4,26],when:[4,5,9,10,20,21,23,24,26,35,39],where:[2,3,4,9,24,25,26],whether:[5,9],which:[4,5,9,10,11,13,14,15,22,23,24,26,28,31,35,38],whole:[4,35],whose:35,wide:35,wil:4,wildcard:26,window:20,windsurf:[19,24,39],wise:[4,31],witch:35,within:[4,9],without:35,women:15,work:[4,11,21,23,31,34,35],workbook:3,world:20,would:[4,19,24,26],wouldn:[19,24],wrap:35,wrapper:[4,9],write:[4,10,20,21,39],write_dimens:[4,35],write_quantipi:[4,21],write_spss:[4,20],writen:38,written:[21,34],wrong:34,x_filter_map:[28,31],x_kei:14,x_y_map:[13,14,28,31],xdef:9,xks:[10,31,35],xlsx:4,xml:[4,20],xsect:9,xtotal:4,y_kei:[14,28,31],y_on_i:[13,28,31],year:[19,22,39],yes:20,yield:22,yks:35,yoga:24,you:[4,11,13,14,19,20,21,22,23,24,26,28,31,34,35,38,39],younger:24,your:[4,19,20,21,24,26,35],ysect:9},titles:["Quantipy: Python survey data toolkit","API 
references","Chain","Cluster","DataSet","QuantipyViews","Rim","View","ViewMapper","quantify.engine","Stack","Batch","Creating/ Loading a qp.Batch instance","Adding variables to a qp.Batch instance","Set properties of a qp.Batch","Inherited qp.DataSet methods","Builds","Combining results","Data processing","DataSet components","I/O","DataSet management","Inspecting variables","Editing metadata","Transforming variables","Logic and set operaters","Custom data recoding","Analysis & aggregation","Collecting aggregations","The computational engine","Significance testing","View aggregation","Documentation","Release notes","Latest (15/09/2017)","Archived release notes","How-to-snippets","Different ways of creating categorical values","Derotation","DataSet Dimensions compatibility"],titleterms:{"boolean":25,"case":[19,21,22],"default":26,Adding:[13,26],The:[19,26,29,39],Using:20,about:38,access:39,adding:23,aggreg:[13,17,27,28,31],analysi:27,api:1,append:26,archiv:35,arrai:[13,19,24,38,39],ascrib:20,band:[24,26],base:26,basic:31,batch:[11,12,13,14],build:[16,26],calcul:31,categor:[19,24,31,37],cell:14,chain:[2,17],chang:23,clone:21,cluster:3,code:[25,26],collect:28,column:[19,21],combin:17,compat:39,complex:25,complic:26,compon:[19,20],comput:29,condit:26,convers:[20,24],copi:24,creat:[12,17,23,26,37,39],creation:26,csv:20,cumul:31,custom:[17,26],data:[0,18,19,22,26,39],datafram:20,dataset:[4,15,19,21,23,38,39],deciph:20,defin:[],definit:31,deriv:26,derot:38,descript:31,detail:26,dice:22,differ:37,dimens:[20,39],document:32,edit:23,end:13,engin:[9,29],exampl:26,exist:[22,25],extend:23,featur:0,file:[],fill:26,fillna:26,filter:[14,21],from:[20,23],full:[],gener:[],has_al:25,has_ani:25,has_count:25,horizont:21,how:[36,38],info:23,inherit:15,initi:26,inplac:24,inspect:22,instanc:[12,13],interlock:26,intersect:[25,26],item:14,json:20,kei:[0,13],languag:[14,19],latest:[34,35],link:28,list:25,load:12,logic:25,main:[],manag:21,map:19,mapper:26,mask:19,merg:21,meta:[19,23],metadata:
[22,23],method:[15,26],mode:39,nativ:20,net:[26,31],non:31,not_al:25,not_ani:25,not_count:25,note:[33,35],numer:26,object:[19,22,23],open:13,operat:25,order:21,organ:17,pair:20,parti:20,popul:28,posit:[],process:18,properti:14,python:0,quantifi:9,quantipi:0,quantipyview:5,queri:22,rang:25,recod:26,refer:1,rel:[],releas:[33,35],remov:[23,26],renam:23,reorder:23,result:17,rim:6,rollback:21,row:21,savepoint:21,scratch:23,segment:26,septemb:[],set:[14,21,25],signific:[14,30,31],slice:22,slicer:25,snippet:36,special:13,spss:20,stack:[10,28],standalon:20,start:20,state:21,statist:[20,31],subset:21,sum:31,support:20,survei:0,target:26,test:[14,30,31],text:[19,23],text_kei:19,third:20,toolkit:0,transform:24,type:[19,22,24],union:25,upcom:[],use:38,valu:[19,22,23,37],variabl:[13,21,22,24,26,31],verbatim:13,vertic:21,view:[7,17,31],viewmapp:8,wai:37,weight:14,what:[17,28,38]}}) \ No newline at end of file +Search.setIndex({docnames:["index","sites/api_ref/00overview","sites/api_ref/Chain","sites/api_ref/Cluster","sites/api_ref/DataSet","sites/api_ref/QuantipyViews","sites/api_ref/Rim_scheme","sites/api_ref/Stack","sites/api_ref/View","sites/api_ref/ViewMapper","sites/api_ref/quantify_engine","sites/lib_doc/batch/00_overview","sites/lib_doc/batch/01_create_load","sites/lib_doc/batch/02_variables","sites/lib_doc/batch/03_properties","sites/lib_doc/batch/04_subclass","sites/lib_doc/builds/00_overview","sites/lib_doc/builds/01_chains","sites/lib_doc/dataprocessing/00_overview","sites/lib_doc/dataprocessing/01_components","sites/lib_doc/dataprocessing/02_io","sites/lib_doc/dataprocessing/02a_management","sites/lib_doc/dataprocessing/03_inspection","sites/lib_doc/dataprocessing/04_editing","sites/lib_doc/dataprocessing/05_transforming","sites/lib_doc/dataprocessing/06_logics","sites/lib_doc/dataprocessing/07_custom_recoding","sites/lib_doc/engine/00_overview","sites/lib_doc/engine/01_links_stacks","sites/lib_doc/engine/02_quantity","sites/lib_doc/engine/03_test","sites/lib_doc/engi
ne/04_agg_methods","sites/release_notes/00_overview","sites/release_notes/01_latest","sites/release_notes/02_archive","sites/release_notes/03_how_to_snippets","sites/release_notes/how_to_snippets/create_categorical_meta","sites/release_notes/how_to_snippets/derotate","sites/release_notes/how_to_snippets/dimensions_comp"],envversion:52,filenames:["index.rst","sites\\api_ref\\00overview.rst","sites\\api_ref\\Chain.rst","sites\\api_ref\\Cluster.rst","sites\\api_ref\\DataSet.rst","sites\\api_ref\\QuantipyViews.rst","sites\\api_ref\\Rim_scheme.rst","sites\\api_ref\\Stack.rst","sites\\api_ref\\View.rst","sites\\api_ref\\ViewMapper.rst","sites\\api_ref\\quantify_engine.rst","sites\\lib_doc\\batch\\00_overview.rst","sites\\lib_doc\\batch\\01_create_load.rst","sites\\lib_doc\\batch\\02_variables.rst","sites\\lib_doc\\batch\\03_properties.rst","sites\\lib_doc\\batch\\04_subclass.rst","sites\\lib_doc\\builds\\00_overview.rst","sites\\lib_doc\\builds\\01_chains.rst","sites\\lib_doc\\dataprocessing\\00_overview.rst","sites\\lib_doc\\dataprocessing\\01_components.rst","sites\\lib_doc\\dataprocessing\\02_io.rst","sites\\lib_doc\\dataprocessing\\02a_management.rst","sites\\lib_doc\\dataprocessing\\03_inspection.rst","sites\\lib_doc\\dataprocessing\\04_editing.rst","sites\\lib_doc\\dataprocessing\\05_transforming.rst","sites\\lib_doc\\dataprocessing\\06_logics.rst","sites\\lib_doc\\dataprocessing\\07_custom_recoding.rst","sites\\lib_doc\\engine\\00_overview.rst","sites\\lib_doc\\engine\\01_links_stacks.rst","sites\\lib_doc\\engine\\02_quantity.rst","sites\\lib_doc\\engine\\03_test.rst","sites\\lib_doc\\engine\\04_agg_methods.rst","sites\\release_notes\\00_overview.rst","sites\\release_notes\\01_latest.rst","sites\\release_notes\\02_archive.rst","sites\\release_notes\\03_how_to_snippets.rst","sites\\release_notes\\how_to_snippets\\create_categorical_meta.rst","sites\\release_notes\\how_to_snippets\\derotate.rst","sites\\release_notes\\how_to_snippets\\dimensions_comp.rst"],objects:{"
quantipy.Chain":{concat:[2,2,1,""],copy:[2,2,1,""],describe:[2,2,1,""],load:[2,3,1,""],save:[2,2,1,""]},"quantipy.Cluster":{add_chain:[3,2,1,""],bank_chains:[3,2,1,""],load:[3,3,1,""],merge:[3,2,1,""],save:[3,2,1,""]},"quantipy.DataSet":{add_meta:[4,2,1,""],all:[4,2,1,""],any:[4,2,1,""],band:[4,2,1,""],by_type:[4,2,1,""],categorize:[4,2,1,""],clear_factors:[4,2,1,""],clone:[4,2,1,""],code_count:[4,2,1,""],code_from_label:[4,2,1,""],codes:[4,2,1,""],codes_in_data:[4,2,1,""],compare:[4,2,1,""],convert:[4,2,1,""],copy:[4,2,1,""],copy_array_data:[4,2,1,""],create_set:[4,2,1,""],crosstab:[4,2,1,""],cut_item_texts:[4,2,1,""],data:[4,2,1,""],derive:[4,2,1,""],derotate:[4,2,1,""],describe:[4,2,1,""],dichotomize:[4,2,1,""],dimensionize:[4,2,1,""],dimensionizing_mapper:[4,2,1,""],drop:[4,2,1,""],drop_duplicates:[4,2,1,""],duplicates:[4,2,1,""],empty:[4,2,1,""],empty_items:[4,2,1,""],extend_items:[4,2,1,""],extend_values:[4,2,1,""],factors:[4,2,1,""],filter:[4,2,1,""],find:[4,2,1,""],find_duplicate_texts:[4,2,1,""],flatten:[4,2,1,""],force_texts:[4,2,1,""],from_batch:[4,2,1,""],from_components:[4,2,1,""],from_excel:[4,2,1,""],from_stack:[4,2,1,""],fully_hidden_arrays:[4,2,1,""],get_batch:[4,2,1,""],get_property:[4,2,1,""],hide_empty_items:[4,2,1,""],hiding:[4,2,1,""],hmerge:[4,2,1,""],interlock:[4,2,1,""],is_like_numeric:[4,2,1,""],is_nan:[4,2,1,""],item_no:[4,2,1,""],item_texts:[4,2,1,""],items:[4,2,1,""],link:[4,2,1,""],merge_texts:[4,2,1,""],meta:[4,2,1,""],meta_to_json:[4,2,1,""],min_value_count:[4,2,1,""],names:[4,2,1,""],order:[4,2,1,""],parents:[4,2,1,""],populate:[4,2,1,""],read_ascribe:[4,2,1,""],read_dimensions:[4,2,1,""],read_quantipy:[4,2,1,""],read_spss:[4,2,1,""],recode:[4,2,1,""],remove_html:[4,2,1,""],remove_items:[4,2,1,""],remove_values:[4,2,1,""],rename:[4,2,1,""],rename_from_mapper:[4,2,1,""],reorder_items:[4,2,1,""],reorder_values:[4,2,1,""],repair:[4,2,1,""],repair_text_edits:[4,2,1,""],replace_texts:[4,2,1,""],resolve_name:[4,2,1,""],restore_item_texts:[
4,2,1,""],revert:[4,2,1,""],roll_up:[4,2,1,""],save:[4,2,1,""],select_text_keys:[4,2,1,""],set_encoding:[4,4,1,""],set_factors:[4,2,1,""],set_item_texts:[4,2,1,""],set_missings:[4,2,1,""],set_property:[4,2,1,""],set_text_key:[4,2,1,""],set_value_texts:[4,2,1,""],set_variable_text:[4,2,1,""],set_verbose_errmsg:[4,2,1,""],set_verbose_infomsg:[4,2,1,""],slicing:[4,2,1,""],sorting:[4,2,1,""],sources:[4,2,1,""],split:[4,2,1,""],start_meta:[4,3,1,""],subset:[4,2,1,""],take:[4,2,1,""],text:[4,2,1,""],to_array:[4,2,1,""],to_delimited_set:[4,2,1,""],transpose:[4,2,1,""],unbind:[4,2,1,""],uncode:[4,2,1,""],undimensionize:[4,2,1,""],undimensionizing_mapper:[4,2,1,""],unify_values:[4,2,1,""],unroll:[4,2,1,""],update:[4,2,1,""],validate:[4,2,1,""],value_texts:[4,2,1,""],values:[4,2,1,""],variables:[4,2,1,""],vmerge:[4,2,1,""],weight:[4,2,1,""],write_dimensions:[4,2,1,""],write_quantipy:[4,2,1,""],write_spss:[4,2,1,""]},"quantipy.QuantipyViews":{"default":[5,2,1,""],coltests:[5,2,1,""],descriptives:[5,2,1,""],frequency:[5,2,1,""]},"quantipy.Quantity":{calc:[10,2,1,""],count:[10,2,1,""],exclude:[10,2,1,""],filter:[10,2,1,""],group:[10,2,1,""],limit:[10,2,1,""],normalize:[10,2,1,""],rebase:[10,2,1,""],rescale:[10,2,1,""],summarize:[10,2,1,""],swap:[10,2,1,""],unweight:[10,2,1,""],weight:[10,2,1,""]},"quantipy.Rim":{add_group:[6,2,1,""],group_targets:[6,2,1,""],report:[6,2,1,""],set_targets:[6,2,1,""],validate:[6,2,1,""]},"quantipy.Stack":{add_data:[7,2,1,""],add_link:[7,2,1,""],add_nets:[7,2,1,""],add_stats:[7,2,1,""],add_tests:[7,2,1,""],aggregate:[7,2,1,""],apply_meta_edits:[7,2,1,""],cumulative_sum:[7,2,1,""],describe:[7,2,1,""],freeze_master_meta:[7,2,1,""],from_sav:[7,3,1,""],load:[7,3,1,""],recode_from_net_def:[7,3,1,""],reduce:[7,2,1,""],refresh:[7,2,1,""],remove_data:[7,2,1,""],restore_meta:[7,2,1,""],save:[7,2,1,""],variable_types:[7,2,1,""]},"quantipy.Test":{get_se:[10,2,1,""],get_sig:[10,2,1,""],get_statistic:[10,2,1,""],run:[10,2,1,""],set_params:[10,2,1,""]},"quantipy.
View":{get_edit_params:[8,2,1,""],get_std_params:[8,2,1,""],has_other_source:[8,2,1,""],is_base:[8,2,1,""],is_counts:[8,2,1,""],is_cumulative:[8,2,1,""],is_meanstest:[8,2,1,""],is_net:[8,2,1,""],is_pct:[8,2,1,""],is_propstest:[8,2,1,""],is_stat:[8,2,1,""],is_sum:[8,2,1,""],is_weighted:[8,2,1,""],meta:[8,2,1,""],missing:[8,2,1,""],nests:[8,2,1,""],notation:[8,2,1,""],rescaling:[8,2,1,""],spec_condition:[8,2,1,""],weights:[8,2,1,""]},"quantipy.ViewMapper":{add_method:[9,2,1,""],make_template:[9,2,1,""],subset:[9,2,1,""]},Chain:{filename:[2,0,1,""]},quantipy:{Chain:[2,1,1,""],Cluster:[3,1,1,""],DataSet:[4,1,1,""],QuantipyViews:[5,1,1,""],Quantity:[10,1,1,""],Rim:[6,1,1,""],Stack:[7,1,1,""],Test:[10,1,1,""],View:[8,1,1,""],ViewMapper:[9,1,1,""]}},objnames:{"0":["py","attribute","Python attribute"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","staticmethod","Python static method"],"4":["py","classmethod","Python class method"]},objtypes:{"0":"py:attribute","1":"py:class","2":"py:method","3":"py:staticmethod","4":"py:classmethod"},terms:{"0x0000000019ae06d8":[28,31],"\u00ecnt":[20,31],"\u00ectem":38,"boolean":[4,6,7,18,34],"case":[0,4,5,6,7,10,12,13,15,18,20,23,24,26,28,33,34,36,37],"class":[2,3,4,5,6,7,8,9,10,11,22,33,34],"default":[3,4,5,7,8,9,10,13,18,20,21,23,24,33,34,37,38],"export":[0,3,4,20,34],"final":26,"float":[4,5,7,10,19,20,21,22,23,24,31,33,34],"function":[4,7,20,25,26,28,31,34],"import":[4,10,11,15,20,22,25,26],"int":[4,5,7,10,19,20,21,22,23,24,25,33,34],"long":[25,34],"m\u00fcller":0,"new":[3,4,5,7,9,10,12,21,23,24,26,31,33,34,37,38],"null":31,"return":[3,4,5,6,7,8,9,10,21,22,24,26,33,34],"short":[9,19,21,24,34],"sigur\u00f0sson":0,"static":[2,3,4,7],"switch":[24,33,37],"true":[4,5,6,7,9,10,13,20,21,22,24,25,26,31,34,37,38],"try":[4,7,23,34],"var":[4,10,15,33,34,37],"while":[4,15,19,23,24,33,34],Adding:[11,18,31],Age:[24,26],Being:15,But:[13,23],Das:[19,23],For:[4,5,13,14,20,22,23,24,25,26,28,31,33],NPS:31,Not:[19,24,31,34
],One:[4,13,26],That:[15,19,22,24],The:[2,3,4,5,7,8,9,10,11,13,14,15,18,20,21,22,23,24,25,27,31,33,34,35,36,37],Their:5,Then:[21,34],There:[13,19,21,22,31,34],These:[15,28,31,34],Use:[4,10,24,34],Uses:4,Using:[7,18,21,23],Will:[4,5,7,10,34],With:[13,22,23,24,26,31],Yes:[20,34],__init__:[12,34],__setitem__:34,_band:4,_batch:[7,31],_cumsum:[31,34],_data:[4,11,21],_dimensions_suffix:34,_get_chain:34,_grid:[34,37],_intersect:[28,31],_meta:[4,11,12,13,22,28,38],_missingfi:10,_net:34,_rc:34,_rec:[4,24,34],_remove_html:4,_request_view:34,_suffix:4,_sum:31,_tran:4,abbrevi:25,abl:[5,28,34],about:[13,19,22,35],abov:[24,31,38],absorb:34,accept:[23,26,34],access:[4,7,19,20,22,34,35],accessor:22,accid:[21,34],accommod:14,accompani:[19,34],accord:[4,7,10,26,31],accordingli:26,account:[4,5,34],achiev:21,across:[10,22,34],act:[5,24,28],activ:[19,20,24,34],add:[3,4,5,7,9,13,14,23,24,28,31,33,34,36],add_batch:[12,31],add_chain:3,add_data:7,add_filt:[14,34],add_group:6,add_i:13,add_link:[7,28,31],add_meta:[4,23,33,34,36,38],add_method:9,add_net:[7,31,34],add_open_end:[13,33,34],add_stat:[7,31,34],add_test:[7,31,34],add_tot:34,add_x:[13,31],add_y_on_i:[13,28,31,34],adddit:20,added:[4,7,12,13,14,23,24,28,31,34,37],adding:[4,14,18,20,31,34,38],addit:[4,13,19,22,31,34],addition:[19,33,34],adjust:[33,34],adopt:13,aerob:24,affect:33,affix:7,after:[4,7,10,21,26,28,31,34],afternoon:22,again:[22,23,31],against:[4,5,10,13,34],age:[4,13,14,22,24,26,28,31,34],age_band:24,age_cb:26,age_grp_rc:26,age_rec:4,age_xb:26,age_xb_1:26,agegrp:[4,34],agg:[8,13,31],aggnam:8,aggreg:[0,2,3,4,5,7,8,9,10,11,14,16,33,34],aim:[0,34],alasdair:0,alert:33,alexand:0,algorithm:[4,5,10,34],alia:[4,21],align:34,all:[2,3,4,5,7,10,11,12,13,14,19,20,21,22,23,24,25,26,28,31,33,34,36,37],allign:34,allow:[4,14,15,23,24,31,33,34],alon:[22,34],along:[2,10],alongsid:[7,21,31],alphanumer:4,alreadi:[4,12,14,19,24,26,34],also:[4,7,10,11,12,13,14,22,23,24,26,28,31,33,34,36,37],altern:[4,23,31,36],although:22,alwai:[4,15,23,24,26,31,3
3,34,38],amount:[4,14,21,28,31],analysi:[0,34],ani:[4,5,7,8,10,13,19,21,22,23,25,26,33,34],anim:[4,19],anoth:[7,13,23,24,26,34],answer:[4,7,10,19,22,23,24,34],anymor:[33,34],anyth:26,anywher:7,api:4,appear:[4,34,36],append:[4,5,18,31,37],appli:[4,5,7,9,10,15,20,21,22,24,28,31,34,38],applic:[4,24,26],apply_edit:4,apply_meta_edit:7,apporach:21,approach:36,appropri:[4,5],arab:34,arbitrari:7,arbitrarili:[4,21,25,26],archiv:32,argument:[5,9,11,20,23,24,34],aris:28,arithmet:10,around:[4,34],arrai:[4,7,10,11,18,21,22,23,31,33,34,35,36],array_item:34,array_var:38,array_var_1:38,array_var_2:38,array_var_3:38,array_var_grid:38,arriv:10,as_addit:[13,34],as_delimited_set:34,as_df:10,as_float:34,as_int:34,as_singl:34,as_str:34,as_typ:[4,34],ascend:4,ascii:4,ascrib:[0,4,18],asid:24,ask:[19,24],askia:[5,10],aspect:34,assess:[0,37],assign:[4,6,34,36],assignd:24,associ:[2,7],assum:[4,10,28,37],attach:[4,7,21,23,34],attempt:[4,34],attribut:[2,4,11,22,28,31,34],audit:34,auto:[4,7,33],autom:[0,7],automat:[4,13,14,20,22,23,24,31,33,34,38],auxiliari:10,avail:[4,33],avoid:26,axes:4,axi:[2,3,4,5,7,8,10,15,19,22,34],axis_edit:[4,34],b_d:4,b_filter:34,b_name:31,back:[4,5,37],badli:[4,34],band:[4,7,18,34],band_numer:34,bank:3,bank_chain:3,bar:34,base:[4,5,7,8,9,10,18,20,22,31,34],base_al:10,base_text:15,based_on:[4,34],baselin:9,basi:26,basic:[20,22,25,27,34],basketbal:24,batch1:[12,13,28,31],batch2:[12,13,28,31],batch3:31,batch4:31,batch5:31,batch6:31,batch:[0,4,7,15,28,31,33,34],batch_nam:[4,7],batchnam:[11,13],bchain:3,becaus:[24,26,34],becom:[4,10,26,34],been:[4,7,10,13,21,24,26,34,38],beer:37,befor:[4,7,10,13,14,24,26,31,34],begin:[4,26],behaviour:[5,8,22,26,34,38],being:[4,24,38],belong:[4,11,12,13,31,33,34,37],below:[4,10,13,26,34],benefici:21,benefit:26,better:28,between:[4,10,13,31,33,34],bia:10,big:21,binari:34,bird:19,birgir:0,birth_dai:22,birth_month:22,birth_year:22,bivari:5,blacklist:[4,34],blank:36,blueprint:34,board:[19,24,38],bool:[4,5,7,9,10,22,37],border:4,both:[4,7,12,13,1
9,20,21,23,24,26,28,33,34,38],bottom3:7,bottom:34,bound:34,bracket:[7,34],brand:37,break_bi:[13,33],breakfast:22,brief:34,broader:19,buchhamm:0,bug:34,bugfix:34,build:[0,3,4,5,10,15,18,19,34],built:[14,31],bunch:34,by_nam:[4,34],by_typ:[4,21,22,34],bytestr:2,cach:[4,7],calc:[5,7,8,10,31,34],calc_onli:[5,31],calcul:[4,5,7,10,14,27,33,34],call:[4,7,21,22,24,28,34,38],came:34,can:[2,4,5,7,10,11,12,13,14,15,19,20,21,22,23,24,25,26,28,31,33,34,36,37,38],cannot:[26,34],cap:6,carefulli:4,carri:5,case1:37,case2:37,case3:37,casedata:[4,20],cast:34,cat:[4,19,23,33,34,36,38],cat_nam:[7,34],categor:[4,5,7,18,22,23,27,34,35],categori:[4,5,7,19,22,23,33,34,36],categorized_nam:[4,34],caught:34,caus:[33,34],caution:4,cave:[19,24,38],cbase:[7,31,34],cbase_gross:34,cell:[5,10,11,26,34],cell_item:14,cellitem:34,central:5,certain:[4,22,34],chain:[0,1,3,16,22,34],chainmanag:33,chainnam:2,chang:[4,6,7,10,18,21,24,34],charact:[4,23,25],characterist:[19,24],chart:[0,34],check:[4,5,7,10,22,26,33,34],check_dup:34,checking_clust:7,choic:0,choos:5,clariti:25,classmethod:4,clean:[0,4,7,21,24,34],clean_text:[23,34],clean_up:[4,34],clear:34,clear_factor:4,client:34,clone:[4,18],close:12,cluster:[0,1,2,7,13,14,15,34],code:[4,5,6,7,8,10,18,19,22,23,24,31,33,34,36,38],code_count:[4,22,25,34],code_from_label:[4,34],code_map:4,codes_from_nam:[4,34],codes_in_data:[4,22,34],cola:37,collect:[4,5,7,11,13,14,19,21,22,24,27,31,34],collect_cod:[7,34],colour:34,coltest:[5,7,9],column:[2,4,5,6,7,8,10,18,20,22,23,24,26,31,33,34,37,38],combin:[4,5,7,16,22,25,26,28,31,34],combind:37,come:34,comma:25,common:[8,19,24,34],commun:7,compabl:4,compar:[4,5,10,33,34],comparison:10,compat:[4,7,20,34,35],compatibilti:38,complet:[4,10,34],complex:[4,5,6,18,19,21,22,26,31,34],compli:[4,26],complic:18,compon:[4,5,7,8,11,15,18,21,22,23,24,34],compos:7,compound:3,comprehens:26,compress:7,comput:[0,4,5,7,9,10,27,28,31,34],concat:[2,22,34],concaten:[2,26],concern:31,cond_map:4,condit:[4,8,10,18,21,22,25,33,34],confirm:7,conflict:
34,conjunct:34,connect:[4,7,12,13,34,38],consequ:[28,36],consid:[4,5,7,10,24,34],consist:[4,10,11,19,21,23,24,34],constitut:13,constrcut:7,construct:[4,11,13,14,19,21,23,28,31,34,38],contain:[0,2,3,4,7,8,9,10,11,13,14,19,21,22,23,24,28,31,34,37],content:[21,28],context:[19,23],contrast:[19,24],contributor:0,control:[4,5,7,8,10,23,24,26,34],convcrit:6,convent:[4,10,34,38],convers:[4,18,33,34],convert:[0,4,10,20,24,33,34,38],coordin:9,cope:28,copi:[2,4,9,12,18,21,23,26,34,38],copy_array_data:4,copy_batch:12,copy_d:21,copy_data:[4,24],copy_from:[4,23,34],copy_not:[4,34],copy_of_batch1:12,copy_onli:[4,34],copy_to:[4,23,34],core:[5,20,22,25,34,36],correct:[10,23,34],correctli:34,correspond:[4,5,10,22,23,24,34,36,38],correspons:34,corrupt:34,could:[23,24,26,34],count:[4,5,7,8,10,31,33,34,37],count_not:[4,22,34],count_onli:[4,22,34],counterpart:34,counts_cumsum:[31,34],counts_sum:31,cpickl:2,crash:34,creat:[2,3,4,6,7,10,11,13,14,15,16,18,20,22,24,28,31,33,34,35],create_set:[4,34],creation:[10,18,23,34],cross:[13,28,31,34],crossbreak:28,crosstab:[4,24],crunch:34,csv:[0,4,7,11,18],cum_sum:10,cumul:[7,8,10,27,34],cumulative_sum:[7,31],current:[0,2,4,7,24,34],custom:[4,7,16,18,20,24,34],custom_text:[7,31],customis:34,customiz:0,cut:[4,21],cut_item_text:4,cwi_filt:10,cycl:[4,9],dai:[22,26],danish:19,data:[2,4,5,6,7,8,10,11,14,15,20,21,23,24,28,31,33,34,35,37],data_df:[4,34],data_kei:[4,7,14,28,34],datafil:[4,34],datafram:[3,4,6,7,8,10,18,19,21,22,23,28,31,33,34],datakei:34,dataset:[0,1,7,11,12,13,18,20,22,24,28,31,33,34,35,36],dataset_left:4,dataset_right:4,datasmoothi:0,datast:33,date:[4,7,19,20,21,22,23,24,34],dates_as_str:20,ddf:[0,4,34],deafult:[5,10],deal:20,decim:4,deciph:[0,18],deck:0,decod:[7,33],decode_str:7,decor:34,deep:4,deepcopi:34,defin:[2,3,4,7,10,13,14,19,21,23,28,31,33,34,36],definit:[2,4,5,7,10,14,19,24,26,27,28,34],definiton:[9,19,34],defint:[0,2,4,5,6,7,10,14,19,34],defintit:[21,38],defintiton:23,del:34,deleg:26,delet:[4,7,34],delimied_set:22,delimit:[4,7,19
,20,21,22,23,24,25,26,33,34,36],delimited_set:[22,24,34],demograph:21,depend:[5,13,14,33,34],deprec:[7,34],deriv:[4,10,18,21,34,36],derivc:9,derive_categor:34,derot:[4,34,35],desc:[4,33],descend:4,describ:[0,2,3,4,7,14,19,21,28,31,34],descript:[4,5,7,9,10,19,27],descrp:4,design:[4,20],desir:[4,6,22,34,36],detail:[8,18,19],detect:[4,6,33],determin:[4,5,7],detractor:31,deutsch:[19,23],dff:20,diagram:28,dicat:24,dice:18,dichot:20,dichotom:[4,19,20,34],dict:[2,3,4,5,6,7,8,9,10,19,23,24,26,31,34,37],dictat:21,dictionari:[4,6,7,8,9,34],differ:[4,5,7,8,10,13,19,20,23,24,28,31,33,34,35,37],digest:19,dim:[5,10,31],dim_comp:34,dimens:[0,4,5,10,18,34,35],dimension:[4,34],dimensionizing_mapp:4,dimensions_comp:[4,20,34,38],dimlabel:34,dinner:22,direct:26,directli:[2,4,22,34],discret:[7,13],disk:[4,21,34],dispers:5,distinct:[19,31],distribut:[5,10],div:[7,34],dive:[19,24,38],divid:10,dk_filter:4,dms:4,dmsrun:4,doc:[22,34],docstr:6,document:[0,4,7,19,20,23,34],doe:[4,11,23,26,34],doesn:[4,25,26],dog:19,don:[19,22,24,36],done:[4,13,14],doubl:22,down:38,downbreak:[28,31,34],download:38,draft:34,draw:[21,24],drawn:[4,34],drink:37,drink_1:37,drink_2:37,drink_level:37,driven:0,drop:[4,5,7,10,21,34],drop_cod:[7,34],drop_delimit:4,drop_dupl:4,drope:[4,37],dropna:[4,6,34,37],dropx:4,dto:38,dtype:[20,22,26,34],due:34,dump:20,dupe:33,duplic:[4,23,33,34],durat:22,dure:[4,7,10,24],each:[2,4,5,6,7,13,14,19,23,24,28,31,33,34,37,38],eagleston:0,eas:34,easi:[0,19,22,23,34],easier:[12,22,28,34],easiest:22,easili:[4,12,20,31,34],ebas:[7,34],echo:4,ect:34,edit:[0,4,8,10,13,14,15,18,19,21,24,26,34],edit_param:8,eff:10,effect:[6,10,26,34],effici:34,ein:[19,23],either:[4,5,10,13,19,21,22,23,26,34],element:[4,7,10,19,34,38],eleph:4,els:26,emploi:19,empti:[4,7,22,24,26,33,34],empty_item:[4,34],en_u:7,enabl:[23,34],encod:[4,33,34],encount:20,end:[2,4,7,11,12,33,34],end_tim:22,enforc:34,eng:34,engin:[1,27,33],english:[19,34],enhanc:34,enough:4,enrich:34,ensur:[4,11,34],enter:[4,14],entir:[4,10,20,34],entri
:[4,6,7,10,19,26,34],enumer:[4,7,23,24,26,33,36],environ:4,eponym:34,eqaul:4,equal:[4,10,34],equip:31,equival:[4,38],eras:[4,7],error:[4,10,33,34],escap:4,especi:[22,26,31],estim:10,etc:[4,19,21,28,31,34],ethnic:[13,22,28,31],even:[4,31,34],ever:20,everi:[22,23,26],everyon:26,everyth:26,evid:31,exact:[4,34],exactli:[4,24,25],exampl:[2,4,5,7,11,18,19,20,22,24,25,28,31,34,37,38],excel:[0,3,4,5,34],except:[4,21,34],exchang:[10,23],exclud:[4,5,7,8,10,31,34],exclus:[4,25,34],execut:4,exercis:[22,24],exist:[4,7,10,12,13,18,21,23,24,26,31,33,34],expand:[4,7,8,10,31,33],expect:24,experiment:4,explain:14,explicit:26,explicitli:[4,7,34,36],explor:19,expos:23,express:[4,5,6,7,10,22,25,34],ext_item:[4,34],ext_valu:4,ext_xk:34,ext_yk:34,extend:[4,5,7,13,18,22,31,34],extend_cod:[7,34],extend_filt:14,extend_i:[13,34],extend_item:[4,34],extend_valid_tk:34,extend_valu:[4,23,34,36],extend_x:[33,34],extens:[3,4,7,19,20,34],extra:34,extract:14,extrapol:25,factor:[4,6,7,34],factor_label:[7,34],factormap:4,fail:[4,34],failur:20,fall:[4,5,10],fallback:8,fals:[4,5,7,10,13,20,22,23,24,26,31,34],fast_stack_filt:34,favour:10,featur:[19,34],feed:[4,34],feedback:4,femal:[4,22],few:[11,21,22,34,37],figur:10,file:[2,3,4,5,7,11,20,21,22,28,34,38],file_nam:20,filenam:[2,7],fill:[4,18,34],fillna:[4,18],filter1:34,filter2:34,filter:[4,6,7,10,11,15,18,28,31,33,34],filter_1:34,filter_2:34,filter_def:6,filter_kei:[7,14],filter_nam:34,find:[4,11,22,28,34],find_duplicate_text:4,finish:[4,21,34],finnish:19,first:[4,5,10,13,19,21,26,31,33,34],fit:[19,23,24],fix:[4,15,33,34],flag:[4,5,34],flag_bas:10,flat:4,flatten:[4,24,34],flexibl:[6,7,24],float64:20,folder:20,follow:[4,8,10,11,14,15,19,20,21,22,23,24,26,28,31,34,37,38],folow:26,footbal:24,forc:[26,34],force_text:[4,23,34],forcefulli:4,form:[3,4,7,10,14,20,23,24,25,26,28,34],format:[0,4,6,10,19,20,23,26,31,34,37],former:[4,7,22,23,24,34],fortnight:22,found:[2,3,4,7,10,20,22,24,26,31,33,34],four:[28,31],frang:[4,25,26,34,37],freez:7,freeze_master_meta:7,fre
nch:19,frequenc:[4,5,8,9,10,28,31,34],freysson:0,from:[0,2,3,4,5,7,10,12,14,15,18,19,21,24,25,26,28,31,33,34,37],from_batch:[4,34],from_compon:[4,20,34],from_dichotom:[4,34],from_excel:[4,34],from_sav:7,from_set:[4,20,34],from_stack:[4,34],front:34,fulfil:4,full:[3,4,5,7,21,23,25,34,38],fullnam:31,fully_hidden_arrai:4,fun:34,further:21,futur:[7,34],geir:0,gender:[4,13,14,15,22,24,26,28,31,34,37],gener:[2,4,5,7,8,9,11,20,22,23,26,31,34],generate_report:34,german:[19,23],get:[4,7,8,11,12,13,14,22,28,31,34,36],get_batch:[4,12,13,28,31,33],get_edit_param:8,get_properti:4,get_qp_dataset:34,get_s:10,get_sig:10,get_statist:10,get_std_param:8,getter:34,give:[13,26,34],given:[4,5,6,7,10,20,26,34],global:[4,8,10,14,15,21,34],goe:4,going:36,grab:33,greater:4,grid:[4,34],griffith:0,group:[4,5,6,7,8,10,19,21,22,24,25,28,31,33,34],group_nam:6,group_target:6,grouped_ag:24,grp:10,grp_text_map:31,guid:34,gzip:7,hack:4,had:26,hand:24,handl:[0,6,7,9,10,20,31,34,36,38],handler:34,happen:[4,24],happend:4,has:[2,4,7,12,15,19,21,25,26,31,33,34,37],has_al:[4,5,18],has_ani:[4,18],has_count:18,has_other_sourc:8,have:[4,7,10,13,20,23,24,25,26,28,33,34,37],head:[20,22,23,24,26,34,38],heirarch:4,hell:4,hello:20,help:[26,28,31],helper:[4,31,34],here:[4,7,26,28,33,34,38],hidden:[4,33,34],hide:[4,15,33,34],hide_empti:34,hide_empty_item:[4,34],hide_on_i:4,hide_valu:[4,34],high:[5,10],higher:10,hmerg:[4,33,34],hockei:24,hold:[2,4,7,10,34],horizont:[4,18],household:20,how:[3,4,7,14,19,22,24,28,31,36,38],howev:[23,24,26,36,38],hrafn:0,html:[4,23,34],hub:34,ident:[4,20,24,34],identif:4,identifi:[4,5,23,34,37],ids:4,ignor:[4,7,10,22,24,34],ignore_arrai:4,ignore_cod:4,ignore_flag:10,ignore_item:[4,24,34],ignore_valu:[4,24],ill:23,implement:[7,10,19,22,34],impli:4,implicitli:10,impract:28,impute_method:6,incl:[7,34],includ:[2,3,4,5,7,10,13,25,26,28,31,33,34,37],inclus:[24,34],incom:[4,10,24],inconsist:[4,15,23,33,34],incorrect:34,incorrectli:34,independ:[4,10],index:[0,2,4,7,10,34],indic:[4,5,7,10,23,24,2
6,34],individu:[3,4,34],industri:20,infer:[7,20,24,34],info:[4,18,19],inform:[0,4,8,9,13,14,15,19,20,23,26,31,34,36,37],inherit:[11,34],inhomogen:10,init:26,initi:[4,18,34,37],inject:[4,26],innermost:8,inplac:[4,7,10,18,21,26,34],input:[4,6,7,10,11,20,34],insert:[4,13,34],insid:[2,4,9,10,19,20,22,23,34],inspect:[4,18,34],instal:[4,34],instanc:[2,3,4,7,9,10,11,14,20,21,22,23,24,28,31,34],instead:[4,7,10,20,25,26,34],instruct:[5,10,19,26,31,34],int64:[20,22],integ:10,integr:22,intend:7,inter:[4,6],interact:[2,3,4,21,22,34],interfac:0,interim:34,interlock:[4,18,34],intern:[2,34],interpret:[4,25],intersect:[4,18,22,34],intro:21,introduc:10,involv:26,iolocal:[7,20],ioutf8:7,ipython:[4,21,34],is_arrai:[31,34],is_bas:8,is_block:31,is_count:8,is_cumul:8,is_dat:34,is_delimited_set:34,is_float:34,is_g:[4,25,34],is_int:34,is_like_numer:[4,22,24,34],is_meanstest:8,is_multi:31,is_nan:[4,22,25],is_nest:31,is_net:8,is_pct:8,is_propstest:8,is_singl:34,is_stat:8,is_str:34,is_sum:8,is_weight:[8,31],isol:34,issu:[20,33,34],ist:[19,23],item:[4,10,11,13,19,20,21,22,23,24,31,34,36,37,38],item_nam:[4,34],item_no:[4,34],item_text:[4,22,34],iter:[9,22,34],its:[2,4,5,7,8,10,11,12,13,14,15,19,20,22,24,25,26,31,34,37,38],itself:[4,10],jame:0,jjda:20,jog:24,join:[4,22],json:[4,7,11,18,34],jupyt:[4,21,34],just:34,keep:[4,9,10,21,23,24,34,37],keep_bas:10,keep_cod:[10,34],keep_origin:4,keep_variable_text:[4,34],kei:[2,4,5,7,8,11,14,19,23,24,26,31,34,37],kept:[4,10,11,34],kerstin:0,keyword:[5,9,34],kind:[22,24,28,34],kite:[19,24,38],know:[14,19,22,24,36,38],kritik:31,kwarg:[4,5,8,9,34],lab:7,label:[4,5,7,14,19,20,22,23,24,26,34,36,37,38],lack:0,lang:19,languag:[11,18,23,34],larg:[14,21,28,31,34],last:4,lastli:34,later:[4,13,20,34],latest:[0,32,34],latter:[22,34],lead:[24,31,34],least:[4,22,23],leav:[23,24],left:[4,10,33,34],left_id:4,left_on:[4,34],legaci:4,lemonad:37,length:[4,34],less:[22,33,37],let:[23,24,28],level:[4,5,7,10,14,19,26,31,34,37],lib:[4,19,34,38],librari:[0,19,34],lift:24,like:[0,4
,5,7,13,14,19,21,22,24,26,28,31,34,37,38],limit:[4,10,24,26,34],link:[2,4,5,7,8,9,10,11,14,27,31,34],list:[2,4,5,6,7,9,10,13,18,19,21,22,23,24,26,33,34,36,37],list_vari:34,listen:26,load:[2,3,4,7,11,34],load_cach:7,loc:31,local:[7,13,22,28,31],locat:[2,4,7,34],logic1:34,logic2:34,logic:[4,5,8,10,18,22,24,26,34],logic_a:[25,26],logic_b:[25,26],logic_c:[25,26],london:4,longer:34,look:[4,13,24,26,34,37,38],loop:[34,37],lose:[13,24,37],lot:[4,21,22,26],low:[4,5,10,33],lower:[4,10,33,34],lower_q:[10,33],lunch:22,machin:[4,20,34],made:2,mai:[4,25,26],main:[4,6,10,13,14,19,24,31,34],main_filt:34,mainli:3,major:10,mak:34,make:[4,5,22,24,28,34],make_summari:[13,34],make_templ:9,male:[22,26],manag:[0,18,26],mani:[13,28],manifest:4,manipul:[10,13,15],manual:[4,5,34,36],map:[4,5,6,7,9,10,18,20,23,34,36],mapper:[4,18,25,34,37],mapper_to_meta:4,margin:[5,10],mark:10,market:[0,20],mask:[4,18,21,22,23,34,36,37,38],mass:5,massiv:26,master:34,master_meta:7,match:[4,6,7,20,24,34,38],matric:10,matrix:10,matrixcach:7,matter:[11,33],max:[7,10,31,33,34],max_iter:6,mdd:[0,4,20,34],mdm_lang:34,mean:[4,5,6,7,8,10,13,15,20,22,24,25,26,31,33,34,37],measur:[5,10],median:[6,7,10,31,33,34],membership:[22,34],memori:[21,34],memoryerror:34,men:[4,14,15,28,31],mention:[12,23],merg:[3,4,18,33,34],merge_exist:[4,33],merge_text:[4,34],messag:34,meta:[4,5,7,8,11,12,14,15,18,20,21,22,24,26,31,33,34,36,37,38],meta_dict:[4,34],meta_edit:[4,7,15],meta_to_json:[4,34],metadata:[0,4,7,18,19,20,24,26,34,36,38],metaobject:34,method:[2,3,4,5,6,7,8,9,10,11,12,13,14,18,20,21,22,23,24,31,33,34,36,37,38],metric:[5,7,10],mid:[5,10,22,34],middl:34,might:[7,21,23,24,34],mimic:[5,10],mimick:[5,10,22],min:[4,7,10,31,33,34],min_value_count:[4,33],minimum:5,minor:34,mismatch:34,miss:[4,6,8,14,20,22,23,24,33,36,38],missing_map:[4,34],mix:[4,24],mode:[6,7,20,34,35],modifi:[4,7,10,15,24,34],modu:34,modul:[0,4,7,33],month:22,more:[2,4,22,25,26,31,33,34],morn:22,most:[10,21,22,26,33,34],mous:4,move:[0,4,21,34],mrs:4,mrset:4,mrse
t_tag_styl:4,much:[28,37],mul:7,multi:[5,10,19],multiindex:10,multipl:[0,3,4,5,19,22,23,25,26,34],multipli:10,multivari:10,must:[4,6,7,20,21,23,26,31,34,37],name:[2,3,4,5,6,7,8,9,10,12,13,19,20,21,22,23,24,26,28,31,33,34,36,37,38],name_data:[34,38],nan:[4,10,20,22,23,24,26,28,31,34,38],nate:4,nativ:[0,4,18,34],natur:[10,21,22],necessari:[10,21,34,37],need:[4,7,9,10,21,23,24,34,38],neg:34,nest:[7,8,19,25,26],net:[5,7,8,10,18,27,33,34],net_1:[7,31,33],net_2:[7,31,33],net_3:7,net_def:33,net_map:[7,31],net_view:34,never:[22,23],new_arrai:23,new_array_1:23,new_array_2:23,new_array_3:23,new_array_4:23,new_array_97:23,new_array_98:23,new_chain:34,new_cod:10,new_column:4,new_d:34,new_data:7,new_data_kei:7,new_dataset:4,new_int:23,new_meta:7,new_nam:[4,23,24,34],new_ord:[4,21,34],new_rul:34,new_set:[4,34],new_singl:23,new_stack:2,new_text:[4,23],new_var:34,new_weight:7,newli:34,next:[4,7,11,19,31,34,38],no_data:24,no_filt:[4,7,28,31,34],non:[4,7,22,25,27,34],none:[2,3,4,5,6,7,8,9,10,13,20,21,22,23,24,26,28,33,34,36,38],none_band:34,nonea:4,normal:[4,5,7,10,33,34],norwegian:19,not_al:18,not_ani:[4,10,18,34],not_count:18,notat:[5,7,8,10],note:[2,4,5,26,31],notebook:[4,21,34],notimplementederror:[15,34],now:[13,14,21,31,34,37],num:5,number:[4,6,10,20,22,23,24,33,34],numer:[4,5,7,18,19,23,24,31,34,36],numpi:[0,10,34],obei:22,object:[2,3,4,5,7,10,18,20,21,24,26,28,31,34,36],obscur:26,observ:0,obvious:31,occur:26,oe_q8:13,oe_q9:13,offer:[0,4,19,22,24,34,36,38],often:[21,22,24],old:[4,5,7,34],old_cod:10,old_nam:34,older:34,omit:26,omnibu:34,on_var:[7,31],onc:22,one:[2,4,5,12,13,19,21,22,23,24,26,28,31,33,34],ones:[4,10,15,21,22,34],onli:[4,7,9,10,11,13,14,21,23,24,26,28,31,33,34,36,37],only_men:24,only_typ:[4,7],onto:26,oom:34,open:[0,11,12,34],oper:[4,5,20,22,24,25,26,34],operat:18,opportun:20,oppos:21,opt:4,option:[4,5,6,7,10,13,14,19,22,23,31,34],order:[2,4,11,18,19,24,34],ordereddict:[3,4,7,13,28,31],organ:16,orgin:10,orient:[2,34],origi:4,origin:[4,7,10,21,24,34],other:[4,5,7,
10,11,12,13,19,23,24,25,26,33,34,37],other_sourc:[7,31,34],otherwis:[4,34],our:[0,22,26],out:[5,12,14,24,31,34],outcom:4,outdat:34,output:[4,7,10,13,22,34],outsid:34,over:[4,7,9,10,34],overcod:[4,7,31],overlap:10,overview:[4,7,22,28,34],overwrit:[4,15,21,26,34],overwrite_margin:10,overwrite_text:4,overwritten:[4,7,34],ovlp_correc:10,own:[13,34],pack:4,packag:5,paint:[3,34],painter:34,pair:[4,5,10,18,23,26,34,36],panda:[0,3,4,6,7,10,19,20,22,26,34],pane:4,parachut:[19,24,38],parallel:7,paramet:[3,4,5,6,7,8,9,10,13,21,22,23,24,26,31,33,34,37],parent:[4,19,20,34],pars:5,part:[4,8,19,21,22,26,34],parti:18,particip:20,particular:4,pass:[4,5,7,10,21,22,24,34],past:26,path:[2,3,4,7,20,34],path_clust:3,path_csv:20,path_data:[4,38],path_ddf:[4,20,34],path_json:20,path_mdd:[4,20,34],path_meta:4,path_report:4,path_sav:[4,20],path_sav_analysi:20,path_stack:7,path_txt:20,path_xlsx:[4,34],path_xml:20,pct:4,peopl:20,per:[4,5,6,7,10,12,20,22,23,24,26,34,38],percentag:[7,8,10,31,34],perform:[4,5,7,8,10,21,26,31,33,34],perman:21,physic:34,pick:[4,24],pickl:2,pilat:24,pivot:7,place:[10,26,34],plai:22,plain:[0,8,20],plan:[11,13,14,19,24,28,31],pleas:[7,21,22,34],point:19,pointer:19,pool:10,popul:[4,11,13,14,27,31,34],portion:7,posit:[4,10,21,22,23,28,34],possibl:[3,4,5,7,12,13,19,22,23,25,31,33,34,37],power:0,powerpoint:[5,34],powerpointpaint:34,pptx:34,pre:[4,26,31],precis:[26,34],prefer:22,prefix:[4,7],prep:25,prepar:[3,21,23,31,34],present:[4,10,34],preset:[4,7],pretti:[4,26],prevent:[4,15,21,23,24,33,34],previou:[21,34],previous:[4,34],primarili:4,print:[13,22,28,31,34],prior:[4,24],prioriti:34,probabl:[19,24,34],problem:[33,34],process:[0,4,7,9,20,21,22],produc:[5,10,13,24],product:[34,37],profession:[4,34],progress:[4,34],prohibit:23,project:0,promot:31,promotor:31,prop:[5,31],prop_nam:4,prop_valu:4,proper:[34,38],properli:38,properti:[4,10,11,20,34],proport:[5,6,8],protect:4,provid:[3,4,5,7,8,9,10,19,20,21,22,23,24,26,31,34,36],proxi:7,purpos:19,put:7,python:[4,33],q01_1:4,q01_3
:4,q11:34,q11_grid:34,q12:37,q12_10:37,q12_11:37,q12_12:37,q12_13:37,q12_1:37,q12_2:37,q12_3:37,q12_4:37,q12_5:37,q12_6:37,q12_7:37,q12_8:37,q12_9:37,q12_:37,q12a:37,q12a_10:37,q12a_11:37,q12a_12:37,q12a_13:37,q12a_1:37,q12a_2:37,q12a_3:37,q12a_4:37,q12a_5:37,q12a_6:37,q12a_7:37,q12a_8:37,q12a_9:37,q12a_grid:37,q12b:37,q12b_10:37,q12b_11:37,q12b_12:37,q12b_13:37,q12b_1:37,q12b_2:37,q12b_3:37,q12b_4:37,q12b_5:37,q12b_6:37,q12b_7:37,q12b_8:37,q12b_9:37,q12b_grid:37,q12c:37,q12c_10:37,q12c_11:37,q12c_12:37,q12c_13:37,q12c_1:37,q12c_2:37,q12c_3:37,q12c_4:37,q12c_5:37,q12c_6:37,q12c_7:37,q12c_8:37,q12c_9:37,q12c_grid:37,q12d:37,q12d_10:37,q12d_11:37,q12d_12:37,q12d_13:37,q12d_1:37,q12d_2:37,q12d_3:37,q12d_4:37,q12d_5:37,q12d_6:37,q12d_7:37,q12d_8:37,q12d_9:37,q12d_grid:37,q14_1:4,q14_1_1:4,q14_1_2:4,q14_1_3:4,q14_2:4,q14_2_1:4,q14_2_2:4,q14_2_3:4,q14_3:4,q14_3_1:4,q14_3_2:4,q14_3_3:4,q1_1:[4,25,26,37],q1_2:[4,26,37],q1_3:[4,26],q1_rec:4,q2_count:22,q2array_tran:4,q2b:[13,22,28,31],q3_no_data:24,q3_only_men:24,q3_rec:24,q3_version2:24,q4a:34,q4a_1:34,q4a_2:34,q4a_3:34,q4a_grid:34,q5_1:[19,21,22,24,34,38],q5_2:[19,21,22,24,34,38],q5_3:[19,21,22,24,34,38],q5_4:[19,21,22,24,34,38],q5_5:[19,21,22,24,34,38],q5_6:[19,21,22,24,34,38],q5_grid:38,q5_tran:24,q5_trans_1:24,q5_trans_2:24,q5_trans_3:24,q5_trans_4:24,q5_trans_5:24,q5_trans_97:24,q5_trans_98:24,q6_1:[13,21,22,28,31],q6_2:[13,21,22,28,31],q6_3:[13,21,22,28,31],q6_calc:31,q6_grid:38,q6_net:31,q6copi:38,q6new:38,q6new_grid:38,q6new_q6copi:38,q6new_q6copy_grid:38,q6new_q6copy_tran:38,q6new_q6copy_trans_grid:38,q7_1:[21,22,34],q7_2:[21,22,34],q7_3:[21,22,34],q7_4:[21,22,34],q7_5:[21,22,34],q7_6:[21,22,34],q7_grid:38,q8_with_a_new_nam:23,q8a:[13,22],q9a:[13,22],q_group:37,q_label:[4,34],qtp:37,qtype:[4,23,33,36,38],qualifi:[4,5,10],quantifi:1,quantipi:[2,3,4,5,6,7,8,9,10,11,19,20,22,25,31,34,36,38],quantipyview:[1,7,34],quantiti:[10,34],queri:[2,4,6,7,18,19,24,34,38],question:[4,10,19,24,26,31,34,37],questionnair:21,quick:[4,
22,34],quickli:[6,21,22,24,34],radio:26,radio_st:26,radio_stations_cb:26,radio_stations_xb:26,rais:[4,15,21,31,33,34],rake:34,rang:[4,5,18,21,34],rate:[34,37],raw:[5,10],raw_sum:10,rbase:7,read:[0,4,20,34],read_ascrib:[4,20],read_deciph:20,read_dimens:[4,20],read_quantipi:[4,11,20,34,38],read_spss:[4,20],rebas:10,rebuild:24,rec:[4,34],receiv:37,recod:[0,4,7,18,34],recode_from_net_def:7,recode_seri:4,recoded_filt:34,recommend:24,record_numb:[13,22],reduc:[4,7,10,21,34],reduced_d:21,reduct:4,refactor:34,refer:[4,7,10,19,23,26,28,31,34],referenc:[7,13,19,26,34],reflect:[4,10,21,34],refresh:7,refus:[19,24],regard:[4,34],region:[4,34],regist:[4,22,34],regroup:[4,34],regular:[4,19,31,34],regularli:[22,23,24],reindex:4,reintroduc:34,rel:21,rel_to:8,relat:[2,8,23,26,34],relation_str:8,relationship:7,relev:[23,34],religion:22,reload:[21,34],remain:[4,5,21,26,34],rememb:36,remind:36,remov:[4,5,7,10,12,13,18,34],remove_data:7,remove_filt:34,remove_html:[4,34],remove_item:4,remove_valu:[4,33],renam:[4,18,24,33,34,38],rename_from_mapp:4,renamed_item:4,renamed_v:4,reorder:[18,34],reorder_item:[4,34],reorder_valu:4,reorgan:0,repair:[4,33,34],repair_text_edit:[4,34],repeat:[21,28],repetit:26,replac:[4,10,13,23,26,31,34],replace_i:[13,34],replace_text:[4,34],report:[0,4,5,6,34],reposit:[4,21,34],repres:[4,26],represent:[7,8,10,19,24,34],request:[5,7,13,21,23,26,31,34],request_view:34,requir:[4,21,23,34,38],rescal:[5,7,8,10,31],research:[0,20],reset:[4,34],reset_index:4,resid:34,resolv:34,resolve_nam:[4,34],resp:34,respect:[4,10,24,34,36],respond:[10,21,37],respons:[19,22,25,26,34,37],responsess:34,restor:[4,7,21,34],restore_item_text:4,restore_meta:7,restrict:[4,5,7,10,19,21,22,34],result:[3,4,5,7,9,10,16,20,22,23,24,26,28,34],result_onli:10,retain:9,retriev:10,revers:[24,25],revert:[4,21,34],rewrit:33,right:[4,33,34],right_id:4,right_on:[4,34],rim:[1,4,34],roll:4,roll_up:4,rollback:[18,34],rolled_up:4,round:6,row:[4,5,10,18,20,22,34,38],row_id:4,row_id_nam:4,rule:[4,33,34,38],run:[
4,7,10,15,24,28,31,34],safe:[4,23],safeguard:4,sai:26,same:[3,4,7,13,19,20,22,26,28,33,34,37,38],sampl:[5,8,10,33,34],sample_s:14,sandbox:34,satisfi:34,sav:[4,7,20],save:[2,3,4,7,21,28,34],savepoint:18,scalar:34,scale:[5,6,10,19,34,37],scan:4,scenario:38,scheme:[4,6,19,34],scratch:[18,34],script:4,search:4,second:[4,5,10,15,31],sect:4,section:[8,10,11,14,21,26],see:[13,21,24,26,28,31,34],seen:[26,38],segemen:26,segment:18,select:[4,7,9,13,14,21,22,31,34,37],select_text_kei:4,self:[2,4,7,10,26,28,34],sem:[7,10,33],semi:34,sensit:[4,33,34],separ:[4,26,36],septemb:[],sequenc:4,seri:[4,19,22,26,34],serial:2,session:[21,34],set:[3,4,5,6,7,9,10,11,12,13,18,19,20,22,23,24,26,28,33,34,36],set_cell_item:14,set_col_text_edit:34,set_column_text:34,set_dim_suffix:34,set_encod:4,set_factor:4,set_item_text:[4,23,34],set_languag:14,set_mask_text:34,set_miss:[4,34],set_opt:34,set_param:10,set_properti:[4,15],set_sigtest:[14,31,34],set_target:6,set_text_kei:4,set_unwgt_count:34,set_val_text_text:34,set_value_text:[4,15,23,34],set_variable_text:[4,15,23,34],set_verbose_errmsg:4,set_verbose_infomsg:4,set_weight:14,setdefaultencod:4,setnam:[4,34],setup:[4,10,13,34],sever:[5,22,37],shape:[4,21,31],share:[4,19],sheet:[4,34],shop:4,short_item_text:34,shorten:[4,34],shorthand:[4,5],shortnam:[5,8],should:[3,4,7,14,21,22,26,34,37,38],show:[4,10,13,19,21,22,31,34],shown:[4,10,33,34],side:[9,34],sig:[5,7,10,14,34],siglevel:34,signific:[5,10,11,27],significancetest:34,sigproperti:34,similar:[28,37],similarli:[22,23],similiar:23,simpl:[5,6,10,19,25,34,36],simpli:[4,22,23,24,25,31,34],simplifi:[24,26],sinc:[10,26,31,38],singl:[3,4,7,19,20,21,22,23,24,26,33,34,36,38],sit:26,six:22,size:[5,8,10,21,22,31,34],skip:[22,23,31,34],skip_item:34,slice:[4,8,15,18,34],slicer:[4,18,22,24,34],slicex:4,small:[5,34],snack:22,snapshot:[4,21,34],snowboard:[19,24,38],soccer:24,social:0,softwar:[0,4,5,10,20,33],solut:34,solv:34,some:[13,14,15,22,25,26,33,34,37],someth:37,sometim:[21,28,34],soon:34,sorri:0,sort:[4,1
5,33,34],sort_by_weight:[4,33],sort_on:33,sortx:4,sourc:[0,4,7,19,20,22,23,34,38],source_item:4,space:[4,25],speak:19,spec:3,spec_condit:8,specfic:14,special:[0,11,14,19,28,31,34],specif:[3,4,5,7,8,10,11,13,14,15,19,21,23,34,38],specifi:[2,4,5,6,7,10,13,20,23,31,34,36],speed:34,spell:[4,34],split:[4,7,13,34],split_view_nam:7,sport:[20,22],spreadsheet:0,spss:[0,4,10,18,34],spss_limit:[4,34],squar:7,stack:[1,2,3,4,5,8,11,13,14,27,31,34],stage:[4,34],standalon:18,standard:[8,10,20,34],standardli:24,start:[4,18,23,24,26],start_meta:[4,34],start_tim:22,stat:[4,5,7,10,31,34],state:[4,15,18,24,34],statement:[4,5,19,25,26],statisfi:34,statist:[0,4,5,7,8,10,18,27,28,34],std_paramet:8,stddev:[7,10,31,33,34],ste:34,stem:34,step:[31,36],still:[26,34],store:[4,5,7,11,12,13,19,21,24,28,31,34],store_cach:7,str:[3,4,5,6,7,8,9,10,24,34,36,37],str_tag:[4,34],strict:[4,34],strict_select:9,strictli:23,string:[2,4,6,7,8,10,19,20,21,22,23,24,25,34],strip:34,structur:[0,4,6,7,9,11,13,19,20,21,24,28,34,37],studi:34,style:[4,7],sub:[7,31],subclass:[2,11],subclasss:15,sublist:34,subset:[4,9,18,22,24,34],subset_d:4,substr:[4,34],subtl:34,subtyp:[19,34,38],suffix:[4,5,24,34,38],sum:[4,5,7,8,10,27,34],summar:[4,5,10,34],summari:[4,5,6,7,8,10,13,22,31,33,34],summaris:7,summat:10,suppli:24,supporintg:7,support:[0,7,8,18,19,22,23,24,33,34,38],surf:[19,24,38],survei:21,sv_se:[31,34],swap:[7,8,10,34],swedish:[19,34],swim:24,syntax:34,sys:4,tab:20,tabl:[0,7],tabul:[5,13,28],tag:[4,23,34],take:[4,5,7,11,22,24,25,26,34,37],taken:[4,7,14,15,24,33,34],target:[4,6,18,23,33,34],target_item:4,task:22,team:22,temp:4,templat:[5,9,34],temporari:[4,34],temporarili:4,ten:26,tend:4,term:[7,23,34],termin:34,test:[2,4,5,8,10,11,22,27,34,36],test_cat_1:36,test_cat_2:36,test_cat_3:36,test_tot:[5,10,34],test_var:[34,36],testtyp:10,text1:20,text:[4,5,7,8,14,18,20,21,22,24,26,31,34,36,38],text_kei:[3,4,7,11,18,22,23,31,34],text_label:[4,34],text_prefix:7,textkei:[4,34],than:[4,22,24,33,34],thei:[4,10,13,14,20,25,26,31,3
3,34],them:[4,5,13,20,22,26,31,33,34],themselv:[4,10],therefor:[4,5,24,34],thi:[2,3,4,5,6,7,10,13,14,15,20,21,22,23,24,26,28,31,34,37,38],third:18,thorugh:24,those:4,three:[4,21,22,24,26,34,36],threshold:5,through:[2,3,4,9],throughout:[4,19,20,34],thu:6,time:[7,19,21,22],titl:13,tks:34,to_arrai:[4,33,34,37],to_delimited_set:[4,34],to_df:10,to_excel:7,todo:[4,5,6,7,9,10],togeth:[3,4,7,19,21],toggl:7,too:34,tool:[5,20,24,25],top2:31,top3:7,top:26,topic:[19,34],total:[4,5,6,10,13,34],toward:34,tracker:34,tradit:10,transfer:34,transform:[0,4,5,10,18,34],translat:23,transpos:[4,13,24,31,38],transpose_arrai:13,transposit:24,treat:[4,10,25,31],tree:28,treshhold:10,trigger:4,tstat:10,tupl:[4,8,23,24,34,36],turn:19,two:[4,5,10,13,19,21,23,28,31,33,34],txt:[4,20],type:[0,3,4,5,6,7,8,9,10,13,18,20,23,26,31,33,34,36,38],type_nam:7,typic:26,ultim:4,unabbrevi:25,unattend:4,unbind:4,uncod:[4,34],uncode_seri:4,uncodit:5,unconditi:10,under:[4,5,34],underli:19,understood:20,undimension:4,undimensionizing_mapp:4,undo:7,uni:[5,10],unicod:7,unifi:[4,34],uniformli:20,unify_valu:4,union:[18,26],uniqu:[4,5,7,24,28,31,34],unique_id:[4,22],unique_kei:[4,34,37],uniquify_kei:4,unkei:26,unless:4,unlik:[19,24],unpool:10,unqiu:24,unrol:[4,31,34],untouch:[23,33],unusu:34,unwant:[4,34],unweight:[7,10,31,33,34],unweighted_bas:[7,31,34],unwgt:34,upcom:[],updat:[4,8,9,10,23,33,34],update_axis_def:[10,34],update_exist:[4,34],upon:19,upper:[4,34],upper_q:[10,33],uppercas:33,usag:[23,33,34],use:[0,2,4,5,7,10,12,13,19,20,21,22,23,24,26,34,35],use_ebas:10,used:[2,3,4,5,7,8,9,10,11,14,15,20,21,24,26,31,33,34],useful:[21,22,34],user:[2,4,14,34,36],userwarn:[34,36],uses:[4,10,34],using:[0,2,3,4,6,7,19,20,21,24,25,26,28,31,33,34,38],usual:19,utf8:33,utf:7,val:4,val_text_sep:4,valid:[4,6,7,14,19,24,26,31,34,36],valid_cod:34,valid_tk:[11,34],valu:[3,4,5,6,7,8,9,10,18,20,21,24,25,26,31,33,34,35,38],value_count:[4,22,34],value_map:37,value_text:[4,22,34],valueerror:[4,21,23,34],var_exist:[22,34],var_grid:37,var_na
m:[34,37],var_suffix:4,varcoeff:10,vari:22,variabl:[0,4,5,6,7,8,10,11,18,19,20,23,27,33,34,36,37,38],variable_typ:7,variables_from_set:34,varianc:10,variant:[22,34],varibal:37,varibl:34,variou:[5,14,22,28,31],varlist:[4,34],varnam:[4,34],vector:10,verbatim:[11,34],verbos:[4,7,25,31,34],veri:[19,23,24,34],versa:10,version2:24,version:[4,5,7,10,19,21,23,24,26,34],versu:31,vertic:[4,18],via:[0,4,5,7,21,22,23,24,31,34],vice:10,view:[1,2,3,4,5,7,9,10,14,16,22,27,28,33,34],view_kei:31,view_name_not:10,viewmanag:34,viewmapp:[1,7],viewmeta:8,visibl:[31,34],vmerg:[4,33,34],wai:[7,12,13,19,21,22,23,26,31,34,35],wait:21,want:[21,24,26],warn:[4,31,33,34],water:37,wave:21,weak:[4,33],weak_dup:4,week:22,weight:[0,4,6,7,8,10,11,12,22,24,31,33,34],weight_a:[14,22,31],weight_b:22,weight_column_nam:6,weight_nam:4,weight_schem:4,weigth:4,well:[4,10,20,22,25,26,31,34,38],went:34,were:[26,33,34],wgt:34,what:[16,19,20,24,26,27,34,35,38],whatev:[4,26],when:[4,5,7,10,20,21,23,24,26,34,38],where:[2,3,4,10,24,25,26],whether:[5,10],which:[4,5,7,10,11,13,14,15,22,23,24,26,28,31,33,34,37],whole:[4,34],whose:[4,7,34],wide:34,wil:4,wildcard:26,window:20,windsurf:[19,24,38],wise:[4,31],witch:34,within:[4,10,33],without:[33,34],women:15,work:[4,11,21,23,31,34],workbook:3,workspac:34,world:20,would:[4,19,24,26,34],wouldn:[19,24],wrap:34,wrapper:[4,10,33],write:[4,7,20,21,34,38],write_dimens:[4,33,34],write_quantipi:[4,21],write_spss:[4,20],writen:37,written:[21,34],wrong:34,x_filter_map:[28,31],x_kei:14,x_y_map:[13,14,28,31],xdef:10,xks:[4,7,31,34],xlsx:4,xml:[4,20],xsect:10,xtotal:4,y_filter:34,y_kei:[14,28,31],y_on_i:[13,28,31,34],year:[19,22,38],yes:20,yet:33,yield:22,yks:[4,34],yoga:24,you:[4,11,13,14,19,20,21,22,23,24,26,28,31,34,37,38],younger:24,your:[4,19,20,21,24,26,34],ysect:10},titles:["Quantipy: Python survey data toolkit","API references","Chain","Cluster","DataSet","QuantipyViews","Rim","Stack","View","ViewMapper","quantify.engine","Batch","Creating/ Loading a qp.Batch 
instance","Adding variables to a qp.Batch instance","Set properties of a qp.Batch","Inherited qp.DataSet methods","Builds","Combining results","Data processing","DataSet components","I/O","DataSet management","Inspecting variables","Editing metadata","Transforming variables","Logic and set operaters","Custom data recoding","Analysis & aggregation","Collecting aggregations","The computational engine","Significance testing","View aggregation","Release notes","Latest (01/10/2018)","Archived release notes","How-to-snippets","Different ways of creating categorical values","Derotation","DataSet Dimensions compatibility"],titleterms:{"boolean":25,"case":[19,21,22],"default":26,Adding:[13,26],The:[19,26,29,38],Using:20,about:37,access:38,adding:23,aggreg:[13,17,27,28,31],analysi:27,api:1,append:26,archiv:34,arrai:[13,19,24,37,38],ascrib:20,band:[24,26],base:26,basic:31,batch:[11,12,13,14],build:[16,26],calcul:31,categor:[19,24,31,36],cell:14,chain:[2,17],chang:23,clone:21,cluster:3,code:[25,26],collect:28,column:[19,21],combin:17,compat:38,complex:25,complic:26,compon:[19,20],comput:29,condit:26,convers:[20,24],copi:24,creat:[12,17,23,26,36,38],creation:26,csv:20,cumul:31,custom:[17,26],data:[0,18,19,22,26,38],datafram:20,dataset:[4,15,19,21,23,37,38],deciph:20,definit:31,deriv:26,derot:37,descript:31,detail:26,dice:22,differ:36,dimens:[20,38],document:[],edit:23,end:13,engin:[10,29],exampl:26,exist:[22,25],extend:23,featur:0,fill:26,fillna:26,filter:[14,21],from:[20,23],has_al:25,has_ani:25,has_count:25,horizont:21,how:[35,37],info:23,inherit:15,initi:26,inplac:24,inspect:22,instanc:[12,13],interlock:26,intersect:[25,26],item:14,json:20,kei:[0,13],languag:[14,19],latest:33,link:28,list:25,load:12,logic:25,manag:21,map:19,mapper:26,mask:19,merg:21,meta:[19,23],metadata:[22,23],method:[15,26],mode:38,nativ:20,net:[26,31],non:31,not_al:25,not_ani:25,not_count:25,note:[32,34],numer:26,object:[19,22,23],open:13,operat:25,order:21,organ:17,pair:20,parti:20,popul:28,process:18,pr
operti:14,python:0,quantifi:10,quantipi:0,quantipyview:5,queri:22,rang:25,recod:26,refer:1,releas:[32,34],remov:[23,26],renam:23,reorder:23,result:17,rim:6,rollback:21,row:21,savepoint:21,scratch:23,segment:26,septemb:[],set:[14,21,25],signific:[14,30,31],slice:22,slicer:25,snippet:35,special:13,spss:20,stack:[7,28],standalon:20,start:20,state:21,statist:[20,31],subset:21,sum:31,support:20,survei:0,target:26,test:[14,30,31],text:[19,23],text_kei:19,third:20,toolkit:0,transform:24,type:[19,22,24],union:25,upcom:[],use:37,valu:[19,22,23,36],variabl:[13,21,22,24,26,31],verbatim:13,vertic:21,view:[8,17,31],viewmapp:9,wai:36,weight:14,what:[17,28,37]}}) \ No newline at end of file diff --git a/docs/API/_build/html/sites/api_ref/00overview.html b/docs/API/_build/html/sites/api_ref/00overview.html new file mode 100644 index 000000000..b74e5a520 --- /dev/null +++ b/docs/API/_build/html/sites/api_ref/00overview.html @@ -0,0 +1,439 @@ + + + + + + + + + + + API references — Quantipy 0.1.3 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                      + + + + +
                      + + + + + + +
                      +
                      + + + + + + +
                      + +
                      +
                      +
                      +
                      + + + + +
                      +
                      + + +
                      +
                      + +
                      + +
                      + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/API/_build/html/sites/api_ref/Chain.html b/docs/API/_build/html/sites/api_ref/Chain.html index 9c29d5c9d..b8893065b 100644 --- a/docs/API/_build/html/sites/api_ref/Chain.html +++ b/docs/API/_build/html/sites/api_ref/Chain.html @@ -92,9 +92,18 @@

                      Quick search

        Parameters:
          -
        • spec (dict) – The banked chain specification object.
        • -
        • text_key (str, default='values') – Paint the x-axis of the banked chain using the spec provided +
        • spec (dict) – The banked chain specification object.
        • +
        • text_key (str, default='values') – Paint the x-axis of the banked chain using the spec provided and this text_key.
        Returns:

        bchain – The banked chain.

        +
        Returns:

        bchain – The banked chain.

        Return type:

        quantipy.Chain

        @@ -396,7 +405,7 @@

        Cluster

        Parameters:path_cluster (str) – The full path to the .cluster file that should be created, including +
        Parameters:path_cluster (str) – The full path to the .cluster file that should be created, including the extension.
        Returns:
        Parameters:path_cluster (str) – The full path to the .cluster file that should be created, including +
        Parameters:path_cluster (str) – The full path to the .cluster file that should be created, including the extension.
        Returns:
        +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in meta['columns'].
        • +
        • qtype ({'int', 'float', 'single', 'delimited set', 'date', 'string'}) – The structural type of the data the meta describes.
        • +
        • label (str) – The text label information.
        • +
        • categories (list of str, int, or tuples in form of (int, str), default None) – When a list of str is given, the categorical values will simply be +enumerated and mapped to the category labels. If only int are +provided, text labels are assumed to be an empty str (‘’) and a +warning is triggered. Alternatively, codes can be mapped to categorical +labels, e.g.: [(1, 'Elephant'), (2, 'Mouse'), (999, 'No animal')]
        • +
        • items (list of str, int, or tuples in form of (int, str), default None) – If provided will automatically create an array type mask. +When a list of str is given, the item number will simply be +enumerated and mapped to the category labels. If only int are +provided, item text labels are assumed to be an empty str (‘’) and +a warning is triggered. Alternatively, numerical values can be +mapped explicitly to items labels, e.g.: +[(1 'The first item'), (2, 'The second item'), (99, 'Last item')]
        • +
        • text_key (str, default None) – Text key for text-based label information. Uses the +DataSet.text_key information if not provided.
        • +
        • replace (bool, default True) – If True, an already existing corresponding pd.DataFrame +column in the case data component will be overwritten with a +new (empty) one.
        • +
        +
        Returns:

        DataSet is modified inplace, meta data and _data columns +will be added

        +
        Return type:

        None

        +
        + + +
        +
        +all(name, codes)
        +

        Return a logical has_all() slicer for the passed codes.

        +
        +

        Note

        +

        When applied to an array mask, the has_all() logic is ex- +tended to the item sources, i.e. the it must itself be true for +all the items.

        +
        + +++ + + + + + + + +
        Parameters:
          +
        • name (str, default None) – The column variable name keyed in _meta['columns'] or +_meta['masks'].
        • +
        • codes (int or list of int) – The codes to build the logical slicer from.
        • +
        +
        Returns:

        slicer – The indices fulfilling has_all([codes]).

        +
        Return type:

        pandas.Index

        +
        +
        + +
        +
        +any(name, codes)
        +

        Return a logical has_any() slicer for the passed codes.

        +
        +

        Note

        +

        When applied to an array mask, the has_any() logic is ex- +tended to the item sources, i.e. the it must itself be true for +at least one of the items.

        +
        + +++ + + + + + + + +
        Parameters:
          +
        • name (str, default None) – The column variable name keyed in _meta['columns'] or +_meta['masks'].
        • +
        • codes (int or list of int) – The codes to build the logical slicer from.
        • +
        +
        Returns:

        slicer – The indices fulfilling has_any([codes]).

        +
        Return type:

        pandas.Index

        +
        +
        + +
        +
        +band(name, bands, new_name=None, label=None, text_key=None)
        +

        Group numeric data with band definitions treated as group text labels.

        +

        Wrapper around derive() for quick banding of numeric +data.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in _meta['columns'] that will +be banded into summarized categories.
        • +
        • bands (list of int/tuple or dict mapping the former to value texts) – The categorical bands to be used. Bands can be single numeric +values or ranges, e.g.: [0, (1, 10), 11, 12, (13, 20)]. +Be default, each band will also make up the value text of the +category created in the _meta component. To specify custom +texts, map each band to a category name e.g.: +[{‘A’: 0}, +{‘B’: (1, 10)}, +{‘C’: 11}, +{‘D’: 12}, +{‘E’: (13, 20)}]
        • +
        • new_name (str, default None) – The created variable will be named '<name>_banded', unless a +desired name is provided explicitly here.
        • +
        • label (str, default None) – The created variable’s text label will be identical to the origi- +nating one’s passed in name, unless a desired label is provided +explicitly here.
        • +
        • text_key (str, default None) – Text key for text-based label information. Uses the +DataSet.text_key information if not provided.
        • +
        +
        Returns:

        DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +by_type(types=None)
        +

        Get an overview of all the variables ordered by their type.

        + +++ + + + + + + + +
        Parameters:types (str or list of str, default None) – Restrict the overview to these data types.
        Returns:overview – The variables per data type inside the DataSet.
        Return type:pandas.DataFrame
        +
        + +
        +
        +categorize(name, categorized_name=None)
        +

        Categorize an int/string/text variable to single.

        +

        The values object of the categorized variable is populated with the +unique values found in the originating variable (ignoring np.NaN / +empty row entries).

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in meta['columns'] that will +be categorized.
        • +
        • categorized_name (str) – If provided, the categorized variable’s new name will be drawn +from here, otherwise a default name in form of 'name#' will be +used.
        • +
        +
        Returns:

        DataSet is modified inplace, adding the categorized variable to it.

        +
        Return type:

        None

        +
        +
        + +
        +
        +clear_factors(name)
        +

        Remove all factors set in the variable’s 'values' object.

        + +++ + + + + + + + +
        Parameters:name (str) – The column variable name keyed in _meta['columns'] or +_meta['masks'].
        Returns:
        Return type:None
        +
        + +
        +
        +clone()
        +

        Get a deep copy of the DataSet instance.

        +
        + +
        +
        +code_count(name, count_only=None, count_not=None)
        +

        Get the total number of codes/entries found per row.

        +
        +

        Note

        +

        Will be 0/1 for type single and range between 0 and the +number of possible values for type delimited set.

        +
        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in meta['columns'] or +meta['masks'].
        • +
        • count_only (int or list of int, default None) – Pass a list of codes to restrict counting to.
        • +
        • count_not (int or list of int, default None) – Pass a list of codes that should no be counted.
        • +
        +
        Returns:

        count – A series with the results as ints.

        +
        Return type:

        pandas.Series

        +
        +
        + +
        +
        +code_from_label(name, text_label, text_key=None, exact=True, flat=True)
        +

        Return the code belonging to the passed text label (if present).

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The originating variable name keyed in meta['columns'] +or meta['masks'].
        • +
        • text_label (str or list of str) – The value text(s) to search for.
        • +
        • text_key (str, default None) – The desired text_key to search through. Uses the +DataSet.text_key information if not provided.
        • +
        • exact (bool, default True) – text_label must exactly match a categorical value’s text. +If False, it is enough that the category contains the text_label.
        • +
        • flat (If a list is passed for text_label, return all found codes) – as a regular list. If False, return a list of lists matching the order +of the text_label list.
        • +
        +
        Returns:

        codes – The list of value codes found for the passed label text.

        +
        Return type:

        list

        +
        +
        + +
        +
        +codes(name)
        +

        Get categorical data’s numerical code values.

        + +++ + + + + + + + +
        Parameters:name (str) – The column variable name keyed in _meta['columns'].
        Returns:codes – The list of category codes.
        Return type:list
        +
        + +
        +
        +codes_in_data(name)
        +

        Get a list of codes that exist in data.

        +
        + +
        +
        +compare(dataset, variables=None, strict=False, text_key=None)
        +

        Compares types, codes, values, question labels of two datasets.

        + +++ + + + + + + + +
        Parameters:
          +
        • dataset (quantipy.DataSet instance) – Test if all variables in the provided dataset are also in +self and compare their metadata definitions.
        • +
        • variables (str, list of str) – Check only these variables
        • +
        • strict (bool, default False) – If True lower/ upper cases and spaces are taken into account.
        • +
        • text_key (str, list of str) – The textkeys for which texts are compared.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +convert(name, to)
        +

        Convert meta and case data between compatible variable types.

        +

        Wrapper around the separate as_TYPE() conversion methods.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in meta['columns'] that will +be converted.
        • +
        • to ({'int', 'float', 'single', 'delimited set', 'string'}) – The variable type to convert to.
        • +
        +
        Returns:

        The DataSet variable is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +copy(name, suffix='rec', copy_data=True, slicer=None, copy_only=None, copy_not=None)
        +

        Copy meta and case data of the variable defintion given per name.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The originating column variable name keyed in meta['columns'] +or meta['masks'].
        • +
        • suffix (str, default 'rec') – The new variable name will be constructed by suffixing the original +name with _suffix, e.g. 'age_rec.
        • +
        • copy_data (bool, default True) – The new variable assumes the data of the original variable.
        • +
        • slicer (dict) – If the data is copied it is possible to filter the data with a +complex logic. Example: slicer = {‘q1’: not_any([99])}
        • +
        • copy_only (int or list of int, default None) – If provided, the copied version of the variable will only contain +(data and) meta for the specified codes.
        • +
        • copy_not (int or list of int, default None) – If provided, the copied version of the variable will contain +(data and) meta for the all codes, except of the indicated.
        • +
        +
        Returns:

        DataSet is modified inplace, adding a copy to both the data and meta +component.

        +
        Return type:

        None

        +
        +
        + +
        +
        +copy_array_data(source, target, source_items=None, target_items=None, slicer=None)
        +
        + +
        +
        +create_set(setname='new_set', based_on='data file', included=None, excluded=None, strings='keep', arrays='masks', replace=None, overwrite=False)
        +

        Create a new set in dataset._meta['sets'].

        + +++ + + + + + + + +
        Parameters:
          +
        • setname (str, default 'new_set') – Name of the new set.
        • +
        • based_on (str, default 'data file') – Name of set that can be reduced or expanded.
        • +
        • included (str or list/set/tuple of str) – Names of the variables to be included in the new set. If None all +variables in based_on are taken.
        • +
        • excluded (str or list/set/tuple of str) – Names of the variables to be excluded in the new set.
        • +
        • strings ({'keep', 'drop', 'only'}, default 'keep') – Keep, drop or only include string variables.
        • +
        • arrays ({'masks', 'columns'}, default masks) – For arrays add masks@varname or columns@varname.
        • +
        • replace (dict) – Replace a variable in the set with an other. +Example: {‘q1’: ‘q1_rec’}, ‘q1’ and ‘q1_rec’ must be included in +based_on. ‘q1’ will be removed and ‘q1_rec’ will be +moved to this position.
        • +
        • overwrite (bool, default False) – Overwrite if meta['sets'][name] already exist.
        • +
        +
        Returns:

        The DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +crosstab(x, y=None, w=None, pct=False, decimals=1, text=True, rules=False, xtotal=False, f=None)
        +
        + +
        +
        +cut_item_texts(arrays=None)
        +

        Remove array text from array item texts.

        + +++ + + + +
        Parameters:arrays (str, list of str, default None) – Cut texts for items of these arrays. If None, all keys in +._meta['masks'] are taken.
        +
        + +
        +
        +data()
        +

        Return the data component of the DataSet instance.

        +
        + +
        +
        +derive(name, qtype, label, cond_map, text_key=None)
        +

        Create meta and recode case data by specifying derived category logics.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in meta['columns'].
        • +
        • qtype ([int, float, single, delimited set]) – The structural type of the data the meta describes.
        • +
        • label (str) – The text label information.
        • +
        • cond_map (list of tuples) –

          Tuples of either two or three elements of following structures:

          +

          2 elements, no labels provided: +(code, <qp logic expression here>), e.g.: +(1, intersection([{'gender': [1]}, {'age': frange('30-40')}]))

          +

          2 elements, no codes provided: +(‘text label’, <qp logic expression here>), e.g.: +('Cat 1', intersection([{'gender': [1]}, {'age': frange('30-40')}]))

          +

          3 elements, with codes + labels: +(code, ‘Label goes here’, <qp logic expression here>), e.g.: +(1, 'Men, 30 to 40', intersection([{'gender': [1]}, {'age': frange('30-40')}]))

          +
        • +
        • text_key (str, default None) – Text key for text-based label information. Will automatically fall +back to the instance’s text_key property information if not provided.
        • +
        +
        Returns:

        DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +derotate(levels, mapper, other=None, unique_key='identity', dropna=True)
        +

        Derotate data and meta using the given mapper, and appending others.

        +

        This function derotates data using the specification defined in +mapper, which is a list of dicts of lists, describing how +columns from data can be read as a hierarchical structure.

        +

        Returns derotated DataSet instance and saves data and meta as json +and csv.

        + +++ + + + + + + + +
        Parameters:
          +
        • levels (dict) – The name and values of a new column variable to identify cases.
        • +
        • mapper (list of dicts of lists) –

          A list of dicts matching where the new column names are keys to +lists of source columns. Example:

          +
          >>> mapper = [{'q14_1': ['q14_1_1', 'q14_1_2', 'q14_1_3']},
          +...           {'q14_2': ['q14_2_1', 'q14_2_2', 'q14_2_3']},
          +...           {'q14_3': ['q14_3_1', 'q14_3_2', 'q14_3_3']}]
          +
          +
          +
        • +
        • unique_key (str) – Name of column variable that will be copied to new dataset.
        • +
        • other (list (optional; default=None)) – A list of additional columns from the source data to be appended +to the end of the resulting stacked dataframe.
        • +
        • dropna (boolean (optional; default=True)) – Passed through to the pandas.DataFrame.stack() operation.
        • +
        +
        Returns:

        +
        Return type:

        new qp.DataSet instance

        +
        +
        + +
        +
        +describe(var=None, only_type=None, text_key=None, axis_edit=None)
        +

        Inspect the DataSet’s global or variable level structure.

        +
        + +
        +
        +dichotomize(name, value_texts=None, keep_variable_text=True, ignore=None, replace=False, text_key=None)
        +
        + +
        +
        +dimensionize(names=None)
        +

        Rename the dataset columns for Dimensions compatibility.

        +
        + +
        +
        +dimensionizing_mapper(names=None)
        +

        Return a renaming dataset mapper for dimensionizing names.

        + +++ + + + + + + + +
        Parameters:None
        Returns:mapper – A renaming mapper in the form of a dict of {old: new} that +maps non-Dimensions naming conventions to Dimensions naming +conventions.
        Return type:dict
        +
        + +
        +
        +drop(name, ignore_items=False)
        +

        Drops variables from meta and data components of the DataSet.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str or list of str) – The column variable name keyed in _meta['columns'] or +_meta['masks'].
        • +
        • ignore_items (bool) – If False source variables for arrays in _meta['columns'] +are dropped, otherwise kept.
        • +
        +
        Returns:

        DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +drop_duplicates(unique_id='identity', keep='first')
        +

        Drop duplicated cases from self._data.

        + +++ + + + +
        Parameters:
          +
        • unique_id (str) – Variable name that gets scanned for duplicates.
        • +
        • keep (str, {'first', 'last'}) – Keep first or last of the duplicates.
        • +
        +
        +
        + +
        +
        +duplicates(name='identity')
        +

        Returns a list with duplicated values for the provided name.

        + +++ + + + + + + + +
        Parameters:name (str, default 'identity') – The column variable name keyed in meta['columns'].
        Returns:vals – A list of duplicated values found in the named variable.
        Return type:list
        +
        + +
        +
        +empty(name, condition=None)
        +

        Check variables for emptiness (opt. restricted by a condition).

        + +++ + + + + + + + +
        Parameters:
          +
        • name ((list of) str) – The mask variable name keyed in _meta['columns'].
        • +
        • condition (Quantipy logic expression, default None) – A logical condition expressed as Quantipy logic that determines +which subset of the case data rows to be considered.
        • +
        +
        Returns:

        empty

        +
        Return type:

        bool

        +
        +
        + +
        +
        +empty_items(name, condition=None, by_name=True)
        +

        Test arrays for item emptiness (opt. restricted by a condition).

        + +++ + + + + + + + +
        Parameters:
          +
        • name ((list of) str) – The mask variable name keyed in _meta['masks'].
        • +
        • condition (Quantipy logic expression, default None) – A logical condition expressed as Quantipy logic that determines +which subset of the case data rows to be considered.
        • +
        • by_name (bool, default True) – Return array items by their name or their index.
        • +
        +
        Returns:

        empty – The list of empty items by their source names or positional index +(starting from 1!, mapped to their parent mask name if more than +one).

        +
        Return type:

        list

        +
        +
        + +
        +
        +extend_items(name, ext_items, text_key=None)
        +

        Extend mask items of an existing array.

        + +++ + + + +
        Parameters:
          +
        • name (str) – The originating column variable name keyed in meta['masks'].
        • +
        • ext_items (list of str/ list of dict) – The label of the new item. It can be provided as str, then the new +column is named by the grid and the item_no, or as dict +{‘new_column’: ‘label’}.
        • +
        • text_key (str/ list of str, default None) – Text key for text-based label information. Will automatically fall +back to the instance’s text_key property information if not provided.
        • +
        +
        +
        + +
        +
        +extend_values(name, ext_values, text_key=None, safe=True)
        +

        Add to the ‘values’ object of existing column or mask meta data.

        +

        Attempting to add already existing value codes or providing already +present value texts will both raise a ValueError!

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in _meta['columns'] or +_meta['masks'].
        • +
        • ext_values (list of str or tuples in form of (int, str), default None) – When a list of str is given, the categorical values will simply be +enumerated and mapped to the category labels. Alternatively codes can be +mapped to categorical labels, e.g.: +[(1, ‘Elephant’), (2, ‘Mouse’), (999, ‘No animal’)]
        • +
        • text_key (str, default None) – Text key for text-based label information. Will automatically fall +back to the instance’s text_key property information if not provided.
        • +
        • safe (bool, default True) – If set to False, duplicate value texts are allowed when extending +the values object.
        • +
        +
        Returns:

        The DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +factors(name)
        +

        Get categorical data’s stat. factor values.

        + +++ + + + + + + + +
        Parameters:name (str) – The column variable name keyed in _meta['columns'] or +_meta['masks'].
        Returns:factors – A {value: factor} mapping.
        Return type:OrderedDict
        +
        + +
        +
        +filter(alias, condition, inplace=False)
        +

        Filter the DataSet using a Quantipy logical expression.

        +
        + +
        +
        +find(str_tags=None, suffixed=False)
        +

        Find variables by searching their names for substrings.

        + +++ + + + + + + + +
        Parameters:
          +
        • str_tags ((list of) str) – The strings tags to look for in the variable names. If not provided, +the modules’ default global list of substrings from VAR_SUFFIXES +will be used.
        • +
        • suffixed (bool, default False) – If set to True, only variable names that end with a given string +sequence will qualify.
        • +
        +
        Returns:

        found – The list of matching variable names.

        +
        Return type:

        list

        +
        +
        + +
        +
        +find_duplicate_texts(name, text_key=None)
        +

        Collect values that share the same text information to find duplicates.

        + +++ + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in _meta['columns'] or +_meta['masks'].
        • +
        • text_key (str, default None) – Text key for text-based label information. Will automatically fall +back to the instance’s text_key property information if not +provided.
        • +
        +
        +
        + +
        +
        +flatten(name, codes, new_name=None, text_key=None)
        +

        Create a variable that groups array mask item answers to categories.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The array variable name keyed in meta['masks'] that will +be converted.
        • +
        • codes (int, list of int) – The answers codes that determine the categorical grouping. +Item labels will become the category labels.
        • +
        • new_name (str, default None) – The name of the new delimited set variable. If None, name is +suffixed with ‘_rec’.
        • +
        • text_key (str, default None) – Text key for text-based label information. Uses the +DataSet.text_key information if not provided.
        • +
        +
        Returns:

        The DataSet is modified inplace, delimited set variable is added.

        +
        Return type:

        None

        +
        +
        + +
        +
        +force_texts(copy_to=None, copy_from=None, update_existing=False)
        +

        Copy info from existing text_key to a new one or update the existing one.

        + +++ + + + + + + + +
        Parameters:
          +
        • copy_to (str) – {‘en-GB’, ‘da-DK’, ‘fi-FI’, ‘nb-NO’, ‘sv-SE’, ‘de-DE’} +None -> _meta[‘lib’][‘default text’] +The text key that will be filled.
        • +
        • copy_from (str / list) – {‘en-GB’, ‘da-DK’, ‘fi-FI’, ‘nb-NO’, ‘sv-SE’, ‘de-DE’} +You can also enter a list with text_keys, if the first text_key +doesn’t exist, it takes the next one
        • +
        • update_existing (bool) – True : copy_to will be filled in any case +False: copy_to will be filled if it’s empty/not existing
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +from_batch(batch_name, include='identity', text_key=[], apply_edits=True, additions='variables')
        +

        Get a filtered subset of the DataSet using qp.Batch definitions.

        + +++ + + + + + + + +
        Parameters:
          +
        • batch_name (str) – Name of a Batch included in the DataSet.
        • +
        • include (str/ list of str) – Name of variables that get included even if they are not in Batch.
        • +
        • text_key (str/ list of str, default None) – Take over all texts of the included text_key(s), if None is provided +all included text_keys are taken.
        • +
        • apply_edits (bool, default True) – meta_edits and rules are used as/ applied on global meta of the +new DataSet instance.
        • +
        • additions ({'variables', 'filters', 'full', None}) – Extend included variables by the xks, yks and weights of the +additional batches if set to ‘variables’, ‘filters’ will create +new 1/0-coded variables that reflect any filters defined. Selecting +‘full’ will do both, None will ignore additional Batches completely.
        • +
        +
        Returns:

        b_ds

        +
        Return type:

        quantipy.DataSet

        +
        +
        + +
        +
        +from_components(data_df, meta_dict=None, reset=True, text_key=None)
        +

        Attach data and meta directly to the DataSet instance.

        +
        +

        Note

        +

        Except testing for appropriate object types, this method +offers no additional safeguards or consistency/compability checks +with regard to the passed data and meta documents!

        +
        + +++ + + + + + + + +
        Parameters:
          +
        • data_df (pandas.DataFrame) – A DataFrame that contains case data entries for the DataSet.
        • +
        • meta_dict (dict, default None) – A dict that stores meta data describing the columns of the data_df. +It is assumed to be well-formed following the Quantipy meta data +structure.
        • +
        • reset (bool, default True) – Clean the ‘lib’ and 'sets' metadata collections from non-native +entries, e.g. user-defined information or helper metadata.
        • +
        • text_key (str, default None) – The text_key to be used. If not provided, it will be attempted to +use the ‘default text’ from the meta['lib'] definition.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +from_excel(path_xlsx, merge=True, unique_key='identity')
        +

        Converts excel files to a dataset and/or merges variables.

        + +++ + + + + + + + +
        Parameters:
          +
        • path_xlsx (str) – Path where the excel file is stored. The file must have exactly +one sheet with data.
        • +
        • merge (bool) – If True the new data from the excel file will be merged on the +dataset.
        • +
        • unique_key (str) – If merge=True a hmerge is done on this variable.
        • +
        +
        Returns:

        new_dataset – Contains only the data from excel. +If merge=True dataset is modified inplace.

        +
        Return type:

        quantipy.DataSet

        +
        +
        + +
        +
        +from_stack(stack, data_key=None, dk_filter=None, reset=True)
        +

        Use quantipy.Stack data and meta to create a DataSet instance.

        + +++ + + + + + + + +
        Parameters:
          +
        • stack (quantipy.Stack) – The Stack instance to convert.
        • +
        • data_key (str) – The reference name where meta and data information are stored.
        • +
        • dk_filter (string, default None) – Filter name if the stack contains more than one filters. If None +‘no_filter’ will be used.
        • +
        • reset (bool, default True) – Clean the ‘lib’ and 'sets' metadata collections from non-native +entries, e.g. user-defined information or helper metadata.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +fully_hidden_arrays()
        +

        Get all array definitions that contain only hidden items.

        + +++ + + + + + +
        Returns:hidden – The list of array mask names.
        Return type:list
        +
        + +
        +
        +get_batch(name)
        +

        Get existing Batch instance from DataSet meta information.

        + +++ + + + +
        Parameters:name (str) – Name of existing Batch instance.
        +
        + +
        +
        +get_property(name, prop_name, text_key=None)
        +
        + +
        +
        +hide_empty_items(condition=None, arrays=None)
        +

        Apply rules meta to automatically hide empty array items.

        + +++ + + + + + + + +
        Parameters:
          +
        • arrays ((list of) str, default None) – The array mask variable names keyed in _meta['masks']. If not +explicitly provided will test all array mask definitions.
        • +
        • condition (Quantipy logic expression) – A logical condition expressed as Quantipy logic that determines +which subset of the case data rows to be considered.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +hiding(name, hide, axis='y', hide_values=True)
        +

        Set or update rules[axis]['dropx'] meta for the named column.

        +

        Quantipy builds will respect the hidden codes and cut them from +results.

        +
        +

        Note

        +

        This is not equivalent to DataSet.set_missings() as +missing values are respected also in computations.

        +
        + +++ + + + + + + + +
        Parameters:
          +
        • name (str or list of str) – The column variable(s) name keyed in _meta['columns'].
        • +
        • hide (int or list of int) – Values indicated by their int codes will be dropped from +Quantipy.View.dataframes.
        • +
        • axis ({'x', 'y'}, default 'y') – The axis to drop the values from.
        • +
        • hide_values (bool, default True) – Only considered if name refers to a mask. If True, values are +hidden on all mask items. If False, mask items are hidden by position +(only for array summaries).
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +hmerge(dataset, on=None, left_on=None, right_on=None, overwrite_text=False, from_set=None, inplace=True, merge_existing=None, verbose=True)
        +

        Merge Quantipy datasets together using an index-wise identifier.

        +

        This function merges two Quantipy datasets together, updating variables +that exist in the left dataset and appending others. New variables +will be appended in the order indicated by the ‘data file’ set if +found, otherwise they will be appended in alphanumeric order. +This merge happens horizontally (column-wise). Packed kwargs will be +passed on to the pandas.DataFrame.merge() method call, but that merge +will always happen using how=’left’.

        + +++ + + + + + + + +
        Parameters:
          +
        • dataset (quantipy.DataSet) – The dataset to merge into the current DataSet.
        • +
        • on (str, default=None) – The column to use as a join key for both datasets.
        • +
        • left_on (str, default=None) – The column to use as a join key for the left dataset.
        • +
        • right_on (str, default=None) – The column to use as a join key for the right dataset.
        • +
        • overwrite_text (bool, default=False) – If True, text_keys in the left meta that also exist in right +meta will be overwritten instead of ignored.
        • +
        • from_set (str, default=None) – Use a set defined in the right meta to control which columns are +merged from the right dataset.
        • +
        • inplace (bool, default True) – If True, the DataSet will be modified inplace with new/updated +columns. Will return a new DataSet instance if False.
        • +
        • verbose (bool, default=True) – Echo progress feedback to the output pane.
        • +
        +
        Returns:

        None or new_dataset – If the merge is not applied inplace, a DataSet instance +is returned.

        +
        Return type:

        quantipy.DataSet

        +
        +
        + +
        +
        +interlock(name, label, variables, val_text_sep='/')
        +

        Build a new category-intersected variable from >=2 incoming variables.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The new column variable name keyed in _meta['columns'].
        • +
        • label (str) – The new text label for the created variable.
        • +
        • variables (list of >= 2 str or dict (mapper)) –

          The column names of the variables that are feeding into the +intersecting recode operation. Or dicts/mapper to create temporary +variables for interlock. Can also be a mix of str and dict. Example:

          +
          >>> ['gender',
          +...  {'agegrp': [(1, '18-34', {'age': frange('18-34')}),
          +...              (2, '35-54', {'age': frange('35-54')}),
          +...              (3, '55+', {'age': is_ge(55)})]},
          +...  'region']
          +
          +
          +
        • +
        • val_text_sep (str, default '/') – The passed character (or any other str value) will be used to +separate the incoming individual value texts to make up the inter- +sected category value texts, e.g.: ‘Female/18-30/London’.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +is_like_numeric(name)
        +

        Test if a string-typed variable can be expressed numerically.

        + +++ + + + + + + + +
        Parameters:name (str) – The column variable name keyed in _meta['columns'].
        Returns:
        Return type:bool
        +
        + +
        +
        +is_nan(name)
        +

        Detect empty entries in the _data rows.

        + +++ + + + + + + + +
        Parameters:name (str) – The column variable name keyed in meta['columns'].
        Returns:count – A series with the results as bool.
        Return type:pandas.Series
        +
        + +
        +
        +item_no(name)
        +

        Return the order/position number of passed array item variable name.

        + +++ + + + + + + + +
        Parameters:name (str) – The column variable name keyed in _meta['columns'].
        Returns:no – The positional index of the item (starting from 1).
        Return type:int
        +
        + +
        +
        +item_texts(name, text_key=None, axis_edit=None)
        +

        Get the text meta data for the items of the passed array mask name.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The mask variable name keyed in _meta['masks'].
        • +
        • text_key (str, default None) – The text_key that should be used when taking labels from the +source meta.
        • +
        • axis_edit ({'x', 'y'}, default None) – If provided the text_key is taken from the x/y edits dict.
        • +
        +
        Returns:

        texts – The list of item texts for the array elements.

        +
        Return type:

        list

        +
        +
        + +
        +
        +items(name, text_key=None, axis_edit=None)
        +

        Get the array’s paired item names and texts information from the meta.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in _meta['masks'].
        • +
        • text_key (str, default None) – The text_key that should be used when taking labels from the +source meta.
        • +
        • axis_edit ({'x', 'y'}, default None) – If provided the text_key is taken from the x/y edits dict.
        • +
        +
        Returns:

        items – The list of source item names (from _meta['columns']) and their +text information packed as tuples.

        +
        Return type:

        list of tuples

        +
        +
        + +
        + +

        Create a Link instance from the DataSet.

        +
        + +
        +
        +merge_texts(dataset)
        +

        Add additional text versions from other text_key meta.

        +

        Case data will be ignored during the merging process.

        + +++ + + + + + + + +
        Parameters:dataset ((A list of multiple) quantipy.DataSet) – One or multiple datasets that provide new text_key meta.
        Returns:
        Return type:None
        +
        + +
        +
        +meta(name=None, text_key=None, axis_edit=None)
        +

        Provide a pretty summary for variable meta given as per name.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str, default None) – The variable name keyed in _meta['columns'] or _meta['masks']. +If None, the entire meta component of the DataSet instance +will be returned.
        • +
        • text_key (str, default None) – The text_key that should be used when taking labels from the +source meta.
        • +
        • axis_edit ({'x', 'y'}, default None) – If provided the text_key is taken from the x/y edits dict.
        • +
        +
        Returns:

        meta – Either a DataFrame that sums up the meta information on a mask +or column, or the meta dict as a whole is returned.

        +
        Return type:

        dict or pandas.DataFrame

        +
        +
        + +
        +
        +meta_to_json(key=None, collection=None)
        +

        Save a meta object as json file.

        + +++ + + + + + + + +
        Parameters:
          +
        • key (str, default None) – Name of the variable whose metadata is saved, if key is not +provided included collection or the whole meta is saved.
        • +
        • collection (str {'columns', 'masks', 'sets', 'lib'}, default None) – The meta object is taken from this collection.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +min_value_count(name, min=50, weight=None, condition=None, axis='y', verbose=True)
        +

        Wrapper for self.hiding(), which hides values with low counts.

        + +++ + + + +
        Parameters:
          +
        • name (str/ list of str) – Name(s) of the variable(s) whose values are checked against the +defined border.
        • +
        • min (int) – If the amount of counts for a value is below this number, the +value is hidden.
        • +
        • weight (str, default None) – Name of the weight, which is used to calculate the weighted counts.
        • +
        • condition (complex logic) – The data, which is used to calculate the counts, can be filtered +by the included condition.
        • +
        • axis ({'y', 'x', ['x', 'y']}, default None) – The axis on which the values are hidden.
        • +
        +
        +
        + +
        +
        +names(ignore_items=True)
        +

        Find all weak-duplicate variable names that are different only by case.

        +
        +

        Note

        +

        Will return self.variables() if no weak-duplicates are found.

        +
        + +++ + + + + + +
        Returns:weak_dupes – An overview of case-sensitive spelling differences in otherwise +equal variable names.
        Return type:pd.DataFrame
        +
        + +
        +
        +order(new_order=None, reposition=None, regroup=False)
        +

        Set the global order of the DataSet variables collection.

        +

        The global order of the DataSet is reflected in the data component’s +pd.DataFrame.columns order and the variable references in the meta +component’s ‘data file’ items.

        + +++ + + + + + + + +
        Parameters:
          +
        • new_order (list) – A list of all DataSet variables in the desired order.
        • +
        • reposition ((List of) dict) – Each dict maps one or a list of variables to a reference variable +name key. The mapped variables are moved before the reference key.
        • +
        • regroup (bool, default False) – Attempt to regroup non-native variables (i.e. created either +manually with add_meta(), recode(), derive(), etc. +or automatically by manifesting qp.View objects) with their +originating variables.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +parents(name)
        +

        Get the parent meta information for masks-structured column elements.

        + +++ + + + + + + + +
        Parameters:name (str) – The mask variable name keyed in _meta['columns'].
        Returns:parents – The list of parents the _meta['columns'] variable is attached to.
        Return type:list
        +
        + +
        +
        +populate(batches='all', verbose=True)
        +

        Create a qp.Stack based on all available qp.Batch definitions.

        + +++ + + + + + + + +
        Parameters:batches (str/ list of str) – Name(s) of qp.Batch instances that are used to populate the +qp.Stack.
        Returns:
        Return type:qp.Stack
        +
        + +
        +
        +read_ascribe(path_meta, path_data, text_key)
        +

        Load Dimensions .xml/.txt files, connecting as data and meta components.

        + +++ + + + + + + + +
        Parameters:
          +
        • path_meta (str) – The full path (optionally with extension '.xml', otherwise +assumed as such) to the meta data defining '.xml' file.
        • +
        • path_data (str) – The full path (optionally with extension '.txt', otherwise +assumed as such) to the case data defining '.txt' file.
        • +
        +
        Returns:

        The DataSet is modified inplace, connected to Quantipy data +and meta components that have been converted from their Ascribe +source files.

        +
        Return type:

        None

        +
        +
        + +
        +
        +read_dimensions(path_meta, path_data)
        +

        Load Dimensions .ddf/.mdd files, connecting as data and meta components.

        + +++ + + + + + + + +
        Parameters:
          +
        • path_meta (str) – The full path (optionally with extension '.mdd', otherwise +assumed as such) to the meta data defining '.mdd' file.
        • +
        • path_data (str) – The full path (optionally with extension '.ddf', otherwise +assumed as such) to the case data defining '.ddf' file.
        • +
        +
        Returns:

        The DataSet is modified inplace, connected to Quantipy data +and meta components that have been converted from their Dimensions +source files.

        +
        Return type:

        None

        +
        +
        + +
        +
        +read_quantipy(path_meta, path_data, reset=True)
        +

        Load Quantipy .csv/.json files, connecting as data and meta components.

        + +++ + + + + + + + +
        Parameters:
          +
        • path_meta (str) – The full path (optionally with extension '.json', otherwise +assumed as such) to the meta data defining '.json' file.
        • +
        • path_data (str) – The full path (optionally with extension '.csv', otherwise +assumed as such) to the case data defining '.csv' file.
        • +
        • reset (bool, default True) – Clean the ‘lib’ and 'sets' metadata collections from non-native +entries, e.g. user-defined information or helper metadata.
        • +
        +
        Returns:

        The DataSet is modified inplace, connected to Quantipy native +data and meta components.

        +
        Return type:

        None

        +
        +
        + +
        +
        +read_spss(path_sav, **kwargs)
        +

        Load SPSS Statistics .sav files, converting and connecting data/meta.

        + +++ + + + + + + + +
        Parameters:path_sav (str) – The full path (optionally with extension '.sav', otherwise +assumed as such) to the '.sav' file.
        Returns:The DataSet is modified inplace, connected to Quantipy data +and meta components that have been converted from the SPSS +source file.
        Return type:None
        +
        + +
        +
        +recode(target, mapper, default=None, append=False, intersect=None, initialize=None, fillna=None, inplace=True)
        +

        Create a new or copied series from data, recoded using a mapper.

        +

        This function takes a mapper of {key: logic} entries and injects the +key into the target column where its paired logic is True. The logic +may be arbitrarily complex and may refer to any other variable or +variables in data. Where a pre-existing column has been used to +start the recode, the injected values can replace or be appended to +any data found there to begin with. Note that this function does +not edit the target column, it returns a recoded copy of the target +column. The recoded data will always comply with the column type +indicated for the target column according to the meta.

        + +++ + + + + + + + +
        Parameters:
          +
        • target (str) – The column variable name keyed in _meta['columns'] that is the +target of the recode. If not found in _meta this will fail +with an error. If target is not found in data.columns the +recode will start from an empty series with the same index as +_data. If target is found in data.columns the recode will +start from a copy of that column.
        • +
        • mapper (dict) – A mapper of {key: logic} entries.
        • +
        • default (str, default None) – The column name to default to in cases where unattended lists +are given in your logic, where an auto-transformation of +{key: list} to {key: {default: list}} is provided. Note that +lists in logical statements are themselves a form of shorthand +and this will ultimately be interpreted as: +{key: {default: has_any(list)}}.
        • +
        • append (bool, default False) – Should the new recoded data be appended to values already found +in the series? If False, data from series (where found) will +overwrite whatever was found for that item instead.
        • +
        • intersect (logical statement, default None) – If a logical statement is given here then it will be used as an +implied intersection of all logical conditions given in the +mapper.
        • +
        • initialize (str or np.NaN, default None) – If not None, a copy of the data named column will be used to +populate the target column before the recode is performed. +Alternatively, initialize can be used to populate the target +column with np.NaNs (overwriting whatever may be there) prior +to the recode.
        • +
        • fillna (int, default=None) – If not None, the value passed to fillna will be used on the +recoded series as per pandas.Series.fillna().
        • +
        • inplace (bool, default True) – If True, the DataSet will be modified inplace with new/updated +columns. Will return a new recoded pandas.Series instance if +False.
        • +
        +
        Returns:

        Either the DataSet._data is modified inplace or a new +pandas.Series is returned.

        +
        Return type:

        None or recode_series

        +
        +
        + +
        +
        +remove_html()
        +

        Cycle through all meta text objects removing html tags.

        +

        Currently uses the regular expression ‘<.*?>’ in _remove_html() +classmethod.

        + +++ + + + + + +
        Returns:
        Return type:None
        +
        + +
        +
        +remove_items(name, remove)
        +

        Erase array mask items safely from both meta and case data components.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The originating column variable name keyed in meta['masks'].
        • +
        • remove (int or list of int) – The items listed by their order number in the +_meta['masks'][name]['items'] object will be dropped from the +mask definition.
        • +
        +
        Returns:

        DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +remove_values(name, remove)
        +

        Erase value codes safely from both meta and case data components.

        +

        Attempting to remove all value codes from the variable’s value object +will raise a ValueError!

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The originating column variable name keyed in meta['columns'] +or meta['masks'].
        • +
        • remove (int or list of int) – The codes to be removed from the DataSet variable.
        • +
        +
        Returns:

        DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +rename(name, new_name)
        +

        Change meta and data column name references of the variable definition.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The originating column variable name keyed in meta['columns'] +or meta['masks'].
        • +
        • new_name (str) – The new variable name.
        • +
        +
        Returns:

        DataSet is modified inplace. The new name reference replaces the +original one.

        +
        Return type:

        None

        +
        +
        + +
        +
        +rename_from_mapper(mapper, keep_original=False)
        +

        Rename meta objects and data columns using mapper.

        + +++ + + + + + + + +
        Parameters:mapper (dict) – A renaming mapper in the form of a dict of {old: new} that +will be used to rename columns throughout the meta and data.
        Returns:DataSet is modified inplace.
        Return type:None
        +
        + +
        +
        +reorder_items(name, new_order)
        +

        Apply a new order to mask items.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The variable name keyed in _meta['masks'].
        • +
        • new_order (list of int, default None) – The new order of the mask items. The included ints match up to +the number of the items (DataSet.item_no('item_name')).
        • +
        +
        Returns:

        DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +reorder_values(name, new_order=None)
        +

        Apply a new order to the value codes defined by the meta data component.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in _meta['columns'] or +_meta['masks'].
        • +
        • new_order (list of int, default None) – The new code order of the DataSet variable. If no order is given, +the values object is sorted ascending.
        • +
        +
        Returns:

        DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +repair()
        +

        Try to fix legacy meta data inconsistencies and badly shaped array / +datafile items 'sets' meta definitions.

        +
        + +
        +
        +repair_text_edits(text_key=None)
        +

        Cycle through all meta text objects repairing axis edits.

        + +++ + + + + + + + +
        Parameters:text_key (str / list of str, default None) – {None, ‘en-GB’, ‘da-DK’, ‘fi-FI’, ‘nb-NO’, ‘sv-SE’, ‘de-DE’} +The text_keys for which text edits should be included.
        Returns:
        Return type:None
        +
        + +
        +
        +replace_texts(replace, text_key=None)
        +

        Cycle through all meta text objects replacing unwanted strings.

        + +++ + + + + + + + +
        Parameters:
          +
        • replace (dict, default None) – A dictionary mapping {unwanted string: replacement string}.
        • +
        • text_key (str / list of str, default None) – {None, ‘en-GB’, ‘da-DK’, ‘fi-FI’, ‘nb-NO’, ‘sv-SE’, ‘de-DE’} +The text_keys for which unwanted strings are replaced.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +resolve_name(name)
        +
        + +
        +
        +restore_item_texts(arrays=None)
        +

        Restore array item texts.

        + +++ + + + +
        Parameters:arrays (str, list of str, default None) – Restore texts for items of these arrays. If None, all keys in +._meta['masks'] are taken.
        +
        + +
        +
        +revert()
        +

        Return to a previously saved state of the DataSet.

        +
        +

        Note

        +

        This method is designed primarily for use in interactive +Python environments like iPython/Jupyter and their notebook +applications.

        +
        +
        + +
        +
        +roll_up(varlist, ignore_arrays=None)
        +

        Replace any array items with their parent mask variable definition name.

        + +++ + + + + + + + +
        Parameters:
          +
        • varlist (list) – A list of meta 'columns' and/or 'masks' names.
        • +
        • ignore_arrays ((list of) str) – A list of array mask names that should not be rolled up if their +items are found inside varlist.
        • +
        +
        Returns:

        rolled_up – The modified varlist.

        +
        Return type:

        list

        +
        +
        + +
        +
        +save()
        +

        Save the current state of the DataSet’s data and meta.

        +

        The saved file will be temporarily stored inside the cache. Use this +to take a snapshot of the DataSet state to easily revert back to at a +later stage.

        +
        +

        Note

        +

        This method is designed primarily for use in interactive +Python environments like iPython/Jupyter notebook applications.

        +
        +
        + +
        +
        +select_text_keys(text_key=None)
        +

        Cycle through all meta text objects keep only selected text_key.

        + +++ + + + + + + + +
        Parameters:text_key (str / list of str, default None) – {None, ‘en-GB’, ‘da-DK’, ‘fi-FI’, ‘nb-NO’, ‘sv-SE’, ‘de-DE’} +The text_keys which should be kept.
        Returns:
        Return type:None
        +
        + +
        +
        +classmethod set_encoding(encoding)
        +

        Hack sys.setdefaultencoding() to escape ASCII hell.

        + +++ + + + +
        Parameters:encoding (str) – The name of the encoding to default to.
        +
        + +
        +
        +set_factors(name, factormap, safe=False)
        +

        Apply numerical factors to (single-type categorical) variables.

        +

        Factors can be read while aggregating descrp. stat. qp.Views.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in _meta['columns'] or +_meta['masks'].
        • +
        • factormap (dict) – A mapping of {value: factor} (int to int).
        • +
        • safe (bool, default False) – Set to True to prevent setting factors to the values meta +data of non-single type variables.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +set_item_texts(name, renamed_items, text_key=None, axis_edit=None)
        +

        Rename or add item texts in the items objects of masks.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in _meta['masks'].
        • +
        • renamed_items (dict) –

          A dict mapping with following structure (array mask items are +assumed to be passed by their order number):

          +
          >>> {1: 'new label for item #1',
          +...  5: 'new label for item #5'}
          +
          +
          +
        • +
        • text_key (str, default None) – Text key for text-based label information. Will automatically fall +back to the instance’s text_key property information if not +provided.
        • +
        • axis_edit ({'x', 'y', ['x', 'y']}, default None) – If the new_text of the variable should only be considered temp. +for build exports, the axes on that the edited text should appear +can be provided.
        • +
        +
        Returns:

        The DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +set_missings(var, missing_map='default', hide_on_y=True, ignore=None)
        +

        Flag category definitions for exclusion in aggregations.

        + +++ + + + + + + + +
        Parameters:
          +
        • var (str or list of str) – Variable(s) to apply the meta flags to.
        • +
        • missing_map ('default' or list of codes or dict of {'flag': code(s)}, default 'default') – A mapping of codes to flags that can either be ‘exclude’ (globally +ignored) or ‘d.exclude’ (only ignored in descriptive statistics). +Codes provided in a list are flagged as ‘exclude’. +Passing ‘default’ is using a preset list of (TODO: specify) values +for exclusion.
        • +
        • ignore (str or list of str, default None) – A list of variables that should be ignored when applying missing +flags via the ‘default’ list method.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +set_property(name, prop_name, prop_value, ignore_items=False)
        +

        Access and set the value of a meta object’s properties collection.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The originating column variable name keyed in meta['columns'] +or meta['masks'].
        • +
        • prop_name (str) – The property key name.
        • +
        • prop_value (any) – The value to be set for the property. Must be of valid type and +have allowed values(s) with regard to the property.
        • +
        • ignore_items (bool, default False) – When name refers to a variable from the 'masks' collection, +setting to True will ignore any items and only apply the +property to the mask itself.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +set_text_key(text_key)
        +

        Set the default text_key of the DataSet.

        +
        +

        Note

        +

        A lot of the instance methods will fall back to the default +text key in _meta['lib']['default text']. It is therefore +important to use this method with caution, i.e. ensure that the +meta contains text entries for the text_key set.

        +
        + +++ + + + + + + + +
        Parameters:text_key ({'en-GB', 'da-DK', 'fi-FI', 'nb-NO', 'sv-SE', 'de-DE'}) – The text key that will be set in _meta['lib']['default text'].
        Returns:
        Return type:None
        +
        + +
        +
        +set_value_texts(name, renamed_vals, text_key=None, axis_edit=None)
        +

        Rename or add value texts in the ‘values’ object.

        +

        This method works for array masks and column meta data.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in _meta['columns'] or +_meta['masks'].
        • +
        • renamed_vals (dict) – A dict mapping with following structure: +{1: 'new label for code=1', 5: 'new label for code=5'} +Codes will be ignored if they do not exist in the ‘values’ object.
        • +
        • text_key (str, default None) – Text key for text-based label information. Will automatically fall +back to the instance’s text_key property information if not +provided.
        • +
        • axis_edit ({'x', 'y', ['x', 'y']}, default None) – If renamed_vals should only be considered temp. for build +exports, the axes on that the edited text should appear can be +provided.
        • +
        +
        Returns:

        The DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +set_variable_text(name, new_text, text_key=None, axis_edit=None)
        +

        Apply a new or update a column’s/masks’ meta text object.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The originating column variable name keyed in meta['columns'] +or meta['masks'].
        • +
        • new_text (str) – The text (label) to be set.
        • +
        • text_key (str, default None) – Text key for text-based label information. Will automatically fall +back to the instance’s text_key property information if not provided.
        • +
        • axis_edit ({'x', 'y', ['x', 'y']}, default None) – If the new_text of the variable should only be considered temp. +for build exports, the axes on that the edited text should appear +can be provided.
        • +
        +
        Returns:

        The DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +set_verbose_errmsg(verbose=True)
        +
        + +
        +
        +set_verbose_infomsg(verbose=True)
        +
        + +
        +
        +slicing(name, slicer, axis='y')
        +

        Set or update rules[axis]['slicex'] meta for the named column.

        +

        Quantipy builds will respect the kept codes and show them exclusively +in results.

        +
        +

        Note

        +

        This is not a replacement for DataSet.set_missings() as +missing values are respected also in computations.

        +
        + +++ + + + + + + + +
        Parameters:
          +
        • name (str or list of str) – The column variable(s) name keyed in _meta['columns'].
        • +
        • slicer (int or list of int) – Values indicated by their int codes will be shown in +Quantipy.View.dataframes, respecting the provided order.
        • +
        • axis ({'x', 'y'}, default 'y') – The axis to slice the values on.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +sorting(name, on='@', within=False, between=False, fix=None, ascending=False, sort_by_weight=None)
        +

        Set or update rules['x']['sortx'] meta for the named column.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str or list of str) – The column variable(s) name keyed in _meta['columns'].
        • +
        • within (bool, default False) – Applies only to variables that have been aggregated by creating +an expand grouping / overcode-style View: +If True, will sort frequencies inside each group.
        • +
        • between (bool, default False) – Applies only to variables that have been aggregated by creating +an expand grouping / overcode-style View: +If True, will sort group and regular code frequencies with regard +to each other.
        • +
        • fix (int or list of int, default None) – Values indicated by their int codes will be ignored in +the sorting operation.
        • +
        • ascending (bool, default False) – By default frequencies are sorted in descending order. Specify +True to sort ascending.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +sources(name)
        +

        Get the _meta['columns'] elements for the passed array mask name.

        + +++ + + + + + + + +
        Parameters:name (str) – The mask variable name keyed in _meta['masks'].
        Returns:sources – The list of source elements from the array definition.
        Return type:list
        +
        + +
        +
        +split(save=False)
        +

        Return the meta and data components of the DataSet instance.

        + +++ + + + + + + + +
        Parameters:save (bool, default False) – If True, the meta and data objects will be saved to disk, +using the instance’s name and path attributes to determine +the file location.
        Returns:meta, data – The meta dict and the case data DataFrame as separate objects.
        Return type:dict, pandas.DataFrame
        +
        + +
        +
        +static start_meta(text_key='main')
        +

        Starts a new/empty Quantipy meta document.

        + +++ + + + + + + + +
        Parameters:text_key (str, default 'main') – The default text key to be set into the new meta document.
        Returns:meta – Quantipy meta object
        Return type:dict
        +
        + +
        +
        +subset(variables=None, from_set=None, inplace=False)
        +

        Create a cloned version of self with a reduced collection of variables.

        + +++ + + + + + + + +
        Parameters:
          +
        • variables (str or list of str, default None) – A list of variable names to include in the new DataSet instance.
        • +
        • from_set (str) – The name of an already existing set to base the new DataSet on.
        • +
        +
        Returns:

        subset_ds – The new reduced version of the DataSet.

        +
        Return type:

        qp.DataSet

        +
        +
        + +
        +
        +take(condition)
        +

        Create an index slicer to select rows from the DataFrame component.

        + +++ + + + + + + + +
        Parameters:condition (Quantipy logic expression) – A logical condition expressed as Quantipy logic that determines +which subset of the case data rows to be kept.
        Returns:slicer – The indices fulfilling the passed logical condition.
        Return type:pandas.Index
        +
        + +
        +
        +text(name, shorten=True, text_key=None, axis_edit=None)
        +

        Return the variables text label information.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str, default None) – The variable name keyed in _meta['columns'] or _meta['masks'].
        • +
        • shorten (bool, default True) – If True, text label meta from array items will not report +the parent mask’s text. Setting it to False will show the +“full” label.
        • +
        • text_key (str, default None) – The default text key to be set into the new meta document.
        • +
        • axis_edit ({'x', 'y'}, default None) – If provided the text_key is taken from the x/y edits dict.
        • +
        +
        Returns:

        text – The text metadata.

        +
        Return type:

        str

        +
        +
        + +
        +
        +to_array(name, variables, label, safe=True)
        +

        Combines column variables with same values meta into an array.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – Name of new grid.
        • +
        • variables (list of str or list of dicts) – Variable names that become items of the array. New item labels can +be added as dict. Example: +variables = [‘q1_1’, {‘q1_2’: ‘shop 2’}, {‘q1_3’: ‘shop 3’}]
        • +
        • label (str) – Text label for the mask itself.
        • +
        • safe (bool, default True) – If True, the method will raise a ValueError if the provided +variable name is already present in self. Select False to +forcefully overwrite an existing variable with the same name +(independent of its type).
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +to_delimited_set(name, label, variables, from_dichotomous=True, codes_from_name=True)
        +

        Combines multiple single variables to new delimited set variable.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – Name of new delimited set
        • +
        • label (str) – Label text for the new delimited set.
        • +
        • variables (list of str or list of tuples) – variables that get combined into the new delimited set. If they are +dichotomous (from_dichotomous=True), the labels of the variables +are used as category texts or if tuples are included, the second +items will be used for the category texts. +If the variables are categorical (from_dichotomous=False) the values +of the variables need to be equal and are taken for the delimited set.
        • +
        • from_dichotomous (bool, default True) – Define if the input variables are dichotomous or categorical.
        • +
        • codes_from_name (bool, default True) – If from_dichotomous=True, the codes can be taken from the Variable +names, if they are in form of ‘q01_1’, ‘q01_3’, … +In this case the codes will be 1, 3, ….
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +transpose(name, new_name=None, ignore_items=None, ignore_values=None, copy_data=True, text_key=None)
        +

        Create a new array mask with transposed items / values structure.

        +

        This method will automatically create meta and case data additions in +the DataSet instance.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The originating mask variable name keyed in meta['masks'].
        • +
        • new_name (str, default None) – The name of the new mask. If not provided explicitly, the new_name +will be constructed by suffixing the original +name with ‘_trans’, e.g. 'Q2Array_trans'.
        • +
        • ignore_items (int or list of int, default None) – If provided, the items listed by their order number in the +_meta['masks'][name]['items'] object will not be part of the +transposed array. This means they will be ignored while creating +the new value codes meta.
        • +
        • ignore_values (int or list of int, default None) – If provided, the listed code values will not be part of the +transposed array. This means they will not be part of the new +item meta.
        • +
        • text_key (str) – The text key to be used when generating text objects, i.e. +item and value labels.
        • +
        +
        Returns:

        DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +unbind(name)
        +

        Remove mask-structure for arrays

        +
        + +
        +
        +uncode(target, mapper, default=None, intersect=None, inplace=True)
        +

        Create a new or copied series from data, recoded using a mapper.

        + +++ + + + + + + + +
        Parameters:
          +
        • target (str) – The variable name that is the target of the uncode. If it is keyed +in _meta['masks'] the uncode is done for all mask items. +If not found in _meta this will fail with an error.
        • +
        • mapper (dict) – A mapper of {key: logic} entries.
        • +
        • default (str, default None) – The column name to default to in cases where unattended lists +are given in your logic, where an auto-transformation of +{key: list} to {key: {default: list}} is provided. Note that +lists in logical statements are themselves a form of shorthand +and this will ultimately be interpreted as: +{key: {default: has_any(list)}}.
        • +
        • intersect (logical statement, default None) – If a logical statement is given here then it will be used as an +implied intersection of all logical conditions given in the +mapper.
        • +
        • inplace (bool, default True) – If True, the DataSet will be modified inplace with new/updated +columns. Will return a new recoded pandas.Series instance if +False.
        • +
        +
        Returns:

        Either the DataSet._data is modified inplace or a new +pandas.Series is returned.

        +
        Return type:

        None or uncode_series

        +
        +
        + +
        +
        +undimensionize(names=None, mapper_to_meta=False)
        +

        Rename the dataset columns to remove Dimensions compatibility.

        +
        + +
        +
        +undimensionizing_mapper(names=None)
        +

        Return a renaming dataset mapper for un-dimensionizing names.

        + +++ + + + + + + + +
        Parameters:None
        Returns:mapper – A renaming mapper in the form of a dict of {old: new} that +maps Dimensions naming conventions to non-Dimensions naming +conventions.
        Return type:dict
        +
        + +
        +
        +unify_values(name, code_map, slicer=None, exclusive=False)
        +

        Use a mapping of old to new codes to replace code values in _data.

        +
        +

        Note

        +

        Experimental! Check results carefully!

        +
        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in meta['columns'].
        • +
        • code_map (dict) – A mapping of {old: new}; old and new must be the +int-type code values from the column meta data.
        • +
        • slicer (Quantipy logic statement, default None) – If provided, the values will only be unified for cases where the +condition holds.
        • +
        • exclusive (bool, default False) – If True, the recoded unified value will replace whatever is already +found in the _data column, ignoring delimited set typed data +to which normally would get appended to.
        • +
        +
        Returns:

        +
        Return type:

        None

        +
        +
        + +
        +
        +unroll(varlist, keep=None, both=None)
        +

        Replace mask with their items, optionally excluding/keeping certain ones.

        + +++ + + + + + + + +
        Parameters:
          +
        • varlist (list) – A list of meta 'columns' and/or 'masks' names.
        • +
        • keep (str or list, default None) – The names of masks that will not be replaced with their items.
        • +
        • both ('all', str or list of str, default None) – The names of masks that will be included both as themselves and as +collections of their items.
        • +
        +
        Returns:

        unrolled – The modified varlist.

        +
        Return type:

        list

        +
        +
        + +
        +
        +update(data, on='identity')
        +

        Update the DataSet with the case data entries found in data.

        + +++ + + + + + + + +
        Parameters:
          +
        • data (pandas.DataFrame) – A dataframe that contains a subset of columns from the DataSet +case data component.
        • +
        • on (str, default 'identity') – The column to use as a join key.
        • +
        +
        Returns:

        DataSet is modified inplace.

        +
        Return type:

        None

        +
        +
        + +
        +
        +validate(spss_limits=False, verbose=True)
        +

        Identify and report inconsistencies in the DataSet instance.

        +
        +
        name:
        +
        column/mask name and meta[collection][var]['name'] are not identical
        +
        q_label:
        +
        text object is badly formatted or has empty text mapping
        +
        values:
        +
        categorical variable does not contain values, value text is badly +formatted or has empty text mapping
        +
        text_keys:
        +
        dataset.text_key is not included or existing text keys are not +consistent (also for parents)
        +
        source:
        +
        parents or items do not exist
        +
        codes:
        +
        codes in data component are not included in meta component
        +
        spss limit name:
        +
        length of name is greater than spss limit (64 characters) +(only shown if spss_limits=True)
        +
        spss limit q_label:
        +
        length of q_label is greater than spss limit (256 characters) +(only shown if spss_limits=True)
        +
        spss limit values:
        +
        length of any value text is greater than spss limit (120 characters) +(only shown if spss_limits=True)
        +
        +
        + +
        +
        +value_texts(name, text_key=None, axis_edit=None)
        +

        Get categorical data’s text information.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in _meta['columns'].
        • +
        • text_key (str, default None) – The text_key that should be used when taking labels from the +source meta.
        • +
        • axis_edit ({'x', 'y'}, default None) – If provided the text_key is taken from the x/y edits dict.
        • +
        +
        Returns:

        texts – The list of category texts.

        +
        Return type:

        list

        +
        +
        + +
        +
        +values(name, text_key=None, axis_edit=None)
        +

        Get categorical data’s paired code and texts information from the meta.

        + +++ + + + + + + + +
        Parameters:
          +
        • name (str) – The column variable name keyed in _meta['columns'] or +_meta['masks'].
        • +
        • text_key (str, default None) – The text_key that should be used when taking labels from the +source meta.
        • +
        • axis_edit ({'x', 'y'}, default None) – If provided the text_key is taken from the x/y edits dict.
        • +
        +
        Returns:

        values – The list of the numerical category codes and their texts +packed as tuples.

        +
        Return type:

        list of tuples

        +
        +
        + +
        +
        +variables(setname='data file', numeric=True, string=True, date=True, boolean=True, blacklist=None)
        +

        View all DataSet variables listed in their global order.

        + +++ + + + + + + + +
        Parameters:
          +
        • setname (str, default 'data file') – The name of the variable set to query. Defaults to the main +variable collection stored via ‘data file’.
        • +
        • numeric (bool, default True) – Include int and float type variables?
        • +
        • string (bool, default True) – Include string type variables?
        • +
        • date (bool, default True) – Include date type variables?
        • +
        • boolean (bool, default True) – Include boolean type variables?
        • +
        • blacklist (list, default None) – A list of variables names to exclude from the variable listing.
        • +
        +
        Returns:

        varlist – The list of variables registered in the queried set.

        +
        Return type:

        list

        +
        +
        + +
        +
        +vmerge(dataset, on=None, left_on=None, right_on=None, row_id_name=None, left_id=None, right_id=None, row_ids=None, overwrite_text=False, from_set=None, uniquify_key=None, reset_index=True, inplace=True, verbose=True)
        +

        Merge Quantipy datasets together by appending rows.

        +

        This function merges two Quantipy datasets together, updating variables +that exist in the left dataset and appending others. New variables +will be appended in the order indicated by the ‘data file’ set if +found, otherwise they will be appended in alphanumeric order. This +merge happens vertically (row-wise).

        + +++ + + + + + + + +
        Parameters:
          +
        • dataset ((A list of multiple) quantipy.DataSet) – One or multiple datasets to merge into the current DataSet.
        • +
        • on (str, default=None) – The column to use to identify unique rows in both datasets.
        • +
        • left_on (str, default=None) – The column to use to identify unique in the left dataset.
        • +
        • right_on (str, default=None) – The column to use to identify unique in the right dataset.
        • +
        • row_id_name (str, default=None) – The named column will be filled with the ids indicated for each +dataset, as per left_id/right_id/row_ids. If meta for the named +column doesn’t already exist a new column definition will be +added and assigned a reductive-appropriate type.
        • +
        • left_id (str/int/float, default=None) – Where the row_id_name column is not already populated for the +dataset_left, this value will be populated.
        • +
        • right_id (str/int/float, default=None) – Where the row_id_name column is not already populated for the +dataset_right, this value will be populated.
        • +
        • row_ids (list of str/int/float, default=None) – When datasets has been used, this list provides the row ids +that will be populated in the row_id_name column for each of +those datasets, respectively.
        • +
        • overwrite_text (bool, default=False) – If True, text_keys in the left meta that also exist in right +meta will be overwritten instead of ignored.
        • +
        • from_set (str, default=None) – Use a set defined in the right meta to control which columns are +merged from the right dataset.
        • +
        • uniquify_key (str, default None) – A int-like column name found in all the passed DataSet objects +that will be protected from having duplicates. The original version +of the column will be kept under its name prefixed with ‘original’.
        • +
        • reset_index (bool, default=True) – If True pandas.DataFrame.reindex() will be applied to the merged +dataframe.
        • +
        • inplace (bool, default True) – If True, the DataSet will be modified inplace with new/updated +rows. Will return a new DataSet instance if False.
        • +
        • verbose (bool, default=True) – Echo progress feedback to the output pane.
        • +
        +
        Returns:

        None or new_dataset – If the merge is not applied inplace, a DataSet instance +is returned.

        +
        Return type:

        quantipy.DataSet

        +
        +
        + +
        +
        +weight(weight_scheme, weight_name='weight', unique_key='identity', subset=None, report=True, path_report=None, inplace=True, verbose=True)
        +

        Weight the DataSet according to a well-defined weight scheme.

        + +++ + + + + + + + +
        Parameters:
          +
        • weight_scheme (quantipy.Rim instance) – A rim weights setup with defined targets. Can include multiple +weight groups and/or filters.
        • +
        • weight_name (str, default 'weight') – A name for the float variable that is added to pick up the weight +factors.
        • +
        • unique_key (str, default 'identity') – A variable inside the DataSet instance that will be used to +map individual case weights to their matching rows.
        • +
        • subset (Quantipy complex logic expression) – A logic to filter the DataSet, weighting only the remaining subset.
        • +
        • report (bool, default True) – If True, will report a summary of the weight algorithm run +and factor outcomes.
        • +
        • path_report (str, default None) – A file path to save an .xlsx version of the weight report to.
        • +
        • inplace (bool, default True) – If True, the weight factors are merged back into the DataSet +instance. Will otherwise return the pandas.DataFrame that +contains the weight factors, the unique_key and all variables +that have been used to compute the weights (filters, target +variables, etc.).
        • +
        +
        Returns:

        Will either create a new column called 'weight' in the +DataSet instance or return a DataFrame that contains +the weight factors.

        +
        Return type:

        None or pandas.DataFrame

        +
        +
        + +
        +
        +write_dimensions(path_mdd=None, path_ddf=None, text_key=None, run=True, clean_up=True)
        +

        Build Dimensions/SPSS Base Professional .ddf/.mdd data pairs.

        +
        +

        Note

        +

        SPSS Data Collection Base Professional must be installed on +the machine. The method is creating .mrs and .dms scripts which are +executed through the software’s API.

        +
        + +++ + + + + + + + +
        Parameters:
          +
        • path_mdd (str, default None) – The full path (optionally with extension '.mdd', otherwise +assumed as such) for the saved DataSet._meta component. +If not provided, the instance’s name and path attributes +will be used to determine the file location.
        • +
        • path_ddf (str, default None) – The full path (optionally with extension '.ddf', otherwise +assumed as such) for the saved DataSet._data component. +If not provided, the instance’s name and `path attributes +will be used to determine the file location.
        • +
        • text_key (str, default None) – The desired text_key for all text label information. Uses +the DataSet.text_key information if not provided.
        • +
        • run (bool, default True) – If True, the method will try to run the metadata creating .mrs +script and execute a DMSRun for the case data transformation in +the .dms file.
        • +
        • clean_up (bool, default True) – By default, all helper files from the conversion (.dms, .mrs, +paired .csv files, etc.) will be deleted after the process has +finished.
        • +
        +
        Returns:

        +
        Return type:

        A .ddf/.mdd pair is saved at the provided path location.

        +
        +
        + +
        +
        +write_quantipy(path_meta=None, path_data=None)
        +

        Write the data and meta components to .csv/.json files.

        +

        The resulting files are well-defined native Quantipy source files.

        + +++ + + + + + + + +
        Parameters:
          +
        • path_meta (str, default None) – The full path (optionally with extension '.json', otherwise +assumed as such) for the saved DataSet._meta component. +If not provided, the instance’s name and path attributes +will be used to determine the file location.
        • +
        • path_data (str, default None) – The full path (optionally with extension '.csv', otherwise +assumed as such) for the saved DataSet._data component. +If not provided, the instance’s name and `path attributes +will be used to determine the file location.
        • +
        +
        Returns:

        +
        Return type:

        A .csv/.json pair is saved at the provided path location.

        +
        +
        + +
        +
        +write_spss(path_sav=None, index=True, text_key=None, mrset_tag_style='__', drop_delimited=True, from_set=None, verbose=True)
        +

        Convert the Quantipy DataSet into a SPSS .sav data file.

        + +++ + + + + + + + +
        Parameters:
          +
        • path_sav (str, default None) – The full path (optionally with extension '.sav', otherwise +assumed as such) for the saved DataSet._data component. +If not provided, the instance’s name and path attributes +will be used to determine the file location.
        • +
        • index (bool, default True) – Should the index be inserted into the dataframe before the +conversion happens?
        • +
        • text_key (str, default None) – The text_key that should be used when taking labels from the +source meta. If the given text_key is not found for any +particular text object, the DataSet.text_key will be used +instead.
        • +
        • mrset_tag_style (str, default '__') – The delimiting character/string to use when naming dichotomous +set variables. The mrset_tag_style will appear between the +name of the variable and the dichotomous variable’s value name, +as taken from the delimited set value that dichotomous +variable represents.
        • +
        • drop_delimited (bool, default True) – Should Quantipy’s delimited set variables be dropped from +the export after being converted to dichotomous sets/mrsets?
        • +
        • from_set (str) – The set name from which the export should be drawn.
        • +
        +
        Returns:

        +
        Return type:

        A SPSS .sav file is saved at the provided path location.

        +
        +
        + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/API/_build/html/sites/api_ref/QuantipyViews.html b/docs/API/_build/html/sites/api_ref/QuantipyViews.html index 1e9618ed7..74a7eb22b 100644 --- a/docs/API/_build/html/sites/api_ref/QuantipyViews.html +++ b/docs/API/_build/html/sites/api_ref/QuantipyViews.html @@ -92,9 +92,18 @@

        Quick search

      Parameters:
        -
      • link (Quantipy Link object.) –
      • -
      • name (str) – The shortname applied to the view.
      • -
      • kwargs (dict) –
      • -
      • arguments (specific) (Keyword) –
      • -
      • text (str, optional, default None) – Sets an optional label in the meta component of the view that is +
      • link (Quantipy Link object.) –
      • +
      • name (str) – The shortname applied to the view.
      • +
      • kwargs (dict) –
      • +
      • arguments (specific) (Keyword) –
      • +
      • text (str, optional, default None) – Sets an optional label in the meta component of the view that is used when the view is passed into a Quantipy build (e.g. Excel, Powerpoint).
      • -
      • metric ({'props', 'means'}, default 'props') – Determines whether a proportion or means test algorithm is +
      • metric ({'props', 'means'}, default 'props') – Determines whether a proportion or means test algorithm is performed.
      • -
      • test_total (bool, deafult False) – If True, the each View’s y-axis column will be tested against the +
      • test_total (bool, default False) – If True, each View’s y-axis column will be tested against the +unconditional total of its x-axis.
      • -
      • mimic ({'Dim', 'askia'}, default 'Dim') – It is possible to mimic the test logics used in other statistical +
      • mimic ({'Dim', 'askia'}, default 'Dim') – It is possible to mimic the test logics used in other statistical software packages by passing them as instructions. The method will then choose the appropriate test parameters.
      • -
      • level ({'high', 'mid', 'low'} or float) – Sets the level of significance to which the test is carried out. +
      • level ({'high', 'mid', 'low'} or float) – Sets the level of significance to which the test is carried out. Given as str the levels correspond to 'high' = 0.01, 'mid' = 0.05 and 'low' = 0.1. If a float is passed the specified level will be used.
      • -
      • flags (list of two int, default None) – Base thresholds for Dimensions-like tests, e.g. [30, 100]. First +
      • flags (list of two int, default None) – Base thresholds for Dimensions-like tests, e.g. [30, 100]. First int is minimum base for reported results, second int controls small base indication.
      Returns:

        -
      • None – Adds requested View to the Stack, storing it under the full +
      • None – Adds requested View to the Stack, storing it under the full view name notation key.
      • -
      • .. note:: – Mimicking the askia software (mimic = 'askia') +
      • .. note:: – Mimicking the askia software (mimic = 'askia') restricts the values to be one of 'high', 'low', 'mid'. Any other value passed will make the algorithm fall back to 'low'. Mimicking Dimensions (mimic = @@ -416,9 +426,9 @@

        QuantipyViews

      Parameters:
        -
      • link (Quantipy Link object.) –
      • -
      • name (str) – The shortname applied to the view.
      • -
      • kwargs (dict) –
      • +
      • link (Quantipy Link object.) –
      • +
      • name (str) – The shortname applied to the view.
      • +
      • kwargs (dict) –
      Parameters:
        -
      • link (Quantipy Link object.) –
      • -
      • name (str) – The shortname applied to the view.
      • -
      • kwargs (dict) –
      • -
      • arguments (specific) (Keyword) –
      • -
      • text (str, optional, default None) – Sets an optional label suffix for the meta component of the view +
      • link (Quantipy Link object.) –
      • +
      • name (str) – The shortname applied to the view.
      • +
      • kwargs (dict) –
      • +
      • arguments (specific) (Keyword) –
      • +
      • text (str, optional, default None) – Sets an optional label suffix for the meta component of the view which will be appended to the statistic name and used when the view is passed into a Quantipy build (e.g. Excel, Powerpoint).
      • -
      • stats (str, default 'mean') – The measure to compute.
      • -
      • exclude (list of int) – Codes that will not be considered calculating the result.
      • -
      • rescale (dict) –

        A mapping of {old code: new code}, e.g.:

        +
      • stats (str, default 'mean') – The measure to compute.
      • +
      • exclude (list of int) – Codes that will not be considered calculating the result.
      • +
      • rescale (dict) –

        A mapping of {old code: new code}, e.g.:

      @@ -494,14 +504,14 @@

      QuantipyViews

      Parameters:
        -
      • link (Quantipy Link object.) –
      • -
      • name (str) – The shortname applied to the view.
      • -
      • kwargs (dict) –
      • -
      • arguments (specific) (Keyword) –
      • -
      • text (str, optional, default None) – Sets an optional label in the meta component of the view that is +
      • link (Quantipy Link object.) –
      • +
      • name (str) – The shortname applied to the view.
      • +
      • kwargs (dict) –
      • +
      • arguments (specific) (Keyword) –
      • +
      • text (str, optional, default None) – Sets an optional label in the meta component of the view that is used when the view is passed into a Quantipy build (e.g. Excel, Powerpoint).
      • -
      • logic (list of int, list of dicts or core.tools.view.logic operation) –

        If a list is passed this instructs a simple net of the codes given +

      • logic (list of int, list of dicts or core.tools.view.logic operation) –

        If a list is passed this instructs a simple net of the codes given as int. Multiple nets can be generated via a list of dicts that map names to lists of ints. For complex logical statements, expression are parsed to identify the qualifying rows in the data. @@ -517,15 +527,15 @@

        QuantipyViews

      Returns:

        -
      • None – Adds requested View to the Stack, storing it under the full +
      • None – Adds requested View to the Stack, storing it under the full view name notation key.
      • -
      • .. note:: Net codes take into account if a variable is – multi-coded. The net will therefore consider qualifying +
      • .. note:: Net codes take into account if a variable is – multi-coded. The net will therefore consider qualifying cases and not the raw sum of the frequencies per category, i.e. no multiple counting of cases.
      @@ -591,7 +601,7 @@

      QuantipyViews - + diff --git a/docs/API/_build/html/sites/api_ref/Rim_scheme.html b/docs/API/_build/html/sites/api_ref/Rim_scheme.html index d5a35b9fc..a61efd058 100644 --- a/docs/API/_build/html/sites/api_ref/Rim_scheme.html +++ b/docs/API/_build/html/sites/api_ref/Rim_scheme.html @@ -37,7 +37,7 @@ - + @@ -92,9 +92,18 @@

      Quick search

      Parameters:
        -
      • name (str) – Name of the weight group.
      • -
      • filter_def (str, optional) – An optional filter defintion given as a boolean expression in +
      • name (str) – Name of the weight group.
      • +
      • filter_def (str, optional) – An optional filter definition given as a boolean expression in string format. Must be a valid input for the pandas DataFrame.query() method.
      • -
      • targets (dict) – Dictionary mapping of DataFrame columns to target proportion list.
      • +
      • targets (dict) – Dictionary mapping of DataFrame columns to target proportion list.
      - + @@ -413,8 +423,8 @@

      Rim

      @@ -436,7 +446,7 @@

      Rim

      - @@ -456,7 +466,7 @@

      Rim - Next + Next Previous @@ -500,7 +510,7 @@

      Rim - + diff --git a/docs/API/_build/html/sites/api_ref/View.html b/docs/API/_build/html/sites/api_ref/View.html index 215373c15..226848b81 100644 --- a/docs/API/_build/html/sites/api_ref/View.html +++ b/docs/API/_build/html/sites/api_ref/View.html @@ -38,7 +38,7 @@ - + @@ -92,9 +92,18 @@

      Quick search

      Parameters:group_targets (dict) – A dictionary mapping of group names to the desired proportions.
      Parameters:group_targets (dict) – A dictionary mapping of group names to the desired proportions.
      Returns:
      Parameters:
        -
      • targets (dict or list of dict) – Dictionary mapping of DataFrame columns to target proportion list.
      • -
      • group_name (str, optional) – A name for the simple weight (group) created.
      • +
      • targets (dict or list of dict) – Dictionary mapping of DataFrame columns to target proportion list.
      • +
      • group_name (str, optional) – A name for the simple weight (group) created.
      Returns:df – A summary of missing entries and (rounded) mean/mode/median of +
      Returns:df – A summary of missing entries and (rounded) mean/mode/median of value codes per target variable.
      Return type:pandas.DataFrame
      - + @@ -369,12 +379,12 @@

      View
      get_std_params()
      -

      Provides the View’s standard kwargs with fallbacks to default values.

      +

      Provides the View’s standard kwargs with fallbacks to default values.

      Returns:edit_params – A tuple of kwargs controlling the following supported Link data -edits: logic, calc, ...
      Returns:edit_params – A tuple of kwargs controlling the following supported Link data +edits: logic, calc, …
      Return type:tuple
      - @@ -452,12 +462,12 @@

      View
      meta()
      -

      Get a summary on a View’s meta information.

      +

      Get a summary on a View’s meta information.

      Returns:std_parameters – A tuple of the common kwargs controlling the general View method +
      Returns:std_parameters – A tuple of the common kwargs controlling the general View method behaviour: axis, relation, rel_to, weights, text
      Return type:tuple
      - + @@ -480,15 +490,15 @@

      View
      notation(method, condition)
      -

      Generate the View’s Stack key notation string.

      +

      Generate the View’s Stack key notation string.

      Returns:viewmeta – A dictionary that contains global aggregation information.
      Returns:viewmeta – A dictionary that contains global aggregation information.
      Return type:dict
      - - + @@ -505,14 +515,14 @@

      View
      spec_condition(link, conditionals=None, expand=None)
      -

      Updates the View notation’s condition component based on agg. details.

      +

      Updates the View notation’s condition component based on agg. details.

      Parameters:shortname, relation (aggname,) – Strings for the aggregation name, the method’s shortname and the +
      Parameters:shortname, relation (aggname,) – Strings for the aggregation name, the method’s shortname and the relation component of the View notation.
      Returns:notation – The View notation.
      Returns:notation – The View notation.
      Return type:str
      - + - + @@ -540,7 +550,7 @@

      ViewNext - Previous + Previous @@ -581,7 +591,7 @@

      View - + diff --git a/docs/API/_build/html/sites/api_ref/ViewMapper.html b/docs/API/_build/html/sites/api_ref/ViewMapper.html index ddd24f0f5..cbebbcad6 100644 --- a/docs/API/_build/html/sites/api_ref/ViewMapper.html +++ b/docs/API/_build/html/sites/api_ref/ViewMapper.html @@ -91,9 +91,18 @@

      Quick search

      @@ -387,8 +396,8 @@

      ViewMapper

      @@ -411,8 +420,8 @@

      ViewMapper

      @@ -479,7 +488,7 @@

      ViewMapper - + diff --git a/docs/API/_build/html/sites/api_ref/quantify_engine.html b/docs/API/_build/html/sites/api_ref/quantify_engine.html index 400248b0c..fa88687ea 100644 --- a/docs/API/_build/html/sites/api_ref/quantify_engine.html +++ b/docs/API/_build/html/sites/api_ref/quantify_engine.html @@ -92,9 +92,18 @@

      Quick search

      Parameters:link (Link) –
      Parameters:link (Link) –
      Returns:relation_string – The relation part of the View name notation.
      Returns:relation_string – The relation part of the View name notation.
      Return type:str
      Parameters:
        -
      • name (str) – The short name of the View.
      • -
      • method (view method) – The view method that will be used to derivce the result
      • -
      • kwargs (dict) – The keyword arguments needed by the view method.
      • -
      • template (dict) – A ViewMapper template that contains information on view method and +
      • name (str) – The short name of the View.
      • +
      • method (view method) – The view method that will be used to derive the result
      • +
      • kwargs (dict) – The keyword arguments needed by the view method.
      • +
      • template (dict) – A ViewMapper template that contains information on view method and kwargs values to iterate over.
      Parameters:
        -
      • method ({'frequency', 'descriptives', 'coltests'}) – The baseline view method to be used.
      • -
      • iterators (dict) – A dictionary mapping of view method kwargs to lists of values.
      • +
      • method ({'frequency', 'descriptives', 'coltests'}) – The baseline view method to be used.
      • +
      • iterators (dict) – A dictionary mapping of view method kwargs to lists of values.
      Parameters:
        -
      • views (list of str) – The selection of View names to keep.
      • -
      • strict_selection (bool, default True) – TODO
      • +
      • views (list of str) – The selection of View names to keep.
      • +
      • strict_selection (bool, default True) – TODO
      - @@ -484,14 +496,14 @@

      quantify.engine
      rescale(scaling, drop=False)
      -

      Modify the object’s xdef property reflecting new value defintions.

      +

      Modify the object’s xdef property reflecting new value defintions.

      Parameters:
        -
      • axis ({None, 'x', 'y'}, deafult None) – When axis is None, the frequency of all cells from the uni- or +
      • axis ({None, 'x', 'y'}, default None) – When axis is None, the frequency of all cells from the uni- or +multivariate distribution is presented. If the axis is specified -to be either ‘x’ or ‘y’ the margin per axis becomes the resulting +aggregation.
      • -
      • raw_sum (bool, default False) – If True will perform a simple summation over the cells given the +
      • raw_sum (bool, default False) – If True will perform a simple summation over the cells given the axis parameter. This ignores net counting of qualifying answers in favour of summing over all answers given when considering margins.
      • -
      • cum_sum (bool, default False) – If True a cumulative sum of the elements along the given axis is +
      • cum_sum (bool, default False) – If True a cumulative sum of the elements along the given axis is returned.
      • -
      • margin (bool, deafult True) – Controls whether the margins of the aggregation result are shown. +
      • effective (bool, default False) – If True, compute effective counts instead of traditional (weighted) +counts.
      • +
      • margin (bool, default True) – Controls whether the margins of the aggregation result are shown. +This also applies to margin aggregations themselves, since they +contain a margin (in the form of the total number of cases) as well.
      • -
      • as_df (bool, default True) – Controls whether the aggregation is transformed into a Quantipy- +
      • as_df (bool, default True) – Controls whether the aggregation is transformed into a Quantipy- multiindexed (following the Question/Values convention) pandas.DataFrame or will be left in its numpy.array format.
      @@ -402,7 +414,7 @@

      quantify.engine
      exclude(codes, axis='x')
      -

      Wrapper for _missingfy(...keep_codes=False, ..., keep_base=False, ...) +

      Wrapper for _missingfy(…keep_codes=False, …, keep_base=False, …) Excludes specified codes from aggregation.

      @@ -421,18 +433,18 @@

      quantify.engine

      Parameters:
        -
      • groups (list, dict of lists or logic expression) –

        The group/net code defintion(s) in form of...

        +
      • groups (list, dict of lists or logic expression) –

        The group/net code definition(s) in form of…

        • a simple list: [1, 2, 3]
        • a dict of list: {'grp A': [1, 2, 3], 'grp B': [4, 5, 6]}
        • a logical expression: not_any([1, 2])
      • -
      • axis ({'x', 'y'}, default 'x') – The axis to group codes on.
      • -
      • expand ({None, 'before', 'after'}, default None) – If 'before', the codes that are grouped will be kept and placed +
      • axis ({'x', 'y'}, default 'x') – The axis to group codes on.
      • +
      • expand ({None, 'before', 'after'}, default None) – If 'before', the codes that are grouped will be kept and placed before the grouped aggregation; vice versa for 'after'. Ignored on logical expressions found in groups.
      • -
      • complete (bool, default False) – If True, codes that define the Link on the given axis but are +
      • complete (bool, default False) – If True, codes that define the Link on the given axis but are +not present in the groups definition(s) will be placed in their +natural position within the aggregation, respecting the value of +expand.
      • @@ -452,7 +464,7 @@

        quantify.engine
        limit(codes, axis='x')
        -

        Wrapper for _missingfy(...keep_codes=True, ..., keep_base=True, ...) +

      Wrapper for _missingfy(…keep_codes=True, …, keep_base=True, …) +Restrict the data matrix entries to contain the specified codes only.

        @@ -464,7 +476,7 @@

        quantify.engine

      Parameters:on ({'y', 'x'}, default 'y') – Defines the base to normalize the result on. 'y' will +
      Parameters:on ({'y', 'x'}, default 'y') – Defines the base to normalize the result on. 'y' will produce column percentages, 'x' will produce row percentages.
      @@ -514,14 +526,14 @@

      quantify.engine

      @@ -597,16 +647,16 @@

      Stack

      -
      Parameters:
        -
      • scaling (dict) – Mapping of old_code: new_code, given as of type int or float.
      • -
      • drop (bool, default False) – If True, codes not included in the scaling dict will be excluded.
      • +
      • scaling (dict) – Mapping of old_code: new_code, given as of type int or float.
      • +
      • drop (bool, default False) – If True, codes not included in the scaling dict will be excluded.
      Parameters:
        -
      • stat ({'summary', 'mean', 'median', 'var', 'stddev', 'sem', varcoeff',) – ‘min’, ‘lower_q’, ‘upper_q’, ‘max’}, default ‘summary’ +
      • stat ({'summary', 'mean', 'median', 'var', 'stddev', 'sem', 'varcoeff',) – ‘min’, ‘lower_q’, ‘upper_q’, ‘max’}, default ‘summary’ The measure to calculate. Defaults to a summary output of the most important sample statistics.
      • -
      • axis ({'x', 'y'}, default 'x') – The axis which is reduced in the aggregation, e.g. column vs. row +
      • axis ({'x', 'y'}, default 'x') – The axis which is reduced in the aggregation, e.g. column vs. row means.
      • -
      • margin (bool, default True) – Controls whether statistic(s) of the marginal distribution are +
      • margin (bool, default True) – Controls whether statistic(s) of the marginal distribution are shown.
      • -
      • as_df (bool, default True) – Controls whether the aggregation is transformed into a Quantipy- +
      • as_df (bool, default True) – Controls whether the aggregation is transformed into a Quantipy- multiindexed (following the Question/Values convention) pandas.DataFrame or will be left in its numpy.array format.
      @@ -541,20 +553,20 @@

      quantify.engine
      swap(var, axis='x', update_axis_def=True, inplace=True)
      -

      Change the Quantity’s x- or y-axis keeping filter and weight setup.

      +

      Change the Quantity’s x- or y-axis keeping filter and weight setup.

      All edits and aggregation results will be removed during the swap.

      @@ -594,7 +606,7 @@

      quantify.engine run()

      Performs the testing algorithm and creates an output pd.DataFrame.

      -

      The output is indexed according to Quantipy’s Questions->Values +

      The output is indexed according to Quantipy’s Questions->Values convention. Significant results between columns are presented as lists of integer y-axis codes where the column with the higher value is holding the codes of the columns with the lower values. NaN is @@ -626,7 +638,7 @@

      quantify.engine set_params(test_total=False, level='mid', mimic='Dim', testtype='pooled', use_ebase=True, ovlp_correc=True, cwi_filter=False, flag_bases=None)

      Sets the test algorithm parameters and defines the type of test.

      -

      This method sets the test’s global parameters and derives the +

      This method sets the test’s global parameters and derives the necessary measures for the computation of the test statistic. The default values correspond to the SPSS Dimensions Column Tests algorithms that control for bias introduced by weighting and @@ -640,21 +652,21 @@

      quantify.engine

      @@ -395,14 +410,14 @@

      Stack

      @@ -419,15 +434,15 @@

      Stack
      -add_nets(on_vars, net_map, expand=None, calc=None, text_prefix='Net:', checking_cluster=None, _batches='all', verbose=True)
      +add_nets(on_vars, net_map, expand=None, calc=None, text_prefix='Net:', checking_cluster=None, _batches='all', recode='auto', verbose=True)

      Add a net-like view to a specified collection of x keys of the stack.

      Parameters:
        -
      • var (str) – New variable’s name used in axis swap.
      • -
      • axis ({‘x’, ‘y’}, default 'x') – The axis to swap.
      • -
      • update_axis_def (bool, default False) – If self is of type 'array', the name and item definitions +
      • var (str) – New variable’s name used in axis swap.
      • +
      • axis ({‘x’, ‘y’}, default 'x') – The axis to swap.
      • +
      • update_axis_def (bool, default False) – If self is of type 'array', the name and item definitions (that are e.g. used in the to_df() method) can be updated to -reflect the swapped axis variable or kept to show the original’s +reflect the swapped axis variable or kept to show the original’s ones.
      • -
      • inplace (bool, default True) – Whether to modify the Quantity inplace or return a new instance.
      • +
      • inplace (bool, default True) – Whether to modify the Quantity inplace or return a new instance.
      Parameters:
      Parameters:
        -
      • data_key (str) – The reference name for a data source connected to the Stack.
      • -
      • data (pandas.DataFrame) – The input (case) data source.
      • -
      • meta (dict or OrderedDict) – A quantipy compatible metadata source that describes the case data.
      • +
      • data_key (str) – The reference name for a data source connected to the Stack.
      • +
      • data (pandas.DataFrame) – The input (case) data source.
      • +
      • meta (dict or OrderedDict) – A quantipy compatible metadata source that describes the case data.
      Parameters:
        -
      • data_keys (str, optional) – The data_key to be added to. If none is given, the method will try +
      • data_keys (str, optional) – The data_key to be added to. If none is given, the method will try to add to all data_keys found in the Stack.
      • -
      • filters (list of str describing filter defintions, default ['no_filter']) – The string must be a valid input for the +
      • filters (list of str describing filter definitions, default ['no_filter']) – The string must be a valid input for the pandas.DataFrame.query() method.
      • -
      • y (x,) – The x and y variables to construct Links from.
      • -
      • views (list of view method names.) – Can be any of Quantipy’s preset Views or the names of created +
      • y (x,) – The x and y variables to construct Links from.
      • +
      • views (list of view method names.) – Can be any of Quantipy’s preset Views or the names of created view method specifications.
      • -
      • weights (list, optional) – The names of weight variables to consider in the data aggregation +
      • weights (list, optional) – The names of weight variables to consider in the data aggregation process. Weight variables must be of type float.
      @@ -474,31 +496,34 @@

      Stack
      -add_stats(on_vars, stats=['mean'], other_source=None, rescale=None, drop=True, exclude=None, factor_labels=True, custom_text=None, checking_cluster=None, _batches='all', verbose=True)
      +add_stats(on_vars, stats=['mean'], other_source=None, rescale=None, drop=True, exclude=None, factor_labels=True, custom_text=None, checking_cluster=None, _batches='all', recode=False, verbose=True)

      Add a descriptives view to a specified collection of xks of the stack.

      -

      Valid descriptives views: {‘mean’, ‘stddev’, ‘min’, ‘max’, ‘median’, ‘sem’}

      +

      Valid descriptives views: {‘mean’, ‘stddev’, ‘min’, ‘max’, ‘median’, ‘sem’}

      Parameters:
      @@ -521,7 +546,7 @@

      Stack

      - @@ -534,22 +559,25 @@

      Stack
      -aggregate(views, unweighted_base=True, categorize=[], batches='all', xs=None, verbose=True)
      +aggregate(views, unweighted_base=True, categorize=[], batches='all', xs=None, bases={}, verbose=True)

      Add views to all defined qp.Link in qp.Stack.

      Parameters:
        -
      • on_vars (list) – The list of x variables to add the view to.
      • -
      • stats (list of str, default ['mean']) – The metrics to compute and add as a view.
      • -
      • other_source (str) – If provided the Link’s x-axis variable will be swapped with the +
      • on_vars (list) – The list of x variables to add the view to.
      • +
      • stats (list of str, default ['mean']) – The metrics to compute and add as a view.
      • +
      • other_source (str) – If provided the Link’s x-axis variable will be swapped with the (numerical) variable provided. This can be used to attach statistics of a different variable to a Link definition.
      • -
      • rescale (dict) – A dict that maps old to new codes, e.g. {1: 5, 2: 4, 3: 3, 4: 2, 5: 1}
      • -
      • drop (bool, default True) – If rescale is provided all codes that are not mapped will be +
      • rescale (dict) – A dict that maps old to new codes, e.g. {1: 5, 2: 4, 3: 3, 4: 2, 5: 1}
      • +
      • drop (bool, default True) – If rescale is provided all codes that are not mapped will be ignored in the computation.
      • -
      • exclude (list) – Codes/values to ignore in the computation.
      • -
      • factor_lables (bool, default True) – If True, will write the (rescaled) factor values next to the -category text label.
      • -
      • custom_text (str, default None) – A custom string affix to put at the end of the requested statistics’ +
      • exclude (list) – Codes/values to ignore in the computation.
      • +
      • factor_labels (bool / str, default True) – Writes the (rescaled) factor values next to the category text label. +If True, square-brackets are used. If ‘()’, normal brackets are used.
      • +
      • custom_text (str, default None) – A custom string affix to put at the end of the requested statistics’ names.
      • -
      • checking_cluster (quantipy.Cluster, default None) – When provided, an automated checking aggregation will be added to the +
      • checking_cluster (quantipy.Cluster, default None) – When provided, an automated checking aggregation will be added to the Cluster instance.
      • -
      • _batches (str or list of str) – Only for qp.Links that are defined in this qp.Batch +
      • _batches (str or list of str) – Only for qp.Links that are defined in this qp.Batch instances views are added.
      • +
      • recode (bool, default False) – Create a new variable that contains only the values +which are needed for the stat computation. The values and the included +data will be rescaled.
      Parameters:_batches (str or list of str) – Only for qp.Links that are defined in this qp.Batch +
      Parameters:_batches (str or list of str) – Only for qp.Links that are defined in this qp.Batch instances views are added.
      Returns:
      @@ -563,6 +591,28 @@

      Stack

      Parameters:
        -
      • views (str or list of str or qp.ViewMapper) – views that are added.
      • -
      • unweighted_base (bool, default True) – If True, unweighted ‘cbase’ is added to all non-arrays.
      • -
      • categorize (str or list of str) – Determines how numerical data is handled: If provided, the +
      • views (str or list of str or qp.ViewMapper) – views that are added.
      • +
      • unweighted_base (bool, default True) – If True, unweighted ‘cbase’ is added to all non-arrays. +This parameter will be deprecated in future, please use bases +instead.
      • +
      • categorize (str or list of str) – Determines how numerical data is handled: If provided, the variables will get counts and percentage aggregations ('counts', 'c%') alongside the 'cbase' view. If False, only 'cbase' views are generated for non-categorical types.
      • -
      • batches (str/ list of str, default 'all') – Name(s) of qp.Batch instance(s) that are used to aggregate the +
      • batches (str/ list of str, default 'all') – Name(s) of qp.Batch instance(s) that are used to aggregate the qp.Stack.
      • -
      • xs (list of str) – Names of variable, for which views are added.
      • +
      • xs (list of str) – Names of variable, for which views are added.
      • +
      • bases (dict) – Defines which bases should be aggregated, weighted or unweighted.
      +
      +
      +apply_meta_edits(batch_name, data_key, filter_key=None, freeze=False)
      +

      Take over meta_edits from Batch definitions.

      + +++ + + + +
      Parameters:
        +
      • batch_name (str) – Name of the Batch whose meta_edits are taken.
      • +
      • data_key (str) – Accessing this metadata: self[data_key].meta +Batch definitions are taken from here and this metadata is modified.
      • +
      • filter_key (str, default None) – Currently not implemented! +Accessing this metadata: self[data_key][filter_key].meta +Batch definitions are taken from here and this metadata is modified.
      • +
      +
      +
      +
      cumulative_sum(on_vars, _batches='all', verbose=True)
      @@ -572,8 +622,8 @@

      Stack

      Parameters:
        -
      • on_vars (list) – The list of x variables to add the view to.
      • -
      • _batches (str or list of str) – Only for qp.Links that are defined in this qp.Batch +
      • on_vars (list) – The list of x variables to add the view to.
      • +
      • _batches (str or list of str) – Only for qp.Links that are defined in this qp.Batch instances views are added.
      Parameters:
        -
      • columns (index,) – optional +
      • columns (index,) – optional Controls the output representation by structuring a pivot-style table according to the index and column values.
      • -
      • query (str) – A query string that is valid for the pandas.DataFrame.query() method.
      • -
      • split_view_names (bool, default False) – If True, will create an output of unique view name notations split +
      • query (str) – A query string that is valid for the pandas.DataFrame.query() method.
      • +
      • split_view_names (bool, default False) – If True, will create an output of unique view name notations split up into their components.
      Returns:

      description – DataFrame summing the Stack’s structure in terms of Links and Views.

      +
      Returns:

      description – DataFrame summing the Stack’s structure in terms of Links and Views.

      Return type:

      pandas.DataFrame

      @@ -616,6 +666,25 @@

      Stack

      +
      +
      +freeze_master_meta(data_key, filter_key=None)
      +

      Save .meta in .master_meta for a defined data_key.

      + +++ + + + +
      Parameters:
        +
      • data_key (str) – Using: self[data_key]
      • +
      • filter_key (str, default None) – Currently not implemented! +Using: self[data_key][filter_key]
      • +
      +
      +
      +
      static from_sav(data_key, filename, name=None, path=None, ioLocale='en_US.UTF-8', ioUtf8=True)
      @@ -625,17 +694,17 @@

      Stack Parameters:
        -
      • data_key (str) – The data_key for the data and meta in the sav file.
      • -
      • filename (str) – The name to the sav file.
      • -
      • name (str) – A name for the sav (stored in the meta).
      • -
      • path (str) – The path to the sav file.
      • -
      • ioLocale (str) – The locale used in during the sav processing.
      • -
      • ioUtf8 (bool) – Boolean that indicates the mode in which text communicated to or +
      • data_key (str) – The data_key for the data and meta in the sav file.
      • +
      • filename (str) – The name of the sav file.
      • +
      • name (str) – A name for the sav (stored in the meta).
      • +
      • path (str) – The path to the sav file.
      • +
      • ioLocale (str) – The locale used during the sav processing.
      • +
      • ioUtf8 (bool) – Boolean that indicates the mode in which text communicated to or from the I/O module will be.
      -Returns:

      stack – A stack instance that has a data_key with data and metadata +Returns:

      stack – A stack instance that has a data_key with data and metadata to run aggregations.

      @@ -655,10 +724,10 @@

      Stack Parameters:
        -
      • path_stack (str) – The full path to the .stack file that should be created, including +
      • path_stack (str) – The full path to the .stack file that should be created, including the extension.
      • -
      • compression ({'gzip'}, default 'gzip') – The compression type that has been used saving the file.
      • -
      • load_cache (bool, default False) – Loads MatrixCache into the Stack a .cache file is found.
      • +
      • compression ({'gzip'}, default 'gzip') – The compression type that has been used saving the file.
      • +
      • load_cache (bool, default False) – Loads MatrixCache into the Stack if a .cache file is found.
      @@ -672,6 +741,12 @@

      Stack

      +
      +
      +static recode_from_net_def(dataset, on_vars, net_map, expand, recode='auto', text_prefix='Net:', verbose=True)
      +

      Create variables from net definitions.

      +
      +
      reduce(data_keys=None, filters=None, x=None, y=None, variables=None, views=None)
      @@ -680,7 +755,7 @@

      Stack -Parameters:filters, x, y, views (data_keys,) – +Parameters:filters, x, y, views (data_keys,) – Returns: @@ -693,7 +768,7 @@

      Stack
      refresh(data_key, new_data_key='', new_weight=None, new_data=None, new_meta=None)
      -

      Re-run all or a portion of Stack’s aggregations for a given data key.

      +

      Re-run all or a portion of Stack’s aggregations for a given data key.

      refresh() can be used to re-weight the data using a new case data weight variable or to re-run all aggregations based on a changed source data version (e.g. after cleaning the file/ dropping cases) or a @@ -709,13 +784,13 @@

      Stack Parameters:
        -
      • data_key (str) – The Links’ data key to be modified.
      • -
      • new_data_key (str, default '') – Controls if the existing data key’s files and aggregations will be +
      • data_key (str) – The Links’ data key to be modified.
      • +
      • new_data_key (str, default '') – Controls if the existing data key’s files and aggregations will be overwritten or stored via a new data key.
      • -
      • new_weight (str) – The name of a new weight variable used to re-aggregate the Links.
      • -
      • new_data (pandas.DataFrame) – The case data source. If None is given, the +
      • new_weight (str) – The name of a new weight variable used to re-aggregate the Links.
      • +
      • new_data (pandas.DataFrame) – The case data source. If None is given, the original case data found for the data key will be used.
      • -
      • new_meta (quantipy meta document) – A meta data source associated with the case data. If None is given, +
      • new_meta (quantipy meta document) – A meta data source associated with the case data. If None is given, the original meta definition found for the data key will be used.
      @@ -738,7 +813,7 @@

      Stack -Parameters:data_keys (str or list of str) – The data keys to remove. +Parameters:data_keys (str or list of str) – The data keys to remove. Returns: @@ -748,6 +823,26 @@

      Stack

      +
      +
      +restore_meta(data_key, filter_key=None)
      +

      Restore the .master_meta for a defined data_key if it exists.

      +

      Undo self.apply_meta_edits()

      + +++ + + + +
      Parameters:
        +
      • data_key (str) – Accessing this metadata: self[data_key].meta
      • +
      • filter_key (str, default None) – Currently not implemented! +Accessing this metadata: self[data_key][filter_key].meta
      • +
      +
      +
      +
      save(path_stack, compression='gzip', store_cache=True, decode_str=False, dataset=False, describe=False)
      @@ -757,15 +852,15 @@

      Stack Parameters:
        -
      • path_stack (str) – The full path to the .stack file that should be created, including +
      • path_stack (str) – The full path to the .stack file that should be created, including the extension.
      • -
      • compression ({'gzip'}, default 'gzip') – The intended compression type.
      • -
      • store_cache (bool, default True) – Stores the MatrixCache in a file in the same location.
      • -
      • decode_str (bool, default=True) – If True the unicoder function will be used to decode all str +
      • compression ({'gzip'}, default 'gzip') – The intended compression type.
      • +
      • store_cache (bool, default True) – Stores the MatrixCache in a file in the same location.
      • +
      • decode_str (bool, default=True) – If True the unicoder function will be used to decode all str objects found anywhere in the meta document/s.
      • -
      • dataset (bool, default=False) – If True a json/csv will be saved parallel to the saved stack +
      • dataset (bool, default=False) – If True a json/csv will be saved parallel to the saved stack for each data key in the stack.
      • -
      • describe (bool, default=False) – If True the result of stack.describe().to_excel() will be +
      • describe (bool, default=False) – If True the result of stack.describe().to_excel() will be saved parallel to the saved stack.
      @@ -789,13 +884,13 @@

      Stack Parameters:
        -
      • data_key (str) – The reference name of a case data source hold by the Stack instance.
      • -
      • only_type ({'int', 'float', 'single', 'delimited set', 'string',) – ‘date’, time’, ‘array’}, optional +
      • data_key (str) – The reference name of a case data source hold by the Stack instance.
      • +
      • only_type ({'int', 'float', 'single', 'delimited set', 'string',) – ‘date’, time’, ‘array’}, optional Will restrict the output to the given data type.
      -Returns:

      types – A summary of variable names mapped to their data types, in form of +Returns:

      types – A summary of variable names mapped to their data types, in form of {type_name: [variable names]} or a list of variable names confirming only_type.

      @@ -816,6 +911,15 @@

      Stack