diff --git a/include/gdf/cffi/functions.h b/include/gdf/cffi/functions.h index 610e756d..00d77338 100644 --- a/include/gdf/cffi/functions.h +++ b/include/gdf/cffi/functions.h @@ -1,44 +1,38 @@ #pragma once -/* --------------------------------------------------------------------------*/ + /** - * @Synopsis Start a NVTX range with predefined color. + * @brief Start a NVTX range with predefined color. * * This function is useful only for profiling with nvvp or Nsight Systems. It * demarcates the begining of a user-defined range with a specified name and * color that will show up in the timeline view of nvvp/Nsight Systems. Can be * nested within other ranges. * - * @Param name The name of the NVTX range - * @Param color The predefined gdf_color enum to use to color this range + * @Param[in] name The name of the NVTX range + * @Param[in] color The predefined gdf_color enum to use to color this range * * @Returns */ -/* ----------------------------------------------------------------------------*/ gdf_error gdf_nvtx_range_push(char const * const name, gdf_color color ); - - -/* --------------------------------------------------------------------------*/ /** - * @Synopsis Start a NVTX range with a custom ARGB color code. + * @brief Start a NVTX range with a custom ARGB color code. * * This function is useful only for profiling with nvvp or Nsight Systems. It * demarcates the begining of a user-defined range with a specified name and * color that will show up in the timeline view of nvvp/Nsight Systems. Can be * nested within other ranges. 
* - * @Param name The name of the NVTX range - * @Param color The ARGB hex color code to use to color this range (e.g., 0xFF00FF00) + * @Param[in] name The name of the NVTX range + * @Param[in] color The ARGB hex color code to use to color this range (e.g., 0xFF00FF00) * * @Returns */ -/* ----------------------------------------------------------------------------*/ gdf_error gdf_nvtx_range_push_hex(char const * const name, unsigned int color ); -/* --------------------------------------------------------------------------*/ /** * @Synopsis Ends the inner-most NVTX range. * @@ -48,12 +42,11 @@ gdf_error gdf_nvtx_range_push_hex(char const * const name, unsigned int color ); * * @Returns */ -/* ----------------------------------------------------------------------------*/ gdf_error gdf_nvtx_range_pop(); -/* --------------------------------------------------------------------------*/ + /** - * @Synopsis Counts the number of valid bits in the mask that corresponds to + * @brief Counts the number of valid bits in the mask that corresponds to * the specified number of rows. * * @Param[in] masks Array of gdf_valid_types with enough bits to represent @@ -63,23 +56,60 @@ gdf_error gdf_nvtx_range_pop(); * * @Returns GDF_SUCCESS upon successful completion. 
*/ -/* ----------------------------------------------------------------------------*/ gdf_error gdf_count_nonzero_mask(gdf_valid_type const * masks, int num_rows, int * count); /* column operations */ +/** + * @brief Size of the @ref gdf_column itself (as opposed to the data of any specific column) + */ gdf_size_type gdf_column_sizeof(); -gdf_error gdf_column_view(gdf_column *column, void *data, gdf_valid_type *valid, - gdf_size_type size, gdf_dtype dtype); - +/** + * @brief Construct (in-place) a GDF column structure from pre-existing column data + * + * + * @note One cannot currently pass in a name for the column + * + * @param[out] column location for the constructed @ref gdf_column + * @param[in] data see the `data` field under @ref gdf_column + * @param[in] valid see the `valid` field under @ref gdf_column + * @param[in] size see the `size` field under @ref gdf_column + * @param[in] dtype see the `dtype` field under @ref gdf_column + */ gdf_error gdf_column_view_augmented(gdf_column *column, void *data, gdf_valid_type *valid, gdf_size_type size, gdf_dtype dtype, gdf_size_type null_count); + +/** + * @brief A variant of @ref gdf_column_view_augmented for the case of a column + * having no null elements + */ +gdf_error gdf_column_view(gdf_column *column, void *data, gdf_valid_type *valid, + gdf_size_type size, gdf_dtype dtype); + +/** + * @brief Free the resources associated with a @ref gdf_column created entirely + * by libgdf + * + * @note This is not to be called on columns created using @ref gdf_column_view, + * @ref gdf_column_view_augmented or manually outside libgdf + * + * @param[in] column The column whose resources are to be freed + * + */ gdf_error gdf_column_free(gdf_column *column); /* context operations */ +/** + * @brief Constructs a @ref gdf_context struct from its component fields + * + * @param[out] context location for the constructed @ref gdf_context + * @param[in] flag_sorted see the `flag_sorted` field under @ref gdf_context + * @param[in] 
flag_method see the `flag_method` field under @ref gdf_context + * @param[in] flag_distinct see the `flag_distinct` field under @ref gdf_context + */ gdf_error gdf_context_view(gdf_context *context, int flag_sorted, gdf_method flag_method, int flag_distinct); @@ -182,30 +212,26 @@ gdf_error gdf_segmented_radixsort_generic(gdf_segmented_radixsort_plan_type *hdl // joins -/* --------------------------------------------------------------------------*/ /** - * @Synopsis Joins two dataframes (left, right) together on the specified columns + * @brief Joins two dataframes (left, right) together on the specified columns * - * @Param[in] left_cols[] The columns of the left dataframe + * @Param[in] left_cols The columns of the left dataframe * @Param[in] num_left_cols The number of columns in the left dataframe - * @Param[in] left_join_cols[] The column indices of columns from the left dataframe + * @Param[in] left_join_cols The column indices of columns from the left dataframe * to join on - * @Param[in] right_cols[] The columns of the right dataframe + * @Param[in] right_cols The columns of the right dataframe * @Param[in] num_right_cols The number of columns in the right dataframe - * @Param[in] right_join_cols[] The column indices of columns from the right dataframe + * @Param[in] right_join_cols The column indices of columns from the right dataframe * to join on * @Param[in] num_cols_to_join The total number of columns to join on * @Param[in] result_num_cols The number of columns in the resulting dataframe - * @Param[out] gdf_column *result_cols[] If not nullptr, the dataframe that results from joining + * @Param[out] result_cols If not nullptr, the dataframe that results from joining * the left and right tables on the specified columns - * @Param[out] gdf_column * left_indices If not nullptr, indices of rows from the left table that match rows in the right table - * @Param[out] gdf_column * right_indices If not nullptr, indices of rows from the right table that match 
rows in the left table - * @Param[in] join_context The context to use to control how the join is performed,e.g., + * @Param[out] left_indices If not nullptr, indices of rows from the left table that match rows in the right table + * @Param[out] right_indices If not nullptr, indices of rows from the right table that match rows in the left table + * @Param[in] join_context The context to use to control how the join is performed, e.g. * sort vs hash based implementation - * - * @Returns */ -/* ----------------------------------------------------------------------------*/ gdf_error gdf_inner_join( gdf_column **left_cols, int num_left_cols, @@ -220,30 +246,26 @@ gdf_error gdf_inner_join( gdf_column * right_indices, gdf_context *join_context); -/* --------------------------------------------------------------------------*/ /** - * @Synopsis Joins two dataframes (left, right) together on the specified columns + * @brief Joins two dataframes (left, right) together on the specified columns * - * @Param[in] left_cols[] The columns of the left dataframe + * @Param[in] left_cols The columns of the left dataframe * @Param[in] num_left_cols The number of columns in the left dataframe - * @Param[in] left_join_cols[] The column indices of columns from the left dataframe + * @Param[in] left_join_cols The column indices of columns from the left dataframe * to join on - * @Param[in] right_cols[] The columns of the right dataframe + * @Param[in] right_cols The columns of the right dataframe * @Param[in] num_right_cols The number of columns in the right dataframe - * @Param[in] right_join_cols[] The column indices of columns from the right dataframe + * @Param[in] right_join_cols The column indices of columns from the right dataframe * to join on * @Param[in] num_cols_to_join The total number of columns to join on * @Param[in] result_num_cols The number of columns in the resulting dataframe - * @Param[out] gdf_column *result_cols[] If not nullptr, the dataframe that results from 
joining + * @Param[out] result_cols If not nullptr, the dataframe that results from joining * the left and right tables on the specified columns - * @Param[out] gdf_column * left_indices If not nullptr, indices of rows from the left table that match rows in the right table - * @Param[out] gdf_column * right_indices If not nullptr, indices of rows from the right table that match rows in the left table + * @Param[out] left_indices If not nullptr, indices of rows from the left table that match rows in the right table + * @Param[out] right_indices If not nullptr, indices of rows from the right table that match rows in the left table * @Param[in] join_context The context to use to control how the join is performed,e.g., * sort vs hash based implementation - * - * @Returns */ -/* ----------------------------------------------------------------------------*/ gdf_error gdf_left_join( gdf_column **left_cols, int num_left_cols, @@ -276,7 +298,6 @@ gdf_error gdf_outer_join_generic(gdf_column *leftcol, gdf_column *rightcol, /* partioning */ -/* --------------------------------------------------------------------------*/ /** * @brief Computes the hash values of the rows in the specified columns of the * input columns and bins the hash values into the desired number of partitions. @@ -284,20 +305,17 @@ gdf_error gdf_outer_join_generic(gdf_column *leftcol, gdf_column *rightcol, * are contiguous. 
* * @Param[in] num_input_cols The number of columns in the input columns - * @Param[in] input[] The input set of columns - * @Param[in] columns_to_hash[] Indices of the columns in the input set to hash + * @Param[in] input The input set of columns + * @Param[in] columns_to_hash Indices of the columns in the input set to hash * @Param[in] num_cols_to_hash The number of columns to hash * @Param[in] num_partitions The number of partitions to rearrange the input rows into - * @Param[out] partitioned_output Preallocated gdf_columns to hold the rearrangement + * @Param[out] partitioned_output Preallocated gdf_columns to hold the rearrangement * of the input columns into the desired number of partitions * @Param[out] partition_offsets Preallocated array the size of the number of * partitions. Where partition_offsets[i] indicates the starting position * of partition 'i' * @Param[in] hash The hash function to use - * - * @Returns If the operation was successful, returns GDF_SUCCESS */ -/* ----------------------------------------------------------------------------*/ gdf_error gdf_hash_partition(int num_input_cols, gdf_column * input[], int columns_to_hash[], @@ -319,19 +337,14 @@ gdf_error gdf_prefixsum_i64(gdf_column *inp, gdf_column *out, int inclusive); /* hashing */ -/* --------------------------------------------------------------------------*/ /** - * @Synopsis Computes the hash value of each row in the input set of columns. - * - * @Param num_cols The number of columns in the input set - * @Param input The list of columns whose rows will be hashed - * @Param hash The hash function to use - * @Param output The hash value of each row of the input + * @brief Computes the hash value of each row in the input set of columns. 
* - * @Returns GDF_SUCCESS if the operation was successful, otherwise an appropriate - * error code + * @Param[in] num_cols The number of columns in the input set + * @Param[in] input The list of columns whose rows will be hashed + * @Param[in] hash The hash function to use + * @Param[out] output The hash value of each row of the input */ -/* ----------------------------------------------------------------------------*/ gdf_error gdf_hash(int num_cols, gdf_column **input, gdf_hash_func hash, gdf_column *output); /* trig */ diff --git a/include/gdf/cffi/types.h b/include/gdf/cffi/types.h index 0b251a73..53f3e9de 100644 --- a/include/gdf/cffi/types.h +++ b/include/gdf/cffi/types.h @@ -2,16 +2,20 @@ typedef size_t gdf_size_type; typedef gdf_size_type gdf_index_type; + +/** + * @brief A bit-holder type, used for indicating whether some column elements + * are null or not. If the corresponding element is null, its bit will be 0; + * otherwise the value is 1 (a "valid" element) + */ typedef unsigned char gdf_valid_type; typedef long gdf_date64; typedef int gdf_date32; typedef int gdf_category; -/* --------------------------------------------------------------------------*/ - /** - * @Synopsis These enums indicate the possible data types for a gdf_column +/** + * @brief Possible data types for a @ref `gdf_column` */ -/* ----------------------------------------------------------------------------*/ typedef enum { GDF_invalid=0, GDF_INT8, @@ -20,86 +24,137 @@ typedef enum { GDF_INT64, GDF_FLOAT32, GDF_FLOAT64, - GDF_DATE32, /**< int32_t days since the UNIX epoch */ - GDF_DATE64, /**< int64_t milliseconds since the UNIX epoch */ - GDF_TIMESTAMP, /**< Exact timestamp encoded with int64 since UNIX epoch (Default unit millisecond) */ + GDF_DATE32, ///< int32_t days since the UNIX epoch + GDF_DATE64, ///< int64_t milliseconds since the UNIX epoch + GDF_TIMESTAMP, ///< Exact timestamp encoded with int64 since UNIX epoch (Default unit millisecond) GDF_CATEGORY, GDF_STRING, 
N_GDF_TYPES, /* additional types should go BEFORE N_GDF_TYPES */ } gdf_dtype; -/* --------------------------------------------------------------------------*/ /** - * @Synopsis These are all possible gdf error codes that can be returned from - * a libgdf function. ANY NEW ERROR CODE MUST ALSO BE ADDED TO `gdf_error_get_name` - * AS WELL + * @brief Possible return values from libgdf functions. + * + * @note All error codes in this enum have corresponding descriptions + * available via the @ref `gdf_error_get_name` function */ -/* ----------------------------------------------------------------------------*/ typedef enum { GDF_SUCCESS=0, - GDF_CUDA_ERROR, /**< Error occured in a CUDA call */ - GDF_UNSUPPORTED_DTYPE, /**< The datatype of the gdf_column is unsupported */ - GDF_COLUMN_SIZE_MISMATCH, /**< Two columns that should be the same size aren't the same size*/ - GDF_COLUMN_SIZE_TOO_BIG, /**< Size of column is larger than the max supported size */ - GDF_DATASET_EMPTY, /**< Input dataset is either null or has size 0 when it shouldn't */ - GDF_VALIDITY_MISSING, /**< gdf_column's validity bitmask is null */ - GDF_VALIDITY_UNSUPPORTED, /**< The requested gdf operation does not support validity bitmask handling, and one of the input columns has the valid bits enabled */ - GDF_INVALID_API_CALL, /**< The arguments passed into the function were invalid */ - GDF_JOIN_DTYPE_MISMATCH, /**< Datatype mismatch between corresponding columns in left/right tables in the Join function */ - GDF_JOIN_TOO_MANY_COLUMNS, /**< Too many columns were passed in for the requested join operation*/ - GDF_DTYPE_MISMATCH, /**< Type mismatch between columns that should be the same type */ - GDF_UNSUPPORTED_METHOD, /**< The method requested to perform an operation was invalid or unsupported (e.g., hash vs. 
sort)*/ - GDF_INVALID_AGGREGATOR, /**< Invalid aggregator was specified for a groupby*/ - GDF_INVALID_HASH_FUNCTION, /**< Invalid hash function was selected */ - GDF_PARTITION_DTYPE_MISMATCH, /**< Datatype mismatch between columns of input/output in the hash partition function */ - GDF_HASH_TABLE_INSERT_FAILURE, /**< Failed to insert to hash table, likely because its full */ - GDF_UNSUPPORTED_JOIN_TYPE, /**< The type of join requested is unsupported */ - GDF_UNDEFINED_NVTX_COLOR, /**< The requested color used to define an NVTX range is not defined */ - GDF_NULL_NVTX_NAME, /**< The requested name for an NVTX range cannot be nullptr */ - GDF_C_ERROR, /**< C error not related to CUDA */ - GDF_FILE_ERROR, /**< error processing sepcified file */ + GDF_CUDA_ERROR, ///< Error occurred in a CUDA call + GDF_UNSUPPORTED_DTYPE, ///< The data type of the gdf_column is unsupported + GDF_COLUMN_SIZE_MISMATCH, ///< Two columns that should be the same size aren't the same size + GDF_COLUMN_SIZE_TOO_BIG, ///< Size of column is larger than the max supported size + GDF_DATASET_EMPTY, ///< An input column is either null or has size 0, when it must have data + GDF_VALIDITY_MISSING, ///< gdf_column's validity bitmask is null + GDF_VALIDITY_UNSUPPORTED, ///< The requested GDF operation does not support validity bitmask handling, and one of the input columns has the valid bits enabled + GDF_INVALID_API_CALL, ///< The arguments passed into the function were invalid + GDF_JOIN_DTYPE_MISMATCH, ///< Data type mismatch between corresponding columns in left/right tables in the Join function + GDF_JOIN_TOO_MANY_COLUMNS, ///< Too many columns were passed in for the requested join operation + GDF_DTYPE_MISMATCH, ///< Type mismatch between columns that should be the same type + GDF_UNSUPPORTED_METHOD, ///< The method requested to perform an operation was invalid or unsupported (e.g., hash vs. 
sort) + GDF_INVALID_AGGREGATOR, ///< Invalid aggregator was specified for a group-by operation + GDF_INVALID_HASH_FUNCTION, ///< Invalid hash function was selected + GDF_PARTITION_DTYPE_MISMATCH, ///< Data type mismatch between columns of input/output in the hash partition function + GDF_HASH_TABLE_INSERT_FAILURE, ///< Failed to insert to hash table, likely because it's full + GDF_UNSUPPORTED_JOIN_TYPE, ///< The type of join requested is unsupported + GDF_UNDEFINED_NVTX_COLOR, ///< The requested color used to define an NVTX range is not defined + GDF_NULL_NVTX_NAME, ///< The requested name for an NVTX range cannot be a null ptr + GDF_C_ERROR, ///< C error not related to CUDA + GDF_FILE_ERROR, ///< Error processing the specified file } gdf_error; +/** + * @brief Possible hash functions for use in Joins, partitioning, and other operations libgdf provides. + */ typedef enum { - GDF_HASH_MURMUR3=0, /**< Murmur3 hash function */ - GDF_HASH_IDENTITY, /**< Identity hash function that simply returns the key to be hashed */ + GDF_HASH_MURMUR3=0, ///< Murmur3 hash function; see @url https://en.wikipedia.org/wiki/MurmurHash#MurmurHash3 + GDF_HASH_IDENTITY, ///< Identity hash function that simply returns the key to be hashed } gdf_hash_func; +/** + * @brief The resolution, or unit, for durations of time, used alongside duration values + */ typedef enum { - TIME_UNIT_NONE=0, // default (undefined) - TIME_UNIT_s, // second - TIME_UNIT_ms, // millisecond - TIME_UNIT_us, // microsecond - TIME_UNIT_ns // nanosecond + TIME_UNIT_NONE=0, ///< time unit is undefined/unknown; this is the implicit default + TIME_UNIT_s, ///< seconds + TIME_UNIT_ms, ///< milliseconds (10^{-3} seconds) + TIME_UNIT_us, ///< microseconds (10^{-6} seconds) + TIME_UNIT_ns ///< nanoseconds (10^{-9} seconds) } gdf_time_unit; +/** + * @brief Potential Auxiliary information regarding a datum in a libgdf column. + * + * @note held either at the single-element or whole-column level. 
+ * + */ typedef struct { gdf_time_unit time_unit; // here we can also hold info for decimal datatype or any other datatype that requires additional information } gdf_dtype_extra_info; -typedef struct gdf_column_{ - void *data; /**< Pointer to the columns data */ - gdf_valid_type *valid; /**< Pointer to the columns validity bit mask where the 'i'th bit indicates if the 'i'th row is NULL */ - gdf_size_type size; /**< Number of data elements in the columns data buffer*/ - gdf_dtype dtype; /**< The datatype of the column's data */ - gdf_size_type null_count; /**< The number of NULL values in the column's data */ - gdf_dtype_extra_info dtype_info; - char * col_name; // host-side: null terminated string -} gdf_column; - -/* --------------------------------------------------------------------------*/ +/** + * @brief The fundamental, columnar format of data the GDF library works with. + * + * A gdf_column_ may originate in a RDBMS-like schema table; it may be the intermediary result + * within an execution plan; or it may be the result of non-DBMS related computation. + */ + typedef struct gdf_column_{ + void *data; + ///< Type-erased pointer to the column's raw data - which is a consecutive sequence + ///< with no gaps of elements of the type represented by `dtype`. + ///< + ///< @todo There are currently no formal alignment requirements, but it seems the + ///< implementation may implicitly be assuming alignment to the size of the relevant + ///< type. + ///< @todo Can this be NULL? What about after "construction"? + ///< @todo Is this always in device memory? 
+ + gdf_valid_type *valid; + ///< a pseudo-column of `size` bits, packed into bytes (with in-byte order from + ///< the least significant to the most significant bit), indicating whether + ///< the column element is null (bit value is 0) or not null (bit value is 1; + ///< a "valid" element) + ///< + ///< @todo There are currently no formal alignment requirements, but it seems the + ///< implementation may implicitly be assuming alignment to the size of the relevant + ///< + ///< @todo Is this expressly forbidden from being NULL in the case of a 0 null-count? + + gdf_size_type size; + ///< The number of column elements (_not_ their total size in bytes, _nor_ the + ///< size of an individual element) + ///< + ///< @todo is it allocated capacity or size in use? + + gdf_dtype dtype; + ///< An indicator of the column's data type, for type un-erasure + + gdf_size_type null_count; + ///< The number of null elements in the column, which is + ///< also the number of 0 bits in the `valid` pseudo-column + ///< (within the range of valid bits, i.e. 0..size-1 ) + + gdf_dtype_extra_info dtype_info; + ///< Additional information qualifying the data type + + char * col_name; + ///< The column's name - a NUL-terminated string in host memory + + } gdf_column; + /** - * @Synopsis These enums indicate which method is to be used for an operation. + * @brief These enums indicate which method is to be used for an operation. * For example, it is used to select between the hash-based vs. sort-based implementations * of the Join operation. 
*/ -/* ----------------------------------------------------------------------------*/ typedef enum { - GDF_SORT = 0, /**< Indicates that the sort-based implementation of the function will be used */ - GDF_HASH, /**< Indicates that the hash-based implementation of the function will be used */ - N_GDF_METHODS, /* additional methods should go BEFORE N_GDF_METHODS */ + GDF_SORT = 0, ///< Indicates that the sort-based implementation of the function will be used + GDF_HASH, ///< Indicates that the hash-based implementation of the function will be used + + // New enum values should be added above this line + N_GDF_METHODS, } gdf_method; typedef enum { @@ -108,35 +163,37 @@ typedef enum { GDF_QUANT_HIGHER, GDF_QUANT_MIDPOINT, GDF_QUANT_NEAREST, + + // New enum values should be added above this line N_GDF_QUANT_METHODS, } gdf_quantile_method; -/* --------------------------------------------------------------------------*/ /** - * @Synopsis These enums indicate the supported aggregation operations that can be - * performed on a set of aggregation columns as part of a GroupBy operation + * @brief Possible aggregation (=reduction) function which may be performed on a + * column, or sequence of aggregation columns, by a GroupBy operation + * + * Also @ref window_reduction_type . */ -/* ----------------------------------------------------------------------------*/ typedef enum { - GDF_SUM = 0, /**< Computes the sum of all values in the aggregation column*/ - GDF_MIN, /**< Computes minimum value in the aggregation column */ - GDF_MAX, /**< Computes maximum value in the aggregation column */ - GDF_AVG, /**< Computes arithmetic mean of all values in the aggregation column */ - GDF_COUNT, /**< Computes histogram of the occurance of each key in the GroupBy Columns */ - GDF_COUNT_DISTINCT, /**< Counts the number of distinct keys in the GroupBy columns */ - N_GDF_AGG_OPS, /**< The total number of aggregation operations. 
ALL NEW OPERATIONS SHOULD BE ADDED ABOVE THIS LINE*/ + GDF_SUM = 0, ///< Computes the sum of all values in the aggregation column + GDF_MIN, ///< Computes minimum value in the aggregation column + GDF_MAX, ///< Computes maximum value in the aggregation column + GDF_AVG, ///< Computes arithmetic mean of all values in the aggregation column + GDF_COUNT, ///< Computes histogram of the occurrence of each key in the GroupBy Columns + GDF_COUNT_DISTINCT, ///< Counts the number of distinct keys in the GroupBy columns + + // New enum values should be added above this line + N_GDF_AGG_OPS, ///< The total number of aggregation operations. } gdf_agg_op; -/* --------------------------------------------------------------------------*/ /** - * @Synopsis Colors for use with NVTX ranges. + * @brief Colors for use with NVTX ranges. * * These enumerations are the available pre-defined colors for use with * user-defined NVTX ranges. */ -/* ----------------------------------------------------------------------------*/ typedef enum { GDF_GREEN = 0, GDF_BLUE, @@ -147,21 +204,20 @@ GDF_WHITE, GDF_DARK_GREEN, GDF_ORANGE, - GDF_NUM_COLORS, /** Add new colors above this line */ + + // New enum values should be added above this line + GDF_NUM_COLORS, } gdf_color; -/* --------------------------------------------------------------------------*/ /** - * @Synopsis This struct holds various information about how an operation should be - * performed as well as additional information about the input data. + * @brief Information about how an operation should be performed and about its input */ -/* ----------------------------------------------------------------------------*/ typedef struct gdf_context_{ - int flag_sorted; /**< Indicates if the input data is sorted. 
0 = No, 1 = yes */ - gdf_method flag_method; /**< The method to be used for the operation (e.g., sort vs hash) */ - int flag_distinct; /**< for COUNT: DISTINCT = 1, else = 0 */ - int flag_sort_result; /**< When method is GDF_HASH, 0 = result is not sorted, 1 = result is sorted */ - int flag_sort_inplace; /**< 0 = No sort in place allowed, 1 = else */ + int flag_sorted; ///< Indicates if the input data is sorted. 0 = No, 1 = yes + gdf_method flag_method; ///< The method to be used for the operation (e.g., sort vs hash) + int flag_distinct; ///< for COUNT: DISTINCT = 1, else = 0 + int flag_sort_result; ///< When method is GDF_HASH, 0 = result is not sorted, 1 = result is sorted + int flag_sort_inplace; ///< 0 = No sort in place allowed, 1 = else } gdf_context; struct _OpaqueIpcParser; @@ -176,11 +232,9 @@ struct _OpaqueSegmentedRadixsortPlan; typedef struct _OpaqueSegmentedRadixsortPlan gdf_segmented_radixsort_plan_type; - - typedef enum{ - GDF_ORDER_ASC, - GDF_ORDER_DESC + GDF_ORDER_ASC, ///< Ascending order + GDF_ORDER_DESC ///< Descending order } order_by_type; typedef enum{ @@ -197,6 +251,12 @@ typedef enum{ GDF_WINDOW_ROW } window_function_type; +/** + * @brief Possible aggregation (=reduction) function which may be performed on a + * window in a column, or sequence of aggregation columns, by a GroupBy operation + * + * Also, see @ref gdf_agg_op . + */ typedef enum{ GDF_WINDOW_AVG, GDF_WINDOW_SUM,