Skip to content

Commit d19df01

Browse files
pivotCount, pivotMatches kdocs
1 parent 58704fe commit d19df01

File tree

3 files changed

+245
-33
lines changed

3 files changed

+245
-33
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,7 @@ import kotlin.reflect.KProperty
5050
* * [pivoted][Pivoting] into a [PivotGroupBy] structure, which combines [pivot] and [groupBy] operations
5151
* and then reduced or aggregated into a [DataFrame].
5252
*
53-
* Grouping keys can also be created inline:
54-
* ```kotlin
55-
* // Create a new column "newName" based on existing "oldName" values
56-
* // and use it as a grouping key:
57-
* df.groupBy { expr("newName") { oldName.drop(5) } }
58-
* ```
53+
* @include [GroupingKeysInline]
5954
*
6055
* Check out [Grammar].
6156
*
@@ -324,6 +319,17 @@ internal interface GroupByDocs {
324319
* @include [PivotGroupByDocs.CommonDescription]
325320
*/
326321
interface Pivoting
322+
323+
/**
324+
* Grouping key columns can also be created inline:
325+
* ```kotlin
326+
* // Create a new column "newName" based on existing "oldName" values
327+
* // and use it as a grouping key:
328+
* df.groupBy { expr("newName") { oldName.drop(5) } }
329+
* ```
330+
*/
331+
@ExcludeFromSources
332+
interface GroupingKeysInline
327333
}
328334

329335
/** {@set [SelectingColumns.OPERATION] [groupBy][groupBy]} */
@@ -379,17 +385,11 @@ public fun <T> DataFrame<T>.groupBy(vararg cols: AnyColumnReference, moveToTop:
379385
/**
380386
* Groups the rows of this [Pivot] groups
381387
* based on the values in one or more specified [key columns][\columns].
382-
*
383-
* Works like regular [DataFrame.groupBy] on pivot groups.
384-
*
385-
* Grouping keys can also be created inline:
386-
* ```kotlin
387-
* // Create a new column "newName" based on existing "oldName" values
388-
* // and use it as a grouping key:
389-
* pivot.groupBy { expr("newName") { oldName.drop(5) } }
390-
* ```
388+
* Returns a [PivotGroupBy].
391389
*
392390
* @include [PivotGroupByDocs.CommonDescription]
391+
*
392+
* @include [GroupByDocs.GroupingKeysInline]
393393
*/
394394
@ExcludeFromSources
395395
private interface GroupByForPivotDocs
@@ -405,17 +405,8 @@ private interface CommonGroupByForPivotDocs
405405

406406
/**
407407
* {@include [CommonGroupByForPivotDocs]}
408-
* @include [SelectingColumns.Dsl]
409-
*
410-
* #### For example:
411-
*
412-
* `pivot.`{@get [OPERATION]}` { length `[and][ColumnsSelectionDsl.and]` age }`
408+
* @include [SelectingColumns.Dsl.WithExample] {@include [SetGroupByOperationArg] {@set [SelectingColumns.RECEIVER] <code>`pivot`</code>}}
413409
*
414-
* `pivot.`{@get [OPERATION]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }`
415-
*
416-
* `pivot.`{@get [OPERATION]}` { `[colsOf][ColumnsSelectionDsl.colsOf]`<`[Double][Double]`>() }`
417-
*
418-
* {@include [SetGroupByOperationArg]}
419410
* @param moveToTop Specifies whether nested grouping columns should be moved to the top level
420411
* or kept inside a [ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup].
421412
* Defaults to `true`.

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt

Lines changed: 215 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ import kotlin.reflect.KProperty
4747
* * [grouped][Grouping] into a [PivotGroupBy] structure, which combines [pivot] and [groupBy] operations
4848
* and then reduced or aggregated into a [DataFrame].
4949
*
50+
* @include [PivotedColumnsInline]
51+
*
5052
* Check out [Grammar].
5153
*
5254
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
@@ -238,8 +240,23 @@ internal interface PivotDocs {
238240
*/
239241
interface AggregationStatistics
240242

243+
/**
244+
* Pivoted columns can also be created inline:
245+
* ```kotlin
246+
* // Create a new column "newName" based on existing "oldName" values
247+
* // and pivot it:
248+
* df.pivot { expr("newName") { oldName.drop(5) } }
249+
* ```
250+
*/
251+
@ExcludeFromSources
252+
interface PivotedColumnsInline
241253
}
242254

255+
/** {@set [SelectingColumns.OPERATION] [pivot][pivot]} */
256+
@ExcludeFromSources
257+
private interface SetPivotOperationArg
258+
259+
243260
/**
244261
* A specialized [ColumnsSelectionDsl] that allows specifying [pivot] key ordering
245262
* using the [then] function.
@@ -412,7 +429,7 @@ internal interface PivotMatchesResultDescription
412429

413430
/**
414431
* Computes whether matching rows exist in this [DataFrame] for all unique values of the
415-
* selected columns (independently) across all possible combinations
432+
* selected [\columns] (independently) across all possible combinations
416433
* of values in the remaining columns (all except the selected ones).
417434
*
418435
* Performs a [pivot] operation on the specified [\columns] of this [DataFrame],
@@ -431,8 +448,9 @@ internal interface PivotMatchesResultDescription
431448
*
432449
* For more information: {@include [DocumentationUrls.PivotMatches]}
433450
*
434-
* See also: [pivotCounts], which performs a similar operation
435-
* but counts the number of matching rows instead of checking for their presence.
451+
* See also:
452+
* * [pivotCounts], which performs a similar operation
453+
* but counts the number of matching rows instead of checking for their presence.
436454
*
437455
* ### This `pivotMatches` Overload
438456
*/
@@ -502,7 +520,7 @@ internal interface PivotCountsResultDescription
502520

503521
/**
504522
* Computes the number of matching rows in this [DataFrame] for all unique values of the
505-
* selected columns (independently) across all possible combinations
523+
* selected [\columns] (independently) across all possible combinations
506524
* of values in the remaining columns (all except the selected ones).
507525
*
508526
* Performs a [pivot] operation on the specified [\columns] of this [DataFrame],
@@ -519,7 +537,7 @@ internal interface PivotCountsResultDescription
519537
*
520538
* See [Selecting Columns][SelectSelectingOptions].
521539
*
522-
* For more information: {@include [DocumentationUrls.PivotMatches]}
540+
* For more information: {@include [DocumentationUrls.PivotCounts]}
523541
*
524542
* See also: [pivotMatches], which performs a similar operation
525543
* but checks whether any matching row exists instead of counting them.
@@ -588,6 +606,33 @@ public fun <T> DataFrame<T>.pivotCounts(vararg columns: KProperty<*>, inward: Bo
588606

589607
// region pivot
590608

609+
/**
610+
* Pivots the selected [\columns] of this [GroupBy] groups.
611+
* Returns a [PivotGroupBy].
612+
*
613+
* @include [PivotGroupByDocs.CommonDescription]
614+
*
615+
* @include [PivotDocs.PivotedColumnsInline]
616+
*/
617+
@ExcludeFromSources
618+
private interface PivotForGroupByDocs
619+
620+
/**
621+
* {@include [PivotForGroupByDocs]}
622+
* ### This `pivot` Overload
623+
*/
624+
@ExcludeFromSources
625+
private interface CommonPivotForGroupByDocs
626+
627+
/**
628+
* @include [CommonPivotForGroupByDocs]
629+
* @include [SelectingColumns.Dsl.WithExample] {@include [SetPivotOperationArg] {@set [SelectingColumns.RECEIVER] <code>`gb`</code>}}
630+
* @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column;
631+
* if `false`, they are placed at the top level.
632+
* @param [columns] The [Columns Selector][ColumnsSelector] that defines which columns are pivoted.
633+
* @return A new [PivotGroupBy] that preserves the original [groupBy] key columns
634+
* and pivots the provided columns.
635+
*/
591636
public fun <G> GroupBy<*, G>.pivot(inward: Boolean = true, columns: ColumnsSelector<G, *>): PivotGroupBy<G> =
592637
PivotGroupByImpl(this, columns, inward)
593638

@@ -596,6 +641,15 @@ public fun <G> GroupBy<*, G>.pivot(inward: Boolean = true, columns: ColumnsSelec
596641
public fun <G> GroupBy<*, G>.pivot(vararg columns: AnyColumnReference, inward: Boolean = true): PivotGroupBy<G> =
597642
pivot(inward) { columns.toColumnSet() }
598643

644+
/**
645+
* @include [CommonPivotForGroupByDocs]
646+
* @include [SelectingColumns.ColumnNames.WithExample] {@include [SetPivotOperationArg] {@set [SelectingColumns.RECEIVER] <code>`gb`</code>}}
647+
* @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column;
648+
* if `false`, they are placed at the top level.
649+
* @param [columns] The [Column names][String] that define which columns are pivoted.
650+
* @return A new [PivotGroupBy] that preserves the original [groupBy] key columns
651+
* and pivots the provided columns.
652+
*/
599653
public fun <G> GroupBy<*, G>.pivot(vararg columns: String, inward: Boolean = true): PivotGroupBy<G> =
600654
pivot(inward) { columns.toColumnSet() }
601655

@@ -608,9 +662,69 @@ public fun <G> GroupBy<*, G>.pivot(vararg columns: KProperty<*>, inward: Boolean
608662

609663
// region pivotMatches
610664

665+
/**
666+
* Computes whether matching rows exist in groups of this [GroupBy] for all unique values of the
667+
* selected columns (independently) across all [groupBy] key combinations.
668+
*
669+
* Performs a [pivot][GroupBy.pivot] operation on the specified [\columns] of this [GroupBy] groups,
670+
* and produces a new matrix-like [DataFrame].
671+
*
672+
* @include [PivotGroupByDocs.ResultingMatrixCommonDescription]
673+
* @include [PivotMatchesResultDescription]
674+
*
675+
* This function combines [pivot][GroupBy.pivot]
676+
* and [matches][PivotGroupBy.matches] operations into a single call.
677+
*
678+
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
679+
*
680+
* See [Selecting Columns][SelectSelectingOptions].
681+
*
682+
* For more information: {@include [DocumentationUrls.PivotMatches]}
683+
*
684+
* See also: [pivotCounts][GroupBy.pivotCounts], which performs a similar operation
685+
* but counts the number of matching rows instead of checking for their presence.
686+
*
687+
* ### This `pivotMatches` Overload
688+
*/
689+
internal interface GroupByPivotMatchesCommonDocs
690+
691+
/**
692+
* @include [GroupByPivotMatchesCommonDocs]
693+
* @include [SelectingColumns.Dsl]
694+
*
695+
* ### Example
696+
* ```kotlin
697+
* // Compute whether matching rows exist for all unique values of "city"
698+
* // and "name" (independently) across all grouping key combinations
699+
* gb.pivotMatches { city and name }
700+
* ```
701+
*
702+
* @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column;
703+
* if `false`, they are placed at the top level.
704+
* @param [columns] The [Columns Selector][ColumnsSelector] that defines which columns are used as [pivot] keys for the operation.
705+
* @return A new [DataFrame] representing a Boolean presence matrix — with grouping key columns as rows,
706+
* pivot key values as columns, and `true`/`false` cells indicating existing combinations.
707+
*/
611708
public fun <G> GroupBy<*, G>.pivotMatches(inward: Boolean = true, columns: ColumnsSelector<G, *>): DataFrame<G> =
612709
pivot(inward, columns).matches()
613710

711+
/**
712+
* @include [GroupByPivotMatchesCommonDocs]
713+
* @include [SelectingColumns.ColumnNames]
714+
*
715+
* ### Example
716+
* ```kotlin
717+
* // Compute whether matching rows exist for all unique values of "city"
718+
* // and "name" (independently) across all grouping key combinations
719+
* gb.pivotMatches("city", "name")
720+
* ```
721+
*
722+
* @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column;
723+
* if `false`, they are placed at the top level.
724+
* @param [columns] The [Column Names][String] that define which columns are used as [pivot] keys for the operation.
725+
* @return A new [DataFrame] representing a Boolean presence matrix — with grouping key columns as rows,
726+
* pivot key values as columns, and `true`/`false` cells indicating existing combinations.
727+
*/
614728
public fun <G> GroupBy<*, G>.pivotMatches(vararg columns: String, inward: Boolean = true): DataFrame<G> =
615729
pivotMatches(inward) { columns.toColumnSet() }
616730

@@ -628,9 +742,69 @@ public fun <G> GroupBy<*, G>.pivotMatches(vararg columns: KProperty<*>, inward:
628742

629743
// region pivotCounts
630744

745+
/**
746+
* Computes the number of matching rows in groups of this [GroupBy] for all unique values of the
747+
* selected [\columns] (independently) across all [groupBy] key combinations.
748+
*
749+
* Performs a [pivot][GroupBy.pivot] operation on the specified [\columns] of this [GroupBy] groups
750+
* and produces a new matrix-like [DataFrame].
751+
*
752+
* @include [PivotGroupByDocs.ResultingMatrixCommonDescription]
753+
* @include [PivotCountsResultDescription]
754+
*
755+
* This function combines [pivot][GroupBy.pivot]
756+
* and [count][PivotGroupBy.count] operations into a single call.
757+
*
758+
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
759+
*
760+
* See [Selecting Columns][SelectSelectingOptions].
761+
*
762+
* For more information: {@include [DocumentationUrls.PivotCounts]}
763+
*
764+
* See also: [pivotMatches][GroupBy.pivotMatches], which performs a similar operation
765+
* but checks whether any matching row exists instead of counting them.
766+
*
767+
* ### This `pivotCounts` Overload
768+
*/
769+
internal interface GroupByPivotCountsCommonDocs
770+
771+
/**
772+
* @include [GroupByPivotCountsCommonDocs]
773+
* @include [SelectingColumns.Dsl]
774+
*
775+
* ### Example
776+
* ```kotlin
777+
* // Compute number of matching rows for all unique values of "city"
778+
* // and "name" (independently) across all grouping key combinations.
779+
* gb.pivotCounts { city and name }
780+
* ```
781+
*
782+
* @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column;
783+
* if `false`, they are placed at the top level.
784+
* @param [columns] The [Columns Selector][ColumnsSelector] that defines which columns are used as [pivot] keys for the operation.
785+
* @return A new [DataFrame] representing a counting matrix — with grouping key columns as rows,
786+
* pivot key values as columns, and the number of rows with the corresponding combinations in the cells.
787+
*/
631788
public fun <G> GroupBy<*, G>.pivotCounts(inward: Boolean = true, columns: ColumnsSelector<G, *>): DataFrame<G> =
632789
pivot(inward, columns).count()
633790

791+
/**
792+
* @include [GroupByPivotCountsCommonDocs]
793+
* @include [SelectingColumns.ColumnNames]
794+
*
795+
* ### Example
796+
* ```kotlin
797+
* // Compute number of matching rows for all unique values of "city"
798+
* // and "name" (independently) across all grouping key combinations.
799+
* gb.pivotCounts("city", "name")
800+
* ```
801+
*
802+
* @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column;
803+
* if `false`, they are placed at the top level.
804+
* @param [columns] The [Column Names][String] that define which columns are used as [pivot] keys for the operation.
805+
* @return A new [DataFrame] representing a counting matrix — with grouping key columns as rows,
806+
* pivot key values as columns, and the number of rows with the corresponding combinations in the cells.
807+
*/
634808
public fun <G> GroupBy<*, G>.pivotCounts(vararg columns: String, inward: Boolean = true): DataFrame<G> =
635809
pivotCounts(inward) { columns.toColumnSet() }
636810

@@ -652,6 +826,42 @@ public fun <G> GroupBy<*, G>.pivotCounts(vararg columns: KProperty<*>, inward: B
652826

653827
// region pivot
654828

829+
830+
/**
831+
* A special [pivot][GroupBy.pivot] overload for use in the [aggregate][Grouped.aggregate] method
832+
* of [GroupBy].
833+
* This allows combining [column pivoting aggregations][PivotGroupByDocs.Aggregation]
834+
* with common [GroupBy] aggregations in [aggregate][Grouped.aggregate].
835+
*
836+
* This function itself doesn't affect [aggregate][Grouped.aggregate] result, but
837+
* it allows putting results of [PivotGroupBy aggregations][PivotGroupByDocs.Aggregation] into
838+
* [aggregate][Grouped.aggregate] resulting [DataFrame] by simply calling them.
839+
* See [GroupBy.pivot] and [PivotGroupByDocs.Aggregation] for more information.
840+
*
841+
* The resulting columns are added as regular [aggregation][Grouped.aggregate] result columns;
842+
* their structure depends on the exact
843+
* [PivotGroupBy aggregations][PivotGroupByDocs.Aggregation] used.
844+
*
845+
* ### Example
846+
* ```kotlin
847+
* df.groupBy { name.firstName }.aggregate {
848+
* // Pivot "city" column of each group, resulting in a
849+
* // `PivotGroupBy` with "firstName" grouping keys and "city" value columns
850+
* pivot { city }.aggregate {
851+
* // Aggregate mean of "age" column values of each of
852+
* // `groupBy` x `pivot` group into "meanAge" column
853+
* mean { age } into "meanAge"
854+
* // Aggregate size of each `PivotGroupBy` group into "count" column
855+
* count() into "count"
856+
* }
857+
* // Shortcut for `count` aggregation in
858+
* // "firstName" x "lastName" groups
859+
* pivot { name.lastName }.count()
860+
* // Common `count` aggregation
861+
* count() into "total"
862+
* }
863+
* ```
864+
*/
655865
public fun <T> AggregateGroupedDsl<T>.pivot(
656866
inward: Boolean = true,
657867
columns: PivotColumnsSelector<T, *>,

0 commit comments

Comments
 (0)