@@ -456,35 +456,6 @@ def test_sample(self):
456456 with tm .assertRaises (ValueError ):
457457 o .sample (n = 3 , weights = nan_weights )
458458
459- # A few dataframe test with degenerate weights.
460- easy_weight_list = [0 ] * 10
461- easy_weight_list [5 ] = 1
462-
463- df = pd .DataFrame ({'col1' : range (10 , 20 ),
464- 'col2' : range (20 , 30 ),
465- 'colString' : ['a' ] * 10 ,
466- 'easyweights' : easy_weight_list })
467- sample1 = df .sample (n = 1 , weights = 'easyweights' )
468- assert_frame_equal (sample1 , df .iloc [5 :6 ])
469-
470- # Ensure proper error if string given as weight for Series, panel, or
471- # DataFrame with axis = 1.
472- s = Series (range (10 ))
473- with tm .assertRaises (ValueError ):
474- s .sample (n = 3 , weights = 'weight_column' )
475-
476- panel = pd .Panel (items = [0 , 1 , 2 ], major_axis = [2 , 3 , 4 ],
477- minor_axis = [3 , 4 , 5 ])
478- with tm .assertRaises (ValueError ):
479- panel .sample (n = 1 , weights = 'weight_column' )
480-
481- with tm .assertRaises (ValueError ):
482- df .sample (n = 1 , weights = 'weight_column' , axis = 1 )
483-
484- # Check weighting key error
485- with tm .assertRaises (KeyError ):
486- df .sample (n = 3 , weights = 'not_a_real_column_name' )
487-
488459 # Check np.nan are replaced by zeros.
489460 weights_with_nan = [np .nan ] * 10
490461 weights_with_nan [5 ] = 0.5
@@ -497,90 +468,6 @@ def test_sample(self):
497468 self ._compare (
498469 o .sample (n = 1 , axis = 0 , weights = weights_with_None ), o .iloc [5 :6 ])
499470
500- # Check that re-normalizes weights that don't sum to one.
501- weights_less_than_1 = [0 ] * 10
502- weights_less_than_1 [0 ] = 0.5
503- tm .assert_frame_equal (
504- df .sample (n = 1 , weights = weights_less_than_1 ), df .iloc [:1 ])
505-
506- ###
507- # Test axis argument
508- ###
509-
510- # Test axis argument
511- df = pd .DataFrame ({'col1' : range (10 ), 'col2' : ['a' ] * 10 })
512- second_column_weight = [0 , 1 ]
513- assert_frame_equal (
514- df .sample (n = 1 , axis = 1 , weights = second_column_weight ), df [['col2' ]])
515-
516- # Different axis arg types
517- assert_frame_equal (df .sample (n = 1 , axis = 'columns' ,
518- weights = second_column_weight ),
519- df [['col2' ]])
520-
521- weight = [0 ] * 10
522- weight [5 ] = 0.5
523- assert_frame_equal (df .sample (n = 1 , axis = 'rows' , weights = weight ),
524- df .iloc [5 :6 ])
525- assert_frame_equal (df .sample (n = 1 , axis = 'index' , weights = weight ),
526- df .iloc [5 :6 ])
527-
528- # Check out of range axis values
529- with tm .assertRaises (ValueError ):
530- df .sample (n = 1 , axis = 2 )
531-
532- with tm .assertRaises (ValueError ):
533- df .sample (n = 1 , axis = 'not_a_name' )
534-
535- with tm .assertRaises (ValueError ):
536- s = pd .Series (range (10 ))
537- s .sample (n = 1 , axis = 1 )
538-
539- # Test weight length compared to correct axis
540- with tm .assertRaises (ValueError ):
541- df .sample (n = 1 , axis = 1 , weights = [0.5 ] * 10 )
542-
543- # Check weights with axis = 1
544- easy_weight_list = [0 ] * 3
545- easy_weight_list [2 ] = 1
546-
547- df = pd .DataFrame ({'col1' : range (10 , 20 ),
548- 'col2' : range (20 , 30 ),
549- 'colString' : ['a' ] * 10 })
550- sample1 = df .sample (n = 1 , axis = 1 , weights = easy_weight_list )
551- assert_frame_equal (sample1 , df [['colString' ]])
552-
553- # Test default axes
554- p = pd .Panel (items = ['a' , 'b' , 'c' ], major_axis = [2 , 4 , 6 ],
555- minor_axis = [1 , 3 , 5 ])
556- assert_panel_equal (
557- p .sample (n = 3 , random_state = 42 ), p .sample (n = 3 , axis = 1 ,
558- random_state = 42 ))
559- assert_frame_equal (
560- df .sample (n = 3 , random_state = 42 ), df .sample (n = 3 , axis = 0 ,
561- random_state = 42 ))
562-
563- # Test that function aligns weights with frame
564- df = DataFrame (
565- {'col1' : [5 , 6 , 7 ],
566- 'col2' : ['a' , 'b' , 'c' ], }, index = [9 , 5 , 3 ])
567- s = Series ([1 , 0 , 0 ], index = [3 , 5 , 9 ])
568- assert_frame_equal (df .loc [[3 ]], df .sample (1 , weights = s ))
569-
570- # Weights have index values to be dropped because not in
571- # sampled DataFrame
572- s2 = Series ([0.001 , 0 , 10000 ], index = [3 , 5 , 10 ])
573- assert_frame_equal (df .loc [[3 ]], df .sample (1 , weights = s2 ))
574-
575- # Weights have empty values to be filed with zeros
576- s3 = Series ([0.01 , 0 ], index = [3 , 5 ])
577- assert_frame_equal (df .loc [[3 ]], df .sample (1 , weights = s3 ))
578-
579- # No overlap in weight and sampled DataFrame indices
580- s4 = Series ([1 , 0 ], index = [1 , 2 ])
581- with tm .assertRaises (ValueError ):
582- df .sample (1 , weights = s4 )
583-
584471 def test_size_compat (self ):
585472 # GH8846
586473 # size property should be defined
@@ -1963,6 +1850,9 @@ class TestPanel4D(tm.TestCase, Generic):
19631850 _typ = Panel4D
19641851 _comparator = lambda self , x , y : assert_panel4d_equal (x , y )
19651852
def test_sample(self):
    """Skip the ``sample`` test for Panel4D by raising ``nose.SkipTest``."""
    skip_reason = "sample on Panel4D"
    raise nose.SkipTest(skip_reason)

19661856 def test_to_xarray (self ):
19671857
19681858 tm ._skip_if_no_xarray ()
@@ -1984,6 +1874,123 @@ def test_to_xarray(self):
19841874class TestNDFrame (tm .TestCase ):
19851875 # tests that don't fit elsewhere
19861876
1877+ def test_sample (sel ):
1878+ # Fixes issue: 2419
1879+ # additional specific object based tests
1880+
1881+ # A few dataframe test with degenerate weights.
1882+ easy_weight_list = [0 ] * 10
1883+ easy_weight_list [5 ] = 1
1884+
1885+ df = pd .DataFrame ({'col1' : range (10 , 20 ),
1886+ 'col2' : range (20 , 30 ),
1887+ 'colString' : ['a' ] * 10 ,
1888+ 'easyweights' : easy_weight_list })
1889+ sample1 = df .sample (n = 1 , weights = 'easyweights' )
1890+ assert_frame_equal (sample1 , df .iloc [5 :6 ])
1891+
1892+ # Ensure proper error if string given as weight for Series, panel, or
1893+ # DataFrame with axis = 1.
1894+ s = Series (range (10 ))
1895+ with tm .assertRaises (ValueError ):
1896+ s .sample (n = 3 , weights = 'weight_column' )
1897+
1898+ panel = pd .Panel (items = [0 , 1 , 2 ], major_axis = [2 , 3 , 4 ],
1899+ minor_axis = [3 , 4 , 5 ])
1900+ with tm .assertRaises (ValueError ):
1901+ panel .sample (n = 1 , weights = 'weight_column' )
1902+
1903+ with tm .assertRaises (ValueError ):
1904+ df .sample (n = 1 , weights = 'weight_column' , axis = 1 )
1905+
1906+ # Check weighting key error
1907+ with tm .assertRaises (KeyError ):
1908+ df .sample (n = 3 , weights = 'not_a_real_column_name' )
1909+
1910+ # Check that re-normalizes weights that don't sum to one.
1911+ weights_less_than_1 = [0 ] * 10
1912+ weights_less_than_1 [0 ] = 0.5
1913+ tm .assert_frame_equal (
1914+ df .sample (n = 1 , weights = weights_less_than_1 ), df .iloc [:1 ])
1915+
1916+ ###
1917+ # Test axis argument
1918+ ###
1919+
1920+ # Test axis argument
1921+ df = pd .DataFrame ({'col1' : range (10 ), 'col2' : ['a' ] * 10 })
1922+ second_column_weight = [0 , 1 ]
1923+ assert_frame_equal (
1924+ df .sample (n = 1 , axis = 1 , weights = second_column_weight ), df [['col2' ]])
1925+
1926+ # Different axis arg types
1927+ assert_frame_equal (df .sample (n = 1 , axis = 'columns' ,
1928+ weights = second_column_weight ),
1929+ df [['col2' ]])
1930+
1931+ weight = [0 ] * 10
1932+ weight [5 ] = 0.5
1933+ assert_frame_equal (df .sample (n = 1 , axis = 'rows' , weights = weight ),
1934+ df .iloc [5 :6 ])
1935+ assert_frame_equal (df .sample (n = 1 , axis = 'index' , weights = weight ),
1936+ df .iloc [5 :6 ])
1937+
1938+ # Check out of range axis values
1939+ with tm .assertRaises (ValueError ):
1940+ df .sample (n = 1 , axis = 2 )
1941+
1942+ with tm .assertRaises (ValueError ):
1943+ df .sample (n = 1 , axis = 'not_a_name' )
1944+
1945+ with tm .assertRaises (ValueError ):
1946+ s = pd .Series (range (10 ))
1947+ s .sample (n = 1 , axis = 1 )
1948+
1949+ # Test weight length compared to correct axis
1950+ with tm .assertRaises (ValueError ):
1951+ df .sample (n = 1 , axis = 1 , weights = [0.5 ] * 10 )
1952+
1953+ # Check weights with axis = 1
1954+ easy_weight_list = [0 ] * 3
1955+ easy_weight_list [2 ] = 1
1956+
1957+ df = pd .DataFrame ({'col1' : range (10 , 20 ),
1958+ 'col2' : range (20 , 30 ),
1959+ 'colString' : ['a' ] * 10 })
1960+ sample1 = df .sample (n = 1 , axis = 1 , weights = easy_weight_list )
1961+ assert_frame_equal (sample1 , df [['colString' ]])
1962+
1963+ # Test default axes
1964+ p = pd .Panel (items = ['a' , 'b' , 'c' ], major_axis = [2 , 4 , 6 ],
1965+ minor_axis = [1 , 3 , 5 ])
1966+ assert_panel_equal (
1967+ p .sample (n = 3 , random_state = 42 ), p .sample (n = 3 , axis = 1 ,
1968+ random_state = 42 ))
1969+ assert_frame_equal (
1970+ df .sample (n = 3 , random_state = 42 ), df .sample (n = 3 , axis = 0 ,
1971+ random_state = 42 ))
1972+
1973+ # Test that function aligns weights with frame
1974+ df = DataFrame (
1975+ {'col1' : [5 , 6 , 7 ],
1976+ 'col2' : ['a' , 'b' , 'c' ], }, index = [9 , 5 , 3 ])
1977+ s = Series ([1 , 0 , 0 ], index = [3 , 5 , 9 ])
1978+ assert_frame_equal (df .loc [[3 ]], df .sample (1 , weights = s ))
1979+
1980+ # Weights have index values to be dropped because not in
1981+ # sampled DataFrame
1982+ s2 = Series ([0.001 , 0 , 10000 ], index = [3 , 5 , 10 ])
1983+ assert_frame_equal (df .loc [[3 ]], df .sample (1 , weights = s2 ))
1984+
1985+ # Weights have empty values to be filed with zeros
1986+ s3 = Series ([0.01 , 0 ], index = [3 , 5 ])
1987+ assert_frame_equal (df .loc [[3 ]], df .sample (1 , weights = s3 ))
1988+
1989+ # No overlap in weight and sampled DataFrame indices
1990+ s4 = Series ([1 , 0 ], index = [1 , 2 ])
1991+ with tm .assertRaises (ValueError ):
1992+ df .sample (1 , weights = s4 )
1993+
19871994 def test_squeeze (self ):
19881995 # noop
19891996 for s in [tm .makeFloatSeries (), tm .makeStringSeries (),
0 commit comments