1+ import sys
12cimport util
23from tslib import NaT
34from datetime import datetime, timedelta
# Cache the value returned by util.get_nat() once at import time so the
# helpers in this module can refer to it as a plain module-level name.
iNaT = util.get_nat()

# Interpreter major-version flag, evaluated once at import time.  It is
# True only on Python 2 and is used to pick between the Python 2 and
# Python 3 names for string-like types.
cdef bint PY2 = sys.version_info[0] == 2
8+
69# core.common import for fast inference checks
def is_float(object obj):
    """
    Return the result of ``util.is_float_object`` for ``obj``.

    This is a thin Python-visible wrapper; all of the actual checking is
    done by ``util.is_float_object``.
    """
    result = util.is_float_object(obj)
    return result
@@ -38,10 +41,10 @@ _TYPE_MAP = {
3841 ' f' : ' floating' ,
3942 ' complex128' : ' complex' ,
4043 ' c' : ' complex' ,
41- ' string' : ' string' ,
42- ' S' : ' string' ,
43- ' unicode' : ' unicode' ,
44- ' U' : ' unicode' ,
44+ ' string' : ' string' if PY2 else ' bytes ' ,
45+ ' S' : ' string' if PY2 else ' bytes ' ,
46+ ' unicode' : ' unicode' if PY2 else ' string ' ,
47+ ' U' : ' unicode' if PY2 else ' string ' ,
4548 ' bool' : ' boolean' ,
4649 ' b' : ' boolean' ,
4750 ' datetime64[ns]' : ' datetime64' ,
@@ -181,6 +184,10 @@ def infer_dtype(object _values):
181184 if is_unicode_array(values):
182185 return ' unicode'
183186
187+ elif PyBytes_Check(val):
188+ if is_bytes_array(values):
189+ return ' bytes'
190+
184191 elif is_timedelta(val):
185192 if is_timedelta_or_timedelta64_array(values):
186193 return ' timedelta'
@@ -196,11 +203,6 @@ def infer_dtype(object _values):
196203
197204 return ' mixed'
198205
199- def infer_dtype_list (list values ):
200- cdef:
201- Py_ssize_t i, n = len (values)
202- pass
203-
204206
205207def is_possible_datetimelike_array (object arr ):
206208 # determine if we have a possible datetimelike (or null-like) array
@@ -253,7 +255,6 @@ def is_bool_array(ndarray values):
253255 cdef:
254256 Py_ssize_t i, n = len (values)
255257 ndarray[object ] objbuf
256- object obj
257258
258259 if issubclass (values.dtype.type, np.bool_):
259260 return True
@@ -277,7 +278,6 @@ def is_integer_array(ndarray values):
277278 cdef:
278279 Py_ssize_t i, n = len (values)
279280 ndarray[object ] objbuf
280- object obj
281281
282282 if issubclass (values.dtype.type, np.integer):
283283 return True
@@ -298,7 +298,6 @@ def is_integer_float_array(ndarray values):
298298 cdef:
299299 Py_ssize_t i, n = len (values)
300300 ndarray[object ] objbuf
301- object obj
302301
303302 if issubclass (values.dtype.type, np.integer):
304303 return True
@@ -321,7 +320,6 @@ def is_float_array(ndarray values):
321320 cdef:
322321 Py_ssize_t i, n = len (values)
323322 ndarray[object ] objbuf
324- object obj
325323
326324 if issubclass (values.dtype.type, np.floating):
327325 return True
@@ -342,9 +340,9 @@ def is_string_array(ndarray values):
342340 cdef:
343341 Py_ssize_t i, n = len (values)
344342 ndarray[object ] objbuf
345- object obj
346343
347- if issubclass (values.dtype.type, (np.string_, np.unicode_)):
344+ if ((PY2 and issubclass (values.dtype.type, np.string_)) or
345+ not PY2 and issubclass (values.dtype.type, np.unicode_)):
348346 return True
349347 elif values.dtype == np.object_:
350348 objbuf = values
@@ -363,7 +361,6 @@ def is_unicode_array(ndarray values):
363361 cdef:
364362 Py_ssize_t i, n = len (values)
365363 ndarray[object ] objbuf
366- object obj
367364
368365 if issubclass (values.dtype.type, np.unicode_):
369366 return True
@@ -381,8 +378,29 @@ def is_unicode_array(ndarray values):
381378 return False
382379
383380
def is_bytes_array(ndarray values):
    """
    Report whether ``values`` contains only bytes objects.

    Parameters
    ----------
    values : ndarray
        Array to inspect.

    Returns
    -------
    bool
        * True if the array's dtype type is a subclass of ``np.bytes_``
          (checked before anything else, so the length is not consulted).
        * For an object-dtype array: False when it is empty, otherwise
          True only if every element passes ``PyBytes_Check``.
        * False for every other dtype.
    """
    cdef:
        Py_ssize_t idx, count = len(values)
        ndarray[object] items

    # A native bytes dtype needs no element-by-element scan.
    if issubclass(values.dtype.type, np.bytes_):
        return True

    # Only object arrays can still hold bytes objects; reject the rest.
    if not (values.dtype == np.object_):
        return False

    items = values

    # An empty object array is not reported as a bytes array.
    if count == 0:
        return False

    # Bail out on the first element that is not a bytes object.
    for idx in range(count):
        if not PyBytes_Check(items[idx]):
            return False
    return True
401+
384402def is_datetime_array (ndarray[object] values ):
385- cdef int i, null_count = 0 , n = len (values)
403+ cdef Py_ssize_t i, null_count = 0 , n = len (values)
386404 cdef object v
387405 if n == 0 :
388406 return False
@@ -399,7 +417,7 @@ def is_datetime_array(ndarray[object] values):
399417 return null_count != n
400418
401419def is_datetime64_array (ndarray values ):
402- cdef int i, null_count = 0 , n = len (values)
420+ cdef Py_ssize_t i, null_count = 0 , n = len (values)
403421 cdef object v
404422 if n == 0 :
405423 return False
@@ -416,7 +434,7 @@ def is_datetime64_array(ndarray values):
416434 return null_count != n
417435
418436def is_timedelta_array (ndarray values ):
419- cdef int i, null_count = 0 , n = len (values)
437+ cdef Py_ssize_t i, null_count = 0 , n = len (values)
420438 cdef object v
421439 if n == 0 :
422440 return False
@@ -431,7 +449,7 @@ def is_timedelta_array(ndarray values):
431449 return null_count != n
432450
433451def is_timedelta64_array (ndarray values ):
434- cdef int i, null_count = 0 , n = len (values)
452+ cdef Py_ssize_t i, null_count = 0 , n = len (values)
435453 cdef object v
436454 if n == 0 :
437455 return False
@@ -447,7 +465,7 @@ def is_timedelta64_array(ndarray values):
447465
448466def is_timedelta_or_timedelta64_array (ndarray values ):
449467 """ infer with timedeltas and/or nat/none """
450- cdef int i, null_count = 0 , n = len (values)
468+ cdef Py_ssize_t i, null_count = 0 , n = len (values)
451469 cdef object v
452470 if n == 0 :
453471 return False
@@ -462,7 +480,7 @@ def is_timedelta_or_timedelta64_array(ndarray values):
462480 return null_count != n
463481
464482def is_date_array (ndarray[object] values ):
465- cdef int i, n = len (values)
483+ cdef Py_ssize_t i, n = len (values)
466484 if n == 0 :
467485 return False
468486 for i in range (n):
@@ -471,7 +489,7 @@ def is_date_array(ndarray[object] values):
471489 return True
472490
473491def is_time_array (ndarray[object] values ):
474- cdef int i, n = len (values)
492+ cdef Py_ssize_t i, n = len (values)
475493 if n == 0 :
476494 return False
477495 for i in range (n):
@@ -484,7 +502,7 @@ def is_period(object o):
484502 return isinstance (o,Period)
485503
486504def is_period_array (ndarray[object] values ):
487- cdef int i, n = len (values)
505+ cdef Py_ssize_t i, n = len (values)
488506 from pandas.tseries.period import Period
489507
490508 if n == 0 :
0 commit comments