Merge pull request #596 from dhirschfeld/detect-integral-dtype

Fix detection of integral dtypes
blaze · Jan 11, 2018 · e62822a
2 parents ba84238 + c8bc170
commit e62822a

Showing 4 changed files with 45 additions and 12 deletions.
diff --git a/odo/backends/pandas.py b/odo/backends/pandas.py
@@ -14,7 +14,11 @@
 
 
 possibly_missing = frozenset({string, datetime_})
-categorical = type(pd.Categorical.dtype)
+try:
+    from pandas.api.types import CategoricalDtype as categorical
+except ImportError:
+    categorical = type(pd.Categorical.dtype)
+    assert categorical is not property
 
 
 def dshape_from_pandas(col):

diff --git a/odo/backends/sql.py b/odo/backends/sql.py
@@ -88,7 +88,7 @@
 # mssql.TIMESTAMP and sa.TIMESTAMP.
 # At the time of this writing, (mssql.TIMESTAMP == sa.TIMESTAMP) is True,
 # which causes a collision when defining the revtypes mappings.
-# 
+#
 # See:
 # https://bitbucket.org/zzzeek/sqlalchemy/issues/4092/type-problem-with-mssqltimestamp
 class MSSQLTimestamp(mssql.TIMESTAMP):
@@ -207,7 +207,7 @@ def rowiterator(sel, chunksize=chunksize):
                     yield rows
                 else:
                     return
-    
+
     columns = [col.name for col in sel.columns]
     iterator = rowiterator(sel)
     return columns, concat(iterator)
@@ -794,13 +794,14 @@ def select_or_selectable_to_frame(el, bind=None, dshape=None, **kwargs):
     for field, dtype in fields:
         if isinstance(dtype, Option):
             ty = dtype.ty
-            if ty in datashape.integral:
-                dtypes[field] = 'float64'
+            try:
+                dtypes[field] = ty.to_numpy_dtype()
+            except TypeError:
+                dtypes[field] = np.dtype(object)
             else:
-                try:
-                    dtypes[field] = ty.to_numpy_dtype()
-                except TypeError:
-                    dtypes[field] = np.dtype(object)
+                if np.issubdtype(dtypes[field], np.integer):
+                    # cast nullable ints to float64 so NaN can be used for nulls
+                    dtypes[field] = np.float64
         else:
             try:
                 dtypes[field] = dtype.to_numpy_dtype()

diff --git a/odo/backends/tests/test_bokeh.py b/odo/backends/tests/test_bokeh.py
@@ -15,9 +15,8 @@
 
 def test_convert_dataframe_to_cds():
     cds = convert(ColumnDataSource, df)
-    assert cds.data == {'name': ['Alice', 'Bob', 'Charlie'],
-                        'balance': [100, 200, 300]}
-
+    assert list(cds.data['name']) == ['Alice', 'Bob', 'Charlie']
+    assert list(cds.data['balance']) == [100, 200, 300]
     df2 = convert(pd.DataFrame, cds)
     assert isinstance(df2, pd.DataFrame)
 

diff --git a/odo/backends/tests/test_sql.py b/odo/backends/tests/test_sql.py
@@ -911,3 +911,32 @@ def test_transaction():
 
     # now the data should appear outside the transaction
     assert odo(rsc, list) == odo(rsc, list, bind=conn_2) == data
+
+
+def test_integer_detection():
+    """Nullable DECIMAL(1, 0) column must come back as float64 (PR #596)."""
+    engine = sa.create_engine('sqlite://')
+    metadata = sa.MetaData(bind=engine)
+
+    T = sa.Table(
+        'Demo', metadata,
+        sa.Column('pkid', sa.Integer, primary_key=True),
+        sa.Column(
+            'value',
+            sa.DECIMAL(precision=1, scale=0, asdecimal=False),
+            nullable=True
+        ),
+    )
+    metadata.create_all()
+
+    values =  [1, 0, 1, 0, None, 1, 1, 1, 0, 1]
+    pkids = range(len(values))
+    dtype = [('pkid', np.int32), ('value', np.float64)]
+    expected = np.array(list(zip(pkids, values)), dtype=dtype)
+    expected = pd.DataFrame(expected)
+    records = expected.to_dict(orient='records')
+    with engine.connect() as conn:
+        conn.execute(T.insert(), records)
+
+    actual = odo(T, pd.DataFrame)
+    assert actual.equals(expected)