diff --git a/odo/backends/pandas.py b/odo/backends/pandas.py index feceecff..493e5468 100644 --- a/odo/backends/pandas.py +++ b/odo/backends/pandas.py @@ -14,7 +14,11 @@ possibly_missing = frozenset({string, datetime_}) -categorical = type(pd.Categorical.dtype) +try: + from pandas.api.types import CategoricalDtype as categorical +except ImportError: + categorical = type(pd.Categorical.dtype) + assert categorical is not property def dshape_from_pandas(col): diff --git a/odo/backends/sql.py b/odo/backends/sql.py index eb59d6ac..801ca667 100644 --- a/odo/backends/sql.py +++ b/odo/backends/sql.py @@ -88,7 +88,7 @@ # mssql.TIMESTAMP and sa.TIMESTAMP. # At the time of this writing, (mssql.TIMESTAMP == sa.TIMESTAMP) is True, # which causes a collision when defining the revtypes mappings. -# +# # See: # https://bitbucket.org/zzzeek/sqlalchemy/issues/4092/type-problem-with-mssqltimestamp class MSSQLTimestamp(mssql.TIMESTAMP): @@ -207,7 +207,7 @@ def rowiterator(sel, chunksize=chunksize): yield rows else: return - + columns = [col.name for col in sel.columns] iterator = rowiterator(sel) return columns, concat(iterator) @@ -794,13 +794,14 @@ def select_or_selectable_to_frame(el, bind=None, dshape=None, **kwargs): for field, dtype in fields: if isinstance(dtype, Option): ty = dtype.ty - if ty in datashape.integral: - dtypes[field] = 'float64' + try: + dtypes[field] = ty.to_numpy_dtype() + except TypeError: + dtypes[field] = np.dtype(object) else: - try: - dtypes[field] = ty.to_numpy_dtype() - except TypeError: - dtypes[field] = np.dtype(object) + if np.issubdtype(dtypes[field], np.integer): + # cast nullable ints to float64 so NaN can be used for nulls + dtypes[field] = np.float64 else: try: dtypes[field] = dtype.to_numpy_dtype() diff --git a/odo/backends/tests/test_bokeh.py b/odo/backends/tests/test_bokeh.py index 2c3d4983..fa8c7186 100644 --- a/odo/backends/tests/test_bokeh.py +++ b/odo/backends/tests/test_bokeh.py @@ -15,9 +15,8 @@ def test_convert_dataframe_to_cds(): cds = convert(ColumnDataSource, df) - assert cds.data == {'name': ['Alice', 'Bob', 'Charlie'], - 'balance': [100, 200, 300]} - + assert list(cds.data['name']) == ['Alice', 'Bob', 'Charlie'] + assert list(cds.data['balance']) == [100, 200, 300] df2 = convert(pd.DataFrame, cds) assert isinstance(df2, pd.DataFrame) diff --git a/odo/backends/tests/test_sql.py b/odo/backends/tests/test_sql.py index 8aef4936..59ce81a4 100644 --- a/odo/backends/tests/test_sql.py +++ b/odo/backends/tests/test_sql.py @@ -911,3 +911,32 @@ def test_transaction(): # now the data should appear outside the transaction assert odo(rsc, list) == odo(rsc, list, bind=conn_2) == data + + +def test_integer_detection(): + """Test for PR #596""" + engine = sa.create_engine('sqlite://') + metadata = sa.MetaData(bind=engine) + + T = sa.Table( + 'Demo', metadata, + sa.Column('pkid', sa.Integer, primary_key=True), + sa.Column( + 'value', + sa.DECIMAL(precision=1, scale=0, asdecimal=False), + nullable=True + ), + ) + metadata.create_all() + + values = [1, 0, 1, 0, None, 1, 1, 1, 0, 1] + pkids = range(len(values)) + dtype = [('pkid', np.int32), ('value', np.float64)] + expected = np.array(list(zip(pkids, values)), dtype=dtype) + expected = pd.DataFrame(expected) + records = expected.to_dict(orient='records') + with engine.connect() as conn: + conn.execute(T.insert(), records) + + actual = odo(T, pd.DataFrame) + assert actual.equals(expected)