Skip to content

Commit

Permalink
Merge pull request #596 from dhirschfeld/detect-integral-dtype
Browse files Browse the repository at this point in the history
Fix detection of integral dtypes
  • Loading branch information
ehebert authored Jan 11, 2018
2 parents ba84238 + c8bc170 commit e62822a
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 12 deletions.
6 changes: 5 additions & 1 deletion odo/backends/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@


possibly_missing = frozenset({string, datetime_})
categorical = type(pd.Categorical.dtype)
# Bind `categorical` to the pandas categorical dtype class, preferring the
# name exported by pandas.api.types.
try:
from pandas.api.types import CategoricalDtype as categorical
except ImportError:
# pandas.api.types.CategoricalDtype could not be imported; derive the class
# from the `dtype` attribute of pd.Categorical instead.
categorical = type(pd.Categorical.dtype)
# Sanity check: `categorical` must not be the builtin `property` type
# (presumably what type() yields when Categorical.dtype is a class-level
# property -- TODO confirm against the supported pandas versions).
assert categorical is not property


def dshape_from_pandas(col):
Expand Down
17 changes: 9 additions & 8 deletions odo/backends/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
# mssql.TIMESTAMP and sa.TIMESTAMP.
# At the time of this writing, (mssql.TIMESTAMP == sa.TIMESTAMP) is True,
# which causes a collision when defining the revtypes mappings.
#
#
# See:
# https://bitbucket.org/zzzeek/sqlalchemy/issues/4092/type-problem-with-mssqltimestamp
class MSSQLTimestamp(mssql.TIMESTAMP):
Expand Down Expand Up @@ -207,7 +207,7 @@ def rowiterator(sel, chunksize=chunksize):
yield rows
else:
return

columns = [col.name for col in sel.columns]
iterator = rowiterator(sel)
return columns, concat(iterator)
Expand Down Expand Up @@ -794,13 +794,14 @@ def select_or_selectable_to_frame(el, bind=None, dshape=None, **kwargs):
for field, dtype in fields:
if isinstance(dtype, Option):
ty = dtype.ty
if ty in datashape.integral:
dtypes[field] = 'float64'
try:
dtypes[field] = ty.to_numpy_dtype()
except TypeError:
dtypes[field] = np.dtype(object)
else:
try:
dtypes[field] = ty.to_numpy_dtype()
except TypeError:
dtypes[field] = np.dtype(object)
if np.issubdtype(dtypes[field], np.integer):
# cast nullable ints to float64 so NaN can be used for nulls
dtypes[field] = np.float64
else:
try:
dtypes[field] = dtype.to_numpy_dtype()
Expand Down
5 changes: 2 additions & 3 deletions odo/backends/tests/test_bokeh.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@

def test_convert_dataframe_to_cds():
cds = convert(ColumnDataSource, df)
assert cds.data == {'name': ['Alice', 'Bob', 'Charlie'],
'balance': [100, 200, 300]}

assert list(cds.data['name']) == ['Alice', 'Bob', 'Charlie']
assert list(cds.data['balance']) == [100, 200, 300]
df2 = convert(pd.DataFrame, cds)
assert isinstance(df2, pd.DataFrame)

Expand Down
29 changes: 29 additions & 0 deletions odo/backends/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -911,3 +911,32 @@ def test_transaction():

# now the data should appear outside the transaction
assert odo(rsc, list) == odo(rsc, list, bind=conn_2) == data


def test_integer_detection():
    """Regression test for PR #596.

    Builds an in-memory SQLite table with an integer primary key and a
    nullable ``DECIMAL(1, 0)`` column, inserts rows that include a null,
    and checks that converting the table to a ``pd.DataFrame`` with ``odo``
    reproduces the expected frame (``pkid`` as int32, ``value`` as float64).
    """
    db = sa.create_engine('sqlite://')
    meta = sa.MetaData(bind=db)

    pkid_column = sa.Column('pkid', sa.Integer, primary_key=True)
    value_column = sa.Column(
        'value',
        sa.DECIMAL(precision=1, scale=0, asdecimal=False),
        nullable=True,
    )
    demo = sa.Table('Demo', meta, pkid_column, value_column)
    meta.create_all()

    # One null in the middle of otherwise 0/1 values.
    raw_values = [1, 0, 1, 0, None, 1, 1, 1, 0, 1]
    # Pair every value with its position, which doubles as the primary key.
    rows = list(enumerate(raw_values))
    structured = np.array(
        rows,
        dtype=[('pkid', np.int32), ('value', np.float64)],
    )
    expected = pd.DataFrame(structured)

    with db.connect() as connection:
        connection.execute(demo.insert(), expected.to_dict(orient='records'))

    assert odo(demo, pd.DataFrame).equals(expected)

0 comments on commit e62822a

Please sign in to comment.