Skip to content

Commit

Permalink
Merge branch 'master' of github.com:medoidai/skrobot
Browse files (browse the repository at this point in the history)
  • Loading branch information
echatzikyriakidis committed Jan 10, 2021
2 parents b93e314 + 15e7997 commit c935c8f
Show file tree
Hide file tree
Showing 56 changed files with 60 additions and 20 deletions.
4 changes: 2 additions & 2 deletions docs/source/how_do_i_use_it.rst
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ The following example has generated the following `results <https://github.com/m
random_seed=random_seed))
# Run Prediction Task
new_data_set = ft.calculate_feature_matrix(feature_defs, entities={ "passengers" : (new_raw_data_set, id_column, None, variable_types) }, relationships=())
new_data_set = ft.calculate_feature_matrix(feature_defs, entities={ "passengers" : (new_raw_data_set, id_column, None, variable_types) })
new_data_set.reset_index(inplace=True)
Expand Down Expand Up @@ -290,4 +290,4 @@ The following example has generated the following `results <https://github.com/m
print(train_results['estimator'])
print(predictions)
print(predictions)
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@
random_seed=random_seed))

# Run Prediction Task
new_data_set = ft.calculate_feature_matrix(feature_defs, entities={ "passengers" : (new_raw_data_set, id_column, None, variable_types) }, relationships=())
new_data_set = ft.calculate_feature_matrix(feature_defs, entities={ "passengers" : (new_raw_data_set, id_column, None, variable_types) })

new_data_set.reset_index(inplace=True)

Expand Down Expand Up @@ -171,4 +171,4 @@

print(train_results['estimator'])

print(predictions)
print(predictions)

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,108 +1,129 @@
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>feature_id</th>
<th>feature_name</th>
<th>feature_type</th>
<th>feature_description</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>session_id</td>
<td>int64</td>
<td>The "session_id".</td>
</tr>
<tr>
<td>1</td>
<td>product_id</td>
<td>category</td>
<td>The "product_id".</td>
</tr>
<tr>
<td>2</td>
<td>DAY(transaction_time)</td>
<td>int64</td>
<td>The day of the month of the "transaction_time".</td>
</tr>
<tr>
<td>3</td>
<td>MONTH(transaction_time)</td>
<td>int64</td>
<td>The month of the "transaction_time".</td>
</tr>
<tr>
<td>4</td>
<td>WEEKDAY(transaction_time)</td>
<td>int64</td>
<td>The day of the week of the "transaction_time".</td>
</tr>
<tr>
<td>5</td>
<td>YEAR(transaction_time)</td>
<td>int64</td>
<td>The year of the "transaction_time".</td>
</tr>
<tr>
<td>6</td>
<td>sessions.customer_id</td>
<td>int64</td>
<td>The "customer_id" for the instance of "sessions" associated with this instance of "transactions".</td>
</tr>
<tr>
<td>7</td>
<td>sessions.device</td>
<td>object</td>
<td>The "device" for the instance of "sessions" associated with this instance of "transactions".</td>
</tr>
<tr>
<td>8</td>
<td>products.brand</td>
<td>object</td>
<td>The "brand" for the instance of "products" associated with this instance of "transactions".</td>
</tr>
<tr>
<td>9</td>
<td>sessions.COUNT(transactions)</td>
<td>int64</td>
<td>The number of all instances of "transactions" for each "session_id" in "sessions" for the instance of "sessions" associated with this instance of "transactions".</td>
</tr>
<tr>
<td>10</td>
<td>sessions.MODE(transactions.product_id)</td>
<td>int64</td>
<td>The most frequently occurring value of the "product_id" of all instances of "transactions" for each "session_id" in "sessions" for the instance of "sessions" associated with this instance of "transactions".</td>
</tr>
<tr>
<td>11</td>
<td>sessions.NUM_UNIQUE(transactions.product_id)</td>
<td>int64</td>
<td>The number of unique elements in the "product_id" of all instances of "transactions" for each "session_id" in "sessions" for the instance of "sessions" associated with this instance of "transactions".</td>
</tr>
<tr>
<td>12</td>
<td>sessions.DAY(session_start)</td>
<td>int64</td>
<td>The day of the month of the "session_start" for the instance of "sessions" associated with this instance of "transactions".</td>
</tr>
<tr>
<td>13</td>
<td>sessions.MONTH(session_start)</td>
<td>int64</td>
<td>The month of the "session_start" for the instance of "sessions" associated with this instance of "transactions".</td>
</tr>
<tr>
<td>14</td>
<td>sessions.WEEKDAY(session_start)</td>
<td>int64</td>
<td>The day of the week of the "session_start" for the instance of "sessions" associated with this instance of "transactions".</td>
</tr>
<tr>
<td>15</td>
<td>sessions.YEAR(session_start)</td>
<td>int64</td>
<td>The year of the "session_start" for the instance of "sessions" associated with this instance of "transactions".</td>
</tr>
<tr>
<td>16</td>
<td>sessions.customers.zip_code</td>
<td>object</td>
<td>The "zip_code" for the instance of "customers" associated with the instance of "sessions" associated with this instance of "transactions".</td>
</tr>
<tr>
<td>17</td>
<td>products.COUNT(transactions)</td>
<td>int64</td>
<td>The number of all instances of "transactions" for each "product_id" in "products" for the instance of "products" associated with this instance of "transactions".</td>
</tr>
<tr>
<td>18</td>
<td>products.MODE(transactions.session_id)</td>
<td>int64</td>
<td>The most frequently occurring value of the "session_id" of all instances of "transactions" for each "product_id" in "products" for the instance of "products" associated with this instance of "transactions".</td>
</tr>
<tr>
<td>19</td>
<td>products.NUM_UNIQUE(transactions.session_id)</td>
<td>int64</td>
<td>The number of unique elements in the "session_id" of all instances of "transactions" for each "product_id" in "products" for the instance of "products" associated with this instance of "transactions".</td>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@
random_seed=random_seed))

# Run Prediction Task
new_data_set = ft.calculate_feature_matrix(feature_defs, entities={ "passengers" : (new_raw_data_set, id_column, None, variable_types) }, relationships=())
new_data_set = ft.calculate_feature_matrix(feature_defs, entities={ "passengers" : (new_raw_data_set, id_column, None, variable_types) })

new_data_set.reset_index(inplace=True)

Expand Down Expand Up @@ -171,4 +171,4 @@

print(train_results['estimator'])

print(predictions)
print(predictions)

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Original file line number Diff line number Diff line change
@@ -1,103 +1,123 @@
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>feature_id</th>
<th>feature_name</th>
<th>feature_type</th>
<th>feature_description</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>Age</td>
<td>float64</td>
<td>The "Age".</td>
</tr>
<tr>
<td>1</td>
<td>Fare</td>
<td>float64</td>
<td>The "Fare".</td>
</tr>
<tr>
<td>2</td>
<td>SibSp</td>
<td>int64</td>
<td>The "SibSp".</td>
</tr>
<tr>
<td>3</td>
<td>Parch</td>
<td>int64</td>
<td>The "Parch".</td>
</tr>
<tr>
<td>4</td>
<td>Embarked</td>
<td>object</td>
<td>The "Embarked".</td>
</tr>
<tr>
<td>5</td>
<td>Sex</td>
<td>object</td>
<td>The "Sex".</td>
</tr>
<tr>
<td>6</td>
<td>Pclass</td>
<td>int64</td>
<td>The "Pclass".</td>
</tr>
<tr>
<td>7</td>
<td>Age + Fare</td>
<td>float64</td>
<td>The sum of the "Age" and the "Fare".</td>
</tr>
<tr>
<td>8</td>
<td>Age + Parch</td>
<td>float64</td>
<td>The sum of the "Age" and the "Parch".</td>
</tr>
<tr>
<td>9</td>
<td>Age + SibSp</td>
<td>float64</td>
<td>The sum of the "Age" and the "SibSp".</td>
</tr>
<tr>
<td>10</td>
<td>Fare + Parch</td>
<td>float64</td>
<td>The sum of the "Fare" and the "Parch".</td>
</tr>
<tr>
<td>11</td>
<td>Fare + SibSp</td>
<td>float64</td>
<td>The sum of the "Fare" and the "SibSp".</td>
</tr>
<tr>
<td>12</td>
<td>Parch + SibSp</td>
<td>int64</td>
<td>The sum of the "Parch" and the "SibSp".</td>
</tr>
<tr>
<td>13</td>
<td>Age * Fare</td>
<td>float64</td>
<td>The product of the "Age" and the "Fare".</td>
</tr>
<tr>
<td>14</td>
<td>Age * Parch</td>
<td>float64</td>
<td>The product of the "Age" and the "Parch".</td>
</tr>
<tr>
<td>15</td>
<td>Age * SibSp</td>
<td>float64</td>
<td>The product of the "Age" and the "SibSp".</td>
</tr>
<tr>
<td>16</td>
<td>Fare * Parch</td>
<td>float64</td>
<td>The product of the "Fare" and the "Parch".</td>
</tr>
<tr>
<td>17</td>
<td>Fare * SibSp</td>
<td>float64</td>
<td>The product of the "Fare" and the "SibSp".</td>
</tr>
<tr>
<td>18</td>
<td>Parch * SibSp</td>
<td>int64</td>
<td>The product of the "Parch" and the "SibSp".</td>
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
long_description_content_type='text/markdown',
url="https://github.com/medoidai/skrobot",
python_requires='>=3.6',
install_requires=['featuretools==0.22.0',
install_requires=['featuretools==0.23.0',
'joblib==1.0.0',
'matplotlib==3.3.3',
'numpy==1.19.4',
Expand All @@ -45,4 +45,4 @@
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Environment :: Console"]
)
)
19 changes: 9 additions & 10 deletions skrobot/tasks/deep_feature_synthesis_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,21 +128,20 @@ def run(self, output_directory):

feature_defs = [ o for o in feature_defs if o.get_name() != self.label_column and o.get_name() not in label_related_columns_to_drop ]

if self.export_feature_graphs:
for feature_def in feature_defs:
ft.graph_feature(feature_def, to_file=os.path.join(output_directory, 'feature_graphs', f'{feature_def.get_name()}.png'), description=True)
features_df = pd.DataFrame({ 'feature_id' : range(len(feature_defs)), 'feature_def' : feature_defs })

features_df['feature_name'] = features_df['feature_def'].apply(lambda o: o.get_name())
features_df['feature_type'] = features_df['feature_def'].apply(lambda o: synthesized_dataset.dtypes[o.get_name()])

features = { 'feature_name': [], 'feature_type': [], 'feature_description': [] }
if self.export_feature_graphs:
features_df.apply(lambda o: ft.graph_feature(o['feature_def'], to_file=os.path.join(output_directory, 'feature_graphs', f'{o["feature_id"]}.png'), description=True), axis=1)

if self.export_feature_information:
for feature_def in feature_defs:
feature_name = feature_def.get_name()
features_df['feature_description'] = features_df['feature_def'].apply(lambda o: ft.describe_feature(o))

features['feature_name'].append(feature_name)
features['feature_type'].append(synthesized_dataset.dtypes[feature_name])
features['feature_description'].append(ft.describe_feature(feature_def))
features_df.drop(columns=['feature_def'], inplace=True)

pd.DataFrame(features).to_html(os.path.join(output_directory, 'feature_information.html'), index=False)
features_df.to_html(os.path.join(output_directory, 'feature_information.html'), index=False)

synthesized_dataset.reset_index(inplace=True)

Expand Down
Binary file modified static/image-28.png
Binary file modified static/image-31.png

0 comments on commit c935c8f

Please sign in to comment.