Skip to content

Commit

Permalink
temp
Browse files Browse the repository at this point in the history
  • Loading branch information
Eden Wu committed Jul 19, 2024
1 parent 8e13f02 commit a00caec
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 13 deletions.
4 changes: 3 additions & 1 deletion alpha_automl/automl_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from alpha_automl.automl_manager import AutoMLManager
from alpha_automl.scorer import make_scorer, make_splitter, make_str_metric, get_sign_sorting
from alpha_automl.utils import make_d3m_pipelines, hide_logs, get_start_method, check_input_for_multiprocessing, \
setup_output_folder, SemiSupervisedSplitter, SemiSupervisedLabelEncoder, write_pipeline_code_as_pyfile
setup_output_folder, SemiSupervisedSplitter, SemiSupervisedLabelEncoder, write_pipeline_code_as_pyfile, sample_dataset
from alpha_automl.visualization import plot_comparison_pipelines
from alpha_automl.pipeline_serializer import PipelineSerializer

Expand Down Expand Up @@ -112,6 +112,8 @@ def fit(self, X, y):
self.leaderboard = pd.DataFrame(leaderboard_data, columns=['ranking', 'pipeline', self.metric])

best_pipeline_id = PIPELINE_PREFIX + '1'

X, y, _ = sample_dataset(X, y, 2000000, self.task_type)
self._fit(X, y, best_pipeline_id)

def predict(self, X):
Expand Down
2 changes: 1 addition & 1 deletion alpha_automl/automl_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
INCLUDE_PRIMITIVES = []
NEW_PRIMITIVES = {}
SPLITTING_STRATEGY = 'holdout'
SAMPLE_SIZE = 2000
SAMPLE_SIZE = 100000
MAX_RUNNING_PROCESSES = multiprocessing.cpu_count()

logger = logging.getLogger(__name__)
Expand Down
1 change: 1 addition & 0 deletions alpha_automl/pipeline_search/agent_lab.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ def dump_result_to_json(primitives, task_start, score, output_folder=None):
if primitives in data.values():
return
data[score] = primitives
logger.info(f"{timestamp}-Pipeline found with score {score}: \n {primitives}")

# Write unique elements to output file
with open(output_path, "w") as f:
Expand Down
2 changes: 1 addition & 1 deletion alpha_automl/resource/base_grammar.bnf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
S -> CLASSIFICATION_TASK | REGRESSION_TASK | CLUSTERING_TASK | TIME_SERIES_FORECAST_TASK | SEMISUPERVISED_TASK | NA_TASK
CLASSIFICATION_TASK -> IMPUTER ENCODERS FEATURE_GENERATOR FEATURE_SCALER FEATURE_SELECTOR CLASSIFIER CLASSIFICATION_ENSEMBLER
CLASSIFICATION_TASK -> IMPUTER ENCODERS FEATURE_GENERATOR FEATURE_SCALER FEATURE_SELECTOR CLASSIFIER
REGRESSION_TASK -> IMPUTER ENCODERS FEATURE_GENERATOR FEATURE_SCALER FEATURE_SELECTOR REGRESSOR REGRESSION_ENSEMBLER
CLUSTERING_TASK -> IMPUTER ENCODERS FEATURE_GENERATOR FEATURE_SCALER FEATURE_SELECTOR CLUSTERER
TIME_SERIES_FORECAST_TASK -> IMPUTER TIME_SERIES_FORECASTER | REGRESSION_TASK
Expand Down
10 changes: 0 additions & 10 deletions alpha_automl/resource/primitives_hierarchy.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
"alpha_automl.builtin_primitives.image_encoder.HogTransformer"
],
"FEATURE_GENERATOR": [
"alpha_automl.wrapper_primitives.llm_feature_engine.LLMFeatureGenerator",
"feature_engine.creation.math_features.MathFeatures-sum",
"feature_engine.creation.math_features.MathFeatures-mean",
"feature_engine.creation.math_features.MathFeatures-prod",
Expand Down Expand Up @@ -56,21 +55,12 @@
"sklearn.ensemble.VotingRegressor"
],
"CLASSIFIER": [
"sklearn.discriminant_analysis.LinearDiscriminantAnalysis",
"sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis",
"sklearn.ensemble.ExtraTreesClassifier",
"sklearn.ensemble.GradientBoostingClassifier",
"sklearn.ensemble.RandomForestClassifier",
"sklearn.naive_bayes.BernoulliNB",
"sklearn.naive_bayes.GaussianNB",
"sklearn.naive_bayes.MultinomialNB",
"sklearn.neighbors.KNeighborsClassifier",
"sklearn.linear_model.LogisticRegression",
"sklearn.linear_model.PassiveAggressiveClassifier",
"sklearn.linear_model.SGDClassifier",
"sklearn.svm.LinearSVC",
"sklearn.svm.SVC",
"sklearn.tree.DecisionTreeClassifier",
"xgboost.XGBClassifier",
"lightgbm.LGBMClassifier",
"catboost.CatBoostClassifier"
Expand Down
8 changes: 8 additions & 0 deletions alpha_automl/wrapper_primitives/llm_feature_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ def transform(self, X, y=None):
exec(compile(parsed, filename="<ast>", mode="exec"), access_scope, loc)
return np.array(X_cp)

def transform_df(self, X, y=None):
    """Run the stored feature-generation code on a copy of ``X``.

    The source text held in ``self.code`` is parsed, compiled and executed
    with the copy of ``X`` bound to the name ``df``; ``pd`` and ``np`` are
    also available to the executed code.  Unlike ``np.array(...)``
    conversion, the copy is returned as-is, so whatever container type
    ``X`` had is preserved.

    Args:
        X: Input data.  Presumably a pandas DataFrame, since it is exposed
            to the executed code as ``df`` -- confirm against callers.
        y: Unused; kept so the signature matches the transformer API.

    Returns:
        The deep copy of ``X`` after the code has run.  Only in-place
        modifications of ``df`` are visible in the result; if the code
        rebinds the name ``df`` the returned object is still the copy.

    Raises:
        SyntaxError: If ``self.code`` is not valid Python.
        Exception: Anything raised by the executed code propagates.
    """
    X_cp = copy.deepcopy(X)

    # A single dict is used as both globals and locals.  Passing two
    # separate dicts makes exec() behave like a class body: names the code
    # defines or imports at top level land in the locals dict, where
    # functions, lambdas and comprehensions inside the code cannot see
    # them, which surfaces as a NameError at call time.
    namespace = {"df": X_cp, "pd": pd, "np": np}

    parsed = ast.parse(self.code)
    # SECURITY: this executes arbitrary code stored on the instance with
    # full interpreter privileges.  Only use code from a trusted source or
    # run it inside a sandbox.
    exec(compile(parsed, filename="<ast>", mode="exec"), namespace)
    return X_cp

def get_prompt(
df, description, iterative=1, data_description_unparsed=None, samples=None, **kwargs
):
Expand Down

0 comments on commit a00caec

Please sign in to comment.