{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Using Feat's archive\n", "\n", "Feat optimizes a population of models. \n", "At the end of the run, it can be useful to explore this population to find a trade-off between objectives, \n", "such as performance and complexity. \n", "\n", "In this example, we apply Feat to a regression problem and visualize the archive of representations. \n", "\n", "Note: this code uses the Penn ML Benchmark Suite, `pmlb` (https://github.com/EpistasisLab/penn-ml-benchmarks/), to fetch data. You can install it using `pip install pmlb`.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, we import the required libraries, fetch the dataset, and create a 75/25 train-test split with a fixed random state." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from pmlb import fetch_data\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import mean_squared_error as mse\n", "import numpy as np\n", "# fix the random state\n", "random_state=42\n", "dataset='690_visualizing_galaxy'\n", "X, y = fetch_data(dataset,return_X_y=True)\n", "X_t,X_v, y_t, y_v = train_test_split(X,y,train_size=0.75,test_size=0.25,random_state=random_state)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Then we set up a Feat instance and train the model, storing the final archive." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "FEAT version: 0.5.2.post75\n" ] }, { "data": { "text/html": [ "
FeatRegressor(backprop=False, batch_size=0, classification=False,\n", " corr_delete_mutate=False, cross_rate=0.5, erc=False, fb=0.5,\n", " feature_names='',\n", " functions=['+', '-', '*', '/', '^2', '^3', 'sqrt', 'sin', 'cos',\n", " 'exp', 'log', '^', 'logit', 'tanh', 'gauss', 'relu',\n", " 'split', 'split_c', 'b2f', 'c2f', 'and', 'or', 'not',\n", " 'xor', '=', '<', '<=', '>', '>=', 'if', ...],\n", " gens=100, hillclimb=True, iters=10, logfile='', lr=0.1,\n", " max_depth=2, max_dim=5, max_stall=0, max_time=60,\n", " ml='LinearRidgeRegression', n_jobs=4, normalize=True,\n", " objectives=['fitness', 'complexity'], otype='a', pop_size=100,\n", " protected_groups='', random_state=42, residual_xo=False,\n", " root_xo_rate=0.5, save_pop=0, scorer='', ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
FeatRegressor(backprop=False, batch_size=0, classification=False,\n", " corr_delete_mutate=False, cross_rate=0.5, erc=False, fb=0.5,\n", " feature_names='',\n", " functions=['+', '-', '*', '/', '^2', '^3', 'sqrt', 'sin', 'cos',\n", " 'exp', 'log', '^', 'logit', 'tanh', 'gauss', 'relu',\n", " 'split', 'split_c', 'b2f', 'c2f', 'and', 'or', 'not',\n", " 'xor', '=', '<', '<=', '>', '>=', 'if', ...],\n", " gens=100, hillclimb=True, iters=10, logfile='', lr=0.1,\n", " max_depth=2, max_dim=5, max_stall=0, max_time=60,\n", " ml='LinearRidgeRegression', n_jobs=4, normalize=True,\n", " objectives=['fitness', 'complexity'], otype='a', pop_size=100,\n", " protected_groups='', random_state=42, residual_xo=False,\n", " root_xo_rate=0.5, save_pop=0, scorer='', ...)
FeatRegressor(backprop=False, batch_size=0, classification=False,\n", " corr_delete_mutate=False, cross_rate=0.5, erc=False, fb=0.5,\n", " feature_names='',\n", " functions=['+', '-', '*', '/', '^2', '^3', 'sqrt', 'sin', 'cos',\n", " 'exp', 'log', '^', 'logit', 'tanh', 'gauss', 'relu',\n", " 'split', 'split_c', 'b2f', 'c2f', 'and', 'or', 'not',\n", " 'xor', '=', '<', '<=', '>', '>=', 'if', ...],\n", " gens=100, hillclimb=True, iters=10, logfile='', lr=0.1,\n", " max_depth=2, max_dim=5, max_stall=0, max_time=60,\n", " ml='LinearRidgeRegression', n_jobs=4, normalize=True,\n", " objectives=['fitness', 'complexity'], otype='a', pop_size=100,\n", " protected_groups='', random_state=42, residual_xo=False,\n", " root_xo_rate=0.5, save_pop=0, scorer='', ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
FeatRegressor(backprop=False, batch_size=0, classification=False,\n", " corr_delete_mutate=False, cross_rate=0.5, erc=False, fb=0.5,\n", " feature_names='',\n", " functions=['+', '-', '*', '/', '^2', '^3', 'sqrt', 'sin', 'cos',\n", " 'exp', 'log', '^', 'logit', 'tanh', 'gauss', 'relu',\n", " 'split', 'split_c', 'b2f', 'c2f', 'and', 'or', 'not',\n", " 'xor', '=', '<', '<=', '>', '>=', 'if', ...],\n", " gens=100, hillclimb=True, iters=10, logfile='', lr=0.1,\n", " max_depth=2, max_dim=5, max_stall=0, max_time=60,\n", " ml='LinearRidgeRegression', n_jobs=4, normalize=True,\n", " objectives=['fitness', 'complexity'], otype='a', pop_size=100,\n", " protected_groups='', random_state=42, residual_xo=False,\n", " root_xo_rate=0.5, save_pop=0, scorer='', ...)