# Confounder Removal

           
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold

from photonai.base import Hyperpipe, PipelineElement, OutputSettings

# Load the breast cancer dataset that ships with scikit-learn.
data = load_breast_cancer()

# Suppose we want to regress out the effect of mean_radius and mean_texture:
# the first two columns are set aside as confounders ...
mean_radius, mean_texture = data.data[:, 0], data.data[:, 1]

# ... and only the remaining columns are used as features.
X = data.data[:, 2:]
y = data.target

# Build the Hyperpipe.
# Cross-validation splitters: 5 outer folds, 3 inner folds.
outer_cv = KFold(n_splits=5)
inner_cv = KFold(n_splits=3)

# Results are written below ./tmp/.
output_settings = OutputSettings(project_folder='./tmp/')

pipe = Hyperpipe(
    'confounder_removal_example',
    optimizer='grid_search',
    metrics=['accuracy', 'precision', 'recall'],
    best_config_metric='accuracy',
    outer_cv=outer_cv,
    inner_cv=inner_cv,
    verbosity=1,
    output_settings=output_settings,
)

# There are two ways of specifying multiple confounders.
# First, you can simply pass a dictionary with "confounder" as key and a data matrix or list as value:
# pipe += PipelineElement('ConfounderRemoval', {}, standardize_covariates=True, test_disabled=False)
# pipe += PipelineElement('SVC')
# pipe.fit(X, y, **{'confounder': [mean_radius, mean_texture]})

# Second option: specify the names of the variables that should be used
# in the confounder removal step via `confounder_names`.
confounder_removal = PipelineElement(
    'ConfounderRemoval',
    {},
    standardize_covariates=True,
    test_disabled=True,
    confounder_names=['mean_radius', 'mean_texture'],
)
pipe += confounder_removal
pipe += PipelineElement('SVC')

# Each name listed in `confounder_names` must be supplied to fit() as a
# keyword argument carrying the corresponding data.
pipe.fit(X, y, mean_radius=mean_radius, mean_texture=mean_texture)