In [None]:
# Optional setup: uncomment the next line to install LightAutoML straight from
# the `master` branch on GitHub. The branch ref is unpinned, so the installed
# version can change between runs.
# ! pip install git+https://github.com/sberbank-ai-lab/LightAutoML.git@master

# Imports

In [None]:
import pandas as pd

In [None]:
from lightautoml.tasks import Task
from lightautoml.addons.uplift.base import AutoUplift
from lightautoml.addons.uplift.metrics import calculate_uplift_auc

In [None]:
from sklearn.model_selection import train_test_split

# Load and prepare data

In [None]:
# Read the three CSV inputs (train, test, submission template) in that order.
train_df, test_df, submission_df = (
    pd.read_csv(csv_path)
    for csv_path in ('data/train.csv', 'data/test.csv', 'data/submission.csv')
)

In [None]:
# Preview the first five rows of the training frame.
train_df.head(n=5)

In [None]:
# Names of the outcome column and of the treatment-group column.
TARGET_NAME, TREATMENT_NAME = 'target', 'group'

In [None]:
# Encode the treatment flag as 0/1: rows whose group equals 'test' become 1,
# every other row becomes 0.
# The dtype guard keeps this cell idempotent: once the column holds integers,
# comparing it with 'test' again would silently reset every flag to 0.
if not pd.api.types.is_integer_dtype(train_df[TREATMENT_NAME]):
    train_df[TREATMENT_NAME] = (train_df[TREATMENT_NAME] == 'test').astype(int)

In [None]:
# Remove the 'id' column from both frames so it is not fed to the model.
# Reassigning with `errors='ignore'` (instead of `inplace=True`) makes the cell
# safe to re-run: a second execution no longer raises KeyError.
train_df = train_df.drop(columns='id', errors='ignore')
test_df = test_df.drop(columns='id', errors='ignore')

In [None]:
# Two-column frame (target, treatment) used as the stratification key.
stratify_val = train_df.loc[:, [TARGET_NAME, TREATMENT_NAME]]

In [None]:
# Hold out a validation part with a fixed seed, stratifying on `stratify_val`.
# The split proportions are left at train_test_split's defaults.
# NOTE: `train_df` is rebound to the training part here, so re-running this
# cell alone would split the already-reduced frame against a stale
# `stratify_val`; re-execute the preceding cells first.
train_df, valid_df = train_test_split(
    train_df, random_state=100, shuffle=True, stratify=stratify_val
)

In [None]:
# Flattened value arrays of the validation target and treatment columns,
# kept for scoring the predictions later.
valid_target, valid_treatment = (
    valid_df[column].values.ravel()
    for column in (TARGET_NAME, TREATMENT_NAME)
)

# Training

In [None]:
# Time budget handed to AutoUplift: 30 minutes, expressed in seconds.
TIMEOUT = 30 * 60

In [None]:
# Map the 'target' and 'treatment' roles to their column names.
roles = dict(target=TARGET_NAME, treatment=TREATMENT_NAME)

In [None]:
# Uplift model built on a binary base task, limited by the TIMEOUT budget.
autouplift = AutoUplift(timeout=TIMEOUT, base_task=Task('binary'))

In [None]:
%%time

# Fit the uplift model on the training part. `roles` names the target and
# treatment columns. The `%%time` magic must stay on the first line of the
# cell; it reports how long the fit took.
autouplift.fit(train_df, roles)

In [None]:
# Predict on the validation part. The result is unpacked into three values and
# only the first is kept as the uplift prediction.
# NOTE(review): the two discarded outputs are presumably the treated/control
# base predictions — confirm against the AutoUplift documentation.
uplift_pred, _, _  = autouplift.predict(valid_df)

In [None]:
# Score the validation uplift predictions against the true target and
# treatment arrays.
# NOTE(review): the metric variant is left at the library default — confirm it
# matches what the name `cum_gain` implies.
cum_gain = calculate_uplift_auc(valid_target, uplift_pred, valid_treatment)

In [None]:
# Show the validation uplift metric computed above.
print(cum_gain)

# Make submission

In [None]:
# Predict on the test frame. Only the first of the three returned values is
# kept. NOTE: this rebinds `uplift_pred`, replacing the validation predictions
# held under the same name.
uplift_pred, _, _ = autouplift.predict(test_df)

In [None]:
# Attach the current `uplift_pred` values to the submission frame as the
# 'uplift' column.
submission_df = submission_df.assign(uplift=uplift_pred)

In [None]:
# Write the submission to disk without the index column.
submission_df.to_csv(
    path_or_buf='./data/baseline_submission.csv',
    index=False,
)