# Copyright (c) Yuta Saito, Yusuke Narita, and ZOZO Technologies, Inc. All rights reserved.
# Licensed under the Apache 2.0 License.
"""Dataset Class for Real-World Logged Bandit Feedback."""
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Tuple
import numpy as np
import pandas as pd
from scipy.stats import rankdata
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import check_random_state
from .base import BaseRealBanditDataset
from ..types import BanditFeedback
@dataclass
class OpenBanditDataset(BaseRealBanditDataset):
    """Class for loading and preprocessing Open Bandit Dataset.

    On instantiation the raw CSV files are read from
    ``data_path / behavior_policy / campaign`` and the default feature
    engineering in `pre_process` is applied.

    Note
    -----
    Users are free to implement their own feature engineering by overriding the `pre_process` method.

    Parameters
    -----------
    behavior_policy: str
        Name of the behavior policy that generated the logged bandit feedback data.
        Must be either 'random' or 'bts'.

    campaign: str
        One of the three possible campaigns considered in ZOZOTOWN, "all", "men", and "women".

    data_path: Path, default=Path('./obd')
        Path that stores Open Bandit Dataset.
        After initialization this attribute points at the
        ``behavior_policy / campaign`` sub-directory of the given path.

    dataset_name: str, default='obd'
        Name of the dataset.

    References
    ------------
    Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita.
    "Large-scale Open Dataset, Pipeline, and Benchmark for Bandit Algorithms.", 2020.

    """

    behavior_policy: str
    campaign: str
    data_path: Path = Path("./obd")
    dataset_name: str = "obd"

    def __post_init__(self) -> None:
        """Initialize Open Bandit Dataset Class.

        Validates the constructor arguments, resolves the directory that holds
        the requested data, then loads and preprocesses it.

        Raises
        ------
        AssertionError
            If `behavior_policy` or `campaign` is not one of the accepted
            values, or if `data_path` is not a `Path`.

        """
        assert self.behavior_policy in [
            "bts",
            "random",
        ], f"behavior_policy must be either of 'bts' or 'random', but {self.behavior_policy} is given"
        assert self.campaign in [
            "all",
            "men",
            "women",
        ], f"campaign must be one of 'all', 'men', and 'women', but {self.campaign} is given"
        assert isinstance(self.data_path, Path), "data_path must be a Path type"

        # Data is laid out on disk as <data_path>/<behavior_policy>/<campaign>/<campaign>.csv
        self.data_path = self.data_path / self.behavior_policy / self.campaign
        self.raw_data_file = f"{self.campaign}.csv"

        self.load_raw_data()
        self.pre_process()

    @property
    def n_rounds(self) -> int:
        """Total number of rounds contained in the logged bandit dataset."""
        return self.data.shape[0]

    @property
    def n_actions(self) -> int:
        """Number of actions (largest logged action id plus one)."""
        return int(self.action.max() + 1)

    @property
    def dim_context(self) -> int:
        """Dimensions of context vectors."""
        return self.context.shape[1]

    @property
    def len_list(self) -> int:
        """Length of recommendation lists (largest encoded position plus one)."""
        return int(self.position.max() + 1)

    @classmethod
    def calc_on_policy_policy_value_estimate(
        cls,
        behavior_policy: str,
        campaign: str,
        data_path: Path = Path("./obd"),
        test_size: float = 0.3,
        is_timeseries_split: bool = False,
    ) -> float:
        """Calculate on-policy policy value estimate (used as a ground-truth policy value).

        A new dataset instance is built from the given arguments and the mean
        of the ``"reward_test"`` entry of its batch bandit feedback is returned.

        Parameters
        ----------
        behavior_policy: str
            Name of the behavior policy that generated the log data.
            Must be either 'random' or 'bts'.

        campaign: str
            One of the three possible campaigns considered in ZOZOTOWN (i.e., "all", "men", and "women").

        data_path: Path, default=Path('./obd')
            Path that stores Open Bandit Dataset.

        test_size: float, default=0.3
            If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split.
            Only used when `is_timeseries_split` is true.

        is_timeseries_split: bool, default=False
            If true, split the original logged bandit feedback data by time series.

        Returns
        ---------
        on_policy_policy_value_estimate: float
            Policy value of the behavior policy estimated by on-policy estimation, i.e., :math:`\\mathbb{E}_{\\mathcal{D}} [r_t]`,
            where :math:`\\mathbb{E}_{\\mathcal{D}}[\\cdot]` is the empirical average over :math:`T` observations in :math:`\\mathcal{D}`.
            This parameter is used as a ground-truth policy value in the evaluation of OPE estimators.

        """
        return (
            cls(behavior_policy=behavior_policy, campaign=campaign, data_path=data_path)
            .obtain_batch_bandit_feedback(
                test_size=test_size, is_timeseries_split=is_timeseries_split
            )["reward_test"]
            .mean()
        )

    def load_raw_data(self) -> None:
        """Load raw open bandit dataset.

        Reads the campaign CSV and ``item_context.csv`` from `data_path`,
        sorts the log by its ``timestamp`` column, and exposes the
        ``item_id``, ``position``, ``click`` and ``propensity_score`` columns
        as the `action`, `position`, `reward` and `pscore` arrays.

        """
        self.data = pd.read_csv(self.data_path / self.raw_data_file, index_col=0)
        self.item_context = pd.read_csv(
            self.data_path / "item_context.csv", index_col=0
        )
        # Chronological order is what makes the time-series split meaningful.
        self.data.sort_values("timestamp", inplace=True)
        self.action = self.data["item_id"].values
        # Dense ranking maps the raw position values onto 0, 1, 2, ...
        self.position = (
            rankdata(self.data["position"].values, method="dense") - 1
        ).astype(int)
        self.reward = self.data["click"].values
        self.pscore = self.data["propensity_score"].values

    def pre_process(self) -> None:
        """Preprocess raw open bandit dataset.

        Builds `context` by one-hot encoding every column whose name contains
        ``"user_feature"`` (dropping the first level of each), and builds
        `action_context` by label-encoding every item feature except
        ``item_feature_0``, which is appended unchanged as the last column.

        Note
        -----
        This is the default feature engineering and please override this method to
        implement your own preprocessing.
        see https://github.com/st-tech/zr-obp/blob/master/examples/examples_with_obd/custom_dataset.py for example.

        """
        user_cols = self.data.columns.str.contains("user_feature")
        # Pin the dummy dtype: the pandas default changed from uint8 to bool in
        # pandas 2.0, and downstream code expects numeric context vectors.
        self.context = pd.get_dummies(
            self.data.loc[:, user_cols], drop_first=True, dtype=np.uint8
        ).values

        item_feature_0 = self.item_context["item_feature_0"]
        # `axis` must be passed by keyword; positional use was removed in pandas 2.0.
        item_feature_cat = self.item_context.drop("item_feature_0", axis=1).apply(
            LabelEncoder().fit_transform
        )
        self.action_context = pd.concat(
            [item_feature_cat, item_feature_0], axis=1
        ).values

    def obtain_batch_bandit_feedback(
        self, test_size: float = 0.3, is_timeseries_split: bool = False
    ) -> BanditFeedback:
        """Obtain batch logged bandit feedback.

        Parameters
        -----------
        test_size: float, default=0.3
            If float, should be between 0.0 and 1.0 and represent the proportion of
            the dataset to include in the evaluation split.
            Only validated and used when `is_timeseries_split` is true.

        is_timeseries_split: bool, default=False
            If true, split the original logged bandit feedback data by time series.

        Returns
        --------
        bandit_feedback: BanditFeedback
            Batch logged bandit feedback collected by a behavior policy.
            With `is_timeseries_split` the first ``1 - test_size`` share of the
            (time-sorted) rounds is returned under the plain keys and the
            remainder under the ``*_test`` keys. Otherwise all rounds are
            returned under the plain keys and ``reward_test`` refers to the
            full reward array.

        Raises
        ------
        AssertionError
            If `is_timeseries_split` is true and `test_size` is not a float
            strictly between 0 and 1.

        """
        if not is_timeseries_split:
            return dict(
                n_rounds=self.n_rounds,
                n_actions=self.n_actions,
                action=self.action,
                position=self.position,
                reward=self.reward,
                # Kept so that on-policy value estimation works without a split.
                reward_test=self.reward,
                pscore=self.pscore,
                context=self.context,
                action_context=self.action_context,
            )

        # `and` short-circuits, so the range comparison only runs on floats.
        assert isinstance(test_size, float) and (
            0 < test_size < 1
        ), f"test_size must be a float in the (0,1) interval, but {test_size} is given"
        # Builtin int: the `np.int` alias was removed in NumPy 1.24.
        n_rounds_train = int(self.n_rounds * (1.0 - test_size))
        return dict(
            n_rounds=n_rounds_train,
            n_actions=self.n_actions,
            action=self.action[:n_rounds_train],
            action_test=self.action[n_rounds_train:],
            position=self.position[:n_rounds_train],
            position_test=self.position[n_rounds_train:],
            reward=self.reward[:n_rounds_train],
            reward_test=self.reward[n_rounds_train:],
            pscore=self.pscore[:n_rounds_train],
            pscore_test=self.pscore[n_rounds_train:],
            context=self.context[:n_rounds_train],
            context_test=self.context[n_rounds_train:],
            action_context=self.action_context,
        )

    def sample_bootstrap_bandit_feedback(
        self,
        test_size: float = 0.3,
        is_timeseries_split: bool = False,
        random_state: Optional[int] = None,
    ) -> BanditFeedback:
        """Obtain bootstrap logged bandit feedback.

        Parameters
        -----------
        test_size: float, default=0.3
            If float, should be between 0.0 and 1.0 and represent the proportion of
            the dataset to include in the evaluation split.

        is_timeseries_split: bool, default=False
            If true, split the original logged bandit feedback data by time series.

        random_state: int, default=None
            Controls the random seed in bootstrap sampling.

        Returns
        --------
        bandit_feedback: BanditFeedback
            Logged bandit feedback sampled independently from the original data with replacement.
            Only the ``action``, ``position``, ``reward``, ``pscore`` and
            ``context`` entries are resampled; every other entry is returned
            as produced by `obtain_batch_bandit_feedback`.

        """
        bandit_feedback = self.obtain_batch_bandit_feedback(
            test_size=test_size, is_timeseries_split=is_timeseries_split
        )
        n_rounds = bandit_feedback["n_rounds"]
        random_ = check_random_state(random_state)
        bootstrap_idx = random_.choice(np.arange(n_rounds), size=n_rounds, replace=True)
        # One shared index keeps the resampled arrays aligned round-by-round.
        for key_ in ["action", "position", "reward", "pscore", "context"]:
            bandit_feedback[key_] = bandit_feedback[key_][bootstrap_idx]
        return bandit_feedback