Source code for obp.dataset.real

# Copyright (c) Yuta Saito, Yusuke Narita, and ZOZO Technologies, Inc. All rights reserved.
# Licensed under the Apache 2.0 License.

"""Dataset Class for Real-World Logged Bandit Feedback."""
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Tuple

import numpy as np
import pandas as pd
from scipy.stats import rankdata
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import check_random_state

from .base import BaseRealBanditDataset
from ..types import BanditFeedback


@dataclass
class OpenBanditDataset(BaseRealBanditDataset):
    """Class for loading and preprocessing Open Bandit Dataset.

    Note
    -----
    Users are free to implement their own feature engineering by overriding the `pre_process` method.

    Parameters
    -----------
    behavior_policy: str
        Name of the behavior policy that generated the logged bandit feedback data.
        Must be either 'random' or 'bts'.

    campaign: str
        One of the three possible campaigns considered in ZOZOTOWN, "all", "men", and "women".

    data_path: Path, default=Path('./obd')
        Path that stores Open Bandit Dataset.

    dataset_name: str, default='obd'
        Name of the dataset.

    References
    ------------
    Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita.
    "Large-scale Open Dataset, Pipeline, and Benchmark for Bandit Algorithms.", 2020.

    """

    behavior_policy: str
    campaign: str
    data_path: Path = Path("./obd")
    dataset_name: str = "obd"

    def __post_init__(self) -> None:
        """Initialize Open Bandit Dataset Class."""
        assert self.behavior_policy in [
            "bts",
            "random",
        ], f"behavior_policy must be either 'bts' or 'random', but {self.behavior_policy} is given"
        assert self.campaign in [
            "all",
            "men",
            "women",
        ], f"campaign must be one of 'all', 'men', and 'women', but {self.campaign} is given"
        assert isinstance(self.data_path, Path), "data_path must be a Path type"

        self.data_path = self.data_path / self.behavior_policy / self.campaign
        self.raw_data_file = f"{self.campaign}.csv"

        self.load_raw_data()
        self.pre_process()

    @property
    def n_rounds(self) -> int:
        """Total number of rounds contained in the logged bandit dataset."""
        return self.data.shape[0]

    @property
    def n_actions(self) -> int:
        """Number of actions."""
        return int(self.action.max() + 1)

    @property
    def dim_context(self) -> int:
        """Dimensions of context vectors."""
        return self.context.shape[1]

    @property
    def len_list(self) -> int:
        """Length of recommendation lists."""
        return int(self.position.max() + 1)
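    # Illustrative usage (a sketch, not part of the library source): assuming a local
    # copy of the Open Bandit Dataset is stored under ./obd, the class can be loaded
    # and inspected through the properties defined above, for example:
    #
    #   dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
    #   print(dataset.n_rounds, dataset.n_actions, dataset.dim_context, dataset.len_list)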
    @classmethod
    def calc_on_policy_policy_value_estimate(
        cls,
        behavior_policy: str,
        campaign: str,
        data_path: Path = Path("./obd"),
        test_size: float = 0.3,
        is_timeseries_split: bool = False,
    ) -> float:
        """Calculate on-policy policy value estimate (used as a ground-truth policy value).

        Parameters
        ----------
        behavior_policy: str
            Name of the behavior policy that generated the log data.
            Must be either 'random' or 'bts'.

        campaign: str
            One of the three possible campaigns considered in ZOZOTOWN (i.e., "all", "men", and "women").

        data_path: Path, default=Path('./obd')
            Path that stores Open Bandit Dataset.

        test_size: float, default=0.3
            If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split.

        is_timeseries_split: bool, default=False
            If true, split the original logged bandit feedback data by time series.

        Returns
        ---------
        on_policy_policy_value_estimate: float
            Policy value of the behavior policy estimated by on-policy estimation, i.e., :math:`\\mathbb{E}_{\\mathcal{D}} [r_t]`,
            where :math:`\\mathbb{E}_{\\mathcal{D}}[\\cdot]` is the empirical average over :math:`T` observations in :math:`\\mathcal{D}`.
            This parameter is used as a ground-truth policy value in the evaluation of OPE estimators.

        """
        return (
            cls(behavior_policy=behavior_policy, campaign=campaign, data_path=data_path)
            .obtain_batch_bandit_feedback(
                test_size=test_size, is_timeseries_split=is_timeseries_split
            )["reward_test"]
            .mean()
        )
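    # Illustrative usage (a sketch, not part of the library source): assuming the
    # dataset files exist under ./obd, the classmethod above yields the on-policy
    # (ground-truth) policy value of the behavior policy, for example:
    #
    #   ground_truth = OpenBanditDataset.calc_on_policy_policy_value_estimate(
    #       behavior_policy="bts", campaign="all", test_size=0.3, is_timeseries_split=True
    #   )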
    def load_raw_data(self) -> None:
        """Load raw open bandit dataset."""
        self.data = pd.read_csv(self.data_path / self.raw_data_file, index_col=0)
        self.item_context = pd.read_csv(
            self.data_path / "item_context.csv", index_col=0
        )
        self.data.sort_values("timestamp", inplace=True)
        self.action = self.data["item_id"].values
        self.position = (rankdata(self.data["position"].values, "dense") - 1).astype(
            int
        )
        self.reward = self.data["click"].values
        self.pscore = self.data["propensity_score"].values
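    # Note: as referenced above, the raw CSV is expected to provide at least the
    # "timestamp", "item_id", "position", "click", and "propensity_score" columns,
    # while item_context.csv supplies the item (action) features.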
    def pre_process(self) -> None:
        """Preprocess raw open bandit dataset.

        Note
        -----
        This is the default feature engineering; override this method to implement your own preprocessing.
        See https://github.com/st-tech/zr-obp/blob/master/examples/examples_with_obd/custom_dataset.py for an example.

        """
        user_cols = self.data.columns.str.contains("user_feature")
        self.context = pd.get_dummies(
            self.data.loc[:, user_cols], drop_first=True
        ).values
        item_feature_0 = self.item_context["item_feature_0"]
        item_feature_cat = self.item_context.drop("item_feature_0", axis=1).apply(
            LabelEncoder().fit_transform
        )
        self.action_context = pd.concat(
            [item_feature_cat, item_feature_0], axis=1
        ).values
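    # Illustrative sketch (not part of the library source): as the docstring above
    # notes, `pre_process` can be overridden in a subclass to customize feature
    # engineering. The PCA step below is a hypothetical example, not the one used in
    # the linked custom_dataset.py.
    #
    #   from sklearn.decomposition import PCA
    #
    #   @dataclass
    #   class CustomOpenBanditDataset(OpenBanditDataset):
    #       def pre_process(self) -> None:
    #           super().pre_process()
    #           # hypothetical extra step: compress the one-hot user context to 10 dims
    #           self.context = PCA(n_components=10).fit_transform(self.context)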
    def obtain_batch_bandit_feedback(
        self, test_size: float = 0.3, is_timeseries_split: bool = False
    ) -> BanditFeedback:
        """Obtain batch logged bandit feedback.

        Parameters
        -----------
        test_size: float, default=0.3
            If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the evaluation split.

        is_timeseries_split: bool, default=False
            If true, split the original logged bandit feedback data by time series.

        Returns
        --------
        bandit_feedback: BanditFeedback
            Batch logged bandit feedback collected by a behavior policy.

        """
        if is_timeseries_split:
            assert isinstance(test_size, float) and (
                0 < test_size < 1
            ), f"test_size must be a float in the (0,1) interval, but {test_size} is given"
            n_rounds_train = int(self.n_rounds * (1.0 - test_size))
            return dict(
                n_rounds=n_rounds_train,
                n_actions=self.n_actions,
                action=self.action[:n_rounds_train],
                action_test=self.action[n_rounds_train:],
                position=self.position[:n_rounds_train],
                position_test=self.position[n_rounds_train:],
                reward=self.reward[:n_rounds_train],
                reward_test=self.reward[n_rounds_train:],
                pscore=self.pscore[:n_rounds_train],
                pscore_test=self.pscore[n_rounds_train:],
                context=self.context[:n_rounds_train],
                context_test=self.context[n_rounds_train:],
                action_context=self.action_context,
            )
        else:
            return dict(
                n_rounds=self.n_rounds,
                n_actions=self.n_actions,
                action=self.action,
                position=self.position,
                reward=self.reward,
                reward_test=self.reward,
                pscore=self.pscore,
                context=self.context,
                action_context=self.action_context,
            )
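    # Illustrative usage (a sketch, not part of the library source): with
    # is_timeseries_split=True, the returned dictionary holds both training arrays
    # (e.g., "reward") and evaluation arrays (e.g., "reward_test"); `dataset` refers
    # to the object from the earlier sketch.
    #
    #   bandit_feedback = dataset.obtain_batch_bandit_feedback(
    #       test_size=0.3, is_timeseries_split=True
    #   )
    #   print(bandit_feedback["n_rounds"], bandit_feedback["reward"].mean())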
    def sample_bootstrap_bandit_feedback(
        self,
        test_size: float = 0.3,
        is_timeseries_split: bool = False,
        random_state: Optional[int] = None,
    ) -> BanditFeedback:
        """Obtain bootstrap logged bandit feedback.

        Parameters
        -----------
        test_size: float, default=0.3
            If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the evaluation split.

        is_timeseries_split: bool, default=False
            If true, split the original logged bandit feedback data by time series.

        random_state: int, default=None
            Controls the random seed in bootstrap sampling.

        Returns
        --------
        bandit_feedback: BanditFeedback
            Logged bandit feedback sampled independently from the original data with replacement.

        """
        bandit_feedback = self.obtain_batch_bandit_feedback(
            test_size=test_size, is_timeseries_split=is_timeseries_split
        )
        n_rounds = bandit_feedback["n_rounds"]
        random_ = check_random_state(random_state)
        bootstrap_idx = random_.choice(np.arange(n_rounds), size=n_rounds, replace=True)
        for key_ in ["action", "position", "reward", "pscore", "context"]:
            bandit_feedback[key_] = bandit_feedback[key_][bootstrap_idx]
        return bandit_feedback
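    # Illustrative usage (a sketch, not part of the library source): bootstrap
    # replicates are typically drawn to quantify the uncertainty of OPE estimates;
    # `dataset` refers to the object from the earlier sketch.
    #
    #   replicates = [
    #       dataset.sample_bootstrap_bandit_feedback(random_state=s) for s in range(10)
    #   ]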