% Sun et al. (2024), Journal of Data-centric Machine Learning Research; hosted on OpenReview.
% The first word after the question mark in the title is braced so that styles which
% sentence-case titles keep its capital (only text after a colon is preserved by default).
@article{sun2024when,
  author  = {Sun, Hao and Chan, Alex James and Seedat, Nabeel and H{\"u}y{\"u}k, Alihan and van der Schaar, Mihaela},
  title   = {When is Off-Policy Evaluation (Reward Modeling) Useful in Contextual Bandits? {A} Data-Centric Perspective},
  journal = {Journal of Data-centric Machine Learning Research},
  year    = {2024},
  url     = {https://openreview.net/forum?id=wg5y4AK6l7},
}