% COSMic: conference paper in the Findings of ACL volume for EMNLP 2021, pp. 3419--3430.
% booktitle carries the full proceedings name; there is no separate series field,
% because it would only repeat the same text in the formatted reference.
% The funding/copyright/conference-date text from the exporting database is stored
% in annote (not note) so the standard BibTeX styles do not print it.
@inproceedings{5724a21a30aa4b639bd39cb226e4c07b,
author = "Inan, Mert and Sharma, Piyush and Khalid, Baber and Soricut, Radu and Stone, Matthew and Alikhani, Malihe",
title = "{COSMic}: A Coherence-Aware Generation Metric for Image Descriptions",
booktitle = "Findings of the Association for Computational Linguistics: {EMNLP} 2021",
editor = "Moens, Marie-Francine and Huang, Xuanjing and Specia, Lucia and Yih, {Scott Wen-Tau}",
year = "2021",
month = nov,
pages = "3419--3430",
publisher = "Association for Computational Linguistics (ACL)",
language = "English (US)",
abstract = "Developers of text generation models rely on automated evaluation metrics as a stand-in for slow and expensive manual evaluations. However, image captioning metrics have struggled to give accurate learned estimates of the semantic and pragmatic success of output text. We address this weakness by introducing the first discourse-aware learned generation metric for evaluating image descriptions. Our approach is inspired by computational theories of discourse for capturing information goals using coherence. We present a dataset of image-description pairs annotated with coherence relations. We then train a coherence-aware metric on a subset of the Conceptual Captions dataset and measure its effectiveness - its ability to predict human ratings of output captions - on a test set composed of out-of-domain images. We demonstrate a higher Kendall Correlation Coefficient for our proposed metric with the human judgments for the results of a number of state-of-the-art coherence-aware caption generation models when compared to several other metrics including recently proposed learned metrics such as BLEURT and BERTScore.",
annote = "Funding Information: The authors affiliated with Rutgers University were partly supported by NSF Award CCF-19349243. Thanks to Pitt Cyber for supporting this project and the authors from the University of Pittsburgh. We also acknowledge the Center for Research Computing at the University of Pittsburgh for providing the required computational resources for carrying out experiments at the University of Pittsburgh. Publisher Copyright: {\textcopyright} 2021 Association for Computational Linguistics.; 2021 Findings of the Association for Computational Linguistics, Findings of ACL: EMNLP 2021 ; Conference date: 07-11-2021 Through 11-11-2021",
}