In recent years, there has been a surge of interest in the natural language processing related to the real world, such as symbol grounding, language generation, and nonlinguistic data search by natural language queries. We argue that shogi (Japanese chess) commentaries, which are accompanied by game states, are an interesting testbed for these tasks. A commentator refers not only to the current board state but to past and future moves, and yet such references can be grounded in the game tree, possibly with the help of modern game-tree search algorithms. For this reason, we previously collected shogi commentaries together with board states and have been developing a game commentary generator. In this paper, we augment the corpus with manual annotation of modality expressions and event factuality. The annotated corpus includes 1,622 modality expressions, 5,014 event class tags and 3,092 factuality tags. It can be used to train a computer to identify words and phrases that signal factuality and to determine events with the said factuality, paving the way for grounding possible and counterfactual states.
@InProceedings{MATSUYOSHI18.54, author = {Suguru Matsuyoshi and Hirotaka Kameko and Yugo Murawaki and Shinsuke Mori}, title = "{Annotating Modality Expressions and Event Factuality for a Japanese Chess Commentary Corpus}", booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, year = {2018}, month = {May 7-12, 2018}, address = {Miyazaki, Japan}, editor = {Nicoletta Calzolari (Conference chair) and Khalid Choukri and Christopher Cieri and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara and Bente Maegaard and Joseph Mariani and Hélène Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga}, publisher = {European Language Resources Association (ELRA)}, isbn = {979-10-95546-00-9}, language = {english} }