Discourse parsing is a challenging task in NLP and plays a crucial role in discourse analysis. To enable discourse analysis for Hindi, Hindi Discourse Relations Bank was created on a subset of Hindi TreeBank. The benefits of a discourse analyzer in automated discourse analysis, question summarization and question answering domains has motivated us to begin work on a discourse analyzer for Hindi. In this paper, we focus on discourse connective identification for Hindi. We explore various available syntactic features for this task. We also explore the use of dependency tree parses present in the Hindi TreeBank and study the impact of the same on the performance of the system. We report that the novel dependency features introduced have a higher impact on precision, in comparison to the syntactic features previously used for this task. In addition, we report a high accuracy of 96% for this task.
@InProceedings{JAIN16.746,
author = {Rohit Jain and Himanshu Sharma and Dipti Sharma}, title = {Using lexical and Dependency Features to Disambiguate Discourse Connectives in Hindi}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }