This paper introduces a ruled-based method and software tool, called SemAligner, for aligning chunks across texts in a given pair of short English texts. The tool, based on the top performing method at the Interpretable Short Text Similarity shared task at SemEval 2015, where it was used with human annotated (gold) chunks, can now additionally process plain text-pairs using two powerful chunkers we developed, e.g. using Conditional Random Fields. Besides aligning chunks, the tool automatically assigns semantic relations to the aligned chunks (such as EQUI for equivalent and OPPO for opposite) and semantic similarity scores that measure the strength of the semantic relation between the aligned chunks. Experiments show that SemAligner performs competitively for system generated chunks and that these results are also comparable to results obtained on gold chunks. SemAligner has other capabilities such as handling various input formats and chunkers as well as extending lookup resources.
@InProceedings{MAHARJAN16.115,
author = {Nabin Maharjan and Rajendra Banjade and Nobal Bikram Niraula and Vasile Rus}, title = {SemAligner: A Method and Tool for Aligning Chunks with Semantic Relation Types and Semantic Similarity Scores}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }