The aim of this experiment is to present an easy way to compare fragments of texts in order to detect (supposed) results of copy & paste operations between articles in the domain of Natural Language Processing (NLP). The search space of the comparisons is a corpus labeled as NLP4NLP gathering a large part of the NLP field. The study is centered on LREC papers in both directions, first with an LREC paper borrowing a fragment of text from the collection, and secondly in the reverse direction with fragments of LREC documents borrowed and inserted in the collection.
@InProceedings{FRANCOPOULO16.85,
author = {Gil Francopoulo and Joseph Mariani and Patrick Paroubek}, title = {A Study of Reuse and Plagiarism in LREC papers}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }