In this paper, we describe our effort in the development and annotation of a large-scale corpus containing code-switched data. Until recently, very limited effort has been devoted to developing computational approaches or even basic linguistic resources to support research into the processing of Moroccan Darija.
% Conference paper in the LREC 2016 proceedings.
% - eventdate/venue describe the conference itself; address is the
%   publisher's city. Classic BibTeX styles ignore eventdate/venue.
% - month uses the predefined three-letter macro (unbraced) so the
%   bibliography style can expand and localise it.
% - Names are in "Last, First" form so name parsing is unambiguous.
% - internal-note is an ignored annotation field; it keeps the role
%   that was previously embedded in the editor name.
@inproceedings{SAMIH16.341,
  author        = {Samih, Younes and Maier, Wolfgang},
  title         = {An {Arabic-Moroccan} {Darija} Code-Switched Corpus},
  booktitle     = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
  editor        = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Goggi, Sara and Grobelnik, Marko and Maegaard, Bente and Mariani, Joseph and Mazo, Helene and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher     = {European Language Resources Association (ELRA)},
  address       = {Paris, France},
  year          = {2016},
  month         = may,
  eventdate     = {2016-05-23/2016-05-28},
  venue         = {Portorož, Slovenia},
  isbn          = {978-2-9517408-9-1},
  language      = {english},
  internal-note = {Nicoletta Calzolari: Conference Chair},
}