This article describes the creation of corpora with part-of-speech annotations for three regional languages of France: Alsatian, Occitan and Picard. These manual annotations were performed in the context of the RESTAURE project, whose goal is to develop resources and tools for these under-resourced French regional languages. The article presents the tagsets used in the annotation process as well as the resulting annotated corpora.
% LREC 2018 conference paper: part-of-speech annotated corpora for Alsatian, Occitan and Picard.
% Maintenance notes:
%   - The "(Conference chair)" role label was removed from the first editor, because
%     BibTeX parses everything between " and " separators as a personal name.
%   - month uses the standard macro; the conference day range is kept in eventdate,
%     a biblatex field that classic BibTeX styles ignore.
%   - Only proper nouns and acronyms are brace-protected in title and booktitle, so
%     styles remain free to apply their own casing.
@inproceedings{BERNHARD18.288,
  author    = {Bernhard, Delphine and
               Ligozat, Anne-Laure and
               Martin, Fanny and
               Bras, Myriam and
               Magistry, Pierre and
               Vergez-Couret, Marianne and
               Steibl{\'e}, Lucie and
               Erhart, Pascale and
               Hathout, Nabil and
               Huck, Dominique and
               Rey, Christophe and
               Reyn{\'e}s, Philippe and
               Rosset, Sophie and
               Sibille, Jean and
               Lavergne, Thomas},
  title     = {Corpora with Part-of-Speech Annotations for Three Regional Languages of {France}: {Alsatian}, {Occitan} and {Picard}},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Cieri, Christopher and
               Declerck, Thierry and
               Goggi, Sara and
               Hasida, Koiti and
               Isahara, Hitoshi and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, H{\'e}l{\`e}ne and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios and
               Tokunaga, Takenobu},
  year      = {2018},
  month     = may,
  eventdate = {2018-05-07/2018-05-12},
  address   = {Miyazaki, Japan},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {979-10-95546-00-9},
  language  = {english},
}