Massive labelled data is important for Named Entity Recognition (NER). For Low Resource Languages (LRL), obtaining massive labelled data means more labor, more time and more cost. A semi-supervised learning (SSL) method that needs less labelled data is proposed to recognize person names in Tibetan texts. Based on Conditional Random Fields (CRFs) and the Radial Basis Function (RBF), this method uses a 5-element feature matrix to propagate information from a small amount of labelled data to massive unlabelled data. Experiments demonstrate that its F-measure can reach 84\% using only 100 documents as seeds, whereas about 800 labelled documents are required for a supervised learning approach based on pure CRFs.
% LREC 2018 conference paper. The conference days and place are stored in
% eventdate/venue; address is the publisher's city, not the conference site.
@InProceedings{WANG18.1,
  author    = {Wang, Zhijuan and Li, Fuxian},
  title     = {A Semi-supervised Learning Approach for Person Name Recognition in {Tibetan}},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  editor    = {Du, Jinhua and Arcan, Mihael and Liu, Qun and Isahara, Hitoshi},
  year      = {2018},
  month     = may,
  eventdate = {2018-05-07/2018-05-12},
  venue     = {Miyazaki, Japan},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris, France},
  isbn      = {979-10-95546-15-3},
  language  = {english},
}