ORCA is composed of 60 publicly available datasets and covers 29 tasks within seven NLU task clusters as follows.
| |||||
Task Name | Identifier | Score Metric | URL | ||
ANS Stance | ans-stance | Macro F1-score | | | |
Baly Stance | baly-stance | Macro F1-score | | | |
XNLI | xlni | Macro F1-score | | | |
| |||||
Task Name | Identifier | Score Metric | URL | ||
Question Answering | qa | Macro F1-score | | | |
| |||||
Task Name | Identifier | Score Metric | URL | ||
Emotion Regression | emotion-reg | Spearman Correlation | | | |
MQ2Q | mq2q | Macro F1-score | | | |
STS | sts | Spearman Correlation | | | |
| |||||
Task Name | Identifier | Score Metric | URL | ||
Abusive | abusive | Macro F1-score | | | |
Adult | adult | Macro F1-score | | | |
Age | age | Macro F1-score | | | |
ANS Claim | ans-claim | Macro F1-score | | | |
Dangerous | dangerous | Macro F1-score | | | |
Dialect at Binary Level | dialect-binary | Macro F1-score | | | |
Dialect at Country Level | dialect-country | Macro F1-score | | | |
Dialect at Region Level | dialect-region | Macro F1-score | | | |
Emotion | emotion | Macro F1-score | | | |
Gender | gender | Macro F1-score | | | |
Hate Speech | hate-speech | Macro F1-score | | | |
Irony | irony | Macro F1-score | | | |
Machine Generation | machine-generation | Macro F1-score | | | |
Offensive | offensive | Macro F1-score | | | |
Sarcasm | sarcasm | Macro F1-score | | | |
Sentiment Analysis | sentiment | Macro F1-score | | | |
| |||||
Task Name | Identifier | Score Metric | URL | ||
Aqmar NER | aqmar-ner | Macro F1-score | | | |
Arabic NER Corpus | arabic-ner | Macro F1-score | | | |
Dialect Part Of Speech | dialect-pos | Macro F1-score | | | |
MSA Part Of Speech | msa-pos | Macro F1-score | | | |
| |||||
Task Name | Identifier | Score Metric | URL | ||
Topic | topic | Macro F1-score | | | |
| |||||
Task Name | Identifier | Score Metric | URL | ||
Word Sense Disambiguation | wsd | Macro F1-score | | | |
ANS Stance Task BibTex
@inproceedings{khouja-2020-stance,
  title     = {Stance Prediction and Claim Verification: An {A}rabic Perspective},
  author    = {Khouja, Jude},
  booktitle = {Proceedings of the Third Workshop on Fact Extraction and VERification (FEVER)},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.fever-1.2},
  doi       = {10.18653/v1/2020.fever-1.2},
  pages     = {8--17},
  abstract  = {This work explores the application of textual entailment in news claim verification and stance prediction using a new corpus in Arabic. The publicly available corpus comes in two perspectives: a version consisting of 4,547 true and false claims and a version consisting of 3,786 pairs (claim, evidence). We describe the methodology for creating the corpus and the annotation process. Using the introduced corpus, we also develop two machine learning baselines for two proposed tasks: claim verification and stance prediction. Our best model utilizes pretraining (BERT) and achieves 76.7 F1 on the stance prediction task and 64.3 F1 on the claim verification task. Our preliminary experiments shed some light on the limits of automatic claim verification that relies on claims text only. Results hint that while the linguistic features and world knowledge learned during pretraining are useful for stance prediction, such learned representations from pretraining are insufficient for verifying claims without access to context or evidence.},
}
Baly Stance Task BibTex
@inproceedings{baly-etal-2018-integrating,
  title     = {Integrating Stance Detection and Fact Checking in a Unified Corpus},
  author    = {Baly, Ramy and Mohtarami, Mitra and Glass, James and M{\`a}rquez, Llu{\'\i}s and Moschitti, Alessandro and Nakov, Preslav},
  booktitle = {Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 2 (Short Papers)},
  month     = jun,
  year      = {2018},
  address   = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/N18-2004},
  doi       = {10.18653/v1/N18-2004},
  pages     = {21--27},
  abstract  = {A reasonable approach for fact checking a claim involves retrieving potentially relevant documents from different sources (e.g., news websites, social media, etc.), determining the stance of each document with respect to the claim, and finally making a prediction about the claim{'}s factuality by aggregating the strength of the stances, while taking the reliability of the source into account. Moreover, a fact checking system should be able to explain its decision by providing relevant extracts (rationales) from the documents. Yet, this setup is not directly supported by existing datasets, which treat fact checking, document retrieval, source credibility, stance detection and rationale extraction as independent tasks. In this paper, we support the interdependencies between these tasks as annotations in the same corpus. We implement this setup on an Arabic fact checking corpus, the first of its kind.},
}
XNLI Task BibTex
@inproceedings{conneau2018xnli,
  title     = {{XNLI}: Evaluating Cross-lingual Sentence Representations},
  author    = {Conneau, Alexis and Rinott, Ruty and Lample, Guillaume and Williams, Adina and Bowman, Samuel R. and Schwenk, Holger and Stoyanov, Veselin},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/D18-1269},
  doi       = {10.18653/v1/D18-1269},
  pages     = {2475--2485},
}
Question Answering Task BibTex
@inproceedings{abdul-mageed-etal-2021-arbert,
  title     = {{ARBERT} {\&} {MARBERT}: Deep Bidirectional Transformers for {A}rabic},
  author    = {Abdul-Mageed, Muhammad and Elmadany, AbdelRahim and Nagoudi, El Moatez Billah},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.acl-long.551},
  doi       = {10.18653/v1/2021.acl-long.551},
  pages     = {7088--7105},
  abstract  = {Pre-trained language models (LMs) are currently integral to many natural language processing systems. Although multilingual LMs were also introduced to serve many languages, these have limitations such as being costly at inference time and the size and diversity of non-English data involved in their pre-training. We remedy these issues for a collection of diverse Arabic varieties by introducing two powerful deep bidirectional transformer-based models, ARBERT and MARBERT. To evaluate our models, we also introduce ARLUE, a new benchmark for multi-dialectal Arabic language understanding evaluation. ARLUE is built using 42 datasets targeting six different task clusters, allowing us to offer a series of standardized experiments under rich conditions. When fine-tuned on ARLUE, our models collectively achieve new state-of-the-art results across the majority of tasks (37 out of 48 classification tasks, on the 42 datasets). Our best model acquires the highest ARLUE score (77.40) across all six task clusters, outperforming all other models including XLM-R Large ( 3.4x larger size). Our models are publicly available at https://github.com/UBC-NLP/marbert and ARLUE will be released through the same repository.},
}
Emotion Regression Task BibTex
@inproceedings{mohammad-etal-2018-semeval,
  title     = {{S}em{E}val-2018 Task 1: Affect in Tweets},
  author    = {Mohammad, Saif and Bravo-Marquez, Felipe and Salameh, Mohammad and Kiritchenko, Svetlana},
  booktitle = {Proceedings of the 12th International Workshop on Semantic Evaluation},
  month     = jun,
  year      = {2018},
  address   = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/S18-1001},
  doi       = {10.18653/v1/S18-1001},
  pages     = {1--17},
  abstract  = {We present the SemEval-2018 Task 1: Affect in Tweets, which includes an array of subtasks on inferring the affectual state of a person from their tweet. For each task, we created labeled data from English, Arabic, and Spanish tweets. The individual tasks are: 1. emotion intensity regression, 2. emotion intensity ordinal classification, 3. valence (sentiment) regression, 4. valence ordinal classification, and 5. emotion classification. Seventy-five teams (about 200 team members) participated in the shared task. We summarize the methods, resources, and tools used by the participating teams, with a focus on the techniques and resources that are particularly useful. We also analyze systems for consistent bias towards a particular race or gender. The data is made freely available to further improve our understanding of how people convey emotions through language.},
}
MQ2Q Task BibTex
@inproceedings{seelawi-etal-2019-nsurl,
  title     = {{NSURL}-2019 Task 8: Semantic Question Similarity in {A}rabic},
  author    = {Seelawi, Haitham and Mustafa, Ahmad and Al-Bataineh, Hesham and Farhan, Wael and Al-Natsheh, Hussein T.},
  booktitle = {Proceedings of the First International Workshop on NLP Solutions for Under Resourced Languages (NSURL 2019) co-located with ICNLSP 2019 - Short Papers},
  month     = "11--12 " # sep,
  year      = {2019},
  address   = {Trento, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2019.nsurl-1.1},
  pages     = {1--8},
}
STS Task BibTex
@inproceedings{cer-etal-2017-semeval,
  title     = {{S}em{E}val-2017 Task 1: Semantic Textual Similarity Multilingual and Crosslingual Focused Evaluation},
  author    = {Cer, Daniel and Diab, Mona and Agirre, Eneko and Lopez-Gazpio, I{\~n}igo and Specia, Lucia},
  booktitle = {Proceedings of the 11th International Workshop on Semantic Evaluation ({S}em{E}val-2017)},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/S17-2001},
  doi       = {10.18653/v1/S17-2001},
  pages     = {1--14},
  abstract  = {Semantic Textual Similarity (STS) measures the meaning similarity of sentences. Applications include machine translation (MT), summarization, generation, question answering (QA), short answer grading, semantic search, dialog and conversational systems. The STS shared task is a venue for assessing the current state-of-the-art. The 2017 task focuses on multilingual and cross-lingual pairs with one sub-track exploring MT quality estimation (MTQE) data. The task obtained strong participation from 31 teams, with 17 participating in \textit{all language tracks}. We summarize performance and review a selection of well performing methods. Analysis highlights common errors, providing insight into the limitations of existing models. To support ongoing work on semantic representations, the \textit{STS Benchmark} is introduced as a new shared training and evaluation set carefully selected from the corpus of English STS shared task data (2012-2017).},
}
Abusive Task BibTex
@inproceedings{mulki-etal-2019-l,
  title     = {{L}-{HSAB}: A {L}evantine {T}witter Dataset for Hate Speech and Abusive Language},
  author    = {Mulki, Hala and Haddad, Hatem and Bechikh Ali, Chedi and Alshabani, Halima},
  booktitle = {Proceedings of the Third Workshop on Abusive Language Online},
  month     = aug,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W19-3512},
  doi       = {10.18653/v1/W19-3512},
  pages     = {111--118},
  abstract  = {Hate speech and abusive language have become a common phenomenon on Arabic social media. Automatic hate speech and abusive detection systems can facilitate the prohibition of toxic textual contents. The complexity, informality and ambiguity of the Arabic dialects hindered the provision of the needed resources for Arabic abusive/hate speech detection research. In this paper, we introduce the first publicly-available Levantine Hate Speech and Abusive (L-HSAB) Twitter dataset with the objective to be a benchmark dataset for automatic detection of online Levantine toxic contents. We, further, provide a detailed review of the data collection steps and how we design the annotation guidelines such that a reliable dataset annotation is guaranteed. This has been later emphasized through the comprehensive evaluation of the annotations as the annotation agreement metrics of Cohen{'}s Kappa (k) and Krippendorff{'}s alpha ({$\alpha$}) indicated the consistency of the annotations.},
}
Adult Task BibTex
@inproceedings{mubarak-etal-2021-adult,
  title     = {Adult Content Detection on {A}rabic {T}witter: Analysis and Experiments},
  author    = {Mubarak, Hamdy and Hassan, Sabit and Abdelali, Ahmed},
  booktitle = {Proceedings of the Sixth Arabic Natural Language Processing Workshop},
  month     = apr,
  year      = {2021},
  address   = {Kyiv, Ukraine (Virtual)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.wanlp-1.14},
  pages     = {136--144},
  abstract  = {With Twitter being one of the most popular social media platforms in the Arab region, it is not surprising to find accounts that post adult content in Arabic tweets; despite the fact that these platforms dissuade users from such content. In this paper, we present a dataset of Twitter accounts that post adult content. We perform an in-depth analysis of the nature of this data and contrast it with normal tweet content. Additionally, we present extensive experiments with traditional machine learning models, deep neural networks and contextual embeddings to identify such accounts. We show that from user information alone, we can identify such accounts with F1 score of 94.7{\%} (macro average). With the addition of only one tweet as input, the F1 score rises to 96.8{\%}.},
}
Age Task BibTex
@inproceedings{abdul-mageed-etal-2020-aranet,
  title     = {{A}ra{N}et: A Deep Learning Toolkit for {A}rabic Social Media},
  author    = {Abdul-Mageed, Muhammad and Zhang, Chiyu and Hashemi, Azadeh and Nagoudi, El Moatez Billah},
  booktitle = {Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.osact-1.3},
  pages     = {16--23},
  abstract  = {We describe AraNet, a collection of deep learning Arabic social media processing tools. Namely, we exploit an extensive host of both publicly available and novel social media datasets to train bidirectional encoders from transformers (BERT) focused at social meaning extraction. AraNet models predict age, dialect, gender, emotion, irony, and sentiment. AraNet either delivers state-of-the-art performance on a number of these tasks and performs competitively on others. AraNet is exclusively based on a deep learning framework, giving it the advantage of being feature-engineering free. To the best of our knowledge, AraNet is the first to performs predictions across such a wide range of tasks for Arabic NLP. As such, AraNet has the potential to meet critical needs. We publicly release AraNet to accelerate research, and to facilitate model-based comparisons across the different tasks},
  language  = {English},
  isbn      = {979-10-95546-51-1},
}
ANS Claim Task BibTex
@inproceedings{khouja-2020-stance,
  title     = {Stance Prediction and Claim Verification: An {A}rabic Perspective},
  author    = {Khouja, Jude},
  booktitle = {Proceedings of the Third Workshop on Fact Extraction and VERification (FEVER)},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.fever-1.2},
  doi       = {10.18653/v1/2020.fever-1.2},
  pages     = {8--17},
  abstract  = {This work explores the application of textual entailment in news claim verification and stance prediction using a new corpus in Arabic. The publicly available corpus comes in two perspectives: a version consisting of 4,547 true and false claims and a version consisting of 3,786 pairs (claim, evidence). We describe the methodology for creating the corpus and the annotation process. Using the introduced corpus, we also develop two machine learning baselines for two proposed tasks: claim verification and stance prediction. Our best model utilizes pretraining (BERT) and achieves 76.7 F1 on the stance prediction task and 64.3 F1 on the claim verification task. Our preliminary experiments shed some light on the limits of automatic claim verification that relies on claims text only. Results hint that while the linguistic features and world knowledge learned during pretraining are useful for stance prediction, such learned representations from pretraining are insufficient for verifying claims without access to context or evidence.},
}
Dangerous Task BibTex
@inproceedings{alshehri-etal-2020-understanding,
  title     = {Understanding and Detecting Dangerous Speech in Social Media},
  author    = {Alshehri, Ali and Nagoudi, El Moatez Billah and Abdul-Mageed, Muhammad},
  booktitle = {Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.osact-1.6},
  pages     = {40--47},
  abstract  = {Social media communication has become a significant part of daily activity in modern societies. For this reason, ensuring safety in social media platforms is a necessity. Use of dangerous language such as physical threats in online environments is a somewhat rare, yet remains highly important. Although several works have been performed on the related issue of detecting offensive and hateful language, dangerous speech has not previously been treated in any significant way. Motivated by these observations, we report our efforts to build a labeled dataset for dangerous speech. We also exploit our dataset to develop highly effective models to detect dangerous content. Our best model performs at 59.60{\%} macro F1, significantly outperforming a competitive baseline.},
  language  = {English},
  isbn      = {979-10-95546-51-1},
}
Dialect at Binary Level Task BibTex
@inproceedings{abu-farha-magdy-2020-arabic,
  title     = {From {A}rabic Sentiment Analysis to Sarcasm Detection: The {A}r{S}arcasm Dataset},
  author    = {Abu Farha, Ibrahim and Magdy, Walid},
  booktitle = {Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.osact-1.5},
  pages     = {32--39},
  abstract  = {Sarcasm is one of the main challenges for sentiment analysis systems. Its complexity comes from the expression of opinion using implicit indirect phrasing. In this paper, we present ArSarcasm, an Arabic sarcasm detection dataset, which was created through the reannotation of available Arabic sentiment analysis datasets. The dataset contains 10,547 tweets, 16{\%} of which are sarcastic. In addition to sarcasm the data was annotated for sentiment and dialects. Our analysis shows the highly subjective nature of these tasks, which is demonstrated by the shift in sentiment labels based on annotators{'} biases. Experiments show the degradation of state-of-the-art sentiment analysers when faced with sarcastic content. Finally, we train a deep learning model for sarcasm detection using BiLSTM. The model achieves an F1 score of 0.46, which shows the challenging nature of the task, and should act as a basic baseline for future research on our dataset.},
  language  = {English},
  isbn      = {979-10-95546-51-1},
}

@article{zaidan-callison-burch-2014-arabic,
  title     = {{A}rabic Dialect Identification},
  author    = {Zaidan, Omar F. and Callison-Burch, Chris},
  journal   = {Computational Linguistics},
  volume    = {40},
  number    = {1},
  month     = mar,
  year      = {2014},
  address   = {Cambridge, MA},
  publisher = {MIT Press},
  url       = {https://aclanthology.org/J14-1006},
  doi       = {10.1162/COLI_a_00169},
  pages     = {171--202},
}

@inproceedings{abdul-mageed-etal-2021-arbert,
  title     = {{ARBERT} {\&} {MARBERT}: Deep Bidirectional Transformers for {A}rabic},
  author    = {Abdul-Mageed, Muhammad and Elmadany, AbdelRahim and Nagoudi, El Moatez Billah},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.acl-long.551},
  doi       = {10.18653/v1/2021.acl-long.551},
  pages     = {7088--7105},
  abstract  = {Pre-trained language models (LMs) are currently integral to many natural language processing systems. Although multilingual LMs were also introduced to serve many languages, these have limitations such as being costly at inference time and the size and diversity of non-English data involved in their pre-training. We remedy these issues for a collection of diverse Arabic varieties by introducing two powerful deep bidirectional transformer-based models, ARBERT and MARBERT. To evaluate our models, we also introduce ARLUE, a new benchmark for multi-dialectal Arabic language understanding evaluation. ARLUE is built using 42 datasets targeting six different task clusters, allowing us to offer a series of standardized experiments under rich conditions. When fine-tuned on ARLUE, our models collectively achieve new state-of-the-art results across the majority of tasks (37 out of 48 classification tasks, on the 42 datasets). Our best model acquires the highest ARLUE score (77.40) across all six task clusters, outperforming all other models including XLM-R Large ( 3.4x larger size). Our models are publicly available at https://github.com/UBC-NLP/marbert and ARLUE will be released through the same repository.},
}

@inproceedings{bouamor-etal-2019-madar,
  title     = {The {MADAR} Shared Task on {A}rabic Fine-Grained Dialect Identification},
  author    = {Bouamor, Houda and Hassan, Sabit and Habash, Nizar},
  booktitle = {Proceedings of the Fourth Arabic Natural Language Processing Workshop},
  month     = aug,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W19-4622},
  doi       = {10.18653/v1/W19-4622},
  pages     = {199--207},
  abstract  = {In this paper, we present the results and findings of the MADAR Shared Task on Arabic Fine-Grained Dialect Identification. This shared task was organized as part of The Fourth Arabic Natural Language Processing Workshop, collocated with ACL 2019. The shared task includes two subtasks: the MADAR Travel Domain Dialect Identification subtask (Subtask 1) and the MADAR Twitter User Dialect Identification subtask (Subtask 2). This shared task is the first to target a large set of dialect labels at the city and country levels. The data for the shared task was created or collected under the Multi-Arabic Dialect Applications and Resources (MADAR) project. A total of 21 teams from 15 countries participated in the shared task.},
}

@inproceedings{abdelali-etal-2021-qadi,
  title     = {{QADI}: {A}rabic Dialect Identification in the Wild},
  author    = {Abdelali, Ahmed and Mubarak, Hamdy and Samih, Younes and Hassan, Sabit and Darwish, Kareem},
  booktitle = {Proceedings of the Sixth Arabic Natural Language Processing Workshop},
  month     = apr,
  year      = {2021},
  address   = {Kyiv, Ukraine (Virtual)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.wanlp-1.1},
  pages     = {1--10},
  abstract  = {Proper dialect identification is important for a variety of Arabic NLP applications. In this paper, we present a method for rapidly constructing a tweet dataset containing a wide range of country-level Arabic dialects {---}covering 18 different countries in the Middle East and North Africa region. Our method relies on applying multiple filters to identify users who belong to different countries based on their account descriptions and to eliminate tweets that either write mainly in Modern Standard Arabic or mostly use vulgar language. The resultant dataset contains 540k tweets from 2,525 users who are evenly distributed across 18 Arab countries. Using intrinsic evaluation, we show that the labels of a set of randomly selected tweets are 91.5{\%} accurate. For extrinsic evaluation, we are able to build effective country level dialect identification on tweets with a macro-averaged F1-score of 60.6{\%} across 18 classes.},
}

@inproceedings{el-haj-2020-habibi,
  title     = {Habibi - a multi Dialect multi National {A}rabic Song Lyrics Corpus},
  author    = {El-Haj, Mahmoud},
  booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.lrec-1.165},
  pages     = {1318--1326},
  abstract  = {This paper introduces Habibi the first Arabic Song Lyrics corpus. The corpus comprises more than 30,000 Arabic song lyrics in 6 Arabic dialects for singers from 18 different Arabic countries. The lyrics are segmented into more than 500,000 sentences (song verses) with more than 3.5 million words. I provide the corpus in both comma separated value (csv) and annotated plain text (txt) file formats. In addition, I converted the csv version into JavaScript Object Notation (json) and eXtensible Markup Language (xml) file formats. To experiment with the corpus I run extensive binary and multi-class experiments for dialect and country-of-origin identification. The identification tasks include the use of several classical machine learning and deep learning models utilising different word embeddings. For the binary dialect identification task the best performing classifier achieved a testing accuracy of 93{\%}. This was achieved using a word-based Convolutional Neural Network (CNN) utilising a Continuous Bag of Words (CBOW) word embeddings model. The results overall show all classical and deep learning models to outperform our baseline, which demonstrates the suitability of the corpus for both dialect and country-of-origin identification tasks. I am making the corpus and the trained CBOW word embeddings freely available for research purposes.},
  language  = {English},
  isbn      = {979-10-95546-34-4},
}
Dialect at Country Level Task BibTex
@inproceedings{abu-farha-magdy-2020-arabic,
  title     = {From {A}rabic Sentiment Analysis to Sarcasm Detection: The {A}r{S}arcasm Dataset},
  author    = {Abu Farha, Ibrahim and Magdy, Walid},
  booktitle = {Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.osact-1.5},
  pages     = {32--39},
  abstract  = {Sarcasm is one of the main challenges for sentiment analysis systems. Its complexity comes from the expression of opinion using implicit indirect phrasing. In this paper, we present ArSarcasm, an Arabic sarcasm detection dataset, which was created through the reannotation of available Arabic sentiment analysis datasets. The dataset contains 10,547 tweets, 16{\%} of which are sarcastic. In addition to sarcasm the data was annotated for sentiment and dialects. Our analysis shows the highly subjective nature of these tasks, which is demonstrated by the shift in sentiment labels based on annotators{'} biases. Experiments show the degradation of state-of-the-art sentiment analysers when faced with sarcastic content. Finally, we train a deep learning model for sarcasm detection using BiLSTM. The model achieves an F1 score of 0.46, which shows the challenging nature of the task, and should act as a basic baseline for future research on our dataset.},
  language  = {English},
  isbn      = {979-10-95546-51-1},
}

@article{zaidan-callison-burch-2014-arabic,
  title     = {{A}rabic Dialect Identification},
  author    = {Zaidan, Omar F. and Callison-Burch, Chris},
  journal   = {Computational Linguistics},
  volume    = {40},
  number    = {1},
  month     = mar,
  year      = {2014},
  address   = {Cambridge, MA},
  publisher = {MIT Press},
  url       = {https://aclanthology.org/J14-1006},
  doi       = {10.1162/COLI_a_00169},
  pages     = {171--202},
}

@inproceedings{abdul-mageed-etal-2021-arbert,
  title     = {{ARBERT} {\&} {MARBERT}: Deep Bidirectional Transformers for {A}rabic},
  author    = {Abdul-Mageed, Muhammad and Elmadany, AbdelRahim and Nagoudi, El Moatez Billah},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.acl-long.551},
  doi       = {10.18653/v1/2021.acl-long.551},
  pages     = {7088--7105},
  abstract  = {Pre-trained language models (LMs) are currently integral to many natural language processing systems. Although multilingual LMs were also introduced to serve many languages, these have limitations such as being costly at inference time and the size and diversity of non-English data involved in their pre-training. We remedy these issues for a collection of diverse Arabic varieties by introducing two powerful deep bidirectional transformer-based models, ARBERT and MARBERT. To evaluate our models, we also introduce ARLUE, a new benchmark for multi-dialectal Arabic language understanding evaluation. ARLUE is built using 42 datasets targeting six different task clusters, allowing us to offer a series of standardized experiments under rich conditions. When fine-tuned on ARLUE, our models collectively achieve new state-of-the-art results across the majority of tasks (37 out of 48 classification tasks, on the 42 datasets). Our best model acquires the highest ARLUE score (77.40) across all six task clusters, outperforming all other models including XLM-R Large ( 3.4x larger size). Our models are publicly available at https://github.com/UBC-NLP/marbert and ARLUE will be released through the same repository.},
}

@inproceedings{bouamor-etal-2019-madar,
  title     = {The {MADAR} Shared Task on {A}rabic Fine-Grained Dialect Identification},
  author    = {Bouamor, Houda and Hassan, Sabit and Habash, Nizar},
  booktitle = {Proceedings of the Fourth Arabic Natural Language Processing Workshop},
  month     = aug,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W19-4622},
  doi       = {10.18653/v1/W19-4622},
  pages     = {199--207},
  abstract  = {In this paper, we present the results and findings of the MADAR Shared Task on Arabic Fine-Grained Dialect Identification. This shared task was organized as part of The Fourth Arabic Natural Language Processing Workshop, collocated with ACL 2019. The shared task includes two subtasks: the MADAR Travel Domain Dialect Identification subtask (Subtask 1) and the MADAR Twitter User Dialect Identification subtask (Subtask 2). This shared task is the first to target a large set of dialect labels at the city and country levels. The data for the shared task was created or collected under the Multi-Arabic Dialect Applications and Resources (MADAR) project. A total of 21 teams from 15 countries participated in the shared task.},
}

@inproceedings{abdelali-etal-2021-qadi,
  title     = {{QADI}: {A}rabic Dialect Identification in the Wild},
  author    = {Abdelali, Ahmed and Mubarak, Hamdy and Samih, Younes and Hassan, Sabit and Darwish, Kareem},
  booktitle = {Proceedings of the Sixth Arabic Natural Language Processing Workshop},
  month     = apr,
  year      = {2021},
  address   = {Kyiv, Ukraine (Virtual)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.wanlp-1.1},
  pages     = {1--10},
  abstract  = {Proper dialect identification is important for a variety of Arabic NLP applications. In this paper, we present a method for rapidly constructing a tweet dataset containing a wide range of country-level Arabic dialects {---}covering 18 different countries in the Middle East and North Africa region. Our method relies on applying multiple filters to identify users who belong to different countries based on their account descriptions and to eliminate tweets that either write mainly in Modern Standard Arabic or mostly use vulgar language. The resultant dataset contains 540k tweets from 2,525 users who are evenly distributed across 18 Arab countries. Using intrinsic evaluation, we show that the labels of a set of randomly selected tweets are 91.5{\%} accurate. For extrinsic evaluation, we are able to build effective country level dialect identification on tweets with a macro-averaged F1-score of 60.6{\%} across 18 classes.},
}

@inproceedings{el-haj-2020-habibi,
  title     = {Habibi - a multi Dialect multi National {A}rabic Song Lyrics Corpus},
  author    = {El-Haj, Mahmoud},
  booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.lrec-1.165},
  pages     = {1318--1326},
  abstract  = {This paper introduces Habibi the first Arabic Song Lyrics corpus. The corpus comprises more than 30,000 Arabic song lyrics in 6 Arabic dialects for singers from 18 different Arabic countries. The lyrics are segmented into more than 500,000 sentences (song verses) with more than 3.5 million words. I provide the corpus in both comma separated value (csv) and annotated plain text (txt) file formats. In addition, I converted the csv version into JavaScript Object Notation (json) and eXtensible Markup Language (xml) file formats. To experiment with the corpus I run extensive binary and multi-class experiments for dialect and country-of-origin identification. The identification tasks include the use of several classical machine learning and deep learning models utilising different word embeddings. For the binary dialect identification task the best performing classifier achieved a testing accuracy of 93{\%}. This was achieved using a word-based Convolutional Neural Network (CNN) utilising a Continuous Bag of Words (CBOW) word embeddings model. The results overall show all classical and deep learning models to outperform our baseline, which demonstrates the suitability of the corpus for both dialect and country-of-origin identification tasks. I am making the corpus and the trained CBOW word embeddings freely available for research purposes.},
  language  = {English},
  isbn      = {979-10-95546-34-4},
}
Dialect at Region Level Task BibTex
% References for the Dialect at Region Level task (six entries).

% ArSarcasm dataset paper; the data is also annotated for sentiment and dialects.
@inproceedings{abu-farha-magdy-2020-arabic,
  title     = "From {A}rabic Sentiment Analysis to Sarcasm Detection: The {A}r{S}arcasm Dataset",
  author    = "Abu Farha, Ibrahim and Magdy, Walid",
  booktitle = "Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection",
  month     = may,
  year      = "2020",
  address   = "Marseille, France",
  publisher = "European Language Resources Association",
  url       = "https://aclanthology.org/2020.osact-1.5",
  pages     = "32--39",
  abstract  = "Sarcasm is one of the main challenges for sentiment analysis systems. Its complexity comes from the expression of opinion using implicit indirect phrasing. In this paper, we present ArSarcasm, an Arabic sarcasm detection dataset, which was created through the reannotation of available Arabic sentiment analysis datasets. The dataset contains 10,547 tweets, 16{\%} of which are sarcastic. In addition to sarcasm the data was annotated for sentiment and dialects. Our analysis shows the highly subjective nature of these tasks, which is demonstrated by the shift in sentiment labels based on annotators{'} biases. Experiments show the degradation of state-of-the-art sentiment analysers when faced with sarcastic content. Finally, we train a deep learning model for sarcasm detection using BiLSTM. The model achieves an F1 score of 0.46, which shows the challenging nature of the task, and should act as a basic baseline for future research on our dataset.",
  language  = "English",
  isbn      = "979-10-95546-51-1",
}

% Journal article on Arabic dialect identification (Computational Linguistics 40(1)).
@article{zaidan-callison-burch-2014-arabic,
  title     = "{A}rabic Dialect Identification",
  author    = "Zaidan, Omar F. and Callison-Burch, Chris",
  journal   = "Computational Linguistics",
  volume    = "40",
  number    = "1",
  month     = mar,
  year      = "2014",
  address   = "Cambridge, MA",
  publisher = "MIT Press",
  url       = "https://aclanthology.org/J14-1006",
  doi       = "10.1162/COLI_a_00169",
  pages     = "171--202",
}

% ARBERT and MARBERT models paper; also introduces the ARLUE benchmark.
@inproceedings{abdul-mageed-etal-2021-arbert,
  title     = "{ARBERT} {\&} {MARBERT}: Deep Bidirectional Transformers for {A}rabic",
  author    = "Abdul-Mageed, Muhammad and Elmadany, AbdelRahim and Nagoudi, El Moatez Billah",
  booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
  month     = aug,
  year      = "2021",
  address   = "Online",
  publisher = "Association for Computational Linguistics",
  url       = "https://aclanthology.org/2021.acl-long.551",
  doi       = "10.18653/v1/2021.acl-long.551",
  pages     = "7088--7105",
  abstract  = "Pre-trained language models (LMs) are currently integral to many natural language processing systems. Although multilingual LMs were also introduced to serve many languages, these have limitations such as being costly at inference time and the size and diversity of non-English data involved in their pre-training. We remedy these issues for a collection of diverse Arabic varieties by introducing two powerful deep bidirectional transformer-based models, ARBERT and MARBERT. To evaluate our models, we also introduce ARLUE, a new benchmark for multi-dialectal Arabic language understanding evaluation. ARLUE is built using 42 datasets targeting six different task clusters, allowing us to offer a series of standardized experiments under rich conditions. When fine-tuned on ARLUE, our models collectively achieve new state-of-the-art results across the majority of tasks (37 out of 48 classification tasks, on the 42 datasets). Our best model acquires the highest ARLUE score (77.40) across all six task clusters, outperforming all other models including XLM-R Large ( 3.4x larger size). Our models are publicly available at https://github.com/UBC-NLP/marbert and ARLUE will be released through the same repository.",
}

% MADAR shared task overview (city- and country-level dialect labels).
@inproceedings{bouamor-etal-2019-madar,
  title     = "The {MADAR} Shared Task on {A}rabic Fine-Grained Dialect Identification",
  author    = "Bouamor, Houda and Hassan, Sabit and Habash, Nizar",
  booktitle = "Proceedings of the Fourth Arabic Natural Language Processing Workshop",
  month     = aug,
  year      = "2019",
  address   = "Florence, Italy",
  publisher = "Association for Computational Linguistics",
  url       = "https://aclanthology.org/W19-4622",
  doi       = "10.18653/v1/W19-4622",
  pages     = "199--207",
  abstract  = "In this paper, we present the results and findings of the MADAR Shared Task on Arabic Fine-Grained Dialect Identification. This shared task was organized as part of The Fourth Arabic Natural Language Processing Workshop, collocated with ACL 2019. The shared task includes two subtasks: the MADAR Travel Domain Dialect Identification subtask (Subtask 1) and the MADAR Twitter User Dialect Identification subtask (Subtask 2). This shared task is the first to target a large set of dialect labels at the city and country levels. The data for the shared task was created or collected under the Multi-Arabic Dialect Applications and Resources (MADAR) project. A total of 21 teams from 15 countries participated in the shared task.",
}

% QADI country-level dialect tweet dataset paper.
@inproceedings{abdelali-etal-2021-qadi,
  title     = "{QADI}: {A}rabic Dialect Identification in the Wild",
  author    = "Abdelali, Ahmed and Mubarak, Hamdy and Samih, Younes and Hassan, Sabit and Darwish, Kareem",
  booktitle = "Proceedings of the Sixth Arabic Natural Language Processing Workshop",
  month     = apr,
  year      = "2021",
  address   = "Kyiv, Ukraine (Virtual)",
  publisher = "Association for Computational Linguistics",
  url       = "https://aclanthology.org/2021.wanlp-1.1",
  pages     = "1--10",
  abstract  = "Proper dialect identification is important for a variety of Arabic NLP applications. In this paper, we present a method for rapidly constructing a tweet dataset containing a wide range of country-level Arabic dialects {---}covering 18 different countries in the Middle East and North Africa region. Our method relies on applying multiple filters to identify users who belong to different countries based on their account descriptions and to eliminate tweets that either write mainly in Modern Standard Arabic or mostly use vulgar language. The resultant dataset contains 540k tweets from 2,525 users who are evenly distributed across 18 Arab countries. Using intrinsic evaluation, we show that the labels of a set of randomly selected tweets are 91.5{\%} accurate. For extrinsic evaluation, we are able to build effective country level dialect identification on tweets with a macro-averaged F1-score of 60.6{\%} across 18 classes.",
}

% Habibi Arabic song lyrics corpus paper.
@inproceedings{el-haj-2020-habibi,
  title     = "Habibi - a multi Dialect multi National {A}rabic Song Lyrics Corpus",
  author    = "El-Haj, Mahmoud",
  booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
  month     = may,
  year      = "2020",
  address   = "Marseille, France",
  publisher = "European Language Resources Association",
  url       = "https://aclanthology.org/2020.lrec-1.165",
  pages     = "1318--1326",
  abstract  = "This paper introduces Habibi the first Arabic Song Lyrics corpus. The corpus comprises more than 30,000 Arabic song lyrics in 6 Arabic dialects for singers from 18 different Arabic countries. The lyrics are segmented into more than 500,000 sentences (song verses) with more than 3.5 million words. I provide the corpus in both comma separated value (csv) and annotated plain text (txt) file formats. In addition, I converted the csv version into JavaScript Object Notation (json) and eXtensible Markup Language (xml) file formats. To experiment with the corpus I run extensive binary and multi-class experiments for dialect and country-of-origin identification. The identification tasks include the use of several classical machine learning and deep learning models utilising different word embeddings. For the binary dialect identification task the best performing classifier achieved a testing accuracy of 93{\%}. This was achieved using a word-based Convolutional Neural Network (CNN) utilising a Continuous Bag of Words (CBOW) word embeddings model. The results overall show all classical and deep learning models to outperform our baseline, which demonstrates the suitability of the corpus for both dialect and country-of-origin identification tasks. I am making the corpus and the trained CBOW word embeddings freely available for research purposes.",
  language  = "English",
  isbn      = "979-10-95546-34-4",
}
Emotion Task BibTex
% AraNet toolkit paper; reference for the Emotion task.
@inproceedings{abdul-mageed-etal-2020-aranet,
  title     = "{A}ra{N}et: A Deep Learning Toolkit for {A}rabic Social Media",
  author    = "Abdul-Mageed, Muhammad and Zhang, Chiyu and Hashemi, Azadeh and Nagoudi, El Moatez Billah",
  booktitle = "Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection",
  month     = may,
  year      = "2020",
  address   = "Marseille, France",
  publisher = "European Language Resources Association",
  url       = "https://aclanthology.org/2020.osact-1.3",
  pages     = "16--23",
  abstract  = "We describe AraNet, a collection of deep learning Arabic social media processing tools. Namely, we exploit an extensive host of both publicly available and novel social media datasets to train bidirectional encoders from transformers (BERT) focused at social meaning extraction. AraNet models predict age, dialect, gender, emotion, irony, and sentiment. AraNet either delivers state-of-the-art performance on a number of these tasks and performs competitively on others. AraNet is exclusively based on a deep learning framework, giving it the advantage of being feature-engineering free. To the best of our knowledge, AraNet is the first to performs predictions across such a wide range of tasks for Arabic NLP. As such, AraNet has the potential to meet critical needs. We publicly release AraNet to accelerate research, and to facilitate model-based comparisons across the different tasks",
  language  = "English",
  isbn      = "979-10-95546-51-1",
}
Gender Task BibTex
% AraNet toolkit paper; reference for the Gender task.
@inproceedings{abdul-mageed-etal-2020-aranet,
  title     = "{A}ra{N}et: A Deep Learning Toolkit for {A}rabic Social Media",
  author    = "Abdul-Mageed, Muhammad and Zhang, Chiyu and Hashemi, Azadeh and Nagoudi, El Moatez Billah",
  booktitle = "Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection",
  month     = may,
  year      = "2020",
  address   = "Marseille, France",
  publisher = "European Language Resources Association",
  url       = "https://aclanthology.org/2020.osact-1.3",
  pages     = "16--23",
  abstract  = "We describe AraNet, a collection of deep learning Arabic social media processing tools. Namely, we exploit an extensive host of both publicly available and novel social media datasets to train bidirectional encoders from transformers (BERT) focused at social meaning extraction. AraNet models predict age, dialect, gender, emotion, irony, and sentiment. AraNet either delivers state-of-the-art performance on a number of these tasks and performs competitively on others. AraNet is exclusively based on a deep learning framework, giving it the advantage of being feature-engineering free. To the best of our knowledge, AraNet is the first to performs predictions across such a wide range of tasks for Arabic NLP. As such, AraNet has the potential to meet critical needs. We publicly release AraNet to accelerate research, and to facilitate model-based comparisons across the different tasks",
  language  = "English",
  isbn      = "979-10-95546-51-1",
}
Hate Speech Task BibTex
% OSACT4 shared task overview; Subtask B of this shared task is hate speech detection.
@inproceedings{mubarak-etal-2020-overview,
  title     = "Overview of {OSACT}4 {A}rabic Offensive Language Detection Shared Task",
  author    = "Mubarak, Hamdy and Darwish, Kareem and Magdy, Walid and Elsayed, Tamer and Al-Khalifa, Hend",
  booktitle = "Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection",
  month     = may,
  year      = "2020",
  address   = "Marseille, France",
  publisher = "European Language Resources Association",
  url       = "https://aclanthology.org/2020.osact-1.7",
  pages     = "48--52",
  abstract  = "This paper provides an overview of the offensive language detection shared task at the 4th workshop on Open-Source Arabic Corpora and Processing Tools (OSACT4). There were two subtasks, namely: Subtask A, involving the detection of offensive language, which contains unacceptable or vulgar content in addition to any kind of explicit or implicit insults or attacks against individuals or groups; and Subtask B, involving the detection of hate speech, which contains insults or threats targeting a group based on their nationality, ethnicity, race, gender, political or sport affiliation, religious belief, or other common characteristics. In total, 40 teams signed up to participate in Subtask A, and 14 of them submitted test runs. For Subtask B, 33 teams signed up to participate and 13 of them submitted runs. We present and analyze all submissions in this paper.",
  language  = "English",
  isbn      = "979-10-95546-51-1",
}
Irony Task BibTex
% Overview paper of the FIRE 2019 track on irony detection in Arabic tweets.
% Acronyms and the proper noun in the title are brace-protected so that
% sentence-casing bibliography styles cannot lowercase them.
@inproceedings{ghanem2019idat,
  title     = {{IDAT} at {FIRE2019}: Overview of the Track on Irony Detection in {Arabic} Tweets},
  author    = {Ghanem, Bilal and Karoui, Jihen and Benamara, Farah and Moriceau, V{\'e}ronique and Rosso, Paolo},
  booktitle = {Proceedings of the 11th Annual Meeting of the Forum for Information Retrieval Evaluation},
  pages     = {10--13},
  year      = {2019}
}
Machine Generation Task BibTex
% AraNews paper: generation and detection of machine-manipulated Arabic news.
@inproceedings{nagoudi-etal-2020-machine,
  title     = "Machine Generation and Detection of {A}rabic Manipulated and Fake News",
  author    = "Nagoudi, El Moatez Billah and Elmadany, AbdelRahim and Abdul-Mageed, Muhammad and Alhindi, Tariq",
  booktitle = "Proceedings of the Fifth Arabic Natural Language Processing Workshop",
  month     = dec,
  year      = "2020",
  address   = "Barcelona, Spain (Online)",
  publisher = "Association for Computational Linguistics",
  url       = "https://aclanthology.org/2020.wanlp-1.7",
  pages     = "69--84",
  abstract  = "Fake news and deceptive machine-generated text are serious problems threatening modern societies, including in the Arab world. This motivates work on detecting false and manipulated stories online. However, a bottleneck for this research is lack of sufficient data to train detection models. We present a novel method for automatically generating Arabic manipulated (and potentially fake) news stories. Our method is simple and only depends on availability of true stories, which are abundant online, and a part of speech tagger (POS). To facilitate future work, we dispense with both of these requirements altogether by providing AraNews, a novel and large POS-tagged news dataset that can be used off-the-shelf. Using stories generated based on AraNews, we carry out a human annotation study that casts light on the effects of machine manipulation on text veracity. The study also measures human ability to detect Arabic machine manipulated text generated by our method. Finally, we develop the first models for detecting manipulated Arabic news and achieve state-of-the-art results on Arabic fake news detection (macro F1=70.06). Our models and data are publicly available.",
}
Offensive Task BibTex
% OSACT4 shared task overview; Subtask A of this shared task is offensive language detection.
@inproceedings{mubarak-etal-2020-overview,
  title     = "Overview of {OSACT}4 {A}rabic Offensive Language Detection Shared Task",
  author    = "Mubarak, Hamdy and Darwish, Kareem and Magdy, Walid and Elsayed, Tamer and Al-Khalifa, Hend",
  booktitle = "Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection",
  month     = may,
  year      = "2020",
  address   = "Marseille, France",
  publisher = "European Language Resources Association",
  url       = "https://aclanthology.org/2020.osact-1.7",
  pages     = "48--52",
  abstract  = "This paper provides an overview of the offensive language detection shared task at the 4th workshop on Open-Source Arabic Corpora and Processing Tools (OSACT4). There were two subtasks, namely: Subtask A, involving the detection of offensive language, which contains unacceptable or vulgar content in addition to any kind of explicit or implicit insults or attacks against individuals or groups; and Subtask B, involving the detection of hate speech, which contains insults or threats targeting a group based on their nationality, ethnicity, race, gender, political or sport affiliation, religious belief, or other common characteristics. In total, 40 teams signed up to participate in Subtask A, and 14 of them submitted test runs. For Subtask B, 33 teams signed up to participate and 13 of them submitted runs. We present and analyze all submissions in this paper.",
  language  = "English",
  isbn      = "979-10-95546-51-1",
}
Sarcasm Task BibTex
% ArSarcasm dataset paper; reference for the Sarcasm task.
@inproceedings{abu-farha-magdy-2020-arabic,
  title     = "From {A}rabic Sentiment Analysis to Sarcasm Detection: The {A}r{S}arcasm Dataset",
  author    = "Abu Farha, Ibrahim and Magdy, Walid",
  booktitle = "Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection",
  month     = may,
  year      = "2020",
  address   = "Marseille, France",
  publisher = "European Language Resources Association",
  url       = "https://aclanthology.org/2020.osact-1.5",
  pages     = "32--39",
  abstract  = "Sarcasm is one of the main challenges for sentiment analysis systems. Its complexity comes from the expression of opinion using implicit indirect phrasing. In this paper, we present ArSarcasm, an Arabic sarcasm detection dataset, which was created through the reannotation of available Arabic sentiment analysis datasets. The dataset contains 10,547 tweets, 16{\%} of which are sarcastic. In addition to sarcasm the data was annotated for sentiment and dialects. Our analysis shows the highly subjective nature of these tasks, which is demonstrated by the shift in sentiment labels based on annotators{'} biases. Experiments show the degradation of state-of-the-art sentiment analysers when faced with sarcastic content. Finally, we train a deep learning model for sarcasm detection using BiLSTM. The model achieves an F1 score of 0.46, which shows the challenging nature of the task, and should act as a basic baseline for future research on our dataset.",
  language  = "English",
  isbn      = "979-10-95546-51-1",
}
Sentiment Analysis Task BibTex
% ARBERT and MARBERT models paper (introduces the ARLUE benchmark); reference for the Sentiment Analysis task.
@inproceedings{abdul-mageed-etal-2021-arbert,
  title     = "{ARBERT} {\&} {MARBERT}: Deep Bidirectional Transformers for {A}rabic",
  author    = "Abdul-Mageed, Muhammad and Elmadany, AbdelRahim and Nagoudi, El Moatez Billah",
  booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
  month     = aug,
  year      = "2021",
  address   = "Online",
  publisher = "Association for Computational Linguistics",
  url       = "https://aclanthology.org/2021.acl-long.551",
  doi       = "10.18653/v1/2021.acl-long.551",
  pages     = "7088--7105",
  abstract  = "Pre-trained language models (LMs) are currently integral to many natural language processing systems. Although multilingual LMs were also introduced to serve many languages, these have limitations such as being costly at inference time and the size and diversity of non-English data involved in their pre-training. We remedy these issues for a collection of diverse Arabic varieties by introducing two powerful deep bidirectional transformer-based models, ARBERT and MARBERT. To evaluate our models, we also introduce ARLUE, a new benchmark for multi-dialectal Arabic language understanding evaluation. ARLUE is built using 42 datasets targeting six different task clusters, allowing us to offer a series of standardized experiments under rich conditions. When fine-tuned on ARLUE, our models collectively achieve new state-of-the-art results across the majority of tasks (37 out of 48 classification tasks, on the 42 datasets). Our best model acquires the highest ARLUE score (77.40) across all six task clusters, outperforming all other models including XLM-R Large ( 3.4x larger size). Our models are publicly available at https://github.com/UBC-NLP/marbert and ARLUE will be released through the same repository.",
}
Aqmar NER Task BibTex
Arabic NER Corpus Task BibTex
Dialect Part Of Speech Task BibTex
% Multi-dialect Arabic POS tagging paper; reference for the Dialect Part Of Speech task.
% "Arabic", "POS" and "CRF" are brace-protected so that sentence-casing
% bibliography styles cannot lowercase them.
@inproceedings{darwish2018multi,
  title        = {Multi-Dialect {Arabic} {POS} Tagging: A {CRF} Approach},
  author       = {Darwish, Kareem and Mubarak, Hamdy and Eldesouki, Mohamed and Abdelali, Ahmed and Samih, Younes and Alharbi, Randah and Attia, Mohammed and Magdy, Walid and Kallmeyer, Laura},
  booktitle    = {11th Edition of the Language Resources and Evaluation Conference},
  pages        = {93--98},
  year         = {2018},
  organization = {European Language Resources Association (ELRA)}
}
MSA Part Of Speech Task BibTex
% XGLUE benchmark preprint (arXiv 2004.01401); reference for the MSA Part Of Speech task.
% The acronym in the title is brace-protected, and author names use the
% unambiguous "Last, First" form.
@misc{liang2020xglue,
  title         = {{XGLUE}: A New Benchmark Dataset for Cross-lingual Pre-training, Understanding and Generation},
  author        = {Liang, Yaobo and Duan, Nan and Gong, Yeyun and Wu, Ning and Guo, Fenfei and Qi, Weizhen and Gong, Ming and Shou, Linjun and Jiang, Daxin and Cao, Guihong and Fan, Xiaodong and Zhang, Ruofei and Agrawal, Rahul and Cui, Edward and Wei, Sining and Bharti, Taroon and Qiao, Ying and Chen, Jiun-Hung and Wu, Winnie and Liu, Shuguang and Yang, Fan and Campos, Daniel and Majumder, Rangan and Zhou, Ming},
  year          = {2020},
  eprint        = {2004.01401},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
Topic Task BibTex
Word Sense Disambiguation Task BibTex
% Arabic gloss-based WSD with BERT; reference for the Word Sense Disambiguation task.
% Percent signs in the abstract are written as {\%} because a raw % starts a
% LaTeX comment if a style ever typesets the field.
@article{app11062567,
  author         = {El-Razzaz, Mohammed and Fakhr, Mohamed Waleed and Maghraby, Fahima A.},
  title          = {{Arabic} Gloss {WSD} Using {BERT}},
  journal        = {Applied Sciences},
  volume         = {11},
  year           = {2021},
  number         = {6},
  article-number = {2567},
  url            = {https://www.mdpi.com/2076-3417/11/6/2567},
  issn           = {2076-3417},
  abstract       = {Word Sense Disambiguation (WSD) aims to predict the correct sense of a word given its context. This problem is of extreme importance in Arabic, as written words can be highly ambiguous; 43{\%} of diacritized words have multiple interpretations and the percentage increases to 72{\%} for non-diacritized words. Nevertheless, most Arabic written text does not have diacritical marks. Gloss-based WSD methods measure the semantic similarity or the overlap between the context of a target word that needs to be disambiguated and the dictionary definition of that word (gloss of the word). Arabic gloss WSD suffers from a lack of context-gloss datasets. In this paper, we present an Arabic gloss-based WSD technique. We utilize the celebrated Bidirectional Encoder Representation from Transformers (BERT) to build two models that can efficiently perform Arabic WSD. These models can be trained with few training samples since they utilize BERT models that were pretrained on a large Arabic corpus. Our experimental results show that our models outperform two of the most recent gloss-based WSDs when we test them against the same test data used to evaluate our model. Additionally, our model achieves an F1-score of 89{\%} compared to the best-reported F1-score of 85{\%} for knowledge-based Arabic WSD. Another contribution of this paper is introducing a context-gloss benchmark that may help to overcome the lack of a standardized benchmark for Arabic gloss-based WSD.},
  doi            = {10.3390/app11062567}
}