Publications | Dr. Idris Abdulmumin

2026

TACL

Beyond Majority Voting: Agreement-Based Clustering to Model Annotator Perspectives in Subjective NLP Tasks

Tadesse Destaw Belay, Ibrahim Said Ahmad, Idris Abdulmumin, and 6 more authors

Transactions of the Association for Computational Linguistics, 2026

Accepted; forthcoming (pre-MIT Press version)

% Journal (TACL) version. NOTE(review): the arXiv preprint of this same work is also listed under the key belay2026beyone; cite only one of the two.
@article{belay2026beyond,
  author  = {Belay, Tadesse Destaw and Ahmad, Ibrahim Said and Abdulmumin, Idris and Ayele, Abinew Ali and Gelbukh, Alexander and Ric{\'a}rdez-V{\'a}zquez, Eusebio and Kolesnikova, Olga and Muhammad, Shamsuddeen Hassan and Yimam, Seid Muhie},
  title   = {Beyond Majority Voting: Agreement-Based Clustering to Model Annotator Perspectives in Subjective {NLP} Tasks},
  journal = {Transactions of the Association for Computational Linguistics},
  year    = {2026},
  note    = {Accepted; forthcoming (pre-MIT Press version)},
  url     = {https://arxiv.org/abs/2605.09955},
}

arXiv

AfriScience-MT: Towards Decolonizing Science in Africa through Text Translation

Idris Abdulmumin, Tajuddeen Gwadabe, Shamsuddeen Hassan Muhammad, and 11 more authors

2026

Bib

% arXiv preprint; proper nouns in the title are brace-protected so sentence-casing styles keep their capitals.
@misc{abdulmumin2026afriscience,
  author        = {Abdulmumin, Idris and Gwadabe, Tajuddeen and Muhammad, Shamsuddeen Hassan and Adelani, David Ifeoluwa and Khalo, Nomonde and Ahmad, Ibrahim Said and Modupe, Abiodun and Mumm, Anina and Biyela, Sibusiso and Rabie, Michelle and Havemann, Johanna and Rei, Marek and Abbott, Jade and Marivate, Vukosi},
  title         = {{AfriScience-MT}: Towards Decolonizing Science in {Africa} through Text Translation},
  year          = {2026},
  eprint        = {2605.29741},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2605.29741}
}

arXiv

Temporal Simultaneity Predicts Annotation Quality in Sentiment Corpora

Idris Abdulmumin, Mokgadi Penelope Matloga, Tadesse Destaw Belay, and 5 more authors

2026

Bib

% arXiv preprint (cs.CL).
@misc{abdulmumin2026temporal,
  author        = {Abdulmumin, Idris and Matloga, Mokgadi Penelope and Belay, Tadesse Destaw and Kondowe, Botshelo and Mohleleng, Letlhogonolo and Letsoalo, Hareaipha Nkopo and Muhammad, Shamsuddeen Hassan and Marivate, Vukosi},
  title         = {Temporal Simultaneity Predicts Annotation Quality in Sentiment Corpora},
  year          = {2026},
  archiveprefix = {arXiv},
  eprint        = {2605.27239},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2605.27239}
}

arXiv

DimStance: Multilingual Datasets for Dimensional Stance Analysis

Jonas Becker, Liang-Chih Yu, Shamsuddeen Hassan Muhammad, and 14 more authors

2026

Bib

% arXiv preprint; the dataset name is brace-protected against style downcasing.
@misc{becker2026dimstancemultilingualdatasetsdimensional,
  author        = {Becker, Jonas and Yu, Liang-Chih and Muhammad, Shamsuddeen Hassan and Wahle, Jan Philip and Ruas, Terry and Abdulmumin, Idris and Lee, Lung-Hao and Odhiambo, Nelson and Wanzare, Lilian and Liu, Wen-Ni and Lin, Tzu-Mi and Xu, Zhe-Yu and Lin, Ying-Lung and Wang, Jin and Mukhtar, Maryam Ibrahim and Gipp, Bela and Mohammad, Saif M.},
  title         = {{DimStance}: Multilingual Datasets for Dimensional Stance Analysis},
  year          = {2026},
  eprint        = {2601.21483},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2601.21483}
}

ACL

CommonLID: Re-evaluating State-of-the-Art Language Identification Performance on Web Data

Pedro Ortiz Suarez, Laurie Burchell, Catherine Arnett, and 94 more authors

In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), 2026

DOI Bib

% ACL 2026 long paper. Accented letters use LaTeX escapes (as in the other entries of this file) and the author list is kept on a single line.
@inproceedings{suarez2026commonlidreevaluatingstateoftheartlanguage,
  title = {{CommonLID}: Re-evaluating State-of-the-Art Language Identification Performance on Web Data},
  author = {Suarez, Pedro Ortiz and Burchell, Laurie and Arnett, Catherine and Mosquera-G{\'o}mez, Rafael and Hincapie-Monsalve, Sara and Vaughan, Thom and Stewart, Damian and Ostendorff, Malte and Abdulmumin, Idris and Marivate, Vukosi and Muhammad, Shamsuddeen Hassan and Tonja, Atnafu Lambebo and Al-Khalifa, Hend and Hammouda, Nadia Ghezaiel and Otiende, Verrah and Wong, Tack Hwa and Saydaliev, Jakhongir and Nobakhtian, Melika and Habibi, Muhammad Ravi Shulthan and Kranti, Chalamalasetti and Muchemi, Carol and Nguyen, Khang and Adam, Faisal Muhammad and Salim, Luis Frentzen and Alqifari, Reem and Amol, Cynthia and Imperial, Joseph Marvin and Kesen, Ilker and Mustafid, Ahmad and Stepachev, Pavel and Choshen, Leshem and Anugraha, David and Nayel, Hamada and Yimam, Seid Muhie and Putra, Vallerie Alexandra and Nguyen, My Chiffon and Wasi, Azmine Toushik and Vadithya, Gouthami and van der Goot, Rob and ar C'horr, Lanwenn and Dua, Karan and Yates, Andrew and Bangera, Mithil and Bangera, Yeshil and Patel, Hitesh Laxmichand and Okabe, Shu and Ilasariya, Fenal Ashokbhai and Gaynullin, Dmitry and Winata, Genta Indra and Li, Yiyuan and Mart{\'\i}nez, Juan Pablo and Agarwal, Amit and Hanif, Ikhlasul Akmal and Ahmad, Raia Abu and Adenuga, Esther and Tjiaranata, Filbert Aurelian and Buaphet, Weerayut and Anugraha, Michael and Vajjala, Sowmya and Rice, Benjamin and Amirudin, Azril Hafizi and Alabi, Jesujoba O. and Panda, Srikant and Toughrai, Yassine and Kyomuhendo, Bruhan and Ruffinelli, Daniel and A, Akshata and Goul{\~a}o, Manuel and Zhou, Ej and Ramirez, Ingrid Gabriela Franco and Aggazzotti, Cristina and Dobler, Konstantin and Kevin, Jun and Pag{\`e}s, Quentin and Andrews, Nicholas and Ibrahim, Nuhu and Ruckdeschel, Mattes and Keleg, Amr and Zhang, Mike and Muziri, Casper and Samuel, Saron and Takeshita, Sotaro and Kerdthaisong, Kun and Foppiano, Luca and Dent, Rasul and Green, Tommaso and Wali, Ahmad Mustapha and Makaaka, Kamohelo and Feliren, Vicky and Idris, Inshirah and Celikkanat, Hande and Abubakar, Abdulhamid and Maillard, Jean and Sagot, Beno{\^\i}t and Cl{\'e}rice, Thibault and Murray, Kenton and Luger, Sarah},
  booktitle = {Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  year = {2026},
  address = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2026.acl-long.1527/},
  doi = {10.18653/v1/2026.acl-long.1527},
  pages = {33063--33080}
}

ACL

Afri-MCQA: Multimodal Cultural Question Answering for African Languages

Atnafu Lambebo Tonja, Srija Anand, Emilio Villa-Cueva, and 16 more authors

In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), 2026

DOI Bib

% ACL 2026 long paper; title proper nouns are brace-protected and the bare initial in one author name gets its period.
@inproceedings{tonja2026afrimcqamultimodalculturalquestion,
  title = {{Afri-MCQA}: Multimodal Cultural Question Answering for {African} Languages},
  author = {Tonja, Atnafu Lambebo and Anand, Srija and Villa-Cueva, Emilio and Azime, Israel Abebe and Alabi, Jesujoba Oluwadara and Mohamed, Muhidin A. and Yadeta, Debela Desalegn and Abadi, Negasi Haile and Oppong, Abigail and Obiefuna, Nnaemeka Casmir and Abdulmumin, Idris and Etori, Naome A. and Wairagala, Eric Peter and Tshinu, Kanda Patrick and Emmanuel, Imanigirimbabazi and Malema, Gabofetswe and Aji, Alham Fikri and Adelani, David Ifeoluwa and Solorio, Thamar},
  booktitle = {Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  year = {2026},
  address = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2026.acl-long.1869/},
  doi = {10.18653/v1/2026.acl-long.1869},
  pages = {40249--40282}
}

arXiv

Swivuriso: The South African Next Voices Multilingual Speech Dataset

Vukosi Marivate, Kayode Olaleye, Sitwala Mundia, and 19 more authors

2026

Bib

% arXiv preprint. The surname particle is kept with the family name ("Van Wyk, Nia Zion") so the name sorts and abbreviates correctly.
@misc{marivate2026swivurisosouthafricanvoices,
  title = {{Swivuriso}: The {South African} {Next Voices} Multilingual Speech Dataset},
  author = {Marivate, Vukosi and Olaleye, Kayode and Mundia, Sitwala and Bakainga, Andinda and Netshifhefhe, Unarine and Milanzie, Mahmooda and Mogale, Tsholofelo Hope and Sindane, Thapelo and Abdulrasaq, Zainab and Mokgosi, Kesego and Okorie, Chijioke and Van Wyk, Nia Zion and Morrissey, Graham and Dunbar, Dale and Smit, Francois and Chidi, Tsosheletso and Mabuya, Rooweither and Bukula, Andiswa and Mlambo, Respect and Macucwa, Tebogo and Abdulmumin, Idris and Rananga, Seani},
  year = {2026},
  eprint = {2512.02201},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
  url = {https://arxiv.org/abs/2512.02201}
}

AfricaNLP

Full Fine-Tuning vs. Parameter-Efficient Adaptation for Low-Resource African ASR: A Controlled Study with Whisper-Small

Sukairaj Hafiz Imam, Muhammad Yahuza Bello, Hadiza Ali Umar, and 4 more authors

In Proceedings of the 7th Workshop on African Natural Language Processing (AfricaNLP 2026), Mar 2026

Bib

% AfricaNLP 2026 workshop paper; the model name is brace-protected so it is not downcased to "whisper-small".
@inproceedings{imam-etal-2026-full,
  title = {Full Fine-Tuning vs. Parameter-Efficient Adaptation for Low-Resource {A}frican {ASR}: A Controlled Study with {Whisper-Small}},
  author = {Imam, Sukairaj Hafiz and Bello, Muhammad Yahuza and Umar, Hadiza Ali and Belay, Tadesse Destaw and Abdulmumin, Idris and Yimam, Seid Muhie and Muhammad, Shamsuddeen Hassan},
  editor = {Chimoto, Everlyn Asiko and Lignos, Constantine and Muhammad, Shamsuddeen and Abdulmumin, Idris and Siro, Clemencia and Adelani, David Ifeoluwa},
  booktitle = {Proceedings of the 7th Workshop on {A}frican Natural Language Processing ({A}frica{NLP} 2026)},
  month = mar,
  year = {2026},
  address = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2026.africanlp-main.19/},
  pages = {197--203},
  isbn = {979-8-89176-364-7}
}

AfricaNLP

Trust but Check: LLM-Assisted Review of Human Translations in African Languages

Tadesse Destaw Belay, Henok Biadglign Ademtew, Idris Abdulmumin, and 24 more authors

In 7th Workshop on African Natural Language Processing, Mar 2026

Bib

% OpenReview export. The all-caps author name is normalised to ordinary capitalisation and the proper noun in the title is brace-protected.
@inproceedings{belay2026trust,
  title = {Trust but Check: {LLM}-Assisted Review of Human Translations in {African} Languages},
  author = {Belay, Tadesse Destaw and Ademtew, Henok Biadglign and Abdulmumin, Idris and Imam, Sukairaj Hafiz and Chilala, Abubakar Juma and Agyapong, Godfred and Mbonu, Chinedu Emmanuel and Ovu, Basil Friday and Essuman, Catherine Nana Nyaah and Kondoro, Alfred Malengo and Adhiambo, Sonia and Abolade, Daud and Mpholle, Ponts'o and Ladislaus, Nicholaus Dismas and Aliyu, Saminu Mohammad and Samuel, Gali Ahmad and Hakuzimana, Fabrice and Nzirainengwe, Mike and Olatoye, Temitayo and Haile, Sileshi Bogale and Achamaleh, Tewodros and Abiola, Tolulope Olalekan and Hussen, Kedir Yassin and Ahmad, Ibrahim Said and Otiende, Verrah Akinyi and Yimam, Seid Muhie and Muhammad, Shamsuddeen Hassan},
  booktitle = {7th Workshop on African Natural Language Processing},
  year = {2026},
  url = {https://openreview.net/forum?id=8B2WDhIAMV}
}

AfricaNLP

The Rise of AfricaNLP: Contributions, Contributors, and Community Impact (2005–2025)

Tadesse Destaw Belay, Kedir Yassin Hussen, Sukairaj Hafiz Imam, and 10 more authors

In 7th Workshop on African Natural Language Processing, Mar 2026

Bib

% OpenReview export. NOTE(review): the author token "Aabr" is a bare single name and looks like a truncated export; confirm it against the paper.
@inproceedings{belay2026the,
  title = {The Rise of {AfricaNLP}: Contributions, Contributors, and Community Impact (2005{\textendash}2025)},
  author = {Belay, Tadesse Destaw and Hussen, Kedir Yassin and Imam, Sukairaj Hafiz and Ahmad, Ibrahim Said and Inuwa-Dutse, Isa and Aabr and Sidorov, Grigori and Ameer, Iqra and Abdulmumin, Idris and Gwadabe, Tajuddeen and Marivate, Vukosi and Yimam, Seid Muhie and Muhammad, Shamsuddeen Hassan},
  booktitle = {7th Workshop on African Natural Language Processing},
  year = {2026},
  url = {https://openreview.net/forum?id=DWCxhpad1k}
}

ACL

POLAR: A Benchmark for Multilingual, Multicultural, and Multi-Event Online Polarization

Usman Naseem, Juan Ren, Saba Anwar, and 14 more authors

In Findings of the Association for Computational Linguistics: ACL 2026, Mar 2026

DOI Bib

% Findings of ACL 2026. The key keeps its original "2025" for backward compatibility with existing citations.
@inproceedings{naseem2025polarbenchmarkmultilingualmulticultural,
  title = {{POLAR}: A Benchmark for Multilingual, Multicultural, and Multi-Event Online Polarization},
  author = {Naseem, Usman and Ren, Juan and Anwar, Saba and Kohail, Sarah and Veliz, Rudy Alexandro Garrido and Geislinger, Robert and Jabr, Aisha and Abdulmumin, Idris and Qureshi, Laiba and Borkar, Aarushi Ajay and Mukhtar, Maryam Ibrahim and Ayele, Abinew Ali and Ahmad, Ibrahim Said and Ali, Adem and Semmann, Martin and Muhammad, Shamsuddeen Hassan and Yimam, Seid Muhie},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2026},
  year = {2026},
  address = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2026.findings-acl.1433/},
  doi = {10.18653/v1/2026.findings-acl.1433},
  pages = {28699--28720}
}

arXiv

Beyond Majority Voting: Agreement-Based Clustering to Model Annotator Perspectives in Subjective NLP Tasks

Tadesse Destaw Belay, Ibrahim Said Ahmad, Idris Abdulmumin, and 6 more authors

May 2026

Bib

% arXiv preprint. NOTE(review): the journal version of this same work is also listed under the key belay2026beyond; cite only one of the two.
@misc{belay2026beyone,
  title = {Beyond Majority Voting: Agreement-Based Clustering to Model Annotator Perspectives in Subjective {NLP} Tasks},
  author = {Belay, Tadesse Destaw and Ahmad, Ibrahim Said and Abdulmumin, Idris and Ayele, Abinew Ali and Gelbukh, Alexander and Ric{\'a}rdez-V{\'a}zquez, Eusebio and Kolesnikova, Olga and Muhammad, Shamsuddeen Hassan and Yimam, Seid Muhie},
  year = {2026},
  month = may,
  eprint = {2605.09955},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
  url = {https://arxiv.org/abs/2605.09955}
}

arXiv

NaijaS2ST: A Multi-Accent Benchmark for Speech-to-Speech Translation in Low-Resource Nigerian Languages

Marie Maltais, Yejin Jeon, Min Ma, and 7 more authors

Apr 2026

Bib

% arXiv preprint (cs.SD); the benchmark name and the demonym are brace-protected against style downcasing.
@misc{maltais2026naijas2st,
  title = {{NaijaS2ST}: A Multi-Accent Benchmark for Speech-to-Speech Translation in Low-Resource {Nigerian} Languages},
  author = {Maltais, Marie and Jeon, Yejin and Ma, Min and Muhammad, Shamsuddeen Hassan and Abdulmumin, Idris and Mukhtar, Maryam Ibrahim and Abolade, Daud and Okepefi, Joel and Sewedo, Johnson and Adelani, David Ifeoluwa},
  year = {2026},
  month = apr,
  eprint = {2604.16287},
  archiveprefix = {arXiv},
  primaryclass = {cs.SD},
  url = {https://arxiv.org/abs/2604.16287}
}

SemEval

SemEval-2026 Task 3: Dimensional Aspect-Based Sentiment Analysis (DimABSA)

Liang-Chih Yu, Jonas Becker, Shamsuddeen Hassan Muhammad, and 14 more authors

Apr 2026

Bib

% arXiv preprint of the shared-task overview; the task and dataset names are brace-protected against style downcasing.
@misc{yu2026semeval,
  title = {{SemEval-2026} Task 3: Dimensional Aspect-Based Sentiment Analysis ({DimABSA})},
  author = {Yu, Liang-Chih and Becker, Jonas and Muhammad, Shamsuddeen Hassan and Abdulmumin, Idris and Lee, Lung-Hao and Lin, Ying-Lung and Wang, Jin and Wahle, Jan Philip and Ruas, Terry and Loukachevitch, Natalia and Panchenko, Alexander and Alimova, Ilseyar and Wanzare, Lilian and Odhiambo, Nelson and Gipp, Bela and Chang, Kai-Wei and Mohammad, Saif M.},
  year = {2026},
  month = apr,
  eprint = {2604.07066},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
  url = {https://arxiv.org/abs/2604.07066}
}

SemEval

SemEval-2026 Task 9: Detecting Multilingual, Multicultural and Multievent Online Polarization

Usman Naseem, Robert Geislinger, Juan Ren, and 31 more authors

Apr 2026

Bib

% arXiv preprint of the shared-task overview. Every author is in "Last, First" form, including the one name that was exported as "First Last".
@misc{naseem2026semeval,
  title = {{SemEval-2026} Task 9: Detecting Multilingual, Multicultural and Multievent Online Polarization},
  author = {Naseem, Usman and Geislinger, Robert and Ren, Juan and Kohail, Sarah and Veliz, Rudy Garrido and Sahil, P Sam and Zhang, Yiran and Stranisci, Marco Antonio and Abdulmumin, Idris and Ala{\c{c}}am, {\"O}zge and Acart{\"u}rk, Cengiz and Jabr, Aisha and Anwar, Saba and Ayele, Abinew Ali and Tutubalina, Elena and Htet, Aung Kyaw and Wang, Xintong and Thapa, Surendrabikram and Chakraborty, Tanmoy and Kodati, Dheeraj and Moradizeyveh, Sahar and Alam, Firoj and Thu, Ye Kyaw and Parida, Shantipriya and Qazi, Ihsan Ayyub and Wanzare, Lilian and Onyango, Nelson Odhiambo and Siro, Clemencia and Ahmad, Ibrahim Said and Ali, Adem Chanie and Semmann, Martin and Biemann, Chris and Muhammad, Shamsuddeen Hassan and Yimam, Seid Muhie},
  year = {2026},
  month = apr,
  eprint = {2604.06817},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
  url = {https://arxiv.org/abs/2604.06817}
}

2025

NAACL

AfriHate: A Multilingual Collection of Hate Speech and Abusive Language Datasets for African Languages

Shamsuddeen Hassan Muhammad, Idris Abdulmumin, Abinew Ali Ayele, and 24 more authors

In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), Apr 2025

IRCAI Top 100 Bib

Listed in the IRCAI Global Top 100 Outstanding AI Projects Advancing the SDGs (UNESCO).

% NAACL 2025 long paper.
@inproceedings{muhammad-etal-2025-afrihate,
  author    = {Muhammad, Shamsuddeen Hassan and Abdulmumin, Idris and Ayele, Abinew Ali and Adelani, David Ifeoluwa and Ahmad, Ibrahim Said and Aliyu, Saminu Mohammad and R{\"o}ttger, Paul and Oppong, Abigail and Bukula, Andiswa and Chukwuneke, Chiamaka Ijeoma and Jibril, Ebrahim Chekol and Ismail, Elyas Abdi and Alemneh, Esubalew and Gebremichael, Hagos Tesfahun and Aliyu, Lukman Jibril and Beloucif, Meriem and Hourrane, Oumaima and Mabuya, Rooweither and Osei, Salomey and Rutunda, Samuel and Belay, Tadesse Destaw and Guge, Tadesse Kebede and Asfaw, Tesfa Tegegne and Wanzare, Lilian Diana Awuor and Onyango, Nelson Odhiambo and Yimam, Seid Muhie and Ousidhoum, Nedjma},
  title     = {{A}fri{H}ate: A Multilingual Collection of Hate Speech and Abusive Language Datasets for {A}frican Languages},
  editor    = {Chiruzzo, Luis and Ritter, Alan and Wang, Lu},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  year      = {2025},
  month     = apr,
  pages     = {1854--1871},
  publisher = {Association for Computational Linguistics},
  address   = {Albuquerque, New Mexico},
  isbn      = {979-8-89176-189-6},
  url       = {https://aclanthology.org/2025.naacl-long.92/}
}

ACL

BRIGHTER: BRIdging the Gap in Human-Annotated Textual Emotion Recognition Datasets for 28 Languages

Shamsuddeen Hassan Muhammad, Nedjma Ousidhoum, Idris Abdulmumin, and 45 more authors

In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), Jul 2025

Best Resource Paper DOI Bib

Best Resource Paper at ACL 2025.

% ACL 2025 long paper.
@inproceedings{muhammad-etal-2025-brighter,
  author    = {Muhammad, Shamsuddeen Hassan and Ousidhoum, Nedjma and Abdulmumin, Idris and Wahle, Jan Philip and Ruas, Terry and Beloucif, Meriem and de Kock, Christine and Surange, Nirmal and Teodorescu, Daniela and Ahmad, Ibrahim Said and Adelani, David Ifeoluwa and Aji, Alham Fikri and Ali, Felermino D. M. A. and Alimova, Ilseyar and Araujo, Vladimir and Babakov, Nikolay and Baes, Naomi and Bucur, Ana-Maria and Bukula, Andiswa and Cao, Guanqun and Tufi{\~n}o, Rodrigo and Chevi, Rendi and Chukwuneke, Chiamaka Ijeoma and Ciobotaru, Alexandra and Dementieva, Daryna and Gadanya, Murja Sani and Geislinger, Robert and Gipp, Bela and Hourrane, Oumaima and Ignat, Oana and Lawan, Falalu Ibrahim and Mabuya, Rooweither and Mahendra, Rahmad and Marivate, Vukosi and Panchenko, Alexander and Piper, Andrew and Ferreira, Charles Henrique Porto and Protasov, Vitaly and Rutunda, Samuel and Shrivastava, Manish and Udrea, Aura Cristina and Wanzare, Lilian Diana Awuor and Wu, Sophie and Wunderlich, Florian Valentin and Zhafran, Hanif Muhammad and Zhang, Tianhui and Zhou, Yi and Mohammad, Saif M.},
  title     = {{BRIGHTER}: {BRI}dging the Gap in Human-Annotated Textual Emotion Recognition Datasets for 28 Languages},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  year      = {2025},
  month     = jul,
  pages     = {8895--8916},
  publisher = {Association for Computational Linguistics},
  address   = {Vienna, Austria},
  isbn      = {979-8-89176-251-0},
  doi       = {10.18653/v1/2025.acl-long.436},
  url       = {https://aclanthology.org/2025.acl-long.436/}
}

DiB

ZASCA-Sum: A Dataset of the South Africa Supreme Courts of Appeal Judgments and Media Summaries for Legal Documents Summarization Research

Idris Abdulmumin and Vukosi Marivate

Data in Brief, Jul 2025

DOI Bib

% Data in Brief article. The doi field holds the bare DOI (styles add the resolver prefix themselves); "pages" carries the article number.
@article{ABDULMUMIN2025111567,
  author  = {Abdulmumin, Idris and Marivate, Vukosi},
  title   = {{ZASCA-Sum}: A Dataset of the {South Africa} {Supreme Courts of Appeal} Judgments and Media Summaries for Legal Documents Summarization Research},
  journal = {Data in Brief},
  pages   = {111567},
  year    = {2025},
  issn    = {2352-3409},
  doi     = {10.1016/j.dib.2025.111567},
  url     = {https://www.sciencedirect.com/science/article/pii/S2352340925002999}
}

WMT

Findings of the WMT 2025 Shared Task of the Open Language Data Initiative

David Dale, Laurie Burchell, Jean Maillard, and 4 more authors

In Proceedings of the Tenth Conference on Machine Translation, Nov 2025

DOI Bib

% WMT 2025 shared-task findings paper.
@inproceedings{dale-etal-2025-findings,
  author    = {Dale, David and Burchell, Laurie and Maillard, Jean and Abdulmumin, Idris and Anastasopoulos, Antonios and Caswell, Isaac and Koehn, Philipp},
  title     = {Findings of the {WMT} 2025 Shared Task of the Open Language Data Initiative},
  editor    = {Haddow, Barry and Kocmi, Tom and Koehn, Philipp and Monz, Christof},
  booktitle = {Proceedings of the Tenth Conference on Machine Translation},
  year      = {2025},
  month     = nov,
  pages     = {495--502},
  publisher = {Association for Computational Linguistics},
  address   = {Suzhou, China},
  isbn      = {979-8-89176-341-8},
  doi       = {10.18653/v1/2025.wmt-1.26},
  url       = {https://aclanthology.org/2025.wmt-1.26/}
}

EMNLP

AfroXLMR-Social: Adapting Pre-trained Language Models for African Languages Social Media Text

Tadesse Destaw Belay, Israel Abebe Azime, Ibrahim Said Ahmad, and 5 more authors

In Findings of the Association for Computational Linguistics: EMNLP 2025, Nov 2025

DOI Bib

% Findings of EMNLP 2025.
@inproceedings{belay-etal-2025-afroxlmr,
  author    = {Belay, Tadesse Destaw and Azime, Israel Abebe and Ahmad, Ibrahim Said and Adelani, David Ifeoluwa and Abdulmumin, Idris and Ayele, Abinew Ali and Muhammad, Shamsuddeen Hassan and Yimam, Seid Muhie},
  title     = {{A}fro{XLMR}-Social: Adapting Pre-trained Language Models for {A}frican Languages Social Media Text},
  editor    = {Christodoulopoulos, Christos and Chakraborty, Tanmoy and Rose, Carolyn and Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2025},
  year      = {2025},
  month     = nov,
  pages     = {15570--15587},
  publisher = {Association for Computational Linguistics},
  address   = {Suzhou, China},
  isbn      = {979-8-89176-335-7},
  doi       = {10.18653/v1/2025.findings-emnlp.842},
  url       = {https://aclanthology.org/2025.findings-emnlp.842/}
}

SemEval

SemEval-2025 Task 11: Bridging the Gap in Text-Based Emotion Detection

Shamsuddeen Hassan Muhammad, Nedjma Ousidhoum, Idris Abdulmumin, and 18 more authors

In Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025), Jul 2025

Best Task Abs Bib

Best Shared Task at SemEval-2025.

We present our shared task on text-based emotion detection, covering more than 30 languages from seven distinct language families. These languages are predominantly low-resource and spoken across various continents. The data instances are multi-labeled into six emotional classes, with additional datasets in 11 languages annotated for emotion intensity. Participants were asked to predict labels in three tracks: (a) emotion labels in monolingual settings, (b) emotion intensity scores, and (c) emotion labels in cross-lingual settings.

% SemEval-2025 shared-task overview paper.
@inproceedings{muhammad-etal-2025-semeval,
  author    = {Muhammad, Shamsuddeen Hassan and Ousidhoum, Nedjma and Abdulmumin, Idris and Yimam, Seid Muhie and Wahle, Jan Philip and Lima Ruas, Terry and Beloucif, Meriem and De Kock, Christine and Belay, Tadesse Destaw and Ahmad, Ibrahim Said and Surange, Nirmal and Teodorescu, Daniela and Adelani, David Ifeoluwa and Aji, Alham Fikri and Ali, Felermino Dario Mario and Araujo, Vladimir and Ayele, Abinew Ali and Ignat, Oana and Panchenko, Alexander and Zhou, Yi and Mohammad, Saif},
  title     = {{S}em{E}val-2025 Task 11: Bridging the Gap in Text-Based Emotion Detection},
  editor    = {Rosenthal, Sara and Ros{\'a}, Aiala and Ghosh, Debanjan and Zampieri, Marcos},
  booktitle = {Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)},
  year      = {2025},
  month     = jul,
  pages     = {2558--2569},
  publisher = {Association for Computational Linguistics},
  address   = {Vienna, Austria},
  isbn      = {979-8-89176-273-2},
  url       = {https://aclanthology.org/2025.semeval-1.327/}
}

IWSLT

Findings of the IWSLT 2025 Evaluation Campaign

Idris Abdulmumin, Victor Agostinelli, Tanel Alumäe, and 49 more authors

In Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025), Jul 2025

Abs DOI Bib

This paper presents the outcomes of the shared tasks conducted at the 22nd International Workshop on Spoken Language Translation (IWSLT). The workshop addressed seven critical challenges in spoken language translation: simultaneous and offline translation, automatic subtitling and dubbing, model compression, speech-to-speech translation, dialect and low-resource speech translation, and Indic languages. The shared tasks garnered significant participation, with 32 teams submitting their runs. The field’s growing importance is reflected in the increasing diversity of shared task organizers and contributors to this overview paper, representing a balanced mix of industrial and academic institutions. This broad participation demonstrates the rising prominence of spoken language translation in both research and practical applications.

% IWSLT 2025 findings paper. Four author names had a middle name or initial in the surname slot; they are re-split as "Last, First Middle" so they sort and abbreviate correctly. The key is unchanged.
@inproceedings{agostinelli-etal-2025-findings,
  title = {Findings of the {IWSLT} 2025 Evaluation Campaign},
  author = {Abdulmumin, Idris and Agostinelli, Victor and Alum{\"a}e, Tanel and Anastasopoulos, Antonios and Bentivogli, Luisa and Bojar, Ond{\v{r}}ej and Borg, Claudia and Bougares, Fethi and Cattoni, Roldano and Cettolo, Mauro and Chen, Lizhong and Chen, William and Dabre, Raj and Est{\`e}ve, Yannick and Federico, Marcello and Fishel, Mark and Gaido, Marco and Javorsk{\'y}, D{\'a}vid and Kasztelnik, Marek and Kponou, Fortun{\'e} and Krubi{\'n}ski, Mateusz and Lam, Tsz Kin and Liu, Danni and Matusov, Evgeny and Maurya, Chandresh Kumar and McCrae, John P. and Mdhaffar, Salima and Moslem, Yasmin and Murray, Kenton and Nakamura, Satoshi and Negri, Matteo and Niehues, Jan and Ojha, Atul Kr. and Ortega, John E. and Papi, Sara and Pecina, Pavel and Pol{\'a}k, Peter and Po{\l}e{\'c}, Piotr and Sankar, Ashwin and Savoldi, Beatrice and Sethiya, Nivedita and Sikasote, Claytone and Sperber, Matthias and St{\"u}ker, Sebastian and Sudoh, Katsuhito and Thompson, Brian and Turchi, Marco and Waibel, Alex and Wilken, Patrick and Zevallos, Rodolfo and Zouhar, Vil{\'e}m and Z{\"u}fle, Maike},
  editor = {Salesky, Elizabeth and Federico, Marcello and Anastasopoulos, Antonis},
  booktitle = {Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025)},
  month = jul,
  year = {2025},
  address = {Vienna, Austria (in-person and online)},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2025.iwslt-1.44/},
  doi = {10.18653/v1/2025.iwslt-1.44},
  pages = {412--481},
  isbn = {979-8-89176-272-5}
}

IWSLT
QUESPA Submission for the IWSLT 2025 Dialectal and Low-resource Speech Translation Task

John E. Ortega, Rodolfo Joel Zevallos, William Chen, and 1 more author

In Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025), Jul 2025

Abs DOI Bib

This article describes the QUESPA team speech translation (ST) submissions for the Quechua to Spanish (QUE-SPA) track featured in the Evaluation Campaign of IWSLT 2025: dialectal and low-resource speech translation. This year, there is one main submission type supported in the campaign: unconstrained. This is our third year submitting our ST systems to the IWSLT shared task and we feel that we have achieved novel performance, surpassing last year’s submission. This year we submit three total unconstrained-only systems of which our best (contrastive 2) system uses last year’s best performing pre-trained language (PLM) model for ST (without cascading) and the inclusion of additional Quechua–Collao speech transcriptions found online. Fine-tuning of Microsoft’s SpeechT5 model in a ST setting along with the addition of new data and a data augmentation technique allowed us to achieve 26.7 BLEU. In this article, we present the three submissions along with a detailed description of the updated machine translation system where a comparison is done between synthetic, unconstrained, and other data for fine-tuning.
% IWSLT 2025 system paper. One field per line; the second author's middle name is moved out of the surname slot.
@inproceedings{e-ortega-etal-2025-quespa,
  title = {{QUESPA} Submission for the {IWSLT} 2025 Dialectal and Low-resource Speech Translation Task},
  author = {Ortega, John E. and Zevallos, Rodolfo Joel and Chen, William and Abdulmumin, Idris},
  editor = {Salesky, Elizabeth and Federico, Marcello and Anastasopoulos, Antonis},
  booktitle = {Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025)},
  month = jul,
  year = {2025},
  address = {Vienna, Austria (in-person and online)},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2025.iwslt-1.25/},
  doi = {10.18653/v1/2025.iwslt-1.25},
  pages = {260--268},
  isbn = {979-8-89176-272-5}
}
AfricaNLP
Automatic Speech Recognition for African Low-Resource Languages: Challenges and Future Directions

Sukairaj Hafiz Imam, Babangida Sani, Dawit Ketema Gete, and 6 more authors

In Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025), Jul 2025

Abs DOI Bib

Automatic Speech Recognition (ASR) technologies have transformed human-computer interaction; however, low-resource languages in Africa remain significantly underrepresented in both research and practical applications. This study investigates the major challenges hindering the development of ASR systems for these languages, which include data scarcity, linguistic complexity, limited computational resources, acoustic variability, and ethical concerns surrounding bias and privacy. The primary goal is to critically analyze these barriers and identify practical, inclusive strategies to advance ASR technologies within the African context. Recent advances and case studies emphasize promising strategies such as community-driven data collection, self-supervised and multilingual learning, lightweight model architectures, and techniques that prioritize privacy. Evidence from pilot projects involving various African languages showcases the feasibility and impact of customized solutions, which encompass morpheme-based modeling and domain-specific ASR applications in sectors like healthcare and education. The findings highlight the importance of interdisciplinary collaboration and sustained investment to tackle the distinct linguistic and infrastructural challenges faced by the continent. This study offers a progressive roadmap for creating ethical, efficient, and inclusive ASR systems that not only safeguard linguistic diversity but also improve digital accessibility and promote socioeconomic participation for speakers of African languages.
@inproceedings{imam-etal-2025-automatic,
  title     = {Automatic Speech Recognition for {A}frican Low-Resource Languages: Challenges and Future Directions},
  author    = {Imam, Sukairaj Hafiz and Sani, Babangida and Gete, Dawit Ketema and Ahmed, Bedru Yimam and Ahmad, Ibrahim Said and Abdulmumin, Idris and Yimam, Seid Muhie and Bello, Muhammad Yahuza and Muhammad, Shamsuddeen Hassan},
  editor    = {Lignos, Constantine and Abdulmumin, Idris and Adelani, David},
  booktitle = {Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.africanlp-1.13/},
  doi       = {10.18653/v1/2025.africanlp-1.13},
  pages     = {89--94},
  isbn      = {979-8-89176-257-2}
}

SemEval

HausaNLP at SemEval-2025 Task 2: Entity-Aware Fine-tuning vs. Prompt Engineering in Entity-Aware Machine Translation

Abdulhamid Abubakar, Hamidatu Abdulkadir, Rabiu Ibrahim, and 9 more authors

In Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025), Jul 2025

Abs Bib

This paper presents our findings for SemEval 2025 Task 2, a shared task on entity-aware machine translation (EA-MT). The goal of this task is to develop translation models that can accurately translate English sentences into target languages, with a particular focus on handling named entities, which often pose challenges for MT systems. The task covers 10 target languages with English as the source. In this paper, we describe the different systems we employed, detail our results, and discuss insights gained from our experiments.

@inproceedings{abubakar-etal-2025-hausanlp,
  author    = {Abubakar, Abdulhamid and Abdulkadir, Hamidatu and Ibrahim, Rabiu and Auwal, Abubakar and Wali, Ahmad and Umar, Amina and Bala, Maryam and Sani, Sani Abdullahi and Ahmad, Ibrahim Said and Muhammad, Shamsuddeen Hassan and Abdulmumin, Idris and Marivate, Vukosi},
  title     = {{H}ausa{NLP} at {S}em{E}val-2025 Task 2: Entity-Aware Fine-tuning vs. Prompt Engineering in Entity-Aware Machine Translation},
  editor    = {Rosenthal, Sara and Ros{\'a}, Aiala and Ghosh, Debanjan and Zampieri, Marcos},
  booktitle = {Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)},
  year      = {2025},
  month     = jul,
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {885--892},
  isbn      = {979-8-89176-273-2},
  url       = {https://aclanthology.org/2025.semeval-1.120/}
}

SemEval
HausaNLP at SemEval-2025 Task 3: Towards a Fine-Grained Model-Aware Hallucination Detection

Maryam Bala, Amina Abubakar, Abdulhamid Abubakar, and 6 more authors

In Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025), Jul 2025

Abs Bib

This paper presents our findings of the Multilingual Shared Task on Hallucinations and Related Observable Overgeneration Mistakes, MU-SHROOM, which focuses on identifying hallucinations and related overgeneration errors in large language models (LLMs). The shared task involves detecting specific text spans that constitute hallucinations in the outputs generated by LLMs in 14 languages. To address this task, we aim to provide a nuanced, model-aware understanding of hallucination occurrences and severity in English. We used natural language inference and fine-tuned a ModernBERT model using a synthetic dataset of 400 samples, achieving an Intersection over Union (IoU) score of 0.032 and a correlation score of 0.422. These results indicate a moderately positive correlation between the model’s confidence scores and the actual presence of hallucinations. The IoU score indicates that our model has a relatively low overlap between the predicted hallucination span and the truth annotation. The performance is unsurprising, given the intricate nature of hallucination detection. Hallucinations often manifest subtly, relying on context, making pinpointing their exact boundaries formidable.
@inproceedings{bala-etal-2025-hausanlp,
  title     = {{H}ausa{NLP} at {S}em{E}val-2025 Task 3: Towards a Fine-Grained Model-Aware Hallucination Detection},
  author    = {Bala, Maryam and Abubakar, Amina and Abubakar, Abdulhamid and Bichi, Abdulkadir and Ahmad, Hafsa and Sani, Sani Abdullahi and Abdulmumin, Idris and Muhammad, Shamsuddeen Hassan and Ahmad, Ibrahim Said},
  editor    = {Rosenthal, Sara and Ros{\'a}, Aiala and Ghosh, Debanjan and Zampieri, Marcos},
  booktitle = {Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.semeval-1.227/},
  pages     = {1737--1741},
  isbn      = {979-8-89176-273-2}
}
AfricaNLP
Who Wrote This? Identifying Machine vs Human-Generated Text in Hausa

Babangida Sani, Aakansha Soy, Sukairaj Hafiz Imam, and 5 more authors

In Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025), Jul 2025

Abs DOI Bib

The advancement of large language models (LLMs) has allowed them to be proficient in various tasks, including content generation. However, their unregulated usage can lead to malicious activities such as plagiarism and generating and spreading fake news, especially for low-resource languages. Most existing machine-generated text detectors are trained on high-resource languages like English, French, etc. In this study, we developed the first large-scale detector that can distinguish between human- and machine-generated content in Hausa. We scraped seven Hausa-language media outlets for the human-generated text and the Gemini-2.0 flash model to automatically generate the corresponding Hausa-language articles based on the human-generated article headlines. We fine-tuned four pre-trained African-centric models (AfriTeVa, AfriBERTa, AfroXLMR, and AfroXLMR-76L) on the resulting dataset and assessed their performance using accuracy and F1-score metrics. AfroXLMR achieved the highest performance with an accuracy of 99.23% and an F1 score of 99.21%, demonstrating its effectiveness for Hausa text detection. Our dataset is made publicly available to enable further research.
@inproceedings{sani-etal-2025-wrote,
  title     = {Who Wrote This? Identifying Machine vs Human-Generated Text in {H}ausa},
  author    = {Sani, Babangida and Soy, Aakansha and Imam, Sukairaj Hafiz and Mustapha, Ahmad and Aliyu, Lukman Jibril and Abdulmumin, Idris and Ahmad, Ibrahim Said and Muhammad, Shamsuddeen Hassan},
  editor    = {Lignos, Constantine and Abdulmumin, Idris and Adelani, David},
  booktitle = {Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.africanlp-1.12/},
  doi       = {10.18653/v1/2025.africanlp-1.12},
  pages     = {82--88},
  isbn      = {979-8-89176-257-2}
}
AfricaNLP
HausaNLP: Current Status, Challenges and Future Directions for Hausa Natural Language Processing

Shamsuddeen Hassan Muhammad, Ibrahim Said Ahmad, Idris Abdulmumin, and 8 more authors

In Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025), Jul 2025

Abs DOI Bib

Hausa Natural Language Processing (NLP) has gained increasing attention in recent years, yet remains understudied as a low-resource language despite having over 120 million first-language (L1) and 80 million second-language (L2) speakers worldwide. While significant advances have been made in high-resource languages, Hausa NLP faces persistent challenges including limited open-source datasets and inadequate model representation. This paper presents an overview of the current state of Hausa NLP, systematically examining existing resources, research contributions, and gaps across fundamental NLP tasks: text classification, machine translation, named entity recognition, speech recognition, and question answering. We introduce HausaNLP, a curated catalog that aggregates datasets, tools, and research works to enhance accessibility and drive further development. Furthermore, we discuss challenges in integrating Hausa into large language models (LLMs), addressing issues of suboptimal tokenization, and dialectal variation. Finally, we propose strategic research directions emphasizing dataset expansion, improved language modeling approaches, and strengthened community collaboration to advance Hausa NLP. Our work provides both a foundation for accelerating Hausa NLP progress and valuable insights for broader multilingual NLP research.
@inproceedings{muhammad-etal-2025-hausanlp,
  title     = {{H}ausa{NLP}: Current Status, Challenges and Future Directions for {H}ausa Natural Language Processing},
  author    = {Muhammad, Shamsuddeen Hassan and Ahmad, Ibrahim Said and Abdulmumin, Idris and Lawan, Falalu Ibrahim and Imam, Sukairaj Hafiz and Aliyu, Yusuf and Sani, Sani Abdullahi and Umar, Ali Usman and Gwadabe, Tajuddeen and Church, Kenneth and Marivate, Vukosi},
  editor    = {Lignos, Constantine and Abdulmumin, Idris and Adelani, David},
  booktitle = {Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.africanlp-1.27/},
  doi       = {10.18653/v1/2025.africanlp-1.27},
  pages     = {176--191},
  isbn      = {979-8-89176-257-2}
}

arXiv

Automatic Speech Recognition (ASR) for African Low-Resource Languages: A Systematic Literature Review

Sukairaj Hafiz Imam, Tadesse Destaw Belay, Kedir Yassin Husse, and 7 more authors

Jul 2025

Bib

@misc{imam2025automaticspeechrecognitionasr,
  title         = {Automatic Speech Recognition ({ASR}) for {A}frican Low-Resource Languages: A Systematic Literature Review},
  author        = {Imam, Sukairaj Hafiz and Belay, Tadesse Destaw and Husse, Kedir Yassin and Ahmad, Ibrahim Said and Abdulmumin, Idris and Umar, Hadiza Ali and Bello, Muhammad Yahuza and Nakatumba-Nabende, Joyce and Yimam, Seid Muhie and Muhammad, Shamsuddeen Hassan},
  year          = {2025},
  eprint        = {2510.01145},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2510.01145}
}

2024

SemEval

SemEval Task 1: Semantic Textual Relatedness for African and Asian Languages

Nedjma Ousidhoum, Shamsuddeen Hassan Muhammad, Mohamed Abdalla, and 14 more authors

In Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024), Jun 2024

Best Task (HM) DOI Bib

Best Task Paper Honorable Mention at SemEval-2024.

@inproceedings{ousidhoum-etal-2024-semeval,
  author    = {Ousidhoum, Nedjma and Muhammad, Shamsuddeen Hassan and Abdalla, Mohamed and Abdulmumin, Idris and Ahmad, Ibrahim Said and Ahuja, Sanchit and Aji, Alham Fikri and Araujo, Vladimir and Beloucif, Meriem and De Kock, Christine and Hourrane, Oumaima and Shrivastava, Manish and Solorio, Thamar and Surange, Nirmal and Vishnubhotla, Krishnapriya and Yimam, Seid Muhie and Mohammad, Saif M.},
  title     = {{S}em{E}val Task 1: Semantic Textual Relatedness for {A}frican and {A}sian Languages},
  editor    = {Ojha, Atul Kr. and Do{\u{g}}ru{\"o}z, A. Seza and Tayyar Madabushi, Harish and Da San Martino, Giovanni and Rosenthal, Sara and Ros{\'a}, Aiala},
  booktitle = {Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)},
  year      = {2024},
  month     = jun,
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  pages     = {1963--1978},
  doi       = {10.18653/v1/2024.semeval-1.272},
  url       = {https://aclanthology.org/2024.semeval-1.272}
}

LREC-COLING

Mitigating Translationese in Low-resource Languages: The Storyboard Approach

Garry Kuwanto, Eno-Abasi E. Urua, Priscilla Amondi Amuok, and 21 more authors

In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), May 2024

Bib

@inproceedings{kuwanto-etal-2024-mitigating-translationese,
  author    = {Kuwanto, Garry and Urua, Eno-Abasi E. and Amuok, Priscilla Amondi and Muhammad, Shamsuddeen Hassan and Aremu, Anuoluwapo and Otiende, Verrah and Nanyanga, Loice Emma and Nyoike, Teresiah W. and Akpan, Aniefon D. and Udouboh, Nsima Ab and Archibong, Idongesit Udeme and Moses, Idara Effiong and Ige, Ifeoluwatayo A. and Ajibade, Benjamin and Awokoya, Olumide Benjamin and Abdulmumin, Idris and Aliyu, Saminu Mohammad and Iro, Ruqayya Nasir and Ahmad, Ibrahim Said and Smith, Deontae and Michaels, Praise-EL and Adelani, David Ifeoluwa and Wijaya, Derry Tanti and Andy, Anietie},
  title     = {Mitigating Translationese in Low-resource Languages: The Storyboard Approach},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  year      = {2024},
  month     = may,
  address   = {Torino, Italy},
  publisher = {ELRA and ICCL},
  pages     = {11349--11360},
  url       = {https://aclanthology.org/2024.lrec-main.992}
}

SIGIR

CIRAL: A Test Collection for CLIR Evaluation in African Languages

Mofetoluwa Adeyemi, Akintunde Oladipo, Xinyu Zhang, and 20 more authors

In Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval, Jul 2024

DOI Bib

@inproceedings{10.1145/3626772.3657884,
  author    = {Adeyemi, Mofetoluwa and Oladipo, Akintunde and Zhang, Xinyu and Alfonso-Hermelo, David and Rezagholizadeh, Mehdi and Chen, Boxing and Omotayo, Abdul-Hakeem and Abdulmumin, Idris and Etori, Naome A. and Musa, Toyib Babatunde and Fanijo, Samuel and Awoyomi, Oluwabusayo Olufunke and Salahudeen, Saheed Abdullahi and Mohammed, Labaran Adamu and Abolade, Daud Olamide and Lawan, Falalu Ibrahim and Abubakar, Maryam Sabo and Iro, Ruqayya Nasir and Abubakar, Amina Imam and Mohamed, Shafie Abdi and Mohamed, Hanad Mohamud and Ajayi, Tunde Oluwaseyi and Lin, Jimmy},
  booktitle = {Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  title     = {{CIRAL}: A Test Collection for {CLIR} Evaluation in {A}frican Languages},
  month     = jul,
  year      = {2024},
  address   = {Washington, DC, USA},
  publisher = {Association for Computing Machinery},
  doi       = {10.1145/3626772.3657884},
  url       = {https://doi.org/10.1145/3626772.3657884},
  series    = {SIGIR '24}
}

SACAIR

Analysing Public Transport User Sentiment on Low Resource Multilingual Data

Rozina Myoya, Vukosi Marivate, and Idris Abdulmumin

In Proceedings of the Fifth Southern African Conference for Artificial Intelligence Research, Jul 2024

Bib

@inproceedings{myoya2024-sacair,
  title     = {Analysing Public Transport User Sentiment on Low Resource Multilingual Data},
  author    = {Myoya, Rozina and Marivate, Vukosi and Abdulmumin, Idris},
  editor    = {Gerber, Aurona and Maritz, Jacques and Pillay, Anban},
  booktitle = {Proceedings of the Fifth Southern African Conference for Artificial Intelligence Research},
  address   = {Bloemfontein, South Africa},
  year      = {2024},
  url       = {https://2024.sacair.org.za/proceedings}
}

WOAH

HausaHate: An Expert Annotated Corpus for Hausa Hate Speech Detection

Francielle Vargas, Samuel Guimarães, Shamsuddeen Hassan Muhammad, and 6 more authors

In Proceedings of the 8th Workshop on Online Abuse and Harms (WOAH 2024), Jun 2024

DOI Bib

@inproceedings{vargas-etal-2024-hausahate,
  author    = {Vargas, Francielle and Guimar{\~a}es, Samuel and Muhammad, Shamsuddeen Hassan and Alves, Diego and Ahmad, Ibrahim Said and Abdulmumin, Idris and Mohamed, Diallo and Pardo, Thiago and Benevenuto, Fabr{\'\i}cio},
  title     = {{H}ausa{H}ate: An Expert Annotated Corpus for {H}ausa Hate Speech Detection},
  editor    = {Chung, Yi-Ling and Talat, Zeerak and Nozza, Debora and Plaza-del-Arco, Flor Miriam and R{\"o}ttger, Paul and Mostafazadeh Davani, Aida and Calabrese, Agostina},
  booktitle = {Proceedings of the 8th Workshop on Online Abuse and Harms (WOAH 2024)},
  year      = {2024},
  month     = jun,
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  pages     = {52--58},
  doi       = {10.18653/v1/2024.woah-1.5},
  url       = {https://aclanthology.org/2024.woah-1.5}
}

WMT

Correcting FLORES Evaluation Dataset for Four African Languages

Idris Abdulmumin, Sthembiso Mkhwanazi, Mahlatse Mbooi, and 7 more authors

In Proceedings of the Ninth Conference on Machine Translation, Nov 2024

DOI Bib

@inproceedings{abdulmumin-etal-2024-correcting,
  author    = {Abdulmumin, Idris and Mkhwanazi, Sthembiso and Mbooi, Mahlatse and Muhammad, Shamsuddeen Hassan and Ahmad, Ibrahim Said and Putini, Neo and Mathebula, Miehleketo and Shingange, Matimba and Gwadabe, Tajuddeen and Marivate, Vukosi},
  title     = {Correcting {FLORES} Evaluation Dataset for Four {A}frican Languages},
  editor    = {Haddow, Barry and Kocmi, Tom and Koehn, Philipp and Monz, Christof},
  booktitle = {Proceedings of the Ninth Conference on Machine Translation},
  year      = {2024},
  month     = nov,
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {570--578},
  doi       = {10.18653/v1/2024.wmt-1.44},
  url       = {https://aclanthology.org/2024.wmt-1.44}
}

WMT

Findings of WMT2024 English-to-Low Resource Multimodal Translation Task

Shantipriya Parida, Ondřej Bojar, Idris Abdulmumin, and 2 more authors

In Proceedings of the Ninth Conference on Machine Translation, Nov 2024

DOI Bib

@inproceedings{parida-etal-2024-findings,
  author    = {Parida, Shantipriya and Bojar, Ond{\v{r}}ej and Abdulmumin, Idris and Muhammad, Shamsuddeen Hassan and Ahmad, Ibrahim Said},
  title     = {Findings of {WMT}2024 {E}nglish-to-Low Resource Multimodal Translation Task},
  editor    = {Haddow, Barry and Kocmi, Tom and Koehn, Philipp and Monz, Christof},
  booktitle = {Proceedings of the Ninth Conference on Machine Translation},
  year      = {2024},
  month     = nov,
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {677--683},
  doi       = {10.18653/v1/2024.wmt-1.56},
  url       = {https://aclanthology.org/2024.wmt-1.56}
}

ACL

SemRel2024: A Collection of Semantic Textual Relatedness Datasets for 13 Languages

Nedjma Ousidhoum, Shamsuddeen Muhammad, Mohamed Abdalla, and 24 more authors

In Findings of the Association for Computational Linguistics: ACL 2024, Aug 2024

DOI Bib

@inproceedings{ousidhoum-etal-2024-semrel2024,
  title     = {{S}em{R}el2024: A Collection of Semantic Textual Relatedness Datasets for 13 Languages},
  author    = {Ousidhoum, Nedjma and Muhammad, Shamsuddeen and Abdalla, Mohamed and Abdulmumin, Idris and Ahmad, Ibrahim and Ahuja, Sanchit and Aji, Alham and Araujo, Vladimir and Ayele, Abinew and Baswani, Pavan and Beloucif, Meriem and Biemann, Chris and Bourhim, Sofia and De Kock, Christine and Dekebo, Genet and Hourrane, Oumaima and Kanumolu, Gopichand and Madasu, Lokesh and Rutunda, Samuel and Shrivastava, Manish and Solorio, Thamar and Surange, Nirmal and Tilaye, Hailegnaw and Vishnubhotla, Krishnapriya and Winata, Genta and Yimam, Seid and Mohammad, Saif},
  editor    = {Ku, Lun-Wei and Martins, Andre and Srikumar, Vivek},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2024},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-acl.147},
  doi       = {10.18653/v1/2024.findings-acl.147},
  pages     = {2512--2530}
}

2023

ACL

HaVQA: A Dataset for Visual Question Answering and Multimodal Research in Hausa Language

Shantipriya Parida, Idris Abdulmumin, Shamsuddeen Hassan Muhammad, and 7 more authors

In Findings of the Association for Computational Linguistics: ACL 2023, Jul 2023

Bib

@inproceedings{parida-etal-2023-havqa,
  author    = {Parida, Shantipriya and Abdulmumin, Idris and Muhammad, Shamsuddeen Hassan and Bose, Aneesh and Kohli, Guneet Singh and Ahmad, Ibrahim Said and Kotwal, Ketan and Deb Sarkar, Sayan and Bojar, Ond{\v{r}}ej and Kakudi, Habeebah},
  title     = {{H}a{VQA}: A Dataset for Visual Question Answering and Multimodal Research in {H}ausa Language},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2023},
  year      = {2023},
  month     = jul,
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {10162--10183},
  url       = {https://aclanthology.org/2023.findings-acl.646}
}

SemEval

HausaNLP at SemEval-2023 Task 10: Transfer Learning, Synthetic Data and Side-information for Multi-level Sexism Classification

Saminu Mohammad Aliyu, Idris Abdulmumin, Shamsuddeen Hassan Muhammad, and 4 more authors

In Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023), Jul 2023

Bib

@inproceedings{aliyu-etal-2023-hausanlp,
  title     = {{H}ausa{NLP} at {S}em{E}val-2023 Task 10: Transfer Learning, Synthetic Data and Side-information for Multi-level Sexism Classification},
  author    = {Aliyu, Saminu Mohammad and Abdulmumin, Idris and Muhammad, Shamsuddeen Hassan and Ahmad, Ibrahim Said and Salahudeen, Saheed Abdullahi and Yusuf, Aliyu and Lawan, Falalu Ibrahim},
  booktitle = {Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.semeval-1.273},
  pages     = {1983--1987}
}

ICCAIT

Analyzing COVID-19 Vaccination Sentiments in Nigerian Cyberspace: Insights from a Manually Annotated Twitter Dataset

Ibrahim Said Ahmad, Lukman Jibril Aliyu, Auwal Abubakar Khalid, and 6 more authors

In Proceedings of the International Conference on Computing and Advances in Information Technology (ICCAIT 2023), Nov 2023

Bib

@inproceedings{abdulmumin2023covid,
  title     = {Analyzing {COVID}-19 Vaccination Sentiments in {N}igerian Cyberspace: Insights from a Manually Annotated {T}witter Dataset},
  author    = {Ahmad, Ibrahim Said and Aliyu, Lukman Jibril and Khalid, Auwal Abubakar and Aliyu, Saminu Muhammad and Muhammad, Shamsuddeen Hassan and Abdulmumin, Idris and Abduljalil, Bala Mairiga and Shehu, Bello Shehu and Abubakar, Amina Imam},
  year      = {2023},
  month     = nov,
  booktitle = {Proceedings of the International Conference on Computing and Advances in Information Technology ({ICCAIT} 2023)},
  address   = {Zaria, Nigeria}
}

ICCAIT

Leveraging Closed-Access Multilingual Embedding for Automatic Sentence Alignment in Low Resource Languages

Idris Abdulmumin, Auwal Abubakar Khalid, Shamsuddeen Hassan Muhammad, and 5 more authors

In Proceedings of the International Conference on Computing and Advances in Information Technology (ICCAIT 2023), Nov 2023

Bib

@inproceedings{abdulmumin2023leveraging,
  author    = {Abdulmumin, Idris and Khalid, Auwal Abubakar and Muhammad, Shamsuddeen Hassan and Ahmad, Ibrahim Said and Aliyu, Lukman Jibril and Sani, Babangida and Abduljalil, Bala Mairiga and Hassan, Sani Ahmad},
  title     = {Leveraging Closed-Access Multilingual Embedding for Automatic Sentence Alignment in Low Resource Languages},
  booktitle = {Proceedings of the International Conference on Computing and Advances in Information Technology ({ICCAIT} 2023)},
  month     = nov,
  year      = {2023},
  address   = {Zaria, Nigeria},
  url       = {https://arxiv.org/abs/2311.12179}
}

SemEval

SemEval-2023 Task 12: Sentiment Analysis for African Languages (AfriSenti-SemEval)

Shamsuddeen Hassan Muhammad, Idris Abdulmumin, Seid Muhie Yimam, and 7 more authors

In Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023), Jul 2023

Bib

@inproceedings{muhammad-etal-2023-semeval,
  title     = {{S}em{E}val-2023 Task 12: Sentiment Analysis for {A}frican Languages ({A}fri{S}enti-{S}em{E}val)},
  author    = {Muhammad, Shamsuddeen Hassan and Abdulmumin, Idris and Yimam, Seid Muhie and Adelani, David Ifeoluwa and Ahmad, Ibrahim Said and Ousidhoum, Nedjma and Ayele, Abinew Ali and Mohammad, Saif and Beloucif, Meriem and Ruder, Sebastian},
  booktitle = {Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.semeval-1.315},
  pages     = {2319--2337}
}

IJCNLP

MasakhaNEWS: News Topic Classification for African languages

David Ifeoluwa Adelani, Marek Masiak, Israel Abebe Azime, and 62 more authors

In Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, Nov 2023

Area Chair Award Bib

Area Chair Award (Resources and Evaluation) at IJCNLP-AACL 2023.

@inproceedings{adelani-EtAl:2023:ijcnlp,
  author    = {Adelani, David Ifeoluwa and Masiak, Marek and Azime, Israel Abebe and Alabi, Jesujoba and Tonja, Atnafu Lambebo and Mwase, Christine and Ogundepo, Odunayo and Dossou, Bonaventure F. P. and Oladipo, Akintunde and Nixdorf, Doreen and Emezue, Chris Chinenye and al-azzawi, sana and Sibanda, Blessing and David, Davis and Ndolela, Lolwethu and Mukiibi, Jonathan and Ajayi, Tunde and Moteu, Tatiana and Odhiambo, Brian and Owodunni, Abraham and Obiefuna, Nnaemeka and Mohamed, Muhidin and Muhammad, Shamsuddeen Hassan and Ababu, Teshome Mulugeta and Salahudeen, Saheed Abdullahi and Yigezu, Mesay Gemeda and Gwadabe, Tajuddeen and Abdulmumin, Idris and Taye, Mahlet and Awoyomi, Oluwabusayo and Shode, Iyanuoluwa and Adelani, Tolulope and Abdulganiyu, Habiba and Omotayo, Abdul-Hakeem and Adeeko, Adetola and Afolabi, Abeeb and Aremu, Anuoluwapo and Samuel, Olanrewaju and Siro, Clemencia and Kimotho, Wangari and Ogbu, Onyekachi and Mbonu, Chinedu and Chukwuneke, Chiamaka and Fanijo, Samuel and Ojo, Jessica and Awosan, Oyinkansola and Kebede, Tadesse and Sakayo, Toadoum Sari and Nyatsine, Pamela and Sidume, Freedmore and Yousuf, Oreen and Oduwole, Mardiyyah and Tshinu, kanda and Kimanuka, Ussen and Diko, Thina and Nxakama, Siyanda and Nigusse, Sinodos and Johar, Abdulmejid and Mohamed, Shafie and Hassan, Fuad Mire and Mehamed, Moges Ahmed and Ngabire, Evrard and Jules, Jules and Ssenkungu, Ivan and Stenetorp, Pontus},
  title     = {{M}asakha{NEWS}: News Topic Classification for {A}frican languages},
  booktitle = {Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics},
  month     = nov,
  year      = {2023},
  address   = {Nusa Dua, Bali},
  publisher = {Association for Computational Linguistics},
  pages     = {144--159},
  url       = {https://aclanthology.org/2023.ijcnlp-long.10}
}

EMNLP

AfriSenti: A Twitter Sentiment Analysis Benchmark for African Languages

Shamsuddeen Muhammad, Idris Abdulmumin, Abinew Ayele, and 24 more authors

In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, Dec 2023

Best Paper Bib

Best Non-archival Paper, AfricaNLP 2023.

@inproceedings{muhammad-etal-2023-afrisenti,
  author    = {Muhammad, Shamsuddeen and Abdulmumin, Idris and Ayele, Abinew and Ousidhoum, Nedjma and Adelani, David and Yimam, Seid and Ahmad, Ibrahim and Beloucif, Meriem and Mohammad, Saif and Ruder, Sebastian and Hourrane, Oumaima and Jorge, Alipio and Brazdil, Pavel and Ali, Felermino and David, Davis and Osei, Salomey and Shehu-Bello, Bello and Lawan, Falalu and Gwadabe, Tajuddeen and Rutunda, Samuel and Belay, Tadesse and Messelle, Wendimu and Balcha, Hailu and Chala, Sisay and Gebremichael, Hagos and Opoku, Bernard and Arthur, Stephen},
  title     = {{A}fri{S}enti: A {T}witter Sentiment Analysis Benchmark for {A}frican Languages},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  year      = {2023},
  month     = dec,
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  pages     = {13968--13981},
  url       = {https://aclanthology.org/2023.emnlp-main.862}
}

2022

LREC

NaijaSenti: A Nigerian Twitter Sentiment Corpus for Multilingual Sentiment Analysis

Shamsuddeen Hassan Muhammad, David Ifeoluwa Adelani, Sebastian Ruder, and 8 more authors

In Proceedings of the Language Resources and Evaluation Conference, Jun 2022

Bib

@inproceedings{muhammad-EtAl:2022:LREC,
  author    = {Muhammad, Shamsuddeen Hassan and Adelani, David Ifeoluwa and Ruder, Sebastian and Ahmad, Ibrahim Sa'id and Abdulmumin, Idris and Bello, Bello Shehu and Choudhury, Monojit and Emezue, Chris Chinenye and Abdullahi, Saheed Salahudeen and Aremu, Anuoluwapo and Brazdil, Pavel},
  title     = {{N}aija{S}enti: A {N}igerian {T}witter Sentiment Corpus for Multilingual Sentiment Analysis},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {590--602},
  url       = {https://aclanthology.org/2022.lrec-1.63}
}

NAACL

A Few Thousand Translations Go a Long Way! Leveraging Pre-trained Models for African News Translation

David Adelani, Jesujoba Alabi, Angela Fan, and 42 more authors

In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Jul 2022

Bib

@inproceedings{adelani-etal-2022-thousand,
  author    = {Adelani, David and Alabi, Jesujoba and Fan, Angela and Kreutzer, Julia and Shen, Xiaoyu and Reid, Machel and Ruiter, Dana and Klakow, Dietrich and Nabende, Peter and Chang, Ernie and Gwadabe, Tajuddeen and Sackey, Freshia and Dossou, Bonaventure F. P. and Emezue, Chris and Leong, Colin and Beukman, Michael and Muhammad, Shamsuddeen and Jarso, Guyo and Yousuf, Oreen and Niyongabo Rubungo, Andre and Hacheme, Gilles and Wairagala, Eric Peter and Nasir, Muhammad Umair and Ajibade, Benjamin and Ajayi, Tunde and Gitau, Yvonne and Abbott, Jade and Ahmed, Mohamed and Ochieng, Millicent and Aremu, Anuoluwapo and Ogayo, Perez and Mukiibi, Jonathan and Ouoba Kabore, Fatoumata and Kalipe, Godson and Mbaye, Derguene and Tapo, Allahsera Auguste and Memdjokam Koagne, Victoire and Munkoh-Buabeng, Edwin and Wagner, Valencia and Abdulmumin, Idris and Awokoya, Ayodele and Buzaaba, Happy and Sibanda, Blessing and Bukula, Andiswa and Manthalu, Sam},
  title     = {A Few Thousand Translations Go a Long Way! Leveraging Pre-trained Models for {A}frican News Translation},
  booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2022},
  month     = jul,
  address   = {Seattle, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {3053--3070},
  url       = {https://aclanthology.org/2022.naacl-main.223}
}

AfricaNLP

NECAT-CLWE: A Simple But Efficient Parallel Data Generation Approach for Unsupervised and Semi-Supervised Neural Machine Translation

Rabiu Abdullahi Ibrahim and Idris Abdulmumin

In 3rd Workshop on African Natural Language Processing, Jul 2022

Bib

@inproceedings{ibrahim2022necatclwe,
  author    = {Ibrahim, Rabiu Abdullahi and Abdulmumin, Idris},
  title     = {{NECAT}-{CLWE}: A Simple But Efficient Parallel Data Generation Approach for Unsupervised and Semi-Supervised Neural Machine Translation},
  booktitle = {3rd Workshop on African Natural Language Processing},
  year      = {2022},
  url       = {https://openreview.net/forum?id=ruW4c9fNLZ5}
}

AfricaNLP

The African Stopwords Project: Curating Stopwords for African Languages

Chris Chinenye Emezue, Hellina Hailu Nigatu, Cynthia Thinwa, and 12 more authors

In 3rd Workshop on African Natural Language Processing, Jul 2022

Bib

@inproceedings{emezue2021the,
  author    = {Emezue, Chris Chinenye and Nigatu, Hellina Hailu and Thinwa, Cynthia and Louis, Lerato and Abdulmumin, Idris and Oyerinde, Samuel Gbenga and Ajibade, Benjamin Ayoade and Zhou, Helper and Onwuegbuzia, Emeka Felix and Emezue, Handel Chiagozie and Ige, Ifeoluwatayo Adeseye and Tonja, Atnafu Lambebo and Chukwuneke, Chiamaka Ijeoma and Muhammad, Shamsuddeen Hassan and Samuel, Olanrewaju},
  title     = {The {African Stopwords Project}: Curating Stopwords for {African} Languages},
  booktitle = {3rd Workshop on African Natural Language Processing},
  year      = {2022},
  url       = {https://openreview.net/forum?id=B0GEqcGV8-5}
}

WiNLP

Domain-Specific Lexicon-Based Sentiment Analysis using Contextual Shifter Patterns

Shamsuddeen Muhammad, Pavel Brazdil, and Idris Abdulmumin

In Proceedings of the Sixth Workshop on Widening Natural Language Processing, Dec 2022

Bib

@inproceedings{muhammad-2022-domain,
  author    = {Muhammad, Shamsuddeen and Brazdil, Pavel and Abdulmumin, Idris},
  title     = {Domain-Specific Lexicon-Based Sentiment Analysis using Contextual Shifter Patterns},
  booktitle = {Proceedings of the Sixth Workshop on Widening Natural Language Processing},
  year      = {2022},
  month     = dec,
  publisher = {Association for Computational Linguistics},
  address   = {Abu Dhabi, United Arab Emirates},
  url       = {https://aclanthology.org/2022.winlp-1.0}
}

WiNLP

HERDPhobia: A Dataset for Hate Speech Detection against Fulani Herdsmen in Nigeria

Saminu Aliyu, Gregory Wajiga, Muhammad Murtala, and 3 more authors

In Proceedings of the Sixth Workshop on Widening Natural Language Processing, Dec 2022

Bib

@inproceedings{aliyu-2022-herdphobia,
  author    = {Aliyu, Saminu and Wajiga, Gregory and Murtala, Muhammad and Muhammad, Shamsuddeen and Abdulmumin, Idris and Ahmad, Ibrahim},
  title     = {{HERDPhobia}: A Dataset for Hate Speech Detection against {Fulani} Herdsmen in {Nigeria}},
  booktitle = {Proceedings of the Sixth Workshop on Widening Natural Language Processing},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates},
  publisher = {Association for Computational Linguistics},
  url       = {https://arxiv.org/abs/2211.15262}
}

EMNLP

MasakhaNER 2.0: Africa-centric Transfer Learning for Named Entity Recognition

David Ifeoluwa Adelani, Graham Neubig, Sebastian Ruder, and 42 more authors

In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, Dec 2022

Bib

@inproceedings{adelani-2022-masakhaner,
  author    = {Adelani, David Ifeoluwa and Neubig, Graham and Ruder, Sebastian and Rijhwani, Shruti and Beukman, Michael and Palen-Michel, Chester and Lignos, Constantine and Alabi, Jesujoba O. and Muhammad, Shamsuddeen H. and Nabende, Peter and Dione, Cheikh M. Bamba and Bukula, Andiswa and Mabuya, Rooweither and Dossou, Bonaventure F. P. and Sibanda, Blessing and Buzaaba, Happy and Mukiibi, Jonathan and Kalipe, Godson and Mbaye, Derguene and Taylor, Amelia and Kabore, Fatoumata and Emezue, Chris Chinenye and Aremu, Anuoluwapo and Ogayo, Perez and Gitau, Catherine and Munkoh-Buabeng, Edwin and Koagne, Victoire M. and Tapo, Allahsera Auguste and Macucwa, Tebogo and Marivate, Vukosi and Mboning, Elvis and Gwadabe, Tajuddeen and Adewumi, Tosin and Ahia, Orevaoghene and Nakatumba-Nabende, Joyce and Mokono, Neo L. and Ezeani, Ignatius and Chukwuneke, Chiamaka and Adeyemi, Mofetoluwa and Hacheme, Gilles Q. and Abdulmumin, Idris and Ogundepo, Odunayo and Yousuf, Oreen and Ngoli, Tatiana Moteu and Klakow, Dietrich},
  title     = {{MasakhaNER} 2.0: {Africa-centric} Transfer Learning for Named Entity Recognition},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates},
  publisher = {Association for Computational Linguistics},
  pages     = {4488--4508},
  url       = {https://aclanthology.org/2022.emnlp-main.298}
}

IEEE

Quantity vs. Quality of Monolingual Source Data in Automatic Text Translation: Can It Be Too Little If It Is Too Good?

Idris Abdulmumin, Bashir Shehu Galadanci, Shamsuddeen Hassan Muhammad, and 1 more author

In 2022 IEEE Nigeria 4th International Conference on Disruptive Technologies for Sustainable Development (NIGERCON), Dec 2022

DOI Bib

@inproceedings{9803137,
  author    = {Abdulmumin, Idris and Galadanci, Bashir Shehu and Muhammad, Shamsuddeen Hassan and Aliyu, Garba},
  title     = {Quantity vs. Quality of Monolingual Source Data in Automatic Text Translation: Can It Be Too Little If It Is Too Good?},
  booktitle = {2022 IEEE Nigeria 4th International Conference on Disruptive Technologies for Sustainable Development (NIGERCON)},
  year      = {2022},
  pages     = {1--5},
  doi       = {10.1109/NIGERCON54645.2022.9803137}
}

LREC

Hausa Visual Genome: A Dataset for Multi-Modal English to Hausa Machine Translation

Idris Abdulmumin, Satya Ranjan Dash, Musa Abdullahi Dawud, and 7 more authors

In Proceedings of the Language Resources and Evaluation Conference, Jun 2022

Bib

@inproceedings{abdulmumin-EtAl:2022:LREC,
  author    = {Abdulmumin, Idris and Dash, Satya Ranjan and Dawud, Musa Abdullahi and Parida, Shantipriya and Muhammad, Shamsuddeen and Ahmad, Ibrahim Said and Panda, Subhadarshi and Bojar, Ond{\v{r}}ej and Galadanci, Bashir Shehu and Bello, Bello Shehu},
  title     = {{Hausa Visual Genome}: A Dataset for Multi-Modal {English} to {Hausa} Machine Translation},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {6471--6479},
  url       = {https://aclanthology.org/2022.lrec-1.694}
}

WMT

Separating Grains from the Chaff: Using Data Filtering to Improve Multilingual Translation for Low-Resourced African Languages

Idris Abdulmumin, Michael Beukman, Jesujoba Alabi, and 8 more authors

In Proceedings of the Seventh Conference on Machine Translation, Dec 2022

Bib

@inproceedings{abdulmumin-EtAl:2022:WMT,
  author    = {Abdulmumin, Idris and Beukman, Michael and Alabi, Jesujoba and Emezue, Chris Chinenye and Chimoto, Everlyn and Adewumi, Tosin and Muhammad, Shamsuddeen and Adeyemi, Mofetoluwa and Yousuf, Oreen and Singh, Sahib and Gwadabe, Tajuddeen},
  title     = {Separating Grains from the Chaff: Using Data Filtering to Improve Multilingual Translation for Low-Resourced {African} Languages},
  booktitle = {Proceedings of the Seventh Conference on Machine Translation},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates},
  publisher = {Association for Computational Linguistics},
  pages     = {1001--1014}
}

arXiv

BLOOM: A 176B-Parameter Open-Access Multilingual Language Model

Teven Le Scao, Angela Fan, Christopher Akiki, and 387 more authors

Dec 2022

DOI Bib

@misc{https://doi.org/10.48550/arxiv.2211.05100,
  author        = {Le Scao, Teven and Fan, Angela and Akiki, Christopher and Pavlick, Ellie and Ilić, Suzana and Hesslow, Daniel and Castagné, Roman and Luccioni, Alexandra Sasha and Yvon, François and Gallé, Matthias and Tow, Jonathan and Rush, Alexander M. and Biderman, Stella and Webson, Albert and Ammanamanchi, Pawan Sasanka and Wang, Thomas and Sagot, Benoît and Muennighoff, Niklas and del Moral, Albert Villanova and Ruwase, Olatunji and Bawden, Rachel and Bekman, Stas and McMillan-Major, Angelina and Beltagy, Iz and Nguyen, Huu and Saulnier, Lucile and Tan, Samson and Suarez, Pedro Ortiz and Sanh, Victor and Laurençon, Hugo and Jernite, Yacine and Launay, Julien and Mitchell, Margaret and Raffel, Colin and Gokaslan, Aaron and Simhi, Adi and Soroa, Aitor and Aji, Alham Fikri and Alfassy, Amit and Rogers, Anna and Nitzav, Ariel Kreisberg and Xu, Canwen and Mou, Chenghao and Emezue, Chris and Klamm, Christopher and Leong, Colin and van Strien, Daniel and Adelani, David Ifeoluwa and Radev, Dragomir and Ponferrada, Eduardo González and Levkovizh, Efrat and Kim, Ethan and Natan, Eyal Bar and De Toni, Francesco and Dupont, Gérard and Kruszewski, Germán and Pistilli, Giada and Elsahar, Hady and Benyamina, Hamza and Tran, Hieu and Yu, Ian and Abdulmumin, Idris and Johnson, Isaac and Gonzalez-Dios, Itziar and de la Rosa, Javier and Chim, Jenny and Dodge, Jesse and Zhu, Jian and Chang, Jonathan and Frohberg, Jörg and Tobing, Joseph and Bhattacharjee, Joydeep and Almubarak, Khalid and Chen, Kimbo and Lo, Kyle and Von Werra, Leandro and Weber, Leon and Phan, Long and Ben Allal, Loubna and Tanguy, Ludovic and Dey, Manan and Muñoz, Manuel Romero and Masoud, Maraim and Grandury, María and Šaško, Mario and Huang, Max and Coavoux, Maximin and Singh, Mayank and Jiang, Mike Tian-Jian and Vu, Minh Chien and Jauhar, Mohammad A. and Ghaleb, Mustafa and Subramani, Nishant and Kassner, Nora and Khamis, Nurulaqilla and Nguyen, Olivier and Espejel, Omar and de Gibert, Ona and Villegas, Paulo and Henderson, Peter and Colombo, Pierre and Amuok, Priscilla and Lhoest, Quentin and Harliman, Rheza and Bommasani, Rishi and López, Roberto Luis and Ribeiro, Rui and Osei, Salomey and Pyysalo, Sampo and Nagel, Sebastian and Bose, Shamik and Muhammad, Shamsuddeen Hassan and Sharma, Shanya and Longpre, Shayne and Nikpoor, Somaieh and Silberberg, Stanislav and Pai, Suhas and Zink, Sydney and Torrent, Tiago Timponi and Schick, Timo and Thrush, Tristan and Danchev, Valentin and Nikoulina, Vassilina and Laippala, Veronika and Lepercq, Violette and Prabhu, Vrinda and Alyafeai, Zaid and Talat, Zeerak and Raja, Arun and Heinzerling, Benjamin and Si, Chenglei and Salesky, Elizabeth and Mielke, Sabrina J. and Lee, Wilson Y. and Sharma, Abheesht and Santilli, Andrea and Chaffin, Antoine and Stiegler, Arnaud and Datta, Debajyoti and Szczechla, Eliza and Chhablani, Gunjan and Wang, Han and Pandey, Harshit and Strobelt, Hendrik and Fries, Jason Alan and Rozen, Jos and Gao, Leo and Sutawika, Lintang and Bari, M Saiful and Al-shaibani, Maged S. and Manica, Matteo and Nayak, Nihal and Teehan, Ryan and Albanie, Samuel and Shen, Sheng and Ben-David, Srulik and Bach, Stephen H. and Kim, Taewoon and Bers, Tali and Fevry, Thibault and Neeraj, Trishala and Thakker, Urmish and Raunak, Vikas and Tang, Xiangru and Yong, Zheng-Xin and Sun, Zhiqing and Brody, Shaked and Uri, Yallow and Tojarieh, Hadar and Roberts, Adam and Chung, Hyung Won and Tae, Jaesung and Phang, Jason and Press, Ofir and Li, Conglong and Narayanan, Deepak and Bourfoune, Hatim and Casper, Jared and Rasley, Jeff and Ryabinin, Max and Mishra, Mayank and Zhang, Minjia and Shoeybi, Mohammad and Peyrounette, Myriam and Patry, Nicolas and Tazi, Nouamane and Sanseviero, Omar and von Platen, Patrick and Cornette, Pierre and Lavallée, Pierre François and Lacroix, Rémi and Rajbhandari, Samyam and Gandhi, Sanchit and Smith, Shaden and Requena, Stéphane and Patil, Suraj and Dettmers, Tim and Baruwa, Ahmed and Singh, Amanpreet and Cheveleva, Anastasia and Ligozat, Anne-Laure and Subramonian, Arjun and Névéol, Aurélie and Lovering, Charles and Garrette, Dan and Tunuguntla, Deepak and Reiter, Ehud and Taktasheva, Ekaterina and Voloshina, Ekaterina and Bogdanov, Eli and Winata, Genta Indra and Schoelkopf, Hailey and Kalo, Jan-Christoph and Novikova, Jekaterina and Forde, Jessica Zosa and Clive, Jordan and Kasai, Jungo and Kawamura, Ken and Hazan, Liam and Carpuat, Marine and Clinciu, Miruna and Kim, Najoung and Cheng, Newton and Serikov, Oleg and Antverg, Omer and van der Wal, Oskar and Zhang, Rui and Zhang, Ruochen and Gehrmann, Sebastian and Pais, Shani and Shavrina, Tatiana and Scialom, Thomas and Yun, Tian and Limisiewicz, Tomasz and Rieser, Verena and Protasov, Vitaly and Mikhailov, Vladislav and Pruksachatkun, Yada and Belinkov, Yonatan and Bamberger, Zachary and Kasner, Zdeněk and Rueda, Alice and Pestana, Amanda and Feizpour, Amir and Khan, Ammar and Faranak, Amy and Santos, Ana and Hevia, Anthony and Unldreaj, Antigona and Aghagol, Arash and Abdollahi, Arezoo and Tammour, Aycha and HajiHosseini, Azadeh and Behroozi, Bahareh and Ajibade, Benjamin and Saxena, Bharat and Ferrandis, Carlos Muñoz and Contractor, Danish and Lansky, David and David, Davis and Kiela, Douwe and Nguyen, Duong A. and Tan, Edward and Baylor, Emi and Ozoani, Ezinwanne and Mirza, Fatima and Ononiwu, Frankline and Rezanejad, Habib and Jones, Hessie and Bhattacharya, Indrani and Solaiman, Irene and Sedenko, Irina and Nejadgholi, Isar and Passmore, Jesse and Seltzer, Josh and Sanz, Julio Bonis and Fort, Karen and Dutra, Livia and Samagaio, Mairon and Elbadri, Maraim and Mieskes, Margot and Gerchick, Marissa and Akinlolu, Martha and McKenna, Michael and Qiu, Mike and Ghauri, Muhammed and Burynok, Mykola and Abrar, Nafis and Rajani, Nazneen and Elkott, Nour and Fahmy, Nour and Samuel, Olanrewaju and An, Ran and Kromann, Rasmus and Hao, Ryan and Alizadeh, Samira and Shubber, Sarmad and Wang, Silas and Roy, Sourav and Viguier, Sylvain and Le, Thanh and Oyebade, Tobi and Le, Trieu and Yang, Yoyo and Nguyen, Zach and Kashyap, Abhinav Ramesh and Palasciano, Alfredo and Callahan, Alison and Shukla, Anima and Miranda-Escalada, Antonio and Singh, Ayush and Beilharz, Benjamin and Wang, Bo and Brito, Caio and Zhou, Chenxi and Jain, Chirag and Xu, Chuxin and Fourrier, Clémentine and Periñán, Daniel León and Molano, Daniel and Yu, Dian and Manjavacas, Enrique and Barth, Fabio and Fuhrimann, Florian and Altay, Gabriel and Bayrak, Giyaseddin and Burns, Gully and Vrabec, Helena U. and Bello, Imane and Dash, Ishani and Kang, Jihyun and Giorgi, John and Golde, Jonas and Posada, Jose David and Sivaraman, Karthik Rangasai and Bulchandani, Lokesh and Liu, Lu and Shinzato, Luisa and de Bykhovetz, Madeleine Hahn and Takeuchi, Maiko and Pàmies, Marc and Castillo, Maria A and Nezhurina, Marianna and Sänger, Mario and Samwald, Matthias and Cullan, Michael and Weinberg, Michael and De Wolf, Michiel and Mihaljcic, Mina and Liu, Minna and Freidank, Moritz and Kang, Myungsun and Seelam, Natasha and Dahlberg, Nathan and Broad, Nicholas Michio and Muellner, Nikolaus and Fung, Pascale and Haller, Patrick and Chandrasekhar, Ramya and Eisenberg, Renata and Martin, Robert and Canalli, Rodrigo and Su, Rosaline and Su, Ruisi and Cahyawijaya, Samuel and Garda, Samuele and Deshmukh, Shlok S and Mishra, Shubhanshu and Kiblawi, Sid and Ott, Simon and Sang-aroonsiri, Sinee and Kumar, Srishti and Schweter, Stefan and Bharati, Sushil and Laud, Tanmay and Gigant, Théo and Kainuma, Tomoya and Kusa, Wojciech and Labrak, Yanis and Bajaj, Yash Shailesh and Venkatraman, Yash and Xu, Yifan and Xu, Yingxin and Xu, Yu and Tan, Zhe and Xie, Zhongli and Ye, Zifan and Bras, Mathilde and Belkada, Younes and Wolf, Thomas},
  title         = {{BLOOM}: A {176B}-Parameter Open-Access Multilingual Language Model},
  year          = {2022},
  publisher     = {arXiv},
  eprint        = {2211.05100},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  doi           = {10.48550/ARXIV.2211.05100},
  url           = {https://arxiv.org/abs/2211.05100}
}

2021

Mach. Trans.

Tag-less back-translation

Idris Abdulmumin, Bashir Shehu Galadanci, and Garba Aliyu

Machine Translation, Dec 2021

DOI Bib

@article{Abdulmumin2021-tagless-mach-trans,
  author    = {Abdulmumin, Idris and Galadanci, Bashir Shehu and Aliyu, Garba},
  title     = {Tag-less back-translation},
  journal   = {Machine Translation},
  publisher = {Springer Science and Business Media {LLC}},
  volume    = {35},
  number    = {4},
  pages     = {519--549},
  month     = dec,
  year      = {2021},
  doi       = {10.1007/s10590-021-09284-y},
  url       = {https://link.springer.com/article/10.1007/s10590-021-09284-y}
}

IAENG EL

A hybrid approach for improved low resource neural machine translation using monolingual data

Idris Abdulmumin, Bashir Shehu Galadanci, Abubakar Isah, and 2 more authors

Engineering Letters, Nov 2021

Bib

@article{Abdulmumin2021-hybrid-eng-letters,
  author  = {Abdulmumin, Idris and Galadanci, Bashir Shehu and Isah, Abubakar and Kakudi, Habeebah Adamu and Sinan, Isma'ila Idris},
  title   = {A hybrid approach for improved low resource neural machine translation using monolingual data},
  journal = {Engineering Letters},
  volume  = {29},
  number  = {4},
  pages   = {1478--1493},
  month   = nov,
  year    = {2021}
}

LNCS

Data Selection as an Alternative to Quality Estimation in Self-Learning for Low Resource Neural Machine Translation

Idris Abdulmumin, Bashir Shehu Galadanci, Ibrahim Said Ahmad, and 1 more author

In Computational Science and Its Applications – ICCSA 2021, Nov 2021

DOI Bib

@incollection{Abdulmumin2021c,
  author    = {Abdulmumin, Idris and Galadanci, Bashir Shehu and Ahmad, Ibrahim Said and Abdullahi, Rabiu Ibrahim},
  editor    = {Gervasi, Osvaldo and Murgante, Beniamino and Misra, Sanjay and Garau, Chiara and Ble{\v{c}}i{\'{c}}, Ivan and Taniar, David and Apduhan, Bernady O. and Rocha, Ana Maria A. C. and Tarantino, Eufemia and Torre, Carmelo Maria},
  title     = {Data Selection as an Alternative to Quality Estimation in Self-Learning for Low Resource Neural Machine Translation},
  booktitle = {Computational Science and Its Applications -- ICCSA 2021},
  year      = {2021},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {311--326},
  isbn      = {978-3-030-87013-3},
  doi       = {10.1007/978-3-030-87013-3_24},
  url       = {https://doi.org/10.1007/978-3-030-87013-3_24}
}

CCIS

Enhanced Back-Translation for Low Resource Neural Machine Translation Using Self-training

Idris Abdulmumin, Bashir Shehu Galadanci, and Abubakar Isa

In Information and Communication Technology and Applications, Nov 2021

DOI Bib

@incollection{10.1007/978-3-030-69143-1_28,
  title     = {Enhanced Back-Translation for Low Resource Neural Machine Translation Using Self-training},
  author    = {Abdulmumin, Idris and Galadanci, Bashir Shehu and Isa, Abubakar},
  booktitle = {Information and Communication Technology and Applications},
  editor    = {Misra, Sanjay and Muhammad-Bello, Bilkisu},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2021},
  pages     = {355--371},
  doi       = {10.1007/978-3-030-69143-1_28},
  isbn      = {978-3-030-69143-1}
}

2019

IEEE

HauWE: Hausa Words Embedding for Natural Language Processing

Idris Abdulmumin and Bashir Shehu Galadanci

In 2019 2nd International Conference of the IEEE Nigeria Computer Chapter, NigeriaComputConf 2019, Nov 2019

Bib

@inproceedings{Abdulmumin2019,
  author    = {Abdulmumin, Idris and Galadanci, Bashir Shehu},
  title     = {{HauWE}: {Hausa} Words Embedding for Natural Language Processing},
  booktitle = {2019 2nd International Conference of the IEEE Nigeria Computer Chapter, NigeriaComputConf 2019},
  year      = {2019},
  address   = {Zaria, Kaduna-Nigeria},
  url       = {https://ieeexplore.ieee.org/document/8949674/}
}