-
Notifications
You must be signed in to change notification settings - Fork 0
/
CITATION.bib
15 lines (15 loc) · 1.75 KB
/
CITATION.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
@inproceedings{wahle-etal-2022-large,
title = "How Large Language Models are Transforming Machine-Paraphrase Plagiarism",
author = "Wahle, Jan Philip and
Ruas, Terry and
Kirstein, Frederic and
Gipp, Bela",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-main.62",
pages = "952--963",
abstract = "The recent success of large language models for text generation poses a severe threat to academic integrity, as plagiarists can generate realistic paraphrases indistinguishable from original work.However, the role of large autoregressive models in generating machine-paraphrased plagiarism and their detection is still incipient in the literature.This work explores T5 and GPT3 for machine-paraphrase generation on scientific articles from arXiv, student theses, and Wikipedia.We evaluate the detection performance of six automated solutions and one commercial plagiarism detection software and perform a human study with 105 participants regarding their detection performance and the quality of generated examples.Our results suggest that large language models can rewrite text humans have difficulty identifying as machine-paraphrased (53{\%} mean acc.).Human experts rate the quality of paraphrases generated by GPT-3 as high as original texts (clarity 4.0/5, fluency 4.2/5, coherence 3.8/5).The best-performing detection model (GPT-3) achieves 66{\%} F1-score in detecting paraphrases.We make our code, data, and findings publicly available to facilitate the development of detection solutions.",
}