From 5e14c60880231caa1099db21ab247a6aead2cb4e Mon Sep 17 00:00:00 2001
From: Behdad Esfahbod
Date: Fri, 10 Nov 2023 10:01:23 -0700
Subject: [PATCH] [ngrams] Speed up bz2 decoding

Decode at once.
---
 ngrams.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ngrams.py b/ngrams.py
index 350df88..7e8b8d0 100644
--- a/ngrams.py
+++ b/ngrams.py
@@ -61,8 +61,8 @@ def extract_ngrams_from_file(filename, *kargs, **kwargs):
     import bz2
 
     # Assume harfbuzz-testing-wikipedia format
-    txtfile = bz2.open(filename + ".txt.bz2")
-    frqfile = bz2.open(filename + ".frq.bz2")
+    txtfile = bz2.open(filename + ".txt.bz2").read().splitlines()
+    frqfile = bz2.open(filename + ".frq.bz2").read().splitlines()
 
     return extract_ngrams(txtfile, *kargs, frequencies=frqfile, **kwargs)
 