Merge "update_dictionaries: cache and speedup and fixup" into main
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0a2101f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/cache/
diff --git a/update_dictionaries.py b/update_dictionaries.py
index 5e8dc2e..30596b5 100755
--- a/update_dictionaries.py
+++ b/update_dictionaries.py
@@ -8,13 +8,14 @@
import glob
import os
from pathlib import Path
+import shutil
import sys
-import tempfile
import urllib.request
import zipfile
DIR = Path(__file__).resolve().parent
+CACHE_DIR = DIR / "cache"
DICTIONARIES = (
@@ -30,8 +31,9 @@
"hunspell-en_AU-2020.12.07.zip",
"https://github.com/b00f/lilak/releases/latest/download/fa-IR.zip",
# NOTE: need to remove IGNORE from uk_UA.aff
- "https://github.com/brown-uk/dict_uk/releases/latest/download/"
- "hunspell-uk_UA.zip",
+ # TODO: This archive no longer exists; find a working URL or drop this entry.
+ # "https://github.com/brown-uk/dict_uk/releases/latest/download/"
+ # "hunspell-uk_UA.zip",
)
@@ -40,20 +42,27 @@
sys.exit(f"{__file__}: script takes no args")
os.chdir(DIR)
- for url in DICTIONARIES:
- print(f"Downloading {url}")
- with tempfile.NamedTemporaryFile() as tmp:
- with urllib.request.urlopen(url) as response:
- tmp.write(response.read())
- tmp.flush()
- zipfile.ZipFile(tmp.name).extractall()
+ CACHE_DIR.mkdir(exist_ok=True)
- for name in glob.glob("*en_GB-ise*"):
- os.rename(name, name.replace("-ise", ""))
- for name in glob.glob("*en_GB-ize*"):
- os.rename(name, name.replace("-ize", "_oxendict"))
- for name in glob.glob("*fa-IR.*"):
- os.rename(name, name.replace("-", "_"))
+ for url in DICTIONARIES:
+ cache = CACHE_DIR / url.rsplit("/", 1)[1]
+ if not cache.exists():
+ print(f"Downloading {url} to cache {cache}")
+ tmp = cache.with_suffix(".tmp")
+ with urllib.request.urlopen(url) as response:
+ tmp.write_bytes(response.read())
+ tmp.rename(cache)
+
+ print(f"Extracting {cache.name}")
+ zipfile.ZipFile(cache).extractall()
+
+ for name in glob.glob("*en_GB-ise*"):
+ os.rename(name, name.replace("-ise", ""))
+ for name in glob.glob("*en_GB-ize*"):
+ os.rename(name, name.replace("-ize", "_oxendict"))
+ for name in glob.glob("fa-IR/*fa-IR.*"):
+ os.rename(name, os.path.basename(name.replace("-", "_")))
+ shutil.rmtree("fa-IR")
return 0