Merge "update_dictionaries: cache and speedup and fixup" into main
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0a2101f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/cache/
diff --git a/update_dictionaries.py b/update_dictionaries.py
index 5e8dc2e..30596b5 100755
--- a/update_dictionaries.py
+++ b/update_dictionaries.py
@@ -8,13 +8,14 @@
 import glob
 import os
 from pathlib import Path
+import shutil
 import sys
-import tempfile
 import urllib.request
 import zipfile
 
 
 DIR = Path(__file__).resolve().parent
+CACHE_DIR = DIR / "cache"
 
 
 DICTIONARIES = (
@@ -30,8 +31,9 @@
     "hunspell-en_AU-2020.12.07.zip",
     "https://github.com/b00f/lilak/releases/latest/download/fa-IR.zip",
     # NOTE: need to remove IGNORE from uk_UA.aff
-    "https://github.com/brown-uk/dict_uk/releases/latest/download/"
-    "hunspell-uk_UA.zip",
+    # TODO: This archive no longer exists; find a replacement URL.
+    # "https://github.com/brown-uk/dict_uk/releases/latest/download/"
+    # "hunspell-uk_UA.zip",
 )
 
 
@@ -40,20 +42,27 @@
         sys.exit(f"{__file__}: script takes no args")
     os.chdir(DIR)
 
-    for url in DICTIONARIES:
-        print(f"Downloading {url}")
-        with tempfile.NamedTemporaryFile() as tmp:
-            with urllib.request.urlopen(url) as response:
-                tmp.write(response.read())
-            tmp.flush()
-            zipfile.ZipFile(tmp.name).extractall()
+    CACHE_DIR.mkdir(exist_ok=True)
 
-        for name in glob.glob("*en_GB-ise*"):
-            os.rename(name, name.replace("-ise", ""))
-        for name in glob.glob("*en_GB-ize*"):
-            os.rename(name, name.replace("-ize", "_oxendict"))
-        for name in glob.glob("*fa-IR.*"):
-            os.rename(name, name.replace("-", "_"))
+    for url in DICTIONARIES:
+        cache = CACHE_DIR / url.rsplit("/", 1)[1]
+        if not cache.exists():
+            print(f"Downloading {url} to cache {cache}")
+            tmp = cache.with_suffix(".tmp")
+            with urllib.request.urlopen(url) as response:
+                tmp.write_bytes(response.read())
+            tmp.rename(cache)
+
+        print(f"Extracting {cache.name}")
+        zipfile.ZipFile(cache).extractall()
+
+    for name in glob.glob("*en_GB-ise*"):
+        os.rename(name, name.replace("-ise", ""))
+    for name in glob.glob("*en_GB-ize*"):
+        os.rename(name, name.replace("-ize", "_oxendict"))
+    for name in glob.glob("fa-IR/*fa-IR.*"):
+        os.rename(name, os.path.basename(name.replace("-", "_")))
+    shutil.rmtree("fa-IR")
 
     return 0