From 49eb121c68d1cb8d7aec134c5b3e88be577fc6e3 Mon Sep 17 00:00:00 2001 From: Yohan Boniface Date: Sat, 21 Sep 2024 09:08:36 +0200 Subject: [PATCH] feat: delete datalayer's files on delete Until now, uMap was not deleting files on delete, which can increase file storage a lot after some time. The files are not deleted, but moved to a "purgatory" folder, from where they can be deleted after some time. --- docs/config/settings.md | 6 +++++ umap/management/commands/purge_purgatory.py | 28 ++++++++++++++++++++ umap/models.py | 29 +++++++++++++++++++-- umap/settings/base.py | 1 + umap/tests/test_datalayer.py | 24 +++++++++++++++++ umap/tests/test_purge_purgatory.py | 25 ++++++++++++++++++ 6 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 umap/management/commands/purge_purgatory.py create mode 100644 umap/tests/test_purge_purgatory.py diff --git a/docs/config/settings.md b/docs/config/settings.md index fc32fa3a..fbb9d0ba 100644 --- a/docs/config/settings.md +++ b/docs/config/settings.md @@ -282,6 +282,12 @@ How many total maps to return in the search. How many maps to show in the user "my maps" page. +#### UMAP_PURGATORY_ROOT + +Path where files are moved when a datalayer is deleted. They will stay there until +`umap purge_purgatory` is run. May be useful in case a user deletes by mistake +a datalayer, or even a map. + #### UMAP_SEARCH_CONFIGURATION Use it if you take control over the search configuration. diff --git a/umap/management/commands/purge_purgatory.py b/umap/management/commands/purge_purgatory.py new file mode 100644 index 00000000..ee4be29d --- /dev/null +++ b/umap/management/commands/purge_purgatory.py @@ -0,0 +1,28 @@ +import time +from pathlib import Path + +from django.conf import settings +from django.core.management.base import BaseCommand + + +class Command(BaseCommand): + help = "Remove old files from purgatory. Eg.: umap purge_purgatory --days 7" + + def add_arguments(self, parser): + parser.add_argument( + "--days", + help="Number of days to consider files for removal", + default=30, + type=int, + ) + + def handle(self, *args, **options): + days = options["days"] + root = Path(settings.UMAP_PURGATORY_ROOT) + threshold = time.time() - days * 86400 + for path in root.iterdir(): + stats = path.stat() + filestamp = stats.st_mtime + if filestamp < threshold: + path.unlink() + print(f"Removed old file {path}") diff --git a/umap/models.py b/umap/models.py index 93caacec..8f12ade7 100644 --- a/umap/models.py +++ b/umap/models.py @@ -3,6 +3,7 @@ import operator import os import time import uuid +from pathlib import Path from django.conf import settings from django.contrib.auth.models import User @@ -255,6 +256,13 @@ class Map(NamedModel): ) return map_settings + def delete(self, **kwargs): + # Explicitely call datalayers.delete, so we can deal with removing files + # (the cascade delete would not call the model delete method) + for datalayer in self.datalayer_set.all(): + datalayer.delete() + return super().delete(**kwargs) + def generate_umapjson(self, request): umapjson = self.settings umapjson["type"] = "umap" @@ -462,7 +470,9 @@ class DataLayer(NamedModel): def save(self, force_insert=False, force_update=False, **kwargs): is_new = not bool(self.pk) - super(DataLayer, self).save(force_insert, force_update, **kwargs) + super(DataLayer, self).save( + force_insert=force_insert, force_update=force_update, **kwargs + ) if is_new: force_insert, force_update = False, True @@ -471,10 +481,25 @@ class DataLayer(NamedModel): new_name = self.geojson.storage.save(filename, self.geojson) self.geojson.storage.delete(old_name) self.geojson.name = new_name - super(DataLayer, self).save(force_insert, force_update, **kwargs) + super(DataLayer, self).save( + force_insert=force_insert, force_update=force_update, **kwargs + ) self.purge_gzip() self.purge_old_versions() + def delete(self, **kwargs): + self.purge_gzip() + self.to_purgatory() + return super().delete(**kwargs) + + def to_purgatory(self): + dest = Path(settings.UMAP_PURGATORY_ROOT) + dest.mkdir(parents=True, exist_ok=True) + src = Path(self.geojson.storage.location) / self.storage_root() + for version in self.versions: + name = version["name"] + (src / name).rename(dest / f"{self.map.pk}_{name}") + def upload_to(self): root = self.storage_root() name = "%s_%s.geojson" % (self.pk, int(time.time() * 1000)) diff --git a/umap/settings/base.py b/umap/settings/base.py index 1e1eda73..73a1bf71 100644 --- a/umap/settings/base.py +++ b/umap/settings/base.py @@ -267,6 +267,7 @@ UMAP_DEFAULT_FEATURES_HAVE_OWNERS = False UMAP_HOME_FEED = "latest" UMAP_IMPORTERS = {} UMAP_HOST_INFOS = {} +UMAP_PURGATORY_ROOT = "/tmp/umappurgatory" UMAP_READONLY = env("UMAP_READONLY", default=False) UMAP_GZIP = True diff --git a/umap/tests/test_datalayer.py b/umap/tests/test_datalayer.py index bee1bcd8..1019ff58 100644 --- a/umap/tests/test_datalayer.py +++ b/umap/tests/test_datalayer.py @@ -1,3 +1,4 @@ +import tempfile from pathlib import Path import pytest @@ -269,3 +270,26 @@ def test_anonymous_can_edit_in_inherit_mode_and_map_in_public_mode( map.save() fake_request.user = AnonymousUser() assert datalayer.can_edit(fake_request) + + +def test_should_remove_all_versions_on_delete(map, settings): + settings.UMAP_PURGATORY_ROOT = tempfile.mkdtemp() + datalayer = DataLayerFactory(uuid="0f1161c0-c07f-4ba4-86c5-8d8981d8a813", old_id=17) + root = Path(datalayer.storage_root()) + before = len(datalayer.geojson.storage.listdir(root)[1]) + other = "123456_1440918637.geojson" + files = [ + f"{datalayer.pk}_1440924889.geojson", + f"{datalayer.pk}_1440923687.geojson", + f"{datalayer.pk}_1440918637.geojson", + f"{datalayer.old_id}_1440918537.geojson", + other, + ] + for path in files: + datalayer.geojson.storage.save(root / path, ContentFile("{}")) + datalayer.geojson.storage.save(root / f"{path}.gz", ContentFile("{}")) + assert len(datalayer.geojson.storage.listdir(root)[1]) == 10 + before + datalayer.delete() + found = datalayer.geojson.storage.listdir(root)[1] + assert found == [other, f"{other}.gz"] + assert len(list(Path(settings.UMAP_PURGATORY_ROOT).iterdir())) == 4 + before diff --git a/umap/tests/test_purge_purgatory.py b/umap/tests/test_purge_purgatory.py new file mode 100644 index 00000000..d4d29e43 --- /dev/null +++ b/umap/tests/test_purge_purgatory.py @@ -0,0 +1,25 @@ +import os +import tempfile +from pathlib import Path + +from django.core.management import call_command + + +def test_purge_purgatory(settings): + settings.UMAP_PURGATORY_ROOT = tempfile.mkdtemp() + root = Path(settings.UMAP_PURGATORY_ROOT) + old = root / "old.json" + old.write_text("{}") + stat = old.stat() + os.utime(old, times=(stat.st_mtime - 31 * 86400, stat.st_mtime - 31 * 86400)) + recent = root / "recent.json" + recent.write_text("{}") + stat = recent.stat() + os.utime(recent, times=(stat.st_mtime - 8 * 86400, stat.st_mtime - 8 * 86400)) + now = root / "now.json" + now.write_text("{}") + assert {f.name for f in root.iterdir()} == {"old.json", "recent.json", "now.json"} + call_command("purge_purgatory") + assert {f.name for f in root.iterdir()} == {"recent.json", "now.json"} + call_command("purge_purgatory", "--days=7") + assert {f.name for f in root.iterdir()} == {"now.json"}