mirror of
https://github.com/umap-project/umap.git
synced 2025-04-28 19:42:36 +02:00
feat: delete datalayer's files on delete (#2158)
Until now, uMap was not deleting files on delete, which can increase file storage a lot after some time. The files are not deleted, but moved to a "purgatory" folder, from where they can be deleted after some time.
This commit is contained in:
commit
5d5be52337
6 changed files with 111 additions and 2 deletions
|
@ -282,6 +282,12 @@ How many total maps to return in the search.
|
|||
|
||||
How many maps to show in the user "my maps" page.
|
||||
|
||||
#### UMAP_PURGATORY_ROOT
|
||||
|
||||
Path where files are moved when a datalayer is deleted. They will stay there until
|
||||
`umap purge_purgatory` is run. May be useful in case a user deletes by mistake
|
||||
a datalayer, or even a map.
|
||||
|
||||
#### UMAP_SEARCH_CONFIGURATION
|
||||
|
||||
Use it if you take control over the search configuration.
|
||||
|
|
28
umap/management/commands/purge_purgatory.py
Normal file
28
umap/management/commands/purge_purgatory.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = "Remove old files from purgatory. Eg.: umap purge_purgatory --days 7"
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument(
|
||||
"--days",
|
||||
help="Number of days to consider files for removal",
|
||||
default=30,
|
||||
type=int,
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
days = options["days"]
|
||||
root = Path(settings.UMAP_PURGATORY_ROOT)
|
||||
threshold = time.time() - days * 86400
|
||||
for path in root.iterdir():
|
||||
stats = path.stat()
|
||||
filestamp = stats.st_mtime
|
||||
if filestamp < threshold:
|
||||
path.unlink()
|
||||
print(f"Removed old file {path}")
|
|
@ -3,6 +3,7 @@ import operator
|
|||
import os
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
|
@ -255,6 +256,13 @@ class Map(NamedModel):
|
|||
)
|
||||
return map_settings
|
||||
|
||||
def delete(self, **kwargs):
|
||||
# Explicitely call datalayers.delete, so we can deal with removing files
|
||||
# (the cascade delete would not call the model delete method)
|
||||
for datalayer in self.datalayer_set.all():
|
||||
datalayer.delete()
|
||||
return super().delete(**kwargs)
|
||||
|
||||
def generate_umapjson(self, request):
|
||||
umapjson = self.settings
|
||||
umapjson["type"] = "umap"
|
||||
|
@ -462,7 +470,9 @@ class DataLayer(NamedModel):
|
|||
|
||||
def save(self, force_insert=False, force_update=False, **kwargs):
|
||||
is_new = not bool(self.pk)
|
||||
super(DataLayer, self).save(force_insert, force_update, **kwargs)
|
||||
super(DataLayer, self).save(
|
||||
force_insert=force_insert, force_update=force_update, **kwargs
|
||||
)
|
||||
|
||||
if is_new:
|
||||
force_insert, force_update = False, True
|
||||
|
@ -471,10 +481,25 @@ class DataLayer(NamedModel):
|
|||
new_name = self.geojson.storage.save(filename, self.geojson)
|
||||
self.geojson.storage.delete(old_name)
|
||||
self.geojson.name = new_name
|
||||
super(DataLayer, self).save(force_insert, force_update, **kwargs)
|
||||
super(DataLayer, self).save(
|
||||
force_insert=force_insert, force_update=force_update, **kwargs
|
||||
)
|
||||
self.purge_gzip()
|
||||
self.purge_old_versions()
|
||||
|
||||
def delete(self, **kwargs):
|
||||
self.purge_gzip()
|
||||
self.to_purgatory()
|
||||
return super().delete(**kwargs)
|
||||
|
||||
def to_purgatory(self):
|
||||
dest = Path(settings.UMAP_PURGATORY_ROOT)
|
||||
dest.mkdir(parents=True, exist_ok=True)
|
||||
src = Path(self.geojson.storage.location) / self.storage_root()
|
||||
for version in self.versions:
|
||||
name = version["name"]
|
||||
(src / name).rename(dest / f"{self.map.pk}_{name}")
|
||||
|
||||
def upload_to(self):
|
||||
root = self.storage_root()
|
||||
name = "%s_%s.geojson" % (self.pk, int(time.time() * 1000))
|
||||
|
|
|
@ -267,6 +267,7 @@ UMAP_DEFAULT_FEATURES_HAVE_OWNERS = False
|
|||
UMAP_HOME_FEED = "latest"
|
||||
UMAP_IMPORTERS = {}
|
||||
UMAP_HOST_INFOS = {}
|
||||
UMAP_PURGATORY_ROOT = "/tmp/umappurgatory"
|
||||
|
||||
UMAP_READONLY = env("UMAP_READONLY", default=False)
|
||||
UMAP_GZIP = True
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
@ -269,3 +270,26 @@ def test_anonymous_can_edit_in_inherit_mode_and_map_in_public_mode(
|
|||
map.save()
|
||||
fake_request.user = AnonymousUser()
|
||||
assert datalayer.can_edit(fake_request)
|
||||
|
||||
|
||||
def test_should_remove_all_versions_on_delete(map, settings):
|
||||
settings.UMAP_PURGATORY_ROOT = tempfile.mkdtemp()
|
||||
datalayer = DataLayerFactory(uuid="0f1161c0-c07f-4ba4-86c5-8d8981d8a813", old_id=17)
|
||||
root = Path(datalayer.storage_root())
|
||||
before = len(datalayer.geojson.storage.listdir(root)[1])
|
||||
other = "123456_1440918637.geojson"
|
||||
files = [
|
||||
f"{datalayer.pk}_1440924889.geojson",
|
||||
f"{datalayer.pk}_1440923687.geojson",
|
||||
f"{datalayer.pk}_1440918637.geojson",
|
||||
f"{datalayer.old_id}_1440918537.geojson",
|
||||
other,
|
||||
]
|
||||
for path in files:
|
||||
datalayer.geojson.storage.save(root / path, ContentFile("{}"))
|
||||
datalayer.geojson.storage.save(root / f"{path}.gz", ContentFile("{}"))
|
||||
assert len(datalayer.geojson.storage.listdir(root)[1]) == 10 + before
|
||||
datalayer.delete()
|
||||
found = datalayer.geojson.storage.listdir(root)[1]
|
||||
assert found == [other, f"{other}.gz"]
|
||||
assert len(list(Path(settings.UMAP_PURGATORY_ROOT).iterdir())) == 4 + before
|
||||
|
|
25
umap/tests/test_purge_purgatory.py
Normal file
25
umap/tests/test_purge_purgatory.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from django.core.management import call_command
|
||||
|
||||
|
||||
def test_purge_purgatory(settings):
|
||||
settings.UMAP_PURGATORY_ROOT = tempfile.mkdtemp()
|
||||
root = Path(settings.UMAP_PURGATORY_ROOT)
|
||||
old = root / "old.json"
|
||||
old.write_text("{}")
|
||||
stat = old.stat()
|
||||
os.utime(old, times=(stat.st_mtime - 31 * 86400, stat.st_mtime - 31 * 86400))
|
||||
recent = root / "recent.json"
|
||||
recent.write_text("{}")
|
||||
stat = recent.stat()
|
||||
os.utime(recent, times=(stat.st_mtime - 8 * 86400, stat.st_mtime - 8 * 86400))
|
||||
now = root / "now.json"
|
||||
now.write_text("{}")
|
||||
assert {f.name for f in root.iterdir()} == {"old.json", "recent.json", "now.json"}
|
||||
call_command("purge_purgatory")
|
||||
assert {f.name for f in root.iterdir()} == {"recent.json", "now.json"}
|
||||
call_command("purge_purgatory", "--days=7")
|
||||
assert {f.name for f in root.iterdir()} == {"now.json"}
|
Loading…
Reference in a new issue