mirror of
https://github.com/umap-project/umap.git
synced 2025-04-28 19:42:36 +02:00
feat: delete datalayer's files on delete
Until now, uMap did not remove a datalayer's files when the datalayer was deleted, which can make file storage grow a lot over time. With this change the files are not deleted outright: they are moved to a "purgatory" folder, from where they can be purged after some time.
This commit is contained in:
parent
86a8bbafa2
commit
49eb121c68
6 changed files with 111 additions and 2 deletions
|
@ -282,6 +282,12 @@ How many total maps to return in the search.
|
||||||
|
|
||||||
How many maps to show in the user "my maps" page.
|
How many maps to show in the user "my maps" page.
|
||||||
|
|
||||||
|
#### UMAP_PURGATORY_ROOT
|
||||||
|
|
||||||
|
Path where files are moved when a datalayer is deleted. They will stay there until
`umap purge_purgatory` is run. This may be useful in case a user deletes a datalayer,
or even a map, by mistake.
|
||||||
|
|
||||||
#### UMAP_SEARCH_CONFIGURATION
|
#### UMAP_SEARCH_CONFIGURATION
|
||||||
|
|
||||||
Use it if you take control over the search configuration.
|
Use it if you take control over the search configuration.
|
||||||
|
|
28
umap/management/commands/purge_purgatory.py
Normal file
28
umap/management/commands/purge_purgatory.py
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Management command that deletes aged files from the purgatory folder.

    The folder is read from ``settings.UMAP_PURGATORY_ROOT``. A file is removed
    when its modification time is older than ``--days`` days (30 by default).
    """

    help = "Remove old files from purgatory. Eg.: umap purge_purgatory --days 7"

    def add_arguments(self, parser):
        """Register the ``--days`` option (integer, defaults to 30)."""
        parser.add_argument(
            "--days",
            help="Number of days to consider files for removal",
            default=30,
            type=int,
        )

    def handle(self, *args, **options):
        """Delete every purgatory file last modified more than ``--days`` days ago."""
        root = Path(settings.UMAP_PURGATORY_ROOT)
        if not root.is_dir():
            # The folder may not exist yet (nothing was ever moved there);
            # in that case there is simply nothing to purge.
            return
        # Files whose mtime is strictly below this timestamp are too old.
        threshold = time.time() - options["days"] * 86400
        for path in root.iterdir():
            if not path.is_file():
                # unlink() cannot remove directories; leave anything that is
                # not a regular file untouched instead of crashing.
                continue
            if path.stat().st_mtime < threshold:
                path.unlink()
                # Use the command's own stream so output can be captured or
                # redirected by callers (e.g. call_command(stdout=...)).
                self.stdout.write(f"Removed old file {path}")
|
|
@ -3,6 +3,7 @@ import operator
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
|
@ -255,6 +256,13 @@ class Map(NamedModel):
|
||||||
)
|
)
|
||||||
return map_settings
|
return map_settings
|
||||||
|
|
||||||
|
def delete(self, **kwargs):
    """Delete the map after deleting each of its datalayers one by one.

    Returns whatever the parent class ``delete`` returns.
    """
    # Explicitly call delete() on every datalayer, so we can deal with
    # removing files (the cascade delete would not call the model delete
    # method).
    layers = self.datalayer_set.all()
    for layer in layers:
        layer.delete()
    return super().delete(**kwargs)
|
||||||
|
|
||||||
def generate_umapjson(self, request):
|
def generate_umapjson(self, request):
|
||||||
umapjson = self.settings
|
umapjson = self.settings
|
||||||
umapjson["type"] = "umap"
|
umapjson["type"] = "umap"
|
||||||
|
@ -462,7 +470,9 @@ class DataLayer(NamedModel):
|
||||||
|
|
||||||
def save(self, force_insert=False, force_update=False, **kwargs):
|
def save(self, force_insert=False, force_update=False, **kwargs):
|
||||||
is_new = not bool(self.pk)
|
is_new = not bool(self.pk)
|
||||||
super(DataLayer, self).save(force_insert, force_update, **kwargs)
|
super(DataLayer, self).save(
|
||||||
|
force_insert=force_insert, force_update=force_update, **kwargs
|
||||||
|
)
|
||||||
|
|
||||||
if is_new:
|
if is_new:
|
||||||
force_insert, force_update = False, True
|
force_insert, force_update = False, True
|
||||||
|
@ -471,10 +481,25 @@ class DataLayer(NamedModel):
|
||||||
new_name = self.geojson.storage.save(filename, self.geojson)
|
new_name = self.geojson.storage.save(filename, self.geojson)
|
||||||
self.geojson.storage.delete(old_name)
|
self.geojson.storage.delete(old_name)
|
||||||
self.geojson.name = new_name
|
self.geojson.name = new_name
|
||||||
super(DataLayer, self).save(force_insert, force_update, **kwargs)
|
super(DataLayer, self).save(
|
||||||
|
force_insert=force_insert, force_update=force_update, **kwargs
|
||||||
|
)
|
||||||
self.purge_gzip()
|
self.purge_gzip()
|
||||||
self.purge_old_versions()
|
self.purge_old_versions()
|
||||||
|
|
||||||
|
def delete(self, **kwargs):
    """Delete the datalayer, taking care of its files first.

    Gzip files are purged and the versions are sent to the purgatory
    before the parent class ``delete`` runs; its result is returned.
    """
    # File cleanup happens while the instance is still intact.
    self.purge_gzip()
    self.to_purgatory()
    outcome = super().delete(**kwargs)
    return outcome
|
||||||
|
|
||||||
|
def to_purgatory(self):
    """Move every version file of this datalayer into the purgatory folder.

    The destination is ``settings.UMAP_PURGATORY_ROOT`` (created on demand).
    Each file is renamed ``<map pk>_<original name>`` so the map it belonged
    to can still be identified.
    """
    # Local import: only this method needs it.
    import shutil

    dest = Path(settings.UMAP_PURGATORY_ROOT)
    dest.mkdir(parents=True, exist_ok=True)
    src = Path(self.geojson.storage.location) / self.storage_root()
    for version in self.versions:
        name = version["name"]
        # A plain rename fails when source and destination live on different
        # filesystems (the purgatory may well be on another mount than the
        # storage); shutil.move falls back to copy + delete in that case.
        shutil.move(str(src / name), str(dest / f"{self.map.pk}_{name}"))
|
||||||
|
|
||||||
def upload_to(self):
|
def upload_to(self):
|
||||||
root = self.storage_root()
|
root = self.storage_root()
|
||||||
name = "%s_%s.geojson" % (self.pk, int(time.time() * 1000))
|
name = "%s_%s.geojson" % (self.pk, int(time.time() * 1000))
|
||||||
|
|
|
@ -267,6 +267,7 @@ UMAP_DEFAULT_FEATURES_HAVE_OWNERS = False
|
||||||
UMAP_HOME_FEED = "latest"
|
UMAP_HOME_FEED = "latest"
|
||||||
UMAP_IMPORTERS = {}
|
UMAP_IMPORTERS = {}
|
||||||
UMAP_HOST_INFOS = {}
|
UMAP_HOST_INFOS = {}
|
||||||
|
UMAP_PURGATORY_ROOT = "/tmp/umappurgatory"
|
||||||
|
|
||||||
UMAP_READONLY = env("UMAP_READONLY", default=False)
|
UMAP_READONLY = env("UMAP_READONLY", default=False)
|
||||||
UMAP_GZIP = True
|
UMAP_GZIP = True
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
@ -269,3 +270,26 @@ def test_anonymous_can_edit_in_inherit_mode_and_map_in_public_mode(
|
||||||
map.save()
|
map.save()
|
||||||
fake_request.user = AnonymousUser()
|
fake_request.user = AnonymousUser()
|
||||||
assert datalayer.can_edit(fake_request)
|
assert datalayer.can_edit(fake_request)
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_remove_all_versions_on_delete(map, settings):
    """Deleting a datalayer moves its versions away and keeps foreign files."""
    settings.UMAP_PURGATORY_ROOT = tempfile.mkdtemp()
    datalayer = DataLayerFactory(uuid="0f1161c0-c07f-4ba4-86c5-8d8981d8a813", old_id=17)
    root = Path(datalayer.storage_root())
    # Number of files already present in the storage folder before the test
    # adds its own.
    before = len(datalayer.geojson.storage.listdir(root)[1])
    # File using an unrelated id prefix: it must survive the deletion.
    other = "123456_1440918637.geojson"
    files = [
        f"{datalayer.pk}_1440924889.geojson",
        f"{datalayer.pk}_1440923687.geojson",
        f"{datalayer.pk}_1440918637.geojson",
        f"{datalayer.old_id}_1440918537.geojson",
        other,
    ]
    for path in files:
        datalayer.geojson.storage.save(root / path, ContentFile("{}"))
        datalayer.geojson.storage.save(root / f"{path}.gz", ContentFile("{}"))
    # 5 names, each saved as plain and as .gz.
    assert len(datalayer.geojson.storage.listdir(root)[1]) == 10 + before
    datalayer.delete()
    found = datalayer.geojson.storage.listdir(root)[1]
    # listdir() gives no ordering guarantee, so compare order-insensitively.
    assert sorted(found) == sorted([other, f"{other}.gz"])
    # Presumably the four versioned files above plus the file(s) that existed
    # before; verify against DataLayer.versions if this count changes.
    assert len(list(Path(settings.UMAP_PURGATORY_ROOT).iterdir())) == 4 + before
|
||||||
|
|
25
umap/tests/test_purge_purgatory.py
Normal file
25
umap/tests/test_purge_purgatory.py
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.core.management import call_command
|
||||||
|
|
||||||
|
|
||||||
|
def test_purge_purgatory(settings):
    """purge_purgatory removes only files older than the requested age."""
    seconds_per_day = 86400
    settings.UMAP_PURGATORY_ROOT = tempfile.mkdtemp()
    root = Path(settings.UMAP_PURGATORY_ROOT)

    def remaining():
        # Names of the files currently sitting in the purgatory.
        return {entry.name for entry in root.iterdir()}

    # (file name, age in days); None leaves the modification time untouched.
    fixtures = (("old.json", 31), ("recent.json", 8), ("now.json", None))
    for filename, age_in_days in fixtures:
        target = root / filename
        target.write_text("{}")
        if age_in_days is not None:
            backdated = target.stat().st_mtime - age_in_days * seconds_per_day
            os.utime(target, times=(backdated, backdated))

    assert remaining() == {"old.json", "recent.json", "now.json"}
    # Default threshold (30 days): only the 31 days old file goes away.
    call_command("purge_purgatory")
    assert remaining() == {"recent.json", "now.json"}
    # 7 days threshold: the 8 days old file goes away too.
    call_command("purge_purgatory", "--days=7")
    assert remaining() == {"now.json"}
|
Loading…
Reference in a new issue