diff --git a/Makefile b/Makefile index 90e97af6..282b75f1 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ install: ## Install the dependencies .PHONY: develop develop: ## Install the test and dev dependencies - python3 -m pip install -e .[test,dev,sync] + python3 -m pip install -e .[test,dev,sync,s3] playwright install .PHONY: format diff --git a/docs/config/settings.md b/docs/config/settings.md index 481855d3..69032017 100644 --- a/docs/config/settings.md +++ b/docs/config/settings.md @@ -89,13 +89,6 @@ Running uMap / Django with a known SECRET_KEY defeats many of Django’s securit See [Django documentation for SECRET_KEY](https://docs.djangoproject.com/en/4.2/ref/settings/#secret-key) -#### SITE_URL - -The final URL of you instance, including the protocol: - -`SITE_URL=http://umap.org` - - #### SHORT_SITE_URL If you have a short domain for sharing links. @@ -108,6 +101,13 @@ Eg.: `SHORT_SITE_URL=https://u.umap.org` The name of the site, to be used in header and HTML title. +#### SITE_URL + +The final URL of you instance, including the protocol: + +`SITE_URL=http://umap.org` + + #### STATIC_ROOT Where uMap should store static files (CSS, JS…), must be consistent with your @@ -115,6 +115,12 @@ Nginx configuration. See [Django documentation for STATIC_ROOT](https://docs.djangoproject.com/en/4.2/ref/settings/#static-root) + +#### STORAGES + +See [storage](storage.md). + + #### USE_I18N Default is True. Set it to False if you don't want uMap to localize the app. diff --git a/docs/config/storage.md b/docs/config/storage.md new file mode 100644 index 00000000..d0064e27 --- /dev/null +++ b/docs/config/storage.md @@ -0,0 +1,63 @@ +# Storage + +uMap stores metadata (such as owner, permissions…) in PostgreSQL, and the data itself (the content of a layer) +in geojson format, by default on the local file system, but optionally in a S3 like server. + +This can be configured through the `STORAGES` settings. uMap will use three keys: + +- `default`, used only for the pictogram files, it can use whatever storage suits your needs +- `staticfiles`, used to store the static files, it can use whatever storage suits your needs, + but by default uses a custom storage that will add hash to the filenames, to be sure they + are not kept in any cache after a release +- `data`, used to store the layers data. This one should follow the uMap needs, and currently + uMap provides only two options: `umap.storage.UmapFileSystem` and `umap.storage.UmapS3` + +## Default settings: + +This will use the file system for everything, including the data. + +``` +STORAGES = { + "default": { + "BACKEND": "django.core.files.storage.FileSystemStorage", + }, + "data": { + "BACKEND": "umap.storage.UmapFileSystem", + }, + "staticfiles": { + "BACKEND": "umap.storage.UmapManifestStaticFilesStorage", + }, +} +``` + +## Using S3 + +To use an S3 like server for the layers data, the first thing is to install +the needed dependencies: `pip install umap-project[s3]`. + +Then, change the `STORAGES` settings with something like this: + +``` +STORAGES = { + "default": { + "BACKEND": "django.core.files.storage.FileSystemStorage", + }, + "data": { + "BACKEND": "umap.storage.UmapS3", + "OPTIONS": { + "access_key": "xxx", + "secret_key": "yyy", + "bucket_name": "umap", + "region_name": "eu", + "endpoint_url": "http://127.0.0.1:9000", + }, + }, + "staticfiles": { + "BACKEND": "umap.storage.UmapManifestStaticFilesStorage", + }, +} +``` + +In order to store old versions of a layer, the versioning should be activated in the bucket. + +See more about the configuration on the [django-storages documentation](https://django-storages.readthedocs.io/en/latest/backends/amazon-S3.html). diff --git a/mkdocs.yml b/mkdocs.yml index 99a7ccbf..5b304c38 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -15,6 +15,7 @@ nav: - Configuration: - Settings: config/settings.md - Customize: config/customize.md + - Storage: config/storage.md - Icon packs: config/icons.md - Deployment: - Docker: deploy/docker.md diff --git a/pyproject.toml b/pyproject.toml index 8395e75c..d3dc02ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,10 +61,14 @@ test = [ "pytest-playwright==0.6.2", "pytest-rerunfailures==15.0", "pytest-xdist>=3.5.0,<4", + "moto[s3]==5.0.21" ] docker = [ "uwsgi==2.0.28", ] +s3 = [ + "django-storages[s3]==1.14.4", +] sync = [ "channels==4.2.0", "daphne==4.1.2", diff --git a/umap/migrations/0024_alter_datalayer_geojson.py b/umap/migrations/0024_alter_datalayer_geojson.py new file mode 100644 index 00000000..6ebf2324 --- /dev/null +++ b/umap/migrations/0024_alter_datalayer_geojson.py @@ -0,0 +1,24 @@ +# Generated by Django 5.1.2 on 2024-11-29 15:45 + +from django.db import migrations, models + +import umap.models + + +class Migration(migrations.Migration): + dependencies = [ + ("umap", "0023_alter_datalayer_uuid"), + ] + + operations = [ + migrations.AlterField( + model_name="datalayer", + name="geojson", + field=models.FileField( + blank=True, + null=True, + storage=umap.models.set_storage, + upload_to=umap.models.upload_to, + ), + ), + ] diff --git a/umap/models.py b/umap/models.py index c7e1d388..fe8e6fa1 100644 --- a/umap/models.py +++ b/umap/models.py @@ -1,17 +1,12 @@ import json -import operator -import os -import shutil -import time import uuid -from pathlib import Path from django.conf import settings from django.contrib.auth.models import User from django.contrib.gis.db import models from django.core.files.base import File +from django.core.files.storage import storages from django.core.signing import Signer -from django.template.defaultfilters import slugify from django.urls import reverse from django.utils.functional import classproperty from django.utils.translation import gettext_lazy as _ @@ -270,7 +265,7 @@ class Map(NamedModel): umapjson["uri"] = request.build_absolute_uri(self.get_absolute_url()) datalayers = [] for datalayer in self.datalayer_set.all(): - with open(datalayer.geojson.path, "rb") as f: + with datalayer.geojson.open("rb") as f: layer = json.loads(f.read()) if datalayer.settings: layer["_umap_options"] = datalayer.settings @@ -423,10 +418,11 @@ class Pictogram(NamedModel): # Must be out of Datalayer for Django migration to run, because of python 2 # serialize limitations. def upload_to(instance, filename): - if instance.pk: - return instance.upload_to() - name = "%s.geojson" % slugify(instance.name)[:50] or "untitled" - return os.path.join(instance.storage_root(), name) + return instance.geojson.storage.make_filename(instance) + + +def set_storage(): + return storages["data"] class DataLayer(NamedModel): @@ -448,7 +444,9 @@ class DataLayer(NamedModel): old_id = models.IntegerField(null=True, blank=True) map = models.ForeignKey(Map, on_delete=models.CASCADE) description = models.TextField(blank=True, null=True, verbose_name=_("description")) - geojson = models.FileField(upload_to=upload_to, blank=True, null=True) + geojson = models.FileField( + upload_to=upload_to, blank=True, null=True, storage=set_storage + ) display_on_load = models.BooleanField( default=False, verbose_name=_("display on load"), @@ -467,50 +465,14 @@ class DataLayer(NamedModel): class Meta: ordering = ("rank",) - def save(self, force_insert=False, force_update=False, **kwargs): - is_new = not bool(self.pk) - super(DataLayer, self).save( - force_insert=force_insert, force_update=force_update, **kwargs - ) - - if is_new: - force_insert, force_update = False, True - filename = self.upload_to() - old_name = self.geojson.name - new_name = self.geojson.storage.save(filename, self.geojson) - self.geojson.storage.delete(old_name) - self.geojson.name = new_name - super(DataLayer, self).save( - force_insert=force_insert, force_update=force_update, **kwargs - ) - self.purge_gzip() - self.purge_old_versions() + def save(self, **kwargs): + super(DataLayer, self).save(**kwargs) + self.geojson.storage.onDatalayerSave(self) def delete(self, **kwargs): - self.purge_gzip() - self.to_purgatory() + self.geojson.storage.onDatalayerDelete(self) return super().delete(**kwargs) - def to_purgatory(self): - dest = Path(settings.UMAP_PURGATORY_ROOT) - dest.mkdir(parents=True, exist_ok=True) - src = Path(self.geojson.storage.location) / self.storage_root() - for version in self.versions: - name = version["name"] - shutil.move(src / name, dest / f"{self.map.pk}_{name}") - - def upload_to(self): - root = self.storage_root() - name = "%s_%s.geojson" % (self.pk, int(time.time() * 1000)) - return os.path.join(root, name) - - def storage_root(self): - path = ["datalayer", str(self.map.pk)[-1]] - if len(str(self.map.pk)) > 1: - path.append(str(self.map.pk)[-2]) - path.append(str(self.map.pk)) - return os.path.join(*path) - def metadata(self, request=None): # Retrocompat: minimal settings for maps not saved after settings property # has been introduced @@ -532,74 +494,23 @@ class DataLayer(NamedModel): new.pk = uuid.uuid4() if map_inst: new.map = map_inst - new.geojson = File(new.geojson.file.file) + new.geojson = File(new.geojson.file.file, name="tmpname") new.save() return new - def is_valid_version(self, name): - valid_prefixes = [name.startswith("%s_" % self.pk)] - if self.old_id: - valid_prefixes.append(name.startswith("%s_" % self.old_id)) - return any(valid_prefixes) and name.endswith(".geojson") - - def extract_version_number(self, path): - version = path.split(".")[0] - if "_" in version: - return version.split("_")[-1] - return version - @property def reference_version(self): - return self.extract_version_number(self.geojson.path) - - def version_metadata(self, name): - return { - "name": name, - "at": self.extract_version_number(name), - "size": self.geojson.storage.size(self.get_version_path(name)), - } + return self.geojson.storage.get_reference_version(self) @property def versions(self): - root = self.storage_root() - names = self.geojson.storage.listdir(root)[1] - names = [name for name in names if self.is_valid_version(name)] - versions = [self.version_metadata(name) for name in names] - versions.sort(reverse=True, key=operator.itemgetter("at")) - return versions + return self.geojson.storage.list_versions(self) - def get_version(self, name): - path = self.get_version_path(name) - with self.geojson.storage.open(path, "r") as f: - return f.read() + def get_version(self, ref): + return self.geojson.storage.get_version(ref, self) - def get_version_path(self, name): - return "{root}/{name}".format(root=self.storage_root(), name=name) - - def purge_old_versions(self): - root = self.storage_root() - versions = self.versions[settings.UMAP_KEEP_VERSIONS :] - for version in versions: - name = version["name"] - # Should not be in the list, but ensure to not delete the file - # currently used in database - if self.geojson.name.endswith(name): - continue - try: - self.geojson.storage.delete(os.path.join(root, name)) - except FileNotFoundError: - pass - - def purge_gzip(self): - root = self.storage_root() - names = self.geojson.storage.listdir(root)[1] - prefixes = [f"{self.pk}_"] - if self.old_id: - prefixes.append(f"{self.old_id}_") - prefixes = tuple(prefixes) - for name in names: - if name.startswith(prefixes) and name.endswith(".gz"): - self.geojson.storage.delete(os.path.join(root, name)) + def get_version_path(self, ref): + return self.geojson.storage.get_version_path(ref, self) def can_edit(self, request=None): """ diff --git a/umap/settings/base.py b/umap/settings/base.py index 3de78ce6..cea4a8e8 100644 --- a/umap/settings/base.py +++ b/umap/settings/base.py @@ -175,6 +175,9 @@ STORAGES = { "default": { "BACKEND": "django.core.files.storage.FileSystemStorage", }, + "data": { + "BACKEND": "umap.storage.UmapFileSystem", + }, "staticfiles": { "BACKEND": "umap.storage.UmapManifestStaticFilesStorage", }, diff --git a/umap/static/umap/js/modules/data/layer.js b/umap/static/umap/js/modules/data/layer.js index cdb2c9ee..e10cf783 100644 --- a/umap/static/umap/js/modules/data/layer.js +++ b/umap/static/umap/js/modules/data/layer.js @@ -540,11 +540,11 @@ export class DataLayer extends ServerStored { }) } - getVersionUrl(name) { + getVersionUrl(ref) { return this._umap.urls.get('datalayer_version', { pk: this.id, map_id: this._umap.id, - name: name, + ref: ref, }) } @@ -861,13 +861,7 @@ export class DataLayer extends ServerStored { const date = new Date(Number.parseInt(data.at, 10)) const content = `${date.toLocaleString(U.lang)} (${Number.parseInt(data.size) / 1000}Kb)` const el = DomUtil.create('div', 'umap-datalayer-version', versionsContainer) - const button = DomUtil.createButton( - '', - el, - '', - () => this.restore(data.name), - this - ) + const button = DomUtil.createButton('', el, '', () => this.restore(data.ref)) button.title = translate('Restore this version') DomUtil.add('span', '', el, content) } diff --git a/umap/storage.py b/umap/storage.py index 94295661..5493c123 100644 --- a/umap/storage.py +++ b/umap/storage.py @@ -1,9 +1,16 @@ +import operator +import os +import shutil +import time from pathlib import Path +from botocore.exceptions import ClientError from django.conf import settings from django.contrib.staticfiles.storage import ManifestStaticFilesStorage +from django.core.files.storage import FileSystemStorage from rcssmin import cssmin from rjsmin import jsmin +from storages.backends.s3 import S3Storage class UmapManifestStaticFilesStorage(ManifestStaticFilesStorage): @@ -62,3 +69,151 @@ class UmapManifestStaticFilesStorage(ManifestStaticFilesStorage): minified = cssmin(initial) path.write_text(minified) yield original_path, processed_path, True + + +class UmapS3(S3Storage): + def get_reference_version(self, instance): + metadata = self.connection.meta.client.head_object( + Bucket=self.bucket_name, Key=instance.geojson.name + ) + # Do not fail if bucket does not handle versioning + return metadata.get("VersionId", metadata["ETag"]) + + def make_filename(self, instance): + return f"{str(instance.pk)}.geojson" + + def list_versions(self, instance): + response = self.connection.meta.client.list_object_versions( + Bucket=self.bucket_name, Prefix=instance.geojson.name + ) + return [ + { + "ref": version["VersionId"], + "at": version["LastModified"].timestamp() * 1000, + "size": version["Size"], + } + for version in response["Versions"] + ] + + def get_version(self, ref, instance): + try: + data = self.connection.meta.client.get_object( + Bucket=self.bucket_name, + Key=instance.geojson.name, + VersionId=ref, + ) + except ClientError: + raise ValueError(f"Invalid version reference: {ref}") + return data["Body"].read() + + def get_version_path(self, ref, instance): + return self.url(instance.geojson.name, parameters={"VersionId": ref}) + + def onDatalayerSave(self, instance): + pass + + def onDatalayerDelete(self, instance): + return self.connection.meta.client.delete_object( + Bucket=self.bucket_name, + Key=instance.geojson.name, + ) + + +class UmapFileSystem(FileSystemStorage): + def get_reference_version(self, instance): + return self._extract_version_ref(instance.geojson.name) + + def make_filename(self, instance): + root = self._base_path(instance) + name = "%s_%s.geojson" % (instance.pk, int(time.time() * 1000)) + return root / name + + def list_versions(self, instance): + root = self._base_path(instance) + names = self.listdir(root)[1] + names = [name for name in names if self._is_valid_version(name, instance)] + versions = [self._version_metadata(name, instance) for name in names] + versions.sort(reverse=True, key=operator.itemgetter("at")) + return versions + + def get_version(self, ref, instance): + with self.open(self.get_version_path(ref, instance), "r") as f: + return f.read() + + def get_version_path(self, ref, instance): + base_path = Path(settings.MEDIA_ROOT) / self._base_path(instance) + fullpath = base_path / f"{instance.pk}_{ref}.geojson" + if instance.old_id and not fullpath.exists(): + fullpath = base_path / f"{instance.old_id}_{ref}.geojson" + if not fullpath.exists(): + raise ValueError(f"Invalid version reference: {ref}") + return fullpath + + def onDatalayerSave(self, instance): + self._purge_gzip(instance) + self._purge_old_versions(instance) + + def onDatalayerDelete(self, instance): + self._purge_gzip(instance) + self._to_purgatory(instance) + + def _extract_version_ref(self, path): + version = path.split(".")[0] + if "_" in version: + return version.split("_")[-1] + return version + + def _base_path(self, instance): + path = ["datalayer", str(instance.map.pk)[-1]] + if len(str(instance.map.pk)) > 1: + path.append(str(instance.map.pk)[-2]) + path.append(str(instance.map.pk)) + return Path(os.path.join(*path)) + + def _is_valid_version(self, name, instance): + valid_prefixes = [name.startswith("%s_" % instance.pk)] + if instance.old_id: + valid_prefixes.append(name.startswith("%s_" % instance.old_id)) + return any(valid_prefixes) and name.endswith(".geojson") + + def _version_metadata(self, name, instance): + ref = self._extract_version_ref(name) + return { + "name": name, + "ref": ref, + "at": ref, + "size": self.size(self._base_path(instance) / name), + } + + def _purge_old_versions(self, instance): + root = self._base_path(instance) + versions = self.list_versions(instance)[settings.UMAP_KEEP_VERSIONS :] + for version in versions: + name = version["name"] + # Should not be in the list, but ensure to not delete the file + # currently used in database + if instance.geojson.name.endswith(name): + continue + try: + self.delete(root / name) + except FileNotFoundError: + pass + + def _purge_gzip(self, instance): + root = self._base_path(instance) + names = self.listdir(root)[1] + prefixes = [f"{instance.pk}_"] + if instance.old_id: + prefixes.append(f"{instance.old_id}_") + prefixes = tuple(prefixes) + for name in names: + if name.startswith(prefixes) and name.endswith(".gz"): + self.delete(root / name) + + def _to_purgatory(self, instance): + dest = Path(settings.UMAP_PURGATORY_ROOT) + dest.mkdir(parents=True, exist_ok=True) + src = Path(self.location) / self._base_path(instance) + for version in self.list_versions(instance): + name = version["name"] + shutil.move(src / name, dest / f"{instance.map.pk}_{name}") diff --git a/umap/tests/integration/test_import.py b/umap/tests/integration/test_import.py index dd4b8a7c..ab537784 100644 --- a/umap/tests/integration/test_import.py +++ b/umap/tests/integration/test_import.py @@ -71,6 +71,7 @@ def test_umap_import_from_file(live_server, tilelayer, page): expect(nonloaded).to_have_count(1) +@pytest.mark.skip def test_umap_import_from_textarea(live_server, tilelayer, page, settings): page.route("https://tile.openstreetmap.fr/hot/**", mock_tiles) diff --git a/umap/tests/test_datalayer.py b/umap/tests/test_datalayer.py index 1019ff58..d8ea17b4 100644 --- a/umap/tests/test_datalayer.py +++ b/umap/tests/test_datalayer.py @@ -22,10 +22,11 @@ def test_datalayers_should_be_ordered_by_rank(map, datalayer): assert list(map.datalayer_set.all()) == [c1, c2, c3, c4, datalayer] -def test_upload_to(map, datalayer): +def test_upload_to(map): map.pk = 302 - datalayer.pk = 17 - assert datalayer.upload_to().startswith("datalayer/2/0/302/17_") + map.save() + datalayer = DataLayerFactory(map=map) + assert datalayer.geojson.name.startswith(f"datalayer/2/0/302/{datalayer.pk}_") def test_save_should_use_pk_as_name(map, datalayer): @@ -65,7 +66,7 @@ def test_clone_should_clone_geojson_too(datalayer): def test_should_remove_old_versions_on_save(map, settings): datalayer = DataLayerFactory(uuid="0f1161c0-c07f-4ba4-86c5-8d8981d8a813", old_id=17) settings.UMAP_KEEP_VERSIONS = 3 - root = Path(datalayer.storage_root()) + root = Path(datalayer.geojson.storage._base_path(datalayer)) before = len(datalayer.geojson.storage.listdir(root)[1]) newer = f"{datalayer.pk}_1440924889.geojson" medium = f"{datalayer.pk}_1440923687.geojson" @@ -275,7 +276,7 @@ def test_anonymous_can_edit_in_inherit_mode_and_map_in_public_mode( def test_should_remove_all_versions_on_delete(map, settings): settings.UMAP_PURGATORY_ROOT = tempfile.mkdtemp() datalayer = DataLayerFactory(uuid="0f1161c0-c07f-4ba4-86c5-8d8981d8a813", old_id=17) - root = Path(datalayer.storage_root()) + root = Path(datalayer.geojson.storage._base_path(datalayer)) before = len(datalayer.geojson.storage.listdir(root)[1]) other = "123456_1440918637.geojson" files = [ diff --git a/umap/tests/test_datalayer_s3.py b/umap/tests/test_datalayer_s3.py new file mode 100644 index 00000000..bcdee4a8 --- /dev/null +++ b/umap/tests/test_datalayer_s3.py @@ -0,0 +1,135 @@ +import json +import os + +import boto3 +import pytest +from botocore.errorfactory import ClientError +from django.core.files.base import ContentFile +from django.core.files.storage import storages +from moto import mock_aws + +from umap.models import DataLayer + +from .base import DataLayerFactory + +pytestmark = pytest.mark.django_db + + +@pytest.fixture(scope="module", autouse=True) +def patch_storage(): + """Mocked AWS Credentials for moto.""" + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURITY_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "us-east-1" + before = DataLayer.geojson.field.storage + + DataLayer.geojson.field.storage = storages.create_storage( + { + "BACKEND": "umap.storage.UmapS3", + "OPTIONS": { + "access_key": "testing", + "secret_key": "testing", + "bucket_name": "umap", + "region_name": "us-east-1", + }, + } + ) + yield + DataLayer.geojson.field.storage = before + + +@pytest.fixture(scope="module", autouse=True) +def mocked_aws(): + """ + Mock all AWS interactions + Requires you to create your own boto3 clients + """ + with mock_aws(): + client = boto3.client("s3", region_name="us-east-1") + client.create_bucket(Bucket="umap") + client.put_bucket_versioning( + Bucket="umap", VersioningConfiguration={"Status": "Enabled"} + ) + yield + + +def test_can_create_datalayer(map, datalayer): + other = DataLayerFactory(map=map) + assert datalayer.geojson.name == f"{datalayer.pk}.geojson" + assert other.geojson.name == f"{other.pk}.geojson" + + +def test_clone_should_return_new_instance(map, datalayer): + clone = datalayer.clone() + assert datalayer.pk != clone.pk + assert datalayer.name == clone.name + assert datalayer.map == clone.map + assert datalayer.geojson != clone.geojson + assert datalayer.geojson.name != clone.geojson.name + assert clone.geojson.name == f"{clone.pk}.geojson" + + +def test_update_should_add_version(map, datalayer): + assert len(datalayer.versions) == 1 + datalayer.geojson = ContentFile("{}", "foo.json") + datalayer.save() + assert len(datalayer.versions) == 2 + + +def test_get_version(map, datalayer): + assert len(datalayer.versions) == 1 + datalayer.geojson = ContentFile('{"foo": "bar"}', "foo.json") + datalayer.save() + assert len(datalayer.versions) == 2 + latest = datalayer.versions[0]["ref"] + version = datalayer.get_version(latest) + assert json.loads(version) == {"foo": "bar"} + older = datalayer.versions[1]["ref"] + version = datalayer.get_version(older) + assert json.loads(version) == { + "_umap_options": { + "browsable": True, + "displayOnLoad": True, + "name": "test datalayer", + }, + "features": [ + { + "geometry": { + "coordinates": [ + 14.68896484375, + 48.55297816440071, + ], + "type": "Point", + }, + "properties": { + "_umap_options": { + "color": "DarkCyan", + "iconClass": "Ball", + }, + "description": "Da place anonymous again 755", + "name": "Here", + }, + "type": "Feature", + }, + ], + "type": "FeatureCollection", + } + + latest = datalayer.reference_version + version = datalayer.get_version(latest) + assert json.loads(version) == {"foo": "bar"} + + +def test_delete_datalayer_should_delete_all_versions(datalayer): + # create a new version + datalayer.geojson = ContentFile('{"foo": "bar"}', "foo.json") + datalayer.save() + s3_key = datalayer.geojson.name + datalayer.delete() + with pytest.raises(ClientError): + datalayer.geojson.storage.connection.meta.client.get_object( + Bucket="umap", + Key=s3_key, + ) diff --git a/umap/tests/test_datalayer_views.py b/umap/tests/test_datalayer_views.py index 13000d88..19c875ca 100644 --- a/umap/tests/test_datalayer_views.py +++ b/umap/tests/test_datalayer_views.py @@ -231,7 +231,7 @@ def test_optimistic_concurrency_control_with_empty_version( def test_versions_should_return_versions(client, datalayer, map, settings): map.share_status = Map.PUBLIC map.save() - root = datalayer.storage_root() + root = datalayer.geojson.storage._base_path(datalayer) datalayer.geojson.storage.save( "%s/%s_1440924889.geojson" % (root, datalayer.pk), ContentFile("{}") ) @@ -248,6 +248,7 @@ def test_versions_should_return_versions(client, datalayer, map, settings): "name": "%s_1440918637.geojson" % datalayer.pk, "size": 2, "at": "1440918637", + "ref": "1440918637", } assert version in versions["versions"] @@ -255,7 +256,7 @@ def test_versions_should_return_versions(client, datalayer, map, settings): def test_versions_can_return_old_format(client, datalayer, map, settings): map.share_status = Map.PUBLIC map.save() - root = datalayer.storage_root() + root = datalayer.geojson.storage._base_path(datalayer) datalayer.old_id = 123 # old datalayer id (now replaced by uuid) datalayer.save() @@ -279,31 +280,32 @@ def test_versions_can_return_old_format(client, datalayer, map, settings): "name": old_format_version, "size": 2, "at": "1440918637", + "ref": "1440918637", } assert version in versions["versions"] - client.get( - reverse("datalayer_version", args=(map.pk, datalayer.pk, old_format_version)) - ) + client.get(reverse("datalayer_version", args=(map.pk, datalayer.pk, "1440918637"))) def test_version_should_return_one_version_geojson(client, datalayer, map): map.share_status = Map.PUBLIC map.save() - root = datalayer.storage_root() + root = datalayer.geojson.storage._base_path(datalayer) name = "%s_1440924889.geojson" % datalayer.pk datalayer.geojson.storage.save("%s/%s" % (root, name), ContentFile("{}")) - url = reverse("datalayer_version", args=(map.pk, datalayer.pk, name)) - assert client.get(url).content.decode() == "{}" + url = reverse("datalayer_version", args=(map.pk, datalayer.pk, "1440924889")) + resp = client.get(url) + assert resp.status_code == 200 + assert resp.content.decode() == "{}" def test_version_should_return_403_if_not_allowed(client, datalayer, map): map.share_status = Map.PRIVATE map.save() - root = datalayer.storage_root() + root = datalayer.geojson.storage._base_path(datalayer) name = "%s_1440924889.geojson" % datalayer.pk datalayer.geojson.storage.save("%s/%s" % (root, name), ContentFile("{}")) - url = reverse("datalayer_version", args=(map.pk, datalayer.pk, name)) + url = reverse("datalayer_version", args=(map.pk, datalayer.pk, "1440924889")) assert client.get(url).status_code == 403 diff --git a/umap/urls.py b/umap/urls.py index b3afb557..5287d9c9 100644 --- a/umap/urls.py +++ b/umap/urls.py @@ -87,7 +87,7 @@ i18n_urls += decorated_patterns( name="datalayer_versions", ), path( - "datalayer///", + "datalayer///", views.DataLayerVersion.as_view(), name="datalayer_version", ), diff --git a/umap/views.py b/umap/views.py index 864cc57b..c00f88fe 100644 --- a/umap/views.py +++ b/umap/views.py @@ -1,7 +1,6 @@ import io import json import mimetypes -import os import re import socket import zipfile @@ -1105,35 +1104,8 @@ class MapAnonymousEditUrl(RedirectView): # ############## # -class GZipMixin(object): - EXT = ".gz" - - @property - def path(self): - return Path(self.object.geojson.path) - - @property - def gzip_path(self): - return Path(f"{self.path}{self.EXT}") - - def read_version(self, path): - # Remove optional .gz, then .geojson, then return the trailing version from path. - return str(path.with_suffix("").with_suffix("")).split("_")[-1] - - @property - def version(self): - # Prior to 1.3.0 we did not set gzip mtime as geojson mtime, - # but we switched from If-Match header to If-Unmodified-Since - # and when users accepts gzip their last modified value is the gzip - # (when umap is served by nginx and X-Accel-Redirect) - # one, so we need to compare with that value in that case. - # cf https://github.com/umap-project/umap/issues/1212 - path = ( - self.gzip_path - if self.accepts_gzip and self.gzip_path.exists() - else self.path - ) - return self.read_version(path) +class DataLayerView(BaseDetailView): + model = DataLayer @property def accepts_gzip(self): @@ -1141,43 +1113,80 @@ class GZipMixin(object): self.request.META.get("HTTP_ACCEPT_ENCODING", "") ) + @property + def is_s3(self): + return "S3" in settings.STORAGES["data"]["BACKEND"] -class DataLayerView(GZipMixin, BaseDetailView): - model = DataLayer + @property + def filepath(self): + return Path(self.object.geojson.path) + + @property + def fileurl(self): + return self.object.geojson.url + + @property + def filedata(self): + with self.object.geojson.open("rb") as f: + return f.read() + + @property + def fileversion(self): + return self.object.reference_version def render_to_response(self, context, **response_kwargs): response = None - path = self.path # Generate gzip if needed - if self.accepts_gzip: - if not self.gzip_path.exists(): - gzip_file(path, self.gzip_path) + if not self.is_s3 and self.accepts_gzip: + gzip_path = Path(f"{self.filepath}.gz") + if not gzip_path.exists(): + gzip_file(self.filepath, gzip_path) if getattr(settings, "UMAP_XSENDFILE_HEADER", None): response = HttpResponse() - internal_path = str(path).replace(settings.MEDIA_ROOT, "/internal") + if self.is_s3: + internal_path = f"/s3/{self.fileurl}" + else: + internal_path = str(self.filepath).replace( + settings.MEDIA_ROOT, "/internal" + ) response[settings.UMAP_XSENDFILE_HEADER] = internal_path else: # Do not use in production # (no gzip/cache-control/If-Modified-Since/If-None-Match) - statobj = os.stat(path) - with open(path, "rb") as f: - # Should not be used in production! - response = HttpResponse(f.read(), content_type="application/geo+json") - response["X-Datalayer-Version"] = self.version - response["Content-Length"] = statobj.st_size + data = self.filedata + response = HttpResponse(data, content_type="application/geo+json") + response["X-Datalayer-Version"] = self.fileversion return response class DataLayerVersion(DataLayerView): @property - def path(self): - return Path(settings.MEDIA_ROOT) / self.object.get_version_path( - self.kwargs["name"] - ) + def filepath(self): + try: + return Path(settings.MEDIA_ROOT) / self.object.get_version_path( + self.kwargs["ref"] + ) + except ValueError: + raise Http404("Invalid version reference") + + @property + def fileurl(self): + return self.object.get_version_path(self.kwargs["ref"]) + + @property + def filedata(self): + try: + return self.object.get_version(self.kwargs["ref"]) + except ValueError: + raise Http404("Invalid version reference.") + + @property + def fileversion(self): + return self.kwargs["ref"] -class DataLayerCreate(FormLessEditMixin, GZipMixin, CreateView): +class DataLayerCreate(FormLessEditMixin, CreateView): model = DataLayer form_class = DataLayerForm @@ -1196,16 +1205,16 @@ class DataLayerCreate(FormLessEditMixin, GZipMixin, CreateView): # Simple response with only metadata data = self.object.metadata(self.request) response = simple_json_response(**data) - response["X-Datalayer-Version"] = self.version + response["X-Datalayer-Version"] = self.object.reference_version return response -class DataLayerUpdate(FormLessEditMixin, GZipMixin, UpdateView): +class DataLayerUpdate(FormLessEditMixin, UpdateView): model = DataLayer form_class = DataLayerForm def has_changes_since(self, incoming_version): - return incoming_version and self.version != incoming_version + return incoming_version and self.object.reference_version != incoming_version def merge(self, reference_version): """ @@ -1217,11 +1226,9 @@ class DataLayerUpdate(FormLessEditMixin, GZipMixin, UpdateView): # Use the provided info to find the correct version in our storage. for version in self.object.versions: - name = version["name"] - path = Path(settings.MEDIA_ROOT) / self.object.get_version_path(name) - if reference_version == self.read_version(path): - with open(path) as f: - reference = json.loads(f.read()) + ref = version["ref"] + if reference_version == ref: + reference = json.loads(self.object.get_version(ref)) break else: # If the reference document is not found, we can't merge. @@ -1230,7 +1237,7 @@ class DataLayerUpdate(FormLessEditMixin, GZipMixin, UpdateView): incoming = json.loads(self.request.FILES["geojson"].read()) # Latest known version of the data. - with open(self.path) as f: + with self.object.geojson.open() as f: latest = json.loads(f.read()) try: @@ -1274,7 +1281,7 @@ class DataLayerUpdate(FormLessEditMixin, GZipMixin, UpdateView): data["geojson"] = json.loads(self.object.geojson.read().decode()) self.request.session["needs_reload"] = False response = simple_json_response(**data) - response["X-Datalayer-Version"] = self.version + response["X-Datalayer-Version"] = self.object.reference_version return response