chore: make S3 dependencies optional

Yohan Boniface 2024-12-11 19:16:17 +01:00
parent c25ee5e29c
commit 8b65674e0a
10 changed files with 241 additions and 228 deletions


@@ -10,7 +10,7 @@ This can be configured through the `STORAGES` settings. uMap will use three keys
 but by default uses a custom storage that will add hash to the filenames, to be sure they
 are not kept in any cache after a release
 - `data`, used to store the layers data. This one should follow the uMap needs, and currently
-  uMap provides only two options: `umap.storage.UmapFileSystem` and `umap.storage.UmapS3`
+  uMap provides only two options: `umap.storage.fs.FSDataStorage` and `umap.storage.s3.S3DataStorage`
 ## Default settings:
@@ -22,10 +22,10 @@ STORAGES = {
         "BACKEND": "django.core.files.storage.FileSystemStorage",
     },
     "data": {
-        "BACKEND": "umap.storage.UmapFileSystem",
+        "BACKEND": "umap.storage.fs.FSDataStorage",
     },
     "staticfiles": {
-        "BACKEND": "umap.storage.UmapManifestStaticFilesStorage",
+        "BACKEND": "umap.storage.staticfiles.UmapManifestStaticFilesStorage",
     },
 }
 ```
@@ -43,7 +43,7 @@ STORAGES = {
         "BACKEND": "django.core.files.storage.FileSystemStorage",
     },
     "data": {
-        "BACKEND": "umap.storage.UmapS3",
+        "BACKEND": "umap.storage.s3.S3DataStorage",
         "OPTIONS": {
             "access_key": "xxx",
             "secret_key": "yyy",
@@ -53,7 +53,7 @@ STORAGES = {
         },
     },
     "staticfiles": {
-        "BACKEND": "umap.storage.UmapManifestStaticFilesStorage",
+        "BACKEND": "umap.storage.staticfiles.UmapManifestStaticFilesStorage",
     },
 }
 ```
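With either configuration, the backend class is not imported when settings load: Django 4.2+ resolves the dotted `BACKEND` path lazily, on first access, which is what makes shipping an optional S3 backend practical. A minimal sketch (not part of this commit) of fetching the configured data storage:

```python
from django.core.files.storage import storages

# Resolves STORAGES["data"]["BACKEND"] lazily; only at this point would
# umap.storage.s3 be imported (and raise if the s3 extra is missing).
data_storage = storages["data"]
```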


@@ -2,7 +2,7 @@ from django.conf import settings
 from django.core.management.base import BaseCommand
 
 from umap.models import DataLayer
-from umap.storage import UmapFileSystem
+from umap.storage.fs import FSDataStorage
 
 
 class Command(BaseCommand):
@@ -11,9 +11,9 @@ class Command(BaseCommand):
     def handle(self, *args, **options):
         assert settings.UMAP_READONLY, "You must run that script with a read-only uMap."
         assert (
-            settings.STORAGES["data"]["BACKEND"] == "umap.storage.UmapS3"
+            settings.STORAGES["data"]["BACKEND"] == "umap.storage.s3.S3DataStorage"
         ), "You must configure your storages to point to S3"
-        fs_storage = UmapFileSystem()
+        fs_storage = FSDataStorage()
         for datalayer in DataLayer.objects.all():
             geojson_fs_path = str(datalayer.geojson)
             try:
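The hunk above updates a migration command that copies each datalayer's GeoJSON from the filesystem into S3. Its module name is not visible in this excerpt; assuming it is exposed like any other management command, it can also be run programmatically:

```python
from django.core.management import call_command

# "migratetos3" is a placeholder: the real command name is not shown
# in this excerpt.
call_command("migratetos3")
```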


@@ -176,10 +176,10 @@ STORAGES = {
         "BACKEND": "django.core.files.storage.FileSystemStorage",
     },
     "data": {
-        "BACKEND": "umap.storage.UmapFileSystem",
+        "BACKEND": "umap.storage.fs.FSDataStorage",
     },
     "staticfiles": {
-        "BACKEND": "umap.storage.UmapManifestStaticFilesStorage",
+        "BACKEND": "umap.storage.staticfiles.UmapManifestStaticFilesStorage",
     },
 }
 # Add application/json and application/geo+json to default django-storages setting

umap/storage.py (deleted file)

@@ -1,216 +0,0 @@
import operator
import os
import shutil
import time
from gzip import GzipFile
from pathlib import Path

from botocore.exceptions import ClientError
from django.conf import settings
from django.contrib.staticfiles.storage import ManifestStaticFilesStorage
from django.core.files.storage import FileSystemStorage
from rcssmin import cssmin
from rjsmin import jsmin
from storages.backends.s3 import S3Storage


class UmapManifestStaticFilesStorage(ManifestStaticFilesStorage):
    support_js_module_import_aggregation = True
    max_post_process_passes = 15

    # We remove `;` at the end of all regexps to match our biome config.
    _js_module_import_aggregation_patterns = (
        "*.js",
        (
            (
                (
                    r"""(?P<matched>import(?s:(?P<import>[\s\{].*?))"""
                    r"""\s*from\s*['"](?P<url>[\.\/].*?)["']\s*)"""
                ),
                'import%(import)s from "%(url)s"\n',
            ),
            (
                (
                    r"""(?P<matched>export(?s:(?P<exports>[\s\{].*?))"""
                    r"""\s*from\s*["'](?P<url>[\.\/].*?)["']\s*)"""
                ),
                'export%(exports)s from "%(url)s"\n',
            ),
            (
                r"""(?P<matched>import\s*['"](?P<url>[\.\/].*?)["']\s*)""",
                'import"%(url)s"\n',
            ),
            (
                r"""(?P<matched>import\(["'](?P<url>.*?)["']\)\.then)""",
                """import("%(url)s").then""",
            ),
            (
                r"""(?P<matched>await import\(["'](?P<url>.*?)["']\))""",
                """await import("%(url)s")""",
            ),
        ),
    )

    def post_process(self, paths, **options):
        collected = super().post_process(paths, **options)
        for original_path, processed_path, processed in collected:
            if isinstance(processed, Exception):
                print("Error with file", original_path)
                raise processed
            if processed_path.endswith(".js"):
                path = Path(settings.STATIC_ROOT) / processed_path
                initial = path.read_text()
                if "sourceMappingURL" not in initial:  # Already minified.
                    minified = jsmin(initial)
                    path.write_text(minified)
            if processed_path.endswith(".css"):
                path = Path(settings.STATIC_ROOT) / processed_path
                initial = path.read_text()
                if "sourceMappingURL" not in initial:  # Already minified.
                    minified = cssmin(initial)
                    path.write_text(minified)
            yield original_path, processed_path, True


class UmapS3(S3Storage):
    gzip = True

    def get_reference_version(self, instance):
        metadata = self.connection.meta.client.head_object(
            Bucket=self.bucket_name, Key=instance.geojson.name
        )
        # Do not fail if bucket does not handle versioning
        return metadata.get("VersionId", metadata["ETag"])

    def make_filename(self, instance):
        return f"{str(instance.pk)}.geojson"

    def list_versions(self, instance):
        response = self.connection.meta.client.list_object_versions(
            Bucket=self.bucket_name, Prefix=instance.geojson.name
        )
        return [
            {
                "ref": version["VersionId"],
                "at": version["LastModified"].timestamp() * 1000,
                "size": version["Size"],
            }
            for version in response["Versions"]
        ]

    def get_version(self, ref, instance):
        try:
            data = self.connection.meta.client.get_object(
                Bucket=self.bucket_name,
                Key=instance.geojson.name,
                VersionId=ref,
            )
        except ClientError:
            raise ValueError(f"Invalid version reference: {ref}")
        return GzipFile(mode="r", fileobj=data["Body"]).read()

    def get_version_path(self, ref, instance):
        return self.url(instance.geojson.name, parameters={"VersionId": ref})

    def onDatalayerSave(self, instance):
        pass

    def onDatalayerDelete(self, instance):
        return self.connection.meta.client.delete_object(
            Bucket=self.bucket_name,
            Key=instance.geojson.name,
        )


class UmapFileSystem(FileSystemStorage):
    def get_reference_version(self, instance):
        return self._extract_version_ref(instance.geojson.name)

    def make_filename(self, instance):
        root = self._base_path(instance)
        name = "%s_%s.geojson" % (instance.pk, int(time.time() * 1000))
        return root / name

    def list_versions(self, instance):
        root = self._base_path(instance)
        names = self.listdir(root)[1]
        names = [name for name in names if self._is_valid_version(name, instance)]
        versions = [self._version_metadata(name, instance) for name in names]
        versions.sort(reverse=True, key=operator.itemgetter("at"))
        return versions

    def get_version(self, ref, instance):
        with self.open(self.get_version_path(ref, instance), "r") as f:
            return f.read()

    def get_version_path(self, ref, instance):
        base_path = Path(settings.MEDIA_ROOT) / self._base_path(instance)
        fullpath = base_path / f"{instance.pk}_{ref}.geojson"
        if instance.old_id and not fullpath.exists():
            fullpath = base_path / f"{instance.old_id}_{ref}.geojson"
        if not fullpath.exists():
            raise ValueError(f"Invalid version reference: {ref}")
        return fullpath

    def onDatalayerSave(self, instance):
        self._purge_gzip(instance)
        self._purge_old_versions(instance, keep=settings.UMAP_KEEP_VERSIONS)

    def onDatalayerDelete(self, instance):
        self._purge_gzip(instance)
        self._purge_old_versions(instance, keep=None)

    def _extract_version_ref(self, path):
        version = path.split(".")[0]
        if "_" in version:
            return version.split("_")[-1]
        return version

    def _base_path(self, instance):
        path = ["datalayer", str(instance.map.pk)[-1]]
        if len(str(instance.map.pk)) > 1:
            path.append(str(instance.map.pk)[-2])
        path.append(str(instance.map.pk))
        return Path(os.path.join(*path))

    def _is_valid_version(self, name, instance):
        valid_prefixes = [name.startswith("%s_" % instance.pk)]
        if instance.old_id:
            valid_prefixes.append(name.startswith("%s_" % instance.old_id))
        return any(valid_prefixes) and name.endswith(".geojson")

    def _version_metadata(self, name, instance):
        ref = self._extract_version_ref(name)
        return {
            "name": name,
            "ref": ref,
            "at": ref,
            "size": self.size(self._base_path(instance) / name),
        }

    def _purge_old_versions(self, instance, keep=None):
        root = self._base_path(instance)
        versions = self.list_versions(instance)
        if keep is not None:
            versions = versions[keep:]
        for version in versions:
            name = version["name"]
            # Should not be in the list, but ensure to not delete the file
            # currently used in database
            if keep is not None and instance.geojson.name.endswith(name):
                continue
            try:
                self.delete(root / name)
            except FileNotFoundError:
                pass

    def _purge_gzip(self, instance):
        root = self._base_path(instance)
        names = self.listdir(root)[1]
        prefixes = [f"{instance.pk}_"]
        if instance.old_id:
            prefixes.append(f"{instance.old_id}_")
        prefixes = tuple(prefixes)
        for name in names:
            if name.startswith(prefixes) and name.endswith(".gz"):
                self.delete(root / name)

umap/storage/__init__.py (new file, 3 lines)

@@ -0,0 +1,3 @@
# Retrocompat
from .staticfiles import UmapManifestStaticFilesStorage  # noqa: F401
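This shim keeps the old dotted path for the staticfiles backend working, which the sketch below demonstrates. Note that the old data-storage names (`umap.storage.UmapFileSystem`, `umap.storage.UmapS3`) are intentionally not re-exported, hence the renames across docs, settings, and tests:

```python
# Both import paths resolve to the same class object:
from umap.storage import UmapManifestStaticFilesStorage as legacy
from umap.storage.staticfiles import UmapManifestStaticFilesStorage as current

assert legacy is current
```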

umap/storage/fs.py (new file, 101 lines)

@@ -0,0 +1,101 @@
import operator
import os
import time
from pathlib import Path

from django.conf import settings
from django.core.files.storage import FileSystemStorage


class FSDataStorage(FileSystemStorage):
    def get_reference_version(self, instance):
        return self._extract_version_ref(instance.geojson.name)

    def make_filename(self, instance):
        root = self._base_path(instance)
        name = "%s_%s.geojson" % (instance.pk, int(time.time() * 1000))
        return root / name

    def list_versions(self, instance):
        root = self._base_path(instance)
        names = self.listdir(root)[1]
        names = [name for name in names if self._is_valid_version(name, instance)]
        versions = [self._version_metadata(name, instance) for name in names]
        versions.sort(reverse=True, key=operator.itemgetter("at"))
        return versions

    def get_version(self, ref, instance):
        with self.open(self.get_version_path(ref, instance), "r") as f:
            return f.read()

    def get_version_path(self, ref, instance):
        base_path = Path(settings.MEDIA_ROOT) / self._base_path(instance)
        fullpath = base_path / f"{instance.pk}_{ref}.geojson"
        if instance.old_id and not fullpath.exists():
            fullpath = base_path / f"{instance.old_id}_{ref}.geojson"
        if not fullpath.exists():
            raise ValueError(f"Invalid version reference: {ref}")
        return fullpath

    def onDatalayerSave(self, instance):
        self._purge_gzip(instance)
        self._purge_old_versions(instance, keep=settings.UMAP_KEEP_VERSIONS)

    def onDatalayerDelete(self, instance):
        self._purge_gzip(instance)
        self._purge_old_versions(instance, keep=None)

    def _extract_version_ref(self, path):
        version = path.split(".")[0]
        if "_" in version:
            return version.split("_")[-1]
        return version

    def _base_path(self, instance):
        path = ["datalayer", str(instance.map.pk)[-1]]
        if len(str(instance.map.pk)) > 1:
            path.append(str(instance.map.pk)[-2])
        path.append(str(instance.map.pk))
        return Path(os.path.join(*path))

    def _is_valid_version(self, name, instance):
        valid_prefixes = [name.startswith("%s_" % instance.pk)]
        if instance.old_id:
            valid_prefixes.append(name.startswith("%s_" % instance.old_id))
        return any(valid_prefixes) and name.endswith(".geojson")

    def _version_metadata(self, name, instance):
        ref = self._extract_version_ref(name)
        return {
            "name": name,
            "ref": ref,
            "at": ref,
            "size": self.size(self._base_path(instance) / name),
        }

    def _purge_old_versions(self, instance, keep=None):
        root = self._base_path(instance)
        versions = self.list_versions(instance)
        if keep is not None:
            versions = versions[keep:]
        for version in versions:
            name = version["name"]
            # Should not be in the list, but ensure to not delete the file
            # currently used in database
            if keep is not None and instance.geojson.name.endswith(name):
                continue
            try:
                self.delete(root / name)
            except FileNotFoundError:
                pass

    def _purge_gzip(self, instance):
        root = self._base_path(instance)
        names = self.listdir(root)[1]
        prefixes = [f"{instance.pk}_"]
        if instance.old_id:
            prefixes.append(f"{instance.old_id}_")
        prefixes = tuple(prefixes)
        for name in names:
            if name.startswith(prefixes) and name.endswith(".gz"):
                self.delete(root / name)
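A small sketch (assuming Django settings are configured) of the versioning scheme implemented above: `make_filename` embeds a millisecond timestamp in the filename, and `_extract_version_ref` recovers it as the version reference:

```python
from umap.storage.fs import FSDataStorage

storage = FSDataStorage()
# "<pk>_<ms-timestamp>.geojson" -> the timestamp is the version ref.
assert storage._extract_version_ref("42_1702310177000.geojson") == "1702310177000"
# Without an underscore, the whole stem is returned.
assert storage._extract_version_ref("1702310177000.geojson") == "1702310177000"
```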

umap/storage/s3.py (new file, 61 lines)

@@ -0,0 +1,61 @@
from gzip import GzipFile

from django.core.exceptions import ImproperlyConfigured

try:
    from botocore.exceptions import ClientError
    from storages.backends.s3 import S3Storage
except ImportError:
    raise ImproperlyConfigured(
        "You need to install s3 dependencies: pip install umap-project[s3]"
    )


class S3DataStorage(S3Storage):
    gzip = True

    def get_reference_version(self, instance):
        metadata = self.connection.meta.client.head_object(
            Bucket=self.bucket_name, Key=instance.geojson.name
        )
        # Do not fail if bucket does not handle versioning
        return metadata.get("VersionId", metadata["ETag"])

    def make_filename(self, instance):
        return f"{str(instance.pk)}.geojson"

    def list_versions(self, instance):
        response = self.connection.meta.client.list_object_versions(
            Bucket=self.bucket_name, Prefix=instance.geojson.name
        )
        return [
            {
                "ref": version["VersionId"],
                "at": version["LastModified"].timestamp() * 1000,
                "size": version["Size"],
            }
            for version in response["Versions"]
        ]

    def get_version(self, ref, instance):
        try:
            data = self.connection.meta.client.get_object(
                Bucket=self.bucket_name,
                Key=instance.geojson.name,
                VersionId=ref,
            )
        except ClientError:
            raise ValueError(f"Invalid version reference: {ref}")
        return GzipFile(mode="r", fileobj=data["Body"]).read()

    def get_version_path(self, ref, instance):
        return self.url(instance.geojson.name, parameters={"VersionId": ref})

    def onDatalayerSave(self, instance):
        pass

    def onDatalayerDelete(self, instance):
        return self.connection.meta.client.delete_object(
            Bucket=self.bucket_name,
            Key=instance.geojson.name,
        )
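The try/except around the imports is the heart of this commit: without boto3 and django-storages installed, importing `umap.storage.s3` now fails with an actionable `ImproperlyConfigured` error instead of a bare `ImportError`. A sketch of what a caller sees when the extras are absent:

```python
from django.core.exceptions import ImproperlyConfigured

try:
    from umap.storage.s3 import S3DataStorage  # noqa: F401
except ImproperlyConfigured as error:
    # "You need to install s3 dependencies: pip install umap-project[s3]"
    print(error)
```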

umap/storage/staticfiles.py (new file, 64 lines)

@@ -0,0 +1,64 @@
from pathlib import Path

from django.conf import settings
from django.contrib.staticfiles.storage import ManifestStaticFilesStorage
from rcssmin import cssmin
from rjsmin import jsmin


class UmapManifestStaticFilesStorage(ManifestStaticFilesStorage):
    support_js_module_import_aggregation = True
    max_post_process_passes = 15

    # We remove `;` at the end of all regexps to match our biome config.
    _js_module_import_aggregation_patterns = (
        "*.js",
        (
            (
                (
                    r"""(?P<matched>import(?s:(?P<import>[\s\{].*?))"""
                    r"""\s*from\s*['"](?P<url>[\.\/].*?)["']\s*)"""
                ),
                'import%(import)s from "%(url)s"\n',
            ),
            (
                (
                    r"""(?P<matched>export(?s:(?P<exports>[\s\{].*?))"""
                    r"""\s*from\s*["'](?P<url>[\.\/].*?)["']\s*)"""
                ),
                'export%(exports)s from "%(url)s"\n',
            ),
            (
                r"""(?P<matched>import\s*['"](?P<url>[\.\/].*?)["']\s*)""",
                'import"%(url)s"\n',
            ),
            (
                r"""(?P<matched>import\(["'](?P<url>.*?)["']\)\.then)""",
                """import("%(url)s").then""",
            ),
            (
                r"""(?P<matched>await import\(["'](?P<url>.*?)["']\))""",
                """await import("%(url)s")""",
            ),
        ),
    )

    def post_process(self, paths, **options):
        collected = super().post_process(paths, **options)
        for original_path, processed_path, processed in collected:
            if isinstance(processed, Exception):
                print("Error with file", original_path)
                raise processed
            if processed_path.endswith(".js"):
                path = Path(settings.STATIC_ROOT) / processed_path
                initial = path.read_text()
                if "sourceMappingURL" not in initial:  # Already minified.
                    minified = jsmin(initial)
                    path.write_text(minified)
            if processed_path.endswith(".css"):
                path = Path(settings.STATIC_ROOT) / processed_path
                initial = path.read_text()
                if "sourceMappingURL" not in initial:  # Already minified.
                    minified = cssmin(initial)
                    path.write_text(minified)
            yield original_path, processed_path, True
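`post_process` only runs during static collection, so minification happens at deploy time rather than at request time; the test fixture at the end of this commit exercises exactly this path:

```python
from django.core.management import call_command

# Triggers UmapManifestStaticFilesStorage.post_process, which minifies
# .js/.css files that do not reference a source map.
call_command("collectstatic", "--noinput")
```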


@@ -27,7 +27,7 @@ def patch_storage():
     DataLayer.geojson.field.storage = storages.create_storage(
         {
-            "BACKEND": "umap.storage.UmapS3",
+            "BACKEND": "umap.storage.s3.S3DataStorage",
             "OPTIONS": {
                 "access_key": "testing",
                 "secret_key": "testing",


@@ -15,7 +15,7 @@ def staticfiles(settings):
     # Make sure settings are properly reset after the test
     settings.STORAGES = deepcopy(settings.STORAGES)
     settings.STORAGES["staticfiles"]["BACKEND"] = (
-        "umap.storage.UmapManifestStaticFilesStorage"
+        "umap.storage.staticfiles.UmapManifestStaticFilesStorage"
     )
     try:
         call_command("collectstatic", "--noinput")