Fix problems introduced by duplicates (#137)

* Throw an exception when duplicates are found

* When dupes are found, append "-dedupe" to the ref of the duplicated product.

* Change the way product refs are being generated.

Add a salt of randomness to avoid conflicts.
This commit is contained in:
Alexis Metaireau 2022-11-10 15:21:04 +01:00 committed by GitHub
parent 2b0459e94a
commit ced93b0f69
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 47 additions and 1 deletions

View file

@ -1,6 +1,7 @@
import inspect import inspect
import threading import threading
import uuid import uuid
from collections import Counter
from datetime import datetime, timedelta from datetime import datetime, timedelta
from dataclasses import dataclass, field, asdict from dataclasses import dataclass, field, asdict
from pathlib import Path from pathlib import Path
@ -425,11 +426,52 @@ class Delivery(PersistedBase):
path = cls.get_root() / f"{id}.yml" path = cls.get_root() / f"{id}.yml"
if not path.exists(): if not path.exists():
raise DoesNotExist raise DoesNotExist
def _dedupe_products(raw_data):
"""On some rare occasions, different products get
the same identifier (ref).
This function finds them and appends "-dedupe" to it.
This is not ideal but fixes the problem before it causes more
trouble (such as https://github.com/spiral-project/copanier/issues/136)
This function returns True if dupes have been found.
"""
if ('products' not in raw_data) or len(raw_data['products']) < 1:
return False
products = raw_data['products']
counter = Counter([p['ref'] for p in products])
most_common = counter.most_common(1)[0]
number_of_dupes = most_common[1]
if number_of_dupes < 2:
return False
dupe_id = most_common[0]
# Reconstruct the products list but change the duplicated ID.
counter = 0
new_products = []
for product in products:
ref = product['ref']
if ref == dupe_id:
counter = counter + 1
if counter == number_of_dupes: # Only change the last occurence.
product['ref'] = f'{ref}-dedupe'
new_products.append(product)
raw_data['products'] = new_products
return True
data = yaml.safe_load(path.read_text()) data = yaml.safe_load(path.read_text())
dupe_found = _dedupe_products(data)
# Tolerate extra fields (but we'll lose them if instance is persisted) # Tolerate extra fields (but we'll lose them if instance is persisted)
data = {k: v for k, v in data.items() if k in cls.__dataclass_fields__} data = {k: v for k, v in data.items() if k in cls.__dataclass_fields__}
delivery = cls(**data) delivery = cls(**data)
delivery.id = id delivery.id = id
if dupe_found:
delivery.persist()
return delivery return delivery
@classmethod @classmethod

View file

@ -1,5 +1,8 @@
from datetime import datetime from datetime import datetime
import random
import string
from slugify import slugify from slugify import slugify
from .core import app from .core import app
from ..models import Delivery, Product, Producer from ..models import Delivery, Product, Producer
@ -153,7 +156,8 @@ async def create_product(request, response, delivery_id, producer_id):
product.producer = producer_id product.producer = producer_id
form = request.form form = request.form
product.update_from_form(form) product.update_from_form(form)
product.ref = slugify(f"{producer_id}-{product.name}-{product.unit}") random_string = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
product.ref = slugify(f"{producer_id}-{product.name}-{product.unit}-{random_string}")
delivery.products.append(product) delivery.products.append(product)
delivery.persist() delivery.persist()