Fix problems introduced by duplicates (#137)

* Throw an exception when duplicates are found.
* When dupes are found, append "-dedupe" to the duplicated ref.
* Change the way product refs are generated: add a random suffix to avoid conflicts.
2025-04-28 11:32:38 +02:00 · 2022-11-10 15:21:04 +01:00 · 2022-11-10 15:21:04 +01:00 · ced93b0f69
commit ced93b0f69
parent 2b0459e94a
2 changed files with 47 additions and 1 deletions
--- a/copanier/models.py
+++ b/copanier/models.py
@ -1,6 +1,7 @@
 import inspect
 import threading
 import uuid
+from collections import Counter
 from datetime import datetime, timedelta
 from dataclasses import dataclass, field, asdict
 from pathlib import Path
@ -425,11 +426,52 @@ class Delivery(PersistedBase):
        path = cls.get_root() / f"{id}.yml"
        if not path.exists():
            raise DoesNotExist
+
+        def _dedupe_products(raw_data):
+            """Make every product ref in ``raw_data`` unique, in place.
+
+            On rare occasions different products end up with the same
+            identifier (ref), which causes trouble later on (see
+            https://github.com/spiral-project/copanier/issues/136).
+
+            The first product seen with a given ref keeps it. Each later
+            product with that ref gets "-dedupe" appended, falling back to
+            "-dedupe-2", "-dedupe-3", ... while the result is already in use.
+            Every duplicated ref is handled, however often it occurs.
+
+            :param raw_data: mapping with an optional "products" list of
+                product mappings, each carrying a "ref" key.
+            :return: True if at least one ref was changed, False otherwise.
+            """
+            products = raw_data.get('products') or []
+            # Reserve every existing ref up front so a generated ref can never
+            # collide with a product that appears later in the list.
+            taken = {p['ref'] for p in products}
+            seen = set()
+            dupe_found = False
+            for product in products:
+                ref = product['ref']
+                if ref not in seen:
+                    seen.add(ref)
+                    continue
+                candidate, suffix = f'{ref}-dedupe', 1
+                while candidate in taken:
+                    suffix += 1
+                    candidate = f'{ref}-dedupe-{suffix}'
+                product['ref'] = candidate
+                taken.add(candidate)
+                dupe_found = True
+            return dupe_found
+
        data = yaml.safe_load(path.read_text())
+        dupe_found = _dedupe_products(data)
        # Tolerate extra fields (but we'll lose them if instance is persisted)
        data = {k: v for k, v in data.items() if k in cls.__dataclass_fields__}
        delivery = cls(**data)
        delivery.id = id
+
+        if dupe_found:
+            delivery.persist()
        return delivery

    @classmethod
--- a/copanier/views/products.py
+++ b/copanier/views/products.py
@ -1,5 +1,8 @@
 from datetime import datetime

+import random
+import string
+
 from slugify import slugify
 from .core import app
 from ..models import Delivery, Product, Producer
@ -153,7 +156,8 @@ async def create_product(request, response, delivery_id, producer_id):
        product.producer = producer_id
        form = request.form
        product.update_from_form(form)
-        product.ref = slugify(f"{producer_id}-{product.name}-{product.unit}")
+        random_string = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
+        product.ref = slugify(f"{producer_id}-{product.name}-{product.unit}-{random_string}")

        delivery.products.append(product)
        delivery.persist()