Report exceptions raised in document conversion

Exceptions raised during the document conversion process were
silently hidden. This was because ThreadPoolExecutor created various
threads and hid any exceptions raised in them.

Fixes #309
This commit is contained in:
deeplow 2023-01-17 12:09:48 +00:00
parent b9dc882663
commit 78fa35fb64
No known key found for this signature in database
GPG key ID: 577982871529A52A
2 changed files with 24 additions and 4 deletions

View file

@ -200,3 +200,6 @@ class Document:
if not isinstance(other, Document):
return False
return self.input_filename == other.input_filename
def __hash__(self) -> int:
    """Return a hash consistent with ``__eq__``.

    ``__eq__`` treats two documents as equal when their ``input_filename``
    values match, so the hash is derived from ``input_filename`` as well.
    Hashing ``self.id`` instead could give two equal documents different
    hashes, which breaks the rule that equal objects hash equally and makes
    dict/set lookups keyed by document unreliable.
    """
    return hash(self.input_filename)

View file

@ -1,4 +1,3 @@
import concurrent.futures
import gzip
import json
import logging
@ -7,7 +6,9 @@ import platform
import shutil
import subprocess
import sys
from typing import Callable, List, Optional
import traceback
from concurrent.futures import Future, ThreadPoolExecutor
from typing import Callable, Dict, List, Optional
import appdirs
import colorama
@ -66,8 +67,24 @@ class DangerzoneCore(object):
)
max_jobs = container.get_max_parallel_conversions()
with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor:
executor.map(convert_doc, self.documents)
with ThreadPoolExecutor(max_workers=max_jobs) as executor:
conversions: Dict[Document, Future] = {}
# Start all parallel conversions
for document in self.get_unconverted_documents():
conversion = executor.submit(convert_doc, document)
conversions[document] = conversion
# Check the results to raise any exceptions that may have happened
for document in conversions:
try:
conversion = conversions[document]
conversion.result()
except Exception as e:
log.error(
f"Something unexpected happened when converting document '{document.id}': {e}"
)
traceback.print_exception(type(e), e, e.__traceback__)
def get_unconverted_documents(self) -> List[Document]:
    """Return the documents for which ``is_unconverted()`` is truthy.

    The order of ``self.documents`` is preserved, and a new list is built on
    every call.
    """
    pending = []
    for candidate in self.documents:
        if candidate.is_unconverted():
            pending.append(candidate)
    return pending