Fix archiving the same doc twice on Windows

On Windows, if we somehow attempt to archive the same document twice
(e.g., because it got archived once, and then we copy it back), we will
get an error, because renaming a file on Windows does not overwrite the
target path if it already exists.

Fix this issue by always removing the previously archived version before
performing the next archival action, and update our tests accordingly.
This commit is contained in:
Alex Pyrgiotis 2024-04-30 13:03:52 +03:00
parent 63b12abbdf
commit 1c70ee6771
No known key found for this signature in database
GPG key ID: B6C15EBA0357C9AA
2 changed files with 18 additions and 12 deletions

View file

@ -145,6 +145,8 @@ class Document:
new_file_path = archive_dir / old_file_path.name
log.debug(f"Archiving doc {self.id} to {new_file_path}")
Path.mkdir(archive_dir, exist_ok=True)
# On Windows, renaming the file will fail if the target path already exists.
new_file_path.unlink(missing_ok=True)
old_file_path.rename(new_file_path)
@property

View file

@ -97,24 +97,28 @@ def test_archive_unwriteable_dir(tmp_path: Path) -> None:
def test_archive(mocker: MagicMock, tmp_path: Path) -> None:
test_string = "original file"
original_doc_path = str(tmp_path / "doc.pdf")
archived_doc_path = str(tmp_path / ARCHIVE_SUBDIR / "doc.pdf")
# write some content for later verifying content integrity
with open(original_doc_path, "w") as f:
f.write(test_string)
# Perform the archival operation twice: once with no archive dir, and once with an
# archive dir that already exists and holds the previously archived document.
test_strings = ["original file 1", "original file 2"]
for test_string in test_strings:
# write some content for later verifying content integrity
with open(original_doc_path, "w") as f:
f.write(test_string)
d = Document(original_doc_path, archive=True)
d.archive()
# archive the document
d = Document(original_doc_path, archive=True)
d.archive()
# original document has been moved to unsafe/doc.pdf
assert not os.path.exists(original_doc_path)
assert os.path.exists(archived_doc_path)
# original document has been moved to unsafe/doc.pdf
assert not os.path.exists(original_doc_path)
assert os.path.exists(archived_doc_path)
# make sure it is the original file by comparing its content
with open(archived_doc_path) as f:
assert f.read() == test_string
# make sure it is the proper file by comparing its content
with open(archived_doc_path) as f:
assert f.read() == test_string
def test_set_output_dir(sample_pdf: str, tmp_path: Path) -> None: