Fix archiving the same doc twice on Windows

On Windows, if we somehow attempt to archive the same document twice
(e.g., because it got archived once and was then copied back), we get
an error, because renaming a file on Windows does not overwrite the
target path if it already exists.

Fix this issue by always removing the previously archived version before
performing the next archival action, and update our tests to archive the
same document twice.
This commit is contained in:
Alex Pyrgiotis 2024-04-30 13:03:52 +03:00
parent 63b12abbdf
commit 1c70ee6771
No known key found for this signature in database
GPG key ID: B6C15EBA0357C9AA
2 changed files with 18 additions and 12 deletions

View file

@ -145,6 +145,8 @@ class Document:
new_file_path = archive_dir / old_file_path.name new_file_path = archive_dir / old_file_path.name
log.debug(f"Archiving doc {self.id} to {new_file_path}") log.debug(f"Archiving doc {self.id} to {new_file_path}")
Path.mkdir(archive_dir, exist_ok=True) Path.mkdir(archive_dir, exist_ok=True)
# On Windows, the rename below fails if the target path already exists, so remove any previously archived copy first.
new_file_path.unlink(missing_ok=True)
old_file_path.rename(new_file_path) old_file_path.rename(new_file_path)
@property @property

View file

@ -97,24 +97,28 @@ def test_archive_unwriteable_dir(tmp_path: Path) -> None:
def test_archive(mocker: MagicMock, tmp_path: Path) -> None: def test_archive(mocker: MagicMock, tmp_path: Path) -> None:
test_string = "original file"
original_doc_path = str(tmp_path / "doc.pdf") original_doc_path = str(tmp_path / "doc.pdf")
archived_doc_path = str(tmp_path / ARCHIVE_SUBDIR / "doc.pdf") archived_doc_path = str(tmp_path / ARCHIVE_SUBDIR / "doc.pdf")
# write some content for later verifying content integrity # Perform the archival operation two times: one with no archive dir, and one with an
with open(original_doc_path, "w") as f: # archive dir.
f.write(test_string) test_strings = ["original file 1", "original file 2"]
for test_string in test_strings:
# write some content for later verifying content integrity
with open(original_doc_path, "w") as f:
f.write(test_string)
d = Document(original_doc_path, archive=True) # archive the document
d.archive() d = Document(original_doc_path, archive=True)
d.archive()
# original document has been moved to unsafe/doc.pdf # original document has been moved to unsafe/doc.pdf
assert not os.path.exists(original_doc_path) assert not os.path.exists(original_doc_path)
assert os.path.exists(archived_doc_path) assert os.path.exists(archived_doc_path)
# make sure it is the original file by comparing its content # make sure it is the proper file by comparing its content
with open(archived_doc_path) as f: with open(archived_doc_path) as f:
assert f.read() == test_string assert f.read() == test_string
def test_set_output_dir(sample_pdf: str, tmp_path: Path) -> None: def test_set_output_dir(sample_pdf: str, tmp_path: Path) -> None: