From e9cbc9070f7ff7a8cbea9c7a5b8ceaa2260737c5 Mon Sep 17 00:00:00 2001 From: Mia A Date: Sun, 4 Aug 2024 16:33:09 -0700 Subject: [PATCH 1/4] Add deduplicate conflict mode to only skip actual duplicates --- organize/actions/common/conflict.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/organize/actions/common/conflict.py b/organize/actions/common/conflict.py index 21daea68..ac8225c8 100644 --- a/organize/actions/common/conflict.py +++ b/organize/actions/common/conflict.py @@ -2,6 +2,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Literal, NamedTuple +import filecmp from organize.output import Output from organize.resource import Resource @@ -11,7 +12,7 @@ from jinja2 import Template # TODO: keep_newer, keep_older, keep_bigger, keep_smaller -ConflictMode = Literal["skip", "overwrite", "trash", "rename_new", "rename_existing"] +ConflictMode = Literal["skip", "overwrite", "deduplicate", "trash", "rename_new", "rename_existing"] class ConflictResult(NamedTuple): @@ -103,6 +104,17 @@ def _print(msg: str): delete(path=dst) return ConflictResult(skip_action=False, use_dst=dst) + + elif conflict_mode == "deduplicate": + if filecmp.cmp(res.path, dst, shallow=True): + _print(f"Duplicate skipped.") + return ConflictResult(skip_action=True, use_dst=res.path) + else: + new_path = next_free_name( + dst=dst, + template=rename_template, + ) + return ConflictResult(skip_action=False, use_dst=new_path) elif conflict_mode == "rename_new": new_path = next_free_name( From 31aeec5fba764f54599831f016d5189c01d886a7 Mon Sep 17 00:00:00 2001 From: Mia A Date: Sun, 4 Aug 2024 16:49:04 -0700 Subject: [PATCH 2/4] Add unit tests for deduplicate conflicts --- tests/actions/test_copy.py | 38 ++++++++++++++++++++++++++++++++++++++ tests/actions/test_move.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/tests/actions/test_copy.py b/tests/actions/test_copy.py index 6f788573..7ca9fd1c 100644 --- 
a/tests/actions/test_copy.py +++ b/tests/actions/test_copy.py @@ -136,6 +136,44 @@ def test_copy_conflict(fs, mode, result): Config.from_string(config).execute(simulate=False) assert read_files("test") == result +def test_copy_deduplicate_conflict(fs): + files = { + "src.txt": "src", + "duplicate": { + "src.txt": "src", + }, + "nonduplicate": { + "src.txt": "src2", + }, + } + + config = """ + rules: + - locations: "/test" + subfolders: true + filters: + - name: src + actions: + - copy: + dest: "/test/dst.txt" + on_conflict: deduplicate + """ + make_files(files, "test") + + Config.from_string(config).execute(simulate=False) + result = read_files("test") + + assert result == { + "src.txt": "src", + "duplicate": { + "src.txt": "src", + }, + "nonduplicate": { + "src.txt": "src2", + }, + "dst.txt": "src", + "dst 2.txt": "src2", + } def test_does_not_create_folder_in_simulation(fs): config = """ diff --git a/tests/actions/test_move.py b/tests/actions/test_move.py index fae28a17..691ac77d 100644 --- a/tests/actions/test_move.py +++ b/tests/actions/test_move.py @@ -55,6 +55,41 @@ def test_move_conflict(fs, mode, result): Config.from_string(config).execute(simulate=False) assert read_files("test") == result +def test_move_deduplicate_conflict(fs): + files = { + "src.txt": "src", + "duplicate": { + "src.txt": "src", + }, + "nonduplicate": { + "src.txt": "src2", + }, + } + + config = """ + rules: + - locations: "/test" + subfolders: true + filters: + - name: src + actions: + - move: + dest: "/test/dst.txt" + on_conflict: deduplicate + """ + make_files(files, "test") + + Config.from_string(config).execute(simulate=False) + result = read_files("test") + + assert result == { + "duplicate": { + "src.txt": "src", + }, + "nonduplicate": {}, + "dst.txt": "src", + "dst 2.txt": "src2", + } def test_move_folder_conflict(fs): make_files( From 6eb29e921a9844c9ce7205a003db0e148dc625f2 Mon Sep 17 00:00:00 2001 From: Mia A Date: Sun, 4 Aug 2024 17:01:36 -0700 Subject: [PATCH 3/4] Update 
documentation to explain deduplicate --- CHANGELOG.md | 1 + docs/actions.md | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39fdab60..3fae7a0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## [Unreleased] +- Added a new conflict mode `deduplicate` which skips duplicate files and renames non-duplicates ## v3.2.5 (2024-07-09) diff --git a/docs/actions.md b/docs/actions.md index 7350fd65..3fd74234 100644 --- a/docs/actions.md +++ b/docs/actions.md @@ -58,6 +58,21 @@ rules: on_conflict: overwrite ``` +Use a placeholder to copy all .pdf files into a "PDF" folder and all .jpg files into a "JPG" folder. If two files share the same file name and are duplicates, the duplicate will be skipped. If they aren't duplicates, the second file will be renamed. + +```yaml +rules: + - locations: ~/Desktop + filters: + - extension: + - pdf + - jpg + actions: + - copy: + dest: "~/Desktop/{extension.upper()}/" + on_conflict: deduplicate +``` + Copy into the folder `Invoices`. Keep the filename but do not overwrite existing files. To prevent overwriting files, an index is added to the filename, so `somefile.jpg` becomes `somefile 2.jpg`. The counter separator is `' '` by default, but can be changed using the `counter_separator` property. 
From 5fb83975649cae85231a02f6cd2275bed46b300e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 5 Aug 2024 00:41:12 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- organize/actions/common/conflict.py | 10 ++++++---- tests/actions/test_copy.py | 4 +++- tests/actions/test_move.py | 4 +++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/organize/actions/common/conflict.py b/organize/actions/common/conflict.py index ac8225c8..efca88ce 100644 --- a/organize/actions/common/conflict.py +++ b/organize/actions/common/conflict.py @@ -1,8 +1,8 @@ from __future__ import annotations +import filecmp from pathlib import Path from typing import TYPE_CHECKING, Literal, NamedTuple -import filecmp from organize.output import Output from organize.resource import Resource @@ -12,7 +12,9 @@ from jinja2 import Template # TODO: keep_newer, keep_older, keep_bigger, keep_smaller -ConflictMode = Literal["skip", "overwrite", "deduplicate", "trash", "rename_new", "rename_existing"] +ConflictMode = Literal[ + "skip", "overwrite", "deduplicate", "trash", "rename_new", "rename_existing" +] class ConflictResult(NamedTuple): @@ -104,10 +106,10 @@ def _print(msg: str): delete(path=dst) return ConflictResult(skip_action=False, use_dst=dst) - + elif conflict_mode == "deduplicate": if filecmp.cmp(res.path, dst, shallow=True): - _print(f"Duplicate skipped.") + _print("Duplicate skipped.") return ConflictResult(skip_action=True, use_dst=res.path) else: new_path = next_free_name( diff --git a/tests/actions/test_copy.py b/tests/actions/test_copy.py index 7ca9fd1c..3b38ac35 100644 --- a/tests/actions/test_copy.py +++ b/tests/actions/test_copy.py @@ -136,6 +136,7 @@ def test_copy_conflict(fs, mode, result): Config.from_string(config).execute(simulate=False) assert read_files("test") == result + def test_copy_deduplicate_conflict(fs): files = { 
"src.txt": "src", @@ -162,7 +163,7 @@ def test_copy_deduplicate_conflict(fs): Config.from_string(config).execute(simulate=False) result = read_files("test") - + assert result == { "src.txt": "src", "duplicate": { @@ -175,6 +176,7 @@ def test_copy_deduplicate_conflict(fs): "dst 2.txt": "src2", } + def test_does_not_create_folder_in_simulation(fs): config = """ rules: diff --git a/tests/actions/test_move.py b/tests/actions/test_move.py index 691ac77d..fbacc035 100644 --- a/tests/actions/test_move.py +++ b/tests/actions/test_move.py @@ -55,6 +55,7 @@ def test_move_conflict(fs, mode, result): Config.from_string(config).execute(simulate=False) assert read_files("test") == result + def test_move_deduplicate_conflict(fs): files = { "src.txt": "src", @@ -81,7 +82,7 @@ def test_move_deduplicate_conflict(fs): Config.from_string(config).execute(simulate=False) result = read_files("test") - + assert result == { "duplicate": { "src.txt": "src", @@ -91,6 +92,7 @@ def test_move_deduplicate_conflict(fs): "dst 2.txt": "src2", } + def test_move_folder_conflict(fs): make_files( {