-
Notifications
You must be signed in to change notification settings - Fork 78
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add a remote command for batch duplicate finding. #1524
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
/* | ||
* Copyright (C) 2024 The Geeqie Team | ||
* | ||
* Author: Marcin Owsiany <[email protected]> | ||
* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 2 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License along | ||
* with this program; if not, write to the Free Software Foundation, Inc., | ||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
* | ||
* | ||
* Helper class for computing equivalence sets of pictures. | ||
* | ||
*/ | ||
|
||
#include "pic.h" | ||
|
||
/**
 * Load the image file @p cname and compute its similarity data.
 *
 * The new picture is initially equivalent only to itself. When the file
 * cannot be read, a message is written to stderr and sim is left null.
 */
pic::pic(char const *cname)
	: name(cname)
	, equivalent{name}
	, sim(nullptr)
{
	GError *error = nullptr;
	GdkPixbuf *pixbuf = gdk_pixbuf_new_from_file(cname, &error);

	if (pixbuf != nullptr)
	{
		sim = image_sim_new_from_pixbuf(pixbuf);
		// The pixbuf is not used after the similarity data has been computed.
		g_object_unref(pixbuf);
	}
	else
	{
		fprintf(stderr, "Unable to read file %s: %s\n", cname, error->message);
		g_error_free(error);
	}
}
|
||
/** Free the similarity data, if any was computed for this picture. */
pic::~pic()
{
	if (sim == nullptr) return;
	image_sim_free(sim);
}
|
||
int operator<(const pic &a, const pic &b) | ||
{ | ||
return a.name < b.name; | ||
} | ||
|
||
/**
 * Compare this picture with @p other.
 *
 * @return the result of image_sim_compare() scaled by 100, or 0.0 when
 *         either picture has no similarity data.
 */
gdouble pic::compare(const pic & other)
{
	const bool both_loaded = (sim != nullptr) && (other.sim != nullptr);
	return both_loaded ? 100.0 * image_sim_compare(sim, other.sim) : 0.0;
}
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,42 @@ | ||||||||
/* | ||||||||
* Copyright (C) 2024 The Geeqie Team | ||||||||
* | ||||||||
* Author: Marcin Owsiany <[email protected]> | ||||||||
* | ||||||||
* This program is free software; you can redistribute it and/or modify | ||||||||
* it under the terms of the GNU General Public License as published by | ||||||||
* the Free Software Foundation; either version 2 of the License, or | ||||||||
* (at your option) any later version. | ||||||||
* | ||||||||
* This program is distributed in the hope that it will be useful, | ||||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||||||
* GNU General Public License for more details. | ||||||||
* | ||||||||
* You should have received a copy of the GNU General Public License along | ||||||||
* with this program; if not, write to the Free Software Foundation, Inc., | ||||||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||||
* | ||||||||
* | ||||||||
* Helper class for computing equivalence sets of pictures. | ||||||||
* | ||||||||
*/ | ||||||||
|
||||||||
#include <set> | ||||||||
#include <string> | ||||||||
#include <gdk-pixbuf/gdk-pixbuf.h> | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
#include <glib/gtypes.h> | ||||||||
|
||||||||
#include "similar.h" | ||||||||
|
||||||||
class pic { | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||
public: | ||||||||
pic(char const *cname); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This class violates the rule of 3/5/0. See: |
||||||||
~pic(); | ||||||||
gdouble compare(const pic&); | ||||||||
std::string name; | ||||||||
std::set<std::string> equivalent; | ||||||||
private: | ||||||||
ImageSimilarityData *sim; | ||||||||
friend int operator<(const pic &a, const pic &b); | ||||||||
}; |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -21,9 +21,11 @@ | |||||
|
||||||
#include "remote.h" | ||||||
|
||||||
#include <stdio.h> | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Already included as |
||||||
#include <sys/socket.h> | ||||||
#include <sys/stat.h> | ||||||
#include <sys/un.h> | ||||||
#include <sys/wait.h> | ||||||
#include <unistd.h> | ||||||
|
||||||
#include <algorithm> | ||||||
|
@@ -32,6 +34,8 @@ | |||||
#include <cstdio> | ||||||
#include <cstdlib> | ||||||
#include <cstring> | ||||||
#include <map> | ||||||
#include <memory> | ||||||
#include <vector> | ||||||
|
||||||
#include <gtk/gtk.h> | ||||||
|
@@ -57,8 +61,10 @@ | |||||
#include "main.h" | ||||||
#include "misc.h" | ||||||
#include "options.h" | ||||||
#include "pic.h" | ||||||
#include "pixbuf-renderer.h" | ||||||
#include "rcfile.h" | ||||||
#include "similar.h" | ||||||
#include "slideshow.h" | ||||||
#include "typedefs.h" | ||||||
#include "ui-fileops.h" | ||||||
|
@@ -682,6 +688,102 @@ static void gr_slideshow_delay(const gchar *text, GIOChannel *, gpointer) | |||||
options->slideshow.delay = static_cast<gint>(n * 10.0 + 0.01); | ||||||
} | ||||||
|
||||||
static void gr_duplicates_threshold(const gchar *text, GIOChannel *, gpointer) | ||||||
{ | ||||||
gint thresh; | ||||||
gint n; | ||||||
gint res; | ||||||
|
||||||
res = sscanf(text, "%d", &thresh); | ||||||
if (res == 1) | ||||||
{ | ||||||
n = thresh; | ||||||
if (n < 0 || n > 100) | ||||||
{ | ||||||
printf_term(TRUE, "Image similarity threshold out of range (%d to %d)\n", 0, 100); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Range limits are duplicated on line 701. Consider moving them to constants. |
||||||
return; | ||||||
} | ||||||
} | ||||||
else | ||||||
{ | ||||||
n = 99; | ||||||
} | ||||||
|
||||||
options->duplicates_similarity_threshold = static_cast<guint>(n); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like |
||||||
DEBUG_0("threshold set to %d", options->duplicates_similarity_threshold); | ||||||
} | ||||||
|
||||||
static void gr_duplicates_program(const gchar *text, GIOChannel *, gpointer) | ||||||
{ | ||||||
g_strdup(options->duplicates_program); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
options->duplicates_program = g_strdup(text); | ||||||
DEBUG_0("duplicates program set to \"%s\"", options->duplicates_program); | ||||||
} | ||||||
|
||||||
static void gr_process_duplicates(const gchar *, GIOChannel *, gpointer data) | ||||||
{ | ||||||
auto remote_data = static_cast<RemoteData *>(data); | ||||||
std::map<std::string, std::unique_ptr<pic>> pics; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use tab for indentation. |
||||||
GList *work = remote_data->file_list; | ||||||
while (work) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider using |
||||||
{ | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please fix braces indentation all over PR. |
||||||
FileData *fd = static_cast<FileData *>(work->data); | ||||||
std::string name(fd->path); | ||||||
pics[name] = std::unique_ptr<pic>(new pic(fd->path)); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should generally be using For context when referring to language docs, Geeqie is using C++14 (as can be seen in the |
||||||
work = work->next; | ||||||
} | ||||||
DEBUG_1("processing %d files in set", pics.size()); | ||||||
|
||||||
// Compute similarity score for every pair, build equivalence sets. | ||||||
for (auto a = pics.begin(); a != pics.end(); ++a) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use meaningful variable names for temporaries. Maybe |
||||||
auto b = a; | ||||||
b++; | ||||||
for (; b != pics.end(); ++b) { | ||||||
double similarity = a->second->compare(*b->second); | ||||||
DEBUG_1("%s vs %s: %f", a->second->name.c_str(), b->second->name.c_str(), similarity); | ||||||
if (similarity < options->duplicates_similarity_threshold) | ||||||
continue; | ||||||
a->second->equivalent.insert(b->second->equivalent.begin(), b->second->equivalent.end()); | ||||||
for (auto const &f: a->second->equivalent) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Meaningful variable name |
||||||
pics[f]->equivalent.insert(a->second->equivalent.begin(), a->second->equivalent.end()); | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
std::set<std::string> printed; | ||||||
for (auto const &f: pics) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Meaningful variable name |
||||||
if (f.second->equivalent.size() < 2) | ||||||
// skip this pic if not similar to any other one but itself | ||||||
continue; | ||||||
if (printed.find(f.second->name) != printed.end()) | ||||||
// skip this pic if it was already printed (when processing a similar image) | ||||||
continue; | ||||||
std::vector<const char *> cmd; | ||||||
cmd.push_back(options->duplicates_program); | ||||||
for (auto const &e: f.second->equivalent) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Meaningful variable name |
||||||
cmd.push_back(e.c_str()); | ||||||
printed.insert(e); | ||||||
} | ||||||
cmd.push_back(NULL); | ||||||
pid_t pid = fork(); | ||||||
if (pid == -1) { | ||||||
perror("fork"); | ||||||
exit(1); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Line 541 in 8cdab99
With that said, it's probably a lot safer to use an API like https://docs.gtk.org/gio/ctor.Subprocess.new.html instead of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Geeqie uses |
||||||
} else if (pid == 0) { | ||||||
execvp(const_cast<char *>(cmd[0]), const_cast<char **>(&(cmd[0]))); | ||||||
perror("execv"); | ||||||
exit(1); | ||||||
} else { | ||||||
int status; | ||||||
wait(&status); | ||||||
if (!WIFEXITED(status) || WEXITSTATUS(status)!=0) { | ||||||
fprintf(stderr, "subprocess failed, aborting\n"); | ||||||
exit(1); | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
static void gr_tools_show(const gchar *, GIOChannel *, gpointer) | ||||||
{ | ||||||
gboolean popped; | ||||||
|
@@ -1718,6 +1820,8 @@ static RemoteCommandEntry remote_commands[] = { | |||||
{ nullptr, "--cache-shared=", gr_cache_shared, TRUE, FALSE, N_("clean|clear"), N_("clean or clear shared thumbnail cache") }, | ||||||
{ nullptr, "--cache-thumbs=", gr_cache_thumb, TRUE, FALSE, N_("clean|clear"), N_("clean or clear thumbnail cache") }, | ||||||
{ "-d", "--delay=", gr_slideshow_delay, TRUE, FALSE, N_("<[H:][M:][N][.M]>"), N_("set slide show delay to Hrs Mins N.M seconds") }, | ||||||
{ nullptr, "--duplicates-program=", gr_duplicates_program, TRUE, FALSE, N_("<PROGRAM>"), N_("run program with each identified set of duplicate images") }, | ||||||
{ nullptr, "--duplicates-threshold=", gr_duplicates_threshold, TRUE, FALSE, N_("<N>"), N_("set similarity threshold for what is considered a duplicate") }, | ||||||
{ nullptr, "--first", gr_image_first, FALSE, FALSE, nullptr, N_("first image") }, | ||||||
{ "-f", "--fullscreen", gr_fullscreen_toggle, FALSE, TRUE, nullptr, N_("toggle full screen") }, | ||||||
{ nullptr, "--file=", gr_file_load, TRUE, FALSE, N_("<FILE>|<URL>"), N_("open FILE or URL, bring Geeqie window to the top") }, | ||||||
|
@@ -1749,6 +1853,7 @@ static RemoteCommandEntry remote_commands[] = { | |||||
{ "-n", "--next", gr_image_next, FALSE, FALSE, nullptr, N_("next image") }, | ||||||
{ nullptr, "--pixel-info", gr_pixel_info, FALSE, FALSE, nullptr, N_("print pixel info of mouse pointer on current image") }, | ||||||
{ nullptr, "--print0", gr_print0, TRUE, FALSE, nullptr, N_("terminate returned data with null character instead of newline") }, | ||||||
{ "-p", "--process-duplicates", gr_process_duplicates, FALSE, FALSE, nullptr, N_("group duplicate pictures in current collection and process them") }, | ||||||
{ nullptr, "--PWD=", gr_pwd, TRUE, FALSE, N_("<PWD>"), N_("use PWD as working directory for following commands") }, | ||||||
{ "-q", "--quit", gr_quit, FALSE, FALSE, nullptr, N_("quit") }, | ||||||
{ nullptr, "--raise", gr_raise, FALSE, FALSE, nullptr, N_("bring the Geeqie window to the top") }, | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.