From 493bc17199a46bc77846f47bf73c5065d6dd5b29 Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Wed, 9 Oct 2024 10:54:41 -0600 Subject: [PATCH 01/15] Add the initial Term Cleanup Feature --- .../Classifai/Admin/SimilarTermsListTable.php | 288 ++++ .../Admin/templates/classifai-header.php | 2 +- includes/Classifai/Features/TermCleanup.php | 1214 +++++++++++++++++ .../Features/TermCleanupEPIntegration.php | 252 ++++ .../Classifai/Services/ServicesManager.php | 1 + 5 files changed, 1756 insertions(+), 1 deletion(-) create mode 100644 includes/Classifai/Admin/SimilarTermsListTable.php create mode 100644 includes/Classifai/Features/TermCleanup.php create mode 100644 includes/Classifai/Features/TermCleanupEPIntegration.php diff --git a/includes/Classifai/Admin/SimilarTermsListTable.php b/includes/Classifai/Admin/SimilarTermsListTable.php new file mode 100644 index 000000000..480578631 --- /dev/null +++ b/includes/Classifai/Admin/SimilarTermsListTable.php @@ -0,0 +1,288 @@ +taxonomy = $taxonomy; + + // Set parent defaults. + parent::__construct( + array( + 'singular' => 'similar_term', + 'plural' => 'similar_terms', + 'ajax' => false, + ) + ); + } + + /** + * Gets the list of columns. + * + * @return string[] Array of column titles keyed by their column name. + */ + public function get_columns() { + $tax = get_taxonomy( $this->taxonomy ); + $labels = get_taxonomy_labels( $tax ); + $label = $labels->singular_name ?? __( 'Term', 'classifai' ); + + return array( + 'term' => $label, + // translators: %s: Singular label of the taxonomy. + 'similar_term' => sprintf( __( 'Similar %s' ), $label ), + 'actions' => __( 'Actions', 'classifai' ), + ); + } + + /** + * Prepares the list of items for displaying. 
+ */ + public function prepare_items() { + $per_page = $this->get_items_per_page( 'edit_post_per_page' ); + $current = $this->get_pagenum(); + $offset = ( $current - 1 ) * $per_page; + $columns = $this->get_columns(); + $hidden = array(); + $sortable = $this->get_sortable_columns(); + + $this->_column_headers = array( $columns, $hidden, $sortable ); + + $this->process_bulk_action(); + + $terms = get_terms( + [ + 'taxonomy' => $this->taxonomy, + 'orderby' => 'count', + 'order' => 'DESC', + 'hide_empty' => false, + 'fields' => 'ids', + 'meta_key' => 'classifai_similar_terms', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'EXISTS', + 'number' => $per_page, + 'offset' => $offset, + ] + ); + + $items = []; + + foreach ( $terms as $term_id ) { + $similar_terms = get_term_meta( $term_id, 'classifai_similar_terms', true ); + + if ( ! $similar_terms ) { + continue; + } + + foreach ( $similar_terms as $k => $v ) { + $similar_term = get_term( $k ); + if ( $similar_term ) { + $items[] = [ + 'term' => get_term( $term_id ), + 'similar_term' => $similar_term, + 'score' => $v, + ]; + } else { + unset( $similar_terms[ $k ] ); + update_term_meta( $term_id, 'classifai_similar_terms', $similar_terms ); + } + } + + if ( empty( $similar_terms ) ) { + delete_term_meta( $term_id, 'classifai_similar_terms' ); + } + } + + $total = wp_count_terms( + [ + 'taxonomy' => $this->taxonomy, + 'hide_empty' => false, + 'meta_key' => 'classifai_similar_terms', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'EXISTS', + ] + ); + + $this->items = $items; + $this->set_pagination_args( + array( + 'total_items' => $total, // WE have to calculate the total number of items. + 'per_page' => $per_page, // WE have to determine how many items to show on a page. + 'total_pages' => ceil( $total / $per_page ), // WE have to calculate the total number of pages. 
+ ) + ); + } + + /** + * Generate term html to show it in Similar terms list table + * + * @param WP_Term $term Term Object. + * @param WP_Term $similar_term Similar Term Object. + * @param float $score Similarity score. + * @return string + */ + public function generate_term_html( $term, $similar_term, $score = null ) { + $args = array( + 'action' => 'classifai_merge_term', + 'taxonomy' => $this->taxonomy, + 'from' => $similar_term->term_id, + 'to' => $term->term_id, + ); + $merge_url = add_query_arg( $args, wp_nonce_url( admin_url( 'admin-post.php' ), 'classifai_merge_term' ) ); + + return sprintf( + // translators: %s: Term name, %d: Term ID. + __( '%1$s (ID: %2$d)

', 'classifai' ) . + // translators: %s: Term slug. + __( 'Slug: %3$s
', 'classifai' ) . + // translators: %s: Term count. + __( 'Count: %4$s
', 'classifai' ) . + // translators: %s: Term parent name. + __( 'Parent: %5$s
', 'classifai' ) . + // translators: %s: Similarity score. + ( $score ? __( 'Similarity: %6$s
', 'classifai' ) : '%6$s' ) . + // translators: %s: Term description. + __( 'Description: %7$s

', 'classifai' ) . + '%9$s', + esc_html( $term->name ), + esc_html( $term->term_id ), + esc_html( $term->slug ), + esc_html( $term->count ), + esc_html( $term->parent > 0 ? get_term( $term->parent )->name : 'None' ), + $score ? esc_html( round( ( $score - 1 ) * 100, 2 ) . '%' ) : '', + esc_html( $term->description ), + esc_url( $merge_url ), + esc_html__( 'Merge and keep this', 'classifai' ) + ); + } + + /** + * Handles the term column output. + * + * @param array $item The current term item. + */ + public function column_term( $item ) { + $term = $item['term']; + $similar_term = $item['similar_term']; + $this->last_item_id = $term->term_id; + + return $this->generate_term_html( $term, $similar_term ); + } + + /** + * Handles the similar term column output. + * + * @param array $item The current term item. + */ + public function column_similar_term( $item ) { + $term = $item['term']; + $similar_term = $item['similar_term']; + + return $this->generate_term_html( $similar_term, $term, $item['score'] ); + } + + /** + * Handles the term actions output. + * + * @param array $item The current term item. + */ + public function column_actions( $item ) { + $term = $item['term']; + $similar_term = $item['similar_term']; + + $args = array( + 'action' => 'classifai_skip_similar_term', + 'taxonomy' => $this->taxonomy, + 'term' => $term->term_id, + 'similar_term' => $similar_term->term_id, + ); + $skip_url = add_query_arg( $args, wp_nonce_url( admin_url( 'admin-post.php' ), 'classifai_skip_similar_term' ) ); + + return sprintf( + "%s", + esc_url( $skip_url ), + esc_html__( 'Skip', 'classifai' ) + ); + } + + /** + * Generates content for a single row of the table + * + * @param array $item The current item. + * @param string $column_name The current column name. + */ + protected function column_default( $item, $column_name ) { + return esc_html( $item[ $column_name ] ); + } + + /** + * Generates custom table navigation to prevent conflicting nonces. 
+ * + * @param string $which The location of the bulk actions: Either 'top' or 'bottom'. + */ + protected function display_tablenav( $which ) { + ?> +
+
+ bulk_actions( $which ); ?> +
+ extra_tablenav( $which ); + $this->pagination( $which ); + ?> +
+
+ last_item_id === $term->term_id ) { + $class .= ' skip'; + } + + echo ''; + $this->single_row_columns( $item ); + echo ''; + } +} diff --git a/includes/Classifai/Admin/templates/classifai-header.php b/includes/Classifai/Admin/templates/classifai-header.php index ea29a7fde..e15ad82f7 100644 --- a/includes/Classifai/Admin/templates/classifai-header.php +++ b/includes/Classifai/Admin/templates/classifai-header.php @@ -8,7 +8,7 @@ // phpcs:ignore WordPress.Security.NonceVerification.Recommended $active_page = isset( $_GET['tab'] ) ? sanitize_text_field( wp_unslash( $_GET['tab'] ) ) : 'classifai_settings'; // phpcs:ignore WordPress.Security.NonceVerification.Recommended -$is_setup_page = isset( $_GET['page'] ) && 'classifai_setup' === sanitize_text_field( wp_unslash( $_GET['page'] ) ); +$is_setup_page = isset( $_GET['page'] ) && ( 'classifai_setup' === sanitize_text_field( wp_unslash( $_GET['page'] ) ) || 'classifai-term-cleanup' === sanitize_text_field( wp_unslash( $_GET['page'] ) ) ); ?>
diff --git a/includes/Classifai/Features/TermCleanup.php b/includes/Classifai/Features/TermCleanup.php new file mode 100644 index 000000000..01f82d447 --- /dev/null +++ b/includes/Classifai/Features/TermCleanup.php @@ -0,0 +1,1214 @@ +label = __( 'Term Cleanup', 'classifai' ); + + // Contains all providers that are registered to the service. + $this->provider_instances = $this->get_provider_instances( LanguageProcessing::get_service_providers() ); + + // Contains just the providers this feature supports. + $this->supported_providers = [ + OpenAIEmbeddings::ID => __( 'OpenAI Embeddings', 'classifai' ), + AzureEmbeddings::ID => __( 'Azure OpenAI Embeddings', 'classifai' ), + ]; + } + + /** + * Set up necessary hooks. + * + * This will always fire even if the Feature is not enabled. + */ + public function setup() { + parent::setup(); + + if ( $this->is_configured() && $this->is_enabled() ) { + // Check if ElasticPress plugin is installed and use EP selected. + if ( is_elasticpress_installed() && '1' === $this->get_settings( 'use_ep' ) ) { + $this->ep_integration = new TermCleanupEPIntegration( $this ); + $this->ep_integration->init(); + } + } + + $this->setting_page_url = admin_url( 'tools.php?page=classifai-term-cleanup' ); + // $this->background_process = new TermCleanupBackgroundProcess(); // TODO: Implement this class. + } + + /** + * Set up necessary hooks. + * + * This will only fire if the Feature is enabled. + */ + public function feature_setup() { + add_action( 'admin_enqueue_scripts', [ $this, 'enqueue_admin_assets' ] ); + + // Register the settings page for the Feature. 
+ add_action( 'admin_menu', [ $this, 'register_admin_menu_item' ] ); + add_action( 'admin_post_classifai_init_term_cleanup', [ $this, 'start_term_cleanup_process' ] ); + add_action( 'admin_post_classifai_cancel_term_cleanup', [ $this, 'cancel_term_cleanup_process' ] ); + add_action( 'admin_post_classifai_merge_term', [ $this, 'merge_term' ] ); + add_action( 'admin_post_classifai_skip_similar_term', [ $this, 'skip_similar_term' ] ); + + // Ajax action handler + add_action( 'wp_ajax_classifai_get_term_cleanup_status', [ $this, 'get_term_cleanup_status' ] ); + + // Admin notices + add_action( 'admin_notices', [ $this, 'render_notices' ] ); + } + + /** + * Register a sub page under the Tools menu. + */ + public function register_admin_menu_item() { + // Don't register the menu if no taxonomies are enabled. + if ( empty( $this->get_all_feature_taxonomies() ) ) { + return; + } + + add_submenu_page( + 'tools.php', + __( 'Term Cleanup', 'classifai' ), + __( 'Term Cleanup', 'classifai' ), + 'manage_options', + 'classifai-term-cleanup', + [ $this, 'render_settings_page' ] + ); + } + + /** + * Render the settings page for the Term Cleanup Feature. + */ + public function render_settings_page() { + $active_tax = isset( $_GET['tax'] ) ? sanitize_text_field( wp_unslash( $_GET['tax'] ) ) : ''; // phpcs:ignore WordPress.Security.NonceVerification.Recommended + $all_taxonomies = $this->get_taxonomies(); + $taxonomies = $this->get_all_feature_taxonomies(); + ?> + +
+ +

+ +

+
+ + +
+

+ +
+
+

+ background_process && $this->background_process->is_queued() ) { + $this->render_background_processing_status( $active_tax ); + } else { + $plural_label = strtolower( $this->get_taxonomy_label( $active_tax, true ) ); + $singular_label = strtolower( $this->get_taxonomy_label( $active_tax, false ) ); + + // translators: %s: Taxonomy name. + $submit_label = sprintf( __( 'Find similar %s', 'classifai' ), esc_attr( $plural_label ) ); + ?> +

+ +

+
+
+ + + + +
+
+ +
+
+ render_similar_terms( $active_tax ); + ?> +
+
+
+
+
+ esc_url( admin_url( 'admin-ajax.php' ) ), + 'ajax_nonce' => wp_create_nonce( 'classifai-status' ), + ) + ); + } + + /** + * Get the description for the enable field. + * + * @return string + */ + public function get_enable_description(): string { + return esc_html__( 'A Term Cleanup page will be added under Tools that can be used to clean up terms.', 'classifai' ); + } + + /** + * Add any needed custom fields. + */ + public function add_custom_settings_fields() { + $settings = $this->get_settings(); + + if ( ! is_elasticpress_installed() ) { + add_settings_field( + 'use_ep', + esc_html__( 'Use ElasticPress', 'classifai' ), + [ $this, 'render_input' ], + $this->get_option_name(), + $this->get_option_name() . '_section', + [ + 'label_for' => 'use_ep', + 'input_type' => 'hidden', + 'default_value' => '0', + 'description' => sprintf( + // translators: %1$s: ElasticPress plugin link starting tag, %2$s: ending tag. + __( 'Install and activate the %1$sElasticPress%2$s plugin to use Elasticsearch for finding similar terms.', 'classifai' ), + '', + '' + ), + ] + ); + } else { + add_settings_field( + 'use_ep', + esc_html__( 'Use ElasticPress', 'classifai' ), + [ $this, 'render_input' ], + $this->get_option_name(), + $this->get_option_name() . '_section', + [ + 'label_for' => 'use_ep', + 'input_type' => 'checkbox', + 'default_value' => $settings['use_ep'], + 'description' => esc_html__( 'Use Elasticsearch for finding similar terms; this will speed up the process for finding similar terms.', 'classifai' ), + ] + ); + } + + $taxonomies = $this->get_taxonomies(); + + foreach ( $taxonomies as $name => $label ) { + add_settings_field( + $name, + esc_html( $label ), + [ $this, 'render_taxonomies_settings' ], + $this->get_option_name(), + $this->get_option_name() . '_section', + [ + 'name' => $name, + ] + ); + } + } + + /** + * Returns the default settings for the feature. 
+ * + * @return array + */ + public function get_feature_default_settings(): array { + $tax_settings = []; + $taxonomies = $this->get_taxonomies(); + + foreach ( $taxonomies as $name => $label ) { + if ( 'category' === $name ) { + $tax_settings[ $name ] = true; + } else { + $tax_settings[ $name ] = false; + } + + $tax_settings[ "{$name}_threshold" ] = 75; + } + + $settings = [ + 'provider' => OpenAIEmbeddings::ID, + 'use_ep' => 1, + 'taxonomies' => $tax_settings, + ]; + + return $settings; + } + + /** + * Render the Taxonomies settings. + * + * @param array $args Settings for the inputs + */ + public function render_taxonomies_settings( array $args ) { + $name = $args['name']; + $option_index = 'taxonomies'; + $feature_args = [ + 'label_for' => $name, + 'option_index' => $option_index, + 'input_type' => 'checkbox', + ]; + + $threshold_args = [ + 'label_for' => "{$name}_threshold", + 'input_type' => 'number', + 'option_index' => $option_index, + 'default_value' => 75, + 'min' => 0, + 'max' => 100, + 'step' => 1, + ]; + ?> + + + + + +

+ render_input( $feature_args ); ?> + +

+ +

+
+ render_input( $threshold_args ); ?> +

+ get_feature_provider_instance(); + $meta_key = 'classifai_openai_embeddings'; + + if ( $provider instanceof AzureEmbeddings ) { + $meta_key = 'classifai_azure_openai_embeddings'; + } + + /** + * Filter the meta key for embeddings. + * + * @since x.x.x + * @hook classifai_feature_term_cleanup_embeddings_meta_key + * + * @param {string} $meta_key Meta key for embeddings. + * @param {TermCleanup} $this Feature instance. + * + * @return {string} Meta key for embeddings. + */ + return apply_filters( 'classifai_' . static::ID . '_embeddings_meta_key', $meta_key, $this ); + } + + /** + * Get all feature taxonomies. + * + * @return array + */ + public function get_all_feature_taxonomies(): array { + $taxonomies = $this->get_taxonomies(); + $settings = $this->get_settings( 'taxonomies' ); + + $enabled_taxonomies = []; + foreach ( $taxonomies as $name => $label ) { + if ( isset( $settings[ $name ] ) && (bool) $settings[ $name ] ) { + $enabled_taxonomies[] = $name; + } + } + + return $enabled_taxonomies; + } + + /** + * Return the list of taxonomies + * + * @return array + */ + public function get_taxonomies(): array { + $taxonomies = get_taxonomies( [], 'objects' ); + $taxonomies = array_filter( $taxonomies, 'is_taxonomy_viewable' ); + $supported = []; + + foreach ( $taxonomies as $taxonomy ) { + if ( 'post_format' === $taxonomy->name ) { + continue; + } + + $supported[ $taxonomy->name ] = $taxonomy->labels->name; + } + + /** + * Filter taxonomies shown in settings. + * + * @since x.x.x + * @hook classifai_feature_term_cleanup_setting_taxonomies + * + * @param {array} $supported Array of supported taxonomies. + * @param {object} $this Current instance of the class. + * + * @return {array} Array of taxonomies. + */ + return apply_filters( 'classifai_' . static::ID . '_setting_taxonomies', $supported, $this ); + } + + /** + * Start the term cleanup process. + */ + public function start_term_cleanup_process() { + if ( ! 
$this->background_process ) { + wp_die( esc_html__( 'Background processing not enabled.', 'classifai' ) ); + } + + if ( + empty( $_POST['classifai_term_cleanup_nonce'] ) || + ! wp_verify_nonce( sanitize_text_field( wp_unslash( $_POST['classifai_term_cleanup_nonce'] ) ), 'classifai_term_cleanup' ) + ) { + wp_die( esc_html__( 'You don\'t have permission to perform this operation.', 'classifai' ) ); + } + + if ( ! $this->is_feature_enabled() ) { + wp_die( esc_html__( 'You don\'t have permission to perform this operation.', 'classifai' ) ); + } + + $settings = $this->get_settings( 'taxonomies' ); + $taxonomy = isset( $_POST['classifai_term_cleanup_taxonomy'] ) ? sanitize_text_field( wp_unslash( $_POST['classifai_term_cleanup_taxonomy'] ) ) : ''; + $thresold = isset( $settings[ $taxonomy . '_threshold' ] ) ? absint( $settings[ $taxonomy . '_threshold' ] ) : 75; + + if ( empty( $taxonomy ) ) { + wp_die( esc_html__( 'Invalid taxonomy.', 'classifai' ) ); + } + + // Clear previously found similar terms. + $args = [ + 'taxonomy' => $taxonomy, + 'hide_empty' => false, + 'fields' => 'ids', + 'meta_key' => 'classifai_similar_terms', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'EXISTS', + ]; + + $terms = get_terms( $args ); + + if ( ! empty( $terms ) ) { + foreach ( $terms as $term_id ) { + delete_term_meta( $term_id, 'classifai_similar_terms' ); + } + } + + $this->background_process->push_to_queue( + array( + 'taxonomy' => $taxonomy, + 'thresold' => $thresold, + 'action' => 'term_cleanup', + 'embeddings_generated' => false, + 'processed' => 0, + 'term_id' => 0, + 'offset' => 0, + 'started_by' => get_current_user_id(), + ) + ); + + $this->add_notice( + __( 'Process for finding similar terms has started.', 'classifai' ), + 'info' + ); + + $this->background_process->save()->dispatch(); + + // Redirect back to the settings page. 
+ wp_safe_redirect( add_query_arg( 'tax', $taxonomy, $this->setting_page_url ) ); + exit; + } + + /** + * Cancel the term cleanup process. + */ + public function cancel_term_cleanup_process() { + if ( ! $this->background_process ) { + wp_die( esc_html__( 'Background processing not enabled.', 'classifai' ) ); + } + + // Check the nonce for security + if ( + empty( $_GET['_wpnonce'] ) || + ! wp_verify_nonce( sanitize_text_field( wp_unslash( $_GET['_wpnonce'] ) ), 'classifai_cancel_term_cleanup' ) + ) { + wp_die( esc_html__( 'You don\'t have permission to perform this operation.', 'classifai' ) ); + } + + $taxonomy = isset( $_GET['taxonomy'] ) ? sanitize_text_field( wp_unslash( $_GET['taxonomy'] ) ) : ''; + $this->background_process->cancel(); + + if ( $this->background_process->is_cancelled() ) { + // Add a notice to inform the user that the process will be cancelled soon. + $this->add_notice( + __( 'Process for the finding similar terms will be cancelled soon.', 'classifai' ), + 'info' + ); + } + + // Redirect back to the settings page. + wp_safe_redirect( add_query_arg( 'tax', $taxonomy, $this->setting_page_url ) ); + exit; + } + + /** + * Get the max number of terms to process. + * + * @return int + */ + public function get_max_terms(): int { + return 100; + } + + /** + * Generate embeddings for the terms. + * + * @param string $taxonomy Taxonomy to process. + * @return bool True if embeddings were generated, false otherwise. + */ + public function generate_embeddings( string $taxonomy ): bool { + $exclude = []; + + // Exclude the uncategorized term. + if ( 'category' === $taxonomy ) { + // Exclude the uncategorized term. 
+ $uncat_term = get_term_by( 'name', 'Uncategorized', 'category' ); + if ( $uncat_term ) { + $exclude = [ $uncat_term->term_id ]; + } + } + + $meta_key = sanitize_text_field( $this->get_embeddings_meta_key() ); + $args = [ + 'taxonomy' => $taxonomy, + 'orderby' => 'count', + 'order' => 'DESC', + 'hide_empty' => false, + 'fields' => 'ids', + 'meta_key' => $meta_key, // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'NOT EXISTS', + 'number' => $this->get_max_terms(), + 'exclude' => $exclude, // phpcs:ignore WordPressVIPMinimum.Performance.WPQueryParams.PostNotIn_exclude + ]; + + $terms = get_terms( $args ); + + if ( is_wp_error( $terms ) || empty( $terms ) ) { + return false; + } + + $provider = $this->get_feature_provider_instance(); + + // Generate embedding data for each term. + foreach ( $terms as $term_id ) { + $provider->generate_embeddings_for_term( $term_id, false, $this ); + } + + return true; + } + + /** + * Get similar terms. + * + * @param string $taxonomy Taxonomy to process. + * @param int $thresold Thresold to consider terms as duplicates. + * @param array $args Additional arguments. + * @return array|bool + */ + public function get_similar_terms( string $taxonomy, int $thresold, array $args = [] ) { + if ( class_exists( '\\ElasticPress\\Feature' ) && '1' === $this->get_settings( 'use_ep' ) ) { + return $this->get_similar_terms_using_elasticpress( $taxonomy, $thresold, $args ); + } + + return $this->get_similar_terms_using_wpdb( $taxonomy, $thresold, $args ); + } + + /** + * Get similar terms using WPDB. + * + * This method is used to get similar terms using MySQL database. + * This method is slower than using ElasticPress but can be used + * when ElasticPress is not installed or not in use. + * + * @param string $taxonomy Taxonomy to process. + * @param int $thresold Thresold to consider terms as duplicates. + * @param array $args Additional arguments. 
+ * @return array|bool + */ + public function get_similar_terms_using_wpdb( string $taxonomy, int $thresold, array $args = [] ) { + $processed = $args['processed'] ?? 0; + $term_id = $args['term_id'] ?? 0; + $offset = $args['offset'] ?? 0; + $meta_key = sanitize_text_field( $this->get_embeddings_meta_key() ); + + if ( ! $term_id ) { + $params = [ + 'taxonomy' => $taxonomy, + 'orderby' => 'count', + 'order' => 'DESC', + 'hide_empty' => false, + 'fields' => 'ids', + 'meta_key' => $meta_key, // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'EXISTS', + 'number' => 1, + 'offset' => $processed, + ]; + + if ( is_taxonomy_hierarchical( $taxonomy ) ) { + $params['parent'] = 0; + } + + $terms = get_terms( $params ); + + if ( is_wp_error( $terms ) || empty( $terms ) ) { + return false; + } + + $term_id = $terms[0]; + $offset = 0; + $args['term_id'] = $term_id; + $args['offset'] = $offset; + } + + $meta_key = sanitize_text_field( $this->get_embeddings_meta_key() ); + $term_embedding = get_term_meta( $term_id, $meta_key, true ); + + if ( 1 === count( $term_embedding ) ) { + $term_embedding = $term_embedding[0]; + } + + global $wpdb; + $limit = apply_filters( 'classifai_term_cleanup_compare_limit', 2000, $taxonomy ); + $meta_key = sanitize_text_field( $this->get_embeddings_meta_key() ); + + // SQL query to retrieve term meta using joins + // phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.DirectDatabaseQuery.NoCaching -- Running a custom query to get 1k terms embeddings at a time. 
+ $results = $wpdb->get_results( + $wpdb->prepare( + "SELECT DISTINCT t.term_id, tm.meta_value, tt.count + FROM {$wpdb->terms} AS t + INNER JOIN {$wpdb->term_taxonomy} AS tt ON t.term_id = tt.term_id + INNER JOIN {$wpdb->termmeta} AS tm ON t.term_id = tm.term_id + WHERE tt.taxonomy = %s + AND tm.meta_key = %s + AND t.term_id != %d + AND tt.parent = 0 + ORDER BY tt.count DESC + LIMIT %d OFFSET %d", + $taxonomy, + $meta_key, + $term_id, + $limit, + absint( $offset + $processed ) // Add the processed terms counts to the offset to skip already processed terms. + ) + ); + $count = count( $results ); + + $calculations = new EmbeddingCalculations(); + $similar_terms = []; + + foreach ( $results as $index => $result ) { + // Skip if the term is the same as the term we are comparing. + if ( $term_id === $result->term_id ) { + continue; + } + + $compare_term_id = $result->term_id; + $compare_embedding = maybe_unserialize( $result->meta_value ); + + if ( 1 === count( $compare_embedding ) ) { + $compare_embedding = $compare_embedding[0]; + } + + $similarity = $calculations->cosine_similarity( $term_embedding, $compare_embedding ); + if ( false !== $similarity && ( 1 - $similarity ) >= ( $thresold / 100 ) ) { + $similar_terms[ $compare_term_id ] = 1 - $similarity; + } + } + + if ( ! empty( $similar_terms ) ) { + $existing_similar_terms = get_term_meta( $term_id, 'classifai_similar_terms', true ); + + if ( is_array( $existing_similar_terms ) ) { + $similar_terms = $existing_similar_terms + $similar_terms; + } + + update_term_meta( $term_id, 'classifai_similar_terms', $similar_terms ); + } + + if ( $count < $limit ) { + $args['processed'] = $processed + 1; + $args['term_id'] = 0; + $args['offset'] = 0; + } else { + $args['offset'] = $offset + $limit; + } + + return $args; + } + + /** + * Get similar terms using Elasticsearch via ElasticPress. + * + * @param string $taxonomy Taxonomy to process. + * @param int $thresold Thresold to consider terms as duplicates. 
+ * @param array $args Additional arguments. + * @return array|bool|WP_Error + */ + public function get_similar_terms_using_elasticpress( string $taxonomy, int $thresold, array $args = [] ) { + $processed = $args['processed'] ?? 0; + $meta_key = sanitize_text_field( $this->get_embeddings_meta_key() ); + + $params = [ + 'taxonomy' => $taxonomy, + 'orderby' => 'count', + 'order' => 'DESC', + 'hide_empty' => false, + 'fields' => 'ids', + 'meta_key' => $meta_key, // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'EXISTS', + 'number' => 10, + 'offset' => $processed, + ]; + + if ( is_taxonomy_hierarchical( $taxonomy ) ) { + $params['parent'] = 0; + } + + $terms = get_terms( $params ); + + if ( is_wp_error( $terms ) || empty( $terms ) ) { + return false; + } + + if ( ! $this->ep_integration ) { + $this->ep_integration = new TermCleanupEPIntegration( $this ); + } + + foreach ( $terms as $term_id ) { + // Find similar terms for the term. + $search_results = $this->ep_integration->exact_knn_search( $term_id, 'term', 500, $thresold ); + + if ( is_wp_error( $search_results ) ) { + return $search_results; + } + + $similar_terms = []; + $filtered_results = array_filter( + $search_results, + function ( $result ) use ( $taxonomy ) { + return $result['taxonomy'] === $taxonomy; + } + ); + + foreach ( $filtered_results as $index => $result ) { + $compare_term_id = $result['term_id']; + $existing_similar_terms = get_term_meta( $compare_term_id, 'classifai_similar_terms', true ); + + // Skip if it is already present in the similar terms list of the term we are comparing. + if ( ! empty( $existing_similar_terms ) && isset( $existing_similar_terms[ $term_id ] ) ) { + continue; + } + + $similar_terms[ $compare_term_id ] = $result['score']; + } + + if ( ! 
empty( $similar_terms ) ) { + $existing_similar_terms = get_term_meta( $term_id, 'classifai_similar_terms', true ); + + if ( is_array( $existing_similar_terms ) ) { + $similar_terms = $existing_similar_terms + $similar_terms; + } + + update_term_meta( $term_id, 'classifai_similar_terms', $similar_terms ); + } + + $args['processed'] = $args['processed'] + 1; + } + + $args['term_id'] = 0; + + return $args; + } + + /** + * Get the background processing status. + * + * @param string $taxonomy Taxonomy to process. + * @return array + */ + public function get_background_processing_status( string $taxonomy ): array { + if ( ! $this->background_process ) { + return []; + } + + $batches = $this->background_process->get_batches(); + + if ( ! empty( $batches ) ) { + foreach ( $batches as $batch ) { + foreach ( $batch->data as $key => $value ) { + if ( 'term_cleanup' === $value['action'] && $taxonomy === $value['taxonomy'] ) { + return $value; + } + } + } + } + + return []; + } + + /** + * Render the processing status. + * + * @param string $taxonomy Taxonomy to process. + */ + public function render_background_processing_status( $taxonomy ) { + $status = $this->get_background_processing_status( $taxonomy ); + + if ( empty( $status ) ) { + ?> +

+ + 'classifai_cancel_term_cleanup', + 'taxonomy' => $taxonomy, + ); + $cancel_url = add_query_arg( $args, wp_nonce_url( admin_url( 'admin-post.php' ), 'classifai_cancel_term_cleanup' ) ); + $label = strtolower( $this->get_taxonomy_label( $taxonomy, true ) ); + ?> + +

+

+ +

+ + +

+ + +

+

+ + setting_page_url ); + $refresh = sprintf( + // translators: %s: Refresh the page link. + esc_html__( '%s to see these results.', 'classifai' ), + '' . esc_html__( 'Refresh the page', 'classifai' ) . '' + ); + echo wp_kses_post( + sprintf( + /* translators: %1$s: Taxonomy name, %d: Number of terms processed */ + __( 'Finding similar %1$s, %2$d %1$s processed. %3$s', 'classifai' ), + esc_html( $label ), + absint( $processed ), + ( absint( $processed ) > 0 ) ? $refresh : '' + ) + ); + ?> +

+ get_embeddings_meta_key() ); + $generated = wp_count_terms( + [ + 'taxonomy' => $taxonomy, + 'hide_empty' => false, + 'meta_key' => $meta_key, // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'EXISTS', + ] + ); + ?> +

+ + %2$d %1$s processed.', 'classifai' ), + esc_html( $label ), + absint( $generated ) + ) + ); + ?> +

+ + + +
+ + get_taxonomy_label( $taxonomy, true ); + $count = wp_count_terms( + [ + 'taxonomy' => $taxonomy, + 'hide_empty' => false, + 'meta_key' => 'classifai_similar_terms', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'EXISTS', + ] + ); + + if ( $count > 0 ) { + ?> +

+ +

+ prepare_items(); + $list_table->display(); + } + } + + /** + * Get taxonomy labels. + * + * @param string $taxonomy Taxonomy to get labels for. + * @param bool $plural Whether to get plural label. + * @return string + */ + public function get_taxonomy_label( $taxonomy, $plural = false ): string { + $tax = get_taxonomy( $taxonomy ); + $labels = get_taxonomy_labels( $tax ); + + if ( $plural ) { + $label = $labels->name ?? __( 'Terms', 'classifai' ); + } else { + $label = $labels->singular_name ?? __( 'Term', 'classifai' ); + } + + return $label; + } + + /** + * Ajax handler for refresh compare status. + */ + public function get_term_cleanup_status() { + if ( ! $this->background_process ) { + wp_send_json_error( [ 'error' => __( 'Background processing not enabled.', 'classifai' ) ] ); + } + + // Check the nonce for security + check_ajax_referer( 'classifai-status', 'nonce' ); + + $data = array( + 'is_running' => false, + 'status' => '', + ); + $taxonomy = isset( $_POST['taxonomy'] ) ? sanitize_text_field( wp_unslash( $_POST['taxonomy'] ) ) : ''; + + if ( empty( $taxonomy ) ) { + $data['error'] = __( 'Taxonomy is required', 'classifai' ); + wp_send_json_error( $data ); + } + + if ( $this->background_process->is_queued() ) { + $data['is_running'] = true; + ob_start(); + $this->render_background_processing_status( $taxonomy ); + $data['status'] = ob_get_clean(); + } + + wp_send_json_success( $data ); + } + + /** + * Merge term. + */ + public function merge_term() { + // Check the nonce for security + if ( + empty( $_GET['_wpnonce'] ) || + ! wp_verify_nonce( sanitize_text_field( wp_unslash( $_GET['_wpnonce'] ) ), 'classifai_merge_term' ) + ) { + wp_die( esc_html__( 'You don\'t have permission to perform this operation.', 'classifai' ) ); + } + + $taxonomy = isset( $_GET['taxonomy'] ) ? sanitize_text_field( wp_unslash( $_GET['taxonomy'] ) ) : ''; + $to = isset( $_GET['to'] ) ? absint( wp_unslash( $_GET['to'] ) ) : 0; + $from = isset( $_GET['from'] ) ? 
absint( wp_unslash( $_GET['from'] ) ) : 0; + $to_term = get_term( $to, $taxonomy ); + $from_term = get_term( $from, $taxonomy ); + $redirect = add_query_arg( 'tax', $taxonomy, $this->setting_page_url ); + + if ( empty( $taxonomy ) || empty( $to ) || empty( $from ) ) { + $this->add_notice( + __( 'Invalid request.', 'classifai' ), + 'error' + ); + + // Redirect back to the settings page. + wp_safe_redirect( $redirect ); + exit; + } + + if ( $to === $from ) { + $this->add_notice( + __( 'Cannot merge term with itself.', 'classifai' ), + 'error' + ); + + // Redirect back to the settings page. + wp_safe_redirect( $redirect ); + exit; + } + + $ret = wp_delete_term( + $from, + $taxonomy, + array( + 'default' => $to, + 'force_default' => true, + ) + ); + + if ( is_wp_error( $ret ) ) { + $this->add_notice( + // translators: %s: Error message. + sprintf( __( 'Error merging terms: %s.', 'classifai' ), $ret->get_error_message() ), + 'error' + ); + } + + $this->add_notice( + // translators: %1$s: From term name, %2$s: To term name. + sprintf( __( 'Merged term "%1$s" into "%2$s".', 'classifai' ), $from_term->name, $to_term->name ), + 'success' + ); + + // Redirect back to the settings page. + wp_safe_redirect( $redirect ); + exit; + } + + /** + * Skip similar term. + */ + public function skip_similar_term() { + // Check the nonce for security + if ( + empty( $_GET['_wpnonce'] ) || + ! wp_verify_nonce( sanitize_text_field( wp_unslash( $_GET['_wpnonce'] ) ), 'classifai_skip_similar_term' ) + ) { + wp_die( esc_html__( 'You don\'t have permission to perform this operation.', 'classifai' ) ); + } + + $taxonomy = isset( $_GET['taxonomy'] ) ? sanitize_text_field( wp_unslash( $_GET['taxonomy'] ) ) : ''; + $term = isset( $_GET['term'] ) ? absint( wp_unslash( $_GET['term'] ) ) : 0; + $similar_term = isset( $_GET['similar_term'] ) ? 
absint( wp_unslash( $_GET['similar_term'] ) ) : 0; + $redirect = add_query_arg( 'tax', $taxonomy, $this->setting_page_url ); + + // SKip/Ignore the similar term. + $term_meta = get_term_meta( $term, 'classifai_similar_terms', true ); + if ( is_array( $term_meta ) && isset( $term_meta[ $similar_term ] ) ) { + unset( $term_meta[ $similar_term ] ); + if ( empty( $term_meta ) ) { + delete_term_meta( $term, 'classifai_similar_terms' ); + } else { + update_term_meta( $term, 'classifai_similar_terms', $term_meta ); + } + } + + $this->add_notice( + esc_html__( 'Skipped similar term.', 'classifai' ), + 'success' + ); + + // Redirect back to the settings page. + wp_safe_redirect( $redirect ); + exit; + } + + /** + * Add a notice to be displayed. + * + * @param string $message Message to display. + * @param string $type Type of notice. + */ + public function add_notice( $message, $type = 'success' ) { + $notices = get_transient( $this->notices_transient_key ); + + if ( ! is_array( $notices ) ) { + $notices = []; + } + + $notices[] = array( + 'message' => $message, + 'type' => $type, + ); + + set_transient( $this->notices_transient_key, $notices, 300 ); + } + + /** + * Render notices. + */ + public function render_notices() { + $notices = get_transient( $this->notices_transient_key ); + + if ( ! empty( $notices ) ) { + foreach ( $notices as $notice ) { + ?> +
+

+ +

+
+ notices_transient_key ); + } + } +} diff --git a/includes/Classifai/Features/TermCleanupEPIntegration.php b/includes/Classifai/Features/TermCleanupEPIntegration.php new file mode 100644 index 000000000..016441326 --- /dev/null +++ b/includes/Classifai/Features/TermCleanupEPIntegration.php @@ -0,0 +1,252 @@ +term_cleanup = $feature; + $this->es_version = Elasticsearch::factory()->get_elasticsearch_version(); + } + + /** + * Inintialize the class and register the needed hooks. + */ + public function init() { + // Vector support was added in Elasticsearch 7.0. + if ( version_compare( $this->es_version, '7.0', '<=' ) ) { + return; + } + + add_filter( 'ep_term_mapping', [ $this, 'add_term_vector_field_mapping' ] ); + add_filter( 'ep_prepare_term_meta_excluded_public_keys', [ $this, 'exclude_vector_meta' ] ); + add_filter( 'ep_term_sync_args', [ $this, 'add_vector_field_to_term_sync' ], 10, 2 ); + } + + /** + * Add our vector field mapping to the Elasticsearch term index. + * + * @param array $mapping Current mapping. + * @param int $dimensions Number of dimensions for the vector field. Default 512. + * @param bool $quantization Whether to use quantization for the vector field. Default false. + * @return array + */ + public function add_term_vector_field_mapping( array $mapping, int $dimensions = 512, bool $quantization = true ): array { + // Don't add the field if it already exists. + if ( isset( $mapping['mappings']['properties']['chunks'] ) ) { + return $mapping; + } + + // Add the default vector field mapping. + $mapping['mappings']['properties']['chunks'] = [ + 'type' => 'nested', + 'properties' => [ + 'vector' => [ + 'type' => 'dense_vector', + 'dims' => (int) $dimensions, // This needs to match the dimensions your model uses. + ], + ], + ]; + + // Add extra vector fields for newer versions of Elasticsearch. 
+ if ( version_compare( $this->es_version, '8.0', '>=' ) ) { + // The index (true or false, default true) and similarity (l2_norm, dot_product or cosine) fields + // were added in 8.0. The similarity field must be set if index is true. + $mapping['mappings']['properties']['chunks']['properties']['vector'] = array_merge( + $mapping['mappings']['properties']['chunks']['properties']['vector'], + [ + 'index' => true, + 'similarity' => 'cosine', + ] + ); + + // The element_type field was added in 8.6. This can be either float (default) or byte. + if ( version_compare( $this->es_version, '8.6', '>=' ) ) { + $mapping['mappings']['properties']['chunks']['properties']['vector']['element_type'] = 'float'; + } + + // The int8_hnsw type was added in 8.12. + if ( $quantization && version_compare( $this->es_version, '8.12', '>=' ) ) { + // This is supposed to result in better performance but slightly less accurate results. + // See https://www.elastic.co/guide/en/elasticsearch/reference/8.13/knn-search.html#knn-search-quantized-example. + // Can test with this on and off and compare results to see what works best. + $mapping['mappings']['properties']['chunks']['properties']['vector']['index_options']['type'] = 'int8_hnsw'; + } + } + + return $mapping; + } + + /** + * Exclude our vector meta from being synced. + * + * @param array $excluded_keys Current excluded keys. + * @return array + */ + public function exclude_vector_meta( array $excluded_keys ): array { + $excluded_keys[] = $this->term_cleanup->get_embeddings_meta_key(); + + return $excluded_keys; + } + + /** + * Add the embedding data to the term vector sync args. + * + * @param array $args Current sync args. + * @param int $term_id Term ID being synced. + * @return array + */ + public function add_vector_field_to_term_sync( array $args, int $term_id ): array { + // Try to use the stored embeddings first. 
+ $meta_key = $this->term_cleanup->get_embeddings_meta_key(); + $embeddings = get_term_meta( $term_id, $meta_key, true ); + + // If they don't exist, make API requests to generate them. + if ( ! $embeddings ) { + $provider = $this->term_cleanup->get_feature_provider_instance(); + $embeddings = $provider->generate_embeddings_for_term( $term_id, false, $this->term_cleanup ); + } + + // If we still don't have embeddings, return early. + if ( ! $embeddings || empty( $embeddings ) ) { + return $args; + } + + // Add the embeddings data to the sync args. + $args['chunks'] = []; + + foreach ( $embeddings as $embedding ) { + $args['chunks'][] = [ + 'vector' => array_map( 'floatval', $embedding ), + ]; + } + + return $args; + } + + /** + * Add the score field to the document. + * + * @param array $document Document retrieved from Elasticsearch. + * @param array $hit Raw Elasticsearch hit. + * @return array + */ + public function add_score_field_to_document( array $document, array $hit ): array { + // Add the score to the document if it exists. + if ( isset( $hit['_score'] ) ) { + $document['score'] = $hit['_score']; + } + + return $document; + } + + /** + * Run an exact k-nearest neighbor (kNN) search. + * + * @param int $term_id Term ID to search for. + * @param string $index Indexable to run the query against. Default term. + * @param int $num Number of items to return. + * @param int $threshold Threshold for the minimum score. 
+ * @return array|WP_Error + */ + public function exact_knn_search( int $term_id, string $index = 'term', int $num = 1000, $threshold = 75 ) { + $provider = $this->term_cleanup->get_feature_provider_instance(); + $query_embedding = $provider->generate_embeddings_for_term( $term_id, false, $this->term_cleanup ); + $min_score = 1 + ( $threshold / 100 ); + + if ( is_wp_error( $query_embedding ) ) { + return $query_embedding; + } + + if ( is_array( $query_embedding ) ) { + $query_embedding = $query_embedding[0]; + } + + // Get the ElasticPress indexable. + $indexable = Indexables::factory()->get( $index ); + + if ( ! $indexable ) { + return new WP_Error( 'invalid_index', esc_html__( 'Invalid indexable provided.', 'classifai' ) ); + } + + // Build our exact kNN query. + $knn_query = [ + 'from' => 0, + 'size' => (int) $num, + 'query' => [ + 'bool' => [ + 'must' => [ + [ + 'nested' => [ + 'path' => 'chunks', + 'query' => [ + 'script_score' => [ + 'query' => [ + 'match_all' => (object) [], + ], + 'script' => [ + 'source' => 'cosineSimilarity(params.query_vector, "chunks.vector") + 1.0', + 'params' => [ + 'query_vector' => array_map( 'floatval', $query_embedding ), + ], + ], + ], + ], + ], + ], + ], + 'must_not' => [ + [ + 'term' => [ + 'term_id' => $term_id, + ], + ], + ], + ], + ], + '_source' => [ 'term_id', 'score', 'taxonomy' ], + 'min_score' => $min_score, + ]; + + // Add the score field to the document. + add_filter( 'ep_retrieve_the_term', [ $this, 'add_score_field_to_document' ], 10, 2 ); + + // Run the query using the ElasticPress indexable. + $res = $indexable->query_es( $knn_query, [] ); + + if ( false === $res || ! 
isset( $res['documents'] ) ) { + return new WP_Error( 'es_error', esc_html__( 'Unable to query Elasticsearch', 'classifai' ) ); + } + + return $res['documents']; + } +} diff --git a/includes/Classifai/Services/ServicesManager.php b/includes/Classifai/Services/ServicesManager.php index 564c7d0e2..807dbf9e1 100644 --- a/includes/Classifai/Services/ServicesManager.php +++ b/includes/Classifai/Services/ServicesManager.php @@ -77,6 +77,7 @@ public function register_language_processing_features( array $features ): array '\Classifai\Features\AudioTranscriptsGeneration', '\Classifai\Features\Moderation', '\Classifai\Features\Smart404', + '\Classifai\Features\TermCleanup', ]; foreach ( $core_features as $feature ) { From 516af1ec99a0e4184deb4f910daadebcca7eda5d Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Wed, 9 Oct 2024 11:06:51 -0600 Subject: [PATCH 02/15] Update readmes --- README.md | 44 +++++++++++++++++++++++++++++++++++++++++++- readme.txt | 4 +++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 823ae151e..806f80a5d 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro * Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech), [Amazon Polly](https://aws.amazon.com/polly/) or [OpenAI's Text to Speech API](https://platform.openai.com/docs/guides/text-to-speech) * Classify post content using [IBM Watson's Natural Language Understanding API](https://www.ibm.com/watson/services/natural-language-understanding/), [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) * Create a smart 404 page that has a recommended results section that 
suggests relevant content to the user based on the page URL they were trying to access using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with [ElasticPress](https://github.com/10up/ElasticPress) +* Find similar terms to merge together using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with [ElasticPress](https://github.com/10up/ElasticPress). Note this only compares top-level terms and if you merge a term that has children, these become top-level terms as per default WordPress behavior * BETA: Recommend content based on overall site traffic via [Microsoft Azure's AI Personalizer API](https://azure.microsoft.com/en-us/services/cognitive-services/personalizer/) *(note that this service has been [deprecated by Microsoft](https://learn.microsoft.com/en-us/azure/ai-services/personalizer/) and as such, will no longer work. 
We are looking to replace this with a new provider to maintain the same functionality (see [issue#392](https://github.com/10up/classifai/issues/392))* * Generate image alt text, image tags, and smartly crop images using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/) * Scan images and PDF files for embedded text and save for use in post meta using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/) @@ -56,7 +57,8 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro * To utilize the Azure OpenAI Language Processing functionality, you will need an active [Microsoft Azure](https://signup.azure.com/signup) account and you will need to [apply](https://aka.ms/oai/access) for OpenAI access. * To utilize the Google Gemini Language Processing functionality, you will need an active [Google Gemini](https://ai.google.dev/tutorials/setup) account. * To utilize the AWS Language Processing functionality, you will need an active [AWS](https://console.aws.amazon.com/) account. -* To utilize the Smart 404 feature, you will need to use [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+. +* To utilize the Smart 404 feature, you will need an active [OpenAI](https://platform.openai.com/signup) account or [Microsoft Azure](https://signup.azure.com/signup) account with OpenAI access and you will need to use [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+. +* To utilize the Term Cleanup feature, you will need an active [OpenAI](https://platform.openai.com/signup) account or [Microsoft Azure](https://signup.azure.com/signup) account with OpenAI access. 
For better performance, you will need [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+. ## Pricing @@ -561,6 +563,46 @@ docker run -p 9200:9200 -d --name elasticsearch \ This will download, install and start Elasticsearch v7.9.0 to your local machine. You can then access Elasticsearch at `http://localhost:9200`, which is the same URL you can use to configure ElasticPress with. It is recommended that you change the `Content Items per Index Cycle` setting in ElasticPress to `20` to ensure indexing doesn't timeout. Also be aware of API rate limits on the OpenAI Embeddings API. +## Set Up the Term Cleanup Feature + +### 1. Decide on Provider + +* This Feature is powered by either OpenAI or Azure OpenAI. +* Once you've chosen a Provider, you'll need to create an account and get authentication details. + * When setting things up on the Azure side, ensure you choose either the `text-embedding-3-small` or `text-embedding-3-large` model. The Feature will not work with other models. + +### 2. Configure Settings under Tools > ClassifAI > Language Processing > Term Cleanup + +* Select the proper Provider in the provider dropdown. +* Enter your authentication details. +* Configure any other settings as desired. + +### 3. ElasticPress configuration + +It is recommended to use ElasticPress with this Feature, especially if processing more than 500 terms, as performance will be significantly better. Once the Term Cleanup Feature is configured, you can then proceed to get ElasticPress set up to index the data. + +If on a standard WordPress installation: + +* Install and activate the [ElasticPress](https://github.com/10up/elasticpress) plugin. +* Set your Elasticsearch URL in the ElasticPress settings (`ElasticPress > Settings`). +* Enable the [term index](https://www.elasticpress.io/blog/2023/03/enabling-comments-and-terms-in-elasticpress-5-0/) feature. 
+* Go to the `ElasticPress > Sync` settings page and trigger a sync, ensuring this is set to run a sync from scratch. This will send over the new schema to Elasticsearch and index all content, including creating vector embeddings for each term. + +If on a WordPress VIP hosted environment: + +* [Enable Enterprise Search](https://docs.wpvip.com/enterprise-search/enable/). +* [Enable the term index](https://docs.wpvip.com/enterprise-search/enable-features/#h-terms). Example command: `vip @example-app.develop -- wp vip-search activate-feature terms`. +* [Run the VIP-CLI `index` command](https://docs.wpvip.com/enterprise-search/index/). This sends the new schema to Elasticsearch and indexes all content, including creating vector embeddings for each term. Note you may need to use the `--setup` flag to ensure the schema is created correctly. + +### 4. Start the Term Cleanup Process + +Once configured, the plugin will add a new submenu under the Tools menu called Term Cleanup. + +* Go to the Term Cleanup page, click on your desired taxonomy, then click on the "Find similar" button. +* This initializes a background process that will compare each term to find ones that are similar. +* Once done, all the results will be displayed. +* You can then skip or merge the potential duplicate terms from the settings page. 
+ ## Set Up Image Processing features (via Microsoft Azure) Note that [Azure AI Vision](https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/home#image-requirements) can analyze and crop images that meet the following requirements: diff --git a/readme.txt b/readme.txt index cc92fd113..9df316e52 100644 --- a/readme.txt +++ b/readme.txt @@ -26,6 +26,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro * Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech), [Amazon Polly](https://aws.amazon.com/polly/) or [OpenAI's Text to Speech API](https://platform.openai.com/docs/guides/text-to-speech) * Classify post content using [IBM Watson's Natural Language Understanding API](https://www.ibm.com/watson/services/natural-language-understanding/), [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) * Create a smart 404 page that has a recommended results section that suggests relevant content to the user based on the page URL they were trying to access using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with [ElasticPress](https://github.com/10up/ElasticPress) +* Find similar terms to merge together using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with [ElasticPress](https://github.com/10up/ElasticPress). 
Note this only compares top-level terms and if you merge a term that has children, these become top-level terms as per default WordPress behavior * BETA: Recommend content based on overall site traffic via [Microsoft Azure's AI Personalizer API](https://azure.microsoft.com/en-us/services/cognitive-services/personalizer/) _(note that this service has been deprecated by Microsoft and as such, will no longer work. We are looking to replace this with a new provider to maintain the same functionality)_ * Generate image alt text, image tags, and smartly crop images using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/) * Scan images and PDF files for embedded text and save for use in post meta using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/) @@ -39,7 +40,8 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro * To utilize the Azure OpenAI Language Processing functionality, you will need an active [Microsoft Azure](https://signup.azure.com/signup) account and you will need to [apply](https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUNTZBNzRKNlVQSFhZMU9aV09EVzYxWFdORCQlQCN0PWcu) for OpenAI access. * To utilize the Google Gemini Language Processing functionality, you will need an active [Google Gemini](https://ai.google.dev/tutorials/setup) account. * To utilize the AWS Language Processing functionality, you will need an active [AWS](https://console.aws.amazon.com/) account. -* To utilize the Smart 404 feature, you will need to use [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+. 
+* To utilize the Smart 404 feature, you will need an active [OpenAI](https://platform.openai.com/signup) account or [Microsoft Azure](https://signup.azure.com/signup) account with OpenAI access and you will need to use [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+. +* To utilize the Term Cleanup feature, you will need an active [OpenAI](https://platform.openai.com/signup) account or [Microsoft Azure](https://signup.azure.com/signup) account with OpenAI access. For better performance, you will need [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+. == Upgrade Notice == From e6435a5cf7da8051bad32ae0640fd21414977330 Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Wed, 9 Oct 2024 11:17:11 -0600 Subject: [PATCH 03/15] Add basic tests --- .../term-cleanup-azure-openai.test.js | 68 +++++++++++++++++++ .../term-cleanup-openai.test.js | 54 +++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 tests/cypress/integration/language-processing/term-cleanup-azure-openai.test.js create mode 100644 tests/cypress/integration/language-processing/term-cleanup-openai.test.js diff --git a/tests/cypress/integration/language-processing/term-cleanup-azure-openai.test.js b/tests/cypress/integration/language-processing/term-cleanup-azure-openai.test.js new file mode 100644 index 000000000..67750e863 --- /dev/null +++ b/tests/cypress/integration/language-processing/term-cleanup-azure-openai.test.js @@ -0,0 +1,68 @@ +describe( '[Language processing] Term Cleanup - Azure OpenAI Tests', () => { + before( () => { + cy.login(); + cy.optInAllFeatures(); + } ); + + beforeEach( () => { + cy.login(); + } ); + + it( "ElasticPress option is hidden if the plugin isn't active", () => { + cy.disableElasticPress(); + + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_term_cleanup' + ); + + cy.get( 
'#use_ep' ).should( 'be:hidden' ); + } ); + + it( 'Can save Term Cleanup settings', () => { + cy.enableElasticPress(); + + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_term_cleanup' + ); + + // Enabled Feature. + cy.get( '#status' ).check(); + + // Setup Provider. + cy.get( '#provider' ).select( 'azure_openai_embeddings' ); + cy.get( + 'input[name="classifai_feature_term_cleanup[azure_openai_embeddings][endpoint_url]"]' + ) + .clear() + .type( 'https://e2e-test-azure-openai.test/' ); + cy.get( + 'input[name="classifai_feature_term_cleanup[azure_openai_embeddings][api_key]"]' + ) + .clear() + .type( 'password' ); + cy.get( + 'input[name="classifai_feature_term_cleanup[azure_openai_embeddings][deployment]"]' + ) + .clear() + .type( 'test' ); + + // Change all settings. + cy.get( '#use_ep' ).check(); + cy.get( '#category' ).uncheck(); + cy.get( '#category_threshold' ).clear().type( 80 ); + cy.get( '#post_tag' ).check(); + cy.get( '#post_tag_threshold' ).clear().type( 80 ); + + // Save settings. + cy.get( '#submit' ).click(); + + // Ensure settings page now exists. 
+ cy.visit( + '/wp-admin/tools.php?page=classifai-term-cleanup&tax=post_tag' + ); + + cy.get( '.classifai-wrapper .submit-wrapper' ).should( 'exist' ); + + cy.disableElasticPress(); + } ); +} ); diff --git a/tests/cypress/integration/language-processing/term-cleanup-openai.test.js b/tests/cypress/integration/language-processing/term-cleanup-openai.test.js new file mode 100644 index 000000000..65716c691 --- /dev/null +++ b/tests/cypress/integration/language-processing/term-cleanup-openai.test.js @@ -0,0 +1,54 @@ +describe( '[Language processing] Term Cleanup - OpenAI Tests', () => { + before( () => { + cy.login(); + cy.optInAllFeatures(); + } ); + + beforeEach( () => { + cy.login(); + } ); + + it( "ElasticPress option is hidden if the plugin isn't active", () => { + cy.disableElasticPress(); + + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_term_cleanup' + ); + + cy.get( '#use_ep' ).should( 'be:hidden' ); + } ); + + it( 'Can save Term Cleanup settings', () => { + cy.enableElasticPress(); + + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_term_cleanup' + ); + + // Enabled Feature. + cy.get( '#status' ).check(); + + // Setup Provider. + cy.get( '#provider' ).select( 'openai_embeddings' ); + cy.get( '#api_key' ).clear().type( 'password' ); + + // Change all settings. + cy.get( '#use_ep' ).check(); + cy.get( '#category' ).uncheck(); + cy.get( '#category_threshold' ).clear().type( 80 ); + cy.get( '#post_tag' ).check(); + cy.get( '#post_tag_threshold' ).clear().type( 80 ); + + // Save settings. + cy.get( '#submit' ).click(); + + // Ensure settings page now exists. 
+ cy.visit( + '/wp-admin/tools.php?page=classifai-term-cleanup&tax=post_tag' + ); + + cy.get( '.classifai-wrapper .submit-wrapper' ).should( 'exist' ); + + cy.disableElasticPress(); + } ); +} ); From 86311ffab3bd1789c1994a4a066f546a301145e0 Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Wed, 9 Oct 2024 11:47:29 -0600 Subject: [PATCH 04/15] Fix typo in namespace. Add necessary CSS to main admin.css file. Remove enqueues that aren't being used yet. Fix test assertion --- .../Classifai/Admin/SimilarTermsListTable.php | 2 - includes/Classifai/Features/TermCleanup.php | 42 +----- src/scss/admin.scss | 132 ++++++++++++++++++ .../term-cleanup-azure-openai.test.js | 2 +- .../term-cleanup-openai.test.js | 2 +- 5 files changed, 136 insertions(+), 44 deletions(-) diff --git a/includes/Classifai/Admin/SimilarTermsListTable.php b/includes/Classifai/Admin/SimilarTermsListTable.php index 480578631..9b7512a10 100644 --- a/includes/Classifai/Admin/SimilarTermsListTable.php +++ b/includes/Classifai/Admin/SimilarTermsListTable.php @@ -74,8 +74,6 @@ public function prepare_items() { $this->_column_headers = array( $columns, $hidden, $sortable ); - $this->process_bulk_action(); - $terms = get_terms( [ 'taxonomy' => $this->taxonomy, diff --git a/includes/Classifai/Features/TermCleanup.php b/includes/Classifai/Features/TermCleanup.php index 01f82d447..f776eb760 100644 --- a/includes/Classifai/Features/TermCleanup.php +++ b/includes/Classifai/Features/TermCleanup.php @@ -1,6 +1,6 @@ setting_page_url = admin_url( 'tools.php?page=classifai-term-cleanup' ); + $this->setting_page_url = admin_url( 'tools.php?page=classifai-term-cleanup' ); // $this->background_process = new TermCleanupBackgroundProcess(); // TODO: Implement this class. } @@ -93,8 +93,6 @@ public function setup() { * This will only fire if the Feature is enabled. */ public function feature_setup() { - add_action( 'admin_enqueue_scripts', [ $this, 'enqueue_admin_assets' ] ); - // Register the settings page for the Feature. 
add_action( 'admin_menu', [ $this, 'register_admin_menu_item' ] ); add_action( 'admin_post_classifai_init_term_cleanup', [ $this, 'start_term_cleanup_process' ] ); @@ -220,42 +218,6 @@ public function render_settings_page() { esc_url( admin_url( 'admin-ajax.php' ) ), - 'ajax_nonce' => wp_create_nonce( 'classifai-status' ), - ) - ); - } - /** * Get the description for the enable field. * diff --git a/src/scss/admin.scss b/src/scss/admin.scss index 62ef171e2..46ef0adac 100644 --- a/src/scss/admin.scss +++ b/src/scss/admin.scss @@ -923,3 +923,135 @@ div.classifai-openai__result-disable-link { display: block; padding: 0 1em 1.5em 1em; } + +.classifai-content .classifai-term-consolidation { + margin-top: 20px; +} + +.classifai-term-consolidation .classifai-tabs.tabs-center { + margin-bottom: 24px +} + +.classifai-term-consolidation .classifai-tabs.tabs-justify { + table-layout: fixed; + width: 100% +} + +.classifai-term-consolidation .classifai-tabs a.tab { + color: #1d2327; + cursor: pointer; + display: block; + font-size: 14px; + padding: 16px 12px; + position: relative; + text-decoration: none; + transform: translateZ(0); + transition: all .3s ease; + margin-bottom: 4px; + background: #f9f9f9; +} + +.classifai-term-consolidation .classifai-tabs a.tab:focus { + box-shadow: none +} + +.classifai-term-consolidation .classifai-tabs a.tab:hover { + color: var(--classifai-admin-theme-color) +} + +.classifai-term-consolidation .classifai-tabs a.tab.active { + background: #f0f0f0; + border-radius: 4px; + box-shadow: none; + font-weight: 600 +} + +.classifai-term-consolidation .classifai-tabs a.tab.active:after { + opacity: 1; + transform: scale(1) +} + +.classifai-term-consolidation .classifai-term-consolidation-wrapper { + display: flex; + flex-direction: row; + flex-wrap: wrap; + margin-top: 20px; +} + + +.classifai-term-consolidation .classifai-term-consolidation-content-wrapper .classifai-term-consolidation-content-wrapper-field-label { + text-align: left +} + 
+.classifai-term-consolidation .classifai-term-consolidation-content-wrapper .classifai-term-consolidation-content-wrapper-field { + padding: 0 +} + +.classifai-term-consolidation .classifai-term-consolidation-content-wrapper .classifai-term-consolidation-content-wrapper-field-label>label { + display: block; + font-weight: 700; + margin-bottom: 0; + text-transform: uppercase +} + +.classifai-term-consolidation .classifai-term-consolidation-content-wrapper input[type=password], +.classifai-term-consolidation .classifai-term-consolidation-content-wrapper input[type=text] { + font-size: 14px; + height: 38px; + margin-bottom: 4px; + width: 100% +} + +.classifai-term-consolidation .classifai-term-consolidation-content-wrapper .classifai-setup-footer { + margin-top: 40px +} + +@media screen and (max-width: 782px) { + .classifai-term-consolidation .classifai-term-consolidation-content-wrapper { + padding-left: 18px + } +} + +@media screen and (max-width: 600px) { + .classifai-term-consolidation .classifai-term-consolidation-content-wrapper { + margin-bottom: 20px; + padding-left: 0; + width: 100% + } +} + +.classifai-term-consolidation-process-status p{ + font-size: 14px; +} + +.classifai-term-consolidation-process-status .dashicons-yes-alt{ + color: #48be1e; +} + +.classifai-term-consolidation .classifai-term-consolidation-content-wrapper input[type=text].current-page{ + width: auto; + height: auto; +} + +table.similar_terms { + border-collapse: collapse; + border: 2px solid #c3c4c7; +} + +table.similar_terms tbody tr.border { + border-left: 2px solid #c3c4c7; + border-right: 2px solid #c3c4c7; + border-top: 2px solid #c3c4c7; +} + +table.similar_terms tbody tr.border.skip { + border-top: 0px; +} + +table.similar_terms tbody tr.border:last-child { + border-bottom: 2px solid #c3c4c7; +} + +table.similar_terms th#actions { + width: 15%; +} diff --git a/tests/cypress/integration/language-processing/term-cleanup-azure-openai.test.js 
b/tests/cypress/integration/language-processing/term-cleanup-azure-openai.test.js index 67750e863..8a766ba1c 100644 --- a/tests/cypress/integration/language-processing/term-cleanup-azure-openai.test.js +++ b/tests/cypress/integration/language-processing/term-cleanup-azure-openai.test.js @@ -15,7 +15,7 @@ describe( '[Language processing] Term Cleanup - Azure OpenAI Tests', () => { '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_term_cleanup' ); - cy.get( '#use_ep' ).should( 'be:hidden' ); + cy.get( '#use_ep' ).should( 'be.hidden' ); } ); it( 'Can save Term Cleanup settings', () => { diff --git a/tests/cypress/integration/language-processing/term-cleanup-openai.test.js b/tests/cypress/integration/language-processing/term-cleanup-openai.test.js index 65716c691..339404817 100644 --- a/tests/cypress/integration/language-processing/term-cleanup-openai.test.js +++ b/tests/cypress/integration/language-processing/term-cleanup-openai.test.js @@ -15,7 +15,7 @@ describe( '[Language processing] Term Cleanup - OpenAI Tests', () => { '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_term_cleanup' ); - cy.get( '#use_ep' ).should( 'be:hidden' ); + cy.get( '#use_ep' ).should( 'be.hidden' ); } ); it( 'Can save Term Cleanup settings', () => { From 1272cead16809a24de55bc36ada79c9d0bb8ba87 Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Wed, 9 Oct 2024 14:38:54 -0600 Subject: [PATCH 05/15] Wire up the Term Cleanup Feature to use Action Scheduler. 
Add TODO statements for remaining methods that are needed because of that switch --- includes/Classifai/Features/TermCleanup.php | 46 +++--- includes/Classifai/Plugin.php | 18 ++- includes/Classifai/TermCleanupScheduler.php | 166 ++++++++++++++++++++ 3 files changed, 203 insertions(+), 27 deletions(-) create mode 100644 includes/Classifai/TermCleanupScheduler.php diff --git a/includes/Classifai/Features/TermCleanup.php b/includes/Classifai/Features/TermCleanup.php index f776eb760..92671c8f3 100644 --- a/includes/Classifai/Features/TermCleanup.php +++ b/includes/Classifai/Features/TermCleanup.php @@ -7,6 +7,7 @@ use Classifai\Providers\OpenAI\Embeddings as OpenAIEmbeddings; use Classifai\Providers\Azure\Embeddings as AzureEmbeddings; use Classifai\Providers\OpenAI\EmbeddingCalculations; +use Classifai\TermCleanupScheduler; use WP_Error; use function Classifai\is_elasticpress_installed; @@ -33,7 +34,7 @@ class TermCleanup extends Feature { /** * Background process instance. * - * @var TermCleanupBackgroundProcess + * @var TermCleanupScheduler */ private $background_process; @@ -84,7 +85,9 @@ public function setup() { } $this->setting_page_url = admin_url( 'tools.php?page=classifai-term-cleanup' ); - // $this->background_process = new TermCleanupBackgroundProcess(); // TODO: Implement this class. + + $this->background_process = new TermCleanupScheduler( 'classifai_schedule_term_cleanup_job' ); + $this->background_process->init(); } /** @@ -179,7 +182,7 @@ public function render_settings_page() {

background_process && $this->background_process->is_queued() ) { + if ( $this->background_process && $this->background_process->in_progress() ) { $this->render_background_processing_status( $active_tax ); } else { $plural_label = strtolower( $this->get_taxonomy_label( $active_tax, true ) ); @@ -439,10 +442,6 @@ public function get_taxonomies(): array { * Start the term cleanup process. */ public function start_term_cleanup_process() { - if ( ! $this->background_process ) { - wp_die( esc_html__( 'Background processing not enabled.', 'classifai' ) ); - } - if ( empty( $_POST['classifai_term_cleanup_nonce'] ) || ! wp_verify_nonce( sanitize_text_field( wp_unslash( $_POST['classifai_term_cleanup_nonce'] ) ), 'classifai_term_cleanup' ) @@ -479,8 +478,8 @@ public function start_term_cleanup_process() { } } - $this->background_process->push_to_queue( - array( + $job_args = [ + [ 'taxonomy' => $taxonomy, 'thresold' => $thresold, 'action' => 'term_cleanup', @@ -489,16 +488,16 @@ public function start_term_cleanup_process() { 'term_id' => 0, 'offset' => 0, 'started_by' => get_current_user_id(), - ) - ); + ], + ]; + + $this->background_process->schedule( $job_args ); $this->add_notice( __( 'Process for finding similar terms has started.', 'classifai' ), 'info' ); - $this->background_process->save()->dispatch(); - // Redirect back to the settings page. wp_safe_redirect( add_query_arg( 'tax', $taxonomy, $this->setting_page_url ) ); exit; @@ -508,6 +507,7 @@ public function start_term_cleanup_process() { * Cancel the term cleanup process. */ public function cancel_term_cleanup_process() { + // TODO if ( ! $this->background_process ) { wp_die( esc_html__( 'Background processing not enabled.', 'classifai' ) ); } @@ -549,9 +549,9 @@ public function get_max_terms(): int { * Generate embeddings for the terms. * * @param string $taxonomy Taxonomy to process. - * @return bool True if embeddings were generated, false otherwise. 
+ * @return bool|WP_Error True if embeddings were generated, false otherwise. */ - public function generate_embeddings( string $taxonomy ): bool { + public function generate_embeddings( string $taxonomy ) { $exclude = []; // Exclude the uncategorized term. @@ -586,7 +586,11 @@ public function generate_embeddings( string $taxonomy ): bool { // Generate embedding data for each term. foreach ( $terms as $term_id ) { - $provider->generate_embeddings_for_term( $term_id, false, $this ); + $result = $provider->generate_embeddings_for_term( $term_id, false, $this ); + + if ( is_wp_error( $result ) ) { + return $result; + } } return true; @@ -598,7 +602,7 @@ public function generate_embeddings( string $taxonomy ): bool { * @param string $taxonomy Taxonomy to process. * @param int $thresold Thresold to consider terms as duplicates. * @param array $args Additional arguments. - * @return array|bool + * @return array|bool|WP_Error */ public function get_similar_terms( string $taxonomy, int $thresold, array $args = [] ) { if ( class_exists( '\\ElasticPress\\Feature' ) && '1' === $this->get_settings( 'use_ep' ) ) { @@ -827,7 +831,9 @@ public function get_background_processing_status( string $taxonomy ): array { return []; } - $batches = $this->background_process->get_batches(); + // TODO + // $batches = $this->background_process->get_batches(); + $batches = []; if ( ! empty( $batches ) ) { foreach ( $batches as $batch ) { @@ -848,6 +854,7 @@ public function get_background_processing_status( string $taxonomy ): array { * @param string $taxonomy Taxonomy to process. */ public function render_background_processing_status( $taxonomy ) { + // TODO $status = $this->get_background_processing_status( $taxonomy ); if ( empty( $status ) ) { @@ -998,6 +1005,7 @@ public function get_taxonomy_label( $taxonomy, $plural = false ): string { * Ajax handler for refresh compare status. */ public function get_term_cleanup_status() { + // TODO if ( ! 
$this->background_process ) { wp_send_json_error( [ 'error' => __( 'Background processing not enabled.', 'classifai' ) ] ); } @@ -1016,7 +1024,7 @@ public function get_term_cleanup_status() { wp_send_json_error( $data ); } - if ( $this->background_process->is_queued() ) { + if ( $this->background_process->in_progress() ) { $data['is_running'] = true; ob_start(); $this->render_background_processing_status( $taxonomy ); diff --git a/includes/Classifai/Plugin.php b/includes/Classifai/Plugin.php index fc44e3683..58fb5974e 100644 --- a/includes/Classifai/Plugin.php +++ b/includes/Classifai/Plugin.php @@ -240,18 +240,20 @@ public function filter_plugin_action_links( $links ): array { * Load the Action Scheduler library. */ public function load_action_scheduler() { - $feature = new \Classifai\Features\Classification(); + $features = [ new \Classifai\Features\Classification(), new \Classifai\Features\TermCleanup() ]; $is_feature_being_enabled = false; - if ( isset( $_POST['classifai_feature_classification'] ) ) { // phpcs:ignore WordPress.Security.NonceVerification.Missing - $is_feature_being_enabled = sanitize_text_field( wp_unslash( $_POST['classifai_feature_classification']['status'] ?? false ) ); // phpcs:ignore WordPress.Security.NonceVerification.Missing - } + foreach ( $features as $feature ) { + if ( isset( $_POST['classifai_feature_classification'] ) ) { // phpcs:ignore WordPress.Security.NonceVerification.Missing + $is_feature_being_enabled = sanitize_text_field( wp_unslash( $_POST['classifai_feature_classification']['status'] ?? false ) ); // phpcs:ignore WordPress.Security.NonceVerification.Missing + } - if ( ! ( $feature->is_enabled() || '1' === $is_feature_being_enabled ) ) { - return; - } + if ( ! ( $feature->is_enabled() || '1' === $is_feature_being_enabled ) ) { + continue; + } - require_once CLASSIFAI_PLUGIN_DIR . '/vendor/woocommerce/action-scheduler/action-scheduler.php'; + require_once CLASSIFAI_PLUGIN_DIR . 
'/vendor/woocommerce/action-scheduler/action-scheduler.php'; + } } /** diff --git a/includes/Classifai/TermCleanupScheduler.php b/includes/Classifai/TermCleanupScheduler.php new file mode 100644 index 000000000..9c8b06624 --- /dev/null +++ b/includes/Classifai/TermCleanupScheduler.php @@ -0,0 +1,166 @@ +job_name = $job_name; + } + + /** + * Initialize the class. + */ + public function init() { + add_action( 'classifai_schedule_term_cleanup_job', [ $this, 'run' ] ); + // add_filter( 'heartbeat_send', [ $this, 'check_embedding_generation_status' ] ); + // add_action( 'classifai_before_feature_nav', [ $this, 'render_embeddings_generation_status' ] ); + } + + /** + * Run the term cleanup job. + * + * @param array $item Item details to process. + */ + public function run( array $item = [] ) { + $action = $item['action']; + + if ( ! $action ) { + return; + } + + switch ( $action ) { + case 'term_cleanup': + $started_by = absint( $item['started_by'] ); + $taxonomy = $item['taxonomy']; + $thresold = $item['thresold']; + $term_cleanup = new TermCleanup(); + $embeddings_generated = (bool) $item['embeddings_generated']; + + $original_user_id = get_current_user_id(); + + // Set the user to the one who started the process, to avoid permission issues. + wp_set_current_user( (int) $started_by ); + + // Generate embeddings if not already generated. + if ( ! $embeddings_generated ) { + $results = $term_cleanup->generate_embeddings( $taxonomy ); + + if ( is_wp_error( $results ) ) { + $term_cleanup->add_notice( + // translators: %s: error message. + sprintf( esc_html__( 'Error in generating embeddings: %s', 'classifai' ), $results->get_error_message() ), + 'error' + ); + + return; + } + + // If get we false, then there are no further terms to process. + if ( false === $results ) { + $item['embeddings_generated'] = true; + $this->schedule( [ $item ] ); + return; + } + + $this->schedule( [ $item ] ); + return; + } + + // Find similar terms. 
+ $args = array( + 'processed' => $item['processed'] ?? 0, + 'term_id' => $item['term_id'] ?? 0, + 'offset' => $item['offset'] ?? 0, + ); + $res = $term_cleanup->get_similar_terms( $taxonomy, $thresold, $args ); + + // Restore original user. + wp_set_current_user( $original_user_id ); + + if ( is_wp_error( $res ) ) { + $term_cleanup->add_notice( + // translators: %s: error message. + sprintf( esc_html__( 'Error in finding similar terms: %s', 'classifai' ), $res->get_error_message() ), + 'error' + ); + + return; + } + + if ( false === $res ) { + $label = strtolower( $term_cleanup->get_taxonomy_label( $taxonomy, true ) ); + + // Show notice to user. + $term_cleanup->add_notice( + // translators: %s: taxonomy label. + sprintf( __( 'Process for finding similar %s has been completed.', 'classifai' ), $label ), + 'success' + ); + + // No more terms to process. + return; + } + + // Update item. + $item['processed'] = $res['processed']; + $item['term_id'] = $res['term_id']; + $item['offset'] = $res['offset']; + + $this->schedule( [ $item ] ); + return; + default: + return; + } + } + + /** + * Schedule the term cleanup job. + * + * @param array $args Arguments to pass to the job. + */ + public function schedule( array $args = [] ) { + if ( function_exists( 'as_enqueue_async_action' ) ) { + as_enqueue_async_action( 'classifai_schedule_term_cleanup_job', $args ); + } + } + + /** + * Check if job is in progress. + * + * @return bool + */ + public function in_progress(): bool { + if ( ! class_exists( 'ActionScheduler_Store' ) ) { + return false; + } + + $store = ActionScheduler_Store::instance(); + + $action_id = $store->find_action( + $this->job_name, + array( + 'status' => ActionScheduler_Store::STATUS_PENDING, + ) + ); + + return ! empty( $action_id ); + } +} From 6c928b971853eb163a5ed5d1af4ef6d58a288ec0 Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Wed, 9 Oct 2024 15:21:37 -0600 Subject: [PATCH 06/15] Show proper status messages as the term cleanup runs. 
Fix some PHP issues --- .eslintrc.json | 3 +- .../Classifai/Admin/SimilarTermsListTable.php | 2 +- includes/Classifai/Features/TermCleanup.php | 48 ++++++++++++------- .../Providers/Azure/ComputerVision.php | 2 +- includes/Classifai/TermCleanupScheduler.php | 33 +++++++++++-- src/js/admin.js | 46 ++++++++++++++++++ 6 files changed, 109 insertions(+), 25 deletions(-) diff --git a/.eslintrc.json b/.eslintrc.json index 85f09bd41..226e179cb 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -21,7 +21,8 @@ "Headers": "readonly", "requestAnimationFrame": "readonly", "React": "readonly", - "Block": "readonly" + "Block": "readonly", + "classifai_term_cleanup_params": "readonly" }, "extends": ["plugin:@wordpress/eslint-plugin/recommended"], "ignorePatterns": ["*.json", "webpack.config.js"] diff --git a/includes/Classifai/Admin/SimilarTermsListTable.php b/includes/Classifai/Admin/SimilarTermsListTable.php index 9b7512a10..49878cef6 100644 --- a/includes/Classifai/Admin/SimilarTermsListTable.php +++ b/includes/Classifai/Admin/SimilarTermsListTable.php @@ -56,7 +56,7 @@ public function get_columns() { return array( 'term' => $label, // translators: %s: Singular label of the taxonomy. - 'similar_term' => sprintf( __( 'Similar %s' ), $label ), + 'similar_term' => sprintf( __( 'Similar %s', 'classifai' ), $label ), 'actions' => __( 'Actions', 'classifai' ), ); } diff --git a/includes/Classifai/Features/TermCleanup.php b/includes/Classifai/Features/TermCleanup.php index 92671c8f3..d6da7fec9 100644 --- a/includes/Classifai/Features/TermCleanup.php +++ b/includes/Classifai/Features/TermCleanup.php @@ -96,6 +96,8 @@ public function setup() { * This will only fire if the Feature is enabled. */ public function feature_setup() { + add_action( 'admin_enqueue_scripts', [ $this, 'enqueue_admin_assets' ] ); + // Register the settings page for the Feature. 
add_action( 'admin_menu', [ $this, 'register_admin_menu_item' ] ); add_action( 'admin_post_classifai_init_term_cleanup', [ $this, 'start_term_cleanup_process' ] ); @@ -110,6 +112,26 @@ public function feature_setup() { add_action( 'admin_notices', [ $this, 'render_notices' ] ); } + /** + * Enqueue the admin scripts. + * + * @param string $hook_suffix The current admin page. + */ + public function enqueue_admin_assets( string $hook_suffix ) { + if ( 'tools_page_classifai-term-cleanup' !== $hook_suffix ) { + return; + } + + wp_localize_script( + 'classifai-admin-script', + 'classifai_term_cleanup_params', + array( + 'ajax_url' => esc_url( admin_url( 'admin-ajax.php' ) ), + 'ajax_nonce' => wp_create_nonce( 'classifai-term-cleanup-status' ), + ) + ); + } + /** * Register a sub page under the Tools menu. */ @@ -831,16 +853,12 @@ public function get_background_processing_status( string $taxonomy ): array { return []; } - // TODO - // $batches = $this->background_process->get_batches(); - $batches = []; - - if ( ! empty( $batches ) ) { - foreach ( $batches as $batch ) { - foreach ( $batch->data as $key => $value ) { - if ( 'term_cleanup' === $value['action'] && $taxonomy === $value['taxonomy'] ) { - return $value; - } + $args = $this->background_process->get_args(); + + if ( ! empty( $args ) ) { + foreach ( $args as $arg ) { + if ( 'term_cleanup' === $arg['action'] && $taxonomy === $arg['taxonomy'] ) { + return $arg; } } } @@ -854,7 +872,6 @@ public function get_background_processing_status( string $taxonomy ): array { * @param string $taxonomy Taxonomy to process. */ public function render_background_processing_status( $taxonomy ) { - // TODO $status = $this->get_background_processing_status( $taxonomy ); if ( empty( $status ) ) { @@ -877,7 +894,7 @@ public function render_background_processing_status( $taxonomy ) { $label = strtolower( $this->get_taxonomy_label( $taxonomy, true ) ); ?> -
+

background_process ) { - wp_send_json_error( [ 'error' => __( 'Background processing not enabled.', 'classifai' ) ] ); - } - // Check the nonce for security - check_ajax_referer( 'classifai-status', 'nonce' ); + check_ajax_referer( 'classifai-term-cleanup-status', 'nonce' ); $data = array( 'is_running' => false, diff --git a/includes/Classifai/Providers/Azure/ComputerVision.php b/includes/Classifai/Providers/Azure/ComputerVision.php index 0b4022b82..961e05c4a 100644 --- a/includes/Classifai/Providers/Azure/ComputerVision.php +++ b/includes/Classifai/Providers/Azure/ComputerVision.php @@ -767,7 +767,7 @@ public function rest_endpoint_callback( $attachment_id, string $route_to_call = } if ( empty( $image_url ) ) { - return new WP_Error( 'error', esc_html__( 'Valid image size not found. Make sure the image is less than 4MB.' ) ); + return new WP_Error( 'error', esc_html__( 'Valid image size not found. Make sure the image is less than 4MB.', 'classifai' ) ); } switch ( $route_to_call ) { diff --git a/includes/Classifai/TermCleanupScheduler.php b/includes/Classifai/TermCleanupScheduler.php index 9c8b06624..45e3896eb 100644 --- a/includes/Classifai/TermCleanupScheduler.php +++ b/includes/Classifai/TermCleanupScheduler.php @@ -5,8 +5,6 @@ use Classifai\Features\TermCleanup; use ActionScheduler_Store; -use function as_enqueue_async_action; - class TermCleanupScheduler { /** @@ -30,8 +28,6 @@ public function __construct( string $job_name = '' ) { */ public function init() { add_action( 'classifai_schedule_term_cleanup_job', [ $this, 'run' ] ); - // add_filter( 'heartbeat_send', [ $this, 'check_embedding_generation_status' ] ); - // add_action( 'classifai_before_feature_nav', [ $this, 'render_embeddings_generation_status' ] ); } /** @@ -163,4 +159,33 @@ public function in_progress(): bool { return ! empty( $action_id ); } + + /** + * Get the arguments for the current job. + * + * @return array|bool + */ + public function get_args() { + if ( ! 
class_exists( 'ActionScheduler_Store' ) ) { + return false; + } + + $store = ActionScheduler_Store::instance(); + + $action_id = $store->find_action( + $this->job_name, + array( + 'status' => ActionScheduler_Store::STATUS_PENDING, + ) + ); + + if ( empty( $action_id ) ) { + return false; + } + + $action = $store->fetch_action( $action_id ); + $args = $action->get_args(); + + return $args; + } } diff --git a/src/js/admin.js b/src/js/admin.js index 9817bc1bf..632a6edb0 100644 --- a/src/js/admin.js +++ b/src/js/admin.js @@ -444,3 +444,49 @@ document.addEventListener( 'DOMContentLoaded', function () { } } ); } )( jQuery ); + +// Update the Term Cleanup status. +( function ( $ ) { + const statusWrapper = $( '.classifai-term-cleanup-process-status' ); + const processRunning = statusWrapper.length; + const taxonomy = statusWrapper.data( 'taxonomy' ); + const ajaxUrl = classifai_term_cleanup_params.ajax_url; + const ajaxNonce = classifai_term_cleanup_params.ajax_nonce; + + if ( ! processRunning || ! taxonomy ) { + return; + } + + const intervalId = setInterval( function () { + $.ajax( { + url: ajaxUrl, + type: 'POST', + data: { + action: 'classifai_get_term_cleanup_status', + taxonomy, + nonce: ajaxNonce, + }, + success( response ) { + if ( response.success && response.data ) { + if ( response.data.is_running && response.data.status ) { + // Update the sync status on the page + statusWrapper.html( response.data.status ); + } else { + // Clear interval and reload the page. 
+ clearInterval( intervalId ); + window.location.reload(); + } + } + }, + error( jqXHR, textStatus, errorThrown ) { + // eslint-disable-next-line no-console + console.error( + 'Error: ', + textStatus, + ', Details: ', + errorThrown + ); + }, + } ); + }, 30000 ); // 30000 milliseconds = 30 seconds +} )( jQuery ); From 4190acf501638eea13aa596e538e2b12ffd7f6b1 Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Wed, 9 Oct 2024 15:25:25 -0600 Subject: [PATCH 07/15] Ignore non-camel case params --- src/js/admin.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/js/admin.js b/src/js/admin.js index 632a6edb0..329d58c54 100644 --- a/src/js/admin.js +++ b/src/js/admin.js @@ -450,8 +450,8 @@ document.addEventListener( 'DOMContentLoaded', function () { const statusWrapper = $( '.classifai-term-cleanup-process-status' ); const processRunning = statusWrapper.length; const taxonomy = statusWrapper.data( 'taxonomy' ); - const ajaxUrl = classifai_term_cleanup_params.ajax_url; - const ajaxNonce = classifai_term_cleanup_params.ajax_nonce; + const ajaxUrl = classifai_term_cleanup_params.ajax_url; // eslint-disable-line camelcase + const ajaxNonce = classifai_term_cleanup_params.ajax_nonce; // eslint-disable-line camelcase if ( ! processRunning || ! taxonomy ) { return; From 67bf6080bea1b02bde827927b875dfd1e30829de Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Wed, 9 Oct 2024 15:42:58 -0600 Subject: [PATCH 08/15] Add ability to cancel an in progress job. 
Fix undefined JS errro --- includes/Classifai/Features/TermCleanup.php | 10 +++------- includes/Classifai/TermCleanupScheduler.php | 19 +++++++++++++++++-- src/js/admin.js | 5 +++-- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/includes/Classifai/Features/TermCleanup.php b/includes/Classifai/Features/TermCleanup.php index d6da7fec9..af83fdd0e 100644 --- a/includes/Classifai/Features/TermCleanup.php +++ b/includes/Classifai/Features/TermCleanup.php @@ -529,11 +529,6 @@ public function start_term_cleanup_process() { * Cancel the term cleanup process. */ public function cancel_term_cleanup_process() { - // TODO - if ( ! $this->background_process ) { - wp_die( esc_html__( 'Background processing not enabled.', 'classifai' ) ); - } - // Check the nonce for security if ( empty( $_GET['_wpnonce'] ) || @@ -543,9 +538,10 @@ public function cancel_term_cleanup_process() { } $taxonomy = isset( $_GET['taxonomy'] ) ? sanitize_text_field( wp_unslash( $_GET['taxonomy'] ) ) : ''; - $this->background_process->cancel(); - if ( $this->background_process->is_cancelled() ) { + $unschedule = $this->background_process->unschedule(); + + if ( $unschedule ) { // Add a notice to inform the user that the process will be cancelled soon. $this->add_notice( __( 'Process for the finding similar terms will be cancelled soon.', 'classifai' ), diff --git a/includes/Classifai/TermCleanupScheduler.php b/includes/Classifai/TermCleanupScheduler.php index 45e3896eb..b8e4c5a48 100644 --- a/includes/Classifai/TermCleanupScheduler.php +++ b/includes/Classifai/TermCleanupScheduler.php @@ -27,7 +27,7 @@ public function __construct( string $job_name = '' ) { * Initialize the class. 
*/ public function init() { - add_action( 'classifai_schedule_term_cleanup_job', [ $this, 'run' ] ); + add_action( $this->job_name, [ $this, 'run' ] ); } /** @@ -134,10 +134,25 @@ public function run( array $item = [] ) { */ public function schedule( array $args = [] ) { if ( function_exists( 'as_enqueue_async_action' ) ) { - as_enqueue_async_action( 'classifai_schedule_term_cleanup_job', $args ); + as_enqueue_async_action( $this->job_name, $args ); } } + /** + * Unschedule the term cleanup job. + * + * @return bool + */ + public function unschedule() { + if ( function_exists( 'as_unschedule_all_actions' ) ) { + $action_id = as_unschedule_all_actions( $this->job_name ); + + return $action_id ? true : false; + } + + return false; + } + /** * Check if job is in progress. * diff --git a/src/js/admin.js b/src/js/admin.js index 329d58c54..ab1b69d89 100644 --- a/src/js/admin.js +++ b/src/js/admin.js @@ -450,13 +450,14 @@ document.addEventListener( 'DOMContentLoaded', function () { const statusWrapper = $( '.classifai-term-cleanup-process-status' ); const processRunning = statusWrapper.length; const taxonomy = statusWrapper.data( 'taxonomy' ); - const ajaxUrl = classifai_term_cleanup_params.ajax_url; // eslint-disable-line camelcase - const ajaxNonce = classifai_term_cleanup_params.ajax_nonce; // eslint-disable-line camelcase if ( ! processRunning || ! taxonomy ) { return; } + const ajaxUrl = classifai_term_cleanup_params?.ajax_url; // eslint-disable-line camelcase + const ajaxNonce = classifai_term_cleanup_params?.ajax_nonce; // eslint-disable-line camelcase + const intervalId = setInterval( function () { $.ajax( { url: ajaxUrl, From ea82fad08b4d89d04b03750b02fcd26623cef824 Mon Sep 17 00:00:00 2001 From: Dharmesh Patel Date: Thu, 10 Oct 2024 17:53:51 +0530 Subject: [PATCH 09/15] Fix in progress background process status. 
--- includes/Classifai/TermCleanupScheduler.php | 31 ++++++++++----------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/includes/Classifai/TermCleanupScheduler.php b/includes/Classifai/TermCleanupScheduler.php index b8e4c5a48..ab2bbbb1a 100644 --- a/includes/Classifai/TermCleanupScheduler.php +++ b/includes/Classifai/TermCleanupScheduler.php @@ -159,20 +159,11 @@ public function unschedule() { * @return bool */ public function in_progress(): bool { - if ( ! class_exists( 'ActionScheduler_Store' ) ) { - return false; + if ( function_exists( 'as_has_scheduled_action' ) ) { + return as_has_scheduled_action( $this->job_name ); } - $store = ActionScheduler_Store::instance(); - - $action_id = $store->find_action( - $this->job_name, - array( - 'status' => ActionScheduler_Store::STATUS_PENDING, - ) - ); - - return ! empty( $action_id ); + return false; } /** @@ -187,19 +178,27 @@ public function get_args() { $store = ActionScheduler_Store::instance(); - $action_id = $store->find_action( + $running_action_id = $store->find_action( $this->job_name, array( 'status' => ActionScheduler_Store::STATUS_PENDING, ) ); - if ( empty( $action_id ) ) { + $pending_action_id = $store->find_action( + $this->job_name, + array( + 'status' => ActionScheduler_Store::STATUS_RUNNING, + ) + ); + + if ( empty( $running_action_id ) && empty( $pending_action_id ) ) { return false; } - $action = $store->fetch_action( $action_id ); - $args = $action->get_args(); + $action_id = ! empty( $running_action_id ) ? $running_action_id : $pending_action_id; + $action = $store->fetch_action( $action_id ); + $args = $action->get_args(); return $args; } From e887b44fe522afdedc1963e29b6aead0f2c8e3fe Mon Sep 17 00:00:00 2001 From: Dharmesh Patel Date: Thu, 10 Oct 2024 20:23:04 +0530 Subject: [PATCH 10/15] Fix cancel cleanup process. 
--- includes/Classifai/Features/TermCleanup.php | 1 + includes/Classifai/TermCleanupScheduler.php | 41 ++++++++++++++++++--- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/includes/Classifai/Features/TermCleanup.php b/includes/Classifai/Features/TermCleanup.php index af83fdd0e..64e85a540 100644 --- a/includes/Classifai/Features/TermCleanup.php +++ b/includes/Classifai/Features/TermCleanup.php @@ -510,6 +510,7 @@ public function start_term_cleanup_process() { 'term_id' => 0, 'offset' => 0, 'started_by' => get_current_user_id(), + 'job_id' => str_replace( '-', '', wp_generate_uuid4() ), ], ]; diff --git a/includes/Classifai/TermCleanupScheduler.php b/includes/Classifai/TermCleanupScheduler.php index ab2bbbb1a..aab61fd74 100644 --- a/includes/Classifai/TermCleanupScheduler.php +++ b/includes/Classifai/TermCleanupScheduler.php @@ -55,6 +55,12 @@ public function run( array $item = [] ) { // Set the user to the one who started the process, to avoid permission issues. wp_set_current_user( (int) $started_by ); + // Check if cancel request is made. + if ( isset( $item['job_id'] ) && get_transient( 'classifai_cancel_term_cleanup_process' ) === $item['job_id'] ) { + delete_transient( 'classifai_cancel_term_cleanup_process' ); + return; + } + // Generate embeddings if not already generated. if ( ! $embeddings_generated ) { $results = $term_cleanup->generate_embeddings( $taxonomy ); @@ -145,9 +151,34 @@ public function schedule( array $args = [] ) { */ public function unschedule() { if ( function_exists( 'as_unschedule_all_actions' ) ) { - $action_id = as_unschedule_all_actions( $this->job_name ); - - return $action_id ? true : false; + as_unschedule_all_actions( $this->job_name ); + + if ( ! class_exists( 'ActionScheduler_Store' ) ) { + return false; + } + + $store = ActionScheduler_Store::instance(); + + // Check if the job is still in progress. 
+ $action_id = $store->find_action( + $this->job_name, + array( + 'status' => ActionScheduler_Store::STATUS_RUNNING, + ) + ); + + // If no action running, return true. + if ( empty( $action_id ) ) { + return true; + } + + $action = $store->fetch_action( $action_id ); + $args = $action->get_args(); + if ( ! empty( $args ) && isset( $args[0]['job_id'] ) ) { + set_transient( 'classifai_cancel_term_cleanup_process', $args[0]['job_id'], 300 ); + } + + return true; } return false; @@ -181,14 +212,14 @@ public function get_args() { $running_action_id = $store->find_action( $this->job_name, array( - 'status' => ActionScheduler_Store::STATUS_PENDING, + 'status' => ActionScheduler_Store::STATUS_RUNNING, ) ); $pending_action_id = $store->find_action( $this->job_name, array( - 'status' => ActionScheduler_Store::STATUS_RUNNING, + 'status' => ActionScheduler_Store::STATUS_PENDING, ) ); From 1334e69737a46054a7341c6ed74b76dda63a4dec Mon Sep 17 00:00:00 2001 From: Dharmesh Patel Date: Thu, 10 Oct 2024 20:45:31 +0530 Subject: [PATCH 11/15] Fix similarity score for database comparison. --- includes/Classifai/Admin/SimilarTermsListTable.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/includes/Classifai/Admin/SimilarTermsListTable.php b/includes/Classifai/Admin/SimilarTermsListTable.php index 49878cef6..1218e51f4 100644 --- a/includes/Classifai/Admin/SimilarTermsListTable.php +++ b/includes/Classifai/Admin/SimilarTermsListTable.php @@ -151,6 +151,7 @@ public function generate_term_html( $term, $similar_term, $score = null ) { 'to' => $term->term_id, ); $merge_url = add_query_arg( $args, wp_nonce_url( admin_url( 'admin-post.php' ), 'classifai_merge_term' ) ); + $score = $score ? ( $score > 1 ? $score - 1 : $score ) : ''; return sprintf( // translators: %s: Term name, %d: Term ID. @@ -171,7 +172,7 @@ public function generate_term_html( $term, $similar_term, $score = null ) { esc_html( $term->slug ), esc_html( $term->count ), esc_html( $term->parent > 0 ? 
get_term( $term->parent )->name : 'None' ), - $score ? esc_html( round( ( $score - 1 ) * 100, 2 ) . '%' ) : '', + $score ? esc_html( round( $score * 100, 2 ) . '%' ) : '', esc_html( $term->description ), esc_url( $merge_url ), esc_html__( 'Merge and keep this', 'classifai' ) From 32b40bf5ed2d15e22602b2ff55ff28b5a6e56c2a Mon Sep 17 00:00:00 2001 From: Dharmesh Patel Date: Fri, 11 Oct 2024 16:00:37 +0530 Subject: [PATCH 12/15] UX: some improvements --- .../Classifai/Admin/SimilarTermsListTable.php | 15 ++++++--------- src/scss/admin.scss | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/includes/Classifai/Admin/SimilarTermsListTable.php b/includes/Classifai/Admin/SimilarTermsListTable.php index 1218e51f4..d6b6a6616 100644 --- a/includes/Classifai/Admin/SimilarTermsListTable.php +++ b/includes/Classifai/Admin/SimilarTermsListTable.php @@ -57,7 +57,7 @@ public function get_columns() { 'term' => $label, // translators: %s: Singular label of the taxonomy. 'similar_term' => sprintf( __( 'Similar %s', 'classifai' ), $label ), - 'actions' => __( 'Actions', 'classifai' ), + 'actions' => __( 'Action', 'classifai' ), ); } @@ -155,25 +155,22 @@ public function generate_term_html( $term, $similar_term, $score = null ) { return sprintf( // translators: %s: Term name, %d: Term ID. - __( '%1$s (ID: %2$d)

', 'classifai' ) . + __( '%1$s (ID: %2$s)

', 'classifai' ) . // translators: %s: Term slug. __( 'Slug: %3$s
', 'classifai' ) . // translators: %s: Term count. - __( 'Count: %4$s
', 'classifai' ) . + __( 'Used: %4$s
', 'classifai' ) . // translators: %s: Term parent name. __( 'Parent: %5$s
', 'classifai' ) . // translators: %s: Similarity score. ( $score ? __( 'Similarity: %6$s
', 'classifai' ) : '%6$s' ) . - // translators: %s: Term description. - __( 'Description: %7$s

', 'classifai' ) . - '%9$s', + '%8$s', esc_html( $term->name ), - esc_html( $term->term_id ), + '' . esc_html( $term->term_id ) . '', esc_html( $term->slug ), - esc_html( $term->count ), + '' . esc_html( sprintf( _n( '%d time', '%d times', $term->count, 'classifai' ), $term->count ) ) . '', esc_html( $term->parent > 0 ? get_term( $term->parent )->name : 'None' ), $score ? esc_html( round( $score * 100, 2 ) . '%' ) : '', - esc_html( $term->description ), esc_url( $merge_url ), esc_html__( 'Merge and keep this', 'classifai' ) ); diff --git a/src/scss/admin.scss b/src/scss/admin.scss index 46ef0adac..e99955c7e 100644 --- a/src/scss/admin.scss +++ b/src/scss/admin.scss @@ -1055,3 +1055,18 @@ table.similar_terms tbody tr.border:last-child { table.similar_terms th#actions { width: 15%; } + +table.similar_terms .term-merge-button { + margin-top: 16px; + margin-bottom: 8px; +} + +table.similar_terms.widefat td, +table.similar_terms.widefat th { + padding: 14px; + font-size: 14px; +} + +table.similar_terms.widefat thead th { + font-weight: bold; +} From f142087555ee18036d645f0c6b770a735d3dc698 Mon Sep 17 00:00:00 2001 From: Dharmesh Patel Date: Fri, 11 Oct 2024 17:11:46 +0530 Subject: [PATCH 13/15] Add search term. 
--- .../Classifai/Admin/SimilarTermsListTable.php | 45 +++++++++++-------- includes/Classifai/Features/TermCleanup.php | 27 ++++++++--- 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/includes/Classifai/Admin/SimilarTermsListTable.php b/includes/Classifai/Admin/SimilarTermsListTable.php index d6b6a6616..c5cdff648 100644 --- a/includes/Classifai/Admin/SimilarTermsListTable.php +++ b/includes/Classifai/Admin/SimilarTermsListTable.php @@ -66,14 +66,34 @@ public function get_columns() { */ public function prepare_items() { $per_page = $this->get_items_per_page( 'edit_post_per_page' ); - $current = $this->get_pagenum(); - $offset = ( $current - 1 ) * $per_page; $columns = $this->get_columns(); $hidden = array(); $sortable = $this->get_sortable_columns(); + $search = isset( $_REQUEST['s'] ) ? sanitize_text_field( wp_unslash( $_REQUEST['s'] ) ) : ''; $this->_column_headers = array( $columns, $hidden, $sortable ); + $total = wp_count_terms( + [ + 'taxonomy' => $this->taxonomy, + 'hide_empty' => false, + 'meta_key' => 'classifai_similar_terms', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key + 'meta_compare' => 'EXISTS', + 'search' => $search, + ] + ); + + $this->set_pagination_args( + array( + 'total_items' => $total, // WE have to calculate the total number of items. + 'per_page' => $per_page, // WE have to determine how many items to show on a page. + 'total_pages' => ceil( $total / $per_page ), // WE have to calculate the total number of pages. 
+ ) + ); + + $current = $this->get_pagenum(); + $offset = ( $current - 1 ) * $per_page; + $terms = get_terms( [ 'taxonomy' => $this->taxonomy, @@ -85,6 +105,7 @@ public function prepare_items() { 'meta_compare' => 'EXISTS', 'number' => $per_page, 'offset' => $offset, + 'search' => $search, ] ); @@ -116,23 +137,7 @@ public function prepare_items() { } } - $total = wp_count_terms( - [ - 'taxonomy' => $this->taxonomy, - 'hide_empty' => false, - 'meta_key' => 'classifai_similar_terms', // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key - 'meta_compare' => 'EXISTS', - ] - ); - $this->items = $items; - $this->set_pagination_args( - array( - 'total_items' => $total, // WE have to calculate the total number of items. - 'per_page' => $per_page, // WE have to determine how many items to show on a page. - 'total_pages' => ceil( $total / $per_page ), // WE have to calculate the total number of pages. - ) - ); } /** @@ -149,6 +154,8 @@ public function generate_term_html( $term, $similar_term, $score = null ) { 'taxonomy' => $this->taxonomy, 'from' => $similar_term->term_id, 'to' => $term->term_id, + 'paged' => $this->get_pagenum(), + 's' => isset( $_REQUEST['s'] ) ? sanitize_text_field( wp_unslash( $_REQUEST['s'] ) ) : false, ); $merge_url = add_query_arg( $args, wp_nonce_url( admin_url( 'admin-post.php' ), 'classifai_merge_term' ) ); $score = $score ? ( $score > 1 ? $score - 1 : $score ) : ''; @@ -215,6 +222,8 @@ public function column_actions( $item ) { 'taxonomy' => $this->taxonomy, 'term' => $term->term_id, 'similar_term' => $similar_term->term_id, + 'paged' => $this->get_pagenum(), + 's' => isset( $_REQUEST['s'] ) ? 
sanitize_text_field( wp_unslash( $_REQUEST['s'] ) ) : false, ); $skip_url = add_query_arg( $args, wp_nonce_url( admin_url( 'admin-post.php' ), 'classifai_skip_similar_term' ) ); diff --git a/includes/Classifai/Features/TermCleanup.php b/includes/Classifai/Features/TermCleanup.php index 64e85a540..2f82b2f15 100644 --- a/includes/Classifai/Features/TermCleanup.php +++ b/includes/Classifai/Features/TermCleanup.php @@ -988,10 +988,17 @@ public function render_similar_terms( $taxonomy ) { printf( esc_html__( 'Similar %s', 'classifai' ), esc_html( $label ) ); ?>

+
+ + + prepare_items(); + $list_table->search_box( esc_html__( 'Search', 'classifai' ), 'search-term' ); + $list_table->display(); + ?> +
prepare_items(); - $list_table->display(); } } @@ -1060,7 +1067,12 @@ public function merge_term() { $from = isset( $_GET['from'] ) ? absint( wp_unslash( $_GET['from'] ) ) : 0; $to_term = get_term( $to, $taxonomy ); $from_term = get_term( $from, $taxonomy ); - $redirect = add_query_arg( 'tax', $taxonomy, $this->setting_page_url ); + $args = [ + 'tax' => $taxonomy, + 's' => isset( $_GET['s'] ) ? sanitize_text_field( wp_unslash( $_GET['s'] ) ) : false, + 'paged' => isset( $_GET['paged'] ) ? absint( wp_unslash( $_GET['paged'] ) ) : false, + ]; + $redirect = add_query_arg( $args, $this->setting_page_url ); if ( empty( $taxonomy ) || empty( $to ) || empty( $from ) ) { $this->add_notice( @@ -1127,7 +1139,12 @@ public function skip_similar_term() { $taxonomy = isset( $_GET['taxonomy'] ) ? sanitize_text_field( wp_unslash( $_GET['taxonomy'] ) ) : ''; $term = isset( $_GET['term'] ) ? absint( wp_unslash( $_GET['term'] ) ) : 0; $similar_term = isset( $_GET['similar_term'] ) ? absint( wp_unslash( $_GET['similar_term'] ) ) : 0; - $redirect = add_query_arg( 'tax', $taxonomy, $this->setting_page_url ); + $args = [ + 'tax' => $taxonomy, + 's' => isset( $_GET['s'] ) ? sanitize_text_field( wp_unslash( $_GET['s'] ) ) : false, + 'paged' => isset( $_GET['paged'] ) ? absint( wp_unslash( $_GET['paged'] ) ) : false, + ]; + $redirect = add_query_arg( $args, $this->setting_page_url ); // SKip/Ignore the similar term. $term_meta = get_term_meta( $term, 'classifai_similar_terms', true ); From c90ed5108e46b0ce0e03958f2e7b8fe8b8ad635e Mon Sep 17 00:00:00 2001 From: Dharmesh Patel Date: Fri, 11 Oct 2024 17:31:48 +0530 Subject: [PATCH 14/15] Fix spacing issue. 
--- .../Classifai/Admin/SimilarTermsListTable.php | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/includes/Classifai/Admin/SimilarTermsListTable.php b/includes/Classifai/Admin/SimilarTermsListTable.php index c5cdff648..57c74eccb 100644 --- a/includes/Classifai/Admin/SimilarTermsListTable.php +++ b/includes/Classifai/Admin/SimilarTermsListTable.php @@ -69,7 +69,7 @@ public function prepare_items() { $columns = $this->get_columns(); $hidden = array(); $sortable = $this->get_sortable_columns(); - $search = isset( $_REQUEST['s'] ) ? sanitize_text_field( wp_unslash( $_REQUEST['s'] ) ) : ''; + $search = isset( $_REQUEST['s'] ) ? sanitize_text_field( wp_unslash( $_REQUEST['s'] ) ) : ''; // phpcs:ignore WordPress.Security.NonceVerification.Recommended $this->_column_headers = array( $columns, $hidden, $sortable ); @@ -91,8 +91,8 @@ public function prepare_items() { ) ); - $current = $this->get_pagenum(); - $offset = ( $current - 1 ) * $per_page; + $current = $this->get_pagenum(); + $offset = ( $current - 1 ) * $per_page; $terms = get_terms( [ @@ -154,8 +154,8 @@ public function generate_term_html( $term, $similar_term, $score = null ) { 'taxonomy' => $this->taxonomy, 'from' => $similar_term->term_id, 'to' => $term->term_id, - 'paged' => $this->get_pagenum(), - 's' => isset( $_REQUEST['s'] ) ? sanitize_text_field( wp_unslash( $_REQUEST['s'] ) ) : false, + 'paged' => $this->get_pagenum(), + 's' => isset( $_REQUEST['s'] ) ? sanitize_text_field( wp_unslash( $_REQUEST['s'] ) ) : false, // phpcs:ignore WordPress.Security.NonceVerification.Recommended ); $merge_url = add_query_arg( $args, wp_nonce_url( admin_url( 'admin-post.php' ), 'classifai_merge_term' ) ); $score = $score ? ( $score > 1 ? $score - 1 : $score ) : ''; @@ -175,7 +175,8 @@ public function generate_term_html( $term, $similar_term, $score = null ) { esc_html( $term->name ), '' . esc_html( $term->term_id ) . '', esc_html( $term->slug ), - '' . 
esc_html( sprintf( _n( '%d time', '%d times', $term->count, 'classifai' ), $term->count ) ) . '', + // translators: %d: Term count. + '' . esc_html( sprintf( _n( '%d time', '%d times', $term->count, 'classifai' ), $term->count ) ) . '', esc_html( $term->parent > 0 ? get_term( $term->parent )->name : 'None' ), $score ? esc_html( round( $score * 100, 2 ) . '%' ) : '', esc_url( $merge_url ), @@ -223,7 +224,7 @@ public function column_actions( $item ) { 'term' => $term->term_id, 'similar_term' => $similar_term->term_id, 'paged' => $this->get_pagenum(), - 's' => isset( $_REQUEST['s'] ) ? sanitize_text_field( wp_unslash( $_REQUEST['s'] ) ) : false, + 's' => isset( $_REQUEST['s'] ) ? sanitize_text_field( wp_unslash( $_REQUEST['s'] ) ) : false, // phpcs:ignore WordPress.Security.NonceVerification.Recommended ); $skip_url = add_query_arg( $args, wp_nonce_url( admin_url( 'admin-post.php' ), 'classifai_skip_similar_term' ) ); From bca8413cfbfee9e7a53d132d1102fb8f8fa7b68c Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Tue, 15 Oct 2024 13:41:26 -0600 Subject: [PATCH 15/15] Add in custom hooks before and after basic functionality runs, allowing third parties to hook in and do things like logging --- includes/Classifai/Features/TermCleanup.php | 61 +++++++++++++++++++++ includes/Classifai/TermCleanupScheduler.php | 12 ++++ 2 files changed, 73 insertions(+) diff --git a/includes/Classifai/Features/TermCleanup.php b/includes/Classifai/Features/TermCleanup.php index 2f82b2f15..64bbde51a 100644 --- a/includes/Classifai/Features/TermCleanup.php +++ b/includes/Classifai/Features/TermCleanup.php @@ -607,6 +607,18 @@ public function generate_embeddings( string $taxonomy ) { foreach ( $terms as $term_id ) { $result = $provider->generate_embeddings_for_term( $term_id, false, $this ); + /** + * Fires when an embedding is generated for a term. + * + * @since x.x.x + * @hook classifai_feature_term_cleanup_generate_embedding + * + * @param {int} $term_id ID of term. 
+ * @param {array|WP_Error} $result Result of embedding generation. + * @param {TermCleanup} $this Feature instance. + */ + do_action( 'classifai_feature_term_cleanup_generate_embedding', $term_id, $result, $this ); + if ( is_wp_error( $result ) ) { return $result; } @@ -1096,6 +1108,18 @@ public function merge_term() { exit; } + /** + * Fires before terms are merged together. + * + * @since x.x.x + * @hook classifai_feature_term_cleanup_pre_merge_term + * + * @param {int} $from Term ID being merged. + * @param {int} $to Term ID we're merging into. + * @param {string} $taxonomy Taxonomy of terms being merged. + */ + do_action( 'classifai_feature_term_cleanup_pre_merge_term', $from, $to, $taxonomy ); + $ret = wp_delete_term( $from, $taxonomy, @@ -1105,6 +1129,19 @@ public function merge_term() { ) ); + /** + * Fires after terms are merged together. + * + * @since x.x.x + * @hook classifai_feature_term_cleanup_post_merge_term + * + * @param {int} $from Term ID being merged. + * @param {int} $to Term ID we're merging into. + * @param {string} $taxonomy Taxonomy of terms being merged. + * @param {bool|int|WP_Error} $ret Result of merge process. + */ + do_action( 'classifai_feature_term_cleanup_post_merge_term', $from, $to, $taxonomy, $ret ); + if ( is_wp_error( $ret ) ) { $this->add_notice( // translators: %s: Error message. @@ -1146,6 +1183,18 @@ public function skip_similar_term() { ]; $redirect = add_query_arg( $args, $this->setting_page_url ); + /** + * Fires before a term is skipped. + * + * @since x.x.x + * @hook classifai_feature_term_cleanup_pre_skip_term + * + * @param {int} $term Term ID being skipped. + * @param {int} $similar_term Term ID that matched. + * @param {string} $taxonomy Taxonomy of terms being skipped. + */ + do_action( 'classifai_feature_term_cleanup_pre_skip_term', $term, $similar_term, $taxonomy ); + // SKip/Ignore the similar term. 
$term_meta = get_term_meta( $term, 'classifai_similar_terms', true ); if ( is_array( $term_meta ) && isset( $term_meta[ $similar_term ] ) ) { @@ -1157,6 +1206,18 @@ public function skip_similar_term() { } } + /** + * Fires after a term is skipped. + * + * @since x.x.x + * @hook classifai_feature_term_cleanup_post_skip_term + * + * @param {int} $term Term ID being skipped. + * @param {int} $similar_term Term ID that matched. + * @param {string} $taxonomy Taxonomy of terms being skipped. + */ + do_action( 'classifai_feature_term_cleanup_post_skip_term', $term, $similar_term, $taxonomy ); + $this->add_notice( esc_html__( 'Skipped similar term.', 'classifai' ), 'success' diff --git a/includes/Classifai/TermCleanupScheduler.php b/includes/Classifai/TermCleanupScheduler.php index aab61fd74..618dad28e 100644 --- a/includes/Classifai/TermCleanupScheduler.php +++ b/includes/Classifai/TermCleanupScheduler.php @@ -94,6 +94,18 @@ public function run( array $item = [] ) { ); $res = $term_cleanup->get_similar_terms( $taxonomy, $thresold, $args ); + /** + * Fires when a batch of similar terms is calculated. + * + * @since x.x.x + * @hook classifai_feature_term_cleanup_get_similar_terms + * + * @param {array|bool|WP_Error} $res Response from the get_similar_terms method. + * @param {string} $taxonomy Taxonomy of terms we are comparing. + * @param {array} $args Arguments used for getting similar terms. + */ + do_action( 'classifai_feature_term_cleanup_get_similar_terms', $res, $taxonomy, $args ); + // Restore original user. wp_set_current_user( $original_user_id );