Skip to content

Commit

Permalink
Add legacydedupefinder
Browse files Browse the repository at this point in the history
=====
Overview
====

This adds a new extension for existing dedupe finder behaviour, allowing us to support the
hook that is jammed in the middle of the dedupe finder code going forwards without
being blocked by it (I have an existing PR that cuts many hours off the
dedupe query but it is bending over backwards to work alongside the legacy
attempts to allow integration).

The new extension is enabled on install and on upgrade and there is no behaviour change unless
a site admin disables it. If they choose to do so then

1) the legacy hook & legacy reserved query wrangling will no longer be called
2) dedupe queries will speed up once the blocked code is also merged
#30591
  • Loading branch information
eileenmcnaughton committed Jan 2, 2025
1 parent 15ea0db commit c6314b1
Show file tree
Hide file tree
Showing 17 changed files with 1,095 additions and 38 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
!/ext/payflowpro
!/ext/ckeditor4
!/ext/legacycustomsearches
!/ext/legacydedupefinder
!/ext/civiimport
!/ext/civi_campaign
!/ext/civi_case
Expand Down
64 changes: 55 additions & 9 deletions CRM/Dedupe/BAO/DedupeRuleGroup.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,19 @@
* @package CRM
* @copyright CiviCRM LLC https://civicrm.org/licensing
*/
use Civi\Core\HookInterface;

/**
* The CiviCRM duplicate discovery engine is based on an
* algorithm designed by David Strauss <[email protected]>.
*/
class CRM_Dedupe_BAO_DedupeRuleGroup extends CRM_Dedupe_DAO_DedupeRuleGroup {
class CRM_Dedupe_BAO_DedupeRuleGroup extends CRM_Dedupe_DAO_DedupeRuleGroup implements \Symfony\Component\EventDispatcher\EventSubscriberInterface {

public static function getSubscribedEvents(): array {
return [
'hook_civicrm_findExistingDuplicates' => ['hook_civicrm_findExistingDuplicates', -20],
];
}

/**
* @var array
Expand Down Expand Up @@ -131,6 +138,36 @@ public function tableDropQuery() {
return 'DROP TEMPORARY TABLE IF EXISTS dedupe';
}

/**
* Find existing duplicates in the database for the given rule and limitations.
*
* @return void
*/
public static function hook_civicrm_findExistingDuplicates(\Civi\Core\Event\GenericHookEvent $event) {
$ruleGroupIDs = $event->ruleGroupIDs;
$ruleGroup = new \CRM_Dedupe_BAO_DedupeRuleGroup();
$ruleGroup->id = reset($ruleGroupIDs);
$contactIDs = [];
$whereClauses = $event->whereClauses;
if (!empty($whereClauses)) {
foreach ($whereClauses as $whereClause) {
if ($whereClause[0] === 'id' && $whereClause[1] === 'IN') {
$contactIDs = $whereClause[2];
}
}
}
if (!$ruleGroup->fillTable($ruleGroup->id, $contactIDs, [], FALSE)) {
return;
}
$dao = \CRM_Core_DAO::executeQuery($ruleGroup->thresholdQuery($event->checkPermissions));
$duplicates = [];
while ($dao->fetch()) {
$duplicates[] = ['entity_id1' => $dao->id1, 'entity_id2' => $dao->id2, 'weight' => $dao->weight];
}
$event->duplicates = $duplicates;
\CRM_Core_DAO::executeQuery($ruleGroup->tableDropQuery());
}

/**
* Fill the dedupe finder table.
*
Expand All @@ -139,32 +176,41 @@ public function tableDropQuery() {
* @param int $id
* @param array $contactIDs
* @param array $params
* @param bool $legacyMode
* Legacy mode is called to give backward hook compatibility, in the legacydedupefinder
* extension. It is intended to be transitional, with the non-legacy mode being
* separated out and optimized once it no longer has to comply with the legacy
* hook and reserved query methodology.
*
* @return bool
* @throws \Civi\Core\Exception\DBQueryException
*/
public function fillTable(int $id, array $contactIDs, array $params): bool {
$this->contactIds = $contactIDs;
$this->params = $params;
public function fillTable(int $id, array $contactIDs, array $params, $legacyMode = TRUE): bool {
if ($legacyMode) {
$this->contactIds = $contactIDs;
$this->params = $params;
}
$this->id = $id;
// make sure we've got a fetched dbrecord, not sure if this is enforced
$this->find(TRUE);
$optimizer = new CRM_Dedupe_FinderQueryOptimizer($this->id, $contactIDs, $params);
// Reserved Rule Groups can optionally get special treatment by
// implementing an optimization class and returning a query array.
if ($optimizer->isUseReservedQuery()) {
if ($legacyMode && $optimizer->isUseReservedQuery()) {
$tableQueries = $optimizer->getReservedQuery();
}
else {
$tableQueries = $optimizer->getRuleQueries();
}
// if there are no rules in this rule group
// add an empty query fulfilling the pattern
if (!$tableQueries) {
// Just for the hook.... (which is deprecated).
$this->noRules = TRUE;
if ($legacyMode) {
if (!$tableQueries) {
// Just for the hook.... (which is deprecated).
$this->noRules = TRUE;
}
CRM_Utils_Hook::dupeQuery($this, 'table', $tableQueries);
}
CRM_Utils_Hook::dupeQuery($this, 'table', $tableQueries);
if (empty($tableQueries)) {
return FALSE;
}
Expand Down
13 changes: 4 additions & 9 deletions CRM/Dedupe/Finder.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ class CRM_Dedupe_Finder {
* Return a contact_id-keyed array of arrays of possible dupes
* (of the key contact_id) - limited to dupes of $cids if provided.
*
* @deprecated avoid calling this function directly as it is likely to change.
*
* @param int $rgid
* Rule group id.
* @param array $cids
Expand All @@ -45,16 +47,9 @@ public static function dupes($rgid, $cids = [], $checkPermissions = TRUE) {
throw new CRM_Core_Exception('Dedupe rule not found for selected contacts');
}

if (!$rgBao->fillTable($rgid, $cids, [])) {
return [];
}
$dao = CRM_Core_DAO::executeQuery($rgBao->thresholdQuery($checkPermissions));
$dupes = [];
while ($dao->fetch()) {
$dupes[] = [$dao->id1, $dao->id2, $dao->weight];
}
CRM_Core_DAO::executeQuery($rgBao->tableDropQuery());

$where = empty($cids) ? [] : [['id', 'IN', $cids]];
CRM_Utils_Hook::findExistingDuplicates($dupes, [$rgid], $where, $checkPermissions);
return $dupes;
}

Expand Down
1 change: 1 addition & 0 deletions CRM/Upgrade/Incremental/php/FiveEightyTwo.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class CRM_Upgrade_Incremental_php_FiveEightyTwo extends CRM_Upgrade_Incremental_
*/
public function upgrade_5_82_alpha1($rev): void {
$this->addTask(ts('Upgrade DB to %1: SQL', [1 => $rev]), 'runSql', $rev);
$this->addSimpleExtensionTask(ts('enable dedupe backward compatibility'), ['legacydedupefinder']);
}

}
27 changes: 27 additions & 0 deletions CRM/Utils/Hook.php
Original file line number Diff line number Diff line change
Expand Up @@ -1660,6 +1660,33 @@ public static function findDuplicates($dedupeParams, &$dedupeResults, $contextPa
->invoke(['dedupeParams', 'dedupeResults', 'contextParams'], $dedupeParams, $dedupeResults, $contextParams, $null, $null, $null, 'civicrm_findDuplicates');
}

/**
* Check for existing duplicates in the database.
*
* This hook is called when
*
* @param array $duplicates
* Array of duplicate pairs found using the rule, with the weight.
* ['entity_id1' => 5, 'entity_id2' => 6, 'weight' => 7] where 5 & 6 are contact IDs and 7 is the weight of the match.
* @param int[] $ruleGroupIDs
* Array of rule group IDs.
* @param array $whereClauses
* Currently only ['id', 'IN', [5, 6, 7]] would be present.
* @param bool $checkPermissions
* @todo the existing implementation looks for situations where ONE of the contacts
* is consistent with the where clause criteria. Potentially we might
* implement a mode where both/all contacts must be consistent with the clause criteria.
* There is a use case for both scenarios - although core code currently only implements
* one.
*
* @return mixed
*/
public static function findExistingDuplicates(array &$duplicates, array $ruleGroupIDs, array $whereClauses, bool $checkPermissions) {
$null = NULL;
return self::singleton()
->invoke(['duplicates', 'ruleGroupIDs', 'whereClauses', 'checkPermissions'], $duplicates, $ruleGroupIDs, $whereClauses, $checkPermissions, $null, $null, 'civicrm_findExistingDuplicates');
}

/**
* This hook is called AFTER EACH email has been processed by the script bin/EmailProcessor.php
*
Expand Down
2 changes: 1 addition & 1 deletion bin/regen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ php GenerateData.php

## Prune local data
$MYSQLCMD -e "DROP TABLE IF EXISTS civicrm_install_canary; DELETE FROM civicrm_cache; DELETE FROM civicrm_setting;"
$MYSQLCMD -e "DELETE FROM civicrm_extension WHERE full_name NOT IN ('sequentialcreditnotes', 'eventcart', 'greenwich', 'org.civicrm.search_kit', 'org.civicrm.afform', 'authx', 'org.civicrm.flexmailer', 'financialacls', 'contributioncancelactions', 'recaptcha', 'ckeditor4', 'legacycustomsearches', 'civiimport', 'message_admin') and full_name NOT LIKE 'civi_%';"
$MYSQLCMD -e "DELETE FROM civicrm_extension WHERE full_name NOT IN ('sequentialcreditnotes', 'eventcart', 'greenwich', 'org.civicrm.search_kit', 'org.civicrm.afform', 'authx', 'org.civicrm.flexmailer', 'financialacls', 'contributioncancelactions', 'recaptcha', 'ckeditor4', 'legacycustomsearches', 'civiimport', 'message_admin', 'legacydedupefinder) and full_name NOT LIKE 'civi_%';"
TABLENAMES=$( echo "show tables like 'civicrm_%'" | $MYSQLCMD | grep ^civicrm_ | xargs )

cd $CIVISOURCEDIR/sql
Expand Down
File renamed without changes.
40 changes: 40 additions & 0 deletions ext/legacydedupefinder/Civi/LegacyFinder/Finder.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

namespace Civi\LegacyFinder;

use Civi\Core\Event\GenericHookEvent;

class Finder {

/**
* This function exists to provide legacy hook support for finding duplicates.
*
* @return void
*/
public static function findExistingDuplicates(GenericHookEvent $event) {
$event->stopPropagation();
$ruleGroupIDs = $event->ruleGroupIDs;
$ruleGroup = new \CRM_Dedupe_BAO_DedupeRuleGroup();
$ruleGroup->id = reset($ruleGroupIDs);
$contactIDs = [];
$whereClauses = $event->whereClauses;
if (!empty($whereClauses)) {
foreach ($whereClauses as $whereClause) {
if ($whereClause[0] === 'id' && $whereClause[1] === 'IN') {
$contactIDs = $whereClause[2];
}
}
}
if (!$ruleGroup->fillTable($ruleGroup->id, $contactIDs, [])) {
return;
}
$dao = \CRM_Core_DAO::executeQuery($ruleGroup->thresholdQuery($event->checkPermissions));
$duplicates = [];
while ($dao->fetch()) {
$duplicates[] = ['entity_id1' => $dao->id1, 'entity_id2' => $dao->id2, 'weight' => $dao->weight];
}
$event->duplicates = $duplicates;
\CRM_Core_DAO::executeQuery($ruleGroup->tableDropQuery());
}

}
Loading

0 comments on commit c6314b1

Please sign in to comment.