jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/342554 )
Change subject: Restore blanked addresses overwritten on merge. ...................................................................... Restore blanked addresses overwritten on merge. I feel like this is a pretty cautious approach to restoring the simplest cases where blank addresses overwrote real addresses. As this took a long time at low load I put it in a drush script for scheduling, rather than an upgrade script drush civicrm-repair-address Bug: T159408 Change-Id: Ia10d5095073afb652b552d9af5c36b8f204d1d90 --- A sites/all/modules/wmf_civicrm/scripts/civicrm_repair_blank_addresses.drush.inc M sites/all/modules/wmf_civicrm/tests/phpunit/MergeTest.php A sites/all/modules/wmf_civicrm/update_restore_addresses.php M sites/all/modules/wmf_civicrm/wmf_civicrm.module 4 files changed, 410 insertions(+), 1 deletion(-) Approvals: jenkins-bot: Verified Ejegg: Looks good to me, approved diff --git a/sites/all/modules/wmf_civicrm/scripts/civicrm_repair_blank_addresses.drush.inc b/sites/all/modules/wmf_civicrm/scripts/civicrm_repair_blank_addresses.drush.inc new file mode 100644 index 0000000..6591ab9 --- /dev/null +++ b/sites/all/modules/wmf_civicrm/scripts/civicrm_repair_blank_addresses.drush.inc @@ -0,0 +1,80 @@ +<?php + +/** +* Implementation of hook_drush_command() +*/ +function civicrm_repair_blank_addresses_drush_command() { + $items = array(); + $items['civicrm-repair-address'] = array( + 'description' => 'Repair address lost on blank merging over a valid address', + 'options' => array( + 'batch' => "Batch size", + 'threshold' => 'Threshold for aborting. If there are more than this number of contributions in the threshold period then abort.', + 'threshold_period' => 'Number of minutes in the threshold period', + ), + ); + + return $items; +} + +/** + * Implementation of hook_drush_help(). + */ +function civicrm_repair_blank_addresses_drush_help($section) { + switch ( $section ) { + case 'drush:civicrm-repair-address': + return dt('Repair addresses where blank emails have merged over real emails'); + } +} + +/** + * Repair addresses on a bunch of contacts. + * + * We are rolling back a situation where a contact with a blank address was merged into + * a contact with valid address data, and overwrote the valid data. Blank address data + * was common until early 2017. + * + * On staging this script took a very long time so moving to drush so it can run in batches. + * + * @throws \CiviCRM_API3_Exception + */ +function drush_civicrm_repair_blank_addresses_civicrm_repair_address() { + module_invoke('civicrm', 'initialize'); + + // This threshold stuff is a bit cheaty since I'm leveraging another drush file function. + // OTOH this should be a temporary script & maybe no-one will spot it.... + require_once 'civicrm_merge.drush.inc'; + $threshold = (int) drush_get_option('threshold'); + if ($threshold) { + $thresholdNumberOfMinutes = (int) drush_get_option('threshold_period', 5); + if (_drush_civicrm_queue_is_backed_up($threshold, $thresholdNumberOfMinutes)) { + return; + } + } + + $startVariableName = 'civicrm_repair_address_last_processed_id'; + $start = variable_get($startVariableName, 1); + $batch_size = (integer) drush_get_option('batch', 5000); + $maxAffectedID = CRM_Core_DAO::singleValueQuery(" + SELECT max(id) FROM civicrm_address + WHERE street_address IS NULL + AND city IS NULL + AND postal_code IS NULL + AND state_province_id IS NULL + AND country_id IS NULL + AND id > $start + "); + + if (empty($maxAffectedID)) { + watchdog('civicrm_repair_address', 'No more addresses to process'); + return; + } + + watchdog('civicrm_repair_address', 'Repairing up to %batch addresses from address id %start', array('%start' => $start, '%batch' => $batch_size), WATCHDOG_INFO); + require_once __DIR__ . '/../update_restore_addresses.php'; + $highestRepairedID = repair_lost_addresses_batch($batch_size, $start); + + variable_set($startVariableName, $highestRepairedID); + watchdog('civicrm_repair_address', 'Repaired address range: %start to %end', array('%start' => $start, '%end' => $highestRepairedID), WATCHDOG_INFO); + drush_print("Processed id range $start to " . $highestRepairedID); +} diff --git a/sites/all/modules/wmf_civicrm/tests/phpunit/MergeTest.php b/sites/all/modules/wmf_civicrm/tests/phpunit/MergeTest.php index 5263d4f..7e449c1 100644 --- a/sites/all/modules/wmf_civicrm/tests/phpunit/MergeTest.php +++ b/sites/all/modules/wmf_civicrm/tests/phpunit/MergeTest.php @@ -1226,4 +1226,147 @@ return $contact['id']; } + /** + * Test recovery where a blank email has overwritten a non-blank email on merge. + * + * In this case an email existed during merge that held no data. It was used + * on the merge, but now we want the lost data. + */ + public function testRepairBlankedAddressOnMerge() { + $this->prepareForBlankAddressTests(); + $this->replicateBlankedAddress(); + + $address = $this->callAPISuccessGetSingle('Address', array('contact_id' => $this->contactID)); + $this->assertTrue(empty($address['street_address'])); + + wmf_civicrm_fix_blanked_address($address['id']); + $address = $this->callAPISuccessGetSingle('Address', array('contact_id' => $this->contactID)); + $this->assertEquals('25 Mousey Way', $address['street_address']); + + $this->cleanupFromBlankAddressRepairTests(); + } + + /** + * Test recovery where an always-blank email has been transferred to another contact on merge. + * + * We have established the address was always blank and still exists. Lets + * anihilate it. + */ + public function testRemoveEternallyBlankMergedAddress() { + $this->prepareForBlankAddressTests(); + + $this->replicateBlankedAddress(array( + 'street_address' => NULL, + 'country_id' => NULL, + 'location_type_id' => 'Main', + )); + + $address = $this->callAPISuccessGetSingle('Address', array('contact_id' => $this->contactID)); + $this->assertTrue(empty($address['street_address'])); + + wmf_civicrm_fix_blanked_address($address['id']); + $address = $this->callAPISuccess('Address', 'get', array('contact_id' => $this->contactID)); + $this->assertEquals(0, $address['count']); + + $this->cleanupFromBlankAddressRepairTests(); + } + + /** + * Test recovery where a secondary always-blank email has been transferred to another contact on merge. + * + * We have established the address was always blank and still exists, and there is + * a valid other address. Lets annihilate it. + */ + public function testRemoveEternallyBlankNonPrimaryMergedAddress() { + $this->prepareForBlankAddressTests(); + $this->createContributions(); + + $this->callAPISuccess('Address', 'create', array( + 'street_address' => '25 Mousey Way', + 'country_id' => 'US', + 'contact_id' => $this->contactID, + 'location_type_id' => 'Main', + )); + $this->callAPISuccess('Address', 'create', array( + 'street_address' => 'something', + 'contact_id' => $this->contactID2, + 'location_type_id' => 'Main', + )); + $this->callAPISuccess('Address', 'create', array( + 'street_address' => '', + 'contact_id' => $this->contactID2, + 'location_type_id' => 'Main', + )); + $this->callAPISuccess('Job', 'process_batch_merge', array('mode' => 'safe')); + + $address = $this->callAPISuccess('Address', 'get', array('contact_id' => $this->contactID, 'sequential' => 1)); + $this->assertEquals(2, $address['count']); + $this->assertTrue(empty($address['values'][1]['street_address'])); + + wmf_civicrm_fix_blanked_address($address['values'][1]['id']); + $address = $this->callAPISuccessGetSingle('Address', array('contact_id' => $this->contactID)); + $this->assertEquals('something', $address['street_address']); + + $this->cleanupFromBlankAddressRepairTests(); + } + + /** + * Replicate the merge that would result in a blanked address. + * + * @param array $overrides + */ + protected function replicateBlankedAddress($overrides = array()) { + $this->createContributions(); + $this->callAPISuccess('Address', 'create', array_merge(array( + 'street_address' => '25 Mousey Way', + 'country_id' => 'US', + 'contact_id' => $this->contactID, + 'location_type_id' => 'Main', + ), $overrides)); + $this->callAPISuccess('Address', 'create', array( + 'street_address' => NULL, + 'contact_id' => $this->contactID2, + 'location_type_id' => 'Main', + )); + $this->callAPISuccess('Job', 'process_batch_merge', array('mode' => 'safe')); + } + + /** + * Common code for testing blank address repairs. + */ + protected function prepareForBlankAddressTests() { + civicrm_api3('Setting', 'create', array( + 'logging_no_trigger_permission' => 0, + )); + civicrm_api3('Setting', 'create', array('logging' => 1)); + + CRM_Core_DAO::executeQuery('DROP TABLE IF EXISTS blank_addresses'); + require_once __DIR__ . '/../../wmf_civicrm.install'; + require_once __DIR__ . '/../../update_restore_addresses.php'; + wmf_civicrm_update_7475(); + } + + protected function cleanupFromBlankAddressRepairTests() { + CRM_Core_DAO::executeQuery('DROP TABLE blank_addresses'); + + civicrm_api3('Setting', 'create', array( + 'logging_no_trigger_permission' => 1, + )); + } + + protected function createContributions() { + $this->contributionCreate(array( + 'contact_id' => $this->contactID, + 'receive_date' => '2010-01-01', + 'invoice_id' => 1, + 'trxn_id' => 1 + )); + $this->contributionCreate(array( + 'contact_id' => $this->contactID2, + 'receive_date' => '2012-01-01', + 'invoice_id' => 2, + 'trxn_id' => 2 + )); + } + } diff --git a/sites/all/modules/wmf_civicrm/update_restore_addresses.php b/sites/all/modules/wmf_civicrm/update_restore_addresses.php new file mode 100644 index 0000000..e8f1089 --- /dev/null +++ b/sites/all/modules/wmf_civicrm/update_restore_addresses.php @@ -0,0 +1,186 @@ +<?php +/** + * Fix data loss through blanked address + * + * @param int $batch + * How many to process. + * @param int $start + * Number to start at. + * + * @return int + * The number to start at for the next batch. + */ +function repair_lost_addresses_batch($batch = 5000, $start = 0) { + civicrm_initialize(); + $result = CRM_Core_DAO::executeQuery(" + SELECT id FROM civicrm_address + WHERE street_address IS NULL + AND city IS NULL + AND postal_code IS NULL + AND state_province_id IS NULL + AND country_id IS NULL + AND id > $start + LIMIT $batch"); + while ($result->fetch()) { + wmf_civicrm_fix_blanked_address($result->id); + } + return $result->id; +} + +/** + * Fix data loss through blanked address for an individual address. + * + * We have a situation where a number of addresses have been overwritten by blank addresses. + * + * Unfortunately some of these have since been merged, resulting in some unravelling to be done. + * + * This update seeks to unravel ones with a single merge since then. + * + * @param int $addressID + */ +function wmf_civicrm_fix_blanked_address($addressID) { + $result = CRM_Core_DAO::executeQuery('SELECT * FROM log_civicrm_address WHERE id = %1 ORDER BY log_date', array( + 1 => array( + $addressID, + 'Int' + ) + )); + while ($result->fetch()) { + $logEntries[] = (array) $result; + } + + $dataFields = array( + 'street_address', + 'city', + 'postal_code', + 'state_province_id', + 'country_id', + 'supplemental_address_1', + 'supplemental_address_2', + 'supplemental_address_3', + 'county_id', + 'postal_code_suffix', + 'name' + ); + + // We want to make sure both the creation of the address and the update record + // don't hold any address data. Otherwise it is a more complex update. + foreach ($dataFields as $dataField) { + if (!empty($logEntries[0][$dataField]) || !empty($logEntries[1][$dataField])) { + // Not blank enough to process. + return; + } + } + // Fetch all the address changes that happened during this merge action. + $logs = civicrm_api3('Logging', 'get', array( + 'tables' => array('civicrm_address'), + 'log_conn_id' => $logEntries[1]['log_conn_id'] + )); + $updates = array(); + $deletedAddresses = array(); + foreach ($logs['values'] as $log) { + if ($log['action'] == 'Delete') { + $deletedAddresses[$log['id']][$log['field']] = $log['from']; + } + } + // Q. What do we do if more than one address was deleted during the merged? + // A. Chicken out. + if (count($deletedAddresses) > 1) { + return; + } + + /** + * Some checks / precautions. + * - how many times has this address been changed? + * - only 2? we are just dealing with a single address moved from one + * contact to another (double check that because we like belts & braces, red ones). + * - more than 2? ug complications. hide. + */ + if ( + count($logEntries) > 2 + || !in_array($logEntries[0]['log_action'], array('Insert', 'Initialization')) + || $logEntries[1]['log_action'] !== 'Update' + ) { + return; + } + + // We are specifically trying to handle merged data at this stage. + // May extend if we identify other patterns. + if ($logEntries[0]['contact_id'] == $logEntries[1]['contact_id']) { + return; + } + + // We definitely have a situation where a blank record was inserted & then, on merge + // was transferred to another contact. We have the log_conn_id & the new contact id. + // let's make sure the new contact has not since been deleted (merged) + $keptContact = civicrm_api3('Contact', 'get', array( + 'id' => $logEntries[1]['contact_id'], + 'sequential' => 1 + )); + if ($keptContact['count'] == 0 || !empty($keptContact['values'][0]['is_deleted'])) { + // subject to a subsequent merge & deleted, next round for these. + return; + } + + // If our address is NOT the primary + $addresses = civicrm_api3('Address', 'get', array( + 'contact_id' => $logEntries[1]['contact_id'], + )); + $emptyAddress = $addresses['values'][$addressID]; + $isPrimary = $emptyAddress['is_primary']; + if ($addresses['count'] > 1 && $isPrimary && !empty($deletedAddresses)) { + // Still being precautionary as we work through examples. Have concluded + // unchanged non-primary addresses are extraneous, not sure about primary. + // We still only revert or remove this address if it has been through a + // 'simple change'. + return; + } + + // Let's first establish if there was only one address deleted in the merge. + // if so, we're gonna get through this. If not, bottle out. + // Let's say... if OUR address is the primary and one address was deleted then + // we are dealing with a change to our address. + // If our address is not primary, but the deleted address is, it probably is + // not related to ours & we should just delete ours & not recover it. + // we are getting into diminishing returns with non-primaries.... + if (count($deletedAddresses) === 1) { + $deletedAddress = reset($deletedAddresses); + foreach ($deletedAddress as $fieldName => $value) { + if (in_array($fieldName, $dataFields) && ($isPrimary || $deletedAddress['is_primary'] === 0)) { + $updates[$fieldName] = $value; + } + } + } + else { + // Here is what we know about the record + // 1) it was inserted blank & it remains blank + // 2) it was transferred from one contact to another during the + // merge. + // But... was another address deleted to make way for it? + // If more than one address was deleted in this merge we will chicken out. + if (count($deletedAddresses) > 1) { + return; + } + $updates = array(); + } + + if (empty($updates)) { + // We are dealing with an address that was created blank & then merged + // to a contact without any loss of address data. + // Just get rid of the blank address. + civicrm_api3('Address', 'Delete', array('id' => $addressID)); + } + else { + $updates['id'] = $addressID; + civicrm_api3('Address', 'create', $updates); + } + + // Remove from the tracking table. + CRM_Core_DAO::executeQuery('DELETE FROM blank_addresses WHERE id = %1', array( + 1 => array( + $addressID, + 'Int' + ) + )); + +} diff --git a/sites/all/modules/wmf_civicrm/wmf_civicrm.module b/sites/all/modules/wmf_civicrm/wmf_civicrm.module index 64560a5..9fae35e 100644 --- a/sites/all/modules/wmf_civicrm/wmf_civicrm.module +++ b/sites/all/modules/wmf_civicrm/wmf_civicrm.module @@ -2875,7 +2875,7 @@ // Do we have any geo info for this address? if ( - $address['country_id'] == $usId && + !empty($address['country_id']) && $address['country_id']== $usId && !empty( $address['postal_code'] ) ) { $sql = "SELECT city, s.id AS state_id, latitude, longitude, timezone -- To view, visit https://gerrit.wikimedia.org/r/342554 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ia10d5095073afb652b552d9af5c36b8f204d1d90 Gerrit-PatchSet: 15 Gerrit-Project: wikimedia/fundraising/crm Gerrit-Branch: master Gerrit-Owner: Eileen <emcnaugh...@wikimedia.org> Gerrit-Reviewer: Awight <awi...@wikimedia.org> Gerrit-Reviewer: Cdentinger <cdentin...@wikimedia.org> Gerrit-Reviewer: Eileen <emcnaugh...@wikimedia.org> Gerrit-Reviewer: Ejegg <eeggles...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits