From 576dbb595e4960497da650762c5ff03a7e0ed0f6 Mon Sep 17 00:00:00 2001 From: Greg Anderson Date: Wed, 11 Aug 2010 04:40:56 +0000 Subject: [PATCH] #861822 by greg.1.anderson: Add an option to sanitize email addresses and passwords from user table post sql-sync. --- commands/sql/sql.drush.inc | 72 ++++++++++++- commands/sql/sync.sql.inc | 209 +++++++++++++++++++++++++++---------- drush.api.php | 21 ++++ includes/drush.inc | 32 +++++- includes/sitealias.inc | 13 +++ 5 files changed, 283 insertions(+), 64 deletions(-) diff --git a/commands/sql/sql.drush.inc b/commands/sql/sql.drush.inc index 650bf37812..1ef3a62763 100644 --- a/commands/sql/sql.drush.inc +++ b/commands/sql/sql.drush.inc @@ -115,6 +115,9 @@ function sql_drush_command() { '--create-db' => 'Create a new database before importing the database dump on the target machine.', '--db-su' => 'Account to use when creating a new database. Optional.', '--db-su-pw' => 'Password for the "db-su" account. Optional.', + '--sanitize' => 'Obscure email addresses and reset passwords in the user table post-sync. Optional.', + ' --sanitize-password' => 'The password to assign to all accounts in the sanitization operation, or "no" to keep passwords unchanged. Default is "password".', + ' --sanitize-email' => 'The username for test email addresses in the sanitization operation, or "no" to keep email addresses unchanged. May contain replacement patterns %uid, %mail or %login. Default is "user+%uid@localhost".', ), ); if (drush_drupal_major_version() >= 7) { @@ -214,7 +217,7 @@ function drush_sql_build_dump_command($tabel_selection, $db_spec = NULL) { $skip_tables = $tabel_selection['skip']; $structure_tables = $tabel_selection['structure']; $tables = $tabel_selection['tables']; - + $ignores = array(); $skip_tables += $structure_tables; $data_only = drush_get_option('data-only'); @@ -314,7 +317,7 @@ function drush_sql_build_dump_command($tabel_selection, $db_spec = NULL) { * specified. 
* * @param option_name - * The option name to check: skip-tables, structure-tables + * The option name to check: skip-tables, structure-tables * or tables. This funciton will check both *-key and *-list, * and, in the case of sql-sync, will also check target-* * and source-*, to see if an alias set one of these options. @@ -344,7 +347,7 @@ function _drush_sql_get_table_list($option_name) { } } } - + return array(); } @@ -385,11 +388,16 @@ function _drush_sql_query($query, $db_spec = NULL) { $exec = 'psql'; $exec .= _drush_sql_get_credentials($db_spec); $exec .= (drush_get_context('DRUSH_VERBOSE') ? '' : ' -q'); - $exec .= ' ' . (drush_get_option('extra') ? drush_get_option('extra') : "--no-align --field-separator=$'\t' --pset footer=off"); + $exec .= ' ' . (drush_get_option('extra') ? drush_get_option('extra') : "--no-align --field-separator='\t' --pset footer=off"); $exec .= " --file $file"; break; } - + // In --simulate mode, drush_op will show the call to mysql or psql, + // but the sql query itself is stored in a temp file and not displayed. + // We will therefore show the query explicitly in the interest of full disclosure. + if (drush_get_context('DRUSH_SIMULATE')) { + drush_print('sql-query: ' . $query); + } $return = drush_op('system', $exec) !== FALSE; return $return; } @@ -557,3 +565,57 @@ function _drush_sql_get_invalid_url_msg($db_spec = NULL) { return dt('Unable to parse DB connection array'); } } + +/** + * Call from a pre-sql-sync hook to register an sql + * query to be executed in the post-sql-sync hook. + * @see drush_sql_pre_sql_sync() and @see drush_sql_post_sql_sync(). + * + * @param $id + * String containing an identifier representing this + * operation. This id is not actually used at the + * moment, it is just used to fufill the contract + * of drush contexts. + * @param $message + * String with the confirmation message that describes + * to the user what the post-sync operation is going + * to do. 
/**
 * Registers an operation to be carried out after sql-sync has finished.
 *
 * Intended to be called from a pre-sql-sync hook; the registered entry is
 * stored in the 'post-sync-ops' drush context.
 * @see drush_sql_pre_sql_sync()
 * @see drush_sql_post_sql_sync()
 *
 * @param $id
 *   String containing an identifier for this operation. It is used as the
 *   key of the entry inside the 'post-sync-ops' context, so registering
 *   the same id twice replaces the earlier entry.
 * @param $message
 *   String with the confirmation message that describes to the user what
 *   the post-sync operation is going to do.
 * @param $query
 *   String containing the sql query associated with this operation, or
 *   NULL (the default) when the operation only contributes a message,
 *   e.g. because a drush module fixes up the target database itself in
 *   its own post-sync hook.
 */
function drush_sql_register_post_sync_op($id, $message, $query = NULL) {
  $operation = array('message' => $message, 'query' => $query);

  $registered = drush_get_context('post-sync-ops');
  $registered[$id] = $operation;
  drush_set_context('post-sync-ops', $registered);
}

/**
 * Assembles the confirmation text for every registered post-sync operation.
 *
 * @return
 *   A string made of a heading line followed by one " * <message>" line per
 *   registered operation (each line newline-terminated), or FALSE when the
 *   'post-sync-ops' context is empty.
 */
function _drush_sql_get_post_sync_messages() {
  $registered = drush_get_context('post-sync-ops');
  if (empty($registered)) {
    return FALSE;
  }

  $lines = array(dt('The following post-sync operations will be done on the destination:'));
  foreach ($registered as $operation) {
    $lines[] = " * " . $operation['message'];
  }

  return implode("\n", $lines) . "\n";
}
'/commands/core/rsync.core.inc'; -function drush_sql_sync($source = NULL, $destination = NULL) { - $source_database = drush_get_option('source-database', 'default'); - $source_target = drush_get_option('source-target'); - $target_database = drush_get_option('target-database', 'default'); - $target_target = drush_get_option('target-target'); - - // - // If the destination was not explicitly set, but a particular - // target database was specified on the command line, then we - // will implicitly assume that the destination alias is the - // same as the source alias. - // - if (!isset($destination) && (isset($target_database) || (isset($target_target)))) { - $destination = $source; - } - - // - // If there is no destination specification, then exit. - // - if (!isset($destination)) { - drush_print(dt("You must specify a destination target.")); - exit(1); - } - // - // Default branch: copy 'sync' with the specified source - // and destination. - // - else { - _drush_sql_sync($source, $destination, TRUE); - } -} - -function _drush_sql_sync($source, $destination, $show_warning = TRUE) { +/** + * Sql sync validate function. Look up the version of the + * Drupal site being sync'ed. At least one of the sites + * must be local for this to work; if both sites are remote, + * then $major_version will be FALSE. + */ +function drush_sql_sync_validate($source = NULL, $destination = NULL) { // Preflight destination in case it defines the alias used by the source _drush_sitealias_get_record($destination); + // After preflight, get source and destination settings $source_settings = drush_sitealias_get_record($source); $destination_settings = drush_sitealias_get_record($destination); @@ -50,6 +25,98 @@ function _drush_sql_sync($source, $destination, $show_warning = TRUE) { sitealias_get_databases_from_record($source_settings); sitealias_get_databases_from_record($destination_settings); + // Cache what version of Drupal we're syncing. 
/**
 * Escapes a value for use inside a single-quoted SQL string literal.
 *
 * Only single quotes are handled (by doubling them).
 * NOTE(review): backslash handling depends on the database server's
 * configuration and is deliberately left untouched here - confirm whether
 * the target servers need it.
 *
 * @param $value
 *   The raw value, e.g. an option given on the command line.
 * @return
 *   The value with every single quote doubled.
 */
function _drush_sql_sanitize_escape_literal($value) {
  return str_replace("'", "''", $value);
}

/**
 * Pre sql sync function. This hook function registers the query that
 * sanitizes passwords and email addresses in the user table when the
 * --sanitize option is used. It is also an example of how to write a
 * database sanitizer for sql sync.
 *
 * To write your own sync hook function, define drush_mymodule_pre_sql_sync()
 * and follow the form of this function to add your own database
 * sanitization operations via the register post-sync op function;
 * @see drush_sql_register_post_sync_op().
 *
 * Options read (each one is also looked up with a 'destination-' prefix):
 *  - sanitize: nothing is registered unless this is set.
 *  - sanitize-password: the password to assign to all accounts, or "no" to
 *    keep passwords unchanged. Defaults to "password".
 *  - sanitize-email: the replacement email address, or "no" to keep email
 *    addresses unchanged. May contain the patterns %uid, %mail and %login.
 *    Defaults to "user+%uid@localhost".
 *
 * @param $source
 *   The source site specification handed to sql-sync.
 * @param $destination
 *   The destination site specification handed to sql-sync.
 */
function drush_sql_pre_sql_sync($source = NULL, $destination = NULL) {
  $source_settings = drush_sitealias_get_record($source);
  $destination_settings = drush_sitealias_get_record($destination);
  $user_table_updates = array();
  $message_list = array();

  // Test to see if 'sanitize' option was specified.
  if (drush_get_option(array('sanitize', 'destination-sanitize'), FALSE)) {
    // Sanitize passwords
    $newpassword = drush_get_option(array('sanitize-password', 'destination-sanitize-password'), 'password');
    if ($newpassword != 'no') {
      $major_version = drush_drupal_major_version();
      $pw_op = "";

      // In Drupal 6, passwords are hashed via the MD5 algorithm.
      if ($major_version == 6) {
        $pw_op = "MD5('" . _drush_sql_sanitize_escape_literal($newpassword) . "')";
      }
      // In Drupal 7, passwords are hashed via a more complex algorithm,
      // available via the user_hash_password function.
      elseif ($major_version >= 7) {
        $drupal_root = sitealias_find_local_drupal_root(array($source_settings, $destination_settings));
        if (isset($drupal_root)) {
          include_once $drupal_root . '/includes/password.inc';
          include_once $drupal_root . '/includes/bootstrap.inc';
          $hash = user_hash_password($newpassword);
          // Never write an empty password hash if hashing failed.
          if ($hash) {
            $pw_op = "'" . _drush_sql_sanitize_escape_literal($hash) . "'";
          }
        }
      }
      if (!empty($pw_op)) {
        $user_table_updates[] = "pass = $pw_op";
        $message_list[] = "passwords";
      }
      else {
        // The user asked for sanitization; do not skip part of it silently.
        drush_print(dt('WARNING: Could not build a password hash for this version of Drupal; passwords will NOT be sanitized.'));
      }
    }

    // Sanitize email addresses
    $newemail = drush_get_option(array('sanitize-email', 'destination-sanitize-email'), 'user+%uid@localhost');
    if ($newemail != 'no') {
      // Escape the user-supplied text first; the placeholder expansions
      // below add their own (intentional) quotes afterwards.
      $escaped_email = _drush_sql_sanitize_escape_literal($newemail);
      if (strpos($newemail, '%') !== FALSE) {
        // We need a different sanitization query for Postgres and Mysql.
        // When the driver is not known, the Mysql form is used.
        $db_driver = isset($destination_settings['databases']['default']['default']['driver']) ? $destination_settings['databases']['default']['default']['driver'] : NULL;
        // NOTE(review): %login expands to the "login" column of the users
        // table - confirm that this is the column that was intended.
        if ($db_driver == 'pgsql') {
          $email_map = array('%uid' => "' || uid || '", '%mail' => "' || replace(mail, '@', '_') || '", '%login' => "' || replace(login, ' ', '_') || '");
          $newmail = "'" . str_replace(array_keys($email_map), array_values($email_map), $escaped_email) . "'";
        }
        else {
          $email_map = array('%uid' => "', uid, '", '%mail' => "', replace(mail, '@', '_'), '", '%login' => "', replace(login, ' ', '_'), '");
          $newmail = "concat('" . str_replace(array_keys($email_map), array_values($email_map), $escaped_email) . "')";
        }
      }
      else {
        // No replacement patterns: every account gets the same literal
        // address.
        $newmail = "'" . $escaped_email . "'";
      }
      $user_table_updates[] = "mail = $newmail";
      $message_list[] = 'email addresses';
    }

    if (!empty($user_table_updates)) {
      $sanitize_query = "update users set " . implode(', ', $user_table_updates) . " where uid > 0;";
      drush_sql_register_post_sync_op('user-email', dt('Reset !message in user table', array('!message' => implode(' and ', $message_list))), $sanitize_query);
    }
  }
}
This facilitates fast file transfer via rsync.')); + } + + + if (array_key_exists('tables', $table_selection) && (count($table_selection['tables']) > 0)) { + drush_print(); + drush_print(dt(' Only the following tables will be transferred: !list', array('!list' => implode(',', $table_selection['tables'])))); + } + elseif (!empty($table_selection)) { + $skip_tables_list = implode(',', $table_selection['skip'] + $table_selection['structure']); + if(!empty($skip_tables_list)) { + drush_print(); + drush_print(dt(' The following tables will be skipped: !list', array('!list' => $skip_tables_list))); + } + } + + // If any sanitization operations are to be done, then get the + // sanitization messages and print them as part of the confirmation. + $messages = _drush_sql_get_post_sync_messages(); + if ($messages) { + drush_print(); + drush_print($messages); + } // If there are multiple destinations, then // prompt once here and suppress the warning message // and the normal confirmation below. if (array_key_exists('site-list', $destination_settings)) { + drush_print(); drush_print(dt('You are about to sync the database from !source, overwriting all of the following targets:', array('!source' => $source))); foreach ($destination_settings['site-list'] as $one_destination) { drush_print(dt(' !target', array('!target' => $one_destination))); } - drush_print(); } else { - // Check to see if we are using a temporary file in a situation - // where the user did not specify "--temp". - if (($source_is_tmp || $target_is_tmp) && (!isset($use_temp_files)) && (isset($source_db_url['remote-host']) || isset($target_db_url['remote-host']))) { - drush_print(dt('WARNING: Using temporary files to store and transfer sql-dump. It is recommended that you specify --source-dump and --target-dump options on the command line, or set \'%dump\' in the path-aliases section of your site alias records. 
This facilitates fast file transfer via rsync.')); - } - + drush_print(); $txt_source = (isset($source_db_url['remote-host']) ? $source_db_url['remote-host'] . '/' : '') . $source_db_url['database']; $txt_destination = (isset($target_db_url['remote-host']) ? $target_db_url['remote-host'] . '/' : '') . $target_db_url['database']; drush_print(dt("You will destroy data from !target and replace with data from !source.", array('!source' => $txt_source, '!target' => $txt_destination))); - drush_print(); - } - - if (array_key_exists('tables', $table_selection) && (count($table_selection['tables']) > 0)) { - drush_print(dt(' Only the following tables will be transferred: !list', array('!list' => implode(',', $table_selection['tables'])))); - drush_print(); - } - elseif (!empty($table_selection)) { - $skip_tables_list = implode(',', $table_selection['skip'] + $table_selection['structure']); - if(!empty($skip_tables_list)) { - drush_print(dt(' The following tables will be skipped: !list', array('!list' => $skip_tables_list))); - drush_print(); - } } // TODO: actually make the backup if desired. + drush_print(); drush_print(dt("You might want to make a backup first, using sql_dump command.\n")); if (!drush_confirm(dt('Do you really want to continue?'))) { drush_die('Aborting.'); @@ -342,3 +418,28 @@ function _drush_sql_sync($source, $destination, $show_warning = TRUE) { } } } + +/** + * Apply all post-sync operations that were registered in any pre-sync hook. + * Follow the pattern of this function to make your own post-sync hook. + * If changing the database, be sure to also include a pre-sync hook to + * notify the user of the change that will be made. @see drush_sql_pre_sql_sync(). 
/**
 * Apply all post-sync operations that were registered in any pre-sync hook.
 * Follow the pattern of this function to make your own post-sync hook.
 * If changing the database, be sure to also include a pre-sync hook to
 * notify the user of the change that will be made.
 * @see drush_sql_pre_sql_sync().
 *
 * The queries of all registered operations are joined into one string and
 * run on the destination through the "sql-query" command. In simulate mode
 * the combined query is only printed.
 *
 * @param $source
 *   The source site specification handed to sql-sync (not used here).
 * @param $destination
 *   The destination site specification handed to sql-sync.
 */
function drush_sql_post_sql_sync($source = NULL, $destination = NULL) {
  $options = drush_get_context('post-sync-ops');
  if (empty($options)) {
    return;
  }

  $destination_settings = drush_sitealias_get_record($destination);

  // Operations may be registered with a message but no query. Skip those,
  // so that a set of message-only operations never results in an empty
  // query being run on the destination.
  $queries = array();
  foreach ($options as $data) {
    $query = isset($data['query']) ? trim($data['query']) : '';
    if ($query !== '') {
      $queries[] = $query;
    }
  }
  if (empty($queries)) {
    return;
  }
  $sanitize_query = implode(' ', $queries);

  if (!drush_get_context('DRUSH_SIMULATE')) {
    drush_do_site_command($destination_settings, "sql-query", array($sanitize_query));
  }
  else {
    drush_print("Executing on $destination: $sanitize_query");
  }
}

/**
 * Post-sync sanitization example. This is equivalent to
 * the built-in --sanitize option of sql-sync, but simplified
 * to only work with default values on Drupal 6 + mysql.
 *
 * We test for both 'my-sanitize' and 'destination-my-sanitize'
 * options because we want to allow options set in a site-alias
 * to control the post-sync operations. The options from the
 * destination alias are applied to the drush options context
 * with the prefix 'destination-'.
 *
 * @see drush_sql_pre_sql_sync().
 */
function drush_hook_pre_sql_sync($source = NULL, $destination = NULL) {
  if (drush_get_option(array('my-sanitize', 'destination-my-sanitize'), FALSE)) {
    // Three arguments: the operation id, the confirmation message, and
    // the query to run on the destination after the sync.
    drush_sql_register_post_sync_op('my-sanitize-id',
      dt('Reset passwords and email addresses in user table'),
      "update users set pass = MD5('password'), mail = concat('user+', uid, '@localhost') where uid > 0;");
  }
}
*/ diff --git a/includes/drush.inc b/includes/drush.inc index a7b5aaeb75..b16c522dd0 100644 --- a/includes/drush.inc +++ b/includes/drush.inc @@ -201,11 +201,11 @@ function drush_include_engine($type, $engine, $version = NULL, $path = NULL) { * A string containing the version number of the current * Drupal installation, if any. Otherwise, return FALSE. */ -function drush_drupal_version() { +function drush_drupal_version($drupal_root = NULL) { static $version = FALSE; if (!$version) { - if ($drupal_root = drush_get_context('DRUSH_DRUPAL_ROOT')) { + if (($drupal_root != NULL) || ($drupal_root = drush_get_context('DRUSH_DRUPAL_ROOT'))) { // D7 stores VERSION in bootstrap.inc $version_constant_paths = array('/modules/system/system.module', '/includes/bootstrap.inc'); foreach ($version_constant_paths as $path) { @@ -233,9 +233,9 @@ function drush_drupal_cache_clear_all() { /** * Returns the Drupal major version number (5, 6, 7 ...) */ -function drush_drupal_major_version() { +function drush_drupal_major_version($drupal_root = NULL) { $major_version = FALSE; - if ($version = drush_drupal_version()) { + if ($version = drush_drupal_version($drupal_root)) { $version_parts = explode('.', $version); if (is_numeric($version_parts[0])) { $major_version = (integer)$version_parts[0]; @@ -244,6 +244,28 @@ function drush_drupal_major_version() { return $major_version; } +/** + * Return the major version of one of the local sites in + * a list. Usually there will only be two sites: a source + * and a destination, one of which might be remote. Both + * of these should always be the same version, because they + * should be variants of the same site (e.g. dev and live). + * + * @param $site_list + * Array of site alias records + * @return + * Drush major version. 
/**
 * Look up the Drupal major version of a local site taken from a list.
 *
 * Usually the list holds two sites, a source and a destination, one of
 * which might be remote. The version is determined from the Drupal root
 * chosen by sitealias_find_local_drupal_root().
 *
 * @param $site_list
 *   Array of site alias records.
 * @return
 *   The result of drush_drupal_major_version() for the chosen Drupal root,
 *   or FALSE when the list contains no usable local root.
 */
function drush_drupal_major_version_of_site($site_list) {
  $local_root = sitealias_find_local_drupal_root($site_list);
  if (!isset($local_root)) {
    return FALSE;
  }

  return drush_drupal_major_version($local_root);
}

/**
 * Find the Drupal root of a local site in a list of site alias records.
 *
 * A record counts as local when it has a 'root' entry and no
 * 'remote-host' entry.
 *
 * @param $site_list
 *   Array of site alias records.
 * @return
 *   The 'root' value of the selected local record, or NULL when no record
 *   qualifies.
 */
function sitealias_find_local_drupal_root($site_list) {
  $found_root = NULL;

  foreach ($site_list as $site_record) {
    // Once a root has been picked, later records can no longer change it.
    if ($found_root != NULL) {
      break;
    }
    $is_remote = array_key_exists('remote-host', $site_record);
    if (!$is_remote && array_key_exists('root', $site_record)) {
      $found_root = $site_record['root'];
    }
  }

  return $found_root;
}