Created
April 23, 2020 09:53
-
-
Save anned20/e574db4fb93e2ece89a0301ace99b606 to your computer and use it in GitHub Desktop.
Convert a latin1 database to utf8mb4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* This script can convert a latin1 database(s) to utf8mb4 and then actually convert the data too. | |
* | |
* Actually stolen from and tweaked a lot from | |
* @link https://gist.github.com/pobegov/f8b293fb6eb658a13feb1c318e6c07ed | |
* | |
* Disclaimer: This is a really _hacky_ script but it does the job. | |
* | |
* ===== | |
* USAGE | |
* ===== | |
* | |
* This script is not designed to be run more than once. | |
* | |
* This is designed to be use as a CLI script. | |
* | |
* Fill in the correct username, password and the database(s) you want to convert and it'll do the heavy lifting. | |
* | |
* It's probably a good idea to repair and optimize all tables afterwards from the command line using: | |
* mysqlcheck -u root -p --auto-repair --optimize --all-databases | |
* | |
* ========= | |
* IMPORTANT | |
* ========= | |
* | |
* Make sure your database has the following parameters set: | |
* innodb_file_per_table : ON | |
* innodb_large_prefix : ON | |
* innodb_file_format : Barracuda | |
* innodb_file_format_max : Barracuda | |
* innodb_default_row_format : dynamic | |
* | |
* You can verify this from the MySQL command line by using the following commands: | |
* SHOW VARIABLES LIKE "innodb_file_per_table"; | |
* SHOW VARIABLES LIKE "innodb_file_format%"; | |
* SHOW VARIABLES LIKE "innodb_large_prefix"; | |
* SHOW VARIABLES LIKE "innodb_default_row_format"; | |
* | |
* You can put these settings in your MySQL config like this: | |
* innodb_file_per_table=ON | |
* innodb_large_prefix=ON | |
* innodb_file_format=Barracuda | |
* innodb_default_row_format='DYNAMIC' | |
* | |
* Verify this script before running it, and make sure to BACKUP YOUR DATABASE BEFORE | |
* | |
* @author https://github.com/anned20 | |
*/ | |
$dsn = 'mysql:host=localhost;port=3306;charset=utf8'; | |
$user = 'sibben'; // @TODO CHANGE ME | |
$password = 'sibben'; // @TODO CHANGE ME | |
$options = [ | |
\PDO::ATTR_CURSOR => \PDO::CURSOR_FWDONLY, | |
\PDO::MYSQL_ATTR_USE_BUFFERED_QUERY => true, | |
\PDO::MYSQL_ATTR_INIT_COMMAND => 'SET CHARACTER SET latin1', | |
]; | |
$dbManager = new \PDO($dsn, $user, $password, $options); | |
// Databases to actually convert | |
$databasesToConvert = ['oz_dev']; | |
$typesToConvert = ['char', 'varchar', 'tinytext', 'mediumtext', 'text', 'longtext']; | |
// Disable foreign_key_checks for this session | |
$dbManager->exec('SET foreign_key_checks=0'); | |
/** | |
* Helper function to print a nice line | |
*/ | |
function line($msg = '', ...$vars) { | |
echo sprintf($msg, ...$vars).PHP_EOL; | |
}; | |
/** | |
* Helper function to handle any MySQL errors | |
*/ | |
function handlePossibleError($dbManager) { | |
$databaseErrors = $dbManager->errorInfo(); | |
if (!empty($databaseErrors) && reset($databaseErrors)[0] != 0) { | |
line('!!!!!!!!!!!!! ERROR OCCURED %s', print_r($databaseErrors, true)); | |
$dbManager->exec('SET foreign_key_checks=1'); | |
exit(1); | |
} | |
}; | |
foreach ($databasesToConvert as $database) { | |
line($database); | |
line(str_repeat('=', strlen($database) + 1)); | |
$dbManager->exec("USE `{$database}`"); | |
line('Converting database to correct locale'); | |
$dbManager->exec("ALTER DATABASE `{$database}` CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci"); | |
$tablesStatement = $dbManager->query('SHOW TABLES'); | |
while (($table = $tablesStatement->fetchColumn())) { | |
line('Table %s:', $table); | |
line(str_repeat('-', strlen($table) + 8)); | |
$columnsToConvert = []; | |
$columsStatement = $dbManager->query("DESCRIBE `{$table}`"); | |
while (($tableInfo = $columsStatement->fetch(\PDO::FETCH_ASSOC))) { | |
$column = $tableInfo['Field']; | |
$type = preg_replace('~\(\d+\)~', '', $tableInfo['Type']); | |
if (in_array($type, $typesToConvert)) { | |
$action = 'must be converted'; | |
$columnsToConvert[] = $column; | |
} else { | |
$action = 'not relevant'; | |
} | |
line('* %s: %s => %s', $column, $tableInfo['Type'], $action); | |
} | |
$convert = "ALTER TABLE `{$table}` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; ALTER TABLE `{$table}` ROW_FORMAT=DYNAMIC"; | |
line(); | |
line($convert); | |
$dbManager->exec($convert); | |
handlePossibleError($dbManager); | |
if (!empty($columnsToConvert)) { | |
$converts = array_map(function ($column) { | |
return "`{$column}` = CONVERT(BINARY(CONVERT(`{$column}` USING latin1)) USING utf8mb4)"; | |
}, $columnsToConvert); | |
$query = "UPDATE IGNORE `{$table}` SET ".implode(', ', $converts); | |
line(); | |
line($query); | |
$dbManager->exec($query); | |
handlePossibleError($dbManager); | |
} | |
line('--'); | |
line(); | |
} | |
} | |
$dbManager->exec('SET foreign_key_checks=1'); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment