fixing database character sets

This commit is contained in:
Graham Knop 2008-04-03 14:53:33 +00:00
parent 5e6b0e0b2e
commit db6a8f28a9
4 changed files with 74 additions and 106 deletions

View file

@ -1,7 +1,6 @@
7.5.9
- fixed: Collaboration System attachments follow site's max size instead of CS's
- fixed: Rich editor image uploader doesn't follow size limits
- make sure all tables are using UTF-8
- fixed: Poll question/answers with international text corrupted
- fixed: Thingy: on demo sites the edit and delete icons in search results have wrong URLs
- fixed: Form::FieldType->getTypes should exclude Form::Slider base class (Yung Han Khoe)
@ -70,6 +69,7 @@
- fixed: events get start/end time even when none specified (also can offset start/end day)
- fixed: event related links may not work with some group names
- Use UTF8 for database connection when connecting to MySQL
- Convert all database tables to UTF8, repairing erroneous data
- Internationalized Calendar templates
- fixed: exporting as HTML leaks sessions for inaccessible assets,
- new YUI based date picker

View file

@ -7,7 +7,18 @@ upgrading from one version to the next, or even between multiple
versions. Be sure to heed the warnings contained herein as they will
save you many hours of grief.
7.5.1
7.5.9
--------------------------------------------------------------------
* WebGUI 7.5.6 uses a Unicode database connection, but this can cause problems
with old data that was stored in an erroneous format. The 7.5.6 upgrade has
been adjusted to compensate for this, but users who have already upgraded
will need to repair the data separately, because any international text added
after that upgrade is already stored in the correct format. Running the repair
on data that is already correct will corrupt it. If you have already upgraded,
see http://www.webgui.org/bugs/tracker/charset-db-connection
7.5.4
--------------------------------------------------------------------
* YUI has been upgraded to 2.5.0. The resizable textarea implementation in
WebGUI has been rewritten using YUI instead of the Ext library. This has

View file

@ -23,6 +23,7 @@ my $quiet; # this line required
my $session = start(); # this line required
convertCacheToBinary($session);
repairDBCharset($session);
addLayoutOrderSetting( $session );
installThingyAsset($session);
@ -36,12 +37,66 @@ sub convertCacheToBinary {
$session->db->write('DELETE FROM `cache`');
}
##-------------------------------------------------
#sub exampleFunction {
# my $session = shift;
# print "\tWe're doing some stuff here that you should know about.\n" unless ($quiet);
# # and here's our code
#}
#----------------------------------------------------------------------------
# repairDBCharset ( session )
#
# Repairs text that was stored through a non-UTF8 connection and makes every
# table default to utf8.
#
# For each TEXT/VARCHAR column of each table, three ALTER TABLE statements are
# queued: re-declare the column in the server's default connection character
# set, then as binary, then as utf8.  A final statement per table sets the
# table's default character set to utf8 when it is not already.  All queued
# statements are executed at the end, in the order they were generated.
#
# session   The session whose config (dsn, dbuser, dbpass) and database
#           handle are used.
#
# Dies if the default connection character set cannot be determined.
sub repairDBCharset {
    my $session = shift;
    print "\tRepairing erroneous UTF8 data. This may take a long time... " unless $quiet;

    # Bypass the normal connection to learn which character set the server
    # assigns to a connection by default.
    my $config  = $session->config;
    my $raw_dbh = DBI->connect( $config->get('dsn'), $config->get('dbuser'), $config->get('dbpass') )
        or die "Couldn't connect to the database to read its default character set: $DBI::errstr\n";
    my (undef, $connection_charset)
        = $raw_dbh->selectrow_array("SHOW VARIABLES LIKE 'character_set_connection'");
    $raw_dbh->disconnect;

    # The name is interpolated into SQL below, so refuse anything that is not
    # a plain character set name rather than generate broken statements.
    die "Couldn't determine the default connection character set\n"
        unless defined $connection_charset && $connection_charset =~ /^\w+$/;

    # Now use the normal connection
    my $dbh = $session->db->dbh;

    # Get table list
    my @tables;
    my $table_sth = $dbh->table_info(undef, undef, '%');
    while (my $row = $table_sth->fetchrow_hashref) {
        # Views cannot be ALTERed; only real tables need repairing
        next if defined $row->{TABLE_TYPE} && $row->{TABLE_TYPE} =~ /VIEW/i;
        push @tables, $row->{TABLE_NAME};
    }
    $table_sth->finish;

    my @stmts;
    for my $table (@tables) {
        my $quoted_table = $dbh->quote_identifier($table);

        # Find the table's default charset.  Everything up to the last ")" is
        # the column list; the table options follow it.
        my (undef, $create) = $dbh->selectrow_array('SHOW CREATE TABLE ' . $quoted_table);
        $create = '' unless defined $create;
        $create =~ s/.*\)//s;
        my $table_charset = $create =~ /CHARSET=(\S+)/ ? $1 : '';

        # Look at all columns, and queue repairs for the text ones
        my $column_sth = $dbh->prepare('SHOW FULL COLUMNS FROM ' . $quoted_table);
        $column_sth->execute;
        while (my $column = $column_sth->fetchrow_hashref) {
            next unless $column->{Type} =~ /TEXT|VARCHAR/i;
            my $quoted_column = $dbh->quote_identifier($column->{Field});

            # CHANGE replaces the whole column definition, so attributes that
            # are not repeated here would be dropped.  Carry over NOT NULL and
            # DEFAULT.  (Older servers report a blank Null instead of "NO".)
            my $attributes = '';
            $attributes .= ' NOT NULL'
                if defined $column->{Null} && $column->{Null} ne 'YES';
            $attributes .= ' DEFAULT ' . $dbh->quote($column->{Default})
                if defined $column->{Default};

            # Likewise keep a binary (case sensitive) collation binary
            my $is_binary_collation
                = defined $column->{Collation} && $column->{Collation} =~ /_bin$/;

            # Perl was always sending UTF8, but MySQL may have been treating it
            # as a different charset.  This led to it converting the text based
            # on the connection, even if the table was UTF8.
            #  1. Converting to the connection charset undoes that conversion.
            #  2. We should now have the correct byte sequences, but a direct
            #     convert to UTF8 would make MySQL attempt to convert them
            #     again.  Converting to binary first prevents this.
            #  3. Converting to UTF8 then stores the text properly.  We're using
            #     a UTF8 connection, so everything should just work from here on.
            for my $charset ($connection_charset, 'binary', 'utf8') {
                my $collate = ($is_binary_collation && $charset ne 'binary')
                    ? ' COLLATE ' . $charset . '_bin'
                    : '';
                push @stmts, join(' ',
                    'ALTER TABLE', $quoted_table,
                    'CHANGE', $quoted_column, $quoted_column, $column->{Type},
                    'CHARACTER SET', $charset,
                ) . $collate . $attributes;
            }
        }
        $column_sth->finish;

        # We also want to make sure the table itself is set as UTF8
        if ($table_charset ne 'utf8') {
            push @stmts, 'ALTER TABLE ' . $quoted_table . ' DEFAULT CHARACTER SET = utf8';
        }
    }

    for my $stmt (@stmts) {
        $dbh->do($stmt);
    }
    print "Done.\n" unless $quiet;
}
#----------------------------------------------------------------------------
# Add a column to the Gallery

View file

@ -22,7 +22,6 @@ my $quiet; # this line required
my $session = start(); # this line required
ensureUTF8($session);
addRichEditInlinePopup($session);
updateRichEditorButtons($session);
setPMFloatingDuration($session);
@ -81,103 +80,6 @@ sub updateRichEditorButtons {
}
#----------------------------------------------------------------------------
# ensureUTF8 ( session )
#
# Issues one "ALTER TABLE ... CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin"
# statement through the session's database object for every table named in
# the fixed list below, reporting progress on STDOUT unless $quiet is set.
sub ensureUTF8 {
    my ($session) = @_;

    unless ($quiet) {
        print "\tConverting all tables to UTF-8... ";
    }

    # The fixed list of tables to convert, in the order they are processed
    my @table_names = qw(
        Article
        Calendar Calendar_feeds
        Collaboration
        Dashboard
        DataForm DataForm_entry DataForm_entryData DataForm_field DataForm_tab
        Event
        EventManagementSystem EventManagementSystem_badges EventManagementSystem_discountPasses
        EventManagementSystem_metaData EventManagementSystem_metaField EventManagementSystem_prerequisiteEvents
        EventManagementSystem_prerequisites EventManagementSystem_products EventManagementSystem_purchases
        EventManagementSystem_registrations EventManagementSystem_sessionPurchaseRef
        Event_recur Event_relatedlink
        FileAsset
        Folder
        Gallery GalleryAlbum GalleryFile GalleryFile_comment
        HttpProxy
        ITransact_recurringStatus
        ImageAsset
        InOutBoard InOutBoard_delegates InOutBoard_status InOutBoard_statusLog
        Layout
        Matrix Matrix_field Matrix_listing Matrix_listingData Matrix_rating Matrix_ratingSummary
        MessageBoard
        MultiSearch
        Navigation
        Newsletter Newsletter_subscriptions
        PM_project PM_task PM_taskResource PM_wobject
        Photo Photo_rating
        Poll Poll_answer
        Post Post_rating
        Product Product_accessory Product_benefit Product_feature Product_related Product_specification
        RSSCapable RSSFromParent
        RichEdit
        SQLForm SQLForm_fieldDefinitions SQLForm_fieldOrder SQLForm_fieldTypes SQLForm_regexes
        SQLReport
        Shortcut Shortcut_overrides
        StockData
        Survey Survey_answer Survey_question Survey_questionResponse Survey_response Survey_section
        SyndicatedContent
        TT_projectList TT_projectResourceList TT_projectTasks TT_report TT_timeEntry TT_wobject
        Thingy Thingy_fields Thingy_things
        Thread Thread_read
        WSClient
        WeatherData
        WikiMaster WikiPage
        Workflow WorkflowActivity WorkflowActivityData WorkflowInstance WorkflowInstanceScratch WorkflowSchedule
        ZipArchiveAsset
        adSpace
        advertisement
        asset assetData assetHistory assetIndex assetKeyword assetVersionTag
        authentication
        cache
        commerceSalesTax commerceSettings
        databaseLink
        friendInvitations
        groupGroupings groupings groups
        imageColor imageFont imagePalette imagePaletteColors
        inbox
        incrementer
        karmaLog
        ldapLink
        mailQueue
        metaData_properties metaData_values
        passiveProfileAOI passiveProfileLog
        productParameterOptions productParameters productVariants products
        redirect
        replacements
        search
        settings
        shoppingCart
        snippet
        storageTranslation
        subscription subscriptionCode subscriptionCodeBatch subscriptionCodeSubscriptions
        template
        transaction transactionItem
        userInvitations
        userLoginLog
        userProfileCategory userProfileData userProfileField
        userSession userSessionScratch
        users
        webguiVersion
        wgFieldUserData
        wobject
    );

    # One conversion statement per table
    foreach my $name (@table_names) {
        my $sql = 'ALTER TABLE `' . $name . '` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin';
        $session->db->write($sql);
    }

    print "Done!\n" unless $quiet;
}
# -------------- DO NOT EDIT BELOW THIS LINE --------------------------------
#----------------------------------------------------------------------------