From 63fe276dac70f64abf4719493634869c9e559568 Mon Sep 17 00:00:00 2001 From: Chris Nehren Date: Thu, 31 Jul 2008 22:02:09 +0000 Subject: [PATCH] Fixed: splitCSV and joinCSV had issues with complex CSV data. They now use Text::CSV_XS internally. --- docs/changelog/7.x.x.txt | 1 + lib/WebGUI/Text.pm | 63 +++++++++++++--------------------------- t/Text.t | 3 +- 3 files changed, 23 insertions(+), 44 deletions(-) diff --git a/docs/changelog/7.x.x.txt b/docs/changelog/7.x.x.txt index ee6b0f28a..54dbd7c36 100644 --- a/docs/changelog/7.x.x.txt +++ b/docs/changelog/7.x.x.txt @@ -13,6 +13,7 @@ - fixed: product title problem - fixed: adding payment brings me to / - fixed: Gallery uses too much disk space. Added a way to change an images pixel density. + - fixed: splitCSV and joinCSV had issues with complex CSV data 7.5.18 - fixed: Collateral Image Manager broken in Firefox 3 diff --git a/lib/WebGUI/Text.pm b/lib/WebGUI/Text.pm index 4776338a7..4569de3e7 100644 --- a/lib/WebGUI/Text.pm +++ b/lib/WebGUI/Text.pm @@ -18,6 +18,7 @@ package WebGUI::Text; use strict; #use warnings; +use Text::CSV_XS; use base 'Exporter'; our @EXPORT_OK = qw( @@ -31,6 +32,9 @@ our %EXPORT_TAGS = ( ); +# use a single CSV object instead of reconstructing one repeatedly +my $csv = Text::CSV_XS->new( { binary => 1 } ); + =head1 NAME WebGUI::Text - Routines for manipulating text. @@ -63,24 +67,14 @@ string according to the de-facto standard outlined by RFC 4180. =cut sub joinCSV { - my @input = @_; - my @fixed; # The properly escaped data - for my $i (@input) { - # Ignore all characters that aren't ASCII printable characters - $i =~ s/[^\x09\x20-\x7e]//g; - - # All strings with these chars in them must be quoted - if ($i =~ /[",\n\t]/ || $i =~ /^\s|\s$/s) { - # " must be doubled ("") - $i =~ s/"/""/g; - - $i = qq{"$i"}; - } - - push @fixed, $i; - } - - return join ",",@fixed; + my @inputColumns = @_; + $csv->combine(@inputColumns); + my $joinedLine = $csv->string; + if(my $errorString = $csv->error_diag) { + warn "Problems parsing @inputColumns: $errorString"; + return; + } + return $joinedLine; } @@ -94,31 +88,14 @@ Splits a CSV string and fixes any escaping done. =cut sub splitCSV { - my $s = shift; - - # Split on , - # Negative LIMIT so that empty trailing fields are preserved - my @array = split /,/, $s, -1; - - for (my $i = 0; $i < @array; $i++) { - # Fix quoted strings being used to escape commas. - # If it begins with a " but doesn't end with an odd number of " - # shift, add to previous, and try again - if ($array[$i] =~ /^"/s && length(($array[$i] =~ m/("*)$/s)[0]) % 2 == 0 ) { - # If there are no more elements, this line is erroneous - if ($i+1 > @array) { warn "Error parsing CSV line."; return undef; } - $array[$i] .= ",".splice(@array,$i+1,1); - redo; - } - - # Remove quotes on end of string - $array[$i] =~ s/^"|"$//sg; - - # Fix doubled quotes - $array[$i] =~ s/""/"/g; - } - - return @array; + my $inputString = shift; + $csv->parse($inputString); + my @splitColumns = $csv->fields; + if(my $errorString = $csv->error_diag) { + warn "Problems parsing $inputString: $errorString"; + return; + } + return @splitColumns; } diff --git a/t/Text.t b/t/Text.t index 8f062cf99..9531855ba 100644 --- a/t/Text.t +++ b/t/Text.t @@ -23,7 +23,8 @@ my @tests = ['all null', ['', '', ''], ',,'], ['single null', [], ''], ['escape commas', ['w,x', 'y,z'], '"w,x","y,z"'], - ['escape double quotes', ['abc"def', 'ghi-jkl', 'mnop'], '"abc""def",ghi-jkl,mnop']); + ['escape double quotes', ['abc"def', 'ghi-jkl', 'mnop'], '"abc""def",ghi-jkl,mnop'], + ['cruel embedded newlines', ['foo', 'bar', 'baz', "hello\nworld", 'how are you'], qq{foo,bar,baz,"hello\nworld","how are you"}]); plan(tests => scalar(@tests) * 2); foreach my $testspec (@tests) {