#!/pro/bin/perl # csv2xlsx: Convert csv to xlsx # (m)'16 [06 Jun 2016] Copyright H.M.Brand 2007-2017 use 5.14.0; use warnings; our $VERSION = "1.01"; sub usage { my $err = shift and select STDERR; print <] [-q ] [-w ] [-d ] [-o ] [file.csv] -s use as seperator char, auto-detect, default = ',' The string "tab" is allowed. -e use as escape char, auto-detect, default = '"' The string "undef" is allowed. -q use as quotation char, default = '"' The string "undef" will disable quotation. -w use as default minimum column width default = 4 -o write output to file named , defaults to input file name with .csv replaced with .xlsx if from standard input, defaults to csv2xlsx.xlsx -F allow formula's. Otherwise fields starting with an equal sign are forced to string -f force usage of if already exists (unlink before use) -d use as date formats. Default = 'dd-mm-yyyy' -C use as currency formats for currency , no default -D cols only convert dates in columns . Default is everywhere. -u CSV is UTF8 -m merge multiple CSV's into a single xlsx (separate sheets) -o is required, all arguments should be existing files -v [] verbosity (default = 1) EOU exit $err; } # usage use Getopt::Long qw(:config bundling passthrough); my $quo = '"'; my $esc = '"'; my $wdt = 4; # Default minimal column width my $dtf = "dd-mm-yyyy"; # Date format my $crf = ""; # Currency format, e.g.: $:### ### ##0.00 my $opt_v = 1; my $dtc; GetOptions ( "help|?" => sub { usage (0); }, "V|version" => sub { say $0 =~ s{.*/}{}r, " [$VERSION]"; exit 0; }, "c|s|sep=s" => \my $sep, # Set after reading first line in attempt to auto-detect "q|quo=s" => \$quo, "e|esc=s" => \$esc, "w|width=i" => \$wdt, "o|x|out=s" => \my $xls, "d|date-fmt=s" => \$dtf, "D|date-col=s" => \$dtc, "C|curr-fmt=s" => \$crf, "f|force!" => \my $frc, "F|formulas!" => \my $frm, "u|utf-8|utf8!" => \my $utf, "m|merge!" => \my $mrg, "v|verbose:1" => \$opt_v, ) or usage (1); if ($mrg) { my @csv; for (@ARGV) { if (m/\.xlsx?$/i) { $xls and usage (1); $xls = $_; next; } if (m/\.csv$/i && -s $_) { push @csv, $_; next; } warn "Argument $_ is not an existing (CSV) file\n"; usage (1); } $xls && @csv or usage (1); @ARGV = @csv; } my $base //= @ARGV && -f $ARGV[0] ? $ARGV[0] : "csv2xlsx"; $xls ||= $base =~ s/(?:\.csv)?$/.xlsx/ir; -s $xls && $frc and unlink $xls; if (-s $xls) { print STDERR "File '$xls' already exists. Overwrite? [y/N] > N\b"; scalar =~ m/^[yj](es|a)?$/i or exit; } # Don't split ourselves when modules do it _much_ better, and follow the standards use Text::CSV_XS; use Date::Calc qw( Delta_Days Days_in_Month ); use Excel::Writer::XLSX; use Encode qw( from_to ); my $wbk = Excel::Writer::XLSX->new ($xls); $dtf =~ s/j/y/g; my %fmt = ( date => $wbk->add_format (align => "center", num_format => $dtf), rest => $wbk->add_format (align => "left"), wrap => $wbk->add_format (text_wrap => 1), ); $crf =~ s/^([^:]+):(.*)/$1/ and $fmt{currency} = $wbk->add_format ( num_format => "$1 $2", align => "right", ); my @args = @ARGV ? @ARGV : (""); foreach my $csvf (@args) { my $wks = $wbk->add_worksheet ($csvf =~ s{\.csv$}{}ir || "Sheet 1"); my ($h, $w, @w) = (0, 1); # data height, -width, and default column widths my $row; my $firstline; my $fh; if (-f $csvf) { $opt_v and say "Reading $csvf"; open $fh, "<", $csvf or die "$csvf: $!\n"; } else { $opt_v and say "Reading STDIN"; $fh = *ARGV; } my $Sep = $sep; unless ($Sep) { # No sep char passed, try to auto-detect; while (<$fh>) { m/\S/ or next; # Skip empty leading blank lines $Sep = # start auto-detect with quoted strings m/["\d];["\d;]/ ? ";" : m/["\d],["\d,]/ ? "," : m/["\d]\t["\d,]/ ? "\t" : # If neither, then for unquoted strings m/\w;[\w;]/ ? ";" : m/\w,[\w,]/ ? "," : m/\w\t[\w,]/ ? "\t" : # And pipes (lowest prio) m/["\d]\|["\d,]/ ? "|" : m/\w\|[\w,]/ ? "|" : m/,/ && !m/[;\t]/ ? "," : m/;/ && !m/[,\t]/ ? ";" : m/\t/ && !m/[,;]/ ? "\t" : "," ; $firstline = $_; last; } } my $csv = Text::CSV_XS-> new ({ sep_char => $Sep eq "tab" ? "\t" : $Sep, quote_char => $quo eq "undef" ? undef : $quo, escape_char => $esc eq "undef" ? undef : $esc, binary => 1, keep_meta_info => 1, auto_diag => 1, }); if ($firstline) { $csv->parse ($firstline) or die $csv->error_diag (); $row = [ $csv->fields ]; } if ($opt_v > 3) { foreach my $k (qw( sep_char quote_char escape_char )) { my $c = $csv->$k () || "undef"; $c =~ s/\t/\\t/g; $c =~ s/\r/\\r/g; $c =~ s/\n/\\n/g; $c =~ s/\0/\\0/g; $c =~ s/([\x00-\x1f\x7f-\xff])/sprintf"\\x{%02x}",ord$1/ge; printf STDERR "%-11s = %s\n", $k, $c; } } if (my $rows = $dtc) { $rows =~ s/-$/-999/; # 3,6- $rows =~ s/-/../g; eval "\$dtc = { map { \$_ => 1 } $rows }"; } while ($row && @$row or $row = $csv->getline ($fh)) { my @row = @$row; @row > $w and push @w, ($wdt) x (($w = @row) - @w); foreach my $c (0 .. $#row) { my $val = $row[$c] // ""; my $l = length $val; $l > ($w[$c] // -1) and $w[$c] = $l; if ($utf and $csv->is_binary ($c)) { from_to ($val, "utf-8", "ucs2"); $wks->write_unicode ($h, $c, $val); next; } if ($csv->is_quoted ($c)) { $val =~ s/\r\n/\n/g; if ($utf) { from_to ($val, "utf-8", "ucs2"); $val =~ m/\n/ ? $wks->write_unicode ($h, $c, $val, $fmt{wrap}) : $wks->write_unicode ($h, $c, $val); } else { $val =~ m/\n/ ? $wks->write_string ($h, $c, $val, $fmt{wrap}) : $wks->write_string ($h, $c, $val); } next; } if (!$dtc or $dtc->{$c + 1}) { my @d = (0, 0, 0); # Y, M, D $val =~ m/^(\d{4})(\d{2})(\d{2})$/ and @d = ($1, $2, $3); $val =~ m/^(\d{2})-(\d{2})-(\d{4})$/ and @d = ($3, $2, $1); if ( $d[2] >= 1 && $d[2] <= 31 && $d[1] >= 1 && $d[1] <= 12 && $d[0] >= 1900 && $d[0] <= 2199) { my $dm = Days_in_Month (@d[0,1]); $d[2] < 1 and $d[2] = 1; $d[2] > $dm and $d[2] = $dm; my $dt = 2 + Delta_Days (1900, 1, 1, @d); $wks->write ($h, $c, $dt, $fmt{date}); next; } } if ($crf and $val =~ m/^\s*\Q$crf\E\s*([0-9.]+)$/) { $wks->write ($h, $c, $1 + 0, $fmt{currency}); next; } if (!$frm && $val =~ m/^=/) { $wks->write_string ($h, $c, $val); } else { $wks->write ($h, $c, $val); } } ++$h % 100 or $opt_v && printf STDERR "%6d x %6d\r", $w, $h; } continue { $row = undef } close $fh; $opt_v && printf STDERR "%6d x %6d\n", $w, $h; $wks->set_column ($_, $_, $w[$_]) for 0 .. $#w; } $wbk->close ();