#!/usr/bin/perl -w -- # -*- Perl -*- # # gallery2xml -- convert Gallery albums into XML # # Version 1.2 # # $Id: gallery2xml,v 1.10 2002/12/27 13:13:32 ndw Exp $ # # Copyright (C) 2002 Norman Walsh, All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; version 2 dated June, 1991. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # use strict; use Data::Dumper; use Getopt::Std; use vars qw($opt_b $opt_o $opt_v); my $usage = "$0 [-v] [ -b basedir ] [ -o output.xml ] albumName\n"; die $usage if !getopts('b:o:v'); my $albumName = shift @ARGV || die $usage; my %parse_func = ("O:" => \&parse_object, "a:" => \&parse_array, "s:" => \&parse_string, "i:" => \&parse_integer, "b:" => \&parse_integer, "N;" => \&parse_null); my $basedir = $opt_b || "/share/online/www/albums"; my $verbose = $opt_v; my $data = undef; my %ALBUMS = (); my @ALBUMS = (); $ALBUMS{$albumName} = 1; push (@ALBUMS, $albumName); if ($opt_o) { open (OUT, ">$opt_o") || die "Failed to open $opt_o\n"; } else { open (OUT, ">-"); } print OUT "\n\n"; while (@ALBUMS) { $data = undef; my $albumName = shift @ALBUMS; my $file = "$basedir/$albumName/photos.dat"; my $ptree = undef; my $pindex = undef; if (-f $file) { print STDERR "Parsing $file ($albumName)\n"; open (F, $file); read (F, $data, -s $file); close (F); ($ptree, $pindex) = &parse(0); if ($verbose) { print '='x60, "\n"; print "=== $albumName ($file)\n"; print '='x60, "\n\n"; $Data::Dumper::Indent = 1; print Dumper($ptree); } } $file = "$basedir/$albumName/album.dat"; print STDERR "Parsing $file ($albumName)\n"; open (F, $file); read (F, $data, -s $file); close (F); my ($tree, $index) = &parse(0); if ($verbose) { print '='x60, "\n"; print "=== $albumName ($file)\n"; print '='x60, "\n\n"; $Data::Dumper::Indent = 1; print Dumper($tree); } foreach my $key (keys %{$tree}) { if ($key eq 'album (class)') { &dumpAlbum($albumName, $tree->{$key}, $ptree); } else { die "Unexpected key in album: $key\n"; } } } print OUT "\n"; # ====================================================================== sub dumpAlbum { my $name = shift; my $album = shift; my $photos = shift; print OUT "\n"; foreach my $item (@{$album}) { if (exists $item->{'fields'}) { print OUT " $name\n"; # sometimes the name field is wrong!!? &dumpFields($item->{'fields'}); } elsif (exists $item->{'version'}) { &dumpVersion($item->{'version'}); } elsif (exists $item->{'updateSerial'}) { # who cares } elsif (exists $item->{'photos'}) { &dumpPhotos($name, $item->{'photos'}); } elsif (exists $item->{'tsilb'}) { # nop } else { print STDERR "Unexected item: $item\n"; foreach my $key (keys %{$item}) { print STDERR "\t$key\n"; } exit 1; } } if (defined $photos) { dumpPhotos($name, $photos); } print OUT "\n\n"; } sub dumpFields { my $fields = shift; foreach my $item (@{$fields}) { next if ref $item ne 'HASH'; foreach my $field ('title', 'description', 'bgcolor', 'textcolor', 'linkcolor', 'font', 'border', 'bordercolor', 'thumb_size', 'resize_size', 'rows', 'cols', 'parentAlbumName', 'clicks', 'clicks_date', 'display_clicks', 'serial_number', 'last_mod_time') { if (exists $item->{$field}) { next if $item->{$field} eq ''; print OUT " <$field>"; if ($field eq 'clicks_date' || $field eq 'last_mod_time') { my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($item->{$field}); print OUT "\n"; print OUT " ", $year+1900, "\n"; print OUT " ", sprintf("%02d", $mon+1), "\n"; print OUT " ", sprintf("%02d", $mday), "\n"; print OUT " ", sprintf("%02d", $hour), "\n"; print OUT " ", sprintf("%02d", $min), "\n"; print OUT " ", sprintf("%02d", $sec), "\n"; print OUT " "; } else { print OUT &xml($item->{$field}); } print OUT "\n"; } } } } sub dumpVersion { my $fields = shift; print OUT " ", $fields, "\n"; } sub dumpPhotos { my $albumName = shift; my $fields = shift; die "Photos is not an array\n" if ref $fields ne 'ARRAY'; my @photos = @{$fields}; for (my $count = 0; $count <= $#photos; $count++) { my $hash = $photos[$count]->{$count}; die "Missing key?\n" if !$hash->{'albumitem (class)'}; my @items = @{$hash->{'albumitem (class)'}}; # The 'isAlbumName' key does not come in a defined location in the array # it's safe to assume this of the 'highlight' key as well my $thisIsAnAlbum = undef; my $thisIsHighlighted = undef; foreach my $i (0..$#items) { if (exists $items[$i]->{'isAlbumName'}) { $thisIsAnAlbum = $i; # location in array of the isAlbumName key for later } if (exists $items[$i]->{'highlight'}) { $thisIsHighlighted = $items[$i]->{'highlight'}; } } if (defined($thisIsAnAlbum)) { print OUT " "; print OUT $items[$thisIsAnAlbum]->{'isAlbumName'}; print OUT "\n"; # process this album too... push (@ALBUMS, $items[$thisIsAnAlbum]->{'isAlbumName'}) if ! exists $ALBUMS{$items[$thisIsAnAlbum]->{'isAlbumName'}}; $ALBUMS{$items[$thisIsAnAlbum]->{'isAlbumName'}} = 1; } else { # All the images that have ever been the highlight image have the # highlightImage key. But we only want to output that key for the # one that actually is the highlight image. So we do a quick check # to see if this one actually is...and while we're at it, we see if # this image has cached exifData... my $highlight = 0; my $exifData = 0; my $filename = undef; foreach my $item (@items) { $highlight = 1 if ($item->{'highlight'}); $exifData = 1 if ($item->{'exifData'}); } print OUT " \n"; foreach my $item (@items) { if ($item->{'image'}) { $filename = &dumpImage('image', $item->{'image'}); } elsif ($item->{'thumbnail'}) { &dumpImage('thumbnail', $item->{'thumbnail'}); } elsif ($item->{'highlightImage'}) { &dumpImage('highlightImage', $item->{'highlightImage'}) if $highlight; } elsif ($item->{'caption'}) { print OUT " ", &xml($item->{'caption'}), "\n"; } elsif ($item->{'itemCaptureDate'}) { print OUT " \n"; foreach my $field (@{$item->{'itemCaptureDate'}}) { my @keys = keys %{$field}; die "What!?" if $#keys != 0; my $key = $keys[0]; my $printkey = $key; # problem with the XML if the tag is <0> and next if ($key eq '0'); $printkey = "hour" if $key eq 'hours'; $printkey = "min" if $key eq 'minutes'; $printkey = "sec" if $key eq 'seconds'; $printkey = "month" if $key eq 'mon'; $printkey = "day" if $key eq 'mday'; print OUT " <$printkey>", $field->{$key}, "\n"; } print OUT " \n"; } elsif ($item->{'uploadDate'}) { my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($item->{'uploadDate'}); print OUT " \n"; print OUT " ", $year+1900, "\n"; print OUT " ", sprintf("%02d", $mon+1), "\n"; print OUT " ", sprintf("%02d", $mday), "\n"; print OUT " ", sprintf("%02d", $hour), "\n"; print OUT " ", sprintf("%02d", $min), "\n"; print OUT " ", sprintf("%02d", $sec), "\n"; print OUT " \n"; } elsif (exists $item->{'highlight'}) { # nop; handled by attribute above. } elsif (exists $item->{'clicks'}) { print OUT " "; print OUT $item->{'clicks'}; print OUT "\n"; } elsif (exists $item->{'exifData'}) { # look for the filename my $filename = undef; foreach my $field (@{$item->{'exifData'}}) { my @keys = keys %{$field}; die "What!?" if $#keys != 0; my $key = $keys[0]; next if $key eq ''; $filename = $field->{$key} if $key eq 'File name'; } # Ok, we found the filename, now we suppress it anyway. # In the XML, the exifData is always contained within the # to which it applies. $filename = undef; print OUT " \n"; foreach my $field (@{$item->{'exifData'}}) { my @keys = keys %{$field}; die "What!?" if $#keys != 0; my $key = $keys[0]; next if $key eq ''; next if $key eq 'File name'; next if $key eq 'Comment'; print OUT " ", &xml($field->{$key}), "\n"; } print OUT " \n"; } elsif (exists $item->{'keywords'}) { print OUT " ", &xml($item->{'keywords'}), "\n"; } elsif ($item->{'comments'}) { # nop; ignore them } else { print STDERR "Unexpected key in photos: ", join(", ", keys %{$item}), "\n"; } } if (!$exifData && defined($filename)) { my $fname = "$albumName/$filename.jpg"; my $full_fname = "$basedir/$albumName/$filename.jpg"; if (-f $full_fname) { print OUT " \n"; open (JHEAD, "jhead $full_fname |"); while () { chop; next if !/^(.*?)\s*:\s*(.*?)\s*$/; my $field = $1; my $value = $2; next if $field eq 'File name'; # exif comments seem to contain random binary data...skip them next if $field eq 'Comment'; print OUT " ", &xml($value), "\n"; } close (JHEAD); print OUT " \n"; } } print OUT " \n"; } } } sub dumpImage { my $type = shift; my $image = shift; my $filename = undef; print OUT " <$type>\n"; foreach my $field (@{$image->{'image (class)'}}) { my @keys = keys %{$field}; die "What!?" if $#keys != 0; my $key = $keys[0]; print OUT " <$key>", &xml($field->{$key}), "\n"; $filename = $field->{$key} if $key eq 'name'; } print OUT " \n"; return $filename; } # ====================================================================== sub parse { my $index = shift; my $type = substr($data, $index, 2); my $func = $parse_func{$type}; if (!$func) { die "Unknown type '$type' at position $index"; } &$func($index+2); } sub parse_object { my $index = shift; my $self = {}; my ($name, $members); ($name, $index) = &get_string($index); $index = skip($index, ':'); ($members, $index) = &parse_array($index); $self->{"$name (class)"} = $members; return ($self, $index); } sub parse_null { my $index = shift; return (undef, $index); } sub parse_integer { my $index = shift; my $value; my $self = {}; ($value, $index) = get_number($index); $index = skip($index, ';'); return ($value, $index); } sub parse_string { my $index = shift; my $value; my $size = 0; ($value, $index) = get_string($index); $index = skip($index, ';'); return ($value, $index); } sub parse_array { my $index = shift; my $self = []; my $count = 0; ($count, $index) = get_number($index); $index = skip($index, ':{'); for (my $i = 0; $i < $count; $i++) { my ($key, $value); ($key, $index) = &parse($index); ($value, $index) = &parse($index); push(@$self, { $key => $value }) if defined($value); } $index = skip($index, '}'); return ($self, $index); } sub get_string { my $index = shift; my $size; my $value; ($size, $index) = get_number($index); $index = skip($index, ':"'); $value = substr($data, $index, $size); $index += $size; $index = skip($index, '"'); return ($value, $index); } sub get_number { my $index = shift; my $ch; my $num = undef; while(($ch = substr($data, $index, 1)) =~ /\d/) { $index++; if (!defined($num)) { $num = 0; } $num = 10 * $num + int($ch); } if (!defined($num)) { die "Expected number at $index, found '$ch' instead\n"; } return ($num, $index); } sub skip { my $index = shift; my $expected = shift; my $size = length($expected); my $actual = substr($data, $index, $size); if ($actual ne $expected) { die "Expected '$expected' at $index, found '$actual' instead\n"; exit; } return $index + $size; } sub xml { my $str = shift; local $_ = ""; # We have to be careful here. We need to convert this string into XML # which means we need to replace literal <, &, and > (because of the ]]> # restriction) into entities. But we must not accidentally encode the # & on numeric character references... # Step 1, remove numeric character references by UTF8 encoding the string $str =~ s/\&\#(\d+);/XmlUtf8Encode($1)/sge; # Step 2, turn it back into text, encoding the special chars # I used to do this with XML::DOM::encodeText, but that turns out # to be dangerous if one of the multi-byte Unicode sequence chars # happens to be in the string. #$str = XML::DOM::encodeText($str, '&<'); # Step 3, turn control characters into numeric character refs my @chars = split(//, $str); foreach my $char (@chars) { if ($char eq '&') { $_ .= "&"; } elsif ($char eq '<') { $_ .= "<"; } elsif ($char eq '>') { $_ .= ">"; } elsif ((ord($char) < 32 || ord($char) > 127) && (ord($char) != 9 && ord($char) != 10 && ord($char) != 13)) { $_ .= "&#" . ord($char) . ";"; } else { $_ .= $char; } } return $_; } sub XmlUtf8Encode { my $n = shift; if ($n < 0x80) { return chr ($n); } elsif ($n < 0x800) { return pack ("CC", (($n >> 6) | 0xc0), (($n & 0x3f) | 0x80)); } elsif ($n < 0x10000) { return pack ("CCC", (($n >> 12) | 0xe0), ((($n >> 6) & 0x3f) | 0x80), (($n & 0x3f) | 0x80)); } elsif ($n < 0x110000) { return pack ("CCCC", (($n >> 18) | 0xf0), ((($n >> 12) & 0x3f) | 0x80), ((($n >> 6) & 0x3f) | 0x80), (($n & 0x3f) | 0x80)); } die "number is too large for Unicode [$n] in &XmlUtf8Encode"; }