#!/usr/bin/perl -w -- # -*- Perl -*-
#
# gallery2xml -- convert Gallery albums into XML
#
# Version 1.2
#
# $Id: gallery2xml,v 1.10 2002/12/27 13:13:32 ndw Exp $
#
# Copyright (C) 2002 Norman Walsh, All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
use strict;
use Data::Dumper;
use Getopt::Std;
use vars qw($opt_b $opt_o $opt_v);
my $usage = "$0 [-v] [ -b basedir ] [ -o output.xml ] albumName\n";
die $usage if !getopts('b:o:v');
my $albumName = shift @ARGV || die $usage;
my %parse_func =
("O:" => \&parse_object,
"a:" => \&parse_array,
"s:" => \&parse_string,
"i:" => \&parse_integer,
"b:" => \&parse_integer,
"N;" => \&parse_null);
my $basedir = $opt_b || "/share/online/www/albums";
my $verbose = $opt_v;
my $data = undef;
my %ALBUMS = ();
my @ALBUMS = ();
$ALBUMS{$albumName} = 1;
push (@ALBUMS, $albumName);
if ($opt_o) {
open (OUT, ">$opt_o") || die "Failed to open $opt_o\n";
} else {
open (OUT, ">-");
}
print OUT "\n\n";
while (@ALBUMS) {
$data = undef;
my $albumName = shift @ALBUMS;
my $file = "$basedir/$albumName/photos.dat";
my $ptree = undef;
my $pindex = undef;
if (-f $file) {
print STDERR "Parsing $file ($albumName)\n";
open (F, $file);
read (F, $data, -s $file);
close (F);
($ptree, $pindex) = &parse(0);
if ($verbose) {
print '='x60, "\n";
print "=== $albumName ($file)\n";
print '='x60, "\n\n";
$Data::Dumper::Indent = 1;
print Dumper($ptree);
}
}
$file = "$basedir/$albumName/album.dat";
print STDERR "Parsing $file ($albumName)\n";
open (F, $file);
read (F, $data, -s $file);
close (F);
my ($tree, $index) = &parse(0);
if ($verbose) {
print '='x60, "\n";
print "=== $albumName ($file)\n";
print '='x60, "\n\n";
$Data::Dumper::Indent = 1;
print Dumper($tree);
}
foreach my $key (keys %{$tree}) {
if ($key eq 'album (class)') {
&dumpAlbum($albumName, $tree->{$key}, $ptree);
} else {
die "Unexpected key in album: $key\n";
}
}
}
print OUT "\n";
# ======================================================================
sub dumpAlbum {
my $name = shift;
my $album = shift;
my $photos = shift;
print OUT "\n";
foreach my $item (@{$album}) {
if (exists $item->{'fields'}) {
print OUT " $name\n"; # sometimes the name field is wrong!!?
&dumpFields($item->{'fields'});
} elsif (exists $item->{'version'}) {
&dumpVersion($item->{'version'});
} elsif (exists $item->{'updateSerial'}) {
# who cares
} elsif (exists $item->{'photos'}) {
&dumpPhotos($name, $item->{'photos'});
} elsif (exists $item->{'tsilb'}) {
# nop
} else {
print STDERR "Unexected item: $item\n";
foreach my $key (keys %{$item}) {
print STDERR "\t$key\n";
}
exit 1;
}
}
if (defined $photos) {
dumpPhotos($name, $photos);
}
print OUT "\n\n";
}
sub dumpFields {
my $fields = shift;
foreach my $item (@{$fields}) {
next if ref $item ne 'HASH';
foreach my $field ('title', 'description',
'bgcolor', 'textcolor', 'linkcolor', 'font',
'border', 'bordercolor', 'thumb_size', 'resize_size',
'rows', 'cols', 'parentAlbumName',
'clicks', 'clicks_date', 'display_clicks',
'serial_number', 'last_mod_time') {
if (exists $item->{$field}) {
next if $item->{$field} eq '';
print OUT " <$field>";
if ($field eq 'clicks_date' || $field eq 'last_mod_time') {
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
localtime($item->{$field});
print OUT "\n";
print OUT " ", $year+1900, "\n";
print OUT " ", sprintf("%02d", $mon+1), "\n";
print OUT " ", sprintf("%02d", $mday), "\n";
print OUT " ", sprintf("%02d", $hour), "\n";
print OUT " ", sprintf("%02d", $min), "\n";
print OUT " ", sprintf("%02d", $sec), "\n";
print OUT " ";
} else {
print OUT &xml($item->{$field});
}
print OUT "$field>\n";
}
}
}
}
sub dumpVersion {
my $fields = shift;
print OUT " ", $fields, "\n";
}
sub dumpPhotos {
my $albumName = shift;
my $fields = shift;
die "Photos is not an array\n" if ref $fields ne 'ARRAY';
my @photos = @{$fields};
for (my $count = 0; $count <= $#photos; $count++) {
my $hash = $photos[$count]->{$count};
die "Missing key?\n" if !$hash->{'albumitem (class)'};
my @items = @{$hash->{'albumitem (class)'}};
# The 'isAlbumName' key does not come in a defined location in the array
# it's safe to assume this of the 'highlight' key as well
my $thisIsAnAlbum = undef;
my $thisIsHighlighted = undef;
foreach my $i (0..$#items) {
if (exists $items[$i]->{'isAlbumName'}) {
$thisIsAnAlbum = $i; # location in array of the isAlbumName key for later
}
if (exists $items[$i]->{'highlight'}) {
$thisIsHighlighted = $items[$i]->{'highlight'};
}
}
if (defined($thisIsAnAlbum)) {
print OUT " ";
print OUT $items[$thisIsAnAlbum]->{'isAlbumName'};
print OUT "\n";
# process this album too...
push (@ALBUMS, $items[$thisIsAnAlbum]->{'isAlbumName'})
if ! exists $ALBUMS{$items[$thisIsAnAlbum]->{'isAlbumName'}};
$ALBUMS{$items[$thisIsAnAlbum]->{'isAlbumName'}} = 1;
} else {
# All the images that have ever been the highlight image have the
# highlightImage key. But we only want to output that key for the
# one that actually is the highlight image. So we do a quick check
# to see if this one actually is...and while we're at it, we see if
# this image has cached exifData...
my $highlight = 0;
my $exifData = 0;
my $filename = undef;
foreach my $item (@items) {
$highlight = 1 if ($item->{'highlight'});
$exifData = 1 if ($item->{'exifData'});
}
print OUT " \n";
foreach my $item (@items) {
if ($item->{'image'}) {
$filename = &dumpImage('image', $item->{'image'});
} elsif ($item->{'thumbnail'}) {
&dumpImage('thumbnail', $item->{'thumbnail'});
} elsif ($item->{'highlightImage'}) {
&dumpImage('highlightImage', $item->{'highlightImage'})
if $highlight;
} elsif ($item->{'caption'}) {
print OUT " ", &xml($item->{'caption'}), "\n";
} elsif ($item->{'itemCaptureDate'}) {
print OUT " \n";
foreach my $field (@{$item->{'itemCaptureDate'}}) {
my @keys = keys %{$field};
die "What!?" if $#keys != 0;
my $key = $keys[0];
my $printkey = $key;
# problem with the XML if the tag is <0> and 0>
next if ($key eq '0');
$printkey = "hour" if $key eq 'hours';
$printkey = "min" if $key eq 'minutes';
$printkey = "sec" if $key eq 'seconds';
$printkey = "month" if $key eq 'mon';
$printkey = "day" if $key eq 'mday';
print OUT " <$printkey>", $field->{$key}, "$printkey>\n";
}
print OUT " \n";
} elsif ($item->{'uploadDate'}) {
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
localtime($item->{'uploadDate'});
print OUT " \n";
print OUT " ", $year+1900, "\n";
print OUT " ", sprintf("%02d", $mon+1), "\n";
print OUT " ", sprintf("%02d", $mday), "\n";
print OUT " ", sprintf("%02d", $hour), "\n";
print OUT " ", sprintf("%02d", $min), "\n";
print OUT " ", sprintf("%02d", $sec), "\n";
print OUT " \n";
} elsif (exists $item->{'highlight'}) {
# nop; handled by attribute above.
} elsif (exists $item->{'clicks'}) {
print OUT " ";
print OUT $item->{'clicks'};
print OUT "\n";
} elsif (exists $item->{'exifData'}) {
# look for the filename
my $filename = undef;
foreach my $field (@{$item->{'exifData'}}) {
my @keys = keys %{$field};
die "What!?" if $#keys != 0;
my $key = $keys[0];
next if $key eq '';
$filename = $field->{$key} if $key eq 'File name';
}
# Ok, we found the filename, now we suppress it anyway.
# In the XML, the exifData is always contained within the
# to which it applies.
$filename = undef;
print OUT " \n";
foreach my $field (@{$item->{'exifData'}}) {
my @keys = keys %{$field};
die "What!?" if $#keys != 0;
my $key = $keys[0];
next if $key eq '';
next if $key eq 'File name';
next if $key eq 'Comment';
print OUT " ", &xml($field->{$key}), "\n";
}
print OUT " \n";
} elsif (exists $item->{'keywords'}) {
print OUT " ", &xml($item->{'keywords'}), "\n";
} elsif ($item->{'comments'}) {
# nop; ignore them
} else {
print STDERR "Unexpected key in photos: ", join(", ", keys %{$item}), "\n";
}
}
if (!$exifData && defined($filename)) {
my $fname = "$albumName/$filename.jpg";
my $full_fname = "$basedir/$albumName/$filename.jpg";
if (-f $full_fname) {
print OUT " \n";
open (JHEAD, "jhead $full_fname |");
while () {
chop;
next if !/^(.*?)\s*:\s*(.*?)\s*$/;
my $field = $1;
my $value = $2;
next if $field eq 'File name';
# exif comments seem to contain random binary data...skip them
next if $field eq 'Comment';
print OUT " ", &xml($value), "\n";
}
close (JHEAD);
print OUT " \n";
}
}
print OUT " \n";
}
}
}
sub dumpImage {
my $type = shift;
my $image = shift;
my $filename = undef;
print OUT " <$type>\n";
foreach my $field (@{$image->{'image (class)'}}) {
my @keys = keys %{$field};
die "What!?" if $#keys != 0;
my $key = $keys[0];
print OUT " <$key>", &xml($field->{$key}), "$key>\n";
$filename = $field->{$key} if $key eq 'name';
}
print OUT " $type>\n";
return $filename;
}
# ======================================================================
sub parse {
my $index = shift;
my $type = substr($data, $index, 2);
my $func = $parse_func{$type};
if (!$func) {
die "Unknown type '$type' at position $index";
}
&$func($index+2);
}
sub parse_object {
my $index = shift;
my $self = {};
my ($name, $members);
($name, $index) = &get_string($index);
$index = skip($index, ':');
($members, $index) = &parse_array($index);
$self->{"$name (class)"} = $members;
return ($self, $index);
}
sub parse_null {
my $index = shift;
return (undef, $index);
}
sub parse_integer {
my $index = shift;
my $value;
my $self = {};
($value, $index) = get_number($index);
$index = skip($index, ';');
return ($value, $index);
}
sub parse_string {
my $index = shift;
my $value;
my $size = 0;
($value, $index) = get_string($index);
$index = skip($index, ';');
return ($value, $index);
}
sub parse_array {
my $index = shift;
my $self = [];
my $count = 0;
($count, $index) = get_number($index);
$index = skip($index, ':{');
for (my $i = 0; $i < $count; $i++) {
my ($key, $value);
($key, $index) = &parse($index);
($value, $index) = &parse($index);
push(@$self, { $key => $value })
if defined($value);
}
$index = skip($index, '}');
return ($self, $index);
}
sub get_string {
my $index = shift;
my $size;
my $value;
($size, $index) = get_number($index);
$index = skip($index, ':"');
$value = substr($data, $index, $size);
$index += $size;
$index = skip($index, '"');
return ($value, $index);
}
sub get_number {
my $index = shift;
my $ch;
my $num = undef;
while(($ch = substr($data, $index, 1)) =~ /\d/) {
$index++;
if (!defined($num)) {
$num = 0;
}
$num = 10 * $num + int($ch);
}
if (!defined($num)) {
die "Expected number at $index, found '$ch' instead\n";
}
return ($num, $index);
}
sub skip {
my $index = shift;
my $expected = shift;
my $size = length($expected);
my $actual = substr($data, $index, $size);
if ($actual ne $expected) {
die "Expected '$expected' at $index, found '$actual' instead\n";
exit;
}
return $index + $size;
}
sub xml {
my $str = shift;
local $_ = "";
# We have to be careful here. We need to convert this string into XML
# which means we need to replace literal <, &, and > (because of the ]]>
# restriction) into entities. But we must not accidentally encode the
# & on numeric character references...
# Step 1, remove numeric character references by UTF8 encoding the string
$str =~ s/\&\#(\d+);/XmlUtf8Encode($1)/sge;
# Step 2, turn it back into text, encoding the special chars
# I used to do this with XML::DOM::encodeText, but that turns out
# to be dangerous if one of the multi-byte Unicode sequence chars
# happens to be in the string.
#$str = XML::DOM::encodeText($str, '&<');
# Step 3, turn control characters into numeric character refs
my @chars = split(//, $str);
foreach my $char (@chars) {
if ($char eq '&') {
$_ .= "&";
} elsif ($char eq '<') {
$_ .= "<";
} elsif ($char eq '>') {
$_ .= ">";
} elsif ((ord($char) < 32 || ord($char) > 127)
&& (ord($char) != 9
&& ord($char) != 10
&& ord($char) != 13)) {
$_ .= "" . ord($char) . ";";
} else {
$_ .= $char;
}
}
return $_;
}
sub XmlUtf8Encode {
my $n = shift;
if ($n < 0x80) {
return chr ($n);
} elsif ($n < 0x800) {
return pack ("CC", (($n >> 6) | 0xc0), (($n & 0x3f) | 0x80));
} elsif ($n < 0x10000) {
return pack ("CCC", (($n >> 12) | 0xe0), ((($n >> 6) & 0x3f) | 0x80),
(($n & 0x3f) | 0x80));
} elsif ($n < 0x110000) {
return pack ("CCCC", (($n >> 18) | 0xf0), ((($n >> 12) & 0x3f) | 0x80),
((($n >> 6) & 0x3f) | 0x80), (($n & 0x3f) | 0x80));
}
die "number is too large for Unicode [$n] in &XmlUtf8Encode";
}