diff options
Diffstat (limited to 'intltool-merge.in')
-rw-r--r-- | intltool-merge.in | 1356 |
1 files changed, 1356 insertions, 0 deletions
diff --git a/intltool-merge.in b/intltool-merge.in new file mode 100644 index 0000000..9b3a539 --- /dev/null +++ b/intltool-merge.in @@ -0,0 +1,1356 @@ +#!@INTLTOOL_PERL@ -w +# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*- + +# +# The Intltool Message Merger +# +# Copyright (C) 2000, 2003 Free Software Foundation. +# Copyright (C) 2000, 2001 Eazel, Inc +# +# Intltool is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# version 2 published by the Free Software Foundation. +# +# Intltool is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. 
#
#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
#            Kenneth Christiansen <kenneth@gnu.org>
#            Darin Adler <darin@bentspoon.com>
#
#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
#

## Release information
my $PROGRAM = "intltool-merge";
my $PACKAGE = "intltool";
my $VERSION = "0.34.1";

## Loaded modules
use strict;
use Getopt::Long;
use Text::Wrap;
use File::Basename;

## File-scoped state.  Of these, only $leading_space is referenced by the
## subs below (traverse accumulates whitespace-only text nodes in it).
## NOTE(review): the remaining variables look unused in this file -- confirm
## before removing them.
my $must_end_tag = -1;
my $last_depth = -1;
my $translation_depth = -1;
my @tag_stack = ();
my @entered_tag = ();
my @translation_strings = ();
my $leading_space = "";

## Scalars used by the option stuff
my $HELP_ARG = 0;
my $VERSION_ARG = 0;
my $BA_STYLE_ARG = 0;
my $XML_STYLE_ARG = 0;
my $KEYS_STYLE_ARG = 0;
my $DESKTOP_STYLE_ARG = 0;
my $SCHEMAS_STYLE_ARG = 0;
my $RFC822DEB_STYLE_ARG = 0;
my $QUIET_ARG = 0;
my $PASS_THROUGH_ARG = 0;
my $UTF8_ARG = 0;
my $MULTIPLE_OUTPUT = 0;
my $cache_file;

## Handle options
##
## On a parse failure GetOptions returns false; report it through
## print_error (the usage-hint sub defined further down in this file).
## The previous code called "&error", which is not defined anywhere and
## therefore died with "Undefined subroutine &main::error".
GetOptions
(
 "help" => \$HELP_ARG,
 "version" => \$VERSION_ARG,
 "quiet|q" => \$QUIET_ARG,
 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
 "ba-style|b" => \$BA_STYLE_ARG,
 "xml-style|x" => \$XML_STYLE_ARG,
 "keys-style|k" => \$KEYS_STYLE_ARG,
 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
 "pass-through|p" => \$PASS_THROUGH_ARG,
 "utf8|u" => \$UTF8_ARG,
 "multiple-output|m" => \$MULTIPLE_OUTPUT,
 "cache|c=s" => \$cache_file
 ) or print_error();

## Positional arguments; filled in by preparation() from @ARGV.
my $PO_DIR;
my $FILE;
my $OUTFILE;

## language code => path of its .po file
my %po_files_by_lang = ();
## {$lang, $msgid} => msgstr (multi-dimensional hash key, joined with $;)
my %translations = ();
my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "/usr/bin/iconv";
my $devnull = ($^O eq 'MSWin32' ? 'NUL:' : '/dev/null');

# Use this instead of \w for XML files to handle more possible characters.
my $w = "[-A-Za-z0-9._:]";

# XML quoted string contents
my $q = "[^\\\"]*";

## Check for options.

## Merge styles in priority order: the first style whose option flag is set
## wins.  Each entry is [ option flag, merge routine, force UTF-8 ].  Every
## style except RFC822 runs utf8_sanity_check first.
my @merge_styles = (
    [ $BA_STYLE_ARG,        \&ba_merge_translations,        1 ],
    [ $XML_STYLE_ARG,       \&xml_merge_output,             1 ],
    [ $KEYS_STYLE_ARG,      \&keys_merge_translations,      1 ],
    [ $DESKTOP_STYLE_ARG,   \&desktop_merge_translations,   1 ],
    [ $SCHEMAS_STYLE_ARG,   \&schemas_merge_translations,   1 ],
    [ $RFC822DEB_STYLE_ARG, \&rfc822deb_merge_translations, 0 ],
);

if ($VERSION_ARG)
{
    print_version();
}
elsif ($HELP_ARG)
{
    print_help();
}
else
{
    my ($style) = grep { $_->[0] } @merge_styles;

    # A merge needs all three positional arguments
    # (PO_DIRECTORY FILENAME OUTPUT_FILE); otherwise show the usage text.
    if ($style && @ARGV > 2)
    {
        my (undef, $merge, $force_utf8) = @{ $style };

        utf8_sanity_check() if $force_utf8;
        preparation();
        print_message();
        $merge->();
        finalize();
    }
    else
    {
        print_help();
    }
}

exit;

## Sub for printing release information
sub print_version
{
    print <<_EOF_;
${PROGRAM} (${PACKAGE}) ${VERSION}
Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.

Copyright (C) 2000-2003 Free Software Foundation, Inc.
Copyright (C) 2000-2001 Eazel, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
_EOF_
    exit;
}

## Sub for printing usage information
sub print_help
{
    print <<_EOF_;
Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
Generates an output file that includes some localized attributes from an
untranslated source file.

Mandatory options: (exactly one must be specified)
  -b, --ba-style         includes translations in the bonobo-activation style
  -d, --desktop-style    includes translations in the desktop style
  -k, --keys-style       includes translations in the keys style
  -s, --schemas-style    includes translations in the schemas style
  -r, --rfc822deb-style  includes translations in the RFC822 style
  -x, --xml-style        includes translations in the standard xml style

Other options:
  -u, --utf8             convert all strings to UTF-8 before merging
                         (default for everything except RFC822 style)
  -p, --pass-through     deprecated, does nothing and issues a warning
  -m, --multiple-output  output one localized file per locale, instead of
                         a single file containing all localized elements
  -c, --cache=FILE       specify cache file name
                         (usually \$top_builddir/po/.intltool-merge-cache)
  -q, --quiet            suppress most messages
      --help             display this help and exit
      --version          output version information and exit

Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
or send email to <xml-i18n-tools\@gnome.org>.
_EOF_
    exit;
}


## Sub for printing error messages
##
## Prints a usage hint on STDERR and terminates with a non-zero status so
## that calling build scripts can detect the failure.
sub print_error
{
    print STDERR "Try `${PROGRAM} --help' for more information.\n";
    exit 1;
}


## Announce the output file on STDOUT unless --quiet was given.
sub print_message
{
    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
}


## Read the three positional arguments and load the translation database.
sub preparation
{
    $PO_DIR = $ARGV[0];
    $FILE = $ARGV[1];
    $OUTFILE = $ARGV[2];

    gather_po_files();
    get_translation_database();
}

# General-purpose code for looking up translations in .po files

# Map a .po path to its language code: ".../sv.po" -> "sv".  A string that
# does not match "<dir>/<name>.po" is returned unchanged.
sub po_file2lang
{
    my ($tmp) = @_;
    $tmp =~ s/^.*\/(.*)\.po$/$1/;
    return $tmp;
}

# Fill %po_files_by_lang with every *.po file found in $PO_DIR.
sub gather_po_files
{
    for my $po_file (glob "$PO_DIR/*.po") {
        $po_files_by_lang{po_file2lang($po_file)} = $po_file;
    }
}

# Look up $encoding in the charset alias file ($G_CHARSET_ALIAS or
# /usr/lib/charset.alias) and return the alias in the first column of the
# first matching line.  Returns $encoding unchanged when the file cannot be
# opened or contains no match.
sub get_local_charset
{
    my ($encoding) = @_;
    my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/lib/charset.alias";

    # seek character encoding aliases in charset.alias (glib)

    open(my $alias_fh, '<', $alias_file) or return $encoding;

    my $alias;
    while (my $line = <$alias_fh>)
    {
        next if $line =~ /^\#/;

        # \Q..\E: the encoding name is data, not a pattern.
        if ($line =~ /^\s*([-._a-zA-Z0-9]+)\s+\Q$encoding\E\b/i)
        {
            $alias = $1;
            last;
        }
    }

    # Close on every path; the old code returned from inside the loop and
    # left the handle open.
    close $alias_fh;

    # if not found, return input string

    return defined $alias ? $alias : $encoding;
}

# Return the charset declared in the Content-Type header of $in_po_file.
# Falls back to ISO-8859-1 (with a warning unless --quiet) when no charset
# is declared.  If iconv does not accept the name, it is mapped through
# get_local_charset.
sub get_po_encoding
{
    my ($in_po_file) = @_;
    my $encoding = "";

    open(my $po_fh, '<', $in_po_file) or die "can't open $in_po_file: $!";
    while (my $line = <$po_fh>)
    {
        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
        if ($line =~ /Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
        {
            $encoding = $1;
            last;
        }
    }
    close $po_fh;

    if (!$encoding)
    {
        print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
        $encoding = "ISO-8859-1";
    }

    # Probe iconv with empty input: a non-zero exit status means it does
    # not know this encoding name.
    system ("$iconv -f $encoding -t UTF-8 <$devnull 2>$devnull");
    if ($?)
    {
        $encoding = get_local_charset($encoding);
    }

    return $encoding;
}

# Warn about the removed --pass-through option and force UTF-8 conversion.
sub utf8_sanity_check
{
    print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG;
    $UTF8_ARG = 1;
}

# Load %translations, through the cache file when --cache was given.
sub get_translation_database
{
    if ($cache_file) {
        get_cached_translation_database();
    } else {
        create_translation_database();
    }
}

# Return the smallest -M age (in days, smaller means more recently
# modified) among the gathered .po files, or 0 when there are none.
sub get_newest_po_age
{
    my $newest_age;

    foreach my $file (values %po_files_by_lang)
    {
        my $file_age = -M $file;
        $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
    }

    $newest_age = 0 if !$newest_age;

    return $newest_age;
}

# Build %translations from the .po files and write it to $cache_file as a
# flat key/value list joined with \x01.  Dies if the cache file cannot be
# written.
sub create_cache
{
    print "Generating and caching the translation database\n" unless $QUIET_ARG;

    create_translation_database();

    # "or", not "||": with "||" the die was attached to the file name
    # string and a failed open went unnoticed.
    open(my $cache_fh, '>', $cache_file)
        or die "can't write cache file $cache_file: $!";
    print {$cache_fh} join("\x01", %translations);
    close $cache_fh or die "can't write cache file $cache_file: $!";
}

# Load %translations from $cache_file (format: see create_cache).
# Dies if the cache file cannot be read.
sub load_cache
{
    print "Found cached translation database\n" unless $QUIET_ARG;

    open(my $cache_fh, '<', $cache_file)
        or die "can't read cache file $cache_file: $!";
    my $contents = do { local $/; <$cache_fh> };   # slurp mode
    close $cache_fh;

    %translations = split "\x01", $contents;
}

# Use the cache when it exists and is at least as new as the newest .po
# file; otherwise (re)generate it.
sub get_cached_translation_database
{
    my $cache_file_age = -M $cache_file;
    if (defined $cache_file_age)
    {
        if ($cache_file_age <= get_newest_po_age())
        {
            load_cache();
            return;
        }
        print "Found too-old cached translation database\n" unless $QUIET_ARG;
    }

    create_cache();
}

# Parse every gathered .po file into %translations, keyed by
# {$lang, $msgid}.  Fuzzy entries and entries with an empty msgid or
# msgstr are skipped.  With $UTF8_ARG set, files that are not already
# UTF-8 are read through iconv.  Dies if a file cannot be opened.
sub create_translation_database
{
    for my $lang (keys %po_files_by_lang)
    {
        my $po_file = $po_files_by_lang{$lang};
        my $po_fh;

        if ($UTF8_ARG)
        {
            my $encoding = get_po_encoding ($po_file);

            if (lc $encoding eq "utf-8")
            {
                open($po_fh, '<', $po_file) or die "can't open $po_file: $!";
            }
            else
            {
                print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;

                # Single-string command: still run through the shell, as
                # before, so $iconv may carry its own arguments.
                open($po_fh, '-|', "$iconv -f $encoding -t UTF-8 $po_file")
                    or die "can't run $iconv on $po_file: $!";
            }
        }
        else
        {
            open($po_fh, '<', $po_file) or die "can't open $po_file: $!";
        }

        my $nextfuzzy = 0;
        my $inmsgid = 0;
        my $inmsgstr = 0;
        my $msgid = "";
        my $msgstr = "";

        while (my $line = <$po_fh>)
        {
            $nextfuzzy = 1 if $line =~ /^#, fuzzy/;

            if ($line =~ /^msgid "((\\.|[^\\])*)"/ )
            {
                my $text = $1;

                # A new msgid terminates the previous entry: store it.
                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
                $msgid = "";
                $msgstr = "";

                if ($nextfuzzy) {
                    $inmsgid = 0;
                } else {
                    $msgid = unescape_po_string($text);
                    $inmsgid = 1;
                }
                $inmsgstr = 0;
                $nextfuzzy = 0;
            }

            if ($line =~ /^msgstr "((\\.|[^\\])*)"/)
            {
                $msgstr = unescape_po_string($1);
                $inmsgstr = 1;
                $inmsgid = 0;
            }

            # Continuation line: append to whichever string is open.
            if ($line =~ /^"((\\.|[^\\])*)"/)
            {
                my $text = unescape_po_string($1);
                $msgid .= $text if $inmsgid;
                $msgstr .= $text if $inmsgstr;
            }
        }
        close $po_fh;

        # Store the last entry of the file.
        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
    }
}

# Hook called after a merge; currently does nothing.
sub finalize
{
}

# Translate one backslash escape sequence (as found in a .po string) into
# the character it denotes.  Unknown sequences are returned unchanged.
sub unescape_one_sequence
{
    my ($sequence) = @_;

    return "\\" if $sequence eq "\\\\";
    return "\"" if $sequence eq "\\\"";
    return "\n" if $sequence eq "\\n";
    return "\r" if $sequence eq "\\r";
    return "\t" if $sequence eq "\\t";
    return "\b" if $sequence eq "\\b";
    return "\f" if $sequence eq "\\f";
    return "\a" if $sequence eq "\\a";
    return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7)

    return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/);
    return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/);

    # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489

    return $sequence;
}

# Replace every escape sequence in a .po string by its character.
sub unescape_po_string
{
    my ($string) = @_;

    $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;

    return $string;
}

## NOTE: deal with < - &lt; but not > - &gt; because it seems its ok to have
## > in the entity. For further info please look at #84738.
##
## Returns a copy of the argument with &apos;, &quot;, &amp; and &lt;
## replaced by the characters they stand for.
## NOTE(review): &amp; is decoded before &lt;, so "&amp;lt;" becomes "<".
## The order is kept as it was; presumably it has to agree with the
## decoding used when the msgids were extracted -- confirm before changing.
sub entity_decode
{
    my ($text) = @_;

    $text =~ s/&apos;/'/g; # '
    $text =~ s/&quot;/"/g; # "
    $text =~ s/&amp;/&/g;
    $text =~ s/&lt;/</g;

    return $text;
}

# entity_encode: (string)
#
# Encode the given string to XML format (encode '<' etc).

sub entity_encode
{
    my ($pre_encoded) = @_;

    # Work on the individual byte values of the string.
    my @list_of_chars = unpack ('C*', $pre_encoded);

    # with UTF-8 we only encode minimalistic
    return join ('', map { entity_encode_int_minimalist() } @list_of_chars);
}

# Encode the byte value held in $_ (set by the map in entity_encode):
# the characters " & ' < become entity references, everything else is
# returned as the character itself.
sub entity_encode_int_minimalist
{
    return "&quot;" if $_ == 34;
    return "&amp;" if $_ == 38;
    return "&apos;" if $_ == 39;
    return "&lt;" if $_ == 60;
    return chr $_;
}

# Return the entity-encoded translation of $string for $lang, or $string
# itself (unchanged, not re-encoded) when there is no translation.
sub entity_encoded_translation
{
    my ($lang, $string) = @_;

    my $translation = $translations{$lang, $string};
    return $string if !$translation;
    return entity_encode ($translation);
}

## XML (bonobo-activation specific) merge code

# Copy $FILE to $OUTFILE.  Every tag that has attributes is written once
# with the leading "_" removed from its marked attributes, followed by one
# copy per language that has a translation for any marked value; in those
# copies the name attribute gets a "-<lang>" suffix.
sub ba_merge_translations
{
    my $source;

    {
       local $/; # slurp mode
       open(my $in, '<', $FILE) or die "can't open $FILE: $!";
       $source = <$in>;
       close $in;
    }

    open(my $out, '>', $OUTFILE) or die "can't open $OUTFILE: $!";
    # Binmode so that selftest works ok if using a native Win32 Perl...
    binmode ($out) if $^O eq 'MSWin32';

    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
    {
        my ($leading_text, $tag) = ($1, $2);

        print {$out} $leading_text;

        my $node = $tag . "\n";

        # Untranslated copy: strip the "_" markers and remember the values.
        my @strings = ();
        my $plain_node = $node;
        while ($plain_node =~ s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
             push @strings, entity_decode($3);
        }
        print {$out} $plain_node;

        my %langs;
        for my $string (@strings)
        {
            for my $lang (keys %po_files_by_lang)
            {
                $langs{$lang} = 1 if $translations{$lang, $string};
            }
        }

        for my $lang (sort keys %langs)
        {
            my $localized = $node;
            $localized =~ s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
            $localized =~ s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
            print {$out} $localized;
        }
    }

    print {$out} $source;

    close $out;
}


## XML (non-bonobo-activation) merge code


# Process tag attributes
#
# Parameters:
#   $sub          - HASH ref, attribute name -> value (value still quoted)
#   $do_translate - true: attributes whose name starts with "_" are marked
#   $language     - language to translate marked attributes into ("" = none)
#   $translate    - optional SCALAR ref; set to 2 when a marked attribute
#                   is seen
# Returns the attributes as a string (' key="value"' ...), in reverse
# sorted key order, with the "_" marker removed from marked keys.
sub getAttributeString
{
    my $sub = shift;
    my $do_translate = shift || 0;
    my $language = shift || "";
    my $result = "";
    my $translate = shift;
    foreach my $e (reverse(sort(keys %{ $sub }))) {
        my $key = $e;
        my $string = $sub->{$e};
        my $quote = '"';

        $string =~ s/^[\s]+//;
        $string =~ s/[\s]+$//;

        # Re-emit the value with the same kind of quote it came with.
        if ($string =~ /^'.*'$/)
        {
            $quote = "'";
        }
        $string =~ s/^['"]//g;
        $string =~ s/['"]$//g;

        if ($do_translate && $key =~ /^_/) {
            $key =~ s|^_||g;
            if ($language) {
                # Handle translation
                my $decode_string = entity_decode($string);
                my $translation = $translations{$language, $decode_string};
                if ($translation) {
                    $translation = entity_encode($translation);
                    $string = $translation;
                }
                # Guarded: callers may omit the fourth argument.
                $$translate = 2 if $translate;
            } else {
                $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" $translate
            }
        }

        $result .= " $key=$quote$string$quote";
    }
    return $result;
}

# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
#
# $ref is a content list in XML::Parser "Tree" layout (attribute hash
# first, then type/content pairs); $spacepreserve is the inherited
# xml:space state.  Text nodes have their whitespace runs collapsed to one
# space unless space is preserved; comment nodes (type "1") are skipped.
sub getXMLstring
{
    my $ref = shift;
    my $spacepreserve = shift || 0;
    my @list = @{ $ref };
    my $result = "";

    my $count = scalar(@list);
    my $attrs = $list[0];
    my $index = 1;

    $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
    $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));

    while ($index < $count) {
        my $type = $list[$index];
        my $content = $list[$index+1];
        if (! $type ) {
            # We've got CDATA
            if ($content) {
                # lets strip the whitespace here, and *ONLY* here
                $content =~ s/\s+/ /gs if (!$spacepreserve);
                $result .= $content;
            }
        } elsif ( "$type" ne "1" ) {
            # We've got another element
            $result .= "<$type";
            $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
            if ($content) {
                my $subresult = getXMLstring($content, $spacepreserve);
                if ($subresult) {
                    $result .= ">".$subresult . "</$type>";
                } else {
                    $result .= "/>";
                }
            } else {
                $result .= "/>";
            }
        }
        $index += 2;
    }
    return $result;
}

# Translate list of nodes if necessary
#
# Calls traverse on every type/content pair of $content.  When $singlelang
# is true, $MULTIPLE_OUTPUT is forced to 1 for the duration of each call so
# that only $language is emitted.
sub translate_subnodes
{
    my $fh = shift;
    my $content = shift;
    my $language = shift || "";
    my $singlelang = shift || 0;
    my $spacepreserve = shift || 0;

    my @nodes = @{ $content };

    my $count = scalar(@nodes);
    my $index = 0;
    while ($index < $count) {
        my $type = $nodes[$index];
        my $rest = $nodes[$index+1];

        my $oldMO = $MULTIPLE_OUTPUT;
        $MULTIPLE_OUTPUT = 1 if $singlelang;
        traverse($fh, $type, $rest, $language, $spacepreserve);
        $MULTIPLE_OUTPUT = $oldMO;

        $index += 2;
    }
}

# Return a true value (the parse tree) when $fragment, wrapped in a
# <root> element, parses as XML; return 0 for an empty/undefined fragment
# or one that does not parse.  Dies when XML::Parser is not installed.
sub isWellFormedXmlFragment
{
    my $ret = eval 'require XML::Parser';
    if(!$ret) {
        die "You must have XML::Parser installed to run $0\n\n";
    }

    my $fragment = shift;
    return 0 if (!$fragment);

    $fragment = "<root>$fragment</root>";
    my $xp = XML::Parser->new(Style => 'Tree');
    my $tree = 0;
    eval { $tree = $xp->parse($fragment); };
    return $tree;
}

# Return the translation of $string for $lang when it exists and is a
# well-formed XML fragment, undef otherwise.
sub lookup_xml_translation
{
    my ($lang, $string) = @_;

    my $translation = $translations{$lang, $string};
    return isWellFormedXmlFragment($translation) ? $translation : undef;
}

# Write one node of the parse tree to $fh, recursing into its children.
#
#   $nodename      - element name; false for a text node
#   $content       - text (text node) or content list (element)
#   $language      - language being produced ("" for the all-in-one file)
#   $spacepreserve - inherited xml:space state
#
# An element is translatable when its name starts with "_" ($translate ==
# 1, its text content is looked up) or when it carries "_"-marked
# attributes ($translate == 2, set by getAttributeString; its children are
# processed per language).  Unless $MULTIPLE_OUTPUT is set, a translatable
# element is followed by one xml:lang copy per language with a translation.
sub traverse
{
    my $fh = shift;
    my $nodename = shift;
    my $content = shift;
    my $language = shift || "";
    my $spacepreserve = shift || 0;

    if (!$nodename) {
        # Remember whitespace-only text so translated copies of the next
        # element can be indented like the original.
        if ($content =~ /^[\s]*$/) {
            $leading_space .= $content;
        }
        print $fh $content;
    } else {
        # element
        my @all = @{ $content };
        my $attrs = shift @all;
        my $translate = 0;
        my $outattr = getAttributeString($attrs, 1, $language, \$translate);

        if ($nodename =~ /^_/) {
            $translate = 1;
            $nodename =~ s/^_//;
        }
        my $lookup = '';

        $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
        $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));

        print $fh "<$nodename", $outattr;
        if ($translate) {
            $lookup = getXMLstring($content, $spacepreserve);
            if (!$spacepreserve) {
                $lookup =~ s/^\s+//s;
                $lookup =~ s/\s+$//s;
            }

            if ($lookup || $translate == 2) {
                # Plain assignment instead of "my $x = ... if COND", whose
                # behaviour perlsyn documents as undefined.
                my $translation = lookup_xml_translation($language, $lookup);
                if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
                    $translation = $lookup if (!$translation);
                    print $fh " xml:lang=\"", $language, "\"" if $language;
                    print $fh ">";
                    if ($translate == 2) {
                        translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
                    } else {
                        print $fh $translation;
                    }
                    print $fh "</$nodename>";

                    return; # this means there will be no same translation with xml:lang="$language"...
                            # if we want them both, just remove this "return"
                } else {
                    print $fh ">";
                    if ($translate == 2) {
                        translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
                    } else {
                        print $fh $lookup;
                    }
                    print $fh "</$nodename>";
                }
            } else {
                print $fh "/>";
            }

            for my $lang (sort keys %po_files_by_lang) {
                if ($MULTIPLE_OUTPUT && $lang ne "$language") {
                    next;
                }
                if ($lang) {
                    # Handle translation
                    #
                    my $lang_translate = 0;
                    my $localattrs = getAttributeString($attrs, 1, $lang, \$lang_translate);
                    my $translation = lookup_xml_translation($lang, $lookup);
                    if ($lang_translate && !$translation) {
                        $translation = $lookup;
                    }

                    if ($translation || $lang_translate) {
                        print $fh "\n";
                        # Keep only what follows the last newline, i.e.
                        # the indentation of the current line.
                        $leading_space =~ s/.*\n//g;
                        print $fh $leading_space;
                        print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
                        if ($lang_translate == 2) {
                            translate_subnodes($fh, \@all, $lang, 1, $spacepreserve);
                        } else {
                            print $fh $translation;
                        }
                        print $fh "</$nodename>";
                    }
                }
            }

        } else {
            my $count = scalar(@all);
            if ($count > 0) {
                print $fh ">";
                my $index = 0;
                while ($index < $count) {
                    my $type = $all[$index];
                    my $rest = $all[$index+1];
                    traverse($fh, $type, $rest, $language, $spacepreserve);
                    $index += 2;
                }
                print $fh "</$nodename>";
            } else {
                print $fh "/>";
            }
        }
    }
}

# XML::Parser handler: append a comment to the current content list as a
# type-"1" node.
# NOTE(review): readXml does not register this handler -- confirm whether
# it is meant to be used.
sub intltool_tree_comment
{
    my $expat = shift;
    my $data = shift;
    my $clist = $expat->{Curlist};

    push @$clist, 1 => $data;
}

# XML::Parser CdataStart handler: start a new text node (type 0) holding
# the original "<![CDATA[" markup.
sub intltool_tree_cdatastart
{
    my $expat = shift;
    my $clist = $expat->{Curlist};

    push @$clist, 0 => $expat->original_string();
}

# XML::Parser CdataEnd handler: append the original "]]>" markup to the
# last entry of the current content list.
sub intltool_tree_cdataend
{
    my $expat = shift;
    my $clist = $expat->{Curlist};
    my $pos = $#$clist;

    $clist->[$pos] .= $expat->original_string();
}

# XML::Parser Char handler: append text to the current content list,
# merging it into the preceding text node when there is one.
sub intltool_tree_char
{
    my $expat = shift;
    my $text = shift;
    my $clist = $expat->{Curlist};
    my $pos = $#$clist;

    # Use original_string so that we retain escaped entities
    # in CDATA sections.
    #
    if ($pos > 0 and $clist->[$pos - 1] eq '0') {
        $clist->[$pos] .= $expat->original_string();
    } else {
        push @$clist, 0 => $expat->original_string();
    }
}

# XML::Parser Start handler: open a new element in the tree.  Attribute
# values are stored as they appear in the source, including their quotes.
sub intltool_tree_start
{
    my $expat = shift;
    my $tag = shift;
    my @origlist = ();

    # Use original_string so that we retain escaped entities
    # in attribute values. We must convert the string to an
    # @origlist array to conform to the structure of the Tree
    # Style.
    #
    my $source = $expat->original_string();

    # Remove leading tag.
    #
    $source =~ s|^\s*<\s*(\S+)||s;

    # Grab attribute key/value pairs and push onto @origlist array.
    #
    while ($source)
    {
       if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
       {
           $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
           push @origlist, $1;
           push @origlist, '"' . $2 . '"';
       }
       elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
       {
           $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
           push @origlist, $1;
           push @origlist, "'" . $2 . "'";
       }
       else
       {
           last;
       }
    }

    my $ol = [ { @origlist } ];

    push @{ $expat->{Lists} }, $expat->{Curlist};
    push @{ $expat->{Curlist} }, $tag => $ol;
    $expat->{Curlist} = $ol;
}

# Parse $filename with XML::Parser (Tree style, with the handlers above)
# and return the tree.  Returns nothing when no file name is given; dies
# when the file does not exist or XML::Parser is not installed.
sub readXml
{
    my $filename = shift || return;
    if(!-f $filename) {
        die "ERROR Cannot find filename: $filename\n";
    }

    my $ret = eval 'require XML::Parser';
    if(!$ret) {
        die "You must have XML::Parser installed to run $0\n\n";
    }
    my $xp = XML::Parser->new(Style => 'Tree');
    $xp->setHandlers(Char => \&intltool_tree_char);
    $xp->setHandlers(Start => \&intltool_tree_start);
    $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
    $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);
    my $tree = $xp->parsefile($filename);

# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
# would be:
# [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
# 0, "Howdy",  ref, [{}]], 0, "do" ] ]

    return $tree;
}

# Write the XML declaration to $fh, followed by the DOCTYPE declaration of
# $infile when it has one.  Dies when $infile does not exist or cannot be
# read.
sub print_header
{
    my $infile = shift;
    my $fh = shift;
    my $source;

    if(!-f $infile) {
        die "ERROR Cannot find filename: $infile\n";
    }

    print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
    {
        local $/;
        # Read the file that was passed in (and checked above), not the
        # global $FILE.
        open(my $doc, '<', $infile) or die "can't open $infile: $!";
        $source = <$doc>;
        close $doc;
    }
    if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
    {
        print $fh "$1\n";
    }
    elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
    {
        print $fh "$1\n";
    }
}

# Write the tree returned by readXml to $fh for $language.  Leading text
# and comment nodes in front of the root element are skipped.
# Note: consumes (shifts) the entries of @$ref.
sub parseTree
{
    my $fh = shift;
    my $ref = shift;
    my $language = shift || "";

    my $name = shift @{ $ref };
    my $cont = shift @{ $ref };

    while (!$name || "$name" eq "1") {
        $name = shift @{ $ref };
        $cont = shift @{ $ref };
    }

    my $spacepreserve = 0;
    my $attrs = @{$cont}[0];
    $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));

    traverse($fh, $name, $cont, $language, $spacepreserve);
}

# Merge $FILE into $path for $lang (undef/"" = all languages in one file)
# and report the created file unless --quiet was given.
sub write_merged_xml
{
    my ($path, $lang) = @_;

    open(my $out, '>', $path) or die "Cannot open $path: $!\n";
    binmode ($out) if $^O eq 'MSWin32';
    my $tree = readXml($FILE);
    print_header($FILE, $out);
    parseTree($out, $tree, $lang);
    close $out;
    print "CREATED $path\n" unless $QUIET_ARG;
}

# XML style merge.  With --multiple-output one file per language is
# written to <lang>/$OUTFILE (the directory is created when missing);
# $OUTFILE itself is written in either mode.
sub xml_merge_output
{
    if ($MULTIPLE_OUTPUT) {
        for my $lang (sort keys %po_files_by_lang) {
            if ( ! -e $lang ) {
                mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
            }
            write_merged_xml("$lang/$OUTFILE", $lang);
        }
    }
    write_merged_xml($OUTFILE);
}

# Shared worker for the keys and desktop styles.  Copies $FILE to
# $OUTFILE; every "_key=value" line is written without the "_" marker and
# followed by one line per language that has a translation for value.
# $localize_key->($key, $lang) returns the key to use on such a line.
sub merge_marked_key_lines
{
    my ($localize_key) = @_;

    open(my $in, '<', $FILE) or die "can't open $FILE: $!";
    open(my $out, '>', $OUTFILE) or die "can't open $OUTFILE: $!";
    binmode ($out) if $^O eq 'MSWin32';

    while (my $line = <$in>)
    {
        if ($line =~ s/^(\s*)_(\w+=(.*))/$1$2/)
        {
            my $string = $3;

            print {$out} $line;

            for my $lang (sort keys %po_files_by_lang)
            {
                my $translation = $translations{$lang, $string};
                next if !$translation;

                my $localized = $line;
                $localized =~ s/(\w+)=.*/$localize_key->($1, $lang) . "=$translation"/e;
                print {$out} $localized;
            }
        }
        else
        {
            print {$out} $line;
        }
    }

    close $out;
    close $in;
}

# Keys style: translated lines look like "[lang]key=translation".
sub keys_merge_translations
{
    merge_marked_key_lines(sub { my ($key, $lang) = @_; return "[$lang]$key"; });
}

# Desktop style: translated lines look like "key[lang]=translation".
sub desktop_merge_translations
{
    merge_marked_key_lines(sub { my ($key, $lang) = @_; return "${key}[$lang]"; });
}

# Schemas style: after every <locale name="C"> block, add one <locale>
# block per language that has a translation for its default, short or
# long string, reusing the whitespace layout of the C block.
sub schemas_merge_translations
{
    my $source;

    {
       local $/; # slurp mode
       open(my $in, '<', $FILE) or die "can't open $FILE: $!";
       $source = <$in>;
       close $in;
    }

    open(my $out, '>', $OUTFILE) or die "can't open $OUTFILE: $!";
    binmode ($out) if $^O eq 'MSWin32';

    # FIXME: support attribute translations

    # Empty nodes never need translation, so unmark all of them.
    # For example, <_foo/> is just replaced by <foo/>.
    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;

    while ($source =~ s/
                        (.*?)
                        (\s+)(<locale\ name="C">(\s*)
                            (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
                            (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
                            (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
                        <\/locale>)
                       //sx)
    {
        print {$out} $1;

        my $locale_start_spaces = $2 ? $2 : '';
        my $default_spaces = $4 ? $4 : '';
        my $short_spaces = $7 ? $7 : '';
        my $long_spaces = $10 ? $10 : '';
        my $locale_end_spaces = $13 ? $13 : '';
        my $c_default_block = $3 ? $3 : '';
        my $default_string = $6 ? $6 : '';
        my $short_string = $9 ? $9 : '';
        my $long_string = $12 ? $12 : '';

        print {$out} "$locale_start_spaces$c_default_block";

        # Normalise whitespace and decode entities before the lookup.
        $default_string =~ s/\s+/ /g;
        $default_string = entity_decode($default_string);
        $short_string =~ s/\s+/ /g;
        $short_string = entity_decode($short_string);
        $long_string =~ s/\s+/ /g;
        $long_string = entity_decode($long_string);

        for my $lang (sort keys %po_files_by_lang)
        {
            my $default_translation = $translations{$lang, $default_string};
            my $short_translation = $translations{$lang, $short_string};
            my $long_translation  = $translations{$lang, $long_string};

            next if (!$default_translation && !$short_translation &&
                     !$long_translation);

            print {$out} "\n$locale_start_spaces<locale name=\"$lang\">";

            print {$out} "$default_spaces";

            if ($default_translation)
            {
                $default_translation = entity_encode($default_translation);
                print {$out} "<default>$default_translation</default>";
            }

            print {$out} "$short_spaces";

            if ($short_translation)
            {
                $short_translation = entity_encode($short_translation);
                print {$out} "<short>$short_translation</short>";
            }

            print {$out} "$long_spaces";

            if ($long_translation)
            {
                $long_translation = entity_encode($long_translation);
                print {$out} "<long>$long_translation</long>";
            }

            print {$out} "$locale_end_spaces</locale>";
        }
    }

    print {$out} $source;

    close $out;
}

# RFC822 (Debian) style: copy $FILE to $OUTFILE with the "_" markers
# removed.  Every marked field is followed by one "Field-lang.encoding:"
# copy per language for which all of its parts are translated.  One
# underscore marks a short/long description field, two a comma separated
# list (see rfc822deb_split).
sub rfc822deb_merge_translations
{
    my %encodings = ();
    for my $lang (keys %po_files_by_lang) {
        $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
    }

    my $source;

    $Text::Wrap::huge = 'overflow';
    $Text::Wrap::break = qr/\n|\s(?=\S)/;

    {
       local $/; # slurp mode
       open(my $in, '<', $FILE) or die "can't open $FILE: $!";
       $source = <$in>;
       close $in;
    }

    open(my $out, '>', $OUTFILE) or die "can't open $OUTFILE: $!";
    binmode ($out) if $^O eq 'MSWin32';

    while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
    {
        my $sep = $1;
        my $non_translated_line = $3.$4;
        my $string = $5;
        my $underscore = length($2);
        next if $underscore == 0 && $non_translated_line =~ /^#/;
        # Remove [] dummy strings
        my $stripped = $string;
        $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore == 2;
        $stripped =~ s/\[\s[^\[\]]*\]$//;
        $non_translated_line .= $stripped;

        print {$out} $sep.$non_translated_line;

        if ($underscore)
        {
            my @str_list = rfc822deb_split($underscore, $string);

            for my $lang (sort keys %po_files_by_lang)
            {
                my $is_translated = 1;
                my $str_translated = '';
                my $first = 1;

                for my $str (@str_list)
                {
                    my $translation = $translations{$lang, $str};

                    # A field is only emitted when every part of it is
                    # translated.
                    if (!$translation)
                    {
                        $is_translated = 0;
                        last;
                    }

                    # $translation may also contain [] dummy
                    # strings, mostly to indicate an empty string
                    $translation =~ s/\[\s[^\[\]]*\]$//;

                    if ($first)
                    {
                        if ($underscore == 2)
                        {
                            $str_translated .= $translation;
                        }
                        else
                        {
                            $str_translated .=
                                Text::Tabs::expand($translation) .
                                "\n";
                        }
                    }
                    else
                    {
                        if ($underscore == 2)
                        {
                            $str_translated .= ', ' . $translation;
                        }
                        else
                        {
                            $str_translated .= Text::Tabs::expand(
                                Text::Wrap::wrap(' ', ' ', $translation)) .
                                "\n .\n";
                        }
                    }
                    $first = 0;

                    # To fix some problems with Text::Wrap::wrap
                    $str_translated =~ s/(\n )+\n/\n .\n/g;
                }
                next unless $is_translated;

                $str_translated =~ s/\n \.\n$//;
                $str_translated =~ s/\s+$//;

                my $localized = $non_translated_line;
                $localized =~ s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
                print {$out} $localized;
            }
        }
    }
    print {$out} "\n";

    close $out;
}

sub rfc822deb_split
{
    # Debian defines a special way to deal with rfc822-style files:
    # when a value contain newlines, it consists of
    #   1.  a short form (first line)
    #   2.  a long description, all lines begin with a space,
    #       and paragraphs are separated by a single dot on a line
    # This routine returns an array of all paragraphs, and reformat
    # them.
    # When first argument is 2, the string is a comma separated list of
    # values.
    my $type = shift;
    my $text = shift;
    $text =~ s/^[ \t]//mg;
    return (split(/, */, $text, 0)) if $type != 1;
    return ($text) if $text !~ /\n/;

    # The first line is the short form.
    $text =~ s/([^\n]*)\n//;
    my @list = ($1);
    my $str = '';

    for my $line (split (/\n/, $text))
    {
        chomp $line;
        if ($line =~ /^\.\s*$/)
        {
            #  New paragraph
            $str =~ s/\s*$//;
            push(@list, $str);
            $str = '';
        }
        elsif ($line =~ /^\s/)
        {
            #  Line which must not be reformatted
            $str .= "\n" if length ($str) && $str !~ /\n$/;
            $line =~ s/\s+$//;
            $str .= $line."\n";
        }
        else
        {
            #  Continuation line, remove newline
            $str .= " " if length ($str) && $str !~ /\n$/;
            $str .= $line;
        }
    }

    $str =~ s/\s*$//;
    push(@list, $str) if length ($str);

    return @list;
}
