diff options
Diffstat (limited to 'intltool-extract.in')
-rw-r--r-- | intltool-extract.in | 839 |
1 files changed, 0 insertions, 839 deletions
diff --git a/intltool-extract.in b/intltool-extract.in deleted file mode 100644 index adfa971..0000000 --- a/intltool-extract.in +++ /dev/null @@ -1,839 +0,0 @@ -#!@INTLTOOL_PERL@ -w -# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*- - -# -# The Intltool Message Extractor -# -# Copyright (C) 2000-2001, 2003 Free Software Foundation. -# -# Intltool is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; either version 2 of the -# License, or (at your option) any later version. -# -# Intltool is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. -# -# Authors: Kenneth Christiansen <kenneth@gnu.org> -# Darin Adler <darin@bentspoon.com> -# - -## Release information -my $PROGRAM = "intltool-extract"; -my $PACKAGE = "intltool"; -my $VERSION = "0.34.1"; - -## Loaded modules -use strict; -use File::Basename; -use Getopt::Long; - -## Scalars used by the option stuff -my $TYPE_ARG = "0"; -my $LOCAL_ARG = "0"; -my $HELP_ARG = "0"; -my $VERSION_ARG = "0"; -my $UPDATE_ARG = "0"; -my $QUIET_ARG = "0"; -my $SRCDIR_ARG = "."; - -my $FILE; -my $OUTFILE; - -my $gettext_type = ""; -my $input; -my %messages = (); -my %loc = (); -my %count = (); -my %comments = (); -my $strcount = 0; - -my $XMLCOMMENT = ""; - -## Use this instead of \w for XML files to handle more possible characters. -my $w = "[-A-Za-z0-9._:]"; - -## Always print first -$| = 1; - -## Handle options -GetOptions ( - "type=s" => \$TYPE_ARG, - "local|l" => \$LOCAL_ARG, - "help|h" => \$HELP_ARG, - "version|v" => \$VERSION_ARG, - "update" => \$UPDATE_ARG, - "quiet|q" => \$QUIET_ARG, - "srcdir=s" => \$SRCDIR_ARG, - ) or &error; - -&split_on_argument; - - -## Check for options. -## This section will check for the different options. - -sub split_on_argument { - - if ($VERSION_ARG) { - &version; - - } elsif ($HELP_ARG) { - &help; - - } elsif ($LOCAL_ARG) { - &place_local; - &extract; - - } elsif ($UPDATE_ARG) { - &place_normal; - &extract; - - } elsif (@ARGV > 0) { - &place_normal; - &message; - &extract; - - } else { - &help; - - } -} - -sub place_normal { - $FILE = $ARGV[0]; - $OUTFILE = "$FILE.h"; -} - -sub place_local { - $FILE = $ARGV[0]; - $OUTFILE = fileparse($FILE, ()); - if (!-e "tmp/") { - system("mkdir tmp/"); - } - $OUTFILE = "./tmp/$OUTFILE.h" -} - -sub determine_type { - if ($TYPE_ARG =~ /^gettext\/(.*)/) { - $gettext_type=$1 - } -} - -## Sub for printing release information -sub version{ - print <<_EOF_; -${PROGRAM} (${PACKAGE}) $VERSION -Copyright (C) 2000, 2003 Free Software Foundation, Inc. -Written by Kenneth Christiansen, 2000. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -_EOF_ - exit; -} - -## Sub for printing usage information -sub help { - print <<_EOF_; -Usage: ${PROGRAM} [OPTION]... [FILENAME] -Generates a header file from an XML source file. - -It grabs all strings between <_translatable_node> and its end tag in -XML files. Read manpage (man ${PROGRAM}) for more info. - - --type=TYPE Specify the file type of FILENAME. Currently supports: - "gettext/glade", "gettext/ini", "gettext/keys" - "gettext/rfc822deb", "gettext/schemas", - "gettext/scheme", "gettext/xml" - -l, --local Writes output into current working directory - (conflicts with --update) - --update Writes output into the same directory the source file - reside (conflicts with --local) - --srcdir Root of the source tree - -v, --version Output version information and exit - -h, --help Display this help and exit - -q, --quiet Quiet mode - -Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE") -or send email to <xml-i18n-tools\@gnome.org>. -_EOF_ - exit; -} - -## Sub for printing error messages -sub error{ - print STDERR "Try `${PROGRAM} --help' for more information.\n"; - exit; -} - -sub message { - print "Generating C format header file for translation.\n" unless $QUIET_ARG; -} - -sub extract { - &determine_type; - - &convert; - - open OUT, ">$OUTFILE"; - binmode (OUT) if $^O eq 'MSWin32'; - &msg_write; - close OUT; - - print "Wrote $OUTFILE\n" unless $QUIET_ARG; -} - -sub convert { - - ## Reading the file - { - local (*IN); - local $/; #slurp mode - open (IN, "<$SRCDIR_ARG/$FILE") || die "can't open $SRCDIR_ARG/$FILE: $!"; - $input = <IN>; - } - - &type_ini if $gettext_type eq "ini"; - &type_keys if $gettext_type eq "keys"; - &type_xml if $gettext_type eq "xml"; - &type_glade if $gettext_type eq "glade"; - &type_scheme if $gettext_type eq "scheme"; - &type_schemas if $gettext_type eq "schemas"; - &type_rfc822deb if $gettext_type eq "rfc822deb"; -} - -sub entity_decode_minimal -{ - local ($_) = @_; - - s/'/'/g; # ' - s/"/"/g; # " - s/&/&/g; - - return $_; -} - -sub entity_decode -{ - local ($_) = @_; - - s/'/'/g; # ' - s/"/"/g; # " - s/&/&/g; - s/</</g; - s/>/>/g; - - return $_; -} - -sub escape_char -{ - return '\"' if $_ eq '"'; - return '\n' if $_ eq "\n"; - return '\\' if $_ eq '\\'; - - return $_; -} - -sub escape -{ - my ($string) = @_; - return join "", map &escape_char, split //, $string; -} - -sub type_ini { - ### For generic translatable desktop files ### - while ($input =~ /^_.*=(.*)$/mg) { - $messages{$1} = []; - } -} - -sub type_keys { - ### For generic translatable mime/keys files ### - while ($input =~ /^\s*_\w+=(.*)$/mg) { - $messages{$1} = []; - } -} - -sub type_xml { - ### For generic translatable XML files ### - my $tree = readXml($input); - parseTree(0, $tree); -} - -sub print_var { - my $var = shift; - my $vartype = ref $var; - - if ($vartype =~ /ARRAY/) { - my @arr = @{$var}; - print "[ "; - foreach my $el (@arr) { - print_var($el); - print ", "; - } - print "] "; - } elsif ($vartype =~ /HASH/) { - my %hash = %{$var}; - print "{ "; - foreach my $key (keys %hash) { - print "$key => "; - print_var($hash{$key}); - print ", "; - } - print "} "; - } else { - print $var; - } -} - -# Same syntax as getAttributeString in intltool-merge.in.in, similar logic (look for ## differences comment) -sub getAttributeString -{ - my $sub = shift; - my $do_translate = shift || 1; - my $language = shift || ""; - my $translate = shift; - my $result = ""; - foreach my $e (reverse(sort(keys %{ $sub }))) { - my $key = $e; - my $string = $sub->{$e}; - my $quote = '"'; - - $string =~ s/^[\s]+//; - $string =~ s/[\s]+$//; - - if ($string =~ /^'.*'$/) - { - $quote = "'"; - } - $string =~ s/^['"]//g; - $string =~ s/['"]$//g; - - ## differences from intltool-merge.in.in - if ($key =~ /^_/) { - $comments{entity_decode($string)} = $XMLCOMMENT if $XMLCOMMENT; - $messages{entity_decode($string)} = []; - $$translate = 2; - } - ## differences end here from intltool-merge.in.in - $result .= " $key=$quote$string$quote"; - } - return $result; -} - -# Verbatim copy from intltool-merge.in.in -sub getXMLstring -{ - my $ref = shift; - my $spacepreserve = shift || 0; - my @list = @{ $ref }; - my $result = ""; - - my $count = scalar(@list); - my $attrs = $list[0]; - my $index = 1; - - $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)); - $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/)); - - while ($index < $count) { - my $type = $list[$index]; - my $content = $list[$index+1]; - if (! $type ) { - # We've got CDATA - if ($content) { - # lets strip the whitespace here, and *ONLY* here - $content =~ s/\s+/ /gs if (!$spacepreserve); - $result .= $content; - } - } elsif ( "$type" ne "1" ) { - # We've got another element - $result .= "<$type"; - $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements - if ($content) { - my $subresult = getXMLstring($content, $spacepreserve); - if ($subresult) { - $result .= ">".$subresult . "</$type>"; - } else { - $result .= "/>"; - } - } else { - $result .= "/>"; - } - } - $index += 2; - } - return $result; -} - -# Verbatim copy from intltool-merge.in.in, except for MULTIPLE_OUTPUT handling removed -# Translate list of nodes if necessary -sub translate_subnodes -{ - my $fh = shift; - my $content = shift; - my $language = shift || ""; - my $singlelang = shift || 0; - my $spacepreserve = shift || 0; - - my @nodes = @{ $content }; - - my $count = scalar(@nodes); - my $index = 0; - while ($index < $count) { - my $type = $nodes[$index]; - my $rest = $nodes[$index+1]; - traverse($fh, $type, $rest, $language, $spacepreserve); - $index += 2; - } -} - -# Based on traverse() in intltool-merge.in.in -sub traverse -{ - my $fh = shift; # unused, to allow us to sync code between -merge and -extract - my $nodename = shift; - my $content = shift; - my $language = shift || ""; - my $spacepreserve = shift || 0; - - if ($nodename && "$nodename" eq "1") { - $XMLCOMMENT = $content; - } elsif ($nodename) { - # element - my @all = @{ $content }; - my $attrs = shift @all; - my $translate = 0; - my $outattr = getAttributeString($attrs, 1, $language, \$translate); - - if ($nodename =~ /^_/) { - $translate = 1; - $nodename =~ s/^_//; - } - my $lookup = ''; - - $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/)); - $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)); - - if ($translate) { - $lookup = getXMLstring($content, $spacepreserve); - if (!$spacepreserve) { - $lookup =~ s/^\s+//s; - $lookup =~ s/\s+$//s; - } - - if ($lookup && $translate != 2) { - $comments{$lookup} = $XMLCOMMENT if $XMLCOMMENT; - $messages{$lookup} = []; - } elsif ($translate == 2) { - translate_subnodes($fh, \@all, $language, 1, $spacepreserve); - } - } else { - $XMLCOMMENT = ""; - my $count = scalar(@all); - if ($count > 0) { - my $index = 0; - while ($index < $count) { - my $type = $all[$index]; - my $rest = $all[$index+1]; - traverse($fh, $type, $rest, $language, $spacepreserve); - $index += 2; - } - } - } - $XMLCOMMENT = ""; - } -} - - -# Verbatim copy from intltool-merge.in.in, $fh for compatibility -sub parseTree -{ - my $fh = shift; - my $ref = shift; - my $language = shift || ""; - - my $name = shift @{ $ref }; - my $cont = shift @{ $ref }; - - while (!$name || "$name" eq "1") { - $name = shift @{ $ref }; - $cont = shift @{ $ref }; - } - - my $spacepreserve = 0; - my $attrs = @{$cont}[0]; - $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)); - - traverse($fh, $name, $cont, $language, $spacepreserve); -} - -# Verbatim copy from intltool-merge.in.in -sub intltool_tree_comment -{ - my $expat = shift; - my $data = shift; - my $clist = $expat->{Curlist}; - my $pos = $#$clist; - - push @$clist, 1 => $data; -} - -# Verbatim copy from intltool-merge.in.in -sub intltool_tree_cdatastart -{ - my $expat = shift; - my $clist = $expat->{Curlist}; - my $pos = $#$clist; - - push @$clist, 0 => $expat->original_string(); -} - -# Verbatim copy from intltool-merge.in.in -sub intltool_tree_cdataend -{ - my $expat = shift; - my $clist = $expat->{Curlist}; - my $pos = $#$clist; - - $clist->[$pos] .= $expat->original_string(); -} - -# Verbatim copy from intltool-merge.in.in -sub intltool_tree_char -{ - my $expat = shift; - my $text = shift; - my $clist = $expat->{Curlist}; - my $pos = $#$clist; - - # Use original_string so that we retain escaped entities - # in CDATA sections. - # - if ($pos > 0 and $clist->[$pos - 1] eq '0') { - $clist->[$pos] .= $expat->original_string(); - } else { - push @$clist, 0 => $expat->original_string(); - } -} - -# Verbatim copy from intltool-merge.in.in -sub intltool_tree_start -{ - my $expat = shift; - my $tag = shift; - my @origlist = (); - - # Use original_string so that we retain escaped entities - # in attribute values. We must convert the string to an - # @origlist array to conform to the structure of the Tree - # Style. - # - my @original_array = split /\x/, $expat->original_string(); - my $source = $expat->original_string(); - - # Remove leading tag. - # - $source =~ s|^\s*<\s*(\S+)||s; - - # Grab attribute key/value pairs and push onto @origlist array. - # - while ($source) - { - if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/) - { - $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s; - push @origlist, $1; - push @origlist, '"' . $2 . '"'; - } - elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/) - { - $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s; - push @origlist, $1; - push @origlist, "'" . $2 . "'"; - } - else - { - last; - } - } - - my $ol = [ { @origlist } ]; - - push @{ $expat->{Lists} }, $expat->{Curlist}; - push @{ $expat->{Curlist} }, $tag => $ol; - $expat->{Curlist} = $ol; -} - -# Copied from intltool-merge.in.in and added comment handler. -sub readXml -{ - my $xmldoc = shift || return; - my $ret = eval 'require XML::Parser'; - if(!$ret) { - die "You must have XML::Parser installed to run $0\n\n"; - } - my $xp = new XML::Parser(Style => 'Tree'); - $xp->setHandlers(Char => \&intltool_tree_char); - $xp->setHandlers(Start => \&intltool_tree_start); - $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart); - $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend); - - ## differences from intltool-merge.in.in - $xp->setHandlers(Comment => \&intltool_tree_comment); - ## differences end here from intltool-merge.in.in - - my $tree = $xp->parse($xmldoc); - #print_var($tree); - -# <foo><!-- comment --><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> -# would be: -# [foo, [{}, 1, "comment", head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, -# [{}, 0, "Howdy", ref, [{}]], 0, "do" ] ] - - return $tree; -} - -sub type_schemas { - ### For schemas XML files ### - - # FIXME: We should handle escaped < (less than) - while ($input =~ / - <locale\ name="C">\s* - (<default>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/default>\s*)? - (<short>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/short>\s*)? - (<long>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/long>\s*)? - <\/locale> - /sgx) { - my @totranslate = ($3,$6,$9); - my @eachcomment = ($2,$5,$8); - foreach (@totranslate) { - my $currentcomment = shift @eachcomment; - next if !$_; - s/\s+/ /g; - $messages{entity_decode_minimal($_)} = []; - $comments{entity_decode_minimal($_)} = $currentcomment if (defined($currentcomment)); - } - } -} - -sub type_rfc822deb { - ### For rfc822-style Debian configuration files ### - - my $lineno = 1; - my $type = ''; - while ($input =~ /\G(.*?)(^|\n)(_+)([^:]+):[ \t]*(.*?)(?=\n\S|$)/sg) - { - my ($pre, $newline, $underscore, $tag, $text) = ($1, $2, $3, $4, $5); - while ($pre =~ m/\n/g) - { - $lineno ++; - } - $lineno += length($newline); - my @str_list = rfc822deb_split(length($underscore), $text); - for my $str (@str_list) - { - $strcount++; - $messages{$str} = []; - $loc{$str} = $lineno; - $count{$str} = $strcount; - my $usercomment = ''; - while($pre =~ s/(^|\n)#([^\n]*)$//s) - { - $usercomment = "\n" . $2 . $usercomment; - } - $comments{$str} = $tag . $usercomment; - } - $lineno += ($text =~ s/\n//g); - } -} - -sub rfc822deb_split { - # Debian defines a special way to deal with rfc822-style files: - # when a value contain newlines, it consists of - # 1. a short form (first line) - # 2. a long description, all lines begin with a space, - # and paragraphs are separated by a single dot on a line - # This routine returns an array of all paragraphs, and reformat - # them. - # When first argument is 2, the string is a comma separated list of - # values. - my $type = shift; - my $text = shift; - $text =~ s/^[ \t]//mg; - return (split(/, */, $text, 0)) if $type ne 1; - return ($text) if $text !~ /\n/; - - $text =~ s/([^\n]*)\n//; - my @list = ($1); - my $str = ''; - for my $line (split (/\n/, $text)) - { - chomp $line; - if ($line =~ /^\.\s*$/) - { - # New paragraph - $str =~ s/\s*$//; - push(@list, $str); - $str = ''; - } - elsif ($line =~ /^\s/) - { - # Line which must not be reformatted - $str .= "\n" if length ($str) && $str !~ /\n$/; - $line =~ s/\s+$//; - $str .= $line."\n"; - } - else - { - # Continuation line, remove newline - $str .= " " if length ($str) && $str !~ /\n$/; - $str .= $line; - } - } - $str =~ s/\s*$//; - push(@list, $str) if length ($str); - return @list; -} - -sub type_glade { - ### For translatable Glade XML files ### - - my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message"; - - while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) { - # Glade sometimes uses tags that normally mark translatable things for - # little bits of non-translatable content. We work around this by not - # translating strings that only includes something like label4 or window1. - $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label|dialog)[0-9]+$/; - } - - while ($input =~ /<items>(..[^<]*)<\/items>/sg) { - for my $item (split (/\n/, $1)) { - $messages{entity_decode($item)} = []; - } - } - - ## handle new glade files - while ($input =~ /<(property|atkproperty)\s+[^>]*translatable\s*=\s*"yes"(?:\s+[^>]*comments\s*=\s*"([^"]*)")?[^>]*>([^<]+)<\/\1>/sg) { - $messages{entity_decode($3)} = [] unless $3 =~ /^(window|label)[0-9]+$/; - if (defined($2) and !($3 =~ /^(window|label)[0-9]+$/)) { - $comments{entity_decode($3)} = entity_decode($2) ; - } - } - while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) { - $messages{entity_decode_minimal($2)} = []; - } -} - -sub type_scheme { - my ($line, $i, $state, $str, $trcomment, $char); - for $line (split(/\n/, $input)) { - $i = 0; - $state = 0; # 0 - nothing, 1 - string, 2 - translatable string - while ($i < length($line)) { - if (substr($line,$i,1) eq "\"") { - if ($state == 2) { - $comments{$str} = $trcomment if ($trcomment); - $messages{$str} = []; - $str = ''; - $state = 0; $trcomment = ""; - } elsif ($state == 1) { - $str = ''; - $state = 0; $trcomment = ""; - } else { - $state = 1; - $str = ''; - if ($i>0 && substr($line,$i-1,1) eq '_') { - $state = 2; - } - } - } elsif (!$state) { - if (substr($line,$i,1) eq ";") { - $trcomment = substr($line,$i+1); - $trcomment =~ s/^;*\s*//; - $i = length($line); - } elsif ($trcomment && substr($line,$i,1) !~ /\s|\(|\)|_/) { - $trcomment = ""; - } - } else { - if (substr($line,$i,1) eq "\\") { - $char = substr($line,$i+1,1); - if ($char ne "\"" && $char ne "\\") { - $str = $str . "\\"; - } - $i++; - } - $str = $str . substr($line,$i,1); - } - $i++; - } - } -} - -sub msg_write { - my @msgids; - if (%count) - { - @msgids = sort { $count{$a} <=> $count{$b} } keys %count; - } - else - { - @msgids = sort keys %messages; - } - for my $message (@msgids) - { - my $offsetlines = 1; - $offsetlines++ if $message =~ /%/; - if (defined ($comments{$message})) - { - while ($comments{$message} =~ m/\n/g) - { - $offsetlines++; - } - } - print OUT "# ".($loc{$message} - $offsetlines). " \"$FILE\"\n" - if defined $loc{$message}; - print OUT "/* ".$comments{$message}." */\n" - if defined $comments{$message}; - print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/; - - my @lines = split (/\n/, $message, -1); - for (my $n = 0; $n < @lines; $n++) - { - if ($n == 0) - { - print OUT "char *s = N_(\""; - } - else - { - print OUT " \""; - } - - print OUT escape($lines[$n]); - - if ($n < @lines - 1) - { - print OUT "\\n\"\n"; - } - else - { - print OUT "\");\n"; - } - } - } -} - |