Hier das erwähnte Perlscript. Im Code wird noch eine Datenbank aufgerufen, das habe ich aber auskommentiert.
Das Script liest hier aus dem DATA Block und schreibt mit Print nach STDOUT. Viele Terminals mit UTF-8 nicht richtig klar kommen, kann die Ausgabe von Zeichen mit Akzent ggf. schlecht aussehen.
Wie gesagt ich habe eine andere Version im Einsatz mit Lesen und Zurückschreiben in Access-DB, und ein zweites Script was aus einem po File liest und ein neues po file schreibt, aber das hab ich gerade nicht hier.
Code: Alles auswählen
#!/usr/bin/perl
##############################################################################
# read strings from a database, or a __DATA__ section
# get eng-french translations from web services,
# write it back to the database (this code is commented out)
##############################################################################
use strict;
use WWW::Babelfish;
#use DBI;
use DBI qw(:utils);
#use DBD::ADO;
#Win32::OLE->Option( CP => Win32::OLE::CP_UTF8 );
#use Devel::Peek;
# use utf8;
use Text::Unidecode;
use Encode;
use constant NOTRANS => "*** not translated ***";
#database connection; use ADO because it works better with UTF-8
#our $dbh = DBI->connect( "dbi:ADO:Provider=Microsoft." . "Jet.OLEDB.4.0;Data Source=G:\\someDir\\translation-french.mdb" );
#some variables
# Main program: read English/French string pairs from the __DATA__ block,
# ask each translation service for a French translation of every msgid and
# report the results via update_db().

# Translation services queried for every string, in this order.
my @services = qw(Babelfish Google Yahoo);

# What must be translated? The database variant selects the rows like this:
#$sql = qq{select ID, msgid , msgstr from [eng-italian] where msgid like "msgid%" order by ID};

# This version reads from the __DATA__ block instead. Slurp mode is enabled
# with local() for this single read only: a global "undef $/" would also turn
# every later chomp() in the script into a no-op.
my $file = do { local $/; <DATA> };
$file = '' unless defined $file;

# We are dealing with pairs of strings (English msgid => French msgstr), some
# of which are untranslated. Collect them into a hash keyed by msgid.
# Matching globally over the whole text (instead of split /\n\n/ followed by
# one match per chunk) finds every pair whether or not the entries are
# separated by blank lines, and never stores stale $1/$2 values for a chunk
# that does not match.
my %str;
while ( $file =~ /msgid "(.*)"\nmsgstr "(.*)"/g ) {
    $str{$1} = $2;
}

# Counters: total number of strings and number of untranslated strings.
# An empty msgstr means "untranslated"; comparing against '' keeps a
# legitimate translation of "0" from being counted as missing.
my $total        = keys %str;
my $untranslated = grep { $_ eq '' } values %str;

# the web sites return unicode / utf8, thus make print happy
binmode( STDOUT, ":utf8" );

# Walk through the strings, shortest msgid first. The secondary string
# comparison makes the order deterministic for msgids of equal length
# (hash key order alone is random between runs).
my $n = 0;
foreach my $k ( sort { length($a) <=> length($b) or $a cmp $b } keys %str ) {
    $n++;

    # An empty msgid has nothing to translate; just flag it in the output.
    if ( $k eq '' ) {
        print "\t### \n";
        next;
    }

    # The stand-alone version has no database rows, so a dummy ID is passed on.
    # (DB variant: my $id = $dat->[$j]{ID};)
    my $id = 0;

    printf "\n%03d of %d (%d total untranslated)\n", $n, $total, $untranslated;
    print "\t$k\t" . data_string_desc($k) . "\n";

    # Every service gets the same treatment; the translation is wrapped in
    # double quotes for all of them (previously the Yahoo result was passed
    # on unquoted, unlike the other two).
    foreach my $srv (@services) {
        my $translation = translate_string( $k, $srv );
        #$translation = unidecode($translation); #remove weird chars
        update_db( $srv, qq{"$translation"}, $id );
    }
}
#######################################
# End of "main program"
#######################################
# Report one translation on STDOUT: first DBI's description of the string,
# then the service name together with the translated text.
#
# Parameters:
#   $srv - name of the translation service
#   $tr  - translated text
#   $id  - row ID; only referenced by the disabled database code below
#
# The database write-back of the original version is kept here, disabled:
sub update_db {
    my ( $srv, $tr, $id ) = @_;

    #my $sql = qq{ update [eng-italian] set [msgstr-fr-$srv] = 'msgstr $tr' where ID = $id}; #and [msgstr-fr-google] is null
    #$dbh->do($sql) or warn "$srv: can't execute $sql: $! \ndb said: " . $dbh->errstr unless (index($tr, NOTRANS) || $tr =~ /UNAVAIL/i);
    # NOTE(review): index() returns -1 (a true value) when NOTRANS is absent,
    # so the condition above needs "index(...) >= 0" before it is re-enabled.

    my $description = data_string_desc($tr);
    print "\t$description\n", "\t$srv:\t$tr\n";

    #print "\t$sql\n";
}
# Translate one English string into French through a WWW::Babelfish service.
#
# Parameters:
#   $text   - English text; it is passed through Encode::decode_utf8() before
#             being sent, so it is expected to be UTF-8 encoded bytes
#   $objstr - service name handed to WWW::Babelfish (e.g. "Babelfish")
#
# Returns:
#   - the cleaned-up translation, with single quotes doubled (SQL escaping
#     used by the database variant of this script), or
#   - the NOTRANS marker when no (or only an empty) translation was obtained, or
#   - "<service> Translation server unavailable\n" when no translator object
#     could be created.
sub translate_string {
    my ( $text, $objstr ) = @_;

    # define translator object (direct method call, no indirect object syntax)
    my $obj = WWW::Babelfish->new( service => $objstr, agent => 'Mozilla/8.0' );
    return "$objstr Translation server unavailable\n"
        unless defined($obj) && ref($obj) eq "WWW::Babelfish";

    my $french_text = $obj->translate(
        'source'      => 'English',
        'destination' => 'French',
        'text'        => Encode::decode_utf8($text),
        'delimiter'   => "\n\t"
    );
    sleep(1);    # presumably a pause between requests to the web service

    # Nothing came back: report it and stop here instead of running the
    # clean-up steps on an undefined value.
    unless ( defined $french_text ) {
        warn( "Could not translate: " . $obj->error );
        return NOTRANS;
    }

    # If the result is not flagged as a character string, try to decode it
    # as UTF-8 (DBD::ADO wants it this way). The decoded value is assigned
    # back; the original call discarded it. Strict decoding inside eval keeps
    # the text unchanged when the bytes turn out not to be valid UTF-8.
    unless ( Encode::is_utf8($french_text) ) {
        my $decoded = eval {
            Encode::decode( 'UTF-8', $french_text,
                Encode::FB_CROAK() | Encode::LEAVE_SRC() );
        };
        $french_text = $decoded if defined $decoded;
    }

    # Strip one trailing newline. Done with a regex rather than chomp(), which
    # silently does nothing while $/ is undefined (slurp mode).
    $french_text =~ s/\n\z//;

    $french_text =~ s/(?:l')?\s*identification\s+de\s+message\s*//ig; #cleanup
    $french_text =~ s/'/''/g;    # double single quotes (SQL escaping)

    # Uppercase the first word character, with or without non-word characters
    # in front of it (\W* instead of \W+, which skipped plain text).
    $french_text =~ s/\A(\W*)(\w)/$1\u$2/;

    # An empty result counts as "not translated"; a translation of "0" does not.
    #return $dbh->quote($french_text);
    return length($french_text) ? $french_text : NOTRANS;
}
#######################################
# test data
#######################################
__DATA__
msgid "OK"
msgstr ""
msgid "Cancel"
msgstr ""
msgid "Colorpicker"
msgstr ""
msgid "Insert Link"
msgstr ""
msgid "Internal link"
msgstr ""
msgid "Please choose"
msgstr ""
msgid "None"
msgstr ""
msgid "PDF"
msgstr ""
msgid "Media"
msgstr ""
msgid "External link"
msgstr ""
msgid "Target"
msgstr ""
msgid "Open in same window"
msgstr ""
msgid "Open in new window"
msgstr ""
msgid "Open in parent frame"
msgstr ""
msgid "Open in top frame"
msgstr ""