1
0
Fork 0
dynamic_liquid/autotranslate.py

134 lines
6.5 KiB
Python
Raw Normal View History

# A quick-and-dirty script to run untranslated text through Google Translate's API.
# The result will likely include errors that a native speaker will find comical or
# puzzling, and some manual correction of escape codes such as @1 and @= may be
# required, but hopefully it will serve as a starting point for something useful.
# Copyright (C) 2020 FaceDeer
# LGPLv2.1+
# See https://github.com/minetest-tools/update_translations for
# potential future updates to this script.
from googletrans import Translator, LANGUAGES
import os, re, shutil
# Matches any file name that ends in ".tr".
pattern_tr_filename = re.compile(r'\.tr$')
# Captures (group 1) the text between the last two dots of a name ending in
# ".<something>.tr"; translate() treats that text as the language ID.
pattern_tr_id = re.compile(r'\.([^.]*)\.tr$')
# Matches a line that ends in "=" with nothing after it. The first character must
# not be "#" (comment lines are skipped) and the character right before the final
# "=" must not be "@" (so a trailing escaped "@=" is not taken for the separator).
# Group 1 is the text before the "="; it is always at least two characters long.
pattern_line_to_translate = re.compile(r'^([^#].*[^@])=$') #finds lines that don't have a translation
# Single googletrans Translator instance used by translate().
translator = Translator()
def _translate_segments(source_text, translation_dictionary):
    """Translate source_text piece by piece, keeping its @n markers in place.

    Returns a (translated_text, failed_piece) pair:
      * (text, None) when every non-empty segment had a translation,
      * (None, piece) when `piece` is the first segment missing from
        translation_dictionary,
      * (None, None) when source_text holds nothing but @n markers.
    """
    translated_pieces = []
    found_text = False
    # The capturing group makes re.split keep the "@n" markers as list items.
    for piece in re.split("(@n)", source_text):
        if piece == "" or piece == "@n":
            # Markers are copied through unchanged. Empty pieces appear when the
            # text starts or ends with @n or has two @n in a row; they were never
            # sent for translation, so they must not count as a failure.
            translated_pieces.append(piece)
        elif piece in translation_dictionary:
            translated_pieces.append(translation_dictionary[piece])
            found_text = True
        else:
            return None, piece
    if not found_text:
        return None, None
    return "".join(translated_pieces), None


def translate(tr_filename):
    """Fill in the untranslated entries of one .tr file via Google Translate.

    The language ID is taken from the file name ("<name>.<lang_id>.tr"). Every
    line matching pattern_line_to_translate (a source string followed by a bare
    "=") is split at its @n markers, the resulting segments are sent to the
    translator in one request, and the file is rewritten with the translation
    appended to each such line, preceded by a warning comment line.

    Lines for which any segment came back without a translation are written
    back unchanged (and reported on stdout) so they can be retried later.
    Nothing is written at all if the file has no untranslated lines, or if the
    language ID is missing or not listed in LANGUAGES.

    tr_filename: path of the .tr file to update in place.
    """
    lang_match = pattern_tr_id.search(tr_filename)
    if not lang_match:
        print("Could not find language ID in tr filename " + tr_filename)
        return
    lang_id = lang_match.group(1)
    if lang_id not in LANGUAGES:
        print("language ID " + lang_id + " is not supported by Google Translate's API")
        return

    # Read the whole file up front so that it is closed again before it gets
    # replaced below (replacing a file that is still open fails on some platforms).
    with open(tr_filename, "r", encoding="utf-8") as tr_file_handle:
        lines = tr_file_handle.readlines()

    # Collect the distinct segments that need translating. Splitting at @n is not
    # ideal for Google, as it may remove some context, but it is necessary to
    # allow the @n markers to be preserved in the output later.
    segments = set()
    for line in lines:
        line_lacking_translation = pattern_line_to_translate.search(line)
        if line_lacking_translation:
            segments.update(line_lacking_translation.group(1).split("@n"))
    segments.discard("")  # a common artefact of splitting

    # Only do more work if there are lines in need of translation
    if not segments:
        return

    print("Calling Google API for " + tr_filename)
    output = translator.translate(sorted(segments), src="en", dest=lang_id)

    # Google's API sometimes seems to fail to translate a line for no apparent
    # reason. Don't put those in the dictionary; they are left untranslated so
    # that a later run can try again.
    translation_dictionary = {}
    for out_line in output:
        if out_line.origin != out_line.text:
            translation_dictionary[out_line.origin] = out_line.text

    temp_filename = tr_filename + ".temp"
    with open(temp_filename, "w", encoding="utf-8") as tr_file_new:
        for line in lines:
            line_lacking_translation = pattern_line_to_translate.search(line)
            if not line_lacking_translation:
                tr_file_new.write(line)
                continue

            # Remove the trailing newline so the translation can be appended to
            # the same line. Group 1 is the line's text without the final "=".
            line = line.rstrip('\n')
            translated_line, failed_piece = _translate_segments(
                line_lacking_translation.group(1), translation_dictionary)

            if failed_piece is not None:
                print("Google returned string unchanged in file " + tr_filename + ":")
                print(failed_piece)

            if translated_line:
                tr_file_new.write("#WARNING: AUTOTRANSLATED BY GOOGLE TRANSLATE\n")
                tr_file_new.write(line + translated_line + "\n")
            else:
                tr_file_new.write(line + "\n")

    # Overwrite the original file with the new one
    shutil.move(temp_filename, tr_filename)
# Matches a line of the form "# textdomain: <name>" and captures <name> as group 1.
# create_tr_files_from_template() applies it to the first line of template.txt.
pattern_domain = re.compile(r'^# textdomain: (.+)$')
def create_tr_files_from_template(folder, lang_id):
    """Create "<textdomain>.<lang_id>.tr" files by copying template.txt files.

    Walks `folder` recursively. In the top-level folder itself and in every
    folder named "locale", a file called template.txt whose first line is a
    "# textdomain: <name>" header is copied to "<name>.<lang_id>.tr" in the same
    folder, unless that file already exists. Progress is reported on stdout.

    folder:  directory to search.
    lang_id: language ID to create files for; must be a key of LANGUAGES,
             otherwise a message is printed and nothing is done.
    """
    if lang_id not in LANGUAGES:
        print("language ID " + lang_id + " is not supported by Google Translate's API")
        return

    # os.walk reports the starting folder exactly as it was passed in, so compare
    # against that rather than a hard-coded "." (which only worked when the
    # function was called with folder=".").
    top = os.fspath(folder)
    for root, _dirs, files in os.walk(folder):
        if root != top and os.path.basename(root) != "locale":
            continue
        if "template.txt" not in files:
            continue

        template_filename = os.path.join(root, "template.txt")
        # Only the first line is needed; close the template before copying it.
        with open(template_filename, "r", encoding="utf-8") as template_file:
            first_line = template_file.readline()

        domain = pattern_domain.search(first_line)
        if not domain:
            continue

        translation_filename = os.path.join(root, domain.group(1) + "." + lang_id + ".tr")
        if os.path.isfile(translation_filename):
            print(translation_filename + " already exists")
        else:
            print("Copying template.txt to " + translation_filename)
            shutil.copy(template_filename, translation_filename)
def get_existing_tr_files(folder):
    """Return the paths of all .tr files found anywhere beneath `folder`.

    The folder is walked recursively; every file whose name matches
    pattern_tr_filename is returned as the walk root joined with the file name,
    in the order os.walk reports them.
    """
    return [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(folder)
        for name in files
        if pattern_tr_filename.search(name)
    ]
# To start translating into a new language, uncomment one of these calls (or add
# your own) so that .tr files are first created from template.txt:
#create_tr_files_from_template(".", "de")
#create_tr_files_from_template(".", "it")

# Run every .tr file found beneath the current working directory through the
# translator.
tr_files = get_existing_tr_files(".")
for tr_file in tr_files:
    translate(tr_file)