Generalize tag translation using emojis heck yeah

2022-12-25 23:24:37 -03:00
parent ab65f0349a
commit 9464ade313
2 changed files with 51 additions and 32 deletions
--- a/Trans/Translator.py
+++ b/Trans/Translator.py
@ -1,9 +1,10 @@
-#Translates the game using good ol fashioned google translator. Be sure to replace the dialogue code in the translator and the tags in es_tags
-#Also this breaks a tiny bit if the dialogue contains \n, but oh well. I don't want to open that can of worms.
-#Also by break I mean that it may leave some text in english, is not that it will implode... I think
+# Translates the game using good ol' fashioned Google Translate. Be sure to replace the dialogue code in the translator.
+# Also this breaks a tiny bit if the dialogue contains \n, but oh well. I don't want to open that can of worms.
+# By "break" I mean that it may leave some text in English, not that it will implode... I think.

 import csv
 import os
+import re
 from colorama import init
 from colorama import Fore
 from googletrans import Translator
@ -16,12 +17,30 @@ init()

 times = []

-# Good ol google doesn't know what renpy tags are, so it translates them. This was the cheapest solution.
-es_tags = {
-  "{rápido}":"{fast}",
-  "{w =": "{w=",
-  "{alfa":"{alpha"
-}
+def translate(string):
+  # Split the string into tokens, keeping each {tag} as a single token
+  tokens = re.findall(r'\b\w+\b|{[^}]*}|[^\w\s]|\s+', string)
+
+  # Swap every tag for a placeholder so that Google Translate doesn't mangle the tags
+  to_restore = []
+  for idx, token in enumerate(tokens):
+    if token[0] == "{":
+      to_restore.append(token)
+      # Emojis aren't touched by the translator and retain their position 😎
+      tokens[idx] = "🔠"
+  encoded_string = "".join(tokens)
+
+  # Translate the encoded string
+  trans = translator.translate(encoded_string, dest='es')
+  temp = list(trans.text)
+
+  # Restore the original tags
+  for tag in to_restore:
+    for idx, char in enumerate(temp):
+      if char == "🔠":
+        temp[idx] = tag
+        break
+  return "".join(temp)

 def avrg(nums):
  average = sum(nums)/len(nums)
@ -32,6 +51,7 @@ with open('original.csv', 'r', encoding='utf-8') as input_file, \
     open('trans.csv', 'w', encoding='utf-8',newline='') as output_file:

  length = len(input_file.readlines())
+  # Hack: the lines were counted above to estimate the time left, so rewind the file
  input_file.seek(0)

  # Create readers and writers for the input and output files
@ -43,13 +63,10 @@ with open('original.csv', 'r', encoding='utf-8') as input_file, \
    start_time = datetime.now().replace(microsecond=0)
    try:
      # Translate the string
-      translation = translator.translate(row[0], dest='es')
+      translation = translate(row[0])

-      for tag_es, tag in es_tags.items():
-        translation.text = translation.text.replace(tag_es,tag)
-
-      #Hope this works for escaping lol
-      translation.text = translation.text.replace('"',r'\"')
+      # Escape double quotes in the translation (hope this is enough, lol)
+      translation = translation.replace('"',r'\"')

    except Exception as e:
      print(f'{Fore.RESET}An error occurred: {e}')