You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

323 lines
9.8 KiB

  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. Author: Sotiris Papadopoulos <ytubedlg@gmail.com>
  5. Last-Revision: 2017-04-19
  6. Script to automatically check PO files
  7. """
  8. from __future__ import unicode_literals
  9. import os
  10. import sys
  11. import logging
  12. import argparse
  13. from time import sleep
  14. from datetime import datetime, timedelta, tzinfo
  15. try:
  16. import polib
  17. import google_translate
  18. except ImportError as error:
  19. print(error)
  20. sys.exit(1)
  21. WTIME = 2.0 # Time in seconds to wait between requests to avoid ban
  22. PACKAGE = "youtube_dl_gui"
  23. PO_FILENAME = "{}.po".format(PACKAGE)
  24. LOCALE_PATH_TMPL = os.path.join(PACKAGE, "locale", "{lang}", "LC_MESSAGES", PO_FILENAME)
  25. logging.basicConfig(level=logging.ERROR)
  26. def parse():
  27. """Parse command line arguments."""
  28. parser = argparse.ArgumentParser(description="Script to automatically check PO files")
  29. parser.add_argument("language", help="language of the PO file to check")
  30. parser.add_argument("-w", "--werror", action="store_true", help="treat all warning messages as errors")
  31. parser.add_argument("-o", "--only-headers", action="store_true", help="check only the PO file headers")
  32. parser.add_argument("-n", "--no-translate", action="store_true", help="do not use the translator to check 'msgstr' fields")
  33. parser.add_argument("-t", "--tlang", help="force a different language on the translator than the one given")
  34. return parser.parse_args()
  35. class UTC_Offset_Timezone(tzinfo):
  36. """Class that represents a UTC offset in the format +/-0000."""
  37. def __init__(self, offset_string):
  38. self.offset = timedelta(seconds=UTC_Offset_Timezone.parse_offset(offset_string))
  39. def utcoffset(self, dt):
  40. return self.offset + self.dst(dt)
  41. def dst(self, dt):
  42. return timedelta(0)
  43. @staticmethod
  44. def parse_offset(offset_string):
  45. """Parse the offset string into seconds."""
  46. if len(offset_string) != 5:
  47. raise ValueError("Invalid length for offset string ({})".format(offset_string))
  48. hours = offset_string[1:3]
  49. minutes = offset_string[3:5]
  50. offset = int(hours) * 3600 + int(minutes) * 60
  51. if offset_string[0] == "-":
  52. return -1 * offset
  53. return offset
  54. def parse_date(date_string):
  55. """Parse date string into an aware datetime object."""
  56. # Just a small list with the most common timezones
  57. offset_list = [
  58. ("JST", "0900"),
  59. ("EEST", "0300"),
  60. ("EET", "0200"),
  61. ("GMT", "0000"),
  62. ("UTC", "0000")
  63. ]
  64. # Replace all the timezones with the offset
  65. for item in offset_list:
  66. timezone, offset = item
  67. date_string = date_string.replace(timezone, offset)
  68. datetime_string = date_string[:16]
  69. offset_string = date_string[16:]
  70. naive_date = datetime.strptime(datetime_string, "%Y-%m-%d %H:%M")
  71. # Create & return an aware datetime object based on the offset
  72. return naive_date.replace(tzinfo=UTC_Offset_Timezone(offset_string))
  73. # Print helpers
  74. def my_print(msg, char="*", value=None, exit=False):
  75. """Print 'msg', debug 'value' and exit if 'exit' is True."""
  76. print("[{}] {}".format(char, msg))
  77. if value is not None:
  78. print("\tvalue= \"{}\"".format(value))
  79. if exit:
  80. sys.exit(1)
  81. def perror(msg, value=None):
  82. my_print(msg, "-", value, True)
  83. def pwarn(msg, value=None, exit=False):
  84. my_print(msg, "!", value, exit)
  85. def pinfo(msg):
  86. my_print(msg)
  87. #############################
  88. def main(args):
  89. os.chdir("..")
  90. # setup
  91. pot_file_path = LOCALE_PATH_TMPL.format(lang="en_US")
  92. po_file_path = LOCALE_PATH_TMPL.format(lang=args.language)
  93. if not os.path.exists(pot_file_path):
  94. perror("Failed to locate POT file, exiting...", pot_file_path)
  95. if not os.path.exists(po_file_path):
  96. perror("Failed to locate PO file, exiting...", po_file_path)
  97. pot_file = polib.pofile(pot_file_path)
  98. po_file = polib.pofile(po_file_path)
  99. # check headers
  100. pinfo("Checking PO headers")
  101. pot_headers = pot_file.metadata
  102. po_headers = po_file.metadata
  103. if pot_headers["Project-Id-Version"] != po_headers["Project-Id-Version"]:
  104. pwarn("'Project-Id-Version' headers do not match", exit=args.werror)
  105. if pot_headers["POT-Creation-Date"] != po_headers["POT-Creation-Date"]:
  106. pwarn("'POT-Creation-Date' headers do not match", exit=args.werror)
  107. po_creation_date = parse_date(po_headers["POT-Creation-Date"])
  108. po_revision_date = parse_date(po_headers["PO-Revision-Date"])
  109. # Aware datetimes convert to UTC automatically when comparing
  110. if po_revision_date <= po_creation_date:
  111. pwarn("PO file seems outdated", exit=args.werror)
  112. if "Language" in po_headers and po_headers["Language"] != args.language:
  113. pwarn("'Language' header does not match with the given language", po_headers["Language"], args.werror)
  114. pinfo("Last-Translator: {}".format(po_headers["Last-Translator"]))
  115. # check translations
  116. if args.only_headers:
  117. sys.exit(0)
  118. pinfo("Checking translations, this might take a while...")
  119. pot_msgid = [entry.msgid for entry in pot_file]
  120. po_msgid = [entry.msgid for entry in po_file]
  121. # lists to hold reports
  122. missing_msgid = []
  123. not_translated = []
  124. same_msgstr = []
  125. with_typo = []
  126. verify_trans = []
  127. fuzzy_trans = po_file.fuzzy_entries()
  128. for msgid in pot_msgid:
  129. if msgid not in po_msgid:
  130. missing_msgid.append(msgid)
  131. # Init translator only if the '--no-translate' flag is NOT set
  132. translator = None
  133. if not args.no_translate:
  134. translator = google_translate.GoogleTranslator(timeout=5.0, retries=2, wait_time=WTIME)
  135. # Set source language for GoogleTranslator
  136. if args.tlang is not None:
  137. src_lang = args.tlang
  138. pinfo("Forcing '{}' as the translator's source language".format(src_lang))
  139. else:
  140. # Get a valid source language for Google
  141. # for example convert 'ar_SA' to 'ar' or 'zh_CN' to 'zh-CN'
  142. src_lang = args.language
  143. if src_lang not in translator._lang_dict:
  144. src_lang = src_lang.replace("_", "-")
  145. if src_lang not in translator._lang_dict:
  146. src_lang = src_lang.split("-")[0]
  147. # Keep entries that need further analysis using the translator
  148. further_analysis = []
  149. for entry in po_file:
  150. if not entry.translated():
  151. not_translated.append(entry)
  152. elif entry.msgid == entry.msgstr:
  153. same_msgstr.append(entry)
  154. else:
  155. further_analysis.append(entry)
  156. if translator is not None and further_analysis:
  157. # eta = (items_to_analyze * (WTIME + avg_ms)) - WTIME
  158. # We subtract WTIME at the end because there is no wait for the last item on the list
  159. # avg_msg = 200ms
  160. eta_seconds = (len(further_analysis) * (WTIME + 0.2)) - WTIME
  161. eta_seconds = int(round(eta_seconds))
  162. eta = timedelta(seconds=eta_seconds)
  163. pinfo("Approximate time to check translations online: {}".format(eta))
  164. # Pass translations as a list since GoogleTranslator can handle them
  165. words_dict = translator.get_info_dict([entry.msgstr for entry in further_analysis], "en", src_lang)
  166. for index, word_dict in enumerate(words_dict):
  167. # Get the corresponding POEntry since the words_dict does not contain those
  168. entry = further_analysis[index]
  169. if word_dict is not None:
  170. if word_dict["has_typo"]:
  171. with_typo.append(entry)
  172. if word_dict["translation"].lower() != entry.msgid.lower():
  173. found = False
  174. # Check verbs, nouns, adverbs, etc..
  175. for key in word_dict["extra"]:
  176. if entry.msgid.lower() in word_dict["extra"][key].keys():
  177. found = True
  178. break
  179. if not found:
  180. verify_trans.append((entry, word_dict["translation"]))
  181. # time to report
  182. print("=" * 25 + "Report" + "=" * 25)
  183. if missing_msgid:
  184. print("Missing msgids")
  185. for msgid in missing_msgid:
  186. print(" \"{}\"".format(msgid))
  187. if not_translated:
  188. print("Not translated")
  189. for entry in not_translated:
  190. print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))
  191. if same_msgstr:
  192. print("Same msgstr")
  193. for entry in same_msgstr:
  194. print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))
  195. if with_typo:
  196. print("With typo")
  197. for entry in with_typo:
  198. print(" line: {} msgid: \"{}\" msgstr: \"{}\"".format(entry.linenum, entry.msgid, entry.msgstr))
  199. if verify_trans:
  200. print("Verify translation")
  201. for item in verify_trans:
  202. entry, translation = item
  203. print(" line: {} msgid: \"{}\" trans: \"{}\"".format(entry.linenum, entry.msgid, translation))
  204. if fuzzy_trans:
  205. print("Fuzzy translations")
  206. for entry in fuzzy_trans:
  207. print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))
  208. total = len(missing_msgid) + len(not_translated) + len(same_msgstr) + len(with_typo) + len(verify_trans) + len(fuzzy_trans)
  209. print("")
  210. print("Missing msgids\t\t: {}".format(len(missing_msgid)))
  211. print("Not translated\t\t: {}".format(len(not_translated)))
  212. print("Same msgstr\t\t: {}".format(len(same_msgstr)))
  213. print("With typo\t\t: {}".format(len(with_typo)))
  214. print("Verify translation\t: {}".format(len(verify_trans)))
  215. print("Fuzzy translations\t: {}".format(len(fuzzy_trans)))
  216. print("Total\t\t\t: {}".format(total))
  217. print("")
  218. print("Total entries\t\t: {}".format(len(po_file)))
  219. if __name__ == "__main__":
  220. try:
  221. main(parse())
  222. except KeyboardInterrupt:
  223. print("KeyboardInterrupt")