You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

314 lines
9.4 KiB

  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. Author: Sotiris Papadopoulos <ytubedlg@gmail.com>
  5. Last-Revision: 2017-04-19
  6. Script to automatically check PO files
  7. """
  8. from __future__ import unicode_literals
  9. import os
  10. import sys
  11. import logging
  12. import argparse
  13. from time import sleep
  14. from datetime import datetime, timedelta, tzinfo
  15. try:
  16. import polib
  17. import google_translate
  18. except ImportError as error:
  19. print(error)
  20. sys.exit(1)
  # Pause (in seconds) between translator requests so the service does not ban us
  WTIME = 2.0

  PACKAGE = "youtube_dl_gui"

  # Every language shares the same catalog file name, derived from the package
  PO_FILENAME = PACKAGE + ".po"

  # Relative catalog path for one language; fill in with .format(lang=...)
  LOCALE_PATH_TMPL = os.path.join(
      PACKAGE,
      "locale",
      "{lang}",
      "LC_MESSAGES",
      PO_FILENAME,
  )

  logging.basicConfig(level=logging.ERROR)
  def parse():
      """Build the command line interface and return the parsed arguments.

      Reads sys.argv. The returned namespace carries 'language', the boolean
      switches 'werror', 'only_headers' and 'no_translate', and the optional
      'tlang' override (None when not given).
      """
      cli = argparse.ArgumentParser(description="Script to automatically check PO files")

      cli.add_argument("language", help="language of the PO file to check")

      # Boolean switches, declared in the order they appear in the help output
      switches = (
          (("-w", "--werror"), "treat all warning messages as errors"),
          (("-o", "--only-headers"), "check only the PO file headers"),
          (("-n", "--no-translate"), "do not use the translator to check 'msgstr' fields"),
      )
      for flags, description in switches:
          cli.add_argument(*flags, action="store_true", help=description)

      cli.add_argument("-t", "--tlang", help="force a different language on the translator than the one given")

      return cli.parse_args()
  class UTC_Offset_Timezone(tzinfo):
      """Fixed-offset timezone described by a string in the format +/-HHMM.

      Args:
          offset_string: Five characters, a sign followed by two hour digits
              and two minute digits (for example "+0300"). Defaults to UTC so
              an instance can be rebuilt without arguments, which is what
              copying or pickling a tzinfo subclass needs.

      Raises:
          ValueError: If 'offset_string' is not a valid offset.
      """

      def __init__(self, offset_string="+0000"):
          seconds = UTC_Offset_Timezone.parse_offset(offset_string)
          self.offset = timedelta(seconds=seconds)

          # Normalised zone name; str() yields a native string on both
          # Python 2 and 3 (this file enables unicode_literals).
          sign = "-" if seconds < 0 else "+"
          self.name = str(sign + offset_string[1:])

      def utcoffset(self, dt):
          """Return the total offset from UTC as a timedelta."""
          return self.offset + self.dst(dt)

      def dst(self, dt):
          """Return a zero timedelta; no DST adjustment is ever applied."""
          return timedelta(0)

      def tzname(self, dt):
          """Return the offset itself (for example '+0300') as the zone name."""
          return self.name

      def __repr__(self):
          return "{}({!r})".format(type(self).__name__, self.name)

      @staticmethod
      def parse_offset(offset_string):
          """Parse the offset string into seconds.

          Args:
              offset_string: Sign character followed by HHMM. A whitespace
                  sign is read as '+', because a caller that swaps a zone
                  abbreviation for its bare digits leaves the character that
                  preceded the abbreviation in the sign position.

          Returns:
              The offset in seconds, negative when the sign is '-'.

          Raises:
              ValueError: On a wrong length, an unknown sign character or
                  non-digit hour/minute fields.
          """
          if len(offset_string) != 5:
              raise ValueError("Invalid length for offset string ({})".format(offset_string))

          sign = offset_string[0]
          hours = offset_string[1:3]
          minutes = offset_string[3:5]

          if sign not in ("+", "-") and not sign.isspace():
              raise ValueError("Invalid sign for offset string ({})".format(offset_string))

          # int() alone would accept signed or padded fields such as '-1'
          if not (hours.isdigit() and minutes.isdigit()):
              raise ValueError("Invalid digits for offset string ({})".format(offset_string))

          offset = int(hours) * 3600 + int(minutes) * 60

          if sign == "-":
              return -1 * offset

          return offset
  def parse_date(date_string):
      """Turn a date string into a timezone-aware datetime object.

      The first 16 characters are read as 'YYYY-MM-DD HH:MM'; whatever follows
      them is handed to UTC_Offset_Timezone as the UTC offset. A few zone
      abbreviations are swapped for their numeric offsets beforehand.
      """
      # Deliberately short: only the abbreviations seen most often
      zone_offsets = (
          ("EEST", "0300"),
          ("EET", "0200"),
          ("GMT", "0000"),
          ("UTC", "0000"),
      )

      # Substitute each known abbreviation with its digits
      for abbreviation, digits in zone_offsets:
          date_string = date_string.replace(abbreviation, digits)

      stamp_length = 16  # len("YYYY-MM-DD HH:MM")
      stamp_part = date_string[:stamp_length]
      offset_part = date_string[stamp_length:]

      moment = datetime.strptime(stamp_part, "%Y-%m-%d %H:%M")

      # Attach the offset so the result compares correctly across zones
      zone = UTC_Offset_Timezone(offset_part)
      return moment.replace(tzinfo=zone)
  72. # Print helpers
  def my_print(msg, char="*", value=None, exit=False):
      """Print a tagged message, an optional value line, then optionally quit.

      'msg' is printed prefixed with '[char]'. When 'value' is not None it is
      printed on a second, tab-indented line. When 'exit' is true the process
      terminates with status 1 after printing.
      """
      headline = "[{}] {}".format(char, msg)
      print(headline)

      show_value = value is not None
      if show_value:
          print('\tvalue= "{}"'.format(value))

      if not exit:
          return

      sys.exit(1)
  def perror(msg, value=None):
      """Print 'msg' tagged '[-]' (plus 'value' if given) and always exit."""
      my_print(msg, char="-", value=value, exit=True)
  def pwarn(msg, value=None, exit=False):
      """Print 'msg' tagged '[!]' (plus 'value' if given); exit when 'exit' is true."""
      my_print(msg, char="!", value=value, exit=exit)
  def pinfo(msg):
      """Print 'msg' tagged '[*]' as a plain informational line."""
      my_print(msg, char="*")
  86. #############################
  def _header(headers, name, origin):
      """Return header 'name', or abort naming the 'origin' file if it is absent."""
      if name not in headers:
          perror("Missing '{}' header in {} file, exiting...".format(name, origin))

      return headers[name]


  def _header_date(headers, name):
      """Return PO header 'name' parsed into an aware datetime, or abort."""
      raw_value = _header(headers, name, "PO")

      try:
          return parse_date(raw_value)
      except ValueError:
          perror("Failed to parse '{}' header, exiting...".format(name), raw_value)


  def _check_headers(pot_headers, po_headers, args):
      """Compare the PO headers against the POT headers and the given language."""
      pinfo("Checking PO headers")

      for name in ("Project-Id-Version", "POT-Creation-Date"):
          if _header(pot_headers, name, "POT") != _header(po_headers, name, "PO"):
              pwarn("'{}' headers do not match".format(name), exit=args.werror)

      po_creation_date = _header_date(po_headers, "POT-Creation-Date")
      po_revision_date = _header_date(po_headers, "PO-Revision-Date")

      # Aware datetimes convert to UTC automatically when comparing
      if po_revision_date <= po_creation_date:
          pwarn("PO file seems outdated", exit=args.werror)

      if "Language" in po_headers and po_headers["Language"] != args.language:
          pwarn("'Language' header does not match with the given language", po_headers["Language"], args.werror)

      if "Last-Translator" in po_headers:
          pinfo("Last-Translator: {}".format(po_headers["Last-Translator"]))
      else:
          pwarn("'Last-Translator' header is missing", exit=args.werror)


  def _translator_language(language, supported):
      """Map a locale such as 'zh_CN' or 'ar_SA' to a code found in 'supported'."""
      if language in supported:
          return language

      # 'zh_CN' -> 'zh-CN'
      dashed = language.replace("_", "-")
      if dashed in supported:
          return dashed

      # 'ar-SA' -> 'ar'; last resort, returned without a further lookup
      return dashed.split("-")[0]


  def _report_section(title, lines):
      """Print 'title' followed by 'lines'; print nothing when 'lines' is empty."""
      if not lines:
          return

      print(title)
      for line in lines:
          print(line)


  def main(args):
      """Check one language's PO file against the en_US catalog and print a report.

      Args:
          args: Namespace as returned by parse(); the attributes 'language',
              'werror', 'only_headers', 'no_translate' and 'tlang' are read.

      The process exits with status 1 when a catalog or a required header is
      missing (or on any warning when 'werror' is set), and with status 0
      after the header checks when 'only_headers' is set.
      """
      # Catalog paths are relative to the parent of the working directory
      os.chdir("..")

      # setup
      pot_file_path = LOCALE_PATH_TMPL.format(lang="en_US")
      po_file_path = LOCALE_PATH_TMPL.format(lang=args.language)

      if not os.path.exists(pot_file_path):
          perror("Failed to locate POT file, exiting...", pot_file_path)

      if not os.path.exists(po_file_path):
          perror("Failed to locate PO file, exiting...", po_file_path)

      pot_file = polib.pofile(pot_file_path)
      po_file = polib.pofile(po_file_path)

      # check headers
      _check_headers(pot_file.metadata, po_file.metadata, args)

      # check translations
      if args.only_headers:
          sys.exit(0)

      pinfo("Checking translations, this might take a while...")

      # A set keeps the missing-msgid scan linear in the number of entries
      po_msgids = {entry.msgid for entry in po_file}
      missing_msgid = [entry.msgid for entry in pot_file if entry.msgid not in po_msgids]

      fuzzy_trans = po_file.fuzzy_entries()

      # Init translator only if the '--no-translate' flag is NOT set
      translator = None
      src_lang = None

      if not args.no_translate:
          eta = timedelta(seconds=len(pot_file) * WTIME)
          pinfo("Approximate time to check translations online: {}".format(eta))

          translator = google_translate.GoogleTranslator(timeout=5.0, retries=2, wait_time=WTIME)

          # Set source language for GoogleTranslator
          if args.tlang is not None:
              src_lang = args.tlang
              pinfo("Forcing '{}' as the translator's source language".format(src_lang))
          else:
              src_lang = _translator_language(args.language, translator._lang_dict)

      # Sort entries; only 'further_analysis' ones are sent to the translator.
      # NOTE(review): polib's POEntry.translated() appears to be False for
      # fuzzy entries, so those may be listed under both 'Not translated' and
      # 'Fuzzy translations' and counted twice in the total -- confirm intent.
      not_translated = []
      same_msgstr = []
      further_analysis = []

      for entry in po_file:
          if not entry.translated():
              not_translated.append(entry)
          elif entry.msgid == entry.msgstr:
              same_msgstr.append(entry)
          else:
              further_analysis.append(entry)

      with_typo = []
      verify_trans = []

      if translator is not None:
          # Pass translations as a list since GoogleTranslator can handle them
          words_dict = translator.get_info_dict([entry.msgstr for entry in further_analysis], "en", src_lang)

          # Results carry no POEntry, so pair them back up by position
          for entry, word_dict in zip(further_analysis, words_dict):
              if word_dict is None:
                  continue

              if word_dict["has_typo"]:
                  with_typo.append(entry)

              expected = entry.msgid.lower()
              if word_dict["translation"].lower() == expected:
                  continue

              # Check verbs, nouns, adverbs, etc..
              extra = word_dict["extra"]
              if not any(expected in extra[key] for key in extra):
                  verify_trans.append((entry, word_dict["translation"]))

      # time to report
      print("=" * 25 + "Report" + "=" * 25)

      entry_line = " line: {} msgid: \"{}\""

      _report_section(
          "Missing msgids",
          [" \"{}\"".format(msgid) for msgid in missing_msgid])
      _report_section(
          "Not translated",
          [entry_line.format(entry.linenum, entry.msgid) for entry in not_translated])
      _report_section(
          "Same msgstr",
          [entry_line.format(entry.linenum, entry.msgid) for entry in same_msgstr])
      _report_section(
          "With typo",
          [" line: {} msgid: \"{}\" msgstr: \"{}\"".format(entry.linenum, entry.msgid, entry.msgstr)
           for entry in with_typo])
      _report_section(
          "Verify translation",
          [" line: {} msgid: \"{}\" trans: \"{}\"".format(entry.linenum, entry.msgid, translation)
           for entry, translation in verify_trans])
      _report_section(
          "Fuzzy translations",
          [entry_line.format(entry.linenum, entry.msgid) for entry in fuzzy_trans])

      # Summary table: one (template, count) pair per report category
      counts = (
          ("Missing msgids\t\t: {}", len(missing_msgid)),
          ("Not translated\t\t: {}", len(not_translated)),
          ("Same msgstr\t\t: {}", len(same_msgstr)),
          ("With typo\t\t: {}", len(with_typo)),
          ("Verify translation\t: {}", len(verify_trans)),
          ("Fuzzy translations\t: {}", len(fuzzy_trans)),
      )

      print("")
      for template, count in counts:
          print(template.format(count))

      print("Total\t\t\t: {}".format(sum(count for _, count in counts)))
  # Script entry point: Ctrl-C ends the run with a short notice, not a traceback
  if __name__ == "__main__":
      try:
          cli_args = parse()
          main(cli_args)
      except KeyboardInterrupt:
          print("KeyboardInterrupt")