You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

306 lines
8.8 KiB

  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. Author: Sotiris Papadopoulos <ytubedlg@gmail.com>
  5. Last-Revision: 2017-04-19
  6. Script to automatically check PO files
  7. """
  8. from __future__ import unicode_literals
  9. import os
  10. import sys
  11. import logging
  12. import argparse
  13. from time import sleep
  14. from datetime import datetime, timedelta, tzinfo
  15. try:
  16. import polib
  17. import google_translate
  18. except ImportError as error:
  19. print(error)
  20. sys.exit(1)
  21. WTIME = 2.0 # Time in seconds to wait between requests to avoid ban
  22. PACKAGE = "youtube_dl_gui"
  23. PO_FILENAME = "{}.po".format(PACKAGE)
  24. LOCALE_PATH_TMPL = os.path.join(PACKAGE, "locale", "{lang}", "LC_MESSAGES", PO_FILENAME)
  25. logging.basicConfig(level=logging.ERROR)
  26. def parse():
  27. """Parse command line arguments."""
  28. parser = argparse.ArgumentParser(description="Script to automatically check PO files")
  29. parser.add_argument("language", help="language of the PO file to check")
  30. parser.add_argument("-w", "--werror", action="store_true", help="treat all warning messages as errors")
  31. parser.add_argument("-o", "--only-headers", action="store_true", help="check only the PO file headers")
  32. parser.add_argument("-n", "--no-translate", action="store_true", help="do not use the translator to check 'msgstr' fields")
  33. parser.add_argument("-t", "--tlang", help="force a different language on the translator than the one given")
  34. return parser.parse_args()
  35. class UTC_Offset_Timezone(tzinfo):
  36. """Class that represents a UTC offset in the format +/-0000."""
  37. def __init__(self, offset_string):
  38. self.offset = timedelta(seconds=UTC_Offset_Timezone.parse_offset(offset_string))
  39. def utcoffset(self, dt):
  40. return self.offset + self.dst(dt)
  41. def dst(self, dt):
  42. return timedelta(0)
  43. @staticmethod
  44. def parse_offset(offset_string):
  45. """Parse the offset string into seconds."""
  46. if len(offset_string) != 5:
  47. raise ValueError("Invalid length for offset string ({})".format(offset_string))
  48. hours = offset_string[1:3]
  49. minutes = offset_string[3:5]
  50. offset = int(hours) * 3600 + int(minutes) * 60
  51. if offset_string[0] == "-":
  52. return -1 * offset
  53. return offset
  54. def parse_date(date_string):
  55. """Parse date string into an aware datetime object."""
  56. # Just a small list with the most common timezones
  57. offset_list = [
  58. ("EEST", "0300"),
  59. ("EET", "0200"),
  60. ("GMT", "0000"),
  61. ("UTC", "0000")
  62. ]
  63. # Replace all the timezones with the offset
  64. for item in offset_list:
  65. timezone, offset = item
  66. date_string = date_string.replace(timezone, offset)
  67. datetime_string = date_string[:16]
  68. offset_string = date_string[16:]
  69. naive_date = datetime.strptime(datetime_string, "%Y-%m-%d %H:%M")
  70. # Create & return an aware datetime object based on the offset
  71. return naive_date.replace(tzinfo=UTC_Offset_Timezone(offset_string))
  72. # Print helpers
  73. def my_print(msg, char="*", value=None, exit=False):
  74. """Print 'msg', debug 'value' and exit if 'exit' is True."""
  75. print("[{}] {}".format(char, msg))
  76. if value is not None:
  77. print("\tvalue= \"{}\"".format(value))
  78. if exit:
  79. sys.exit(1)
  80. def perror(msg, value=None):
  81. my_print(msg, "-", value, True)
  82. def pwarn(msg, value=None, exit=False):
  83. my_print(msg, "!", value, exit)
  84. def pinfo(msg):
  85. my_print(msg)
  86. #############################
  87. def main(args):
  88. os.chdir("..")
  89. # setup
  90. pot_file_path = LOCALE_PATH_TMPL.format(lang="en_US")
  91. po_file_path = LOCALE_PATH_TMPL.format(lang=args.language)
  92. if not os.path.exists(pot_file_path):
  93. perror("Failed to locate POT file, exiting...", pot_file_path)
  94. if not os.path.exists(po_file_path):
  95. perror("Failed to locate PO file, exiting...", po_file_path)
  96. pot_file = polib.pofile(pot_file_path)
  97. po_file = polib.pofile(po_file_path)
  98. # check headers
  99. pinfo("Checking PO headers")
  100. pot_headers = pot_file.metadata
  101. po_headers = po_file.metadata
  102. if pot_headers["Project-Id-Version"] != po_headers["Project-Id-Version"]:
  103. pwarn("'Project-Id-Version' headers do not match", exit=args.werror)
  104. if pot_headers["POT-Creation-Date"] != po_headers["POT-Creation-Date"]:
  105. pwarn("'POT-Creation-Date' headers do not match", exit=args.werror)
  106. po_creation_date = parse_date(po_headers["POT-Creation-Date"])
  107. po_revision_date = parse_date(po_headers["PO-Revision-Date"])
  108. # Aware datetimes convert to UTC automatically when comparing
  109. if po_revision_date <= po_creation_date:
  110. pwarn("PO file seems outdated", exit=args.werror)
  111. if "Language" in po_headers and po_headers["Language"] != args.language:
  112. pwarn("'Language' header does not match with the given language", po_headers["Language"], args.werror)
  113. pinfo("Last-Translator: {}".format(po_headers["Last-Translator"]))
  114. # check translations
  115. if args.only_headers:
  116. sys.exit(0)
  117. pinfo("Checking translations, this might take a while...")
  118. eta = timedelta(seconds=len(pot_file) * WTIME)
  119. pinfo("Approximate time to check translations online: {}".format(eta))
  120. pot_msgid = [entry.msgid for entry in pot_file]
  121. po_msgid = [entry.msgid for entry in po_file]
  122. # lists to hold reports
  123. missing_msgid = []
  124. not_translated = []
  125. same_msgstr = []
  126. with_typo = []
  127. verify_trans = []
  128. fuzzy_trans = po_file.fuzzy_entries()
  129. for msgid in pot_msgid:
  130. if msgid not in po_msgid:
  131. missing_msgid.append(msgid)
  132. translator = google_translate.GoogleTranslator(timeout=5.0, retries=2, wait_time=WTIME)
  133. # Set source language for GoogleTranslator
  134. if args.tlang is not None:
  135. src_lang = args.tlang
  136. pinfo("Forcing '{}' as the translator's source language".format(src_lang))
  137. else:
  138. # Get a valid source language for Google
  139. # for example convert 'ar_SA' to 'ar' or 'zh_CN' to 'zh-CN'
  140. src_lang = args.language
  141. if src_lang not in translator._lang_dict:
  142. src_lang = src_lang.replace("_", "-")
  143. if src_lang not in translator._lang_dict:
  144. src_lang = src_lang.split("-")[0]
  145. for entry in po_file:
  146. if not entry.translated():
  147. not_translated.append(entry)
  148. elif entry.msgid == entry.msgstr:
  149. same_msgstr.append(entry)
  150. else:
  151. if args.no_translate:
  152. continue
  153. word_dict = translator.get_info_dict(entry.msgstr, "en", src_lang)
  154. if word_dict is not None:
  155. if word_dict["has_typo"]:
  156. with_typo.append(entry)
  157. if word_dict["translation"].lower() != entry.msgid.lower():
  158. found = False
  159. # Check verbs, nouns, adverbs, etc..
  160. for key in word_dict["extra"]:
  161. if entry.msgid.lower() in word_dict["extra"][key].keys():
  162. found = True
  163. break
  164. if not found:
  165. verify_trans.append((entry, word_dict["translation"]))
  166. sleep(WTIME)
  167. # time to report
  168. print("=" * 25 + "Report" + "=" * 25)
  169. if missing_msgid:
  170. print("Missing msgids")
  171. for msgid in missing_msgid:
  172. print(" \"{}\"".format(msgid))
  173. if not_translated:
  174. print("Not translated")
  175. for entry in not_translated:
  176. print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))
  177. if same_msgstr:
  178. print("Same msgstr")
  179. for entry in same_msgstr:
  180. print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))
  181. if with_typo:
  182. print("With typo")
  183. for entry in with_typo:
  184. print(" line: {} msgid: \"{}\" msgstr: \"{}\"".format(entry.linenum, entry.msgid, entry.msgstr))
  185. if verify_trans:
  186. print("Verify translation")
  187. for item in verify_trans:
  188. entry, translation = item
  189. print(" line: {} msgid: \"{}\" trans: \"{}\"".format(entry.linenum, entry.msgid, translation))
  190. if fuzzy_trans:
  191. print("Fuzzy translations")
  192. for entry in fuzzy_trans:
  193. print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))
  194. total = len(missing_msgid) + len(not_translated) + len(same_msgstr) + len(with_typo) + len(verify_trans) + len(fuzzy_trans)
  195. print("")
  196. print("Missing msgids\t\t: {}".format(len(missing_msgid)))
  197. print("Not translated\t\t: {}".format(len(not_translated)))
  198. print("Same msgstr\t\t: {}".format(len(same_msgstr)))
  199. print("With typo\t\t: {}".format(len(with_typo)))
  200. print("Verify translation\t: {}".format(len(verify_trans)))
  201. print("Fuzzy translations\t: {}".format(len(fuzzy_trans)))
  202. print("Total\t\t\t: {}".format(total))
  203. if __name__ == "__main__":
  204. try:
  205. main(parse())
  206. except KeyboardInterrupt:
  207. print("KeyboardInterrupt")