Source code for sepal_ui.translator.translator

"""The translator object allows developers to support translation for their application."""

import json
from configparser import ConfigParser
from pathlib import Path
from typing import List, Tuple, Union

import pandas as pd
from box import Box
from deprecated.sphinx import deprecated, versionadded

from sepal_ui.conf import config_file


class Translator(Box):
    # Translation messages are exposed as (nested) attributes of this Box, so a
    # message key must never collide with a method name of this class or of Box.
    _protected_keys = [
        "find_target",
        "search_key",
        "sanitize",
        "_update",
        "missing_keys",
        "available_locales",
        "merge_dict",
        "delete_empty",
    ] + dir(Box)
    "keys that cannot be used as var names as they are protected for methods"

    def __init__(
        self, json_folder: Union[str, Path], target: str = "", default: str = "en"
    ) -> None:
        """Python ``Box`` of ``Box`` representing all the nested translation key, value pairs.

        It reads the .json files of 2 locale folders, the first one being the source
        language (usually English) and the second one the target language. Every key of
        the source dictionary that also exists in the target dictionary is replaced by
        its translated value. Following this procedure, every message that is not
        translated can still be accessed in the source language.
        To access the dictionary keys, instead of using [], you can simply use the key
        name as in an object, ex: translator.first_key.secondary_key.
        There are no depth limits, just respect the snake_case convention when naming
        your keys in the .json files.
        5 internal keys are created upon initialization (their names cannot be used as
        keys in the translation messages):

        - (str) _default : the default locale of the translator
        - (str) _targeted : the initially requested language. Used to display debug information to the user agent
        - (str) _target : the target locale of the translator
        - (bool) _match : if the target language matches the one requested by the user, used to trigger information in appBar
        - (str) _folder : the path to the l10n folder

        Args:
            json_folder: The folder where the dictionaries are stored
            target: The language code (IETF BCP 47) of the target lang (it should be the same as the target dictionary). Default to either the language specified in the parameter file or the default one.
            default: The language code (IETF BCP 47) of the source lang. Default to "en" (it should be the same as the source dictionary)

        Raises:
            Exception: if one of the internal or protected keys is used as a key of the translation dictionaries
        """
        # the name of the 5 variables that cannot be used as init keys
        FORBIDDEN_KEYS = ["_folder", "_default", "_target", "_targeted", "_match"]

        # init the box with the folder
        folder = Path(json_folder)

        # reading the default dict
        default_dict = self.merge_dict(folder / default)

        # create a dictionary in the target language
        targeted, target = self.find_target(folder, target)
        target = target or default
        target_dict = self.merge_dict(folder / target)

        # evaluate the matching of requested and obtained values
        match = targeted == target

        # create the composite dictionary
        ms_dict = self._update(default_dict, target_dict)

        # check if forbidden keys are being used
        # this will raise an error if any
        for k in FORBIDDEN_KEYS + self._protected_keys:
            self.search_key(ms_dict, k)

        # unpack the json as nested frozen boxes: the json round trip lets the
        # object_hook wrap every embedded dictionary, whatever its depth
        ms_json = json.dumps(ms_dict)
        ms_boxes = json.loads(ms_json, object_hook=lambda d: Box(**d, frozen_box=True))

        private_keys = {
            "_folder": str(folder),
            "_default": default,
            "_targeted": targeted,
            "_target": target,
            "_match": match,
        }

        # the final box is not frozen
        # waiting for an answer here: https://github.com/cdgriffith/Box/issues/223
        # in the meantime it's easy to call the translator using a frozen_box argument
        super(Box, self).__init__(**private_keys, **ms_boxes)

    @versionadded(version="2.7.0")
    @staticmethod
    def find_target(folder: Path, target: str = "") -> Tuple[str, str]:
        """Find the target language in the available language folder.

        Given a folder and a target lang, this function returns the closest language
        available in the folder. The exact locale is used first, then the main language
        (the first 2 characters of the code), then any sub-variety of the main language.
        An empty string is returned as closest language if nothing matches.

        Args:
            folder: the folder where the languages dictionaries are stored
            target: the target lang in IETF BCP 47. If not specified, the value in the sepal-ui config file will be used

        Returns:
            the targeted language code, the closest lang in IETF BCP 47 ("" if none is available)
        """
        # if target is not set try to find one in the config file
        # exit early if the config file is not yet existing
        if target == "":
            if not config_file.is_file():
                return ("", "en")
            config = ConfigParser()
            config.read(config_file)
            target = config.get("sepal-ui", "locale", fallback="en")

        main_lang = target[:2]

        if (folder / target).is_dir():
            # first scenario the target is available
            lang = target
        elif (folder / main_lang).is_dir():
            # second scenario the "main lang" is set
            lang = main_lang
        else:
            # third scenario we search for any closely related language.
            # the candidates are sorted so the selection does not depend on the
            # filesystem order, and the full folder name is used (``stem`` would
            # truncate a name that contains a dot)
            variants = sorted(
                f.name for f in folder.glob(f"{main_lang}-*") if f.is_dir()
            )
            lang = variants[0] if variants else ""

        return (target, lang)

    @classmethod
    def search_key(cls, d: dict, key: str) -> None:
        """Search a specific key in the d dictionary and raise an error if found.

        The search is recursive: every embedded dictionary is inspected.

        Args:
            d: the dictionary to study
            key: the key to look for

        Raises:
            Exception: if the key is used at any level of the dictionary
        """
        if key in d:
            msg = f"You cannot use the key {key} in your translation dictionary"
            raise Exception(msg)

        # do not return from inside the loop: every sibling dictionary needs to be
        # inspected, not only the first one
        for v in d.values():
            if isinstance(v, dict):
                cls.search_key(v, key)

    @classmethod
    def sanitize(cls, d: Union[dict, list]) -> Union[dict, list]:
        """Identify numbered dictionaries embedded in the dict and transform them into lists.

        This function is a helper to prevent deprecation after the introduction of
        pontoon for translation. The user is now forced to use keys even for numbered
        lists. SimpleNamespace doesn't support integer indexing so this function will
        transform back these "numbered" dictionaries (with only numeric keys) into lists.

        Args:
            d: the dictionary (or list) to sanitize

        Returns:
            a sanitized shallow copy of the input, of the same type as the input
        """
        ms = d.copy()

        # create the (key, value) generator based on the input type
        gen = ms.items() if isinstance(ms, dict) else enumerate(ms)

        # only the embedded dictionaries are modified, any other value is kept as is
        for k, v in gen:
            if isinstance(v, dict):
                tmp = v
                if len(tmp) and all(key.isnumeric() for key in tmp):
                    # the values are kept in the insertion order of the dictionary
                    tmp = list(tmp.values())
                ms[k] = cls.sanitize(tmp)

        return ms

    def _update(self, d: dict, u: dict) -> dict:
        """Update the fallback dictionary (d) values with the keys that exist in the target (u) dictionary.

        Keys that only exist in the target dictionary are ignored.

        Args:
            d: The fallback dictionary
            u: the target dictionary

        Returns:
            The updated dictionary (a new object, d is not modified)
        """
        ms = d.copy()
        for k, v in d.items():
            if isinstance(v, dict):
                # a target value that is not a dictionary cannot be merged with
                # a fallback sub-dictionary: keep the fallback subtree instead
                # of crashing on it
                sub = u.get(k)
                ms[k] = self._update(v, sub if isinstance(sub, dict) else {})
            else:
                ms[k] = u.get(k, v)

        return ms

    @deprecated(version="2.9.0", reason="Not needed with automatic translators")
    def missing_keys(self):
        """Nothing."""
        pass

    def available_locales(self) -> List[str]:
        """Return the available locales in the l10n folder.

        The sub-folders whose name starts with "^", "." or "_" are ignored.

        Returns:
            the list of str codes
        """
        return [f.name for f in Path(self._folder).glob("[!^._]*") if f.is_dir()]

    @versionadded(version="2.7.0")
    @classmethod
    def merge_dict(cls, folder: Path) -> dict:
        """Gather all the .json files in the provided l10n folder as 1 single json dict.

        The json dict will be sanitized and the keys will be used as if they were
        coming from 1 single file. Be careful with duplication: the top-level keys of
        a file replace the ones already read from the previous files (files are read
        in sorted path order). Empty string values will be removed.

        Args:
            folder: the folder where all the .json files are stored

        Returns:
            the json dict with all the keys
        """
        final_json = {}
        for f in sorted(folder.glob("*.json")):
            # the raw bytes are given to json.loads so the encoding is detected
            # by the parser and does not depend on the locale of the platform
            tmp_dict = cls.delete_empty(json.loads(f.read_bytes()))
            final_json.update(cls.sanitize(tmp_dict))

        return final_json

    @versionadded(version="2.8.1")
    @classmethod
    def delete_empty(cls, d: dict) -> dict:
        """Remove empty strings ("") recursively from the dictionaries.

        This is to prevent untranslated strings from Crowdin to be uploaded. The
        dictionary must only embed dictionaries and no lists. The dictionary is
        modified in place.

        Args:
            d: the dictionary to sanitize

        Returns:
            the sanitized dictionary (the same object as the input)
        """
        # iterate on a copy of the items as keys are removed in the loop
        for k, v in list(d.items()):
            if isinstance(v, dict):
                cls.delete_empty(v)
            elif v == "":
                d.pop(k)

        return d

    @versionadded(version="2.10.0")
    def key_use(self, folder: Path, name: str) -> List[str]:
        """Parse all the files in the folder and check if keys are all used at least once.

        Return the unused key names.

        .. warning::

            Don't forget that there are many ways of calling Translator variables
            (getattr, save.cm.xxx in another variable etc...) SO don't forget to check
            manually the variables suggested by this method before deleting them

        Args:
            folder: The application folder using this translator data
            name: the name use by the translator in this app (usually "cm")

        Returns:
            the sorted list of unused keys
        """
        # cannot set FORBIDDEN_KEY in the Box as it would lock another key
        FORBIDDEN_KEYS = ["_folder", "_default", "_target", "_targeted", "_match"]

        # sanitize folder
        folder = Path(folder)

        # read once the content of all the python files and notebooks found
        # recursively, skipping the generated folders. Undecodable bytes are
        # replaced so that an exotic file cannot interrupt the search
        generated_files = [".ipynb_checkpoints", "__pycache__"]
        sources = [
            f.read_text(encoding="utf-8", errors="replace")
            for f in folder.glob("**/*")
            if f.suffix in (".py", ".ipynb")
            and f.is_file()
            and not any(err in str(f) for err in generated_files)
        ]

        # get the flat version of all keys (dotted names) without the private ones
        keys = set(pd.json_normalize(self).columns) - set(FORBIDDEN_KEYS)

        # a key is considered as used as soon as the "<name>.<key>" pattern is
        # found in one of the files
        return [
            k
            for k in sorted(keys)
            if not any(f"{name}.{k}" in content for content in sources)
        ]