From a83c3d635f840d9a7dd74beb64896786260f07b5 Mon Sep 17 00:00:00 2001
From: Benjamin Renard
Date: Mon, 16 Jan 2023 12:23:50 +0100
Subject: [PATCH] Add mylib.mapping.map_hash()

---
 HashMap.py                | 174 --------------------------------------
 mylib/mapping.py          | 139 ++++++++++++++++++++++++++++++
 mylib/scripts/map_test.py |  69 +++++++++++++++
 setup.py                  |   1 +
 4 files changed, 209 insertions(+), 174 deletions(-)
 delete mode 100644 HashMap.py
 create mode 100644 mylib/mapping.py
 create mode 100644 mylib/scripts/map_test.py

diff --git a/HashMap.py b/HashMap.py
deleted file mode 100644
index cdb033c..0000000
--- a/HashMap.py
+++ /dev/null
@@ -1,174 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-#
-# My hash mapping library
-#
-# Mapping configuration
-# {
-#   '[dst key 1]': {  # Key name in the result
-#
-#       'order': [int],  # Processing order between destinations keys
-#
-#       # Source values
-#       'other_key': [key],  # Other key of the destination to use as source of values
-#       'key' : '[src key]',  # Key of source hash to get source values
-#       'keys' : ['[sk1]', '[sk2]', ...],  # List of source hash's keys to get source values
-#
-#       # Clean / convert values
-#       'cleanRegex': '[regex]',  # Regex that be use to remove unwanted characters. Ex : [^0-9+]
-#       'convert': [function],  # Function to use to convert value : Original value will be passed
-#           # as argument and the value retrieve will replace source value in
-#           # the result
-#           # Ex :
-#           # lambda x: x.strip()
-#           # lambda x: "myformat : %s" % x
-#       # Deduplicate / check values
-#       'deduplicate': [bool],  # If True, sources values will be depluplicated
-#       'check': [function],  # Function to use to check source value : Source value will be passed
-#           # as argument and if function return True, the value will be preserved
-#           # Ex :
-#           # lambda x: x in my_global_hash
-#       # Join values
-#       'join': '[glue]',  # If present, sources values will be join using the "glue"
-#
-#       # Alternative mapping
-#       'or': { [map configuration] }  # If this mapping case does not retreive any value, try to get value(s)
-#           # with this other mapping configuration
-#   },
-#   '[dst key 2]': {
-#       [...]
-#   }
-# }
-#
-# Return format :
-# {
-#   '[dst key 1]': ['v1','v2', ...],
-#   '[dst key 2]': [ ... ],
-#   [...]
-# }
-
-import logging, re
-
-def clean_value(value):
-    if isinstance(value, int):
-        value=str(value)
-    return value.encode('utf8')
-
-def map(map_keys,src,dst={}):
-
-    def get_values(dst_key,src,m):
-        # Extract sources values
-        values=[]
-        if 'other_key' in m:
-            if m['other_key'] in dst:
-                values=dst[m['other_key']]
-        if 'key' in m:
-            if m['key'] in src and src[m['key']]!='':
-                values.append(clean_value(src[m['key']]))
-
-        if 'keys' in m:
-            for key in m['keys']:
-                if key in src and src[key]!='':
-                    values.append(clean_value(src[key]))
-
-        # Clean and convert values
-        if 'cleanRegex' in m and len(values)>0:
-            new_values=[]
-            for v in values:
-                nv=re.sub(m['cleanRegex'],'',v)
-                if nv!='':
-                    new_values.append(nv)
-            values=new_values
-
-        if 'convert' in m and len(values)>0:
-            new_values=[]
-            for v in values:
-                nv=m['convert'](v)
-                if nv!='':
-                    new_values.append(nv)
-            values=new_values
-
-        # Deduplicate values
-        if m.get('deduplicate') and len(values)>1:
-            new_values=[]
-            for v in values:
-                if v not in new_values:
-                    new_values.append(v)
-            values=new_values
-
-        # Check values
-        if 'check' in m and len(values)>0:
-            new_values=[]
-            for v in values:
-                if m['check'](v):
-                    new_values.append(v)
-                else:
-                    logging.debug('Invalid value %s for key %s' % (v,dst_key))
-                    if dst_key not in invalid_values:
-                        invalid_values[dst_key]=[]
-                    if v not in invalid_values[dst_key]:
-                        invalid_values[dst_key].append(v)
-            values=new_values
-
-        # Join values
-        if 'join' in m and len(values)>1:
-            values=[m['join'].join(values)]
-
-        # Manage alternative mapping case
-        if len(values)==0 and 'or' in m:
-            values=get_values(dst_key,src,m['or'])
-
-
-        return values
-
-    for dst_key in sorted(map_keys.keys(), key=lambda x: map_keys[x]['order']):
-        values=get_values(dst_key,src,map_keys[dst_key])
-
-        if len(values)==0:
-            if 'required' in map_keys[dst_key] and map_keys[dst_key]['required']:
-                logging.debug('Destination key %s could not be filled from source but is required' % dst_key)
-                return False
-            continue
-
-        dst[dst_key]=values
-    return dst
-
-
-if __name__ == '__main__':
-    logging.basicConfig(level=logging.DEBUG)
-
-    src={
-        'uid': 'hmartin',
-        'firstname': 'Martin',
-        'lastname': 'Martin',
-        'disp_name': 'Henri Martin',
-        'line_1': '3 rue de Paris',
-        'line_2': 'Pour Pierre',
-        'zip_text': '92 120',
-        'city_text': 'Montrouge',
-        'line_city': '92120 Montrouge',
-        'tel1': '01 00 00 00 00',
-        'tel2': '09 00 00 00 00',
-        'mobile': '06 00 00 00 00',
-        'fax': '01 00 00 00 00',
-        'email': 'H.MARTIN@GMAIL.COM',
-    }
-
-    map_c={
-        'uid': {'order': 0, 'key': 'uid','required': True},
-        'givenName': {'order': 1, 'key': 'firstname'},
-        'sn': {'order': 2, 'key': 'lastname'},
-        'cn': {'order': 3, 'key': 'disp_name','required': True, 'or': {'attrs': ['firstname','lastname'],'join': ' '}},
-        'displayName': {'order': 4, 'other_key': 'displayName'},
-        'street': {'order': 5, 'join': ' / ', 'keys': ['ligne_1','ligne_2']},
-        'postalCode': {'order': 6, 'key': 'zip_text', 'cleanRegex': '[^0-9]'},
-        'l': {'order': 7, 'key': 'city_text'},
-        'postalAddress': {'order': 8, 'join': '$', 'keys': ['ligne_1','ligne_2','ligne_city']},
-        'telephoneNumber': {'order': 9, 'keys': ['tel1','tel2'], 'cleanRegex': '[^0-9+]', 'deduplicate': True},
-        'mobile': {'order': 10,'key': 'mobile'},
-        'facsimileTelephoneNumber': {'order': 11,'key': 'fax'},
-        'mail': {'order': 12,'key': 'email', 'convert': lambda x: x.lower().strip()}
-    }
-
-    logging.debug('[TEST] Map src=%s / config= %s' % (src,map_c))
-    logging.debug('[TEST] Result : %s' % map(map_c,src))
diff --git a/mylib/mapping.py b/mylib/mapping.py
new file mode 100644
index 0000000..19622a3
--- /dev/null
+++ b/mylib/mapping.py
@@ -0,0 +1,139 @@
+"""
+My hash mapping library
+Mapping configuration
+  {
+    '[dst key 1]': {  # Key name in the result
+
+        'order': [int],  # Processing order between destinations keys
+
+        # Source values
+        'other_key': [key],  # Other key of the destination to use as source of values
+        'key' : '[src key]',  # Key of source hash to get source values
+        'keys' : ['[sk1]', '[sk2]', ...],  # List of source hash's keys to get source values
+
+        # Clean / convert values
+        'cleanRegex': '[regex]',  # Regex used to remove unwanted characters. Ex : [^0-9+]
+        'convert': [function],  # Function to use to convert value : Original value will be passed
+            # as argument and the returned value will replace source value in
+            # the result
+            # Ex :
+            # lambda x: x.strip()
+            # lambda x: "myformat : %s" % x
+        # Deduplicate / check values
+        'deduplicate': [bool],  # If True, sources values will be deduplicated
+        'check': [function],  # Function to use to check source value : Source value will be passed
+            # as argument and if function returns True, the value will be preserved
+            # Ex :
+            # lambda x: x in my_global_hash
+        # Join values
+        'join': '[glue]',  # If present, sources values will be joined using the "glue"
+
+        # Alternative mapping
+        'or': { [map configuration] }  # If this mapping case does not retrieve any value, try to
+            # get value(s) with this other mapping configuration
+    },
+    '[dst key 2]': {
+        [...]
+    }
+}
+
+Return format :
+{
+    '[dst key 1]': ['v1','v2', ...],
+    '[dst key 2]': [ ... ],
+    [...]
+}
+"""
+
+import logging
+import re
+
+
+log = logging.getLogger(__name__)
+
+
+def clean_value(value):
+    """Clean value: convert an integer to a string and return any other value unchanged"""
+    if isinstance(value, int):
+        value = str(value)
+    return value
+
+
+def get_values(dst, dst_key, src, m):
+    """Extract the values of destination key dst_key from src (or dst) using mapping config m"""
+    values = []
+    if "other_key" in m:
+        if m["other_key"] in dst:
+            values = list(dst[m["other_key"]])  # copy, so dst[other_key] is never altered
+    if "key" in m:
+        if m["key"] in src and src[m["key"]] != "":
+            values.append(clean_value(src[m["key"]]))
+
+    if "keys" in m:
+        for key in m["keys"]:
+            if key in src and src[key] != "":
+                values.append(clean_value(src[key]))
+
+    # Clean and convert values
+    if "cleanRegex" in m and len(values) > 0:
+        new_values = []
+        for v in values:
+            nv = re.sub(m["cleanRegex"], "", v)
+            if nv != "":
+                new_values.append(nv)
+        values = new_values
+
+    if "convert" in m and len(values) > 0:
+        new_values = []
+        for v in values:
+            nv = m["convert"](v)
+            if nv != "":
+                new_values.append(nv)
+        values = new_values
+
+    # Deduplicate values
+    if m.get("deduplicate") and len(values) > 1:
+        new_values = []
+        for v in values:
+            if v not in new_values:
+                new_values.append(v)
+        values = new_values
+
+    # Check values
+    if "check" in m and len(values) > 0:
+        new_values = []
+        for v in values:
+            if m["check"](v):
+                new_values.append(v)
+            else:
+                log.debug("Invalid value %s for key %s", v, dst_key)
+        values = new_values
+
+    # Join values
+    if "join" in m and len(values) > 1:
+        values = [m["join"].join(values)]
+
+    # Manage alternative mapping case
+    if len(values) == 0 and "or" in m:
+        values = get_values(dst, dst_key, src, m["or"])
+
+    return values
+
+
+def map_hash(mapping, src, dst=None):
+    """Map src to dst as described by mapping; return dst, or False if a required key is empty"""
+    dst = {} if dst is None else dst  # fill the dict given by the caller, even if it is empty
+    assert isinstance(dst, dict)
+    for dst_key in sorted(mapping.keys(), key=lambda x: mapping[x]["order"]):
+        values = get_values(dst, dst_key, src, mapping[dst_key])
+
+        if len(values) == 0:
+            if "required" in mapping[dst_key] and mapping[dst_key]["required"]:
+                log.debug(
+                    "Destination key %s could not be filled from source but is required", dst_key
+                )
+                return False
+            continue
+
+        dst[dst_key] = values
+    return dst
diff --git a/mylib/scripts/map_test.py b/mylib/scripts/map_test.py
new file mode 100644
index 0000000..28c27de
--- /dev/null
+++ b/mylib/scripts/map_test.py
@@ -0,0 +1,69 @@
+""" Test mapping """
+import logging
+import sys
+
+from mylib import pretty_format_value
+from mylib.mapping import map_hash
+from mylib.scripts.helpers import get_opts_parser, init_logging
+
+log = logging.getLogger(__name__)
+
+
+def main(argv=None):
+    """Script main"""
+    if argv is None:
+        argv = sys.argv[1:]
+
+    # Options parser
+    parser = get_opts_parser(progress=True)
+    options = parser.parse_args(argv)
+
+    # Initialize logs
+    init_logging(options, "Test mapping")
+
+    src = {
+        "uid": "hmartin",
+        "firstname": "Martin",
+        "lastname": "Martin",
+        "disp_name": "Henri Martin",
+        "line_1": "3 rue de Paris",
+        "line_2": "Pour Pierre",
+        "zip_text": "92 120",
+        "city_text": "Montrouge",
+        "line_city": "92120 Montrouge",
+        "tel1": "01 00 00 00 00",
+        "tel2": "09 00 00 00 00",
+        "mobile": "06 00 00 00 00",
+        "fax": "01 00 00 00 00",
+        "email": "H.MARTIN@GMAIL.COM",
+    }
+
+    map_c = {
+        "uid": {"order": 0, "key": "uid", "required": True},
+        "givenName": {"order": 1, "key": "firstname"},
+        "sn": {"order": 2, "key": "lastname"},
+        "cn": {
+            "order": 3,
+            "key": "disp_name",
+            "required": True,
+            "or": {"keys": ["firstname", "lastname"], "join": " "},
+        },
+        "displayName": {"order": 4, "other_key": "displayName"},
+        "street": {"order": 5, "join": " / ", "keys": ["line_1", "line_2"]},
+        "postalCode": {"order": 6, "key": "zip_text", "cleanRegex": "[^0-9]"},
+        "l": {"order": 7, "key": "city_text"},
+        "postalAddress": {"order": 8, "join": "$", "keys": ["line_1", "line_2", "line_city"]},
+        "telephoneNumber": {
+            "order": 9,
+            "keys": ["tel1", "tel2"],
+            "cleanRegex": "[^0-9+]",
+            "deduplicate": True,
+        },
+        "mobile": {"order": 10, "key": "mobile"},
+        "facsimileTelephoneNumber": {"order": 11, "key": "fax"},
+        "mail": {"order": 12, "key": "email", "convert": lambda x: x.lower().strip()},
+    }
+
+    print('Mapping source:\n' + pretty_format_value(src))
+    print('Mapping config:\n' + pretty_format_value(map_c))
+    print('Mapping result:\n' + pretty_format_value(map_hash(map_c, src)))
diff --git a/setup.py b/setup.py
index d80a168..42aaf7f 100644
--- a/setup.py
+++ b/setup.py
@@ -66,6 +66,7 @@ setup(
         'console_scripts': [
             'mylib-test-email = mylib.scripts.email_test:main',
             'mylib-test-email-with-config = mylib.scripts.email_test_with_config:main',
+            'mylib-test-map = mylib.scripts.map_test:main',
             'mylib-test-pbar = mylib.scripts.pbar_test:main',
             'mylib-test-report = mylib.scripts.report_test:main',
             'mylib-test-ldap = mylib.scripts.ldap_test:main',