diff --git a/HashMap.py b/HashMap.py new file mode 100644 index 0000000..4775760 --- /dev/null +++ b/HashMap.py @@ -0,0 +1,174 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# My hash mapping library +# +# Mapping configuration +# { +# '[dst key 1]': { # Key name in the result +# +# 'order': [int], # Processing order between destination keys +# 'required': [bool], # If True and no value can be retrieved for this key, map() returns False +# +# # Source values +# 'other_key': [key], # Other key of the destination to use as source of values +# 'key' : '[src key]', # Key of source hash to get source values +# 'keys' : ['[sk1]', '[sk2]', ...], # List of source hash's keys to get source values +# +# # Clean / convert values +# 'cleanRegex': '[regex]', # Regex used to remove unwanted characters. Ex: [^0-9+] +# 'convert': [function], # Function used to convert a value: the original value is passed +# # as argument and the returned value replaces the source value in +# # the result +# # Ex: +# # lambda x: x.strip() +# # lambda x: "myformat : %s" % x +# # Deduplicate / check values +# 'deduplicate': [bool], # If True, source values will be deduplicated +# 'check': [function], # Function used to check a source value: the source value is passed +# # as argument and, if the function returns True, the value is preserved +# # Ex: +# # lambda x: x in my_global_hash +# # Join values +# 'join': '[glue]', # If present, source values will be joined using the "glue" +# +# # Alternative mapping +# 'or': { [map configuration] } # If this mapping case does not retrieve any value, try to get value(s) +# # with this other mapping configuration +# }, +# '[dst key 2]': { +# [...] +# } +# } +# +# Return format: +# { +# '[dst key 1]': ['v1','v2', ...], +# '[dst key 2]': [ ... ], +# [...] 
# }

import logging
import re

# Values rejected by a 'check' function, grouped by destination key:
#   {'[dst key]': ['rejected value 1', ...]}
# The dict lives at module level so callers can inspect (or rebind) it after
# calling map(); it is looked up by name at call time.
invalid_values = {}


def clean_value(value):
    """Return *value* as a native text string.

    ``bytes`` are decoded as UTF-8; any other non-string value (e.g. an
    ``int``) is converted with ``str()``. Strings are returned unchanged.

    NOTE: this targets Python 3. Returning ``str`` (rather than UTF-8
    encoded ``bytes``) keeps the values usable with the ``str`` regexes and
    ``str`` glue that mapping configurations supply.
    """
    if isinstance(value, bytes):
        return value.decode('utf8')
    return str(value)


def map(map_keys, src, dst=None):
    """Build destination values from *src* according to *map_keys*.

    The function name shadows the builtin ``map``; it is kept unchanged for
    backward compatibility with existing callers.

    Args:
        map_keys: mapping configuration, ``{dst_key: {option: value}}``.
            Options read here: ``order``, ``required``, ``other_key``,
            ``key``, ``keys``, ``cleanRegex``, ``convert``, ``deduplicate``,
            ``check``, ``join`` and ``or``.
        src: source hash the values are read from.
        dst: optional dict to fill in place. A new dict is created for each
            call when omitted.

    Returns:
        ``dst`` (``{dst_key: [values, ...]}``), or ``False`` as soon as a
        key flagged ``required`` cannot be filled. In that case a
        caller-supplied ``dst`` may already hold the keys processed earlier.
    """
    if dst is None:
        # A fresh dict per call: a ``{}`` default would be shared by every
        # call and leak results from one mapping into the next.
        dst = {}

    def get_values(dst_key, m):
        """Return the list of values produced by mapping case *m*."""
        # Extract source values
        values = []
        if 'other_key' in m and m['other_key'] in dst:
            # Copy, so that appending below never alters the list stored
            # under the other destination key.
            values = list(dst[m['other_key']])

        src_keys = []
        if 'key' in m:
            src_keys.append(m['key'])
        src_keys.extend(m.get('keys', []))
        for key in src_keys:
            # Missing, None and empty-string sources are all "no value".
            if key in src and src[key] is not None and src[key] != '':
                values.append(clean_value(src[key]))

        # Clean and convert values, dropping those that end up empty
        if 'cleanRegex' in m and values:
            pattern = re.compile(m['cleanRegex'])
            cleaned = (pattern.sub('', v) for v in values)
            values = [v for v in cleaned if v != '']

        if 'convert' in m and values:
            convert = m['convert']
            converted = (convert(v) for v in values)
            values = [v for v in converted if v != '']

        # Deduplicate values, keeping first-seen order (values may be
        # unhashable after 'convert', hence no set/dict here)
        if m.get('deduplicate') and len(values) > 1:
            unique = []
            for v in values:
                if v not in unique:
                    unique.append(v)
            values = unique

        # Check values; rejected ones are recorded in invalid_values
        if 'check' in m and values:
            check = m['check']
            kept = []
            for v in values:
                if check(v):
                    kept.append(v)
                    continue
                logging.debug('Invalid value %s for key %s', v, dst_key)
                rejected = invalid_values.setdefault(dst_key, [])
                if v not in rejected:
                    rejected.append(v)
            values = kept

        # Join values
        if 'join' in m and len(values) > 1:
            values = [m['join'].join(values)]

        # Manage alternative mapping case
        if not values and 'or' in m:
            values = get_values(dst_key, m['or'])

        return values

    # Destination keys are processed by ascending 'order' (0 when absent) so
    # that an 'other_key' reference can rely on an earlier key being filled.
    ordered_keys = sorted(map_keys, key=lambda k: map_keys[k].get('order', 0))
    for dst_key in ordered_keys:
        values = get_values(dst_key, map_keys[dst_key])

        if not values:
            if map_keys[dst_key].get('required'):
                logging.debug(
                    'Destination key %s could not be filled from source but is required',
                    dst_key,
                )
                return False
            continue

        dst[dst_key] = values
    return dst


if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)

    src = {
        'uid': 'hmartin',
        'firstname': 'Martin',
        'lastname': 'Martin',
        'disp_name': 'Henri Martin',
        'line_1': '3 rue de Paris',
        'line_2': 'Pour Pierre',
        'zip_text': '92 120',
        'city_text': 'Montrouge',
        'line_city': '92120 Montrouge',
        'tel1': '01 00 00 00 00',
        'tel2': '09 00 00 00 00',
        'mobile': '06 00 00 00 00',
        'fax': '01 00 00 00 00',
        'email': 'H.MARTIN@GMAIL.COM',
    }

    map_c = {
        'uid': {'order': 0, 'key': 'uid', 'required': True},
        'givenName': {'order': 1, 'key': 'firstname'},
        'sn': {'order': 2, 'key': 'lastname'},
        # The fallback lists its sources under 'keys' (the option the mapper
        # actually reads).
        'cn': {'order': 3, 'key': 'disp_name', 'required': True,
               'or': {'keys': ['firstname', 'lastname'], 'join': ' '}},
        # NOTE(review): previously referenced itself as 'other_key', which
        # can never yield a value; 'cn' looks like the intended source.
        'displayName': {'order': 4, 'other_key': 'cn'},
        # Source keys below match the names used in src ('line_*').
        'street': {'order': 5, 'join': ' / ', 'keys': ['line_1', 'line_2']},
        'postalCode': {'order': 6, 'key': 'zip_text', 'cleanRegex': '[^0-9]'},
        'l': {'order': 7, 'key': 'city_text'},
        'postalAddress': {'order': 8, 'join': '$',
                          'keys': ['line_1', 'line_2', 'line_city']},
        'telephoneNumber': {'order': 9, 'keys': ['tel1', 'tel2'],
                            'cleanRegex': '[^0-9+]', 'deduplicate': True},
        'mobile': {'order': 10, 'key': 'mobile'},
        'facsimileTelephoneNumber': {'order': 11, 'key': 'fax'},
        'mail': {'order': 12, 'key': 'email',
                 'convert': lambda x: x.lower().strip()},
    }

    logging.debug('[TEST] Map src=%s / config= %s', src, map_c)
    logging.debug('[TEST] Result : %s', map(map_c, src))