Add first version of HashMap lib

This commit is contained in:
Benjamin Renard 2015-05-26 17:53:00 +02:00
parent a3cca712c2
commit 199fad0049

174
HashMap.py Normal file
View file

@ -0,0 +1,174 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# My hash mapping library
#
# Mapping configuration
# {
# '[dst key 1]': { # Key name in the result
#
# 'order': [int], # Processing order between destinations keys
#
# # Source values
# 'other_key': [key], # Other key of the destination to use as source of values
# 'key' : '[src key]', # Key of source hash to get source values
# 'keys' : ['[sk1]', '[sk2]', ...], # List of source hash's keys to get source values
#
# # Clean / convert values
# 'cleanRegex': '[regex]', # Regex that be use to remove unwanted characters. Ex : [^0-9+]
# 'convert': [function], # Function to use to convert value : Original value will be passed
# # as argument and the value retrieve will replace source value in
# # the result
# # Ex :
# # lambda x: x.strip()
# # lambda x: "myformat : %s" % x
# # Deduplicate / check values
# 'deduplicate': [bool], # If True, sources values will be depluplicated
# 'check': [function], # Function to use to check source value : Source value will be passed
# # as argument and if function return True, the value will be preserved
# # Ex :
# # lambda x: x in my_global_hash
# # Join values
# 'join': '[glue]', # If present, sources values will be join using the "glue"
#
# # Alternative mapping
# 'or': { [map configuration] } # If this mapping case does not retreive any value, try to get value(s)
# # with this other mapping configuration
# },
# '[dst key 2]': {
# [...]
# }
# }
#
# Return format :
# {
# '[dst key 1]': ['v1','v2', ...],
# '[dst key 2]': [ ... ],
# [...]
# }
import logging, re
def clean_value(value):
if isinstance(value, int):
value=str(value)
return value.encode('utf8')
def map(map_keys,src,dst={}):
def get_values(dst_key,src,m):
# Extract sources values
values=[]
if 'other_key' in m:
if m['other_key'] in dst:
values=dst[m['other_key']]
if 'key' in m:
if m['key'] in src and src[m['key']]!='':
values.append(clean_value(src[m['key']]))
if 'keys' in m:
for key in m['keys']:
if key in src and src[key]!='':
values.append(clean_value(src[key]))
# Clean and convert values
if 'cleanRegex' in m and len(values)>0:
new_values=[]
for v in values:
nv=re.sub(m['cleanRegex'],'',v)
if nv!='':
new_values.append(nv)
values=new_values
if 'convert' in m and len(values)>0:
new_values=[]
for v in values:
nv=m['convert'](v)
if nv!='':
new_values.append(nv)
values=new_values
# Deduplicate values
if m.get('deduplicate') and len(values)>1:
new_values=[]
for v in values:
if v not in new_values:
new_values.append(v)
values=new_values
# Check values
if 'check' in m and len(values)>0:
new_values=[]
for v in values:
if m['check'](v):
new_values.append(v)
else:
logging.debug('Invalid value %s for key %s' % (v,dst_key))
if dst_key not in invalid_values:
invalid_values[dst_key]=[]
if v not in invalid_values[dst_key]:
invalid_values[dst_key].append(v)
values=new_values
# Join values
if 'join' in m and len(values)>1:
values=[m['join'].join(values)]
# Manage alternative mapping case
if len(values)==0 and 'or' in m:
values=get_values(dst_key,src,m['or'])
return values
for dst_key in sorted(map_keys.keys(), key=lambda x: map_keys[x]['order']):
values=get_values(dst_key,src,map_keys[dst_key])
if len(values)==0:
if 'required' in map_keys[dst_key] and map_keys[dst_key]['required']:
logging.debug('Destination key %s could not be filled from source but is required' % dst_key)
return False
continue
dst[dst_key]=values
return dst
if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)
src={
'uid': 'hmartin',
'firstname': 'Martin',
'lastname': 'Martin',
'disp_name': 'Henri Martin',
'line_1': '3 rue de Paris',
'line_2': 'Pour Pierre',
'zip_text': '92 120',
'city_text': 'Montrouge',
'line_city': '92120 Montrouge',
'tel1': '01 00 00 00 00',
'tel2': '09 00 00 00 00',
'mobile': '06 00 00 00 00',
'fax': '01 00 00 00 00',
'email': 'H.MARTIN@GMAIL.COM',
}
map_c={
'uid': {'order': 0, 'key': 'uid','required': True},
'givenName': {'order': 1, 'key': 'firstname'},
'sn': {'order': 2, 'key': 'lastname'},
'cn': {'order': 3, 'key': 'disp_name','required': True, 'or': {'attrs': ['firstname','lastname'],'join': ' '}},
'displayName': {'order': 4, 'other_key': 'displayName'},
'street': {'order': 5, 'join': ' / ', 'keys': ['ligne_1','ligne_2']},
'postalCode': {'order': 6, 'key': 'zip_text', 'cleanRegex': '[^0-9]'},
'l': {'order': 7, 'key': 'city_text'},
'postalAddress': {'order': 8, 'join': '$', 'keys': ['ligne_1','ligne_2','ligne_city']},
'telephoneNumber': {'order': 9, 'keys': ['tel1','tel2'], 'cleanRegex': '[^0-9+]', 'deduplicate': True},
'mobile': {'order': 10,'key': 'mobile'},
'facsimileTelephoneNumber': {'order': 11,'key': 'fax'},
'mail': {'order': 12,'key': 'email', 'convert': lambda x: x.lower().strip()}
}
logging.debug('[TEST] Map src=%s / config= %s' % (src,map_c))
logging.debug('[TEST] Result : %s' % map(map_c,src))