Add mylib.mapping.map_hash()
This commit is contained in:
parent
69d6a596a8
commit
a83c3d635f
4 changed files with 209 additions and 174 deletions
174
HashMap.py
174
HashMap.py
|
@ -1,174 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# My hash mapping library
|
||||
#
|
||||
# Mapping configuration
|
||||
# {
|
||||
# '[dst key 1]': { # Key name in the result
|
||||
#
|
||||
# 'order': [int], # Processing order between destinations keys
|
||||
#
|
||||
# # Source values
|
||||
# 'other_key': [key], # Other key of the destination to use as source of values
|
||||
# 'key' : '[src key]', # Key of source hash to get source values
|
||||
# 'keys' : ['[sk1]', '[sk2]', ...], # List of source hash's keys to get source values
|
||||
#
|
||||
# # Clean / convert values
|
||||
# 'cleanRegex': '[regex]', # Regex used to remove unwanted characters. Ex : [^0-9+]
|
||||
# 'convert': [function], # Function to use to convert value : Original value will be passed
|
||||
# # as argument and the value retrieve will replace source value in
|
||||
# # the result
|
||||
# # Ex :
|
||||
# # lambda x: x.strip()
|
||||
# # lambda x: "myformat : %s" % x
|
||||
# # Deduplicate / check values
|
||||
# 'deduplicate': [bool], # If True, source values will be deduplicated
|
||||
# 'check': [function], # Function to use to check source value : Source value will be passed
|
||||
# # as argument and if function return True, the value will be preserved
|
||||
# # Ex :
|
||||
# # lambda x: x in my_global_hash
|
||||
# # Join values
|
||||
# 'join': '[glue]', # If present, sources values will be join using the "glue"
|
||||
#
|
||||
# # Alternative mapping
|
||||
# 'or': { [map configuration] } # If this mapping case does not retrieve any value, try to get value(s)
|
||||
# # with this other mapping configuration
|
||||
# },
|
||||
# '[dst key 2]': {
|
||||
# [...]
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# Return format :
|
||||
# {
|
||||
# '[dst key 1]': ['v1','v2', ...],
|
||||
# '[dst key 2]': [ ... ],
|
||||
# [...]
|
||||
# }
|
||||
|
||||
import logging, re
|
||||
|
||||
def clean_value(value):
    """Return the value as an UTF-8 encoded string.

    Integers are first turned into their decimal string form; any other
    value is encoded as-is (it must provide an ``encode`` method).
    """
    text = str(value) if isinstance(value, int) else value
    return text.encode('utf8')
|
||||
|
||||
def map(map_keys, src, dst=None):
    """Map a source hash to a destination hash using a mapping configuration.

    :param map_keys: Mapping configuration: a dict indexed by destination key
                     whose values are the per-key mapping options described
                     in the header of this file ('order' is mandatory)
    :param src:      Source hash
    :param dst:      Optional destination hash to fill in place (a new dict
                     is created when omitted)

    :return: The destination hash ({'[dst key]': [values], ...}), or False
             if a destination key flagged as 'required' could not be filled
             (keys processed before the failure are already stored in dst)
    """
    # Create the destination per call: a dict used as default value would be
    # shared (and keep growing) between successive calls.
    if dst is None:
        dst = {}

    def get_values(dst_key, src, m):
        """Compute the list of values of dst_key according to the mapping m."""
        # Extract sources values
        values = []
        if 'other_key' in m and m['other_key'] in dst:
            # Copy the list: the appends below must not alter the values
            # already stored for the other destination key.
            values = list(dst[m['other_key']])
        if 'key' in m:
            if m['key'] in src and src[m['key']] != '':
                values.append(clean_value(src[m['key']]))

        if 'keys' in m:
            for key in m['keys']:
                if key in src and src[key] != '':
                    values.append(clean_value(src[key]))

        # Clean and convert values (values that end up empty are dropped)
        if 'cleanRegex' in m and values:
            cleaned = [re.sub(m['cleanRegex'], '', v) for v in values]
            values = [v for v in cleaned if v != '']

        if 'convert' in m and values:
            converted = [m['convert'](v) for v in values]
            values = [v for v in converted if v != '']

        # Deduplicate values (keeps the first occurrence and the order)
        if m.get('deduplicate') and len(values) > 1:
            unique_values = []
            for v in values:
                if v not in unique_values:
                    unique_values.append(v)
            values = unique_values

        # Check values
        if 'check' in m and values:
            valid_values = []
            for v in values:
                if m['check'](v):
                    valid_values.append(v)
                else:
                    # Rejected values are only logged: the previous
                    # bookkeeping relied on an undefined "invalid_values"
                    # name and raised NameError on the first invalid value.
                    logging.debug('Invalid value %s for key %s' % (v, dst_key))
            values = valid_values

        # Join values
        if 'join' in m and len(values) > 1:
            values = [m['join'].join(values)]

        # Manage alternative mapping case
        if not values and 'or' in m:
            values = get_values(dst_key, src, m['or'])

        return values

    # Process destination keys following their configured 'order'
    for dst_key in sorted(map_keys.keys(), key=lambda x: map_keys[x]['order']):
        values = get_values(dst_key, src, map_keys[dst_key])

        if not values:
            if map_keys[dst_key].get('required'):
                logging.debug('Destination key %s could not be filled from source but is required' % dst_key)
                return False
            continue

        dst[dst_key] = values
    return dst
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Demonstration: map a sample source hash and log the result
    logging.basicConfig(level=logging.DEBUG)

    src = {
        'uid': 'hmartin',
        'firstname': 'Martin',
        'lastname': 'Martin',
        'disp_name': 'Henri Martin',
        'line_1': '3 rue de Paris',
        'line_2': 'Pour Pierre',
        'zip_text': '92 120',
        'city_text': 'Montrouge',
        'line_city': '92120 Montrouge',
        'tel1': '01 00 00 00 00',
        'tel2': '09 00 00 00 00',
        'mobile': '06 00 00 00 00',
        'fax': '01 00 00 00 00',
        'email': 'H.MARTIN@GMAIL.COM',
    }

    map_c = {
        'uid': {'order': 0, 'key': 'uid', 'required': True},
        'givenName': {'order': 1, 'key': 'firstname'},
        'sn': {'order': 2, 'key': 'lastname'},
        # The alternative mapping uses the documented 'keys' option
        # ('attrs' is not an option read by the mapper)
        'cn': {'order': 3, 'key': 'disp_name', 'required': True, 'or': {'keys': ['firstname', 'lastname'], 'join': ' '}},
        # NOTE(review): this entry references itself, so it never yields a
        # value - presumably 'cn' was intended; confirm before changing
        'displayName': {'order': 4, 'other_key': 'displayName'},
        # Source keys must match the ones of src ('line_*', not 'ligne_*')
        'street': {'order': 5, 'join': ' / ', 'keys': ['line_1', 'line_2']},
        'postalCode': {'order': 6, 'key': 'zip_text', 'cleanRegex': '[^0-9]'},
        'l': {'order': 7, 'key': 'city_text'},
        'postalAddress': {'order': 8, 'join': '$', 'keys': ['line_1', 'line_2', 'line_city']},
        'telephoneNumber': {'order': 9, 'keys': ['tel1', 'tel2'], 'cleanRegex': '[^0-9+]', 'deduplicate': True},
        'mobile': {'order': 10, 'key': 'mobile'},
        'facsimileTelephoneNumber': {'order': 11, 'key': 'fax'},
        'mail': {'order': 12, 'key': 'email', 'convert': lambda x: x.lower().strip()}
    }

    logging.debug('[TEST] Map src=%s / config= %s' % (src, map_c))
    logging.debug('[TEST] Result : %s' % map(map_c, src))
|
139
mylib/mapping.py
Normal file
139
mylib/mapping.py
Normal file
|
@ -0,0 +1,139 @@
|
|||
"""
|
||||
My hash mapping library
|
||||
Mapping configuration
|
||||
{
|
||||
'[dst key 1]': { # Key name in the result
|
||||
|
||||
'order': [int], # Processing order between destinations keys
|
||||
|
||||
# Source values
|
||||
'other_key': [key], # Other key of the destination to use as source of values
|
||||
'key' : '[src key]', # Key of source hash to get source values
|
||||
'keys' : ['[sk1]', '[sk2]', ...], # List of source hash's keys to get source values
|
||||
|
||||
# Clean / convert values
|
||||
'cleanRegex': '[regex]', # Regex used to remove unwanted characters. Ex : [^0-9+]
|
||||
'convert': [function], # Function to use to convert value : Original value will be passed
|
||||
# as argument and the returned value will replace the source value in
|
||||
# the result
|
||||
# Ex :
|
||||
# lambda x: x.strip()
|
||||
# lambda x: "myformat : %s" % x
|
||||
# Deduplicate / check values
|
||||
'deduplicate': [bool], # If True, source values will be deduplicated
|
||||
'check': [function], # Function to use to check source value : Source value will be passed
|
||||
# as argument and if function return True, the value will be preserved
|
||||
# Ex :
|
||||
# lambda x: x in my_global_hash
|
||||
# Join values
|
||||
'join': '[glue]', # If present, source values will be joined using the "glue"
|
||||
|
||||
# Alternative mapping
|
||||
'or': { [map configuration] } # If this mapping case does not retrieve any value, try to
|
||||
# get value(s) with this other mapping configuration
|
||||
},
|
||||
'[dst key 2]': {
|
||||
[...]
|
||||
}
|
||||
}
|
||||
|
||||
Return format :
|
||||
{
|
||||
'[dst key 1]': ['v1','v2', ...],
|
||||
'[dst key 2]': [ ... ],
|
||||
[...]
|
||||
}
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def clean_value(value):
    """Normalize a source value: integers become strings, the rest is kept as-is"""
    return str(value) if isinstance(value, int) else value
|
||||
|
||||
|
||||
def get_values(dst, dst_key, src, m):
    """Compute the values of one destination key.

    Values are collected from the destination hash ('other_key') and/or the
    source hash ('key', 'keys'), then successively cleaned ('cleanRegex'),
    converted ('convert'), deduplicated ('deduplicate'), checked ('check')
    and joined ('join'). If nothing remains and an 'or' mapping is
    configured, that alternative mapping is evaluated instead.

    :param dst:     Destination hash being built (read for 'other_key')
    :param dst_key: Name of the destination key (used in log messages)
    :param src:     Source hash
    :param m:       Mapping configuration of the destination key

    :return: List of values (possibly empty)
    """
    # Extract source values
    values = []
    if "other_key" in m and m["other_key"] in dst:
        # Work on a copy: appending the 'key'/'keys' values below must not
        # alter the list already stored in dst for the other key, and both
        # destination keys must not end up sharing the same list object.
        values = list(dst[m["other_key"]])
    if "key" in m:
        if m["key"] in src and src[m["key"]] != "":
            values.append(clean_value(src[m["key"]]))

    if "keys" in m:
        for key in m["keys"]:
            if key in src and src[key] != "":
                values.append(clean_value(src[key]))

    # Clean and convert values (values that end up empty are dropped)
    if "cleanRegex" in m and values:
        cleaned = [re.sub(m["cleanRegex"], "", v) for v in values]
        values = [v for v in cleaned if v != ""]

    if "convert" in m and values:
        converted = [m["convert"](v) for v in values]
        values = [v for v in converted if v != ""]

    # Deduplicate values: keep the first occurrence and the original order
    # (list membership test, so unhashable values are supported)
    if m.get("deduplicate") and len(values) > 1:
        unique_values = []
        for v in values:
            if v not in unique_values:
                unique_values.append(v)
        values = unique_values

    # Check values: rejected values are logged and dropped
    if "check" in m and values:
        valid_values = []
        for v in values:
            if m["check"](v):
                valid_values.append(v)
            else:
                log.debug("Invalid value %s for key %s", v, dst_key)
        values = valid_values

    # Join values
    if "join" in m and len(values) > 1:
        values = [m["join"].join(values)]

    # Manage alternative mapping case
    if not values and "or" in m:
        values = get_values(dst, dst_key, src, m["or"])

    return values
|
||||
|
||||
|
||||
def map_hash(mapping, src, dst=None):
    """Map a source hash to a destination hash using a mapping configuration.

    :param mapping: Mapping configuration: a dict indexed by destination key
                    whose values are the per-key mapping options described
                    in the module docstring ('order' is mandatory)
    :param src:     Source hash
    :param dst:     Optional destination hash to fill in place (a new dict
                    is created when omitted)

    :return: The destination hash ({'[dst key]': [values], ...}), or False
             if a destination key flagged as 'required' could not be filled
             (keys processed before the failure are already stored in dst)

    :raises TypeError: if dst is provided but is not a dict
    """
    # Only create a new dict when no destination is given: testing the
    # truthiness would discard an empty dict the caller wants filled in place.
    if dst is None:
        dst = {}
    # Explicit check instead of assert, which is stripped when running with -O
    if not isinstance(dst, dict):
        raise TypeError("dst must be a dict")

    # Process destination keys following their configured 'order'
    for dst_key in sorted(mapping.keys(), key=lambda x: mapping[x]["order"]):
        values = get_values(dst, dst_key, src, mapping[dst_key])

        if not values:
            if mapping[dst_key].get("required"):
                log.debug(
                    "Destination key %s could not be filled from source but is required", dst_key
                )
                return False
            continue

        dst[dst_key] = values
    return dst
|
69
mylib/scripts/map_test.py
Normal file
69
mylib/scripts/map_test.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
""" Test mapping """
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from mylib import pretty_format_value
|
||||
from mylib.mapping import map_hash
|
||||
from mylib.scripts.helpers import get_opts_parser, init_logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def main(argv=None):
    """Script main

    Parse the command line options, initialize logging, then run a sample
    mapping and print its source, configuration and result.

    :param argv: Command line arguments to parse (default: sys.argv[1:])
    """
    if argv is None:
        argv = sys.argv[1:]

    # Options parser
    parser = get_opts_parser(progress=True)
    # Parse the arguments received as parameter (they were ignored before)
    options = parser.parse_args(argv)

    # Initialize logs
    init_logging(options, "Test mapping")

    src = {
        "uid": "hmartin",
        "firstname": "Martin",
        "lastname": "Martin",
        "disp_name": "Henri Martin",
        "line_1": "3 rue de Paris",
        "line_2": "Pour Pierre",
        "zip_text": "92 120",
        "city_text": "Montrouge",
        "line_city": "92120 Montrouge",
        "tel1": "01 00 00 00 00",
        "tel2": "09 00 00 00 00",
        "mobile": "06 00 00 00 00",
        "fax": "01 00 00 00 00",
        "email": "H.MARTIN@GMAIL.COM",
    }

    map_c = {
        "uid": {"order": 0, "key": "uid", "required": True},
        "givenName": {"order": 1, "key": "firstname"},
        "sn": {"order": 2, "key": "lastname"},
        "cn": {
            "order": 3,
            "key": "disp_name",
            "required": True,
            # The alternative mapping uses the documented "keys" option
            # ("attrs" is not an option read by the mapper)
            "or": {"keys": ["firstname", "lastname"], "join": " "},
        },
        # NOTE(review): this entry references itself, so it never yields a
        # value - presumably "cn" was intended; confirm before changing
        "displayName": {"order": 4, "other_key": "displayName"},
        # Source keys must match the ones of src ("line_*", not "ligne_*")
        "street": {"order": 5, "join": " / ", "keys": ["line_1", "line_2"]},
        "postalCode": {"order": 6, "key": "zip_text", "cleanRegex": "[^0-9]"},
        "l": {"order": 7, "key": "city_text"},
        "postalAddress": {"order": 8, "join": "$", "keys": ["line_1", "line_2", "line_city"]},
        "telephoneNumber": {
            "order": 9,
            "keys": ["tel1", "tel2"],
            "cleanRegex": "[^0-9+]",
            "deduplicate": True,
        },
        "mobile": {"order": 10, "key": "mobile"},
        "facsimileTelephoneNumber": {"order": 11, "key": "fax"},
        "mail": {"order": 12, "key": "email", "convert": lambda x: x.lower().strip()},
    }

    print('Mapping source:\n' + pretty_format_value(src))
    print('Mapping config:\n' + pretty_format_value(map_c))
    print('Mapping result:\n' + pretty_format_value(map_hash(map_c, src)))
|
1
setup.py
1
setup.py
|
@ -66,6 +66,7 @@ setup(
|
|||
'console_scripts': [
|
||||
'mylib-test-email = mylib.scripts.email_test:main',
|
||||
'mylib-test-email-with-config = mylib.scripts.email_test_with_config:main',
|
||||
'mylib-test-map = mylib.scripts.map_test:main',
|
||||
'mylib-test-pbar = mylib.scripts.pbar_test:main',
|
||||
'mylib-test-report = mylib.scripts.report_test:main',
|
||||
'mylib-test-ldap = mylib.scripts.ldap_test:main',
|
||||
|
|
Loading…
Reference in a new issue