2023-01-16 12:23:50 +01:00
|
|
|
"""
|
|
|
|
My hash mapping library
|
|
|
|
Mapping configuration
|
|
|
|
{
|
|
|
|
'[dst key 1]': { # Key name in the result
|
|
|
|
|
|
|
|
'order': [int], # Processing order between destinations keys
|
|
|
|
|
|
|
|
# Source values
|
|
|
|
'other_key': [key], # Other key of the destination to use as source of values
|
|
|
|
'key' : '[src key]', # Key of source hash to get source values
|
|
|
|
'keys' : ['[sk1]', '[sk2]', ...], # List of source hash's keys to get source values
|
|
|
|
|
|
|
|
# Clean / convert values
|
|
|
|
'cleanRegex': '[regex]', # Regex used to remove unwanted characters. Ex : [^0-9+]
|
|
|
|
'convert': [function], # Function to use to convert value : Original value will be passed
|
|
|
|
# as argument and the returned value will replace the source value in
|
|
|
|
# the result
|
|
|
|
# Ex :
|
|
|
|
# lambda x: x.strip()
|
|
|
|
# lambda x: "myformat : %s" % x
|
|
|
|
# Deduplicate / check values
|
|
|
|
'deduplicate': [bool], # If True, source values will be deduplicated
|
|
|
|
'check': [function], # Function to use to check source value : Source value will be passed
|
|
|
|
# as argument and if function return True, the value will be preserved
|
|
|
|
# Ex :
|
|
|
|
# lambda x: x in my_global_hash
|
|
|
|
# Join values
|
|
|
|
'join': '[glue]', # If present, sources values will be join using the "glue"
|
|
|
|
|
|
|
|
# Alternative mapping
|
|
|
|
'or': { [map configuration] } # If this mapping case does not retrieve any value, try to
|
|
|
|
# get value(s) with this other mapping configuration
|
|
|
|
},
|
|
|
|
'[dst key 2]': {
|
|
|
|
[...]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Return format :
|
|
|
|
{
|
|
|
|
'[dst key 1]': ['v1','v2', ...],
|
|
|
|
'[dst key 2]': [ ... ],
|
|
|
|
[...]
|
|
|
|
}
|
|
|
|
"""
|
|
|
|
|
|
|
|
import logging
|
|
|
|
import re
|
|
|
|
|
|
|
|
# Module-level logger, named after this module
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
def clean_value(value):
    """Return integer values as strings; leave any other value untouched.

    Note that booleans are integers in Python, so they are stringified too.
    """
    return str(value) if isinstance(value, int) else value
|
|
|
|
|
|
|
|
|
|
|
|
def get_values(dst, dst_key, src, m):
    """Extract the values of one destination key from the source hash.

    Values are collected from the ``other_key``, ``key`` and ``keys`` sources
    (in that order), then passed through the optional ``cleanRegex``,
    ``convert``, ``deduplicate``, ``check`` and ``join`` steps. If no value is
    left at the end and the mapping has an ``or`` entry, the extraction is
    retried with that alternative mapping.

    :param dst: Destination hash built so far (read when ``other_key`` is used)
    :param dst_key: Name of the destination key (only used in log messages)
    :param src: Source hash
    :param m: Mapping configuration of the destination key
    :return: List of extracted values (possibly empty)
    """
    values = []
    if "other_key" in m and m["other_key"] in dst:
        # Work on a copy: values are appended to and filtered below, and the
        # list stored in dst must neither be modified nor end up shared
        # between two destination keys.
        other_values = dst[m["other_key"]]
        if isinstance(other_values, (list, tuple)):
            values = list(other_values)
        else:
            # Tolerate a caller-supplied scalar in dst
            values = [other_values]

    # Source keys to read, 'key' first then 'keys'
    source_keys = []
    if "key" in m:
        source_keys.append(m["key"])
    if "keys" in m:
        source_keys.extend(m["keys"])
    for key in source_keys:
        # Empty strings are considered as missing values
        if key in src and src[key] != "":
            values.append(clean_value(src[key]))

    # Clean and convert values (values that become empty are dropped)
    if "cleanRegex" in m and values:
        cleaned = (re.sub(m["cleanRegex"], "", v) for v in values)
        values = [v for v in cleaned if v != ""]

    if "convert" in m and values:
        converted = (m["convert"](v) for v in values)
        values = [v for v in converted if v != ""]

    # Deduplicate values, keeping the first occurrence. A list lookup is used
    # on purpose: converted values are not guaranteed to be hashable.
    if m.get("deduplicate") and len(values) > 1:
        unique_values = []
        for v in values:
            if v not in unique_values:
                unique_values.append(v)
        values = unique_values

    # Check values
    if "check" in m and values:
        checked_values = []
        for v in values:
            if m["check"](v):
                checked_values.append(v)
            else:
                log.debug("Invalid value %s for key %s", v, dst_key)
        values = checked_values

    # Join values (a single value is left as is)
    if "join" in m and len(values) > 1:
        values = [m["join"].join(values)]

    # Manage alternative mapping case
    if not values and "or" in m:
        values = get_values(dst, dst_key, src, m["or"])

    return values
|
|
|
|
|
|
|
|
|
|
|
|
def map_hash(mapping, src, dst=None):
    """Map a source hash to a destination hash using a mapping configuration.

    Destination keys are processed by ascending ``order`` (keys without an
    ``order`` are treated as order 0). A destination key for which no value
    could be extracted is left out of the result, unless its mapping sets
    ``required`` to a true value, in which case the mapping is aborted.

    :param mapping: Mapping configuration (see the module documentation)
    :param src: Source hash
    :param dst: Optional destination hash to fill in place; a new one is
                created when omitted
    :return: The destination hash, or False if a required destination key
             could not be filled (dst may then be partially filled)
    """
    # Only create a new hash when none was given: an empty dict supplied by
    # the caller must be filled in place, not silently replaced.
    if dst is None:
        dst = {}
    assert isinstance(dst, dict)

    for dst_key in sorted(mapping, key=lambda k: mapping[k].get("order", 0)):
        key_mapping = mapping[dst_key]
        values = get_values(dst, dst_key, src, key_mapping)

        if values:
            dst[dst_key] = values
            continue

        if key_mapping.get("required"):
            log.debug(
                "Destination key %s could not be filled from source but is required", dst_key
            )
            return False

    return dst
|