# convert.py
import re
from collections import Counter
  3. def remove_extra_lines(s:str):
  4. # Alternatively: os.linesep.join([line for line in s.splitlines() if line])
  5. return re.sub('\n+', '\n', s)
  6. def remove_email_extra(s:str):
  7. s = remove_extra_lines(s)
  8. return s.replace("=20", "").replace('"3D', "").replace("=\n", "")
  9. def remove_email_content_id(s:str, repl="<ID>"):
  10. return re.sub(r"(?<================)[0-9]+(?===)", repl, s)
  11. def remove_email_message_id(s:str, repl="<message_id>"):
  12. return re.sub(r"(?<=Message-ID: <).+?(?=>)", repl, s)
  13. def remove_date(s:str, repl="<date>"):
  14. regex = r'(?<=Date: )[A-Za-z]{3}, [0-9]{2} [A-Za-z]{3} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2}( [+-][0-9]{4})?'
  15. return re.sub(regex, repl, s)
  16. def prune_generated_headers(s:str):
  17. transformers = (
  18. remove_email_content_id,
  19. remove_email_message_id,
  20. remove_date
  21. )
  22. for transf in transformers:
  23. s = transf(s)
  24. return s
  25. def payloads_to_dict(*parts):
  26. data = {}
  27. for part in parts:
  28. payload = part.get_payload()
  29. key = part.get_content_type()
  30. if key in data:
  31. new_key = key
  32. n = 0
  33. while new_key in data:
  34. n += 1
  35. new_key = key + f"_{n}"
  36. key = new_key
  37. if isinstance(payload, str):
  38. data[key] = payload
  39. elif payload is None:
  40. # Most likely empty message
  41. pass
  42. else:
  43. data[key] = payloads_to_dict(*payload)
  44. return data