convert.py 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. from collections import Counter
  2. import re
  3. def remove_extra_lines(s:str):
  4. # Alternatively: os.linesep.join([line for line in s.splitlines() if line])
  5. return re.sub('\n+', '\n', s)
  6. def remove_email_extra(s:str):
  7. s = remove_extra_lines(s)
  8. return s.replace("=20", "").replace('"3D', "").replace("=\n", "")
  9. def remove_email_content_id(s:str, repl="<ID>"):
  10. return re.sub(r"(?<================)[0-9]+(?===)", repl, s)
  11. def remove_email_message_id(s:str, repl="<message_id>"):
  12. return re.sub(r"(?<=Message-ID: <).+?(?=>)", repl, s)
  13. def payloads_to_dict(*parts):
  14. data = {}
  15. for part in parts:
  16. payload = part.get_payload()
  17. key = part.get_content_type()
  18. if key in data:
  19. new_key = key
  20. n = 0
  21. while new_key in data:
  22. n += 1
  23. new_key = key + f"_{n}"
  24. key = new_key
  25. if isinstance(payload, str):
  26. data[key] = payload
  27. elif payload is None:
  28. # Most likely empty message
  29. pass
  30. else:
  31. data[key] = payloads_to_dict(*payload)
  32. return data