get.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. # This file is part of Radicale - CalDAV and CardDAV server
  2. # Copyright © 2014 Jean-Marc Martins
  3. # Copyright © 2012-2017 Guillaume Ayoub
  4. # Copyright © 2017-2022 Unrud <unrud@outlook.com>
  5. # Copyright © 2024-2024 Peter Bieringer <pb@bieringer.de>
  6. #
  7. # This library is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, either version 3 of the License, or
  10. # (at your option) any later version.
  11. #
  12. # This library is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with Radicale. If not, see <http://www.gnu.org/licenses/>.
  19. import os
  20. import sys
  21. import time
  22. from typing import Iterable, Iterator, Optional, Tuple
  23. import radicale.item as radicale_item
  24. from radicale import pathutils
  25. from radicale.log import logger
  26. from radicale.storage import multifilesystem
  27. from radicale.storage.multifilesystem.base import CollectionBase
  28. from radicale.storage.multifilesystem.cache import CollectionPartCache
  29. from radicale.storage.multifilesystem.lock import CollectionPartLock
  30. class CollectionPartGet(CollectionPartCache, CollectionPartLock,
  31. CollectionBase):
  32. _item_cache_cleaned: bool
  33. def __init__(self, storage_: "multifilesystem.Storage", path: str,
  34. filesystem_path: Optional[str] = None) -> None:
  35. super().__init__(storage_, path, filesystem_path)
  36. self._item_cache_cleaned = False
  37. def _list(self) -> Iterator[str]:
  38. for entry in os.scandir(self._filesystem_path):
  39. if not entry.is_file():
  40. continue
  41. href = entry.name
  42. if not pathutils.is_safe_filesystem_path_component(href):
  43. if not href.startswith(".Radicale"):
  44. logger.debug("Skipping item %r in %r", href, self.path)
  45. continue
  46. yield href
  47. def _get(self, href: str, verify_href: bool = True
  48. ) -> Optional[radicale_item.Item]:
  49. if verify_href:
  50. try:
  51. if not pathutils.is_safe_filesystem_path_component(href):
  52. raise pathutils.UnsafePathError(href)
  53. path = pathutils.path_to_filesystem(self._filesystem_path,
  54. href)
  55. except ValueError as e:
  56. logger.debug(
  57. "Can't translate name %r safely to filesystem in %r: %s",
  58. href, self.path, e, exc_info=True)
  59. return None
  60. else:
  61. path = os.path.join(self._filesystem_path, href)
  62. try:
  63. with open(path, "rb") as f:
  64. raw_text = f.read()
  65. except (FileNotFoundError, IsADirectoryError):
  66. return None
  67. except PermissionError:
  68. # Windows raises ``PermissionError`` when ``path`` is a directory
  69. if (sys.platform == "win32" and
  70. os.path.isdir(path) and os.access(path, os.R_OK)):
  71. return None
  72. raise
  73. # The hash of the component in the file system. This is used to check,
  74. # if the entry in the cache is still valid.
  75. if self._storage._use_mtime_and_size_for_item_cache is True:
  76. cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns)
  77. if self._storage._debug_cache_actions is True:
  78. logger.debug("Item cache check for: %r with mtime and size %r", path, cache_hash)
  79. else:
  80. cache_hash = self._item_cache_hash(raw_text)
  81. if self._storage._debug_cache_actions is True:
  82. logger.debug("Item cache check for: %r with hash %r", path, cache_hash)
  83. cache_content = self._load_item_cache(href, cache_hash)
  84. if cache_content is None:
  85. if self._storage._debug_cache_actions is True:
  86. logger.debug("Item cache miss for: %r", path)
  87. with self._acquire_cache_lock("item"):
  88. # Lock the item cache to prevent multiple processes from
  89. # generating the same data in parallel.
  90. # This improves the performance for multiple requests.
  91. if self._storage._lock.locked == "r":
  92. # Check if another process created the file in the meantime
  93. cache_content = self._load_item_cache(href, cache_hash)
  94. if cache_content is None:
  95. try:
  96. vobject_items = radicale_item.read_components(
  97. raw_text.decode(self._encoding))
  98. radicale_item.check_and_sanitize_items(
  99. vobject_items, tag=self.tag)
  100. vobject_item, = vobject_items
  101. temp_item = radicale_item.Item(
  102. collection=self, vobject_item=vobject_item)
  103. if self._storage._debug_cache_actions is True:
  104. logger.debug("Item cache store for: %r", path)
  105. cache_content = self._store_item_cache(
  106. href, temp_item, cache_hash)
  107. except Exception as e:
  108. if self._skip_broken_item:
  109. logger.warning("Skip broken item %r in %r: %s", href, self.path, e)
  110. return None
  111. else:
  112. raise RuntimeError("Failed to load item %r in %r: %s" %
  113. (href, self.path, e)) from e
  114. # Clean cache entries once after the data in the file
  115. # system was edited externally.
  116. if not self._item_cache_cleaned:
  117. self._item_cache_cleaned = True
  118. self._clean_item_cache()
  119. else:
  120. if self._storage._debug_cache_actions is True:
  121. logger.debug("Item cache hit for: %r", path)
  122. last_modified = time.strftime(
  123. "%a, %d %b %Y %H:%M:%S GMT",
  124. time.gmtime(os.path.getmtime(path)))
  125. # Don't keep reference to ``vobject_item``, because it requires a lot
  126. # of memory.
  127. return radicale_item.Item(
  128. collection=self, href=href, last_modified=last_modified,
  129. etag=cache_content.etag, text=cache_content.text,
  130. uid=cache_content.uid, name=cache_content.name,
  131. component_name=cache_content.tag,
  132. time_range=(cache_content.start, cache_content.end))
  133. def get_multi(self, hrefs: Iterable[str]
  134. ) -> Iterator[Tuple[str, Optional[radicale_item.Item]]]:
  135. # It's faster to check for file name collisions here, because
  136. # we only need to call os.listdir once.
  137. files = None
  138. for href in hrefs:
  139. if files is None:
  140. # List dir after hrefs returned one item, the iterator may be
  141. # empty and the for-loop is never executed.
  142. files = os.listdir(self._filesystem_path)
  143. path = os.path.join(self._filesystem_path, href)
  144. if (not pathutils.is_safe_filesystem_path_component(href) or
  145. href not in files and os.path.lexists(path)):
  146. logger.debug("Can't translate name safely to filesystem: %r",
  147. href)
  148. yield (href, None)
  149. else:
  150. yield (href, self._get(href, verify_href=False))
  151. def get_all(self) -> Iterator[radicale_item.Item]:
  152. for href in self._list():
  153. # We don't need to check for collisions, because the file names
  154. # are from os.listdir.
  155. item = self._get(href, verify_href=False)
  156. if item is not None:
  157. yield item