get.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
# This file is part of Radicale - CalDAV and CardDAV server
# Copyright © 2014 Jean-Marc Martins
# Copyright © 2012-2017 Guillaume Ayoub
# Copyright © 2017-2022 Unrud <unrud@outlook.com>
# Copyright © 2024-2024 Peter Bieringer <pb@bieringer.de>
#
# This library is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Radicale. If not, see <http://www.gnu.org/licenses/>.
  19. import os
  20. import sys
  21. import time
  22. from typing import Iterable, Iterator, Optional, Tuple
  23. import radicale.item as radicale_item
  24. from radicale import pathutils
  25. from radicale.log import logger
  26. from radicale.storage import multifilesystem
  27. from radicale.storage.multifilesystem.base import CollectionBase
  28. from radicale.storage.multifilesystem.cache import CollectionPartCache
  29. from radicale.storage.multifilesystem.lock import CollectionPartLock
  30. class CollectionPartGet(CollectionPartCache, CollectionPartLock,
  31. CollectionBase):
  32. _item_cache_cleaned: bool
  33. def __init__(self, storage_: "multifilesystem.Storage", path: str,
  34. filesystem_path: Optional[str] = None) -> None:
  35. super().__init__(storage_, path, filesystem_path)
  36. self._item_cache_cleaned = False
  37. def _list(self) -> Iterator[str]:
  38. for entry in os.scandir(self._filesystem_path):
  39. if not entry.is_file():
  40. continue
  41. href = entry.name
  42. if not pathutils.is_safe_filesystem_path_component(href):
  43. if not href.startswith(".Radicale"):
  44. logger.debug("Skipping item %r in %r", href, self.path)
  45. continue
  46. yield href
  47. def _get(self, href: str, verify_href: bool = True
  48. ) -> Optional[radicale_item.Item]:
  49. if verify_href:
  50. try:
  51. if not pathutils.is_safe_filesystem_path_component(href):
  52. raise pathutils.UnsafePathError(href)
  53. path = pathutils.path_to_filesystem(self._filesystem_path,
  54. href)
  55. except ValueError as e:
  56. logger.debug(
  57. "Can't translate name %r safely to filesystem in %r: %s",
  58. href, self.path, e, exc_info=True)
  59. return None
  60. else:
  61. path = os.path.join(self._filesystem_path, href)
  62. try:
  63. if self._storage._use_mtime_and_size_for_item_cache is True:
  64. # try to avoid "open"
  65. if not os.path.isfile(path):
  66. if not os.path.exists(path):
  67. raise FileNotFoundError(path)
  68. if os.path.isdir(path):
  69. raise IsADirectoryError(path)
  70. if not os.access(path, os.R_OK):
  71. raise PermissionError(path)
  72. else:
  73. with open(path, "rb") as f:
  74. # early read of the content
  75. if self._storage._debug_cache_actions is True:
  76. logger.debug("Item cache early read: %r", path)
  77. raw_text = f.read()
  78. except (FileNotFoundError, IsADirectoryError):
  79. return None
  80. except PermissionError:
  81. # Windows raises ``PermissionError`` when ``path`` is a directory
  82. if (sys.platform == "win32" and
  83. os.path.isdir(path) and os.access(path, os.R_OK)):
  84. return None
  85. raise
  86. # The hash of the component in the file system. This is used to check,
  87. # if the entry in the cache is still valid.
  88. if self._storage._use_mtime_and_size_for_item_cache is True:
  89. cache_hash = self._item_cache_mtime_and_size(os.stat(path).st_size, os.stat(path).st_mtime_ns)
  90. if self._storage._debug_cache_actions is True:
  91. logger.debug("Item cache check for: %r with mtime and size %r", path, cache_hash)
  92. else:
  93. cache_hash = self._item_cache_hash(raw_text)
  94. if self._storage._debug_cache_actions is True:
  95. logger.debug("Item cache check for: %r with hash %r", path, cache_hash)
  96. cache_content = self._load_item_cache(href, cache_hash)
  97. if cache_content is None:
  98. if self._storage._debug_cache_actions is True:
  99. logger.debug("Item cache miss for: %r", path)
  100. with self._acquire_cache_lock("item"):
  101. # Lock the item cache to prevent multiple processes from
  102. # generating the same data in parallel.
  103. # This improves the performance for multiple requests.
  104. if self._storage._lock.locked == "r":
  105. # Check if another process created the file in the meantime
  106. cache_content = self._load_item_cache(href, cache_hash)
  107. if cache_content is None:
  108. if self._storage._use_mtime_and_size_for_item_cache is True:
  109. # late read of the content
  110. if self._storage._debug_cache_actions is True:
  111. logger.debug("Item cache late read : %r", path)
  112. with open(path, "rb") as f:
  113. raw_text = f.read()
  114. try:
  115. vobject_items = radicale_item.read_components(
  116. raw_text.decode(self._encoding))
  117. radicale_item.check_and_sanitize_items(
  118. vobject_items, tag=self.tag)
  119. vobject_item, = vobject_items
  120. temp_item = radicale_item.Item(
  121. collection=self, vobject_item=vobject_item)
  122. if self._storage._debug_cache_actions is True:
  123. logger.debug("Item cache store for: %r", path)
  124. cache_content = self._store_item_cache(
  125. href, temp_item, cache_hash)
  126. except Exception as e:
  127. if self._skip_broken_item:
  128. logger.warning("Skip broken item %r in %r: %s", href, self.path, e)
  129. return None
  130. else:
  131. raise RuntimeError("Failed to load item %r in %r: %s" %
  132. (href, self.path, e)) from e
  133. # Clean cache entries once after the data in the file
  134. # system was edited externally.
  135. if not self._item_cache_cleaned:
  136. self._item_cache_cleaned = True
  137. self._clean_item_cache()
  138. else:
  139. if self._storage._debug_cache_actions is True:
  140. logger.debug("Item cache hit for: %r", path)
  141. last_modified = time.strftime(
  142. "%a, %d %b %Y %H:%M:%S GMT",
  143. time.gmtime(os.path.getmtime(path)))
  144. # Don't keep reference to ``vobject_item``, because it requires a lot
  145. # of memory.
  146. return radicale_item.Item(
  147. collection=self, href=href, last_modified=last_modified,
  148. etag=cache_content.etag, text=cache_content.text,
  149. uid=cache_content.uid, name=cache_content.name,
  150. component_name=cache_content.tag,
  151. time_range=(cache_content.start, cache_content.end))
  152. def get_multi(self, hrefs: Iterable[str]
  153. ) -> Iterator[Tuple[str, Optional[radicale_item.Item]]]:
  154. # It's faster to check for file name collisions here, because
  155. # we only need to call os.listdir once.
  156. files = None
  157. for href in hrefs:
  158. if files is None:
  159. # List dir after hrefs returned one item, the iterator may be
  160. # empty and the for-loop is never executed.
  161. files = os.listdir(self._filesystem_path)
  162. path = os.path.join(self._filesystem_path, href)
  163. if (not pathutils.is_safe_filesystem_path_component(href) or
  164. href not in files and os.path.lexists(path)):
  165. logger.debug("Can't translate name safely to filesystem: %r",
  166. href)
  167. yield (href, None)
  168. else:
  169. yield (href, self._get(href, verify_href=False))
  170. def get_all(self) -> Iterator[radicale_item.Item]:
  171. for href in self._list():
  172. # We don't need to check for collisions, because the file names
  173. # are from os.listdir.
  174. item = self._get(href, verify_href=False)
  175. if item is not None:
  176. yield item