storage.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679
# This file is part of Radicale Server - Calendar Server
# Copyright © 2014 Jean-Marc Martins
# Copyright © 2012-2016 Guillaume Ayoub
#
# This library is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Radicale. If not, see <http://www.gnu.org/licenses/>.
"""
Storage backends.

This module loads the storage backend, according to the storage configuration.

Default storage uses one folder per collection and one file per collection
entry.

"""
  23. import errno
  24. import json
  25. import os
  26. import posixpath
  27. import shutil
  28. import stat
  29. import threading
  30. import time
  31. from contextlib import contextmanager
  32. from hashlib import md5
  33. from importlib import import_module
  34. from itertools import groupby
  35. from random import getrandbits
  36. from atomicwrites import AtomicWriter
  37. import vobject
  38. if os.name == "nt":
  39. import ctypes
  40. import ctypes.wintypes
  41. import msvcrt
  42. LOCKFILE_EXCLUSIVE_LOCK = 2
  43. if ctypes.sizeof(ctypes.c_void_p) == 4:
  44. ULONG_PTR = ctypes.c_uint32
  45. else:
  46. ULONG_PTR = ctypes.c_uint64
  47. class Overlapped(ctypes.Structure):
  48. _fields_ = [("internal", ULONG_PTR),
  49. ("internal_high", ULONG_PTR),
  50. ("offset", ctypes.wintypes.DWORD),
  51. ("offset_high", ctypes.wintypes.DWORD),
  52. ("h_event", ctypes.wintypes.HANDLE)]
  53. lock_file_ex = ctypes.windll.kernel32.LockFileEx
  54. lock_file_ex.argtypes = [ctypes.wintypes.HANDLE,
  55. ctypes.wintypes.DWORD,
  56. ctypes.wintypes.DWORD,
  57. ctypes.wintypes.DWORD,
  58. ctypes.wintypes.DWORD,
  59. ctypes.POINTER(Overlapped)]
  60. lock_file_ex.restype = ctypes.wintypes.BOOL
  61. unlock_file_ex = ctypes.windll.kernel32.UnlockFileEx
  62. unlock_file_ex.argtypes = [ctypes.wintypes.HANDLE,
  63. ctypes.wintypes.DWORD,
  64. ctypes.wintypes.DWORD,
  65. ctypes.wintypes.DWORD,
  66. ctypes.POINTER(Overlapped)]
  67. unlock_file_ex.restype = ctypes.wintypes.BOOL
  68. elif os.name == "posix":
  69. import fcntl
  70. def load(configuration, logger):
  71. """Load the storage manager chosen in configuration."""
  72. storage_type = configuration.get("storage", "type")
  73. if storage_type == "multifilesystem":
  74. collection_class = Collection
  75. else:
  76. collection_class = import_module(storage_type).Collection
  77. class CollectionCopy(collection_class):
  78. """Collection copy, avoids overriding the original class attributes."""
  79. CollectionCopy.configuration = configuration
  80. CollectionCopy.logger = logger
  81. return CollectionCopy
  82. MIMETYPES = {"VADDRESSBOOK": "text/vcard", "VCALENDAR": "text/calendar"}
  83. def get_etag(text):
  84. """Etag from collection or item."""
  85. etag = md5()
  86. etag.update(text.encode("utf-8"))
  87. return '"%s"' % etag.hexdigest()
  88. def is_safe_path_component(path):
  89. """Check if path is a single component of a path.
  90. Check that the path is safe to join too.
  91. """
  92. return path and "/" not in path and path not in (".", "..")
  93. def sanitize_path(path):
  94. """Make path absolute with leading slash to prevent access to other data.
  95. Preserve a potential trailing slash.
  96. """
  97. trailing_slash = "/" if path.endswith("/") else ""
  98. path = posixpath.normpath(path)
  99. new_path = "/"
  100. for part in path.split("/"):
  101. if not part or part in (".", ".."):
  102. continue
  103. new_path = posixpath.join(new_path, part)
  104. trailing_slash = "" if new_path.endswith("/") else trailing_slash
  105. return new_path + trailing_slash
  106. def is_safe_filesystem_path_component(path):
  107. """Check if path is a single component of a filesystem path.
  108. Check that the path is safe to join too.
  109. """
  110. return (
  111. path and not os.path.splitdrive(path)[0] and
  112. not os.path.split(path)[0] and path not in (os.curdir, os.pardir) and
  113. not path.startswith(".") and not path.endswith("~"))
  114. def path_to_filesystem(root, *paths):
  115. """Convert path to a local filesystem path relative to base_folder.
  116. `root` must be a secure filesystem path, it will be prepend to the path.
  117. Conversion of `paths` is done in a secure manner, or raises ``ValueError``.
  118. """
  119. paths = [sanitize_path(path).strip("/") for path in paths]
  120. safe_path = root
  121. for path in paths:
  122. if not path:
  123. continue
  124. for part in path.split("/"):
  125. if not is_safe_filesystem_path_component(part):
  126. raise ValueError(
  127. "Can't tranlate name safely to filesystem: %s" % part)
  128. safe_path = os.path.join(safe_path, part)
  129. return safe_path
  130. class _EncodedAtomicWriter(AtomicWriter):
  131. def __init__(self, path, encoding, mode="w", overwrite=True):
  132. self._encoding = encoding
  133. return super().__init__(path, mode, overwrite=True)
  134. def get_fileobject(self, **kwargs):
  135. return super().get_fileobject(encoding=self._encoding,
  136. prefix=".Radicale.tmp-", **kwargs)
  137. class Item:
  138. def __init__(self, collection, item, href, last_modified=None):
  139. self.collection = collection
  140. self.item = item
  141. self.href = href
  142. self.last_modified = last_modified
  143. def __getattr__(self, attr):
  144. return getattr(self.item, attr)
  145. @property
  146. def etag(self):
  147. return get_etag(self.serialize())
  148. class BaseCollection:
  149. # Overriden on copy by the "load" function
  150. configuration = None
  151. logger = None
  152. def __init__(self, path, principal=False):
  153. """Initialize the collection.
  154. ``path`` must be the normalized relative path of the collection, using
  155. the slash as the folder delimiter, with no leading nor trailing slash.
  156. """
  157. raise NotImplementedError
  158. @classmethod
  159. def discover(cls, path, depth="1"):
  160. """Discover a list of collections under the given ``path``.
  161. If ``depth`` is "0", only the actual object under ``path`` is
  162. returned.
  163. If ``depth`` is anything but "0", it is considered as "1" and direct
  164. children are included in the result. If ``include_container`` is
  165. ``True`` (the default), the containing object is included in the
  166. result.
  167. The ``path`` is relative.
  168. """
  169. raise NotImplementedError
  170. @property
  171. def etag(self):
  172. return get_etag(self.serialize())
  173. @classmethod
  174. def create_collection(cls, href, collection=None, tag=None):
  175. """Create a collection.
  176. ``collection`` is a list of vobject components.
  177. ``tag`` is the type of collection (VCALENDAR or VADDRESSBOOK). If
  178. ``tag`` is not given, it is guessed from the collection.
  179. """
  180. raise NotImplementedError
  181. def list(self):
  182. """List collection items."""
  183. raise NotImplementedError
  184. def get(self, href):
  185. """Fetch a single item."""
  186. raise NotImplementedError
  187. def get_multi(self, hrefs):
  188. """Fetch multiple items. Duplicate hrefs must be ignored.
  189. Functionally similar to ``get``, but might bring performance benefits
  190. on some storages when used cleverly.
  191. """
  192. for href in set(hrefs):
  193. yield self.get(href)
  194. def pre_filtered_list(self, filters):
  195. """List collection items with optional pre filtering.
  196. This could largely improve performance of reports depending on
  197. the filters and this implementation.
  198. This returns all event by default
  199. """
  200. return [self.get(href) for href, _ in self.list()]
  201. def has(self, href):
  202. """Check if an item exists by its href.
  203. Functionally similar to ``get``, but might bring performance benefits
  204. on some storages when used cleverly.
  205. """
  206. return self.get(href) is not None
  207. def upload(self, href, vobject_item):
  208. """Upload a new item."""
  209. raise NotImplementedError
  210. def update(self, href, vobject_item, etag=None):
  211. """Update an item.
  212. Functionally similar to ``delete`` plus ``upload``, but might bring
  213. performance benefits on some storages when used cleverly.
  214. """
  215. self.delete(href, etag)
  216. self.upload(href, vobject_item)
  217. def delete(self, href=None, etag=None):
  218. """Delete an item.
  219. When ``href`` is ``None``, delete the collection.
  220. """
  221. raise NotImplementedError
  222. def get_meta(self, key):
  223. """Get metadata value for collection."""
  224. raise NotImplementedError
  225. def set_meta(self, key, value):
  226. """Set metadata value for collection."""
  227. raise NotImplementedError
  228. @property
  229. def last_modified(self):
  230. """Get the HTTP-datetime of when the collection was modified."""
  231. raise NotImplementedError
  232. def serialize(self):
  233. """Get the unicode string representing the whole collection."""
  234. raise NotImplementedError
  235. @classmethod
  236. @contextmanager
  237. def acquire_lock(cls, mode):
  238. """Set a context manager to lock the whole storage.
  239. ``mode`` must either be "r" for shared access or "w" for exclusive
  240. access.
  241. """
  242. raise NotImplementedError
  243. class Collection(BaseCollection):
  244. """Collection stored in several files per calendar."""
  245. def __init__(self, path, principal=False):
  246. folder = os.path.expanduser(
  247. self.configuration.get("storage", "filesystem_folder"))
  248. # path should already be sanitized
  249. self.path = sanitize_path(path).strip("/")
  250. self.storage_encoding = self.configuration.get("encoding", "stock")
  251. self._filesystem_path = path_to_filesystem(folder, self.path)
  252. split_path = self.path.split("/")
  253. if len(split_path) > 1:
  254. # URL with at least one folder
  255. self.owner = split_path[0]
  256. else:
  257. self.owner = None
  258. self.is_principal = principal
  259. @contextmanager
  260. def _atomic_write(self, path, mode="w"):
  261. with _EncodedAtomicWriter(
  262. path, self.storage_encoding, mode).open() as fd:
  263. yield fd
  264. def _find_available_file_name(self):
  265. # Prevent infinite loop
  266. for _ in range(10000):
  267. file_name = hex(getrandbits(32))[2:]
  268. if not self.has(file_name):
  269. return file_name
  270. raise FileExistsError(errno.EEXIST, "No usable file name found")
  271. @classmethod
  272. def discover(cls, path, depth="1"):
  273. # path == None means wrong URL
  274. if path is None:
  275. return
  276. # path should already be sanitized
  277. sane_path = sanitize_path(path).strip("/")
  278. attributes = sane_path.split("/")
  279. if not attributes[0]:
  280. attributes.pop()
  281. # Try to guess if the path leads to a collection or an item
  282. folder = os.path.expanduser(
  283. cls.configuration.get("storage", "filesystem_folder"))
  284. if not os.path.isdir(path_to_filesystem(folder, sane_path)):
  285. # path is not a collection
  286. if attributes and os.path.isfile(path_to_filesystem(folder,
  287. sane_path)):
  288. # path is an item
  289. attributes.pop()
  290. elif attributes and os.path.isdir(path_to_filesystem(
  291. folder, *attributes[:-1])):
  292. # path parent is a collection
  293. attributes.pop()
  294. # TODO: else: return?
  295. path = "/".join(attributes)
  296. principal = len(attributes) == 1
  297. collection = cls(path, principal)
  298. yield collection
  299. if depth != "0":
  300. # TODO: fix this
  301. items = list(collection.list())
  302. if items:
  303. for item in items:
  304. yield collection.get(item[0])
  305. _, directories, _ = next(os.walk(collection._filesystem_path))
  306. for sub_path in directories:
  307. if not is_safe_filesystem_path_component(sub_path):
  308. cls.logger.debug("Skipping collection: %s", sub_path)
  309. continue
  310. full_path = os.path.join(collection._filesystem_path, sub_path)
  311. if os.path.exists(full_path):
  312. yield cls(posixpath.join(path, sub_path))
  313. @classmethod
  314. def create_collection(cls, href, collection=None, tag=None):
  315. folder = os.path.expanduser(
  316. cls.configuration.get("storage", "filesystem_folder"))
  317. path = path_to_filesystem(folder, href)
  318. self = cls(href)
  319. if os.path.exists(path):
  320. return self
  321. else:
  322. os.makedirs(path)
  323. if not tag and collection:
  324. tag = collection[0].name
  325. if tag == "VCALENDAR":
  326. self.set_meta("tag", "VCALENDAR")
  327. if collection:
  328. collection, = collection
  329. items = []
  330. for content in ("vevent", "vtodo", "vjournal"):
  331. items.extend(getattr(collection, "%s_list" % content, []))
  332. def get_uid(item):
  333. return hasattr(item, "uid") and item.uid.value
  334. items_by_uid = groupby(
  335. sorted(items, key=get_uid), get_uid)
  336. for uid, items in items_by_uid:
  337. new_collection = vobject.iCalendar()
  338. for item in items:
  339. new_collection.add(item)
  340. self.upload(
  341. self._find_available_file_name(), new_collection)
  342. elif tag == "VCARD":
  343. self.set_meta("tag", "VADDRESSBOOK")
  344. if collection:
  345. for card in collection:
  346. self.upload(self._find_available_file_name(), card)
  347. return self
  348. def list(self):
  349. try:
  350. hrefs = os.listdir(self._filesystem_path)
  351. except IOError:
  352. return
  353. for href in hrefs:
  354. if not is_safe_filesystem_path_component(href):
  355. self.logger.debug("Skipping component: %s", href)
  356. continue
  357. path = os.path.join(self._filesystem_path, href)
  358. if not href.endswith(".props") and os.path.isfile(path):
  359. with open(path, encoding=self.storage_encoding) as fd:
  360. yield href, get_etag(fd.read())
  361. def get(self, href):
  362. if not href:
  363. return None
  364. href = href.strip("{}").replace("/", "_")
  365. if not is_safe_filesystem_path_component(href):
  366. self.logger.debug(
  367. "Can't tranlate name safely to filesystem: %s", href)
  368. return None
  369. path = path_to_filesystem(self._filesystem_path, href)
  370. if not os.path.isfile(path):
  371. return None
  372. with open(path, encoding=self.storage_encoding) as fd:
  373. text = fd.read()
  374. last_modified = time.strftime(
  375. "%a, %d %b %Y %H:%M:%S GMT",
  376. time.gmtime(os.path.getmtime(path)))
  377. return Item(self, vobject.readOne(text), href, last_modified)
  378. def has(self, href):
  379. return self.get(href) is not None
  380. def upload(self, href, vobject_item):
  381. # TODO: use returned object in code
  382. if not is_safe_filesystem_path_component(href):
  383. raise ValueError(
  384. "Can't tranlate name safely to filesystem: %s" % href)
  385. path = path_to_filesystem(self._filesystem_path, href)
  386. if os.path.exists(path):
  387. raise ValueError("Component already exists: %s" % href)
  388. item = Item(self, vobject_item, href)
  389. with self._atomic_write(path) as fd:
  390. fd.write(item.serialize())
  391. return item
  392. def update(self, href, vobject_item, etag=None):
  393. # TODO: use etag in code and test it here
  394. # TODO: use returned object in code
  395. if not is_safe_filesystem_path_component(href):
  396. raise ValueError(
  397. "Can't tranlate name safely to filesystem: %s" % href)
  398. path = path_to_filesystem(self._filesystem_path, href)
  399. if not os.path.isfile(path):
  400. raise ValueError("Component doesn't exist: %s" % href)
  401. with open(path, encoding=self.storage_encoding) as fd:
  402. text = fd.read()
  403. if etag and etag != get_etag(text):
  404. raise ValueError(
  405. "ETag doesn't match: %s != %s" % (etag, get_etag(text)))
  406. item = Item(self, vobject_item, href)
  407. with self._atomic_write(path) as fd:
  408. fd.write(item.serialize())
  409. return item
  410. def delete(self, href=None, etag=None):
  411. # TODO: use etag in code and test it here
  412. # TODO: use returned object in code
  413. if href is None:
  414. # Delete the collection
  415. if os.path.isdir(self._filesystem_path):
  416. shutil.rmtree(self._filesystem_path)
  417. props_path = self._filesystem_path + ".props"
  418. if os.path.isfile(props_path):
  419. os.remove(props_path)
  420. else:
  421. # Delete an item
  422. if not is_safe_filesystem_path_component(href):
  423. raise ValueError(
  424. "Can't tranlate name safely to filesystem: %s" % href)
  425. path = path_to_filesystem(self._filesystem_path, href)
  426. if not os.path.isfile(path):
  427. raise ValueError("Component doesn't exist: %s" % href)
  428. with open(path, encoding=self.storage_encoding) as fd:
  429. text = fd.read()
  430. if etag and etag != get_etag(text):
  431. raise ValueError(
  432. "ETag doesn't match: %s != %s" % (etag, get_etag(text)))
  433. os.remove(path)
  434. def get_meta(self, key):
  435. props_path = self._filesystem_path + ".props"
  436. if os.path.exists(props_path):
  437. with open(props_path, encoding=self.storage_encoding) as prop:
  438. return json.load(prop).get(key)
  439. def set_meta(self, key, value):
  440. props_path = self._filesystem_path + ".props"
  441. properties = {}
  442. if os.path.exists(props_path):
  443. with open(props_path, encoding=self.storage_encoding) as prop:
  444. properties.update(json.load(prop))
  445. if value:
  446. properties[key] = value
  447. else:
  448. properties.pop(key, None)
  449. with self._atomic_write(props_path, "w+") as prop:
  450. json.dump(properties, prop)
  451. @property
  452. def last_modified(self):
  453. last = max([os.path.getmtime(self._filesystem_path)] + [
  454. os.path.getmtime(os.path.join(self._filesystem_path, filename))
  455. for filename in os.listdir(self._filesystem_path)] or [0])
  456. return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(last))
  457. def serialize(self):
  458. if not os.path.exists(self._filesystem_path):
  459. return None
  460. items = []
  461. for href in os.listdir(self._filesystem_path):
  462. if not is_safe_filesystem_path_component(href):
  463. self.logger.debug("Skipping component: %s", href)
  464. continue
  465. path = os.path.join(self._filesystem_path, href)
  466. if os.path.isfile(path) and not path.endswith(".props"):
  467. with open(path, encoding=self.storage_encoding) as fd:
  468. items.append(vobject.readOne(fd.read()))
  469. if self.get_meta("tag") == "VCALENDAR":
  470. collection = vobject.iCalendar()
  471. for item in items:
  472. for content in ("vevent", "vtodo", "vjournal"):
  473. if content in item.contents:
  474. for item_part in getattr(item, "%s_list" % content):
  475. collection.add(item_part)
  476. break
  477. return collection.serialize()
  478. elif self.get_meta("tag") == "VADDRESSBOOK":
  479. return "".join([item.serialize() for item in items])
  480. return ""
  481. _lock = threading.Lock()
  482. _waiters = []
  483. _lock_file = None
  484. _lock_file_locked = False
  485. _readers = 0
  486. _writer = False
  487. @classmethod
  488. @contextmanager
  489. def acquire_lock(cls, mode):
  490. def condition():
  491. if mode == "r":
  492. return not cls._writer
  493. else:
  494. return not cls._writer and cls._readers == 0
  495. if mode not in ("r", "w"):
  496. raise ValueError("Invalid lock mode: %s" % mode)
  497. # Use a primitive lock which only works within one process as a
  498. # precondition for inter-process file-based locking
  499. with cls._lock:
  500. if cls._waiters or not condition():
  501. # use FIFO for access requests
  502. waiter = threading.Condition(lock=cls._lock)
  503. cls._waiters.append(waiter)
  504. while True:
  505. waiter.wait()
  506. if condition():
  507. break
  508. cls._waiters.pop(0)
  509. if mode == "r":
  510. cls._readers += 1
  511. # notify additional potential readers
  512. if cls._waiters:
  513. cls._waiters[0].notify()
  514. else:
  515. cls._writer = True
  516. if not cls._lock_file:
  517. folder = os.path.expanduser(
  518. cls.configuration.get("storage", "filesystem_folder"))
  519. if not os.path.exists(folder):
  520. os.makedirs(folder, exist_ok=True)
  521. lock_path = os.path.join(folder, ".Radicale.lock")
  522. cls._lock_file = open(lock_path, "w+")
  523. # set access rights to a necessary minimum to prevent locking
  524. # by arbitrary users
  525. try:
  526. os.chmod(lock_path, stat.S_IWUSR | stat.S_IRUSR)
  527. except OSError:
  528. cls.logger.debug("Failed to set permissions on lock file")
  529. if not cls._lock_file_locked:
  530. if os.name == "nt":
  531. handle = msvcrt.get_osfhandle(cls._lock_file.fileno())
  532. flags = LOCKFILE_EXCLUSIVE_LOCK if mode == "w" else 0
  533. overlapped = Overlapped()
  534. if not lock_file_ex(handle, flags, 0, 1, 0, overlapped):
  535. cls.logger.debug("Locking not supported")
  536. elif os.name == "posix":
  537. _cmd = fcntl.LOCK_EX if mode == "w" else fcntl.LOCK_SH
  538. try:
  539. fcntl.lockf(cls._lock_file.fileno(), _cmd)
  540. except OSError:
  541. cls.logger.debug("Locking not supported")
  542. cls._lock_file_locked = True
  543. try:
  544. yield
  545. finally:
  546. with cls._lock:
  547. if mode == "r":
  548. cls._readers -= 1
  549. else:
  550. cls._writer = False
  551. if cls._readers == 0:
  552. if os.name == "nt":
  553. handle = msvcrt.get_osfhandle(cls._lock_file.fileno())
  554. overlapped = Overlapped()
  555. if not unlock_file_ex(handle, 0, 1, 0, overlapped):
  556. cls.logger.debug("Unlocking not supported")
  557. elif os.name == "posix":
  558. try:
  559. fcntl.lockf(cls._lock_file.fileno(), fcntl.LOCK_UN)
  560. except OSError:
  561. cls.logger.debug("Unlocking not supported")
  562. cls._lock_file_locked = False
  563. if cls._waiters:
  564. cls._waiters[0].notify()