storage.py 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303
  1. # This file is part of Radicale Server - Calendar Server
  2. # Copyright © 2014 Jean-Marc Martins
  3. # Copyright © 2012-2017 Guillaume Ayoub
  4. #
  5. # This library is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation, either version 3 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # This library is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with Radicale. If not, see <http://www.gnu.org/licenses/>.
  17. """
  18. Storage backends.
  19. This module loads the storage backend, according to the storage configuration.
  20. Default storage uses one folder per collection and one file per collection
  21. entry.
  22. """
  23. import binascii
  24. import contextlib
  25. import json
  26. import os
  27. import pickle
  28. import posixpath
  29. import shlex
  30. import stat
  31. import subprocess
  32. import sys
  33. import threading
  34. import time
  35. from contextlib import contextmanager
  36. from hashlib import md5
  37. from importlib import import_module
  38. from itertools import chain, groupby
  39. from random import getrandbits
  40. from tempfile import NamedTemporaryFile, TemporaryDirectory
  41. import vobject
  42. if sys.version_info >= (3, 5):
  43. # HACK: Avoid import cycle for Python < 3.5
  44. from . import xmlutils
  45. if os.name == "nt":
  46. import ctypes
  47. import ctypes.wintypes
  48. import msvcrt
  49. LOCKFILE_EXCLUSIVE_LOCK = 2
  50. if ctypes.sizeof(ctypes.c_void_p) == 4:
  51. ULONG_PTR = ctypes.c_uint32
  52. else:
  53. ULONG_PTR = ctypes.c_uint64
  54. class Overlapped(ctypes.Structure):
  55. _fields_ = [
  56. ("internal", ULONG_PTR),
  57. ("internal_high", ULONG_PTR),
  58. ("offset", ctypes.wintypes.DWORD),
  59. ("offset_high", ctypes.wintypes.DWORD),
  60. ("h_event", ctypes.wintypes.HANDLE)]
  61. lock_file_ex = ctypes.windll.kernel32.LockFileEx
  62. lock_file_ex.argtypes = [
  63. ctypes.wintypes.HANDLE,
  64. ctypes.wintypes.DWORD,
  65. ctypes.wintypes.DWORD,
  66. ctypes.wintypes.DWORD,
  67. ctypes.wintypes.DWORD,
  68. ctypes.POINTER(Overlapped)]
  69. lock_file_ex.restype = ctypes.wintypes.BOOL
  70. unlock_file_ex = ctypes.windll.kernel32.UnlockFileEx
  71. unlock_file_ex.argtypes = [
  72. ctypes.wintypes.HANDLE,
  73. ctypes.wintypes.DWORD,
  74. ctypes.wintypes.DWORD,
  75. ctypes.wintypes.DWORD,
  76. ctypes.POINTER(Overlapped)]
  77. unlock_file_ex.restype = ctypes.wintypes.BOOL
  78. elif os.name == "posix":
  79. import fcntl
  80. def load(configuration, logger):
  81. """Load the storage manager chosen in configuration."""
  82. if sys.version_info < (3, 5):
  83. # HACK: Avoid import cycle for Python < 3.5
  84. global xmlutils
  85. from . import xmlutils
  86. storage_type = configuration.get("storage", "type")
  87. if storage_type == "multifilesystem":
  88. collection_class = Collection
  89. else:
  90. try:
  91. collection_class = import_module(storage_type).Collection
  92. except ImportError as e:
  93. raise RuntimeError("Storage module %r not found" %
  94. storage_type) from e
  95. logger.info("Storage type is %r", storage_type)
  96. class CollectionCopy(collection_class):
  97. """Collection copy, avoids overriding the original class attributes."""
  98. CollectionCopy.configuration = configuration
  99. CollectionCopy.logger = logger
  100. return CollectionCopy
  101. def scandir(path, only_dirs=False, only_files=False):
  102. """Iterator for directory elements. (For compatibility with Python < 3.5)
  103. ``only_dirs`` only return directories
  104. ``only_files`` only return files
  105. """
  106. if sys.version_info >= (3, 5):
  107. for entry in os.scandir(path):
  108. if ((not only_files or entry.is_file()) and
  109. (not only_dirs or entry.is_dir())):
  110. yield entry.name
  111. else:
  112. for name in os.listdir(path):
  113. p = os.path.join(path, name)
  114. if ((not only_files or os.path.isfile(p)) and
  115. (not only_dirs or os.path.isdir(p))):
  116. yield name
  117. def get_etag(text):
  118. """Etag from collection or item.
  119. Encoded as quoted-string (see RFC 2616).
  120. """
  121. etag = md5()
  122. etag.update(text.encode("utf-8"))
  123. return '"%s"' % etag.hexdigest()
  124. def get_uid(item):
  125. """UID value of an item if defined."""
  126. return hasattr(item, "uid") and item.uid.value
  127. def sanitize_path(path):
  128. """Make path absolute with leading slash to prevent access to other data.
  129. Preserve a potential trailing slash.
  130. """
  131. trailing_slash = "/" if path.endswith("/") else ""
  132. path = posixpath.normpath(path)
  133. new_path = "/"
  134. for part in path.split("/"):
  135. if not is_safe_path_component(part):
  136. continue
  137. new_path = posixpath.join(new_path, part)
  138. trailing_slash = "" if new_path.endswith("/") else trailing_slash
  139. return new_path + trailing_slash
  140. def is_safe_path_component(path):
  141. """Check if path is a single component of a path.
  142. Check that the path is safe to join too.
  143. """
  144. return path and "/" not in path and path not in (".", "..")
  145. def is_safe_filesystem_path_component(path):
  146. """Check if path is a single component of a local and posix filesystem
  147. path.
  148. Check that the path is safe to join too.
  149. """
  150. return (
  151. path and not os.path.splitdrive(path)[0] and
  152. not os.path.split(path)[0] and path not in (os.curdir, os.pardir) and
  153. not path.startswith(".") and not path.endswith("~") and
  154. is_safe_path_component(path))
  155. def path_to_filesystem(root, *paths):
  156. """Convert path to a local filesystem path relative to base_folder.
  157. `root` must be a secure filesystem path, it will be prepend to the path.
  158. Conversion of `paths` is done in a secure manner, or raises ``ValueError``.
  159. """
  160. paths = [sanitize_path(path).strip("/") for path in paths]
  161. safe_path = root
  162. for path in paths:
  163. if not path:
  164. continue
  165. for part in path.split("/"):
  166. if not is_safe_filesystem_path_component(part):
  167. raise UnsafePathError(part)
  168. safe_path_parent = safe_path
  169. safe_path = os.path.join(safe_path, part)
  170. # Check for conflicting files (e.g. case-insensitive file systems
  171. # or short names on Windows file systems)
  172. if (os.path.lexists(safe_path) and
  173. part not in scandir(safe_path_parent)):
  174. raise CollidingPathError(part)
  175. return safe_path
  176. class UnsafePathError(ValueError):
  177. def __init__(self, path):
  178. message = "Can't translate name safely to filesystem: %r" % path
  179. super().__init__(message)
  180. class CollidingPathError(ValueError):
  181. def __init__(self, path):
  182. message = "File name collision: %r" % path
  183. super().__init__(message)
  184. class ComponentExistsError(ValueError):
  185. def __init__(self, path):
  186. message = "Component already exists: %r" % path
  187. super().__init__(message)
  188. class ComponentNotFoundError(ValueError):
  189. def __init__(self, path):
  190. message = "Component doesn't exist: %r" % path
  191. super().__init__(message)
  192. class Item:
  193. def __init__(self, collection, item=None, href=None, last_modified=None,
  194. text=None, etag=None):
  195. """Initialize an item.
  196. ``collection`` the parent collection.
  197. ``href`` the href of the item.
  198. ``last_modified`` the HTTP-datetime of when the item was modified.
  199. ``text`` the text representation of the item (optional if ``item`` is
  200. set).
  201. ``item`` the vobject item (optional if ``text`` is set).
  202. ``etag`` the etag of the item (optional). See ``get_etag``.
  203. """
  204. if text is None and item is None:
  205. raise ValueError("at least one of 'text' or 'item' must be set")
  206. self.collection = collection
  207. self.href = href
  208. self.last_modified = last_modified
  209. self._text = text
  210. self._item = item
  211. self._etag = etag
  212. def __getattr__(self, attr):
  213. return getattr(self.item, attr)
  214. def serialize(self):
  215. if self._text is None:
  216. self._text = self.item.serialize()
  217. return self._text
  218. @property
  219. def item(self):
  220. if self._item is None:
  221. try:
  222. self._item = vobject.readOne(self._text)
  223. except Exception as e:
  224. raise RuntimeError("Failed to parse item %r in %r: %s" %
  225. (self.href, self.collection.path, e)) from e
  226. return self._item
  227. @property
  228. def etag(self):
  229. """Encoded as quoted-string (see RFC 2616)."""
  230. if self._etag is None:
  231. self._etag = get_etag(self.serialize())
  232. return self._etag
  233. class BaseCollection:
  234. # Overriden on copy by the "load" function
  235. configuration = None
  236. logger = None
  237. def __init__(self, path, principal=False):
  238. """Initialize the collection.
  239. ``path`` must be the normalized relative path of the collection, using
  240. the slash as the folder delimiter, with no leading nor trailing slash.
  241. """
  242. raise NotImplementedError
  243. @classmethod
  244. def discover(cls, path, depth="0"):
  245. """Discover a list of collections under the given ``path``.
  246. If ``depth`` is "0", only the actual object under ``path`` is
  247. returned.
  248. If ``depth`` is anything but "0", it is considered as "1" and direct
  249. children are included in the result.
  250. The ``path`` is relative.
  251. The root collection "/" must always exist.
  252. """
  253. raise NotImplementedError
  254. @classmethod
  255. def move(cls, item, to_collection, to_href):
  256. """Move an object.
  257. ``item`` is the item to move.
  258. ``to_collection`` is the target collection.
  259. ``to_href`` is the target name in ``to_collection``. An item with the
  260. same name might already exist.
  261. """
  262. if item.collection.path == to_collection.path and item.href == to_href:
  263. return
  264. to_collection.upload(to_href, item.item)
  265. item.collection.delete(item.href)
  266. @property
  267. def etag(self):
  268. """Encoded as quoted-string (see RFC 2616)."""
  269. return get_etag(self.serialize())
  270. @classmethod
  271. def create_collection(cls, href, collection=None, props=None):
  272. """Create a collection.
  273. If the collection already exists and neither ``collection`` nor
  274. ``props`` are set, this method shouldn't do anything. Otherwise the
  275. existing collection must be replaced.
  276. ``collection`` is a list of vobject components.
  277. ``props`` are metadata values for the collection.
  278. ``props["tag"]`` is the type of collection (VCALENDAR or
  279. VADDRESSBOOK). If the key ``tag`` is missing, it is guessed from the
  280. collection.
  281. """
  282. raise NotImplementedError
  283. def sync(self, old_token=None):
  284. """Get the current sync token and changed items for synchronization.
  285. ``old_token`` an old sync token which is used as the base of the
  286. delta update. If sync token is missing, all items are returned.
  287. ValueError is raised for invalid or old tokens.
  288. WARNING: This simple default implementation treats all sync-token as
  289. invalid. It adheres to the specification but some clients
  290. (e.g. InfCloud) don't like it. Subclasses should provide a
  291. more sophisticated implementation.
  292. """
  293. token = "http://radicale.org/ns/sync/%s" % self.etag.strip("\"")
  294. if old_token:
  295. raise ValueError("Sync token are not supported")
  296. return token, self.list()
  297. def list(self):
  298. """List collection items."""
  299. raise NotImplementedError
  300. def get(self, href):
  301. """Fetch a single item."""
  302. raise NotImplementedError
  303. def get_multi(self, hrefs):
  304. """Fetch multiple items. Duplicate hrefs must be ignored.
  305. DEPRECATED: use ``get_multi2`` instead
  306. """
  307. return (self.get(href) for href in set(hrefs))
  308. def get_multi2(self, hrefs):
  309. """Fetch multiple items.
  310. Functionally similar to ``get``, but might bring performance benefits
  311. on some storages when used cleverly. It's not required to return the
  312. requested items in the correct order. Duplicated hrefs can be ignored.
  313. Returns tuples with the href and the item or None if the item doesn't
  314. exist.
  315. """
  316. return ((href, self.get(href)) for href in hrefs)
  317. def get_all(self):
  318. """Fetch all items.
  319. Functionally similar to ``get``, but might bring performance benefits
  320. on some storages when used cleverly.
  321. """
  322. return map(self.get, self.list())
  323. def get_all_filtered(self, filters):
  324. """Fetch all items with optional filtering.
  325. This can largely improve performance of reports depending on
  326. the filters and this implementation.
  327. Returns tuples in the form ``(item, filters_matched)``.
  328. ``filters_matched`` is a bool that indicates if ``filters`` are fully
  329. matched.
  330. This returns all events by default
  331. """
  332. return ((item, False) for item in self.get_all())
  333. def pre_filtered_list(self, filters):
  334. """List collection items with optional pre filtering.
  335. DEPRECATED: use ``get_all_filtered`` instead
  336. """
  337. return self.get_all()
  338. def has(self, href):
  339. """Check if an item exists by its href.
  340. Functionally similar to ``get``, but might bring performance benefits
  341. on some storages when used cleverly.
  342. """
  343. return self.get(href) is not None
  344. def upload(self, href, vobject_item):
  345. """Upload a new or replace an existing item."""
  346. raise NotImplementedError
  347. def delete(self, href=None):
  348. """Delete an item.
  349. When ``href`` is ``None``, delete the collection.
  350. """
  351. raise NotImplementedError
  352. def get_meta(self, key):
  353. """Get metadata value for collection."""
  354. raise NotImplementedError
  355. def set_meta(self, props):
  356. """Set metadata values for collection."""
  357. raise NotImplementedError
  358. @property
  359. def last_modified(self):
  360. """Get the HTTP-datetime of when the collection was modified."""
  361. raise NotImplementedError
  362. def serialize(self):
  363. """Get the unicode string representing the whole collection."""
  364. raise NotImplementedError
  365. @classmethod
  366. @contextmanager
  367. def acquire_lock(cls, mode, user=None):
  368. """Set a context manager to lock the whole storage.
  369. ``mode`` must either be "r" for shared access or "w" for exclusive
  370. access.
  371. ``user`` is the name of the logged in user or empty.
  372. """
  373. raise NotImplementedError
  374. class Collection(BaseCollection):
  375. """Collection stored in several files per calendar."""
  376. def __init__(self, path, principal=False, folder=None):
  377. if not folder:
  378. folder = self._get_collection_root_folder()
  379. # Path should already be sanitized
  380. self.path = sanitize_path(path).strip("/")
  381. self.encoding = self.configuration.get("encoding", "stock")
  382. self._filesystem_path = path_to_filesystem(folder, self.path)
  383. self._props_path = os.path.join(
  384. self._filesystem_path, ".Radicale.props")
  385. split_path = self.path.split("/")
  386. self.owner = split_path[0] if len(split_path) > 1 else None
  387. self.is_principal = principal
  388. self._meta = None
  389. self._etag = None
  390. @classmethod
  391. def _get_collection_root_folder(cls):
  392. filesystem_folder = os.path.expanduser(
  393. cls.configuration.get("storage", "filesystem_folder"))
  394. return os.path.join(filesystem_folder, "collection-root")
  395. @contextmanager
  396. def _atomic_write(self, path, mode="w", newline=None):
  397. directory = os.path.dirname(path)
  398. tmp = NamedTemporaryFile(
  399. mode=mode, dir=directory, delete=False, prefix=".Radicale.tmp-",
  400. newline=newline, encoding=None if "b" in mode else self.encoding)
  401. try:
  402. yield tmp
  403. self._fsync(tmp.fileno())
  404. tmp.close()
  405. os.replace(tmp.name, path)
  406. except:
  407. tmp.close()
  408. os.remove(tmp.name)
  409. raise
  410. self._sync_directory(directory)
  411. @staticmethod
  412. def _find_available_file_name(exists_fn, suffix=""):
  413. # Prevent infinite loop
  414. for _ in range(1000):
  415. file_name = "%016x" % getrandbits(64) + suffix
  416. if not exists_fn(file_name):
  417. return file_name
  418. # something is wrong with the PRNG
  419. raise RuntimeError("No unique random sequence found")
  420. @classmethod
  421. def _fsync(cls, fd):
  422. if cls.configuration.getboolean("storage", "filesystem_fsync"):
  423. if os.name == "posix" and hasattr(fcntl, "F_FULLFSYNC"):
  424. fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
  425. else:
  426. os.fsync(fd)
  427. @classmethod
  428. def _sync_directory(cls, path):
  429. """Sync directory to disk.
  430. This only works on POSIX and does nothing on other systems.
  431. """
  432. if not cls.configuration.getboolean("storage", "filesystem_fsync"):
  433. return
  434. if os.name == "posix":
  435. fd = os.open(path, 0)
  436. try:
  437. cls._fsync(fd)
  438. finally:
  439. os.close(fd)
  440. @classmethod
  441. def _makedirs_synced(cls, filesystem_path):
  442. """Recursively create a directory and its parents in a sync'ed way.
  443. This method acts silently when the folder already exists.
  444. """
  445. if os.path.isdir(filesystem_path):
  446. return
  447. parent_filesystem_path = os.path.dirname(filesystem_path)
  448. # Prevent infinite loop
  449. if filesystem_path != parent_filesystem_path:
  450. # Create parent dirs recursively
  451. cls._makedirs_synced(parent_filesystem_path)
  452. # Possible race!
  453. os.makedirs(filesystem_path, exist_ok=True)
  454. cls._sync_directory(parent_filesystem_path)
  455. @classmethod
  456. def discover(cls, path, depth="0"):
  457. # Path should already be sanitized
  458. sane_path = sanitize_path(path).strip("/")
  459. attributes = sane_path.split("/")
  460. if not attributes[0]:
  461. attributes.pop()
  462. folder = cls._get_collection_root_folder()
  463. # Create the root collection
  464. cls._makedirs_synced(folder)
  465. try:
  466. filesystem_path = path_to_filesystem(folder, sane_path)
  467. except ValueError as e:
  468. # Path is unsafe
  469. cls.logger.debug("Collection with unsafe path %r requested: %s",
  470. sane_path, e, exc_info=True)
  471. return
  472. # Check if the path exists and if it leads to a collection or an item
  473. if not os.path.isdir(filesystem_path):
  474. if attributes and os.path.isfile(filesystem_path):
  475. href = attributes.pop()
  476. else:
  477. return
  478. else:
  479. href = None
  480. path = "/".join(attributes)
  481. principal = len(attributes) == 1
  482. collection = cls(path, principal)
  483. if href:
  484. yield collection.get(href)
  485. return
  486. yield collection
  487. if depth == "0":
  488. return
  489. for item in collection.list():
  490. yield collection.get(item)
  491. for href in scandir(filesystem_path, only_dirs=True):
  492. if not is_safe_filesystem_path_component(href):
  493. if not href.startswith(".Radicale"):
  494. cls.logger.debug("Skipping collection %r in %r", href,
  495. path)
  496. continue
  497. child_path = posixpath.join(path, href)
  498. child_principal = len(attributes) == 0
  499. yield cls(child_path, child_principal)
  500. @classmethod
  501. def create_collection(cls, href, collection=None, props=None):
  502. folder = cls._get_collection_root_folder()
  503. # Path should already be sanitized
  504. sane_path = sanitize_path(href).strip("/")
  505. attributes = sane_path.split("/")
  506. if not attributes[0]:
  507. attributes.pop()
  508. principal = len(attributes) == 1
  509. filesystem_path = path_to_filesystem(folder, sane_path)
  510. if not props:
  511. props = {}
  512. if not props.get("tag") and collection:
  513. props["tag"] = collection[0].name
  514. if not props:
  515. cls._makedirs_synced(filesystem_path)
  516. return cls(sane_path, principal=principal)
  517. parent_dir = os.path.dirname(filesystem_path)
  518. cls._makedirs_synced(parent_dir)
  519. # Create a temporary directory with an unsafe name
  520. with TemporaryDirectory(
  521. prefix=".Radicale.tmp-", dir=parent_dir) as tmp_dir:
  522. # The temporary directory itself can't be renamed
  523. tmp_filesystem_path = os.path.join(tmp_dir, "collection")
  524. os.makedirs(tmp_filesystem_path)
  525. self = cls("/", principal=principal, folder=tmp_filesystem_path)
  526. self.set_meta(props)
  527. if collection:
  528. if props.get("tag") == "VCALENDAR":
  529. collection, = collection
  530. items = []
  531. for content in ("vevent", "vtodo", "vjournal"):
  532. items.extend(
  533. getattr(collection, "%s_list" % content, []))
  534. items_by_uid = groupby(sorted(items, key=get_uid), get_uid)
  535. vobject_items = {}
  536. for uid, items in items_by_uid:
  537. new_collection = vobject.iCalendar()
  538. for item in items:
  539. new_collection.add(item)
  540. # href must comply to is_safe_filesystem_path_component
  541. # and no file name collisions must exist between hrefs
  542. href = self._find_available_file_name(
  543. vobject_items.get, suffix=".ics")
  544. vobject_items[href] = new_collection
  545. self.upload_all_nonatomic(vobject_items)
  546. elif props.get("tag") == "VCARD":
  547. vobject_items = {}
  548. for card in collection:
  549. # href must comply to is_safe_filesystem_path_component
  550. # and no file name collisions must exist between hrefs
  551. href = self._find_available_file_name(
  552. vobject_items.get, suffix=".vcf")
  553. vobject_items[href] = card
  554. self.upload_all_nonatomic(vobject_items)
  555. # This operation is not atomic on the filesystem level but it's
  556. # very unlikely that one rename operations succeeds while the
  557. # other fails or that only one gets written to disk.
  558. if os.path.exists(filesystem_path):
  559. os.rename(filesystem_path, os.path.join(tmp_dir, "delete"))
  560. os.rename(tmp_filesystem_path, filesystem_path)
  561. cls._sync_directory(parent_dir)
  562. return cls(sane_path, principal=principal)
  563. def upload_all_nonatomic(self, vobject_items):
  564. """Upload a new set of items.
  565. This takes a mapping of href and vobject items and
  566. uploads them nonatomic and without existence checks.
  567. """
  568. with contextlib.ExitStack() as stack:
  569. fs = []
  570. for href, item in vobject_items.items():
  571. if not is_safe_filesystem_path_component(href):
  572. raise UnsafePathError(href)
  573. path = path_to_filesystem(self._filesystem_path, href)
  574. fs.append(stack.enter_context(
  575. open(path, "w", encoding=self.encoding, newline="")))
  576. fs[-1].write(item.serialize())
  577. # sync everything at once because it's slightly faster.
  578. for f in fs:
  579. self._fsync(f.fileno())
  580. self._sync_directory(self._filesystem_path)
  581. @classmethod
  582. def move(cls, item, to_collection, to_href):
  583. if not is_safe_filesystem_path_component(to_href):
  584. raise UnsafePathError(to_href)
  585. os.replace(
  586. path_to_filesystem(item.collection._filesystem_path, item.href),
  587. path_to_filesystem(to_collection._filesystem_path, to_href))
  588. cls._sync_directory(to_collection._filesystem_path)
  589. if item.collection._filesystem_path != to_collection._filesystem_path:
  590. cls._sync_directory(item.collection._filesystem_path)
  591. # Track the change
  592. to_collection._update_history_etag(to_href, item)
  593. item.collection._update_history_etag(item.href, None)
  594. to_collection._clean_history_cache()
  595. if item.collection._filesystem_path != to_collection._filesystem_path:
  596. item.collection._clean_history_cache()
  597. @classmethod
  598. def _clean_cache(cls, folder, names, max_age=None):
  599. """Delete all ``names`` in ``folder`` that are older than ``max_age``.
  600. """
  601. age_limit = time.time() - max_age if max_age is not None else None
  602. modified = False
  603. for name in names:
  604. if not is_safe_filesystem_path_component(name):
  605. continue
  606. if age_limit is not None:
  607. try:
  608. # Race: Another process might have deleted the file.
  609. mtime = os.path.getmtime(os.path.join(folder, name))
  610. except FileNotFoundError:
  611. continue
  612. if mtime > age_limit:
  613. continue
  614. cls.logger.debug("Found expired item in cache: %r", name)
  615. # Race: Another process might have deleted or locked the
  616. # file.
  617. try:
  618. os.remove(os.path.join(folder, name))
  619. except (FileNotFoundError, PermissionError):
  620. continue
  621. modified = True
  622. if modified:
  623. cls._sync_directory(folder)
  624. def _update_history_etag(self, href, item):
  625. """Updates and retrieves the history etag from the history cache.
  626. The history cache contains a file for each current and deleted item
  627. of the collection. These files contain the etag of the item (empty
  628. string for deleted items) and a history etag, which is a hash over
  629. the previous history etag and the etag separated by "/".
  630. """
  631. history_folder = os.path.join(self._filesystem_path,
  632. ".Radicale.cache", "history")
  633. try:
  634. with open(os.path.join(history_folder, href), "rb") as f:
  635. cache_etag, history_etag = pickle.load(f)
  636. except (FileNotFoundError, pickle.UnpicklingError, ValueError) as e:
  637. if isinstance(e, (pickle.UnpicklingError, ValueError)):
  638. self.logger.warning(
  639. "Failed to load history cache entry %r in %r: %s",
  640. href, self.path, e, exc_info=True)
  641. cache_etag = ""
  642. # Initialize with random data to prevent collisions with cleaned
  643. # expired items.
  644. history_etag = binascii.hexlify(os.urandom(16)).decode("ascii")
  645. etag = item.etag if item else ""
  646. if etag != cache_etag:
  647. self._makedirs_synced(history_folder)
  648. history_etag = get_etag(history_etag + "/" + etag).strip("\"")
  649. try:
  650. # Race: Other processes might have created and locked the file.
  651. with self._atomic_write(os.path.join(history_folder, href),
  652. "wb") as f:
  653. pickle.dump([etag, history_etag], f)
  654. except PermissionError:
  655. pass
  656. return history_etag
  657. def _get_deleted_history_hrefs(self):
  658. """Returns the hrefs of all deleted items that are still in the
  659. history cache."""
  660. history_folder = os.path.join(self._filesystem_path,
  661. ".Radicale.cache", "history")
  662. try:
  663. for href in scandir(history_folder):
  664. if not is_safe_filesystem_path_component(href):
  665. continue
  666. if os.path.isfile(os.path.join(self._filesystem_path, href)):
  667. continue
  668. yield href
  669. except FileNotFoundError:
  670. pass
  671. def _clean_history_cache(self):
  672. # Delete all expired cache entries of deleted items.
  673. history_folder = os.path.join(self._filesystem_path,
  674. ".Radicale.cache", "history")
  675. self._clean_cache(history_folder, self._get_deleted_history_hrefs(),
  676. max_age=self.configuration.getint(
  677. "storage", "max_sync_token_age"))
  def sync(self, old_token=None):
      """Return the current sync token and the hrefs changed since a token.

      The sync token has the form
      ``http://radicale.org/ns/sync/TOKEN_NAME`` where ``TOKEN_NAME`` is the
      md5 hash of all history etags of present and past items of the
      collection.

      :param old_token: a token returned by an earlier call.  With a false
          value every known href is reported as changed.
      :returns: ``(token, changes)``.  ``changes`` is an empty tuple when
          the collection still matches ``old_token``, otherwise a list with
          the hrefs of new, changed and deleted items.
      :raises ValueError: if ``old_token`` is malformed, or if its stored
          state is missing or cannot be loaded.
      """
      token_prefix = "http://radicale.org/ns/sync/"

      def check_token_name(token_name):
          # A token name is the hex digest of an md5 hash.
          return (len(token_name) == 32 and
                  all(c in "0123456789abcdef" for c in token_name))

      old_token_name = None
      if old_token:
          # Extract the token name from the sync token
          if not old_token.startswith(token_prefix):
              raise ValueError("Malformed token: %r" % old_token)
          old_token_name = old_token[len(token_prefix):]
          if not check_token_name(old_token_name):
              raise ValueError("Malformed token: %r" % old_token)
      # Get the current state and sync-token of the collection.
      state = {}
      token_name_hash = md5()
      # Find the history of all existing and deleted items
      for href, item in chain(
              ((item.href, item) for item in self.get_all()),
              ((href, None) for href in self._get_deleted_history_hrefs())):
          history_etag = self._update_history_etag(href, item)
          state[href] = history_etag
          token_name_hash.update((href + "/" + history_etag).encode("utf-8"))
      token_name = token_name_hash.hexdigest()
      token = token_prefix + token_name
      if token_name == old_token_name:
          # Nothing changed
          return token, ()
      token_folder = os.path.join(self._filesystem_path,
                                  ".Radicale.cache", "sync-token")
      token_path = os.path.join(token_folder, token_name)
      old_state = {}
      if old_token_name:
          # load the old token state
          old_token_path = os.path.join(token_folder, old_token_name)
          try:
              # Race: Another process might have deleted the file.
              # NOTE: pickle is only safe here as long as the cache folder
              # can only be written by trusted processes.
              with open(old_token_path, "rb") as f:
                  old_state = pickle.load(f)
          except (FileNotFoundError, pickle.UnpicklingError, EOFError,
                  ValueError) as e:
              # A truncated or empty file makes pickle raise EOFError; it
              # is treated like any other damaged token file.
              if not isinstance(e, FileNotFoundError):
                  self.logger.warning(
                      "Failed to load stored sync token %r in %r: %s",
                      old_token_name, self.path, e, exc_info=True)
                  # Delete the damaged file
                  try:
                      os.remove(old_token_path)
                  except (FileNotFoundError, PermissionError):
                      pass
              raise ValueError("Token not found: %r" % old_token)
      # write the new token state or update the modification time of
      # existing token state
      if not os.path.exists(token_path):
          self._makedirs_synced(token_folder)
          try:
              # Race: Other processes might have created and locked the file.
              with self._atomic_write(token_path, "wb") as f:
                  pickle.dump(state, f)
          except PermissionError:
              pass
          else:
              # clean up old sync tokens and item cache
              self._clean_cache(token_folder, os.listdir(token_folder),
                                max_age=self.configuration.getint(
                                    "storage", "max_sync_token_age"))
              self._clean_history_cache()
      else:
          # Try to update the modification time
          try:
              # Race: Another process might have deleted the file.
              os.utime(token_path)
          except FileNotFoundError:
              pass
      # Find all new, changed and deleted (that are still in the item cache)
      # items
      changes = [href for href, history_etag in state.items()
                 if history_etag != old_state.get(href)]
      # Find all deleted items that are no longer in the item cache
      changes.extend(href for href in old_state if href not in state)
      return token, changes
  def list(self):
      """Yield the hrefs of the files stored in the collection folder.

      File names that are not safe filesystem path components are left out;
      they are mentioned in the debug log unless they start with
      ``.Radicale``.
      """
      for name in scandir(self._filesystem_path, only_files=True):
          if is_safe_filesystem_path_component(name):
              yield name
          elif not name.startswith(".Radicale"):
              self.logger.debug(
                  "Skipping item %r in %r", name, self.path)
  # Set to True by ``_get_with_metadata`` right before it cleans the item
  # cache, so that the cleanup is not repeated for the same instance.
  _item_cache_cleaned = False
  def get(self, href, verify_href=True):
      """Return the item stored under ``href``.

      This is ``_get_with_metadata`` with the metadata part of the result
      dropped.
      """
      found, _metadata = self._get_with_metadata(
          href, verify_href=verify_href)
      return found
  781. def _get_with_metadata(self, href, verify_href=True):
  782. # Like ``get`` but additonally returns the following metadata:
  783. # tag, start, end: see ``xmlutils.find_tag_and_time_range``
  784. if verify_href:
  785. try:
  786. if not is_safe_filesystem_path_component(href):
  787. raise UnsafePathError(href)
  788. path = path_to_filesystem(self._filesystem_path, href)
  789. except ValueError as e:
  790. self.logger.debug(
  791. "Can't translate name %r safely to filesystem in %r: %s",
  792. href, self.path, e, exc_info=True)
  793. return None, None
  794. else:
  795. path = os.path.join(self._filesystem_path, href)
  796. try:
  797. with open(path, "rb") as f:
  798. btext = f.read()
  799. except (FileNotFoundError, IsADirectoryError):
  800. return None, None
  801. # The hash of the component in the file system. This is used to check,
  802. # if the entry in the cache is still valid.
  803. input_hash = md5()
  804. input_hash.update(btext)
  805. input_hash = input_hash.hexdigest()
  806. cache_folder = os.path.join(self._filesystem_path, ".Radicale.cache",
  807. "item")
  808. try:
  809. with open(os.path.join(cache_folder, href), "rb") as f:
  810. cinput_hash, cetag, ctext, ctag, cstart, cend = pickle.load(f)
  811. except (FileNotFoundError, pickle.UnpicklingError, ValueError) as e:
  812. if isinstance(e, (pickle.UnpicklingError, ValueError)):
  813. self.logger.warning(
  814. "Failed to load item cache entry %r in %r: %s",
  815. href, self.path, e, exc_info=True)
  816. cinput_hash = cetag = ctext = ctag = cstart = cend = None
  817. vobject_item = None
  818. if input_hash != cinput_hash:
  819. vobject_item = Item(self, href=href,
  820. text=btext.decode(self.encoding)).item
  821. # Serialize the object again, to normalize the text representation.
  822. # The storage may have been edited externally.
  823. ctext = vobject_item.serialize()
  824. cetag = get_etag(ctext)
  825. try:
  826. ctag, cstart, cend = xmlutils.find_tag_and_time_range(
  827. vobject_item)
  828. except Exception as e:
  829. raise RuntimeError("Failed to find tag and time range of item "
  830. "%r from %r: %s" % (href, self.path,
  831. e)) from e
  832. self._makedirs_synced(cache_folder)
  833. try:
  834. # Race: Other processes might have created and locked the
  835. # file.
  836. with self._atomic_write(os.path.join(cache_folder, href),
  837. "wb") as f:
  838. pickle.dump((input_hash, cetag, ctext,
  839. ctag, cstart, cend), f)
  840. except PermissionError:
  841. pass
  842. # Clean cache entries (max once per request)
  843. # This happens once after new uploads, or if the data in the
  844. # file system was edited externally.
  845. if not self._item_cache_cleaned:
  846. self._item_cache_cleaned = True
  847. self._clean_cache(cache_folder, (
  848. href for href in scandir(cache_folder) if not
  849. os.path.isfile(os.path.join(self._filesystem_path, href))))
  850. last_modified = time.strftime(
  851. "%a, %d %b %Y %H:%M:%S GMT",
  852. time.gmtime(os.path.getmtime(path)))
  853. return Item(self, href=href, last_modified=last_modified, etag=cetag,
  854. text=ctext, item=vobject_item), (ctag, cstart, cend)
  def get_multi2(self, hrefs):
      """Yield ``(href, item)`` for every href in ``hrefs``.

      ``item`` is ``None`` when ``href`` is not a safe filesystem path
      component, or when the path exists although the name is not part of
      the directory listing (a file name collision).

      It's faster to check for file name collisions here, because
      ``os.listdir`` only has to be called once.
      """
      existing = None
      for href in hrefs:
          if existing is None:
              # List dir after hrefs returned one item, the iterator may be
              # empty and the for-loop is never executed.
              # A frozenset makes each membership test below constant time.
              existing = frozenset(os.listdir(self._filesystem_path))
          path = os.path.join(self._filesystem_path, href)
          if (not is_safe_filesystem_path_component(href) or
                  (href not in existing and os.path.lexists(path))):
              self.logger.debug(
                  "Can't translate name safely to filesystem: %r", href)
              yield (href, None)
          else:
              yield (href, self.get(href, verify_href=False))
  def get_all(self):
      """Return a generator over all items of the collection.

      The names are not verified again, because ``list`` only reports names
      found in the directory listing, so collisions cannot occur.
      """
      hrefs = self.list()
      return (self.get(name, verify_href=False) for name in hrefs)
  def get_all_filtered(self, filters):
      """Yield ``(item, filters_matched)`` for items that may match.

      ``filters`` is reduced with ``xmlutils.simplify_prefilters`` to a
      tag, a time range and a ``simple`` flag.  Without a tag every item is
      yielded together with ``simple``.  Otherwise only items with that tag
      whose time range overlaps the requested one are yielded.
      """
      tag, start, end, simple = xmlutils.simplify_prefilters(filters)
      if not tag:
          # no filter
          for item in self.get_all():
              yield item, simple
          return
      for href in self.list():
          item, (item_tag, item_start, item_end) = self._get_with_metadata(
              href, verify_href=False)
          if tag == item_tag and item_start < end and item_end > start:
              within_range = start <= item_start or item_end <= end
              yield item, simple and within_range
  def upload(self, href, vobject_item):
      """Store ``vobject_item`` under ``href`` and return the new item.

      The serialized item is written with ``_atomic_write``; afterwards the
      history etag of ``href`` is updated and the history cache is cleaned.

      :raises UnsafePathError: if ``href`` is not a safe filesystem path
          component.
      """
      if not is_safe_filesystem_path_component(href):
          raise UnsafePathError(href)
      target = path_to_filesystem(self._filesystem_path, href)
      uploaded = Item(self, href=href, item=vobject_item)
      with self._atomic_write(target, newline="") as stream:
          stream.write(uploaded.serialize())
      # Track the change
      self._update_history_etag(href, uploaded)
      self._clean_history_cache()
      return uploaded
  def delete(self, href=None):
      """Delete the item ``href``, or the whole collection if it is ``None``.

      A collection folder that cannot be removed with ``os.rmdir`` is moved
      into a temporary directory next to it, which is removed together with
      its content.  Deleting an item updates its history etag and cleans the
      history cache.

      :raises UnsafePathError: if ``href`` is not a safe filesystem path
          component.
      :raises ComponentNotFoundError: if no file exists for ``href``.
      """
      if href is not None:
          # Delete an item
          if not is_safe_filesystem_path_component(href):
              raise UnsafePathError(href)
          item_path = path_to_filesystem(self._filesystem_path, href)
          if not os.path.isfile(item_path):
              raise ComponentNotFoundError(href)
          os.remove(item_path)
          self._sync_directory(os.path.dirname(item_path))
          # Track the change
          self._update_history_etag(href, None)
          self._clean_history_cache()
      else:
          # Delete the collection
          parent_folder = os.path.dirname(self._filesystem_path)
          try:
              os.rmdir(self._filesystem_path)
          except OSError:
              with TemporaryDirectory(
                      prefix=".Radicale.tmp-", dir=parent_folder) as trash:
                  moved_path = os.path.join(
                      trash, os.path.basename(self._filesystem_path))
                  os.rename(self._filesystem_path, moved_path)
                  self._sync_directory(parent_folder)
          else:
              self._sync_directory(parent_folder)
  def get_meta(self, key=None):
      """Return the collection properties, or the single property ``key``.

      The properties file is read again on every call while the storage is
      held for writing; otherwise the value cached in ``_meta`` is reused.
      A missing properties file counts as no properties.

      :raises RuntimeError: if the properties file cannot be decoded.
      """
      # reuse cached value if the storage is read-only
      must_load = self._writer or self._meta is None
      if must_load:
          try:
              with open(self._props_path, encoding=self.encoding) as f:
                  self._meta = json.load(f)
          except FileNotFoundError:
              self._meta = {}
          except ValueError as e:
              raise RuntimeError(
                  "Failed to load properties of collection %r: %s" %
                  (self.path, e)) from e
      if key:
          return self._meta.get(key)
      return self._meta
  def set_meta(self, props):
      """Merge ``props`` into the collection properties and store them.

      Properties whose resulting value is false (for example an empty
      string) are removed.  The result is written as JSON with
      ``_atomic_write``.
      """
      merged = self.get_meta()
      merged.update(props)
      # Collect the names first; the dict must not change while iterating.
      empty_names = [name for name, value in merged.items() if not value]
      for name in empty_names:
          del merged[name]
      with self._atomic_write(self._props_path, "w") as stream:
          json.dump(merged, stream)
  @property
  def last_modified(self):
      """Newest modification time of the collection as an HTTP date string.

      The collection folder, the properties file (if it exists) and every
      item file reported by ``list`` are taken into account.
      """
      fixed_paths = [self._filesystem_path]
      if os.path.exists(self._props_path):
          fixed_paths.append(self._props_path)
      item_paths = (os.path.join(self._filesystem_path, name)
                    for name in self.list())
      newest = max(os.path.getmtime(candidate)
                   for candidate in chain(fixed_paths, item_paths))
      return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(newest))
  def serialize(self):
      """Return the whole collection serialized as one string.

      For a ``VCALENDAR`` collection, the child components of all items are
      merged into a single ``VCALENDAR``.  For a ``VADDRESSBOOK`` collection
      the serialized items are concatenated.  Any other tag gives ``""``.
      """
      tag = self.get_meta("tag")
      if tag == "VADDRESSBOOK":
          return "".join(item.serialize() for item in self.get_all())
      if tag != "VCALENDAR":
          return ""
      in_vcalendar = False
      # Lines are collected in lists and joined once at the end; growing a
      # string line by line is quadratic for big collections.
      vtimezone_lines = []
      included_tzids = set()
      vtimezone = []
      tzid = None
      component_lines = []
      # Concatenate all child elements of VCALENDAR from all items
      # together, while preventing duplicated VTIMEZONE entries.
      # VTIMEZONEs are only distinguished by their TZID, if different
      # timezones share the same TZID this produces erroneous output.
      # VObject fails at this too.
      for item in self.get_all():
          depth = 0
          for line in item.serialize().split("\r\n"):
              if line.startswith("BEGIN:"):
                  depth += 1
              if depth == 1 and line == "BEGIN:VCALENDAR":
                  in_vcalendar = True
              elif in_vcalendar:
                  if depth == 1 and line.startswith("END:"):
                      in_vcalendar = False
                  if depth == 2 and line == "BEGIN:VTIMEZONE":
                      vtimezone.append(line)
                  elif vtimezone:
                      # Inside a VTIMEZONE: buffer it until its END line.
                      vtimezone.append(line)
                      if depth == 2 and line.startswith("TZID:"):
                          tzid = line[len("TZID:"):]
                      elif depth == 2 and line.startswith("END:"):
                          if tzid is None or tzid not in included_tzids:
                              vtimezone_lines.extend(vtimezone)
                          included_tzids.add(tzid)
                          vtimezone.clear()
                          tzid = None
                  elif depth >= 2:
                      component_lines.append(line)
              if line.startswith("END:"):
                  depth -= 1
      # Empty parts (no timezones or no components) are left out.
      return "\r\n".join(filter(bool, (
          "BEGIN:VCALENDAR",
          "VERSION:2.0",
          "PRODID:-//PYVOBJECT//NONSGML Version 1//EN",
          "\r\n".join(vtimezone_lines),
          "\r\n".join(component_lines),
          "END:VCALENDAR")))
  @property
  def etag(self):
      """Quoted md5 digest over the hrefs and etags of all items.

      The value is computed again on every access while the storage is held
      for writing; otherwise the value cached in ``_etag`` is reused.
      """
      # reuse cached value if the storage is read-only
      if not self._writer and self._etag is not None:
          return self._etag
      digest = md5()
      for item in self.get_all():
          digest.update((item.href + "/" + item.etag).encode("utf-8"))
      self._etag = '"%s"' % digest.hexdigest()
      return self._etag
  # Class-level state used by ``acquire_lock``.
  # In-process lock that guards all of the fields below.
  _lock = threading.Lock()
  # Conditions of the callers waiting for access, in arrival order; the
  # first entry is the one that gets notified.
  _waiters = []
  # Open file object of the ".Radicale.lock" file, or None while closed.
  _lock_file = None
  # True while an OS-level lock is held on ``_lock_file``.
  _lock_file_locked = False
  # Number of callers that currently hold the lock in mode "r".
  _readers = 0
  # True while a caller holds the lock for writing.
  _writer = False
  @classmethod
  @contextmanager
  def acquire_lock(cls, mode, user=None):
      """Context manager that holds the storage lock while its body runs.

      Several holders of mode ``"r"`` may be active at once; any other
      mode waits until there is neither a writer nor a reader.  Waiting
      callers are served in arrival order.  If the ``filesystem_locking``
      option is enabled, the ``.Radicale.lock`` file in the storage folder
      is additionally locked at OS level.

      When the body of the ``with`` statement finishes without an exception
      and ``mode`` is ``"w"``, the configured ``hook`` command (if any) is
      run in the storage folder with ``%(user)s`` replaced by the
      shell-quoted ``user`` (``"Anonymous"`` if ``user`` is false).

      :param mode: ``"r"`` for shared or ``"w"`` for exclusive access.
      :param user: user name made available to the hook command.
      :raises RuntimeError: if locking or unlocking the lock file fails or
          the operating system is not supported.
      """
      def condition():
          # True if the requested access is compatible with the callers
          # that currently hold the lock.
          if mode == "r":
              return not cls._writer
          else:
              return not cls._writer and cls._readers == 0
      file_locking = cls.configuration.getboolean("storage",
                                                  "filesystem_locking")
      folder = os.path.expanduser(cls.configuration.get(
          "storage", "filesystem_folder"))
      # Use a primitive lock which only works within one process as a
      # precondition for inter-process file-based locking
      with cls._lock:
          if cls._waiters or not condition():
              # Use FIFO for access requests
              waiter = threading.Condition(lock=cls._lock)
              cls._waiters.append(waiter)
              # Re-check after every wake-up; being notified does not
              # guarantee that the access is possible by now.
              while True:
                  waiter.wait()
                  if condition():
                      break
              cls._waiters.pop(0)
          if mode == "r":
              cls._readers += 1
              # Notify additional potential readers
              if cls._waiters:
                  cls._waiters[0].notify()
          else:
              cls._writer = True
          # NOTE(review): from here on ``_readers``/``_writer`` are already
          # updated; if one of the statements below raises, they are not
          # rolled back because the ``try``/``finally`` starts later.
          if not cls._lock_file:
              cls._makedirs_synced(folder)
              lock_path = os.path.join(folder, ".Radicale.lock")
              cls._lock_file = open(lock_path, "w+")
              # Set access rights to a necessary minimum to prevent locking
              # by arbitrary users
              try:
                  os.chmod(lock_path, stat.S_IWUSR | stat.S_IRUSR)
              except OSError as e:
                  cls.logger.info("Failed to set permissions on lock file:"
                                  " %s", e, exc_info=True)
          # Only the first holder takes the OS-level lock; further readers
          # share it.
          if file_locking and not cls._lock_file_locked:
              if os.name == "nt":
                  handle = msvcrt.get_osfhandle(cls._lock_file.fileno())
                  flags = LOCKFILE_EXCLUSIVE_LOCK if mode == "w" else 0
                  overlapped = Overlapped()
                  if not lock_file_ex(handle, flags, 0, 1, 0, overlapped):
                      raise RuntimeError("Locking the storage failed: %s" %
                                         ctypes.FormatError())
              elif os.name == "posix":
                  _cmd = fcntl.LOCK_EX if mode == "w" else fcntl.LOCK_SH
                  try:
                      fcntl.flock(cls._lock_file.fileno(), _cmd)
                  except OSError as e:
                      raise RuntimeError("Locking the storage failed: %s" %
                                         e) from e
              else:
                  raise RuntimeError("Locking the storage failed: "
                                     "Unsupported operating system")
              cls._lock_file_locked = True
      try:
          yield
          # execute hook
          # (skipped when the body of the ``with`` statement raised)
          hook = cls.configuration.get("storage", "hook")
          if mode == "w" and hook:
              cls.logger.debug("Running hook")
              subprocess.check_call(
                  hook % {"user": shlex.quote(user or "Anonymous")},
                  shell=True, cwd=folder)
      finally:
          with cls._lock:
              if mode == "r":
                  cls._readers -= 1
              else:
                  cls._writer = False
              # Release the OS-level lock once the last holder is gone.
              if file_locking and cls._readers == 0:
                  if os.name == "nt":
                      handle = msvcrt.get_osfhandle(cls._lock_file.fileno())
                      overlapped = Overlapped()
                      if not unlock_file_ex(handle, 0, 1, 0, overlapped):
                          raise RuntimeError("Unlocking the storage failed: "
                                             "%s" % ctypes.FormatError())
                  elif os.name == "posix":
                      try:
                          fcntl.flock(cls._lock_file.fileno(), fcntl.LOCK_UN)
                      except OSError as e:
                          raise RuntimeError("Unlocking the storage failed: "
                                             "%s" % e) from e
                  else:
                      raise RuntimeError("Unlocking the storage failed: "
                                         "Unsupported operating system")
                  cls._lock_file_locked = False
              # Wake up the caller that has been waiting the longest.
              if cls._waiters:
                  cls._waiters[0].notify()
              # Optionally close the lock file when nobody uses or waits
              # for the lock any more.
              if (cls.configuration.getboolean(
                      "storage", "filesystem_close_lock_file") and
                      cls._readers == 0 and not cls._waiters):
                  cls._lock_file.close()
                  cls._lock_file = None