filter.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. # This file is part of Radicale - CalDAV and CardDAV server
  2. # Copyright © 2008 Nicolas Kandel
  3. # Copyright © 2008 Pascal Halter
  4. # Copyright © 2008-2015 Guillaume Ayoub
  5. # Copyright © 2017-2021 Unrud <unrud@outlook.com>
  6. # Copyright © 2023-2024 Ray <ray@react0r.com>
  7. # Copyright © 2024-2025 Peter Bieringer <pb@bieringer.de>
  8. #
  9. # This library is free software: you can redistribute it and/or modify
  10. # it under the terms of the GNU General Public License as published by
  11. # the Free Software Foundation, either version 3 of the License, or
  12. # (at your option) any later version.
  13. #
  14. # This library is distributed in the hope that it will be useful,
  15. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. # GNU General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU General Public License
  20. # along with Radicale. If not, see <http://www.gnu.org/licenses/>.
  21. import math
  22. import xml.etree.ElementTree as ET
  23. from datetime import date, datetime, timedelta, timezone
  24. from itertools import chain
  25. from typing import (Callable, Iterable, Iterator, List, Optional, Sequence,
  26. Tuple)
  27. import vobject
  28. from radicale import item, xmlutils
  29. from radicale.log import logger
  30. DAY: timedelta = timedelta(days=1)
  31. SECOND: timedelta = timedelta(seconds=1)
  32. DATETIME_MIN: datetime = datetime.min.replace(tzinfo=timezone.utc)
  33. DATETIME_MAX: datetime = datetime.max.replace(tzinfo=timezone.utc)
  34. TIMESTAMP_MIN: int = math.floor(DATETIME_MIN.timestamp())
  35. TIMESTAMP_MAX: int = math.ceil(DATETIME_MAX.timestamp())
  36. def date_to_datetime(d: date) -> datetime:
  37. """Transform any date to a UTC datetime.
  38. If ``d`` is a datetime without timezone, return as UTC datetime. If ``d``
  39. is already a datetime with timezone, return as is.
  40. """
  41. if not isinstance(d, datetime):
  42. d = datetime.combine(d, datetime.min.time())
  43. if not d.tzinfo:
  44. # NOTE: using vobject's UTC as it wasn't playing well with datetime's.
  45. d = d.replace(tzinfo=vobject.icalendar.utc)
  46. return d
  47. def parse_time_range(time_filter: ET.Element) -> Tuple[datetime, datetime]:
  48. start_text = time_filter.get("start")
  49. end_text = time_filter.get("end")
  50. if start_text:
  51. start = datetime.strptime(
  52. start_text, "%Y%m%dT%H%M%SZ").replace(
  53. tzinfo=timezone.utc)
  54. else:
  55. start = DATETIME_MIN
  56. if end_text:
  57. end = datetime.strptime(
  58. end_text, "%Y%m%dT%H%M%SZ").replace(
  59. tzinfo=timezone.utc)
  60. else:
  61. end = DATETIME_MAX
  62. return start, end
  63. def time_range_timestamps(time_filter: ET.Element) -> Tuple[int, int]:
  64. start, end = parse_time_range(time_filter)
  65. return (math.floor(start.timestamp()), math.ceil(end.timestamp()))
  66. def comp_match(item: "item.Item", filter_: ET.Element, level: int = 0) -> bool:
  67. """Check whether the ``item`` matches the comp ``filter_``.
  68. If ``level`` is ``0``, the filter is applied on the
  69. item's collection. Otherwise, it's applied on the item.
  70. See rfc4791-9.7.1.
  71. """
  72. # TODO: Improve filtering for VALARM and VFREEBUSY
  73. # so far only filtering based on existence of such component is implemented
  74. # HACK: the filters are tested separately against all components
  75. name = filter_.get("name", "").upper()
  76. if level == 0:
  77. tag = item.name
  78. elif level == 1:
  79. tag = item.component_name
  80. elif level == 2:
  81. tag = item.component_name
  82. else:
  83. logger.warning(
  84. "Filters with %d levels of comp-filter are not supported", level)
  85. return True
  86. if not tag:
  87. return False
  88. if len(filter_) == 0:
  89. # Point #1 of rfc4791-9.7.1
  90. return name == tag
  91. if len(filter_) == 1:
  92. if filter_[0].tag == xmlutils.make_clark("C:is-not-defined"):
  93. # Point #2 of rfc4791-9.7.1
  94. return name != tag
  95. if (level < 2) and (name != tag):
  96. return False
  97. if ((level == 0 and name != "VCALENDAR") or
  98. (level == 1 and name not in ("VTODO", "VEVENT", "VJOURNAL")) or
  99. (level == 2 and name not in ("VALARM", "VFREEBUSY"))):
  100. logger.warning("Filtering %s is not supported", name)
  101. return True
  102. # Point #3 and #4 of rfc4791-9.7.1
  103. if level == 0:
  104. components = [item.vobject_item]
  105. elif level == 1:
  106. components = list(getattr(item.vobject_item, "%s_list" % tag.lower()))
  107. elif level == 2:
  108. components = list(getattr(item.vobject_item, "%s_list" % tag.lower()))
  109. for comp in components:
  110. if not hasattr(comp, name.lower()):
  111. return False
  112. for child in filter_:
  113. if child.tag == xmlutils.make_clark("C:prop-filter"):
  114. if not any(prop_match(comp, child, "C")
  115. for comp in components):
  116. return False
  117. elif child.tag == xmlutils.make_clark("C:time-range"):
  118. if not time_range_match(item.vobject_item, filter_[0], tag):
  119. return False
  120. elif child.tag == xmlutils.make_clark("C:comp-filter"):
  121. if not comp_match(item, child, level=level + 1):
  122. return False
  123. else:
  124. raise ValueError("Unexpected %r in comp-filter" % child.tag)
  125. return True
  126. def prop_match(vobject_item: vobject.base.Component,
  127. filter_: ET.Element, ns: str) -> bool:
  128. """Check whether the ``item`` matches the prop ``filter_``.
  129. See rfc4791-9.7.2 and rfc6352-10.5.1.
  130. """
  131. name = filter_.get("name", "").lower()
  132. if len(filter_) == 0:
  133. # Point #1 of rfc4791-9.7.2
  134. return name in vobject_item.contents
  135. if len(filter_) == 1:
  136. if filter_[0].tag == xmlutils.make_clark("%s:is-not-defined" % ns):
  137. # Point #2 of rfc4791-9.7.2
  138. return name not in vobject_item.contents
  139. if name not in vobject_item.contents:
  140. return False
  141. # Point #3 and #4 of rfc4791-9.7.2
  142. for child in filter_:
  143. if ns == "C" and child.tag == xmlutils.make_clark("C:time-range"):
  144. if not time_range_match(vobject_item, child, name):
  145. return False
  146. elif child.tag == xmlutils.make_clark("%s:text-match" % ns):
  147. if not text_match(vobject_item, child, name, ns):
  148. return False
  149. elif child.tag == xmlutils.make_clark("%s:param-filter" % ns):
  150. if not param_filter_match(vobject_item, child, name, ns):
  151. return False
  152. else:
  153. raise ValueError("Unexpected %r in prop-filter" % child.tag)
  154. return True
  155. def time_range_match(vobject_item: vobject.base.Component,
  156. filter_: ET.Element, child_name: str) -> bool:
  157. """Check whether the component/property ``child_name`` of
  158. ``vobject_item`` matches the time-range ``filter_``."""
  159. if not filter_.get("start") and not filter_.get("end"):
  160. return False
  161. start, end = parse_time_range(filter_)
  162. matched = False
  163. def range_fn(range_start: datetime, range_end: datetime,
  164. is_recurrence: bool) -> bool:
  165. nonlocal matched
  166. if start < range_end and range_start < end:
  167. matched = True
  168. return True
  169. if end < range_start and not is_recurrence:
  170. return True
  171. return False
  172. def infinity_fn(start: datetime) -> bool:
  173. return False
  174. visit_time_ranges(vobject_item, child_name, range_fn, infinity_fn)
  175. return matched
  176. def time_range_fill(vobject_item: vobject.base.Component,
  177. filter_: ET.Element, child_name: str, n: int = 1
  178. ) -> List[Tuple[datetime, datetime]]:
  179. """Create a list of ``n`` occurances from the component/property ``child_name``
  180. of ``vobject_item``."""
  181. if not filter_.get("start") and not filter_.get("end"):
  182. return []
  183. start, end = parse_time_range(filter_)
  184. ranges: List[Tuple[datetime, datetime]] = []
  185. def range_fn(range_start: datetime, range_end: datetime,
  186. is_recurrence: bool) -> bool:
  187. nonlocal ranges
  188. if start < range_end and range_start < end:
  189. ranges.append((range_start, range_end))
  190. if n > 0 and len(ranges) >= n:
  191. return True
  192. if end < range_start and not is_recurrence:
  193. return True
  194. return False
  195. def infinity_fn(range_start: datetime) -> bool:
  196. return False
  197. visit_time_ranges(vobject_item, child_name, range_fn, infinity_fn)
  198. return ranges
  199. def visit_time_ranges(vobject_item: vobject.base.Component, child_name: str,
  200. range_fn: Callable[[datetime, datetime, bool], bool],
  201. infinity_fn: Callable[[datetime], bool]) -> None:
  202. """Visit all time ranges in the component/property ``child_name`` of
  203. `vobject_item`` with visitors ``range_fn`` and ``infinity_fn``.
  204. ``range_fn`` gets called for every time_range with ``start`` and ``end``
  205. datetimes and ``is_recurrence`` as arguments. If the function returns True,
  206. the operation is cancelled.
  207. ``infinity_fn`` gets called when an infinite recurrence rule is detected
  208. with ``start`` datetime as argument. If the function returns True, the
  209. operation is cancelled.
  210. See rfc4791-9.9.
  211. """
  212. # HACK: According to rfc5545-3.8.4.4 a recurrence that is rescheduled
  213. # with Recurrence ID affects the recurrence itself and all following
  214. # recurrences too. This is not respected and client don't seem to bother
  215. # either.
  216. def getrruleset(child: vobject.base.Component, ignore: Sequence[date]
  217. ) -> Tuple[Iterable[date], bool]:
  218. infinite = False
  219. for rrule in child.contents.get("rrule", []):
  220. if (";UNTIL=" not in rrule.value.upper() and
  221. ";COUNT=" not in rrule.value.upper()):
  222. infinite = True
  223. break
  224. if infinite:
  225. for dtstart in child.getrruleset(addRDate=True):
  226. if dtstart in ignore:
  227. continue
  228. if infinity_fn(date_to_datetime(dtstart)):
  229. return (), True
  230. break
  231. return filter(lambda dtstart: dtstart not in ignore,
  232. child.getrruleset(addRDate=True)), False
  233. def get_children(components: Iterable[vobject.base.Component]) -> Iterator[
  234. Tuple[vobject.base.Component, bool, List[date]]]:
  235. main = None
  236. rec_main = None
  237. recurrences = []
  238. for comp in components:
  239. if hasattr(comp, "recurrence_id") and comp.recurrence_id.value:
  240. recurrences.append(comp.recurrence_id.value)
  241. if comp.rruleset:
  242. if comp.rruleset._len is None:
  243. logger.warning("Ignore empty RRULESET in item at RECURRENCE-ID with value '%s' and UID '%s'", comp.recurrence_id.value, comp.uid.value)
  244. else:
  245. # Prevent possible infinite loop
  246. raise ValueError("Overwritten recurrence with RRULESET")
  247. rec_main = comp
  248. yield comp, True, []
  249. else:
  250. if main is not None:
  251. raise ValueError("Multiple main components. Got comp: {}".format(comp))
  252. main = comp
  253. if main is None and len(recurrences) == 1:
  254. main = rec_main
  255. if main is None:
  256. raise ValueError("Main component missing")
  257. yield main, False, recurrences
  258. # Comments give the lines in the tables of the specification
  259. if child_name == "VEVENT":
  260. for child, is_recurrence, recurrences in get_children(
  261. vobject_item.vevent_list):
  262. # TODO: check if there's a timezone
  263. dtstart = child.dtstart.value
  264. if child.rruleset:
  265. dtstarts, infinity = getrruleset(child, recurrences)
  266. if infinity:
  267. return
  268. else:
  269. dtstarts = (dtstart,)
  270. dtend = getattr(child, "dtend", None)
  271. if dtend is not None:
  272. dtend = dtend.value
  273. original_duration = (dtend - dtstart).total_seconds()
  274. dtend = date_to_datetime(dtend)
  275. duration = getattr(child, "duration", None)
  276. if duration is not None:
  277. original_duration = duration = duration.value
  278. for dtstart in dtstarts:
  279. dtstart_is_datetime = isinstance(dtstart, datetime)
  280. dtstart = date_to_datetime(dtstart)
  281. if dtend is not None:
  282. # Line 1
  283. dtend = dtstart + timedelta(seconds=original_duration)
  284. if range_fn(dtstart, dtend, is_recurrence):
  285. return
  286. elif duration is not None:
  287. if original_duration is None:
  288. original_duration = duration.seconds
  289. if duration.seconds > 0:
  290. # Line 2
  291. if range_fn(dtstart, dtstart + duration,
  292. is_recurrence):
  293. return
  294. else:
  295. # Line 3
  296. if range_fn(dtstart, dtstart + SECOND, is_recurrence):
  297. return
  298. elif dtstart_is_datetime:
  299. # Line 4
  300. if range_fn(dtstart, dtstart + SECOND, is_recurrence):
  301. return
  302. else:
  303. # Line 5
  304. if range_fn(dtstart, dtstart + DAY, is_recurrence):
  305. return
  306. elif child_name == "VTODO":
  307. for child, is_recurrence, recurrences in get_children(
  308. vobject_item.vtodo_list):
  309. dtstart = getattr(child, "dtstart", None)
  310. duration = getattr(child, "duration", None)
  311. due = getattr(child, "due", None)
  312. completed = getattr(child, "completed", None)
  313. created = getattr(child, "created", None)
  314. if dtstart is not None:
  315. dtstart = date_to_datetime(dtstart.value)
  316. if duration is not None:
  317. duration = duration.value
  318. if due is not None:
  319. due = date_to_datetime(due.value)
  320. if dtstart is not None:
  321. original_duration = (due - dtstart).total_seconds()
  322. if completed is not None:
  323. completed = date_to_datetime(completed.value)
  324. if created is not None:
  325. created = date_to_datetime(created.value)
  326. original_duration = (completed - created).total_seconds()
  327. elif created is not None:
  328. created = date_to_datetime(created.value)
  329. if child.rruleset:
  330. reference_dates, infinity = getrruleset(child, recurrences)
  331. if infinity:
  332. return
  333. else:
  334. if dtstart is not None:
  335. reference_dates = (dtstart,)
  336. elif due is not None:
  337. reference_dates = (due,)
  338. elif completed is not None:
  339. reference_dates = (completed,)
  340. elif created is not None:
  341. reference_dates = (created,)
  342. else:
  343. # Line 8
  344. if range_fn(DATETIME_MIN, DATETIME_MAX, is_recurrence):
  345. return
  346. reference_dates = ()
  347. for reference_date in reference_dates:
  348. reference_date = date_to_datetime(reference_date)
  349. if dtstart is not None and duration is not None:
  350. # Line 1
  351. if range_fn(reference_date,
  352. reference_date + duration + SECOND,
  353. is_recurrence):
  354. return
  355. if range_fn(reference_date + duration - SECOND,
  356. reference_date + duration + SECOND,
  357. is_recurrence):
  358. return
  359. elif dtstart is not None and due is not None:
  360. # Line 2
  361. due = reference_date + timedelta(seconds=original_duration)
  362. if (range_fn(reference_date, due, is_recurrence) or
  363. range_fn(reference_date,
  364. reference_date + SECOND, is_recurrence) or
  365. range_fn(due - SECOND, due, is_recurrence) or
  366. range_fn(due - SECOND, reference_date + SECOND,
  367. is_recurrence)):
  368. return
  369. elif dtstart is not None:
  370. if range_fn(reference_date, reference_date + SECOND,
  371. is_recurrence):
  372. return
  373. elif due is not None:
  374. # Line 4
  375. if range_fn(reference_date - SECOND, reference_date,
  376. is_recurrence):
  377. return
  378. elif completed is not None and created is not None:
  379. # Line 5
  380. completed = reference_date + timedelta(
  381. seconds=original_duration)
  382. if (range_fn(reference_date - SECOND,
  383. reference_date + SECOND,
  384. is_recurrence) or
  385. range_fn(completed - SECOND, completed + SECOND,
  386. is_recurrence) or
  387. range_fn(reference_date - SECOND,
  388. reference_date + SECOND, is_recurrence) or
  389. range_fn(completed - SECOND, completed + SECOND,
  390. is_recurrence)):
  391. return
  392. elif completed is not None:
  393. # Line 6
  394. if range_fn(reference_date - SECOND,
  395. reference_date + SECOND, is_recurrence):
  396. return
  397. elif created is not None:
  398. # Line 7
  399. if range_fn(reference_date, DATETIME_MAX, is_recurrence):
  400. return
  401. elif child_name == "VJOURNAL":
  402. for child, is_recurrence, recurrences in get_children(
  403. vobject_item.vjournal_list):
  404. dtstart = getattr(child, "dtstart", None)
  405. if dtstart is not None:
  406. dtstart = dtstart.value
  407. if child.rruleset:
  408. dtstarts, infinity = getrruleset(child, recurrences)
  409. if infinity:
  410. return
  411. else:
  412. dtstarts = (dtstart,)
  413. for dtstart in dtstarts:
  414. dtstart_is_datetime = isinstance(dtstart, datetime)
  415. dtstart = date_to_datetime(dtstart)
  416. if dtstart_is_datetime:
  417. # Line 1
  418. if range_fn(dtstart, dtstart + SECOND, is_recurrence):
  419. return
  420. else:
  421. # Line 2
  422. if range_fn(dtstart, dtstart + DAY, is_recurrence):
  423. return
  424. else:
  425. # Match a property
  426. child = getattr(vobject_item, child_name.lower())
  427. if isinstance(child.value, date):
  428. child_is_datetime = isinstance(child.value, datetime)
  429. child = date_to_datetime(child.value)
  430. if child_is_datetime:
  431. range_fn(child, child + SECOND, False)
  432. else:
  433. range_fn(child, child + DAY, False)
  434. def text_match(vobject_item: vobject.base.Component,
  435. filter_: ET.Element, child_name: str, ns: str,
  436. attrib_name: Optional[str] = None) -> bool:
  437. """Check whether the ``item`` matches the text-match ``filter_``.
  438. See rfc4791-9.7.5.
  439. """
  440. # TODO: collations are not supported, but the default ones needed
  441. # for DAV servers are actually pretty useless. Texts are lowered to
  442. # be case-insensitive, almost as the "i;ascii-casemap" value.
  443. text = next(filter_.itertext()).lower()
  444. match_type = "contains"
  445. if ns == "CR":
  446. match_type = filter_.get("match-type", match_type)
  447. def match(value: str) -> bool:
  448. value = value.lower()
  449. if match_type == "equals":
  450. return value == text
  451. if match_type == "contains":
  452. return text in value
  453. if match_type == "starts-with":
  454. return value.startswith(text)
  455. if match_type == "ends-with":
  456. return value.endswith(text)
  457. raise ValueError("Unexpected text-match match-type: %r" % match_type)
  458. children = getattr(vobject_item, "%s_list" % child_name, [])
  459. if attrib_name is not None:
  460. condition = any(
  461. match(attrib) for child in children
  462. for attrib in child.params.get(attrib_name, []))
  463. else:
  464. res = []
  465. for child in children:
  466. # Some filters such as CATEGORIES provide a list in child.value
  467. if type(child.value) is list:
  468. for value in child.value:
  469. res.append(match(value))
  470. else:
  471. res.append(match(child.value))
  472. condition = any(res)
  473. if filter_.get("negate-condition") == "yes":
  474. return not condition
  475. return condition
  476. def param_filter_match(vobject_item: vobject.base.Component,
  477. filter_: ET.Element, parent_name: str, ns: str) -> bool:
  478. """Check whether the ``item`` matches the param-filter ``filter_``.
  479. See rfc4791-9.7.3.
  480. """
  481. name = filter_.get("name", "").upper()
  482. children = getattr(vobject_item, "%s_list" % parent_name, [])
  483. condition = any(name in child.params for child in children)
  484. if len(filter_) > 0:
  485. if filter_[0].tag == xmlutils.make_clark("%s:text-match" % ns):
  486. return condition and text_match(
  487. vobject_item, filter_[0], parent_name, ns, name)
  488. if filter_[0].tag == xmlutils.make_clark("%s:is-not-defined" % ns):
  489. return not condition
  490. return condition
  491. def simplify_prefilters(filters: Iterable[ET.Element], collection_tag: str
  492. ) -> Tuple[Optional[str], int, int, bool]:
  493. """Creates a simplified condition from ``filters``.
  494. Returns a tuple (``tag``, ``start``, ``end``, ``simple``) where ``tag`` is
  495. a string or None (match all) and ``start`` and ``end`` are POSIX
  496. timestamps (as int). ``simple`` is a bool that indicates that ``filters``
  497. and the simplified condition are identical.
  498. """
  499. flat_filters = list(chain.from_iterable(filters))
  500. simple = len(flat_filters) <= 1
  501. for col_filter in flat_filters:
  502. if collection_tag != "VCALENDAR":
  503. simple = False
  504. break
  505. if (col_filter.tag != xmlutils.make_clark("C:comp-filter") or
  506. col_filter.get("name", "").upper() != "VCALENDAR"):
  507. simple = False
  508. continue
  509. simple &= len(col_filter) <= 1
  510. for comp_filter in col_filter:
  511. if comp_filter.tag != xmlutils.make_clark("C:comp-filter"):
  512. simple = False
  513. continue
  514. tag = comp_filter.get("name", "").upper()
  515. if comp_filter.find(
  516. xmlutils.make_clark("C:is-not-defined")) is not None:
  517. simple = False
  518. continue
  519. simple &= len(comp_filter) <= 1
  520. for time_filter in comp_filter:
  521. if tag not in ("VTODO", "VEVENT", "VJOURNAL"):
  522. simple = False
  523. break
  524. if time_filter.tag != xmlutils.make_clark("C:time-range"):
  525. simple = False
  526. continue
  527. start, end = time_range_timestamps(time_filter)
  528. return tag, start, end, simple
  529. return tag, TIMESTAMP_MIN, TIMESTAMP_MAX, simple
  530. return None, TIMESTAMP_MIN, TIMESTAMP_MAX, simple