cache/
lib.rs

1//! A general purpose cache with possibly multiple remote servers for storing and retrieving data.
2//!
3//! The cache includes both type-mapped and namespaced APIs. Caching can be done in-memory or persistently
4//! via a cache server that manages a filesystem cache. The cache also supports caching across
5//! several cache servers.
6#![warn(missing_docs)]
7
8use std::fmt::Formatter;
9use std::ops::Deref;
10use std::{any::Any, fmt::Debug, hash::Hash, sync::Arc, thread};
11
12use error::{ArcResult, Error, TryInnerError};
13use lazy_static::lazy_static;
14use once_cell::sync::OnceCell;
15use regex::Regex;
16use serde::{Deserialize, Serialize, de::DeserializeOwned};
17use sha2::{Digest, Sha256};
18
19pub mod error;
20pub mod mem;
21pub mod multi;
22pub mod persistent;
23#[doc(hidden)]
24pub mod rpc;
25#[cfg(test)]
26pub(crate) mod tests;
27
28lazy_static! {
29    /// A regex for matching valid namespaces.
30    pub static ref NAMESPACE_REGEX: Regex =
31        Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*\.)*[A-Za-z_][A-Za-z0-9_]*$").unwrap();
32}
33
34/// A function for mapping a cached value to another value.
35pub trait ValueMapFn<V1, V2>: Fn(ArcResult<&V1>) -> ArcResult<V2> + Send + Sync + Any {}
36impl<V1, V2, T: Fn(ArcResult<&V1>) -> ArcResult<V2> + Send + Sync + Any> ValueMapFn<V1, V2> for T {}
37
/// A function that can be used to generate a value in a background thread.
///
/// Blanket-implemented for every `FnOnce() -> V` that is also [`Send`] and [`Any`].
pub trait RawGenerateFn<V>: FnOnce() -> V + Send + Any {}

impl<V, F> RawGenerateFn<V> for F where F: FnOnce() -> V + Send + Any {}
41
/// A function that can be used to generate a value based on a key in a background thread.
///
/// Blanket-implemented for every `FnOnce(&K) -> V` that is also [`Send`] and [`Any`].
pub trait GenerateFn<K, V>: FnOnce(&K) -> V + Send + Any {}

impl<K, V, F> GenerateFn<K, V> for F where F: FnOnce(&K) -> V + Send + Any {}
45
/// A stateful function that can be used to generate a value based on a key in a background
/// thread.
///
/// Blanket-implemented for every `FnOnce(&K, S) -> V` that is also [`Send`] and [`Any`].
pub trait GenerateWithStateFn<K, S, V>: FnOnce(&K, S) -> V + Send + Any {}

impl<K, S, V, F> GenerateWithStateFn<K, S, V> for F where F: FnOnce(&K, S) -> V + Send + Any {}
49
/// A function that can be used to generate a result based on a key in a background thread.
///
/// Blanket-implemented for every `FnOnce(&K) -> Result<V, E>` that is also [`Send`] and
/// [`Any`].
pub trait GenerateResultFn<K, V, E>: FnOnce(&K) -> Result<V, E> + Send + Any {}

impl<K, V, E, F> GenerateResultFn<K, V, E> for F where
    F: FnOnce(&K) -> Result<V, E> + Send + Any
{
}
53
/// A stateful function that can be used to generate a result based on a key in a background
/// thread.
///
/// Blanket-implemented for every `FnOnce(&K, S) -> Result<V, E>` that is also [`Send`] and
/// [`Any`].
pub trait GenerateResultWithStateFn<K, S, V, E>:
    FnOnce(&K, S) -> Result<V, E> + Send + Any
{
}

impl<K, S, V, E, F> GenerateResultWithStateFn<K, S, V, E> for F where
    F: FnOnce(&K, S) -> Result<V, E> + Send + Any
{
}
63
64/// A namespace used for addressing a set of cached items.
65///
66/// Must match the [`NAMESPACE_REGEX`](static@NAMESPACE_REGEX) regular expression.
67#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
68pub struct Namespace(String);
69
70impl Namespace {
71    /// Creates a new [`Namespace`].
72    ///
73    /// # Panics
74    ///
75    /// Panics if the provided string does not match [`NAMESPACE_REGEX`](static@NAMESPACE_REGEX).
76    pub fn new(namespace: impl Into<String>) -> Self {
77        let namespace: String = namespace.into();
78        if !Namespace::validate(&namespace) {
79            panic!(
80                "invalid namespace, does not match regex {:?}",
81                NAMESPACE_REGEX.as_str(),
82            );
83        }
84        Self(namespace)
85    }
86
87    /// Returns `true` if the provided string is a valid namespace.
88    pub fn validate(namespace: &str) -> bool {
89        NAMESPACE_REGEX.is_match(namespace)
90    }
91
92    /// Converts the namespace into its string value.
93    pub fn into_inner(self) -> String {
94        self.0
95    }
96}
97
98impl<T: Into<String>> From<T> for Namespace {
99    fn from(value: T) -> Self {
100        Self::new(value)
101    }
102}
103
104impl Deref for Namespace {
105    type Target = str;
106
107    fn deref(&self) -> &Self::Target {
108        &self.0
109    }
110}
111
112impl AsRef<str> for Namespace {
113    fn as_ref(&self) -> &str {
114        &self.0
115    }
116}
117
118/// A cacheable object.
119///
120/// # Examples
121///
122/// ```
123/// use cache::Cacheable;
124/// use serde::{Deserialize, Serialize};
125///
126/// #[derive(Deserialize, Serialize, Hash, Eq, PartialEq)]
127/// pub struct Params {
128///     param1: u64,
129///     param2: String,
130/// };
131///
132/// impl Cacheable for Params {
133///     type Output = u64;
134///     type Error = anyhow::Error;
135///
136///     fn generate(&self) -> anyhow::Result<u64> {
137///         println!("Executing an expensive computation...");
138///
139///         // ...
140///         # let error_condition = true;
141///         # let computation_result = 64;
142///
143///         if error_condition {
144///             anyhow::bail!("an error occured during computation");
145///         }
146///
147///         Ok(computation_result)
148///     }
149/// }
150/// ```
151pub trait Cacheable: Serialize + DeserializeOwned + Hash + Eq + Send + Sync + Any {
152    /// The output produced by generating the object.
153    type Output: Send + Sync + Serialize + DeserializeOwned;
154    /// The error type returned by [`Cacheable::generate`].
155    type Error: Send + Sync;
156
157    /// Generates the output of the cacheable object.
158    fn generate(&self) -> std::result::Result<Self::Output, Self::Error>;
159}
160
161impl<T: Cacheable> Cacheable for Arc<T> {
162    type Output = T::Output;
163    type Error = T::Error;
164
165    fn generate(&self) -> std::result::Result<Self::Output, Self::Error> {
166        <T as Cacheable>::generate(self)
167    }
168}
169
170/// A cacheable object whose generator needs to store state.
171///
172/// # Examples
173///
174/// ```
175/// use std::sync::{Arc, Mutex};
176/// use cache::CacheableWithState;
177/// use serde::{Deserialize, Serialize};
178///
179/// #[derive(Deserialize, Serialize, Clone, Hash, Eq, PartialEq)]
180/// pub struct Params {
181///     param1: u64,
182///     param2: String,
183/// };
184///
185/// #[derive(Clone)]
186/// pub struct Log(Arc<Mutex<Vec<Params>>>);
187///
188/// impl CacheableWithState<Log> for Params {
189///     type Output = u64;
190///     type Error = anyhow::Error;
191///
192///     fn generate_with_state(&self, state: Log) -> anyhow::Result<u64> {
193///         println!("Logging parameters...");
194///         state.0.lock().unwrap().push(self.clone());
195///
196///         println!("Executing an expensive computation...");
197///
198///         // ...
199///         # let error_condition = true;
200///         # let computation_result = 64;
201///
202///         if error_condition {
203///             anyhow::bail!("an error occured during computation");
204///         }
205///
206///         Ok(computation_result)
207///     }
208/// }
209/// ```
210pub trait CacheableWithState<S: Send + Sync + Any>:
211    Serialize + DeserializeOwned + Hash + Eq + Send + Sync + Any
212{
213    /// The output produced by generating the object.
214    type Output: Send + Sync + Serialize + DeserializeOwned;
215    /// The error type returned by [`CacheableWithState::generate_with_state`].
216    type Error: Send + Sync;
217
218    /// Generates the output of the cacheable object using `state`.
219    ///
220    /// **Note:** The state is not used to determine whether the object should be regenerated. As
221    /// such, it should not impact the output of this function but rather should only be used to
222    /// store collateral or reuse computation from other function calls.
223    fn generate_with_state(&self, state: S) -> std::result::Result<Self::Output, Self::Error>;
224}
225
226impl<S: Send + Sync + Any, T: CacheableWithState<S>> CacheableWithState<S> for Arc<T> {
227    type Output = T::Output;
228    type Error = T::Error;
229
230    fn generate_with_state(&self, state: S) -> std::result::Result<Self::Output, Self::Error> {
231        <T as CacheableWithState<S>>::generate_with_state(self, state)
232    }
233}
234
235trait CacheValueHolder<V>: Send + Sync {
236    /// Blocks on the cache entry, returning the result once it is ready.
237    ///
238    /// Returns an error if one was returned by the generator.
239    fn try_get(&self) -> ArcResult<&V>;
240
241    /// Checks whether the underlying entry is ready.
242    ///
243    /// Returns the entry if available, otherwise returns [`None`].
244    fn poll(&self) -> Option<ArcResult<&V>>;
245}
246
247#[derive(Debug)]
248pub(crate) struct CacheHandleInner<V>(Arc<OnceCell<ArcResult<V>>>);
249
250impl<V> Default for CacheHandleInner<V> {
251    fn default() -> Self {
252        Self(Arc::new(OnceCell::new()))
253    }
254}
255
256impl<V> Clone for CacheHandleInner<V> {
257    fn clone(&self) -> Self {
258        Self(self.0.clone())
259    }
260}
261
262impl<V: Send + Sync> CacheValueHolder<V> for CacheHandleInner<V> {
263    fn try_get(&self) -> ArcResult<&V> {
264        self.0.wait().as_ref().map_err(|e| e.clone())
265    }
266
267    fn poll(&self) -> Option<ArcResult<&V>> {
268        Some(self.0.get()?.as_ref().map_err(|e| e.clone()))
269    }
270}
271
272impl<V> CacheHandleInner<V> {
273    /// Sets the value of the cache handle.
274    ///
275    /// # Panics
276    ///
277    /// Panics if the cache handle has already been set.
278    pub(crate) fn set(&self, value: ArcResult<V>) {
279        if self.0.set(value).is_err() {
280            tracing::error!("failed to set cache handle value");
281            panic!("failed to set cache handle value");
282        }
283    }
284}
285
286/// A handle to a cache entry that might still be generating.
287pub struct CacheHandle<V>(Arc<dyn CacheValueHolder<V>>);
288
289impl<V> std::fmt::Debug for CacheHandle<V> {
290    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
291        f.debug_struct("CacheHandle").finish()
292    }
293}
294
295impl<V> Clone for CacheHandle<V> {
296    fn clone(&self) -> Self {
297        Self(self.0.clone())
298    }
299}
300impl<V: Any + Send + Sync> CacheHandle<V> {
301    pub(crate) fn from_inner(inner: Arc<dyn CacheValueHolder<V>>) -> Self {
302        Self(inner)
303    }
304
305    pub(crate) fn empty() -> (Self, CacheHandleInner<V>) {
306        let inner = CacheHandleInner::default();
307        (Self(Arc::new(inner.clone())), inner)
308    }
309
310    /// Creates a new cache handle, generating its value immediately.
311    pub(crate) fn new_blocking(generate_fn: impl RawGenerateFn<V>) -> Self {
312        let (handle, inner) = Self::empty();
313        inner.set(run_generator(generate_fn));
314        handle
315    }
316
317    /// Creates a new cache handle, spawning a thread to generate its value using the provided
318    /// function.
319    pub(crate) fn new(generate_fn: impl RawGenerateFn<V>) -> Self {
320        let (handle, inner) = Self::empty();
321        thread::spawn(move || {
322            inner.set(run_generator(generate_fn));
323        });
324        handle
325    }
326
327    /// Maps an existing [`CacheHandle`] to a [`CacheHandle`] of a different type.
328    ///
329    /// # Example
330    ///
331    /// ```
332    /// use std::sync::{Arc, Mutex};
333    /// use cache::{mem::TypeCache, error::Error, CacheableWithState};
334    ///
335    /// let mut cache = TypeCache::new();
336    ///
337    /// fn generate_fn(tuple: &(u64, u64)) -> u64 {
338    ///     tuple.0 + tuple.1
339    /// }
340    ///
341    /// let handle = cache.generate((5, 6), generate_fn);
342    /// assert_eq!(*handle.get(), 11);
343    ///
344    /// // Does not call `generate_fn` again as the result has been cached.
345    /// let mapped_handle = handle.map(|res| res.map(|sum| *sum > 50));
346    /// assert_eq!(*mapped_handle.get(), false);
347    /// ```
348    pub fn map<V2: Send + Sync + Any>(&self, map_fn: impl ValueMapFn<V, V2>) -> CacheHandle<V2> {
349        CacheHandle(Arc::new(MappedCacheHandleInner::new(self.clone(), map_fn)))
350    }
351}
352
353impl<V> CacheHandle<V> {
354    /// Blocks on the cache entry, returning the result once it is ready.
355    ///
356    /// Returns an error if one was returned by the generator.
357    pub fn try_get(&self) -> ArcResult<&V> {
358        self.0.try_get()
359    }
360
361    /// Checks whether the underlying entry is ready.
362    ///
363    /// Returns the entry if available, otherwise returns [`None`].
364    pub fn poll(&self) -> Option<ArcResult<&V>> {
365        self.0.poll()
366    }
367
368    /// Blocks on the cache entry, returning its output.
369    ///
370    /// # Panics
371    ///
372    /// Panics if the generator failed to run or an internal error was thrown by the cache.
373    pub fn get(&self) -> &V {
374        self.try_get().unwrap()
375    }
376}
377
378impl<V: Debug> CacheHandle<V> {
379    /// Blocks on the cache entry, returning the error thrown by the cache.
380    ///
381    /// # Panics
382    ///
383    /// Panics if no error was thrown by the cache.
384    pub fn get_err(&self) -> Arc<error::Error> {
385        self.try_get().unwrap_err()
386    }
387}
388
389impl<V, E> CacheHandle<std::result::Result<V, E>> {
390    /// Blocks on the cache entry, returning the inner result.
391    ///
392    /// Returns an error if the generator panicked or threw an error, or if the cache threw an
393    /// error.
394    pub fn try_inner(&self) -> std::result::Result<&V, TryInnerError<'_, E>> {
395        Ok(self
396            .try_get()
397            .map_err(|e| TryInnerError::CacheError(e))?
398            .as_ref()?)
399    }
400}
401
402impl<V, E: Debug> CacheHandle<std::result::Result<V, E>> {
403    /// Blocks on the cache entry, returning its output.
404    ///
405    /// # Panics
406    ///
407    /// Panics if the generator panicked or threw an error, or if an internal error was thrown by the cache.
408    pub fn unwrap_inner(&self) -> &V {
409        self.get().as_ref().unwrap()
410    }
411}
412
413impl<V: Debug, E> CacheHandle<std::result::Result<V, E>> {
414    /// Blocks on the cache entry, returning the error returned by the generator.
415    ///
416    /// # Panics
417    ///
418    /// Panics if the generator panicked or an internal error was thrown by the cache. Also panics
419    /// if the generator did not return an error.
420    pub fn unwrap_err_inner(&self) -> &E {
421        self.get().as_ref().unwrap_err()
422    }
423}
424
425pub(crate) struct MappedCacheHandleInner<V1, V2> {
426    handle: Arc<dyn CacheValueHolder<V1>>,
427    map_fn: Arc<dyn ValueMapFn<V1, V2, Output = ArcResult<V2>>>,
428    result: Arc<OnceCell<ArcResult<V2>>>,
429}
430
431impl<V1, V2> Clone for MappedCacheHandleInner<V1, V2> {
432    fn clone(&self) -> Self {
433        Self {
434            handle: self.handle.clone(),
435            map_fn: self.map_fn.clone(),
436            result: self.result.clone(),
437        }
438    }
439}
440
441impl<V1: Send + Sync, V2: Send + Sync> CacheValueHolder<V2> for MappedCacheHandleInner<V1, V2> {
442    fn try_get(&self) -> ArcResult<&V2> {
443        self.result
444            .get_or_init(|| (self.map_fn)(self.handle.try_get()))
445            .as_ref()
446            .map_err(|e| e.clone())
447    }
448
449    fn poll(&self) -> Option<ArcResult<&V2>> {
450        let res = self.handle.poll().map(|res| (self.map_fn)(res))?;
451        Some(
452            self.result
453                .get_or_init(|| res)
454                .as_ref()
455                .map_err(|e| e.clone()),
456        )
457    }
458}
459
460impl<V1, V2> MappedCacheHandleInner<V1, V2> {
461    fn new(handle: CacheHandle<V1>, map_fn: impl ValueMapFn<V1, V2>) -> Self {
462        Self {
463            handle: handle.0,
464            map_fn: Arc::new(map_fn),
465            result: Arc::new(OnceCell::new()),
466        }
467    }
468}
469
470pub(crate) fn hash(val: &[u8]) -> Vec<u8> {
471    let mut hasher = Sha256::new();
472    hasher.update(val);
473    hasher.finalize()[..].into()
474}
475
476/// Runs the provided generator in a new thread, returning the result.
477pub(crate) fn run_generator<V: Any + Send + Sync>(
478    generate_fn: impl FnOnce() -> V + Send + Any,
479) -> ArcResult<V> {
480    let join_handle = thread::spawn(generate_fn);
481    join_handle.join().map_err(|_| Arc::new(Error::Panic))
482}