summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTavian Barnes <tavianator@tavianator.com>2020-05-28 14:39:08 -0400
committerTavian Barnes <tavianator@tavianator.com>2020-06-24 10:02:22 -0400
commit911ec5008fd78d726cd62768069fa5dbfb4212c0 (patch)
tree10600dee186d3bfdd58ce7b17919fe0c2aaa1035
parent37cc3288e4fd168ba631c8f7e63df7537fb33b8d (diff)
downloadacap-911ec5008fd78d726cd62768069fa5dbfb4212c0.tar.xz
distance: Implement abstract distances, proximities, metrics
-rw-r--r--Cargo.toml1
-rw-r--r--src/distance.rs103
-rw-r--r--src/lib.rs4
3 files changed, 108 insertions, 0 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 36f5667..724f6ea 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,3 +10,4 @@ keywords = ["ann", "knn", "nearest-neighbors"]
categories = ["algorithms", "data-structures"]
[dependencies]
+num-traits = "0.2.11"
diff --git a/src/distance.rs b/src/distance.rs
new file mode 100644
index 0000000..9ff9fd4
--- /dev/null
+++ b/src/distance.rs
@@ -0,0 +1,103 @@
+//! Abstract notions of distance.
+
+use num_traits::{Num, NumAssign, Signed};
+
+/// A number type suitable for distance values.
+///
+/// This trait is automatically implemented for all types that support the required operations.
+pub trait Value: Copy + Num + NumAssign + Signed + PartialOrd {}
+
+/// Blanket [Value] implementation.
+impl<T: Num + NumAssign + Signed + Copy + PartialOrd> Value for T {}
+
+/// A distance between two points.
+///
+/// An implementation may be an actual numerical distance, or an [order embedding] of the true
+/// distance. This allows for optimizations whenever distances can be compared more efficiently
+/// than their exact values can be computed, as is the case for [Euclidean distance]. Implementors
+/// must satisfy, for all distances `x` and `y`:
+///
+/// * `x < y` iff `x.value() < y.value()`
+/// * `x.value() < y` iff `x.value() < y.value()`
+/// * `x < y.value()` iff `x.value() < y.value()`
+///
+/// [order embedding]: https://en.wikipedia.org/wiki/Order_embedding
+/// [Euclidean distance]: crate::euclid::EuclideanDistance
+pub trait Distance
+where
+ Self: Copy,
+ Self: Into<<Self as Distance>::Value>,
+ Self: PartialOrd<<Self as Distance>::Value>,
+ <Self as Distance>::Value: PartialOrd<Self>,
+ Self: PartialOrd,
+{
+ /// The type of actual numerical distances.
+ type Value: Value;
+
+ /// Get the real numerical value of this distance.
+ fn value(self) -> Self::Value {
+ self.into()
+ }
+}
+
+/// Any numerical distance value can be a [Distance].
+impl<T: Value> Distance for T {
+ type Value = T;
+}
+
+/// A space with some notion of distance between points.
+///
+/// Distances in this space don't need to obey any particular rules like symmetry or the [triangle
+/// inequality]. However, spaces that satisfy those rules, at least approximately, often allow for
+/// more accurate and efficient searches.
+///
+/// Type parameters:
+///
+/// * `T`: The type to compare against.
+///
+/// [triangle inequality]: https://en.wikipedia.org/wiki/Triangle_inequality
+pub trait Proximity<T: ?Sized = Self> {
+ /// The type that represents distances.
+ type Distance: Distance;
+
+ /// Calculate the distance between this point and another one.
+ fn distance(&self, other: &T) -> Self::Distance;
+}
+
+// See https://github.com/rust-lang/rust/issues/38078
+/// Shorthand for `K::Distance::Value`.
+pub type DistanceValue<K, V = K> = <<K as Proximity<V>>::Distance as Distance>::Value;
+
+/// Blanket [Proximity] implementation for references.
+impl<'k, 'v, K: Proximity<V>, V> Proximity<&'v V> for &'k K {
+ type Distance = K::Distance;
+
+ fn distance(&self, other: &&'v V) -> Self::Distance {
+ (*self).distance(*other)
+ }
+}
+
+/// Marker trait for [metric spaces].
+///
+/// A metric must be symmetric and obey the [triangle inequality]. More precisely, let `x`, `y`,
+/// and `z` be any elements of a metric space, and let `d(x, y) = x.distance(y).value()`. Then the
+/// following rules must hold:
+///
+/// * `d(x, x) == 0`,
+/// * `d(x, y) == d(y, z)` (symmetry), and
+/// * `d(x, z) <= d(x, y) + d(y, z)` (triangle inequality).
+///
+/// Those conditions also imply the following condition:
+///
+/// * `d(x, y) >= 0` (non-negativity)
+///
+/// Because we do not prohibit `d(x, y) == 0` for distinct `x` and `y`, these spaces are more
+/// properly known as [pseudometric spaces]. This distinction is usually unimportant.
+///
+/// [metric spaces]: https://en.wikipedia.org/wiki/Metric_space
+/// [triangle inequality]: https://en.wikipedia.org/wiki/Triangle_inequality
+/// [pseudometric spaces]: https://en.wikipedia.org/wiki/Pseudometric_space
+pub trait Metric<T: ?Sized = Self>: Proximity<T> {}
+
+/// Blanket [Metric] implementation for references.
+impl<'k, 'v, K: Metric<V>, V> Metric<&'v V> for &'k K {}
diff --git a/src/lib.rs b/src/lib.rs
index 63d8a0e..8b0c32b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,7 @@
//! As Close As Possible — [nearest neighbor search] in Rust.
//!
//! [nearest neighbor search]: https://en.wikipedia.org/wiki/Nearest_neighbor_search
+
+pub mod distance;
+
+pub use distance::{Distance, Metric, Proximity};