You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

915 lines
37 KiB
C++

//----------------------------------------------------------------------
// File: ANN.h
// Programmer: Sunil Arya and David Mount
// Last modified: 05/03/05 (Release 1.1)
// Description: Basic include file for approximate nearest
// neighbor searching.
//----------------------------------------------------------------------
// Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
// David Mount. All Rights Reserved.
//
// This software and related documentation is part of the Approximate
// Nearest Neighbor Library (ANN). This software is provided under
// the provisions of the Lesser GNU Public License (LGPL). See the
// file ../ReadMe.txt for further information.
//
// The University of Maryland (U.M.) and the authors make no
// representations about the suitability or fitness of this software for
// any purpose. It is provided "as is" without express or implied
// warranty.
//----------------------------------------------------------------------
// History:
// Revision 0.1 03/04/98
// Initial release
// Revision 1.0 04/01/05
// Added copyright and revision information
// Added ANNcoordPrec for coordinate precision.
// Added methods theDim, nPoints, maxPoints, thePoints to ANNpointSet.
// Cleaned up C++ structure for modern compilers
// Revision 1.1 05/03/05
// Added fixed-radius k-NN searching
//----------------------------------------------------------------------
//----------------------------------------------------------------------
// ANN - approximate nearest neighbor searching
// ANN is a library for approximate nearest neighbor searching,
// based on the use of standard and priority search in kd-trees
// and balanced box-decomposition (bbd) trees. Here are some
// references to the main algorithmic techniques used here:
//
// kd-trees:
// Friedman, Bentley, and Finkel, ``An algorithm for finding
// best matches in logarithmic expected time,'' ACM
// Transactions on Mathematical Software, 3(3):209-226, 1977.
//
// Priority search in kd-trees:
// Arya and Mount, ``Algorithms for fast vector quantization,''
// Proc. of DCC '93: Data Compression Conference, eds. J. A.
// Storer and M. Cohn, IEEE Press, 1993, 381-390.
//
// Approximate nearest neighbor search and bbd-trees:
// Arya, Mount, Netanyahu, Silverman, and Wu, ``An optimal
// algorithm for approximate nearest neighbor searching,''
// 5th Ann. ACM-SIAM Symposium on Discrete Algorithms,
// 1994, 573-582.
//----------------------------------------------------------------------
#ifndef ANN_H
#define ANN_H
#ifdef WIN32
//----------------------------------------------------------------------
// For Microsoft Visual C++, externally accessible symbols must be
// explicitly indicated with DLL_API, which is somewhat like "extern."
//
// The following ifdef block is the standard way of creating macros
// which make exporting from a DLL simpler. All files within this DLL
// are compiled with the DLL_EXPORTS preprocessor symbol defined on the
// command line. In contrast, projects that use (or import) the DLL
// objects do not define the DLL_EXPORTS symbol. This way any other
// project whose source files include this file sees DLL_API functions as
// being imported from a DLL, whereas this DLL sees symbols defined with
// this macro as being exported.
//----------------------------------------------------------------------
#ifdef DLL_EXPORTS // DLL_EXPORTS defined: compiling the DLL itself
#define DLL_API __declspec(dllexport)
#else // DLL_EXPORTS not defined: compiling a project that imports the DLL
#define DLL_API __declspec(dllimport)
#endif
//----------------------------------------------------------------------
// DLL_API is ignored for all other systems
//----------------------------------------------------------------------
#else // WIN32 not defined: DLL_API expands to nothing
#define DLL_API
#endif
//----------------------------------------------------------------------
// basic includes
//----------------------------------------------------------------------
#include <cmath> // math includes
#include <iostream> // I/O streams
#include <iomanip> // I/O manipulators
//----------------------------------------------------------------------
// Limits
// There are a number of places where we use the maximum double value as
// default initializers (and others may be used, depending on the
// data/distance representation). These can usually be found in limits.h
// (as LONG_MAX, INT_MAX) or in float.h (as DBL_MAX, FLT_MAX).
//
// Not all systems have these files. If you are using such a system,
// you should set the preprocessor symbol ANN_NO_LIMITS_H when
// compiling, and modify the statements below to generate the
// appropriate value. For practical purposes, this does not need to be
// the maximum double value. It is sufficient that it be at least as
// large as the maximum squared distance between any two
// points.
//----------------------------------------------------------------------
// ANN_DBL_MAX is the large double value used as a default initializer.
// It is taken from MAXDOUBLE when ANN_NO_LIMITS_H is defined, and from
// DBL_MAX otherwise.
// NOTE(review): <cvalues> is not one of the standard C++ library headers;
// MAXDOUBLE is traditionally supplied by <values.h>. This branch is only
// compiled when ANN_NO_LIMITS_H is defined -- confirm that it builds on
// the platforms that need it.
#ifdef ANN_NO_LIMITS_H // limits.h unavailable
#include <cvalues> // replacement for limits.h
const double ANN_DBL_MAX = MAXDOUBLE; // insert maximum double
#else
#include <climits>
#include <cfloat>
const double ANN_DBL_MAX = DBL_MAX;
#endif
// Library identification strings.
// NOTE(review): the header comment at the top of this file still reads
// "Last modified: 05/03/05 (Release 1.1)", while the strings below report
// version 1.1.1 and Aug 4, 2006 -- confirm which one is current.
#define ANNversion "1.1.1" // ANN version and information
#define ANNversionCmt "" // (currently an empty string)
#define ANNcopyright "David M. Mount and Sunil Arya"
#define ANNlatestRev "Aug 4, 2006"
//----------------------------------------------------------------------
// ANNbool
// This is a simple boolean type. Although ANSI C++ is supposed
// to support the type bool, some compilers do not have it.
//----------------------------------------------------------------------
// ANN's own boolean type (non ANSI C++), kept for compilers without bool.
enum ANNbool {
    ANNfalse = 0,                       // logical false
    ANNtrue = 1                         // logical true
};
//----------------------------------------------------------------------
// ANNcoord, ANNdist
// ANNcoord and ANNdist are the types used for representing
// point coordinates and distances. They can be modified by the
// user, with some care. It is assumed that they are both numeric
// types, and that ANNdist is generally of an equal or higher type
// from ANNcoord. A variable of type ANNdist should be large
// enough to store the sum of squared components of a variable
// of type ANNcoord for the number of dimensions needed in the
// application. For example, the following combinations are
// legal:
//
// ANNcoord ANNdist
// --------- -------------------------------
// short short, int, long, float, double
// int int, long, float, double
// long long, float, double
// float float, double
// double double
//
// It is the user's responsibility to make sure that overflow does
// not occur in distance calculation.
//----------------------------------------------------------------------
typedef double ANNcoord; // coordinate data type (one component of a point)
typedef double ANNdist; // distance data type (must hold a sum of squared coordinates)
//----------------------------------------------------------------------
// ANNidx
// ANNidx is a point index. When the data structure is built, the
// points are given as an array. Nearest neighbor results are
// returned as an integer index into this array. To make it
// clearer when this is happening, we define the integer type
// ANNidx. Indexing starts from 0.
//
// For fixed-radius near neighbor searching, it is possible that
// there are not k nearest neighbors within the search radius. To
// indicate this, the algorithm returns ANN_NULL_IDX as its result.
// It should be distinguishable from any valid array index.
//----------------------------------------------------------------------
typedef int ANNidx; // point index (position in the point array, starting from 0)
const ANNidx ANN_NULL_IDX = -1; // a NULL point index (distinguishable from any valid index)
//----------------------------------------------------------------------
// Infinite distance:
// The code assumes that there is an "infinite distance" which it
// uses to initialize distances before performing nearest neighbor
// searches. It should be as large as or larger than any legitimate
// nearest neighbor distance.
//
// On most systems, these should be found in the standard include
// file <limits.h> or possibly <float.h>. If you do not have these
// files, some suggested values are listed below, assuming 64-bit
// long, 32-bit int and 16-bit short.
//
// ANNdist ANN_DIST_INF Values (see <limits.h> or <float.h>)
// ------- ------------ ------------------------------------
// double DBL_MAX 1.79769313486231570e+308
// float FLT_MAX 3.40282346638528860e+38
// long LONG_MAX 0x7fffffffffffffff
// int INT_MAX 0x7fffffff
// short SHRT_MAX 0x7fff
//----------------------------------------------------------------------
// Set to ANN_DBL_MAX, which matches the default ANNdist type (double).
// If ANNdist is changed, pick the corresponding value from the table above.
const ANNdist ANN_DIST_INF = ANN_DBL_MAX;
//----------------------------------------------------------------------
// Significant digits for tree dumps:
// When floating point coordinates are used, the routine that dumps
// a tree needs to know roughly how many significant digits there
// are in a ANNcoord, so it can output points to full precision.
// This is defined to be ANNcoordPrec. On most systems these
// values can be found in the standard include files <limits.h> or
// <float.h>. For integer types, the value is essentially ignored.
//
// ANNcoord ANNcoordPrec Values (see <limits.h> or <float.h>)
// -------- ------------ ------------------------------------
// double DBL_DIG 15
// float FLT_DIG 6
// long doesn't matter 19
// int doesn't matter 10
// short doesn't matter 5
//----------------------------------------------------------------------
#ifdef DBL_DIG // number of sig. decimal digits in ANNcoord (double)
const int ANNcoordPrec = DBL_DIG;
#else
const int ANNcoordPrec = 15; // default precision (the DBL_DIG value listed in the table above)
#endif
//----------------------------------------------------------------------
// Self match?
// In some applications, the nearest neighbor of a point is not
// allowed to be the point itself. This occurs, for example, when
// computing all nearest neighbors in a set. By setting the
// parameter ANN_ALLOW_SELF_MATCH to ANNfalse, the nearest neighbor
// is the closest point whose distance from the query point is
// strictly positive.
//----------------------------------------------------------------------
const ANNbool ANN_ALLOW_SELF_MATCH = ANNtrue; // ANNfalse would require a strictly positive distance
//----------------------------------------------------------------------
// Norms and metrics:
// ANN supports any Minkowski norm for defining distance. In
// particular, for any p >= 1, the L_p Minkowski norm defines the
// length of a d-vector (v0, v1, ..., v(d-1)) to be
//
// (|v0|^p + |v1|^p + ... + |v(d-1)|^p)^(1/p),
//
// (where ^ denotes exponentiation, and |.| denotes absolute
// value). The distance between two points is defined to be the
// norm of the vector joining them. Some common distance metrics
// include
//
// Euclidean metric p = 2
// Manhattan metric p = 1
// Max metric p = infinity
//
// In the case of the max metric, the norm is computed by taking
// the maxima of the absolute values of the components. ANN is
// highly "coordinate-based" and does not support general distances
// functions (e.g. those obeying just the triangle inequality). It
// also does not support distance functions based on
// inner-products.
//
// For the purpose of computing nearest neighbors, it is not
// necessary to compute the final power (1/p). Thus the only
// component that is used by the program is |v(i)|^p.
//
// ANN parameterizes the distance computation through the following
// macros. (Macros are used rather than procedures for
// efficiency.) Recall that the distance between two points is
// given by the length of the vector joining them, and the length
// or norm of a vector v is given by formula:
//
// |v| = ROOT(POW(v0) # POW(v1) # ... # POW(v(d-1)))
//
// where ROOT, POW are unary functions and # is an associative and
// commutative binary operator mapping the following types:
//
// ** POW: ANNcoord --> ANNdist
// ** #: ANNdist x ANNdist --> ANNdist
// ** ROOT: ANNdist (>0) --> double
//
// For early termination in distance calculation (partial distance
// calculation) we assume that POW and # together are monotonically
// increasing on sequences of arguments, meaning that for all
// v0..vk and y:
//
// POW(v0) #...# POW(vk) <= (POW(v0) #...# POW(vk)) # POW(y).
//
// Incremental Distance Calculation:
// The program uses an optimized method of computing distances for
// kd-trees and bd-trees, called incremental distance calculation.
// It is used when distances are to be updated when only a single
// coordinate of a point has been changed. In order to use this,
// we assume that there is an incremental update function DIFF(x,y)
// for #, such that if:
//
// s = x0 # ... # xi # ... # xk
//
// then if s' is equal to s but with xi replaced by y, that is,
//
// s' = x0 # ... # y # ... # xk
//
// then the length of s' can be computed by:
//
// |s'| = |s| # DIFF(xi,y).
//
// Thus, if # is + then DIFF(xi,y) is (y-xi). For the L_infinity
// norm we make use of the fact that in the program this function
// is only invoked when y > xi, and hence DIFF(xi,y)=y.
//
// Finally, for approximate nearest neighbor queries we assume
// that POW and ROOT are related such that
//
// v*ROOT(x) = ROOT(POW(v)*x)
//
// Here are the values for the various Minkowski norms:
//
// L_p: p even: p odd:
// ------------------------- ------------------------
// POW(v) = v^p POW(v) = |v|^p
// ROOT(x) = x^(1/p) ROOT(x) = x^(1/p)
// # = + # = +
// DIFF(x,y) = y - x DIFF(x,y) = y - x
//
// L_inf:
// POW(v) = |v|
// ROOT(x) = x
// # = max
// DIFF(x,y) = y
//
// By default the Euclidean norm is assumed. To change the norm,
// uncomment the appropriate set of macros below.
//----------------------------------------------------------------------
//----------------------------------------------------------------------
// Use the following for the Euclidean norm
//----------------------------------------------------------------------
#define ANN_POW(v) ((v)*(v)) // POW: per-coordinate term; note that v is evaluated twice
#define ANN_ROOT(x) sqrt(x) // ROOT: accumulated squared value --> length
#define ANN_SUM(x,y) ((x) + (y)) // the '#' combining operator described above
#define ANN_DIFF(x,y) ((y) - (x)) // DIFF: incremental update when x is replaced by y
//----------------------------------------------------------------------
// Use the following for the L_1 (Manhattan) norm
//----------------------------------------------------------------------
// #define ANN_POW(v) fabs(v)
// #define ANN_ROOT(x) (x)
// #define ANN_SUM(x,y) ((x) + (y))
// #define ANN_DIFF(x,y) ((y) - (x))
//----------------------------------------------------------------------
// Use the following for a general L_p norm
//----------------------------------------------------------------------
// #define ANN_POW(v) pow(fabs(v),p)
// #define ANN_ROOT(x) pow(fabs(x),1/p)
// #define ANN_SUM(x,y) ((x) + (y))
// #define ANN_DIFF(x,y) ((y) - (x))
//----------------------------------------------------------------------
// Use the following for the L_infinity (Max) norm
//----------------------------------------------------------------------
// #define ANN_POW(v) fabs(v)
// #define ANN_ROOT(x) (x)
// #define ANN_SUM(x,y) ((x) > (y) ? (x) : (y))
// #define ANN_DIFF(x,y) (y)
//----------------------------------------------------------------------
// Array types
// The following array types are of basic interest. A point is
// just a dimensionless array of coordinates, a point array is a
// dimensionless array of points. A distance array is a
// dimensionless array of distances and an index array is a
// dimensionless array of point indices. The latter two are used
// when returning the results of k-nearest neighbor queries.
//----------------------------------------------------------------------
typedef ANNcoord* ANNpoint; // a point (pointer to its array of coordinates)
typedef ANNpoint* ANNpointArray; // an array of points
typedef ANNdist* ANNdistArray; // an array of distances
typedef ANNidx* ANNidxArray; // an array of point indices
//----------------------------------------------------------------------
// Basic point and array utilities:
// The following procedures are useful supplements to ANN's nearest
// neighbor capabilities.
//
// annDist():
// Computes the (squared) distance between a pair of points.
// Note that this routine is not used internally by ANN for
// computing distance calculations. For reasons of efficiency
// this is done using incremental distance calculation. Thus,
// this routine cannot be modified as a method of changing the
// metric.
//
// Because points (somewhat like strings in C) are stored as
// pointers, creating and destroying copies of points may
// require storage allocation. The following procedures do
// this.
//
// annAllocPt() and annDeallocPt():
// Allocate and deallocate storage for a single point, and
// return a pointer to it. The argument to AllocPt() is
// used to initialize all components.
//
// annAllocPts() and annDeallocPts():
// Allocate and deallocate an array of points as well as a
// place to store their coordinates, and initializes the
// points to point to their respective coordinates. It
// allocates point storage in a contiguous block large
// enough to store all the points. It performs no
// initialization.
//
// annCopyPt():
// Creates a copy of a given point, allocating space for
// the new point. It returns a pointer to the newly
// allocated copy.
//----------------------------------------------------------------------
// Computes the (squared) distance between the points p and q.
DLL_API ANNdist annDist(
int dim, // dimension of space
ANNpoint p, // points
ANNpoint q);
// Allocates storage for a single point and returns a pointer to it;
// c is used to initialize all components.
DLL_API ANNpoint annAllocPt(
int dim, // dimension
ANNcoord c = 0); // coordinate value (all equal)
// Allocates an array of n points together with storage for their
// coordinates. No initialization of the coordinates is performed.
DLL_API ANNpointArray annAllocPts(
int n, // number of points
int dim); // dimension
// Deallocates the storage of a single point (p is passed by reference).
DLL_API void annDeallocPt(
ANNpoint &p); // deallocate 1 point
// Deallocates an array of points (pa is passed by reference).
DLL_API void annDeallocPts(
ANNpointArray &pa); // point array
// Creates a copy of source in newly allocated storage and returns a
// pointer to the copy.
DLL_API ANNpoint annCopyPt(
int dim, // dimension
ANNpoint source); // point to copy
//----------------------------------------------------------------------
//Overall structure: ANN supports a number of different data structures
//for approximate and exact nearest neighbor searching. These are:
//
// ANNbruteForce A simple brute-force search structure.
// ANNkd_tree A kd-tree tree search structure.
// ANNbd_tree A bd-tree tree search structure (a kd-tree with
// shrink capabilities).
//
// At a minimum, each of these data structures support k-nearest
// neighbor queries. The nearest neighbor query, annkSearch,
// returns an integer identifier and the distance to the nearest
// neighbor(s) and annRangeSearch returns the nearest points that
// lie within a given query ball.
//
// Each structure is built by invoking the appropriate constructor
// and passing it (at a minimum) the array of points, the total
// number of points and the dimension of the space. Each structure
// is also assumed to support a destructor and member functions
// that return basic information about the point set.
//
// Note that the array of points is not copied by the data
// structure (for reasons of space efficiency), and it is assumed
// to be constant throughout the lifetime of the search structure.
//
// The search algorithm, annkSearch, is given the query point (q),
// and the desired number of nearest neighbors to report (k), and
// the error bound (eps) (whose default value is 0, implying exact
// nearest neighbors). It returns two arrays which are assumed to
// contain at least k elements: one (nn_idx) contains the indices
// (within the point array) of the nearest neighbors and the other
// (dd) contains the squared distances to these nearest neighbors.
//
// The search algorithm, annkFRSearch, is a fixed-radius kNN
// search. In addition to a query point, it is given a (squared)
// radius bound. (This is done for consistency, because the search
// returns distances as squared quantities.) It does two things.
// First, it computes the k nearest neighbors within the radius
// bound, and second, it returns the total number of points lying
// within the radius bound. It is permitted to set k = 0, in which
// case it effectively answers a range counting query. If the
// error bound epsilon is positive, then the search is approximate
// in the sense that it is free to ignore any point that lies
// outside a ball of radius r/(1+epsilon), where r is the given
// (unsquared) radius bound.
//
// The generic object from which all the search structures are
// derived is given below. It is a virtual object, and is useless
// by itself.
//----------------------------------------------------------------------
// Abstract interface shared by the ANN search structures. Every member
// function other than the destructor is pure virtual.
// NOTE(review): default arguments of virtual functions are selected from
// the static type of the call expression. ANNkd_tree::annkFRSearch declares
// k without a default, so a call that omits k only compiles when it is made
// through a type that declares the default (such as this class).
// NOTE(review): NULL is used in the defaults below, but none of the headers
// this file includes directly (<cmath>, <iostream>, <iomanip>, <climits>,
// <cfloat>) is specified to define it; presumably it arrives transitively
// -- confirm.
class DLL_API ANNpointSet {
public:
virtual ~ANNpointSet() {} // virtual destructor
// Reports the k nearest neighbors of q: indices in nn_idx, squared
// distances in dd. Both arrays are assumed to hold at least k elements.
virtual void annkSearch( // approx k near neighbor search
ANNpoint q, // query point
int k, // number of near neighbors to return
ANNidxArray nn_idx, // nearest neighbor array (modified)
ANNdistArray dd, // dist to near neighbors (modified)
double eps=0.0 // error bound
) = 0; // pure virtual (defined elsewhere)
// Fixed-radius variant: computes the k nearest neighbors within the
// squared radius sqRad and returns the total number of points lying
// within the radius bound. k = 0 is permitted (range counting query).
virtual int annkFRSearch( // approx fixed-radius kNN search
ANNpoint q, // query point
ANNdist sqRad, // squared radius
int k = 0, // number of near neighbors to return
ANNidxArray nn_idx = NULL, // nearest neighbor array (modified)
ANNdistArray dd = NULL, // dist to near neighbors (modified)
double eps=0.0 // error bound
) = 0; // pure virtual (defined elsewhere)
virtual int theDim() = 0; // return dimension of space
virtual int nPoints() = 0; // return number of points
// return pointer to points
virtual ANNpointArray thePoints() = 0;
};
//----------------------------------------------------------------------
// Brute-force nearest neighbor search:
// The brute-force search structure is very simple but inefficient.
// It has been provided primarily for the sake of comparison with
// and validation of the more complex search structures.
//
// Query processing is the same as described above, but the value
// of epsilon is ignored, since all distance calculations are
// performed exactly.
//
// WARNING: This data structure is very slow, and should not be
// used unless the number of points is very small.
//
// Internal information:
// ---------------------
// This data structure basically consists of the array of points
// (each a pointer to an array of coordinates). The search is
// performed by a simple linear scan of all the points.
//----------------------------------------------------------------------
// Brute-force search structure. It holds a dimension, a point count and a
// point array, which are exposed through theDim(), nPoints() and
// thePoints(), and it declares the two ANNpointSet search operations.
class DLL_API ANNbruteForce: public ANNpointSet {
private:
    int dim;                            // dimension
    int n_pts;                          // number of points
    ANNpointArray pts;                  // point array

public:
    // Constructor from a point array.
    ANNbruteForce(
        ANNpointArray pa,               // point array
        int n,                          // number of points
        int dd);                        // dimension

    virtual ~ANNbruteForce();           // destructor

    // Approximate k-nearest-neighbor search.
    virtual void annkSearch(
        ANNpoint q,                     // query point
        int k,                          // number of near neighbors to return
        ANNidxArray nn_idx,             // nearest neighbor array (modified)
        ANNdistArray dd,                // dist to near neighbors (modified)
        double eps=0.0);                // error bound

    // Approximate fixed-radius kNN search.
    virtual int annkFRSearch(
        ANNpoint q,                     // query point
        ANNdist sqRad,                  // squared radius
        int k = 0,                      // number of near neighbors to return
        ANNidxArray nn_idx = NULL,      // nearest neighbor array (modified)
        ANNdistArray dd = NULL,         // dist to near neighbors (modified)
        double eps=0.0);                // error bound

    // Dimension of the space.
    virtual int theDim()
    {
        return dim;
    }

    // Number of points.
    virtual int nPoints()
    {
        return n_pts;
    }

    // Pointer to the points.
    virtual ANNpointArray thePoints()
    {
        return pts;
    }
};
//----------------------------------------------------------------------
// kd- and bd-tree splitting and shrinking rules
// kd-trees supports a collection of different splitting rules.
// In addition to the standard kd-tree splitting rule proposed
// by Friedman, Bentley, and Finkel, we have introduced a
// number of other splitting rules, which seem to perform
// as well or better (for the distributions we have tested).
//
// The splitting methods given below allow the user to tailor
// the data structure to the particular data set. They are
// described in greater detail in the kd_split.cc source
// file. The method ANN_KD_SUGGEST is the method chosen (rather
// subjectively) by the implementors as the one giving the
// fastest performance, and is the default splitting method.
//
// As with splitting rules, there are a number of different
// shrinking rules. The shrinking rule ANN_BD_NONE does no
// shrinking (and hence produces a kd-tree tree). The rule
// ANN_BD_SUGGEST uses the implementors favorite rule.
//----------------------------------------------------------------------
// Splitting rules that can be selected when a kd-tree is built.
enum ANNsplitRule {
    ANN_KD_STD      = 0,                // the optimized kd-splitting rule
    ANN_KD_MIDPT    = 1,                // midpoint split
    ANN_KD_FAIR     = 2,                // fair split
    ANN_KD_SL_MIDPT = 3,                // sliding midpoint splitting method
    ANN_KD_SL_FAIR  = 4,                // sliding fair split method
    ANN_KD_SUGGEST  = 5                 // the authors' suggestion for best
};
// Number of split rules: one more than the last enumerator above.
const int ANN_N_SPLIT_RULES = ANN_KD_SUGGEST + 1;
// Shrinking rules that can be selected for a bd-tree.
enum ANNshrinkRule {
    ANN_BD_NONE     = 0,                // no shrinking at all (just kd-tree)
    ANN_BD_SIMPLE   = 1,                // simple splitting
    ANN_BD_CENTROID = 2,                // centroid splitting
    ANN_BD_SUGGEST  = 3                 // the authors' suggested choice
};
// Number of shrink rules: one more than the last enumerator above.
const int ANN_N_SHRINK_RULES = ANN_BD_SUGGEST + 1;
//----------------------------------------------------------------------
// kd-tree:
// The main search data structure supported by ANN is a kd-tree.
// The main constructor is given a set of points and a choice of
// splitting method to use in building the tree.
//
// Construction:
// -------------
// The constructor is given the point array, number of points,
// dimension, bucket size (default = 1), and the splitting rule
// (default = ANN_KD_SUGGEST). The point array is not copied, and
// is assumed to be kept constant throughout the lifetime of the
// search structure. There is also a "load" constructor that
// builds a tree from a file description that was created by the
// Dump operation.
//
// Search:
// -------
// There are two search methods:
//
// Standard search (annkSearch()):
// Searches nodes in tree-traversal order, always visiting
// the closer child first.
// Priority search (annkPriSearch()):
// Searches nodes in order of increasing distance of the
// associated cell from the query point. For many
// distributions the standard search seems to work just
// fine, but priority search is safer for worst-case
// performance.
//
// Printing:
// ---------
// There are two methods provided for printing the tree. Print()
// is used to produce a "human-readable" display of the tree, with
// indentation, which is handy for debugging. Dump() produces a
// format that is suitable for reading by another program. There is a
// "load" constructor, which constructs a tree which is assumed to
// have been saved by the Dump() procedure.
//
// Performance and Structure Statistics:
// -------------------------------------
// The procedure getStats() collects statistics information on the
// tree (its size, height, etc.) See ANNperf.h for information on
// the stats structure it returns.
//
// Internal information:
// ---------------------
// The data structure consists of three major chunks of storage.
// The first (implicit) storage is the points themselves (pts),
// which have either been provided by the user as an argument to the
// constructor or been allocated dynamically (if the tree is built
// using the load constructor). These should not be changed during
// the lifetime of the search structure. It is the user's
// responsibility to delete these after the tree is destroyed.
//
// The second is the tree itself (which is dynamically allocated in
// the constructor) and is given as a pointer to its root node
// (root). These nodes are automatically deallocated when the tree
// is deleted. See the file src/kd_tree.h for further information
// on the structure of the tree nodes.
//
// Each leaf of the tree does not contain a pointer directly to a
// point, but rather contains a pointer to a "bucket", which is an
// array consisting of point indices. The third major chunk of
// storage is an array (pidx), which is a large array in which all
// these bucket subarrays reside. (The reason for storing them
// separately is the buckets are typically small, but of varying
// sizes. This was done to avoid fragmentation.) This array is
// also deallocated when the tree is deleted.
//
// In addition to this, the tree consists of a number of other
// pieces of information which are used in searching and for
// subsequent tree operations. These consist of the following:
//
// dim Dimension of space
// n_pts Number of points currently in the tree
// n_max Maximum number of points that are allowed
// in the tree
// bkt_size Maximum bucket size (no. of points per leaf)
// bnd_box_lo Bounding box low point
// bnd_box_hi Bounding box high point
// splitRule Splitting method used
//
//----------------------------------------------------------------------
//----------------------------------------------------------------------
// Orthogonal (axis aligned) rectangle
// Orthogonal rectangles are represented by two points, one
// for the lower left corner (min coordinates) and the other
// for the upper right corner (max coordinates).
//
// The constructor initializes from either a pair of coordinates,
// pair of points, or another rectangle. Note that all constructors
// allocate new point storage. The destructor deallocates this
// storage.
//
// BEWARE: Orthogonal rectangles should be passed ONLY BY REFERENCE.
// (C++'s default copy constructor will not allocate new point
// storage, then on return the destructor frees the storage, and then
// you get into big trouble in the calling procedure.)
//----------------------------------------------------------------------
class ANNorthRect {
public:
ANNpoint lo; // rectangle lower bounds
ANNpoint hi; // rectangle upper bounds
//
ANNorthRect( // basic constructor
int dd, // dimension of space
ANNcoord l=0, // default is empty
ANNcoord h=0)
{ lo = annAllocPt(dd, l); hi = annAllocPt(dd, h); }
ANNorthRect( // (almost a) copy constructor
int dd, // dimension
const ANNorthRect &r) // rectangle to copy
{ lo = annCopyPt(dd, r.lo); hi = annCopyPt(dd, r.hi); }
ANNorthRect( // construct from points
int dd, // dimension
ANNpoint l, // low point
ANNpoint h) // hight point
{ lo = annCopyPt(dd, l); hi = annCopyPt(dd, h); }
~ANNorthRect() // destructor
{ annDeallocPt(lo); annDeallocPt(hi); }
ANNbool inside(int dim, ANNpoint p);// is point p inside rectangle?
};
// Assigns the rectangle source to dest; both must have dimension dim.
// NOTE(review): unlike the ann* point utilities declared earlier in this
// file, this declaration carries no DLL_API -- presumably it is meant for
// use inside the library only; confirm.
void annAssignRect( // assign one rect to another
int dim, // dimension (both must be same)
ANNorthRect &dest, // destination (modified)
const ANNorthRect &source); // source
//----------------------------------------------------------------------
// Some types and objects used by kd-tree functions
// See src/kd_tree.h and src/kd_tree.cpp for definitions
//----------------------------------------------------------------------
class ANNkdStats; // stats on kd-tree (forward declaration only)
//class ANNkd_node; // generic node in a kd-tree (disabled; the class is defined in full below)
//----------------------------------------------------------------------
// Generic kd-tree node
//
// Nodes in kd-trees are of two types, splitting nodes which contain
// splitting information (a splitting hyperplane orthogonal to one
// of the coordinate axes) and leaf nodes which contain point
// information (an array of points stored in a bucket). This is
// handled by making a generic class kd_node, which is essentially an
// empty shell, and then deriving the leaf and splitting nodes from
// this.
//----------------------------------------------------------------------
// Generic kd-tree node (empty shell). Apart from the destructor and
// isLeaf(), every operation is pure virtual.
class ANNkd_node {
public:
    virtual ~ANNkd_node()               // virtual destructor
    {}

    virtual void ann_search(ANNdist) = 0;       // tree search
    virtual void ann_pri_search(ANNdist) = 0;   // priority search
    virtual void ann_FR_search(ANNdist) = 0;    // fixed-radius search

    // Get tree statistics.
    virtual void getStats(
        int dim,                        // dimension of space
        ANNkdStats &st,                 // statistics
        ANNorthRect &bnd_box) = 0;      // bounding box

    // Print node.
    virtual void print(int level, std::ostream &out) = 0;

    // Dump node.
    virtual void dump(std::ostream &out) = 0;

    // Added by Deyuan Qiu. This base version answers false.
    virtual bool isLeaf()
    {
        return false;
    }

    friend class ANNkd_tree;            // allow kd-tree to access us
};

typedef ANNkd_node* ANNkd_ptr;          // pointer to a kd-tree node
// ANNkd_tree: kd-tree search structure, derived from ANNpointSet.
// Only the inline accessors are defined here; constructors, destructor,
// searches and the print/dump/statistics routines are declared only.
// NOTE(review): the class holds raw pointer members and declares a
// destructor but no copy constructor/assignment -- copying a tree is
// presumably unsafe; confirm against the destructor's definition.
class DLL_API ANNkd_tree : public ANNpointSet {
protected:
    int             dim;            // dimension of space
    int             n_pts;          // number of points in tree
    int             bkt_size;       // bucket size
    ANNpointArray   pts;            // the points
    ANNidxArray     pidx;           // point indices (to pts array)
    ANNkd_ptr       root;           // root of kd-tree
    ANNpoint        bnd_box_lo;     // bounding box low point
    ANNpoint        bnd_box_hi;     // bounding box high point

    // Construct skeleton tree.
    void SkeletonTree(
        int             n,              // number of points
        int             dd,             // dimension
        int             bs,             // bucket size
        ANNpointArray   pa = NULL,      // point array (optional)
        ANNidxArray     pi = NULL);     // point indices (optional)

public:
    // Build skeleton tree.  All arguments are defaulted, so this also
    // serves as the default constructor.
    ANNkd_tree(
        int             n = 0,          // number of points
        int             dd = 0,         // dimension
        int             bs = 1);        // bucket size

    // Build from point array.
    ANNkd_tree(
        ANNpointArray   pa,             // point array
        int             n,              // number of points
        int             dd,             // dimension
        int             bs = 1,         // bucket size
        ANNsplitRule    split = ANN_KD_SUGGEST);    // splitting method

    // Build from dump file.
    ANNkd_tree(
        std::istream&   in);            // input stream for dump file

    // Tree destructor.
    ~ANNkd_tree();

    // Approximate k-nearest-neighbor search.
    void annkSearch(
        ANNpoint        q,              // query point
        int             k,              // number of near neighbors to return
        ANNidxArray     nn_idx,         // nearest neighbor array (modified)
        ANNdistArray    dd,             // dist to near neighbors (modified)
        double          eps = 0.0);     // error bound

    // Priority k-nearest-neighbor search.
    void annkPriSearch(
        ANNpoint        q,              // query point
        int             k,              // number of near neighbors to return
        ANNidxArray     nn_idx,         // nearest neighbor array (modified)
        ANNdistArray    dd,             // dist to near neighbors (modified)
        double          eps = 0.0);     // error bound

    // Approximate fixed-radius k-nearest-neighbor search.  Both result
    // arrays default to NULL.  Returns an int -- presumably the number
    // of points found within the query ball; confirm against the
    // definition.
    int annkFRSearch(
        ANNpoint        q,              // the query point
        ANNdist         sqRad,          // squared radius of query ball
        int             k,              // number of neighbors to return
        ANNidxArray     nn_idx = NULL,  // nearest neighbor array (modified)
        ANNdistArray    dd = NULL,      // dist to near neighbors (modified)
        double          eps = 0.0);     // error bound

    // Return dimension of space.
    int theDim()
    {
        return dim;
    }

    // Return number of points.
    int nPoints()
    {
        return n_pts;
    }

    // Return pointer to points.
    ANNpointArray thePoints()
    {
        return pts;
    }

    // Print the tree (for debugging).
    virtual void Print(
        ANNbool         with_pts,       // print points as well?
        std::ostream&   out);           // output stream

    // Dump entire tree.
    virtual void Dump(
        ANNbool         with_pts,       // print points as well?
        std::ostream&   out);           // output stream

    // Compute tree statistics.
    virtual void getStats(
        ANNkdStats&     st);            // the statistics (modified)

    // Return the root node pointer (accessor added by Deyuan Qiu).
    ANNkd_ptr getRoot()
    {
        return root;
    }
};
//----------------------------------------------------------------------
// Box decomposition tree (bd-tree)
// The bd-tree is inherited from a kd-tree. The main difference
// between the bd-tree and the kd-tree is a new type of internal node
// called a shrinking node (in the kd-tree there is only one type
// of internal node, a splitting node). The shrinking node
// makes it possible to generate balanced trees in which the
// cells have bounded aspect ratio, by allowing the decomposition
// to zoom in on regions of dense point concentration. Although
// this is a nice idea in theory, few point distributions are so
// densely clustered that this is really needed.
//----------------------------------------------------------------------
// ANNbd_tree: box-decomposition tree, publicly derived from ANNkd_tree.
// It adds no data members or methods of its own in this declaration,
// only constructors.
class DLL_API ANNbd_tree : public ANNkd_tree {
public:
    // Build skeleton tree: forwards the three arguments unchanged to the
    // base kd-tree skeleton constructor and does nothing further.
    ANNbd_tree(
        int             n,              // number of points
        int             dd,             // dimension
        int             bs = 1)         // bucket size
        : ANNkd_tree(n, dd, bs)         // build base kd-tree
    {
    }

    // Build from point array.
    ANNbd_tree(
        ANNpointArray   pa,             // point array
        int             n,              // number of points
        int             dd,             // dimension
        int             bs = 1,         // bucket size
        ANNsplitRule    split = ANN_KD_SUGGEST,     // splitting rule
        ANNshrinkRule   shrink = ANN_BD_SUGGEST);   // shrinking rule

    // Build from dump file.
    ANNbd_tree(
        std::istream&   in);            // input stream for dump file
};
//----------------------------------------------------------------------
// Other functions
// annMaxPtsVisit Sets a limit on the maximum number of points
// to visit in the search.
// annClose Can be called when all use of ANN is finished.
// It clears up a minor memory leak.
//----------------------------------------------------------------------
// annMaxPtsVisit: set a limit on the maximum number of points to visit
// in the search.  Declaration only; the definition lives elsewhere.
DLL_API void annMaxPtsVisit( // max. pts to visit in search
int maxPts); // the limit
// annClose: may be called once all use of ANN is finished; it clears up
// a minor memory leak.  Declaration only; the definition lives elsewhere.
DLL_API void annClose(); // called to end use of ANN
#endif