-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathHNSW.js
More file actions
executable file
·249 lines (241 loc) · 8.89 KB
/
HNSW.js
File metadata and controls
executable file
·249 lines (241 loc) · 8.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
import { euclidean } from '../metrics/index';
import { Heap } from '../datastructure/index';
import { Randomizer } from '../util/index';
/**
* @class
* @alias HNSW
*/
export class HNSW {
/**
* Hierarchical navigable small world graph. Efficient and robust approximate nearest neighbor search.
* @constructor
* @memberof module:knn
* @alias HNSW
* @param {Function} [metric = euclidean] - metric to use: (a, b) => distance.
* @param {Boolean} [heuristic = true] - use heuristics or naive selection.
* @param {Number} [m = 5] - max number of connections.
* @param {Number} [ef = 200] - size of candidate list.
* @param {Number} [m0 = 2 * m] - max number of connections for ground layer.
* @param {Number} [mL = 1 / Math.log2(m)] - normalization factor for level generation.
* @param {Number} [seed = 1987] - seed for random number generator.
* @see {@link https://arxiv.org/abs/1603.09320}
* @see {@link https://arxiv.org/pdf/1904.02077}
*/
constructor(metric = euclidean, heuristic = true, m = 5, ef = 200, m0 = null, mL = null, seed = 1987) {
this._metric = metric;
this._select = heuristic ? this._select_heuristic : this._select_simple;
this._m = m;
this._ef = ef;
this._m0 = m0 || 2 * m;
this._graph = new Map();
this._ep = null;
this._L = null;
this._mL = mL || 1 / Math.log2(m);
this._randomizer = new Randomizer(seed);
}
addOne(element) {
this.add([element])
}
/**
*
* @param {Array<*>} elements - new elements.
* @returns {HNSW}
*/
add(elements) {
const m = this._m;
const ef = this._ef;
const m0 = this._m0;
const mL = this._mL;
const randomizer = this._randomizer;
let graph = this._graph;
for (const element of elements) {
let ep = this._ep ? this._ep.slice() : null;
let W = [];
const L = this._L;
const rand = Math.min(randomizer.random + 1e-8, 1);
let l = Math.floor(-Math.log(rand * mL))
const min_L_l = Math.min(L, l);
if (L) {
for (let l_c = graph.size - 1; l_c > min_L_l; --l_c) {
ep = this._search_layer(element, ep, 1, l_c);
}
for (let l_c = min_L_l; l_c >= 0; --l_c) {
const layer_c = graph.get(l_c);//[l_c];
layer_c.points.push(element)
W = this._search_layer(element, ep, ef, l_c);
const neighbors = l_c > 3 ? this._select(element, W, m, l_c) : this._select_simple(element, W, m);
for (const p of neighbors) {
if (p !== element) {
const edges_p = layer_c.edges.get(p);
if (!edges_p) {
layer_c.edges.set(p, [element]) ;
}else {
edges_p.push(element);
}
const edges_e = layer_c.edges.get(element);
if (!edges_e) {
layer_c.edges.set(element, [p]) ;
} else {
edges_e.push(p);
}
}
}
const max = (l_c === 0 ? m0 : m);
for (const e of neighbors) {
const e_conn = layer_c.edges.get(e);
if (e_conn.length > max) {
const neighborhood = this._select(e, e_conn, max, l_c);
layer_c.edges.delete(e);
layer_c.edges.set(e, neighborhood);
}
}
ep = W;
}
}
let N = graph.size;
if (N < l || l > L) {
for (let i = N; i <= l; ++i) {
graph.set(i, {
"l_c": i,
"points": [element],
"edges": new Map(),
});
}
this._ep = [element];
this._L = l;
}
}
return this;
}
/**
* @private
* @param {*} q - base element.
* @param {Array} candidates - candidate elements.
* @param {Number} M - number of neighbors to return.
* @param {Number} l_c - layer number.
* @param {Boolean} [extend_candidates = true] - flag indicating wheter or not to extend candidate list.
* @param {Boolean} [keep_pruned_connections = true] - flag indicating wheter or not to add discarded elements.
* @returns M elements selected by the heuristic.
*/
_select_heuristic(q, candidates, M, l_c, extend_candidates = true, keep_pruned_connections = true) {
if (l_c > this._graph.size - 1) return candidates
const metric = this._metric;
const randomizer = this._randomizer;
const layer = this._graph.get(l_c);
let R = [];
let W_set = new Set(candidates);
if (extend_candidates) {
for (const c of candidates) {
const edges = layer.edges.get(c);
if (!edges) break;
for (const c_adj of edges) {
W_set.add(c_adj)
}
}
}
let W = new Heap(W_set, d => metric(d, q), "min")
let W_d = new Heap(null, d => metric(d, q), "min");
while (!W.empty && R.length < M) {
let e = W.pop()
let random_r = randomizer.random_int % R.length;
if (R.length === 0 || e.value < metric(R[random_r], q)) {
R.push(e.element);
} else {
W_d.push(e.element)
}
}
if (keep_pruned_connections) {
while (!W_d.empty && R.length < M) {
R.push(W_d.pop().element)
}
}
return R
}
/**
* @private
* @param {*} q - base element.
* @param {Array} C - candidate elements.
* @param {Number} M - number of neighbors to return.
* @returns {Array} M nearest elements from C to q.
*/
_select_simple(q, C, M) {
const metric = this._metric;
let res = C.sort((a,b) => metric(a, q) - metric(b, q)).slice(0,M);
return res
}
/**
* @private
* @param {*} q - query element.
* @param {Array} ep - enter points.
* @param {Number} ef - number of nearest to {@link q} elements to return.
* @param {Number} l_c - layer number.
* @returns {Array} ef closest neighbors to q.
*/
_search_layer(q, ep, ef, l_c) {
const metric = this._metric;
const layer = this._graph.get(l_c)//.find(l => l.l_c === l_c);//[l_c];
if (layer.edges.size === 0) return ep;
let v = new Set(ep);
let C = new Heap(v, d => metric(d, q), "min");
let W = new Heap(v, d => metric(d, q), "max");
while (!C.empty) {
const c = C.pop();
let f = W.first;
if (c.value > f.value) {
break;
}
const edges = layer.edges.get(c.element);
if (!edges) break;
for (const e of edges) {
if (!v.has(e)) {
v.add(e);
f = W.first;
if (metric(e, q) < metric(f.element, q) || W.length < ef) {
C.push(e);
W.push(e);
if (W.length > ef) {
W.pop();
}
}
}
}
}
return W.data();
}
/**
*
* @param {*} q - query element.
* @param {*} K - number of nearest neighbors to return.
* @param {*} ef - size of the dynamic cnadidate list.
* @returns {Array} K nearest elements to q.
*/
search(q, K, ef = 1) {
let ep = this._ep.slice();
let L = this._L;
for (let l_c = L; l_c > 0; --l_c) {
ep = this._search_layer(q, ep, ef, l_c);
}
ep = this._search_layer(q, ep, K, 0);
return ep;
}
/**
* Iterator for searching the HNSW graphs
* @param {*} q - query element.
* @param {*} K - number of nearest neighbors to return.
* @param {*} ef - size of the dynamic cnadidate list.
* @yields {Array} K nearest elements to q.
*/
* search_iter(q, K, ef = 1) {
let ep = this._ep.slice();
let L = this._L;
yield{"l_c": L, "ep": [q]}
for (let l_c = L; l_c > 0; --l_c) {
yield {"l_c": l_c, "ep": ep}
ep = this._search_layer(q, ep, ef, l_c);
yield {"l_c": l_c, "ep": ep}
}
yield {"l_c": 0, "ep": ep}
ep = this._search_layer(q, ep, K, 0);
yield {"l_c": 0, "ep": ep}
}
}