-
Notifications
You must be signed in to change notification settings - Fork 20
Open
Description
I'm finding that adding to and searching an ObjectBox database is really fast. However, the remove operation is really slow (about 1 second per object). The database is on a local NVMe SSD. It contains about 20,000 hashes and takes up about 6 GB.
My find_unique hash_box.query operation is fast - it's literally the call to hash_box.remove that takes the time.
What am I doing wrong?
# Entity holding one image hash: a unique string key plus its embedding vector.
@Entity()
class ImHash:
    # Object id property of the entity.
    id = Id
    # Lookup key; declared unique and backed by a hash index.
    key = String(index=Index(IndexType.HASH), unique=True)
    # Embedding vector, indexed with HNSW for nearest-neighbour queries
    # using cosine distance. The index is declared with 62720 dimensions.
    cos_value = Float32Vector(index=HnswIndex(
        dimensions=62720,
        distance_type=VectorDistanceType.COSINE,
    ))
def hash_image(im: Image.Image) -> list[float]:
    """Compute the flattened embedding vector for an image.

    Calls ``img2vec.get_vec`` with ``tensor=True``, detaches the result,
    moves it to the CPU, converts it to NumPy and flattens it.

    NOTE(review): the returned value is whatever ``.numpy().flatten()``
    yields (presumably a ``numpy.ndarray``), not a built-in ``list`` as the
    annotation says -- confirm and adjust the annotation if so.
    """
    embedding = img2vec.get_vec(im, tensor=True)
    as_array = embedding.detach().cpu().numpy()
    return as_array.flatten()
def hash_and_store(name_or_fp, key: str):
    """Hash an image and store the vector under ``key``.

    Opens the image given by ``name_or_fp``, computes its vector with
    ``hash_image`` and writes it to ``hash_box``. If no entry exists for
    ``key`` a new ``ImHash`` is created; otherwise the entry returned by
    ``find_unique`` gets its vector overwritten.

    NOTE(review): the pasted source had lost its indentation; the
    create-or-update structure below is the reconstructed reading --
    confirm against the real file.

    Args:
        name_or_fp: Value passed straight to ``Image.open``.
        key: Unique key under which the hash is stored.
    """
    # Use the image as a context manager so the underlying file handle is
    # released once the vector has been computed (the original never
    # closed it).
    with Image.open(name_or_fp) as im:
        h = hash_image(im)

    ih = find_unique(key)
    if ih is None:
        # create
        ih = ImHash()
        ih.key = key
    ih.cos_value = h
    with store_lock:
        hash_box.put(ih)
def init(db_dir: pathlib.Path):
    """Open the store under ``db_dir`` and set up the module globals.

    Rebinds the globals ``store``, ``hash_box`` and ``img2vec``.

    Args:
        db_dir: Directory that contains the database directory
            (``directory_name``) and the model file (``json_model_name``).
    """
    global store, hash_box, img2vec

    database_path = str(db_dir / directory_name)
    model_path = str(db_dir / json_model_name)
    size_limit_kb = 10 * 1024 * 1024  # 10 GiB expressed in KiB

    store = Store(
        directory=database_path,
        model_json_file=model_path,
        max_db_size_in_kb=size_limit_kb,
    )
    hash_box = store.box(ImHash)
    img2vec = Img2Vec(cuda=False, model='efficientnet_b0')
def close():
    """Close the module-level ``store``."""
    store.close()
def find_unique(key: str):
    """Return the single ``ImHash`` whose key equals ``key``.

    Returns ``None`` when nothing matches. When more than one entry
    matches, prints a message and also returns ``None``.
    """
    with store_lock:
        matches = hash_box.query(ImHash.key.equals(key)).build().find()

    match_count = len(matches)
    if match_count == 0:
        return None
    if match_count > 1:
        print('Multiple matches found')
        return None
    return matches[0]
def find_similar(key: str) -> list[tuple[ImHash, float]]:
    """Find stored hashes closest to the one stored under ``key``.

    Looks up the entry for ``key`` and runs a nearest-neighbour query on
    its ``cos_value`` vector, requesting 8 neighbours.

    Args:
        key: Key of the entry to use as the query vector.

    Returns:
        ``(ImHash, score)`` tuples sorted by ascending score. An empty list
        is returned when ``find_unique`` yields no entry for ``key``
        (missing or ambiguous key).
    """
    target = find_unique(key)
    if target is None:
        # Without a target there is no query vector; the original code
        # raised AttributeError on ``target.cos_value`` here.
        return []

    with store_lock:
        query = hash_box.query(
            ImHash.cos_value.nearest_neighbor(target.cos_value, 8)
        ).build()
        results = query.find_with_scores()

    results.sort(key=lambda x: x[1])
    return results
def remove(key: str):
    """Remove the entry stored under ``key``, if ``find_unique`` finds one.

    Does nothing when ``find_unique`` returns ``None``.
    """
    entry = find_unique(key)
    if entry is None:
        return
    with store_lock:
        hash_box.remove(entry)
def remove_many(keys: list[str]):
    """Remove the entries for all ``keys`` inside one write transaction.

    Keys for which ``find_unique`` returns ``None`` are reported with a
    printed message and skipped.

    NOTE(review): this takes ``store_lock`` while the write transaction is
    open, whereas ``hash_and_store`` calls ``put`` while holding
    ``store_lock``; presumably that ordering can deadlock across threads --
    confirm.
    """
    with store.write_tx():
        for key in keys:
            entry = find_unique(key)
            if entry is None:
                print('Hash key "%s" was already gone' % key)
                continue
            with store_lock:
                hash_box.remove(entry.id)
Metadata
Assignees
Labels
No labels