# Source code for autofaiss.external.metadata
"""
Index metadata for Faiss indices.
"""
import re
from enum import Enum
from math import ceil
from autofaiss.utils.cast import cast_bytes_to_memory_string
from autofaiss.external.descriptions import (
INDEX_DESCRIPTION_BLOCKS,
IndexBlock,
TUNABLE_PARAMETERS_DESCRIPTION_BLOCKS,
TunableParam,
)
# [docs]  (Sphinx source-link artifact)
class IndexType(Enum):
    """Enumeration of the faiss index structures known to autofaiss.

    The integer values are arbitrary distinct identifiers; the code below
    compares enum members directly and never reads ``.value``.
    """

    FLAT = 0
    HNSW = 1
    OPQ_IVF_PQ = 2
    OPQ_IVF_HNSW_PQ = 3
    PAD_IVF_HNSW_PQ = 4
    NOT_SUPPORTED = 5
    IVF_FLAT = 6
# [docs]  (Sphinx source-link artifact)
class IndexMetadata:
    """
    Class to compute index metadata given the index_key, the number of vectors and their dimension.

    Note: We don't create classes for each index type in order to keep the code simple.
    """

    def __init__(self, index_key: str, nb_vectors: int, dim_vector: int, make_direct_map: bool = False):
        """
        Parse the index_key and fill in the index type, description blocks,
        tunable parameters and structural parameters.

        Parameters
        ----------
        index_key: faiss index factory string (e.g. "OPQ64_128,IVF1024,PQ64")
        nb_vectors: number of vectors the index will hold
        dim_vector: dimension of the vectors
        make_direct_map: whether a direct map (id -> vector) is added to the index
        """
        self.index_key = index_key
        self.nb_vectors = nb_vectors
        self.dim_vector = dim_vector
        self.fast_description = ""
        self.description_blocs = []
        self.tunable_params = []
        self.params = {}
        self.make_direct_map = make_direct_map

        # All the integers appearing in the index_key, in order of appearance.
        # e.g. "OPQ64_128,IVF1024,PQ64" -> [64, 128, 1024, 64]
        params = [int(x) for x in re.findall(r"\d+", index_key)]

        # re.search (instead of any(re.findall(...))) is enough here: we only
        # need a boolean "does the pattern occur", and all patterns match
        # non-empty substrings.
        if re.search(r"OPQ\d+_\d+,IVF\d+,PQ\d+", index_key):
            self.index_type = IndexType.OPQ_IVF_PQ
            self.fast_description = "An inverted file index (IVF) with quantization and OPQ preprocessing."
            self.description_blocs = [IndexBlock.IVF, IndexBlock.PQ, IndexBlock.OPQ]
            self.tunable_params = [TunableParam.NPROBE, TunableParam.HT]
            self.params["pq"] = params[3]
            self.params["nbits"] = params[4] if len(params) == 5 else 8  # default value
            self.params["ncentroids"] = params[2]
            self.params["out_d"] = params[1]
            self.params["M_OPQ"] = params[0]
        elif re.search(r"OPQ\d+_\d+,IVF\d+_HNSW\d+,PQ\d+", index_key):
            self.index_type = IndexType.OPQ_IVF_HNSW_PQ
            self.fast_description = "An inverted file index (IVF) with quantization, OPQ preprocessing, and HNSW index."
            self.description_blocs = [IndexBlock.IVF_HNSW, IndexBlock.HNSW, IndexBlock.PQ, IndexBlock.OPQ]
            self.tunable_params = [TunableParam.NPROBE, TunableParam.EFSEARCH, TunableParam.HT]
            self.params["M_HNSW"] = params[3]
            self.params["pq"] = params[4]
            self.params["nbits"] = params[5] if len(params) == 6 else 8  # default value
            self.params["ncentroids"] = params[2]
            self.params["out_d"] = params[1]
            self.params["M_OPQ"] = params[0]
        elif re.search(r"Pad\d+,IVF\d+_HNSW\d+,PQ\d+", index_key):
            self.index_type = IndexType.PAD_IVF_HNSW_PQ
            self.fast_description = (
                "An inverted file index (IVF) with quantization, a padding on input vectors, and HNSW index."
            )
            self.description_blocs = [IndexBlock.IVF_HNSW, IndexBlock.HNSW, IndexBlock.PQ, IndexBlock.PAD]
            self.tunable_params = [TunableParam.NPROBE, TunableParam.EFSEARCH, TunableParam.HT]
            self.params["out_d"] = params[0]
            self.params["M_HNSW"] = params[2]
            self.params["pq"] = params[3]
            self.params["nbits"] = params[4] if len(params) == 5 else 8  # default value
            self.params["ncentroids"] = params[1]
        elif re.search(r"HNSW\d+", index_key):
            self.index_type = IndexType.HNSW
            self.fast_description = "An HNSW index."
            self.description_blocs = [IndexBlock.HNSW]
            self.tunable_params = [TunableParam.EFSEARCH]
            self.params["M_HNSW"] = params[0]
        elif re.search(r"IVF\d+,Flat", index_key):
            self.index_type = IndexType.IVF_FLAT
            self.fast_description = "An inverted file index (IVF) with no quantization"
            self.description_blocs = [IndexBlock.IVF]
            self.tunable_params = [TunableParam.NPROBE]
            self.params["ncentroids"] = params[0]
        elif index_key == "Flat":
            self.index_type = IndexType.FLAT
            self.fast_description = "A simple flat index."
            self.description_blocs = [IndexBlock.FLAT]
            self.tunable_params = []
        else:
            self.index_type = IndexType.NOT_SUPPORTED
            self.fast_description = "No description for this index, feel free to contribute :)"
            self.description_blocs = []
            self.tunable_params = []

    def estimated_index_size_in_bytes(self) -> int:
        """
        Compute the estimated size of the index in bytes.

        Returns -1 when the index type is not supported.
        """
        if self.index_type == IndexType.FLAT:
            # Raw float32 vectors only.
            return self.nb_vectors * self.dim_vector * 4
        if self.index_type == IndexType.HNSW:
            # M bidirectional links per vector in the HNSW graph
            hnsw_graph_in_bytes = self.nb_vectors * self.params["M_HNSW"] * 2 * 4
            vectors_size_in_bytes = self.nb_vectors * self.dim_vector * 4
            return vectors_size_in_bytes + hnsw_graph_in_bytes
        if self.index_type in [IndexType.OPQ_IVF_PQ, IndexType.OPQ_IVF_HNSW_PQ, IndexType.PAD_IVF_HNSW_PQ]:
            # The direct map stores one 8-byte id per vector.
            direct_map_overhead = 8 * self.nb_vectors if self.make_direct_map else 0
            # We neglect the size of the OPQ table for the moment.
            code_size = ceil(self.params["pq"] * self.params["nbits"] / 8)
            # https://github.com/facebookresearch/faiss/blob/main/faiss/invlists/InvertedLists.h#L193
            embedding_id_byte = 8
            vector_size_byte = code_size + embedding_id_byte
            vectors_size_in_bytes = self.nb_vectors * vector_size_byte
            centroid_size_in_bytes = self.params["ncentroids"] * self.dim_vector * 4
            total_size_in_byte = direct_map_overhead + vectors_size_in_bytes + centroid_size_in_bytes
            if self.index_type in [IndexType.OPQ_IVF_HNSW_PQ, IndexType.PAD_IVF_HNSW_PQ]:
                # HNSW graph over the IVF centroids.
                total_size_in_byte += self.params["ncentroids"] * self.params["M_HNSW"] * 2 * 4
            if self.index_type in [IndexType.OPQ_IVF_PQ, IndexType.OPQ_IVF_HNSW_PQ]:
                # OPQ rotation matrix (M_OPQ x out_d float32 values).
                total_size_in_byte += self.params["M_OPQ"] * self.params["out_d"] * 4
            return total_size_in_byte
        if self.index_type == IndexType.IVF_FLAT:
            direct_map_overhead = 8 * self.nb_vectors if self.make_direct_map else 0
            vectors_size_in_bytes = self.nb_vectors * self.dim_vector * 4
            centroid_size_in_bytes = self.params["ncentroids"] * self.dim_vector * 4
            return direct_map_overhead + vectors_size_in_bytes + centroid_size_in_bytes
        # Unsupported index type: size cannot be estimated.
        return -1

    def get_index_description(self, tunable_parameters_infos=False) -> str:
        """
        Gives a generic description of the index.

        Parameters
        ----------
        tunable_parameters_infos: if True, also describe the parameters that can
            be tuned for a query speed VS recall tradeoff.
        """
        description = self.fast_description
        if self.index_type == IndexType.NOT_SUPPORTED:
            return description
        description += "\n"
        index_size_string = cast_bytes_to_memory_string(self.estimated_index_size_in_bytes())
        description += f"The size of the index should be around {index_size_string}.\n\n"
        description += "\n".join(INDEX_DESCRIPTION_BLOCKS[desc] for desc in self.description_blocs) + "\n\n"
        if tunable_parameters_infos:
            if not self.tunable_params:
                description += "No parameters can be tuned to find a query speed VS recall tradeoff\n\n"
            else:
                description += "List of parameters that can be tuned to find a query speed VS recall tradeoff:\n"
                description += (
                    "\n".join(TUNABLE_PARAMETERS_DESCRIPTION_BLOCKS[desc] for desc in self.tunable_params) + "\n\n"
                )
        description += """
For all indices except the flat index, the query speed can be adjusted.
The lower the speed limit the lower the recall. With a looser constraint
on the query time, the recall can be higher, but it is limited by the index
structure (if there is quantization for instance).
"""
        return description

    def compute_memory_necessary_for_training(self, nb_training_vectors: int) -> float:
        """
        Function that computes the memory necessary to train an index with nb_training_vectors vectors
        """
        if self.index_type == IndexType.FLAT:
            # A flat index requires no training.
            return 0
        elif self.index_type == IndexType.IVF_FLAT:
            return self.compute_memory_necessary_for_ivf_flat(nb_training_vectors)
        elif self.index_type == IndexType.HNSW:
            # HNSW is built incrementally, no separate training phase.
            return 0
        elif self.index_type == IndexType.OPQ_IVF_PQ:
            return self.compute_memory_necessary_for_opq_ivf_pq(nb_training_vectors)
        elif self.index_type == IndexType.OPQ_IVF_HNSW_PQ:
            return self.compute_memory_necessary_for_opq_ivf_hnsw_pq(nb_training_vectors)
        elif self.index_type == IndexType.PAD_IVF_HNSW_PQ:
            return self.compute_memory_necessary_for_pad_ivf_hnsw_pq(nb_training_vectors)
        else:
            # Unknown index type: fall back to a conservative 500MB estimate.
            return 500 * 10**6

    def compute_memory_necessary_for_ivf_flat(self, nb_training_vectors: int):
        """Compute the memory estimation for index type IVF_FLAT."""
        ivf_memory_in_bytes = self._get_ivf_training_memory_usage_in_bytes()
        # TODO: remove x1.5 when estimation is correct
        return self._get_vectors_training_memory_usage_in_bytes(nb_training_vectors) * 1.5 + ivf_memory_in_bytes

    def compute_memory_necessary_for_opq_ivf_pq(self, nb_training_vectors: int) -> float:
        """Compute the memory estimation for index type OPQ_IVF_PQ."""
        # TODO: remove x1.5 when estimation is correct
        return (
            self._get_vectors_training_memory_usage_in_bytes(nb_training_vectors) * 1.5
            + self._get_opq_training_memory_usage_in_bytes(nb_training_vectors)
            + self._get_ivf_training_memory_usage_in_bytes()
            + self._get_pq_training_memory_usage_in_bytes()
        )

    def compute_memory_necessary_for_opq_ivf_hnsw_pq(self, nb_training_vectors: int) -> float:
        """Compute the memory estimation for index type OPQ_IVF_HNSW_PQ."""
        # TODO: remove x1.5 when estimation is correct
        return (
            self._get_vectors_training_memory_usage_in_bytes(nb_training_vectors) * 1.5
            + self._get_opq_training_memory_usage_in_bytes(nb_training_vectors)
            + self._get_ivf_training_memory_usage_in_bytes()
            + self._get_ivf_hnsw_training_memory_usage_in_bytes()
            + self._get_pq_training_memory_usage_in_bytes()
        )

    def compute_memory_necessary_for_pad_ivf_hnsw_pq(self, nb_training_vectors: int):
        """Compute the memory estimation for index type PAD_IVF_HNSW_PQ."""
        # TODO: remove x1.5 when estimation is correct
        return (
            self._get_vectors_training_memory_usage_in_bytes(nb_training_vectors) * 1.5
            + self._get_ivf_training_memory_usage_in_bytes()
            + self._get_ivf_hnsw_training_memory_usage_in_bytes()
            + self._get_pq_training_memory_usage_in_bytes()
        )

    def _get_vectors_training_memory_usage_in_bytes(self, nb_training_vectors: int):
        """Get vectors memory estimation in bytes (float32 training vectors)."""
        return 4.0 * self.dim_vector * nb_training_vectors

    def _get_ivf_training_memory_usage_in_bytes(self):
        """Get IVF memory estimation in bytes (float32 centroids)."""
        return 4.0 * self.params["ncentroids"] * self.dim_vector

    def _get_ivf_hnsw_training_memory_usage_in_bytes(self):
        """Get HNSW followed by IVF memory estimation in bytes."""
        # M bidirectional links per centroid, plus the centroid vectors themselves.
        hnsw_graph_in_bytes = self.params["ncentroids"] * self.params["M_HNSW"] * 2 * 4
        vectors_size_in_bytes = self.params["ncentroids"] * self.dim_vector * 4
        return vectors_size_in_bytes + hnsw_graph_in_bytes

    def _get_opq_training_memory_usage_in_bytes(self, nb_training_vectors: int):
        """Get OPQ memory estimation in bytes."""
        # see OPQMatrix::train on https://github.com/facebookresearch/faiss/blob/main/faiss/VectorTransform.cpp#L987
        d_in, d_out, code_size = self.dim_vector, self.params["out_d"], self.params["M_OPQ"]
        # faiss caps the number of training points used by OPQ at 256*256.
        n = min(256 * 256, nb_training_vectors)
        d = max(d_in, d_out)
        d2 = d_out
        xproj = d2 * n
        pq_recons = d2 * n
        xxr = d * n
        tmp = d * d * 4
        mem_code_size = code_size * n
        # Float buffers are 4 bytes per element; codes are 1 byte per element.
        opq_memory_in_bytes = (xproj + pq_recons + xxr + tmp) * 4.0 + mem_code_size * 1.0
        return opq_memory_in_bytes

    def _get_pq_training_memory_usage_in_bytes(self):
        """Get PQ memory estimation in bytes (one code per vector)."""
        return ceil(self.params["pq"] * self.params["nbits"] / 8)
# [docs]  (Sphinx source-link artifact)
def compute_memory_necessary_for_training_wrapper(nb_training_vectors: int, index_key: str, dim_vector: int):
    """
    Estimate the memory needed to train an index of type ``index_key`` on
    ``nb_training_vectors`` vectors of dimension ``dim_vector``.
    """
    # nb_vectors is useless for training memory estimation, so just put -1
    metadata = IndexMetadata(index_key, -1, dim_vector)
    return metadata.compute_memory_necessary_for_training(nb_training_vectors=nb_training_vectors)