Feature Extraction
Transformers
Safetensors
sentence-transformers
multilingual
jina_embeddings_v5_omni
image-feature-extraction
embedding
qwen3
jina-embeddings-v5
multimodal
vision
audio
vllm
video
audio-feature-extraction
video-feature-extraction
sentence-similarity
custom_code
🇪🇺 Region: EU
Instructions to use jinaai/jina-embeddings-v5-omni-small with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use jinaai/jina-embeddings-v5-omni-small with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("feature-extraction", model="jinaai/jina-embeddings-v5-omni-small", trust_remote_code=True)
```
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("jinaai/jina-embeddings-v5-omni-small", trust_remote_code=True, dtype="auto")
```
- sentence-transformers
How to use jinaai/jina-embeddings-v5-omni-small with sentence-transformers:
```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("jinaai/jina-embeddings-v5-omni-small", trust_remote_code=True)

sentences = [
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium."
]
embeddings = model.encode(sentences)

similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
- Notebooks
- Google Colab
- Kaggle
Why does the model take more than 8 minutes to load?
#2
by tepirale - opened
Why does the model take more than 8 minutes to load?
!pip install --force-reinstall --no-deps transformers==5.5.0 "tokenizers>=0.22.0,<=0.23.0"
# At the BEGINNING of the file, before any import of transformers/sentence_transformers
import os
# Hugging Face Hub / Transformers runtime switches, applied in one call.
os.environ.update(
    {
        # After the first download: removes network checks to the Hub.
        "HF_HUB_OFFLINE": "1",
        "TRANSFORMERS_OFFLINE": "1",
        # Metadata check: 2s max.
        "HF_HUB_ETAG_TIMEOUT": "2",
        "HF_HUB_DOWNLOAD_TIMEOUT": "5",
        "HF_HUB_DISABLE_TELEMETRY": "1",
    }
)

# Model repository id, target device and modality for the embedder.
JINA_ID = "jinaai/jina-embeddings-v5-omni-small"
JINA_DEVICE = "cuda:0"
JINA_MODALITY = "vision"
- code: 1
# Image placeholder that the docs require for the vision tower
JINA_IMG_PLACEHOLDER = "<|vision_start|><|image_pad|><|vision_end|>"
class MultimodalEmbedder:
    """Text and image embedder built on ``AutoModel`` and ``AutoProcessor``.

    The model and processor are loaded from ``JINA_ID`` with remote code
    enabled; the model is moved to ``JINA_DEVICE`` and switched to eval mode.
    Each public method returns the first embedding row after it has been
    passed through the ``_l2`` helper (defined outside this block;
    presumably L2 normalization -- confirm).
    """

    def __init__(self):
        # Imported inside the constructor rather than at module level.
        from transformers import AutoModel, AutoProcessor

        print(f"[load] embebedor multimodal: {JINA_ID} (modality={JINA_MODALITY})")
        loaded = AutoModel.from_pretrained(
            JINA_ID,
            trust_remote_code=True,
            # Author's note: previously passed via model_kwargs; here it is
            # given directly.
            default_task="retrieval",
            # Author's note: "vision" loads vision + text and skips audio.
            modality=JINA_MODALITY,
            # Author's note: optional; the docs say it is not required.
            dtype=torch.bfloat16,
        )
        on_device = loaded.to(JINA_DEVICE)
        self.model = on_device.eval()  # author's note: enable or disable
        self.proc = AutoProcessor.from_pretrained(JINA_ID, trust_remote_code=True)

    @torch.inference_mode()
    def _embed(self, **proc_kwargs):
        """Run the processor and the model on *proc_kwargs*; return one vector.

        The keyword arguments are forwarded to the processor together with
        ``return_tensors="pt"``, and the processed batch is moved to the
        model's device before ``model.embed`` is called.
        """
        batch = self.proc(return_tensors="pt", **proc_kwargs)
        batch = batch.to(self.model.device)
        # Author's note: the output already comes L2-normalized.
        embedded = self.model.embed(**batch)
        # Author's note: (dim,) in float32.
        first_row = embedded[0].float().cpu().numpy()
        return _l2(first_row)

    def embed_query_text(self, text):
        """Embed *text* with the ``"Query: "`` prefix; falsy text becomes ``""``."""
        prompt = "Query: " + (text or "")
        return self._embed(text=prompt)

    def embed_doc_text(self, text):
        """Embed *text* with the ``"Document: "`` prefix; falsy text becomes ``""``."""
        prompt = "Document: " + (text or "")
        return self._embed(text=prompt)

    def embed_doc_image(self, pil_image):
        """Embed *pil_image* as a document, using the image placeholder as text."""
        prompt = "Document: " + JINA_IMG_PLACEHOLDER
        return self._embed(images=pil_image, text=prompt)
- code: 2
class MultimodalEmbedder:
    """Text and image embedder built on ``SentenceTransformer``.

    The model is loaded from ``JINA_ID`` onto ``JINA_DEVICE`` with remote
    code enabled. Every public method passes the encoder output through the
    ``_l2`` helper (defined outside this block; presumably L2
    normalization -- confirm).
    """

    def __init__(self):
        # Imported inside the constructor rather than at module level.
        from sentence_transformers import SentenceTransformer

        print(f"[load] embebedor multimodal: {JINA_ID} (modality={JINA_MODALITY})")
        extra_model_args = {"default_task": "retrieval", "modality": JINA_MODALITY}
        self.m = SentenceTransformer(
            JINA_ID,
            trust_remote_code=True,
            device=JINA_DEVICE,
            model_kwargs=extra_model_args,
            # max_memory={0: GPU_MTP, "cpu": CPU_OFFLOAD},  # left disabled
        )

    def _enc(self, fn, x):
        """Call the encoder *fn* on *x* and pass the result through ``_l2``."""
        raw = fn(x)
        return _l2(raw)

    def embed_query_text(self, text):
        """Embed *text* with the model's ``encode_query`` method."""
        encoder = self.m.encode_query
        return self._enc(encoder, text)

    def embed_doc_text(self, text):
        """Embed *text* with the model's ``encode_document`` method."""
        encoder = self.m.encode_document
        return self._enc(encoder, text)

    def embed_doc_image(self, pil_image):
        """Embed *pil_image* with the model's ``encode_document`` method."""
        encoder = self.m.encode_document
        return self._enc(encoder, pil_image)
thank you so much
It happens every time I load the model (without downloading).
OK, thanks. This seems like an environment- or hardware-specific issue. Are you using Colab or Kaggle? Have you checked whether loading is slow for other models too?