RexReranker
Collection
6 items
•
Updated
A distributional e-commerce neural reranker based on RexBERT-base that predicts relevance scores as a probability distribution, providing both accurate relevance predictions and uncertainty estimates.
pip install transformers sentence-transformers torch
from transformers import AutoModel, AutoTokenizer
import torch
# Pick the target device up front (GPU when one is visible, CPU otherwise)
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and model, then put the model on the device in eval mode
tokenizer = AutoTokenizer.from_pretrained("thebajajra/RexReranker-base")
model = AutoModel.from_pretrained("thebajajra/RexReranker-base", trust_remote_code=True)
model = model.to(device).eval()

# The query-document pair to score
query = "best laptop for programming"
title = "MacBook Pro M3"
description = "Powerful laptop with M3 chip, 16GB RAM, perfect for developers and creative professionals"

# Text A carries the query; text B carries the title and description
text_a = f"Query: {query}"
text_b = f"Title: {title}\nDescription: {description}"
inputs = tokenizer(text_a, text_b, return_tensors="pt", truncation=True, max_length=2048)
inputs = inputs.to(device)

# Score the pair without tracking gradients
with torch.no_grad():
    score = model.predict_relevance(**inputs)

print(f"Relevance Score: {score.item():.4f}")
from sentence_transformers import CrossEncoder
# Load the checkpoint through the sentence-transformers CrossEncoder wrapper
model = CrossEncoder("thebajajra/RexReranker-base", trust_remote_code=True)

# One (query, document) pair
query = "best laptop for programming"
document = "MacBook Pro M3 - Powerful laptop with M3 chip for developers"

# predict() takes a list of pairs; unpack the single score it returns
(score,) = model.predict([(query, document)])
print(f"Score: {score:.4f}")
from sentence_transformers import CrossEncoder
model = CrossEncoder("thebajajra/RexReranker-base", trust_remote_code=True)

query = "best laptop for programming"
documents = [
    "MacBook Pro M3 - Powerful laptop with M3 chip for developers",
    "Gaming Mouse RGB - High precision gaming mouse with 16000 DPI",
    "ThinkPad X1 Carbon - Business ultrabook with long battery life",
    "Mechanical Keyboard - Cherry MX switches for typing comfort",
    "Dell XPS 15 - Premium laptop with 4K OLED display",
]

# Score every (query, document) pair in a single predict() call
pairs = [(query, doc) for doc in documents]
scores = model.predict(pairs)

# Print the documents from highest to lowest score (titles cut to 60 chars)
print(f"Query: {query}\n")
ranked = sorted(zip(scores, documents), key=lambda item: item[0], reverse=True)
for score, doc in ranked:
    print(f" {score:.4f} | {doc[:60]}")
from sentence_transformers import CrossEncoder
model = CrossEncoder("thebajajra/RexReranker-base", trust_remote_code=True)

query = "wireless headphones with noise cancellation"
documents = [
    "Sony WH-1000XM5 - Industry-leading noise cancellation headphones",
    "Apple AirPods Max - Premium over-ear headphones with spatial audio",
    "Bose QuietComfort 45 - Comfortable wireless noise cancelling headphones",
    "JBL Tune 750BTNC - Affordable wireless headphones with ANC",
    "Logitech Gaming Headset - Wired gaming headphones with microphone",
]

# Let rank() score the documents and keep the three best hits
results = model.rank(query, documents, top_k=3)

print(f"Query: {query}\n")
print("Top 3 Results:")
for hit in results:
    # Each hit refers back to its document by position in `documents`
    text = documents[hit["corpus_id"]]
    print(f" {hit['score']:.4f} | {text[:60]}")
from transformers import AutoModel, AutoTokenizer
import torch
model = AutoModel.from_pretrained("thebajajra/RexReranker-base", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("thebajajra/RexReranker-base")
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device).eval()

# Prepare inputs
inputs = tokenizer(
    "Query: best laptop for programming",
    "Title: MacBook Pro\nDescription: Great laptop for developers",
    return_tensors="pt",
    truncation=True,
).to(device)

# Get prediction with uncertainty
with torch.no_grad():
    result = model.predict_with_uncertainty(**inputs)

print(f"Relevance: {result['relevance'].item():.4f}")
print(f"Variance: {result['variance'].item():.6f}")  # Higher = more uncertain
print(f"Entropy: {result['entropy'].item():.4f}")  # Higher = more uncertain

# Access full probability distribution (first example in the batch)
print("\nDistribution over bins:")
probs = result['probs'][0].cpu().numpy()

# Bin i is labelled i / (n - 1), i.e. evenly spaced labels from 0 to 1.
# Clamp the divisor so a one-bin distribution does not divide by zero.
last_bin = max(len(probs) - 1, 1)
for i, p in enumerate(probs):
    bin_center = i / last_bin
    # One full-block character per 2% of probability mass
    bar = "█" * int(p * 50)
    print(f" {bin_center:.1f}: {bar} ({p:.3f})")
from transformers import AutoModel, AutoTokenizer
import torch
from torch.utils.data import DataLoader
# Resolve the device, then load the tokenizer and an eval-mode model onto it
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("thebajajra/RexReranker-base")
model = AutoModel.from_pretrained("thebajajra/RexReranker-base", trust_remote_code=True)
model = model.to(device).eval()
def rerank_batch(query: str, documents: list, batch_size: int = 32) -> list:
    """Rerank documents for a query with batched inference.

    Uses the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        query: Search query; sent to the tokenizer as ``"Query: {query}"``.
        documents: Dicts read with ``.get("title", "")`` and
            ``.get("description", "")``. Each dict is updated in place
            with a ``"score"`` key.
        batch_size: Number of documents scored per forward pass (>= 1).

    Returns:
        A new list containing the same dict objects, sorted by ``"score"``
        in descending order. An empty ``documents`` list yields ``[]``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # Tokenize without padding; padding is deferred to tokenizer.pad(),
    # which is applied separately to each batch below.
    encoded = [
        tokenizer(
            f"Query: {query}",
            f"Title: {doc.get('title', '')}\nDescription: {doc.get('description', '')}",
            truncation=True,
            max_length=2048,
            padding=False,
        )
        for doc in documents
    ]

    # Batch inference
    all_scores = []
    for start in range(0, len(encoded), batch_size):
        batch = encoded[start:start + batch_size]
        padded = tokenizer.pad(batch, return_tensors="pt").to(device)
        with torch.no_grad():
            scores = model.predict_relevance(**padded)
        # Flatten before tolist() so a batch that comes back as a 0-d
        # tensor still extends the list with floats.
        all_scores.extend(scores.reshape(-1).cpu().tolist())

    # Attach the scores to the caller's dicts and sort best-first
    for doc, score in zip(documents, all_scores):
        doc["score"] = score
    return sorted(documents, key=lambda d: d["score"], reverse=True)
# Example usage: rerank three product dicts for one query
query = "best laptop for programming"
documents = [
    {"title": "MacBook Pro M3", "description": "Powerful laptop for developers"},
    {"title": "Gaming Mouse", "description": "High DPI gaming mouse"},
    {"title": "ThinkPad X1", "description": "Business laptop with long battery"},
]

ranked = rerank_batch(query, documents)

# One line per document, best score first
for doc in ranked:
    score, name = doc["score"], doc["title"]
    print(f"{score:.4f} | {name}")
The model expects query-document pairs formatted as:
| Field | Format |
|---|---|
| Text A (Query) | Query: {your search query} |
| Text B (Document) | Title: {document title}\nDescription: {document description} |
- outputs.logits: Shape [B, 1] - Single relevance score per example
- outputs.relevance: Shape [B] - Same as logits squeezed

Additional fields (with output_distribution=True or predict_with_uncertainty()):
- relevance: Expected relevance score [0, 1]
- variance: Prediction variance (higher = less confident)
- entropy: Distribution entropy (higher = less confident)
- probs: Full probability distribution over bins
- distribution_logits: Raw logits before softmax

Base model
thebajajra/RexBERT-base