Spaces:

AndaiMD
/

brainbench

Sleeping

AndaiMD committed on Jun 19, 2025

Commit

f04940c

1 Parent(s): d1e903b

predict

Files changed (1) hide show

app/main.py CHANGED Viewed

@@ -9,29 +9,34 @@ model, tokenizer = load_model()
 @app.post("/predict")
 async def predict(request: Request):
     data = await request.json()
-    input_text = data.get("input", "")
-    # Extract last 5 words
-    last_5_words = " ".join(input_text.strip().split()[-5:])
-    # Tokenize and generate continuation
-    inputs = tokenizer(last_5_words, return_tensors="pt").to(model.device)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
             max_new_tokens=20,
             do_sample=True,
-            temperature=0.8,
             top_k=50,
-            top_p=0.95,
-            pad_token_id=tokenizer.eos_token_id
         )
-    # Decode generated text
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Remove the prompt portion to isolate generated words
-    continuation = generated_text[len(last_5_words):].strip()
     return JSONResponse(content={"output": continuation})

 @app.post("/predict")
 async def predict(request: Request):
     data = await request.json()
+    raw_abstract = data.get("input", "")
+    # Get the last sentence (or few words) of the abstract
+    import re
+    sentences = re.split(r'(?<=[.!?]) +', raw_abstract.strip())
+    abstract_tail = sentences[-1] if len(sentences) > 1 else raw_abstract
+    # Construct the prompt
+    prompt = (
+        f"This neuroscience abstract ends as follows:\n"
+        f"\"{abstract_tail}\"\n\n"
+        f"Complete the next sentence logically:"
+    )
+    # Tokenize and generate
+    inputs = tokenizer(prompt, return_tensors="pt")
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
             max_new_tokens=20,
             do_sample=True,
+            temperature=0.7,
             top_k=50,
+            top_p=0.95
         )
+    # Decode and trim
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    continuation = response[len(prompt):].strip()
     return JSONResponse(content={"output": continuation})