Spaces:
Sleeping
Sleeping
Gabriele Tuccio committed on
Commit ·
d3289a1
1
Parent(s): faf3baa
update
Browse files
app.py
CHANGED
|
@@ -376,7 +376,7 @@ def generate_text(model, tokenizer, text, logit_processor, streamer, max_new_tok
|
|
| 376 |
|
| 377 |
|
| 378 |
@spaces.GPU
|
| 379 |
-
def run_grammarllm(prompt, productions_json, model_choice):
|
| 380 |
setup_logging()
|
| 381 |
|
| 382 |
# Parsing productions
|
|
@@ -397,9 +397,10 @@ def run_grammarllm(prompt, productions_json, model_choice):
|
|
| 397 |
}
|
| 398 |
|
| 399 |
try:
|
|
|
|
| 400 |
regex_dict = {key: re.compile(pattern) for key, pattern in regex_raw.items()}
|
| 401 |
-
except re.error as e:
|
| 402 |
-
return f"Errore
|
| 403 |
|
| 404 |
try:
|
| 405 |
# Selezione del modello basata sulla scelta dell'utente
|
|
@@ -407,8 +408,8 @@ def run_grammarllm(prompt, productions_json, model_choice):
|
|
| 407 |
model_name = "gpt2"
|
| 408 |
elif model_choice == "Llama 3.2 1B":
|
| 409 |
model_name = "meta-llama/Llama-3.2-1B-Instruct"
|
| 410 |
-
elif model_choice == "Llama 3.1 8B":
|
| 411 |
-
|
| 412 |
else:
|
| 413 |
return f"Modello non supportato: {model_choice}", None
|
| 414 |
|
|
@@ -471,7 +472,7 @@ def run_grammarllm(prompt, productions_json, model_choice):
|
|
| 471 |
|
| 472 |
|
| 473 |
default_grammars = {
|
| 474 |
-
"
|
| 475 |
"S*": ["<<positive>> A", "<<negative>> B", "<<neutral>> C"],
|
| 476 |
"A": ["<<happy>> D", "<<peaceful>> E", "<<joyful>> F"],
|
| 477 |
"B": ["<<sad>>", "<<angry>>", "<<frustrated>>"],
|
|
@@ -481,15 +482,14 @@ default_grammars = {
|
|
| 481 |
"F": ["<<excited>>"]
|
| 482 |
}, indent=4),
|
| 483 |
|
| 484 |
-
"
|
| 485 |
-
|
| 486 |
-
'A': ["number", "letters", "ε"],
|
| 487 |
-
'B': ['<<)>> letters R'],
|
| 488 |
-
'R': ['C', 'D'],
|
| 489 |
-
'C': ['<<calm>>', '<<indifferent>>', '<<unemotional>>'],
|
| 490 |
-
'D': ['<<angry>>', '<<frustrated>>'],
|
| 491 |
-
'V': ["<<option>>"],
|
| 492 |
}, indent=4),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
}
|
| 494 |
|
| 495 |
|
|
@@ -539,7 +539,7 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
|
|
| 539 |
|
| 540 |
with gr.Column(scale=1):
|
| 541 |
model_choice = gr.Dropdown(
|
| 542 |
-
choices=["GPT-2", "Llama 3.2 1B", "Llama 3.1 8B"],
|
| 543 |
label="Choose the model",
|
| 544 |
value="GPT-2",
|
| 545 |
interactive=True
|
|
@@ -550,7 +550,7 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
|
|
| 550 |
grammar_choice = gr.Dropdown(
|
| 551 |
list(default_grammars.keys()),
|
| 552 |
label="Choose Productions (JSON)",
|
| 553 |
-
value="
|
| 554 |
interactive=True,
|
| 555 |
elem_id="grammar_choice"
|
| 556 |
)
|
|
@@ -564,8 +564,23 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
|
|
| 564 |
productions_text = gr.Textbox(
|
| 565 |
label="Productions (JSON)",
|
| 566 |
lines=15,
|
| 567 |
-
value=default_grammars["
|
| 568 |
-
info="Type your here your grammar
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 569 |
)
|
| 570 |
|
| 571 |
with gr.Row():
|
|
@@ -583,15 +598,18 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
|
|
| 583 |
with gr.Column():
|
| 584 |
zip_file = gr.File(label="📦 Download ZIP (if available)")
|
| 585 |
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
#
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
#
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
|
|
|
|
|
|
|
|
|
| 595 |
|
| 596 |
# Callback: quando cambio dropdown, aggiorno productions_text
|
| 597 |
grammar_choice.change(
|
|
@@ -610,19 +628,26 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
|
|
| 610 |
# Al submit del form chiamo run_grammarllm
|
| 611 |
submit_btn.click(
|
| 612 |
fn=run_grammarllm,
|
| 613 |
-
inputs=[prompt_input, productions_text, model_choice],
|
| 614 |
outputs=[output_text, zip_file],
|
| 615 |
show_progress=True
|
| 616 |
)
|
| 617 |
|
| 618 |
# Funzione per pulire i campi
|
| 619 |
def clear_fields():
|
| 620 |
-
return "", default_grammars["
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
|
| 622 |
clear_btn.click(
|
| 623 |
fn=clear_fields,
|
| 624 |
-
outputs=[prompt_input, productions_text, output_text, zip_file]
|
| 625 |
)
|
| 626 |
-
|
| 627 |
if __name__ == "__main__":
|
| 628 |
demo.launch()
|
|
|
|
| 376 |
|
| 377 |
|
| 378 |
@spaces.GPU
|
| 379 |
+
def run_grammarllm(prompt, productions_json, model_choice,regex_json):
|
| 380 |
setup_logging()
|
| 381 |
|
| 382 |
# Parsing productions
|
|
|
|
| 397 |
}
|
| 398 |
|
| 399 |
try:
|
| 400 |
+
regex_raw = json.loads(regex_json)
|
| 401 |
regex_dict = {key: re.compile(pattern) for key, pattern in regex_raw.items()}
|
| 402 |
+
except (json.JSONDecodeError, re.error) as e:
|
| 403 |
+
return f"Errore nelle regex personalizzate: {str(e)}", None
|
| 404 |
|
| 405 |
try:
|
| 406 |
# Selezione del modello basata sulla scelta dell'utente
|
|
|
|
| 408 |
model_name = "gpt2"
|
| 409 |
elif model_choice == "Llama 3.2 1B":
|
| 410 |
model_name = "meta-llama/Llama-3.2-1B-Instruct"
|
| 411 |
+
#elif model_choice == "Llama 3.1 8B":
|
| 412 |
+
# model_name = "meta-llama/Llama-3.1-8B-Instruct"
|
| 413 |
else:
|
| 414 |
return f"Modello non supportato: {model_choice}", None
|
| 415 |
|
|
|
|
| 472 |
|
| 473 |
|
| 474 |
default_grammars = {
|
| 475 |
+
"HC Grammar": json.dumps({
|
| 476 |
"S*": ["<<positive>> A", "<<negative>> B", "<<neutral>> C"],
|
| 477 |
"A": ["<<happy>> D", "<<peaceful>> E", "<<joyful>> F"],
|
| 478 |
"B": ["<<sad>>", "<<angry>>", "<<frustrated>>"],
|
|
|
|
| 482 |
"F": ["<<excited>>"]
|
| 483 |
}, indent=4),
|
| 484 |
|
| 485 |
+
"VR Grammar": json.dumps({
|
| 486 |
+
"S*": ["<<positive>> S*", "<<negative>> S*", "<<neutral>> S*"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
}, indent=4),
|
| 488 |
+
|
| 489 |
+
"General Grammar": json.dumps({
|
| 490 |
+
'S*': ["( LETTERS )"],
|
| 491 |
+
'LETTERS': ['letters number LETTERS',"ε"]
|
| 492 |
+
}, indent=4),
|
| 493 |
}
|
| 494 |
|
| 495 |
|
|
|
|
| 539 |
|
| 540 |
with gr.Column(scale=1):
|
| 541 |
model_choice = gr.Dropdown(
|
| 542 |
+
choices=["GPT-2", "Llama 3.2 1B"],#, "Llama 3.1 8B"],
|
| 543 |
label="Choose the model",
|
| 544 |
value="GPT-2",
|
| 545 |
interactive=True
|
|
|
|
| 550 |
grammar_choice = gr.Dropdown(
|
| 551 |
list(default_grammars.keys()),
|
| 552 |
label="Choose Productions (JSON)",
|
| 553 |
+
value="HC Grammar",
|
| 554 |
interactive=True,
|
| 555 |
elem_id="grammar_choice"
|
| 556 |
)
|
|
|
|
| 564 |
productions_text = gr.Textbox(
|
| 565 |
label="Productions (JSON)",
|
| 566 |
lines=15,
|
| 567 |
+
value=default_grammars["HC Grammar"],
|
| 568 |
+
info="Type your here your grammar in json fromat"
|
| 569 |
+
)
|
| 570 |
+
|
| 571 |
+
regex_text = gr.Textbox(
|
| 572 |
+
label="Regex to define Terminals (JSON)",
|
| 573 |
+
lines=10,
|
| 574 |
+
value=json.dumps({
|
| 575 |
+
"regex_alfanum": "[a-zA-Z0-9]+",
|
| 576 |
+
"regex_letters": "[a-zA-Z]+",
|
| 577 |
+
"regex_number": "\\d+",
|
| 578 |
+
"regex_decimal": "\\d+([.,]\\d+)?",
|
| 579 |
+
"regex_var": "[a-zA-Z_][a-zA-Z0-9_]*",
|
| 580 |
+
"regex_)": "\\)",
|
| 581 |
+
"regex_(": "\\("
|
| 582 |
+
}, indent=4),
|
| 583 |
+
info="Modify these common regex"
|
| 584 |
)
|
| 585 |
|
| 586 |
with gr.Row():
|
|
|
|
| 598 |
with gr.Column():
|
| 599 |
zip_file = gr.File(label="📦 Download ZIP (if available)")
|
| 600 |
|
| 601 |
+
with gr.Accordion("ℹ️ About GrammarLLM and LLprefix", open=False):
|
| 602 |
+
gr.Markdown("""
|
| 603 |
+
### 📚 What is GrammarLLM?
|
| 604 |
+
GrammarLLM enables structured text generation constrained by a formal grammar, using LLMs (Large Language Models) such as GPT-2 or LLaMA.
|
| 605 |
+
|
| 606 |
+
### 🔍 What you can do:
|
| 607 |
+
- **Hierarchical classification**: Define class hierarchies, as shown in the "HC Grammar" example.
|
| 608 |
+
- **Vocabulary restriction**: Specify a limited set of valid words to be used. Including examples in the prompt is highly recommended to improve output quality.
|
| 609 |
+
- **Constrained generation**: Use LLprefix to define any regular or context-free grammar in JSON format.
|
| 610 |
+
|
| 611 |
+
📄 For more details about LLprefix and the underlying algorithms, refer to the official paper.
|
| 612 |
+
""")
|
| 613 |
|
| 614 |
# Callback: quando cambio dropdown, aggiorno productions_text
|
| 615 |
grammar_choice.change(
|
|
|
|
| 628 |
# Al submit del form chiamo run_grammarllm
|
| 629 |
submit_btn.click(
|
| 630 |
fn=run_grammarllm,
|
| 631 |
+
inputs=[prompt_input, productions_text, model_choice, regex_text],
|
| 632 |
outputs=[output_text, zip_file],
|
| 633 |
show_progress=True
|
| 634 |
)
|
| 635 |
|
| 636 |
# Funzione per pulire i campi
|
| 637 |
def clear_fields():
    """Reset every UI field to its initial state.

    Returns a 5-tuple matching the ``outputs`` list wired to
    ``clear_btn.click``: (prompt_input, productions_text, output_text,
    zip_file, regex_text).
    """
    # Bug fix: the dict key is "HC Grammar" (see default_grammars);
    # the previous key "HC" raised KeyError whenever Clear was clicked.
    return "", default_grammars["HC Grammar"], "", None, json.dumps({
        "regex_alfanum": "[a-zA-Z0-9]+",
        "regex_letters": "[a-zA-Z]+",
        "regex_number": "\\d+",
        "regex_decimal": "\\d+([.,]\\d+)?",
        "regex_var": "[a-zA-Z_][a-zA-Z0-9_]*",
        "regex_)": "\\)",
        "regex_(": "\\("
    }, indent=4)
|
| 647 |
|
| 648 |
clear_btn.click(
|
| 649 |
fn=clear_fields,
|
| 650 |
+
outputs=[prompt_input, productions_text, output_text, zip_file, regex_text]
|
| 651 |
)
|
|
|
|
| 652 |
if __name__ == "__main__":
|
| 653 |
demo.launch()
|