Spaces:
Sleeping
Sleeping
Gabriele Tuccio committed on
Commit ·
d3289a1
1
Parent(s): faf3baa
update
Browse files
app.py
CHANGED
|
@@ -376,7 +376,7 @@ def generate_text(model, tokenizer, text, logit_processor, streamer, max_new_tok
|
|
| 376 |
|
| 377 |
|
| 378 |
@spaces.GPU
|
| 379 |
-
def run_grammarllm(prompt, productions_json, model_choice):
|
| 380 |
setup_logging()
|
| 381 |
|
| 382 |
# Parsing productions
|
|
@@ -397,9 +397,10 @@ def run_grammarllm(prompt, productions_json, model_choice):
|
|
| 397 |
}
|
| 398 |
|
| 399 |
try:
|
|
|
|
| 400 |
regex_dict = {key: re.compile(pattern) for key, pattern in regex_raw.items()}
|
| 401 |
-
except re.error as e:
|
| 402 |
-
return f"Errore
|
| 403 |
|
| 404 |
try:
|
| 405 |
# Selezione del modello basata sulla scelta dell'utente
|
|
@@ -407,8 +408,8 @@ def run_grammarllm(prompt, productions_json, model_choice):
|
|
| 407 |
model_name = "gpt2"
|
| 408 |
elif model_choice == "Llama 3.2 1B":
|
| 409 |
model_name = "meta-llama/Llama-3.2-1B-Instruct"
|
| 410 |
-
elif model_choice == "Llama 3.1 8B":
|
| 411 |
-
|
| 412 |
else:
|
| 413 |
return f"Modello non supportato: {model_choice}", None
|
| 414 |
|
|
@@ -471,7 +472,7 @@ def run_grammarllm(prompt, productions_json, model_choice):
|
|
| 471 |
|
| 472 |
|
| 473 |
default_grammars = {
|
| 474 |
-
"
|
| 475 |
"S*": ["<<positive>> A", "<<negative>> B", "<<neutral>> C"],
|
| 476 |
"A": ["<<happy>> D", "<<peaceful>> E", "<<joyful>> F"],
|
| 477 |
"B": ["<<sad>>", "<<angry>>", "<<frustrated>>"],
|
|
@@ -481,15 +482,14 @@ default_grammars = {
|
|
| 481 |
"F": ["<<excited>>"]
|
| 482 |
}, indent=4),
|
| 483 |
|
| 484 |
-
"
|
| 485 |
-
|
| 486 |
-
'A': ["number", "letters", "ε"],
|
| 487 |
-
'B': ['<<)>> letters R'],
|
| 488 |
-
'R': ['C', 'D'],
|
| 489 |
-
'C': ['<<calm>>', '<<indifferent>>', '<<unemotional>>'],
|
| 490 |
-
'D': ['<<angry>>', '<<frustrated>>'],
|
| 491 |
-
'V': ["<<option>>"],
|
| 492 |
}, indent=4),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
}
|
| 494 |
|
| 495 |
|
|
@@ -539,7 +539,7 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
|
|
| 539 |
|
| 540 |
with gr.Column(scale=1):
|
| 541 |
model_choice = gr.Dropdown(
|
| 542 |
-
choices=["GPT-2", "Llama 3.2 1B", "Llama 3.1 8B"],
|
| 543 |
label="Choose the model",
|
| 544 |
value="GPT-2",
|
| 545 |
interactive=True
|
|
@@ -550,7 +550,7 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
|
|
| 550 |
grammar_choice = gr.Dropdown(
|
| 551 |
list(default_grammars.keys()),
|
| 552 |
label="Choose Productions (JSON)",
|
| 553 |
-
value="
|
| 554 |
interactive=True,
|
| 555 |
elem_id="grammar_choice"
|
| 556 |
)
|
|
@@ -564,8 +564,23 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
|
|
| 564 |
productions_text = gr.Textbox(
|
| 565 |
label="Productions (JSON)",
|
| 566 |
lines=15,
|
| 567 |
-
value=default_grammars["
|
| 568 |
-
info="Type your here your grammar
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 569 |
)
|
| 570 |
|
| 571 |
with gr.Row():
|
|
@@ -583,15 +598,18 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
|
|
| 583 |
with gr.Column():
|
| 584 |
zip_file = gr.File(label="📦 Download ZIP (if available)")
|
| 585 |
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
#
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
#
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
|
|
|
|
|
|
|
|
|
| 595 |
|
| 596 |
# Callback: quando cambio dropdown, aggiorno productions_text
|
| 597 |
grammar_choice.change(
|
|
@@ -610,19 +628,26 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
|
|
| 610 |
# Al submit del form chiamo run_grammarllm
|
| 611 |
submit_btn.click(
|
| 612 |
fn=run_grammarllm,
|
| 613 |
-
inputs=[prompt_input, productions_text, model_choice],
|
| 614 |
outputs=[output_text, zip_file],
|
| 615 |
show_progress=True
|
| 616 |
)
|
| 617 |
|
| 618 |
# Funzione per pulire i campi
|
| 619 |
def clear_fields():
|
| 620 |
-
return "", default_grammars["
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
|
| 622 |
clear_btn.click(
|
| 623 |
fn=clear_fields,
|
| 624 |
-
outputs=[prompt_input, productions_text, output_text, zip_file]
|
| 625 |
)
|
| 626 |
-
|
| 627 |
if __name__ == "__main__":
|
| 628 |
demo.launch()
|
|
|
|
| 376 |
|
| 377 |
|
| 378 |
@spaces.GPU
|
| 379 |
+
def run_grammarllm(prompt, productions_json, model_choice,regex_json):
|
| 380 |
setup_logging()
|
| 381 |
|
| 382 |
# Parsing productions
|
|
|
|
| 397 |
}
|
| 398 |
|
| 399 |
try:
|
| 400 |
+
regex_raw = json.loads(regex_json)
|
| 401 |
regex_dict = {key: re.compile(pattern) for key, pattern in regex_raw.items()}
|
| 402 |
+
except (json.JSONDecodeError, re.error) as e:
|
| 403 |
+
return f"Errore nelle regex personalizzate: {str(e)}", None
|
| 404 |
|
| 405 |
try:
|
| 406 |
# Selezione del modello basata sulla scelta dell'utente
|
|
|
|
| 408 |
model_name = "gpt2"
|
| 409 |
elif model_choice == "Llama 3.2 1B":
|
| 410 |
model_name = "meta-llama/Llama-3.2-1B-Instruct"
|
| 411 |
+
#elif model_choice == "Llama 3.1 8B":
|
| 412 |
+
# model_name = "meta-llama/Llama-3.1-8B-Instruct"
|
| 413 |
else:
|
| 414 |
return f"Modello non supportato: {model_choice}", None
|
| 415 |
|
|
|
|
| 472 |
|
| 473 |
|
| 474 |
default_grammars = {
|
| 475 |
+
"HC Grammar": json.dumps({
|
| 476 |
"S*": ["<<positive>> A", "<<negative>> B", "<<neutral>> C"],
|
| 477 |
"A": ["<<happy>> D", "<<peaceful>> E", "<<joyful>> F"],
|
| 478 |
"B": ["<<sad>>", "<<angry>>", "<<frustrated>>"],
|
|
|
|
| 482 |
"F": ["<<excited>>"]
|
| 483 |
}, indent=4),
|
| 484 |
|
| 485 |
+
"VR Grammar": json.dumps({
|
| 486 |
+
"S*": ["<<positive>> S*", "<<negative>> S*", "<<neutral>> S*"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
}, indent=4),
|
| 488 |
+
|
| 489 |
+
"General Grammar": json.dumps({
|
| 490 |
+
'S*': ["( LETTERS )"],
|
| 491 |
+
'LETTERS': ['letters number LETTERS',"ε"]
|
| 492 |
+
}, indent=4),
|
| 493 |
}
|
| 494 |
|
| 495 |
|
|
|
|
| 539 |
|
| 540 |
with gr.Column(scale=1):
|
| 541 |
model_choice = gr.Dropdown(
|
| 542 |
+
choices=["GPT-2", "Llama 3.2 1B"],#, "Llama 3.1 8B"],
|
| 543 |
label="Choose the model",
|
| 544 |
value="GPT-2",
|
| 545 |
interactive=True
|
|
|
|
| 550 |
grammar_choice = gr.Dropdown(
|
| 551 |
list(default_grammars.keys()),
|
| 552 |
label="Choose Productions (JSON)",
|
| 553 |
+
value="HC Grammar",
|
| 554 |
interactive=True,
|
| 555 |
elem_id="grammar_choice"
|
| 556 |
)
|
|
|
|
| 564 |
productions_text = gr.Textbox(
|
| 565 |
label="Productions (JSON)",
|
| 566 |
lines=15,
|
| 567 |
+
value=default_grammars["HC Grammar"],
|
| 568 |
+
info="Type your here your grammar in json fromat"
|
| 569 |
+
)
|
| 570 |
+
|
| 571 |
+
regex_text = gr.Textbox(
|
| 572 |
+
label="Regex to define Terminals (JSON)",
|
| 573 |
+
lines=10,
|
| 574 |
+
value=json.dumps({
|
| 575 |
+
"regex_alfanum": "[a-zA-Z0-9]+",
|
| 576 |
+
"regex_letters": "[a-zA-Z]+",
|
| 577 |
+
"regex_number": "\\d+",
|
| 578 |
+
"regex_decimal": "\\d+([.,]\\d+)?",
|
| 579 |
+
"regex_var": "[a-zA-Z_][a-zA-Z0-9_]*",
|
| 580 |
+
"regex_)": "\\)",
|
| 581 |
+
"regex_(": "\\("
|
| 582 |
+
}, indent=4),
|
| 583 |
+
info="Modify these common regex"
|
| 584 |
)
|
| 585 |
|
| 586 |
with gr.Row():
|
|
|
|
| 598 |
with gr.Column():
|
| 599 |
zip_file = gr.File(label="📦 Download ZIP (if available)")
|
| 600 |
|
| 601 |
+
with gr.Accordion("ℹ️ About GrammarLLM and LLprefix", open=False):
|
| 602 |
+
gr.Markdown("""
|
| 603 |
+
### 📚 What is GrammarLLM?
|
| 604 |
+
GrammarLLM enables structured text generation constrained by a formal grammar, using LLMs (Large Language Models) such as GPT-2 or LLaMA.
|
| 605 |
+
|
| 606 |
+
### 🔍 What you can do:
|
| 607 |
+
- **Hierarchical classification**: Define class hierarchies, as shown in the "HC Grammar" example.
|
| 608 |
+
- **Vocabulary restriction**: Specify a limited set of valid words to be used. Including examples in the prompt is highly recommended to improve output quality.
|
| 609 |
+
- **Constrained generation**: Use LLprefix to define any regular or context-free grammar in JSON format.
|
| 610 |
+
|
| 611 |
+
📄 For more details about LLprefix and the underlying algorithms, refer to the official paper.
|
| 612 |
+
""")
|
| 613 |
|
| 614 |
# Callback: quando cambio dropdown, aggiorno productions_text
|
| 615 |
grammar_choice.change(
|
|
|
|
| 628 |
# Al submit del form chiamo run_grammarllm
|
| 629 |
submit_btn.click(
|
| 630 |
fn=run_grammarllm,
|
| 631 |
+
inputs=[prompt_input, productions_text, model_choice, regex_text],
|
| 632 |
outputs=[output_text, zip_file],
|
| 633 |
show_progress=True
|
| 634 |
)
|
| 635 |
|
| 636 |
# Funzione per pulire i campi
|
| 637 |
def clear_fields():
    """Reset every UI field to its initial state.

    Returns a 5-tuple matching the ``outputs`` list wired to
    ``clear_btn.click``: (prompt_input, productions_text, output_text,
    zip_file, regex_text).
    """
    # Bug fix: the dict key is "HC Grammar" (see default_grammars);
    # the previous key "HC" raised KeyError whenever Clear was clicked.
    return "", default_grammars["HC Grammar"], "", None, json.dumps({
        "regex_alfanum": "[a-zA-Z0-9]+",
        "regex_letters": "[a-zA-Z]+",
        "regex_number": "\\d+",
        "regex_decimal": "\\d+([.,]\\d+)?",
        "regex_var": "[a-zA-Z_][a-zA-Z0-9_]*",
        "regex_)": "\\)",
        "regex_(": "\\("
    }, indent=4)
|
| 647 |
|
| 648 |
clear_btn.click(
|
| 649 |
fn=clear_fields,
|
| 650 |
+
outputs=[prompt_input, productions_text, output_text, zip_file, regex_text]
|
| 651 |
)
|
|
|
|
| 652 |
if __name__ == "__main__":
|
| 653 |
demo.launch()
|