Gabriele Tuccio committed on
Commit
d3289a1
·
1 Parent(s): faf3baa
Files changed (1) hide show
  1. app.py +56 -31
app.py CHANGED
@@ -376,7 +376,7 @@ def generate_text(model, tokenizer, text, logit_processor, streamer, max_new_tok
376
 
377
 
378
  @spaces.GPU
379
- def run_grammarllm(prompt, productions_json, model_choice):
380
  setup_logging()
381
 
382
  # Parsing productions
@@ -397,9 +397,10 @@ def run_grammarllm(prompt, productions_json, model_choice):
397
  }
398
 
399
  try:
 
400
  regex_dict = {key: re.compile(pattern) for key, pattern in regex_raw.items()}
401
- except re.error as e:
402
- return f"Errore nella compilazione regex: {str(e)}", None
403
 
404
  try:
405
  # Selezione del modello basata sulla scelta dell'utente
@@ -407,8 +408,8 @@ def run_grammarllm(prompt, productions_json, model_choice):
407
  model_name = "gpt2"
408
  elif model_choice == "Llama 3.2 1B":
409
  model_name = "meta-llama/Llama-3.2-1B-Instruct"
410
- elif model_choice == "Llama 3.1 8B":
411
- model_name = "meta-llama/Llama-3.1-8B-Instruct"
412
  else:
413
  return f"Modello non supportato: {model_choice}", None
414
 
@@ -471,7 +472,7 @@ def run_grammarllm(prompt, productions_json, model_choice):
471
 
472
 
473
  default_grammars = {
474
- "Default Grammar": json.dumps({
475
  "S*": ["<<positive>> A", "<<negative>> B", "<<neutral>> C"],
476
  "A": ["<<happy>> D", "<<peaceful>> E", "<<joyful>> F"],
477
  "B": ["<<sad>>", "<<angry>>", "<<frustrated>>"],
@@ -481,15 +482,14 @@ default_grammars = {
481
  "F": ["<<excited>>"]
482
  }, indent=4),
483
 
484
- "Other example": json.dumps({
485
- 'S*': ["<<(>> A B", "<<negligent>> V", '<<indifferent>>'],
486
- 'A': ["number", "letters", "ε"],
487
- 'B': ['<<)>> letters R'],
488
- 'R': ['C', 'D'],
489
- 'C': ['<<calm>>', '<<indifferent>>', '<<unemotional>>'],
490
- 'D': ['<<angry>>', '<<frustrated>>'],
491
- 'V': ["<<option>>"],
492
  }, indent=4),
 
 
 
 
 
493
  }
494
 
495
 
@@ -539,7 +539,7 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
539
 
540
  with gr.Column(scale=1):
541
  model_choice = gr.Dropdown(
542
- choices=["GPT-2", "Llama 3.2 1B", "Llama 3.1 8B"],
543
  label="Choose the model",
544
  value="GPT-2",
545
  interactive=True
@@ -550,7 +550,7 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
550
  grammar_choice = gr.Dropdown(
551
  list(default_grammars.keys()),
552
  label="Choose Productions (JSON)",
553
- value="Default Grammar",
554
  interactive=True,
555
  elem_id="grammar_choice"
556
  )
@@ -564,8 +564,23 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
564
  productions_text = gr.Textbox(
565
  label="Productions (JSON)",
566
  lines=15,
567
- value=default_grammars["Default Grammar"],
568
- info="Type your here your grammar i json fromat"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
569
  )
570
 
571
  with gr.Row():
@@ -583,15 +598,18 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
583
  with gr.Column():
584
  zip_file = gr.File(label="📦 Download ZIP (if available)")
585
 
586
- # Informazioni sui modelli
587
- #with gr.Accordion("ℹ️ Informations about Models", open=False):
588
- # gr.Markdown("""
589
- # - **GPT-2**: deafult model
590
- # - **Llama 3.2 1B**: Modello più recente e performante, dimensione ridotta
591
- # - **Llama 3.2 8B**: Modello più grande e capace, richiede più risorse
592
- #
593
- # *Nota: I modelli Llama utilizzano Zero GPU per l'accelerazione automatica.*
594
- # """)
 
 
 
595
 
596
  # Callback: quando cambio dropdown, aggiorno productions_text
597
  grammar_choice.change(
@@ -610,19 +628,26 @@ with gr.Blocks(title="GrammarLLM - enable structured generation via formal langu
610
  # Al submit del form chiamo run_grammarllm
611
  submit_btn.click(
612
  fn=run_grammarllm,
613
- inputs=[prompt_input, productions_text, model_choice],
614
  outputs=[output_text, zip_file],
615
  show_progress=True
616
  )
617
 
618
  # Funzione per pulire i campi
619
  def clear_fields():
620
- return "", default_grammars["Default Grammar"], None, None
 
 
 
 
 
 
 
 
621
 
622
  clear_btn.click(
623
  fn=clear_fields,
624
- outputs=[prompt_input, productions_text, output_text, zip_file]
625
  )
626
-
627
  if __name__ == "__main__":
628
  demo.launch()
 
376
 
377
 
378
  @spaces.GPU
379
+ def run_grammarllm(prompt, productions_json, model_choice,regex_json):
380
  setup_logging()
381
 
382
  # Parsing productions
 
397
  }
398
 
399
  try:
400
+ regex_raw = json.loads(regex_json)
401
  regex_dict = {key: re.compile(pattern) for key, pattern in regex_raw.items()}
402
+ except (json.JSONDecodeError, re.error) as e:
403
+ return f"Errore nelle regex personalizzate: {str(e)}", None
404
 
405
  try:
406
  # Selezione del modello basata sulla scelta dell'utente
 
408
  model_name = "gpt2"
409
  elif model_choice == "Llama 3.2 1B":
410
  model_name = "meta-llama/Llama-3.2-1B-Instruct"
411
+ #elif model_choice == "Llama 3.1 8B":
412
+ # model_name = "meta-llama/Llama-3.1-8B-Instruct"
413
  else:
414
  return f"Modello non supportato: {model_choice}", None
415
 
 
472
 
473
 
474
  default_grammars = {
475
+ "HC Grammar": json.dumps({
476
  "S*": ["<<positive>> A", "<<negative>> B", "<<neutral>> C"],
477
  "A": ["<<happy>> D", "<<peaceful>> E", "<<joyful>> F"],
478
  "B": ["<<sad>>", "<<angry>>", "<<frustrated>>"],
 
482
  "F": ["<<excited>>"]
483
  }, indent=4),
484
 
485
+ "VR Grammar": json.dumps({
486
+ "S*": ["<<positive>> S*", "<<negative>> S*", "<<neutral>> S*"],
 
 
 
 
 
 
487
  }, indent=4),
488
+
489
+ "General Grammar": json.dumps({
490
+ 'S*': ["( LETTERS )"],
491
+ 'LETTERS': ['letters number LETTERS',"ε"]
492
+ }, indent=4),
493
  }
494
 
495
 
 
539
 
540
  with gr.Column(scale=1):
541
  model_choice = gr.Dropdown(
542
+ choices=["GPT-2", "Llama 3.2 1B"],#, "Llama 3.1 8B"],
543
  label="Choose the model",
544
  value="GPT-2",
545
  interactive=True
 
550
  grammar_choice = gr.Dropdown(
551
  list(default_grammars.keys()),
552
  label="Choose Productions (JSON)",
553
+ value="HC Grammar",
554
  interactive=True,
555
  elem_id="grammar_choice"
556
  )
 
564
  productions_text = gr.Textbox(
565
  label="Productions (JSON)",
566
  lines=15,
567
+ value=default_grammars["HC Grammar"],
568
+ info="Type your here your grammar in json fromat"
569
+ )
570
+
571
+ regex_text = gr.Textbox(
572
+ label="Regex to define Terminals (JSON)",
573
+ lines=10,
574
+ value=json.dumps({
575
+ "regex_alfanum": "[a-zA-Z0-9]+",
576
+ "regex_letters": "[a-zA-Z]+",
577
+ "regex_number": "\\d+",
578
+ "regex_decimal": "\\d+([.,]\\d+)?",
579
+ "regex_var": "[a-zA-Z_][a-zA-Z0-9_]*",
580
+ "regex_)": "\\)",
581
+ "regex_(": "\\("
582
+ }, indent=4),
583
+ info="Modify these common regex"
584
  )
585
 
586
  with gr.Row():
 
598
  with gr.Column():
599
  zip_file = gr.File(label="📦 Download ZIP (if available)")
600
 
601
+ with gr.Accordion("ℹ️ About GrammarLLM and LLprefix", open=False):
602
+ gr.Markdown("""
603
+ ### 📚 What is GrammarLLM?
604
+ GrammarLLM enables structured text generation constrained by a formal grammar, using LLMs (Large Language Models) such as GPT-2 or LLaMA.
605
+
606
+ ### 🔍 What you can do:
607
+ - **Hierarchical classification**: Define class hierarchies, as shown in the "HC Grammar" example.
608
+ - **Vocabulary restriction**: Specify a limited set of valid words to be used. Including examples in the prompt is highly recommended to improve output quality.
609
+ - **Constrained generation**: Use LLprefix to define any regular or context-free grammar in JSON format.
610
+
611
+ 📄 For more details about LLprefix and the underlying algorithms, refer to the official paper.
612
+ """)
613
 
614
  # Callback: quando cambio dropdown, aggiorno productions_text
615
  grammar_choice.change(
 
628
  # Al submit del form chiamo run_grammarllm
629
  submit_btn.click(
630
  fn=run_grammarllm,
631
+ inputs=[prompt_input, productions_text, model_choice, regex_text],
632
  outputs=[output_text, zip_file],
633
  show_progress=True
634
  )
635
 
636
  # Funzione per pulire i campi
637
  def clear_fields():
638
+ return "", default_grammars["HC"], "", None, json.dumps({
639
+ "regex_alfanum": "[a-zA-Z0-9]+",
640
+ "regex_letters": "[a-zA-Z]+",
641
+ "regex_number": "\\d+",
642
+ "regex_decimal": "\\d+([.,]\\d+)?",
643
+ "regex_var": "[a-zA-Z_][a-zA-Z0-9_]*",
644
+ "regex_)": "\\)",
645
+ "regex_(": "\\("
646
+ }, indent=4)
647
 
648
  clear_btn.click(
649
  fn=clear_fields,
650
+ outputs=[prompt_input, productions_text, output_text, zip_file, regex_text]
651
  )
 
652
  if __name__ == "__main__":
653
  demo.launch()