destroyer795 committed
Commit b45c7de · 1 Parent(s): 7c08782

fix: make model more robust and aware of the tone of the comment.
.gitattributes CHANGED
@@ -2,3 +2,4 @@
 *.kv filter=lfs diff=lfs merge=lfs -text
 SA_model/model.safetensors filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
+sentiment_analyzer_pro/model.safetensors filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -4,44 +4,70 @@ import torch
 
 app = Flask(__name__)
 
-# MODEL LOADING
-# Replace './SA_model' with the actual path to your model folder.
-model_path = "./SA_model"
+# 1. MODEL CONFIGURATION
+# Ensure this path matches your unzipped folder name exactly.
+MODEL_PATH = "./sentiment_analyzer_pro"
 
-# We use the pipeline API which handles tokenization and inference automatically.
-# device=0 for GPU, but use device=-1 for Hugging Face free CPU spaces.
-print("Loading DistilBERT model...")
-classifier = pipeline(
-    "sentiment-analysis",
-    model=model_path,
-    tokenizer=model_path,
-    device=-1
-)
-print("Model loaded successfully!")
+# Load the DistilBERT pipeline.
+# We use device=-1 to ensure it runs on CPU, which is standard for free Hugging Face Spaces.
+print("Loading DistilBERT 3-class model...")
+try:
+    classifier = pipeline(
+        "sentiment-analysis",
+        model=MODEL_PATH,
+        tokenizer=MODEL_PATH,
+        device=-1
+    )
+    print("Model loaded successfully!")
+except Exception as e:
+    print(f"Error loading model: {e}")
 
-# FLASK ENDPOINT
+# 2. PREDICTION ENDPOINT
 @app.route('/predict', methods=['POST'])
 def predict_endpoint():
+    """
+    Receives JSON input: {"text": "Your review here"}
+    Returns JSON: {"sentiment": "Label", "score": 0.99, "confidence_flag": "High/Low"}
+    """
     data = request.get_json()
+
+    # Validate input
     if not data or 'text' not in data:
         return jsonify({'error': 'No text provided'}), 400
 
     sentence = data['text']
 
-    # Transformer models handle negation (like "not good") automatically
-    # No manual negation flipping is needed anymore.
+    # Perform inference
+    # Result is a list: [{'label': 'POSITIVE', 'score': 0.98}]
    result = classifier(sentence)[0]
 
-    # result is like: {'label': 'POSITIVE', 'score': 0.99}
     label = result['label']
     score = result['score']
 
-    # For the extension UI, we can normalize the score if needed
-    # (Transformers already give high confidence for clear sentiment)
+    # 3. INTELLIGENT SARCASM/MIXED LOGIC
+    # We use 0.70 (70%) as the "Sureness" threshold.
+    # If the model is less than 70% confident, we categorize it as Neutral/Mixed.
+    # This captures sarcasm where the model sees conflicting emotional signals.
+    if score < 0.70:
+        final_sentiment = "Neutral / Mixed"
+        confidence_flag = "Low"
+    else:
+        # Standardize labels from 'POSITIVE' to 'Positive'
+        final_sentiment = label.capitalize()
+        confidence_flag = "High"
+
     return jsonify({
-        'sentiment': label.capitalize(),
-        'score': score
+        'sentiment': final_sentiment,
+        'score': round(score, 4),
+        'confidence_flag': confidence_flag
     })
 
+# 4. HEALTH CHECK
+@app.route('/', methods=['GET'])
+def health_check():
+    return "Sentiment Analyzer Pro API is online."
+
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=7860) # Port 7860 is standard for HF Spaces
+    # Port 7860 is required for Hugging Face Spaces deployment.
+    # host='0.0.0.0' allows external connections (like your Chrome Extension).
+    app.run(host='0.0.0.0', port=7860)
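
For quick manual testing of the updated /predict endpoint, a minimal client sketch follows; the base URL, timeout, and sample comment are illustrative assumptions, not part of this commit.

import requests

# Assumed base URL: a local run of app.py; swap in your Space URL once deployed.
API_URL = "http://localhost:7860"

def get_sentiment(text: str) -> dict:
    """POST a single comment to /predict and return the parsed JSON response."""
    resp = requests.post(f"{API_URL}/predict", json={"text": text}, timeout=30)
    resp.raise_for_status()
    return resp.json()

if __name__ == "__main__":
    # Expected shape: {"sentiment": ..., "score": ..., "confidence_flag": "High" or "Low"}
    print(get_sentiment("Oh great, another update that breaks everything."))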
{SA_model → sentiment_analyzer_pro}/config.json RENAMED
@@ -10,12 +10,14 @@
   "hidden_dim": 3072,
   "id2label": {
     "0": "NEGATIVE",
-    "1": "POSITIVE"
+    "1": "NEUTRAL",
+    "2": "POSITIVE"
   },
   "initializer_range": 0.02,
   "label2id": {
     "NEGATIVE": 0,
-    "POSITIVE": 1
+    "NEUTRAL": 1,
+    "POSITIVE": 2
   },
   "max_position_embeddings": 512,
   "model_type": "distilbert",
{SA_model → sentiment_analyzer_pro}/model.safetensors RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6220a6c2266a1b6c7da3bf162edb758fe1e5ddbfd3bf324c2109ad1344257f11
-size 267832560
+oid sha256:b16ebe1022a63bc0374b028e0f6db207742dfc47630a185499f969cfda26e337
+size 267835644
{SA_model → sentiment_analyzer_pro}/special_tokens_map.json RENAMED
File without changes
{SA_model → sentiment_analyzer_pro}/tokenizer.json RENAMED
File without changes
{SA_model → sentiment_analyzer_pro}/tokenizer_config.json RENAMED
File without changes
{SA_model → sentiment_analyzer_pro}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2179dc78f0e4e7a1a23648e4b57ef62308d7f8eb9a6d6d6f697e66945c10574a
-size 5777
+oid sha256:d92bbcf424eeef97dde37cd8973c7852e1d09ef372ece889b0be6bce50f03c4b
+size 5841
{SA_model → sentiment_analyzer_pro}/vocab.txt RENAMED
File without changes
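
Both model.safetensors and training_args.bin are stored as Git LFS pointer files (shown above), so a clone made without LFS leaves small pointer stubs in place of the real weights. The check below is a rough sketch for spotting that situation; the folder path and file names are taken from this commit.

from pathlib import Path

# Assumed paths, matching the renamed folder introduced in this commit.
for name in ("model.safetensors", "training_args.bin"):
    path = Path("./sentiment_analyzer_pro") / name
    head = path.read_bytes()[:64]
    if head.startswith(b"version https://git-lfs"):
        # Still a Git LFS pointer (a few hundred bytes), not the actual file.
        print(f"{name}: LFS pointer only - run `git lfs pull` to fetch the real file.")
    else:
        print(f"{name}: present ({path.stat().st_size} bytes).")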