HMWCS committed on
Commit
b6645f5
·
verified ·
1 Parent(s): 9d191f7

feat: enhance mixed garbage rules and container classification

Browse files
Files changed (2) hide show
  1. classifier.py +93 -79
  2. knowledge_base.py +8 -6
classifier.py CHANGED
@@ -242,125 +242,139 @@ class GarbageClassifier:
242
  return self._calculate_confidence_heuristic(response_lower, classification)
243
 
244
  def _extract_classification(self, response: str) -> str:
245
- """Extract the main classification from the response"""
246
  response_lower = response.lower()
247
 
248
- # Check for mixed garbage warnings first
249
- mixed_garbage_indicators = [
 
 
250
  "multiple garbage types",
 
 
 
 
 
 
 
 
251
  "separate items",
252
- "mixed together",
253
- "different types of garbage"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  ]
255
 
256
- if any(indicator in response_lower for indicator in mixed_garbage_indicators):
257
  return "Unable to classify"
258
 
259
- # Check for contaminated containers that should go to Food/Kitchen Waste
260
- contamination_indicators = [
261
- "food residue", "contaminated", "not empty", "not rinsed",
262
- "tip: empty and rinse", "empty and rinse this container"
 
263
  ]
264
 
265
- if any(indicator in response_lower for indicator in contamination_indicators):
266
- # If it mentions recycling tip but has contamination, it's Food/Kitchen Waste
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  return "Food/Kitchen Waste"
268
 
269
- # First, look for positive waste category indicators
270
- # Check exact category matches first
271
  categories = self.knowledge.get_categories()
272
  waste_categories = [cat for cat in categories if cat != "Unable to classify"]
273
 
274
  for category in waste_categories:
275
  if category.lower() in response_lower:
276
- # Make sure it's not in a negative context
277
  category_index = response_lower.find(category.lower())
278
  context_before = response_lower[max(0, category_index - 30):category_index]
279
 
280
- # Only skip if there's a clear negation right before
281
  if not any(neg in context_before[-10:] for neg in ["not", "cannot", "isn't", "doesn't"]):
282
  return category
283
 
284
- # Look for strong recyclable indicators (only if clean/empty)
285
- recyclable_indicators = [
286
- "recyclable", "recycle", "aluminum", "plastic", "glass", "metal",
287
- "foil", "can", "bottle", "cardboard", "paper", "tin", "steel", "iron"
288
- ]
289
 
290
  if any(indicator in response_lower for indicator in recyclable_indicators):
291
- # Check if it's contaminated or has food content
292
- if not any(cont in response_lower for cont in contamination_indicators):
293
- # Check if it's explicitly said to be recyclable
294
- recyclable_phrases = [
295
- "recyclable", "can be recycled", "made of recyclable",
296
- "recyclable material", "recyclable aluminum", "recyclable plastic",
297
- "clean", "empty", "rinsed"
298
- ]
299
- if any(phrase in response_lower for phrase in recyclable_phrases):
300
- return "Recyclable Waste"
301
-
302
- # Check for specific materials
303
- if any(material in response_lower for material in ["aluminum", "foil", "metal"]):
304
- return "Recyclable Waste"
305
- if any(material in response_lower for material in ["plastic", "bottle"]):
306
- return "Recyclable Waste"
307
- if any(material in response_lower for material in ["glass", "cardboard", "paper"]):
308
- return "Recyclable Waste"
309
-
310
- # Look for food waste indicators
311
- food_indicators = [
312
- "food", "fruit", "vegetable", "organic", "kitchen waste",
313
- "peel", "core", "scraps", "leftovers", "food content", "food residue"
314
- ]
315
  if any(indicator in response_lower for indicator in food_indicators):
316
  return "Food/Kitchen Waste"
317
 
318
- # Look for hazardous waste indicators
319
- hazardous_indicators = [
320
- "battery", "chemical", "medicine", "paint", "toxic", "hazardous"
321
- ]
322
  if any(indicator in response_lower for indicator in hazardous_indicators):
323
  return "Hazardous Waste"
324
 
325
- # Look for other waste indicators
326
- other_waste_indicators = [
327
- "cigarette", "ceramic", "dust", "diaper", "tissue", "other waste"
328
- ]
329
  if any(indicator in response_lower for indicator in other_waste_indicators):
330
  return "Other Waste"
331
 
332
- # Only classify as "Unable to classify" if there are explicit indicators
333
- unable_phrases = [
334
- "unable to classify",
335
- "cannot classify",
336
- "cannot be classified as waste",
337
- "not garbage", "not waste", "not trash"
338
- ]
339
-
340
  if any(phrase in response_lower for phrase in unable_phrases):
341
  return "Unable to classify"
342
 
343
- # Check for non-garbage items (people, living things, etc.)
344
- non_garbage_indicators = [
345
- "person", "people", "human", "face", "man", "woman",
346
- "living", "alive", "animal", "pet",
347
- "portrait", "photo of a person"
348
- ]
349
-
350
  if any(indicator in response_lower for indicator in non_garbage_indicators):
351
  return "Unable to classify"
352
 
353
- # If we found waste-related content but no clear category, try to infer
354
- waste_related = any(word in response_lower for word in [
355
- "waste", "trash", "garbage", "discard", "throw", "bin"
356
- ])
357
-
358
- if waste_related:
359
- # Default to Other Waste if it's clearly waste but unclear category
360
- return "Other Waste"
361
-
362
- # If no clear classification found and no clear non-waste indicators,
363
- # default to "Unable to classify"
364
  return "Unable to classify"
365
 
366
  def _extract_reasoning(self, response: str) -> str:
 
242
  return self._calculate_confidence_heuristic(response_lower, classification)
243
 
244
  def _extract_classification(self, response: str) -> str:
245
+ """Extract the main classification from the response with STRICT mixed garbage enforcement"""
246
  response_lower = response.lower()
247
 
248
+ # STRICT MIXED GARBAGE ENFORCEMENT - Catch ANY mixed scenario
249
+
250
+ # 1. Explicit mixed garbage phrases
251
+ explicit_mixed_phrases = [
252
  "multiple garbage types",
253
+ "multiple different",
254
+ "different types of garbage",
255
+ "various items",
256
+ "mixed items",
257
+ "several different",
258
+ "collection of mixed items",
259
+ "mixture of items",
260
+ "variety of items",
261
  "separate items",
262
+ "please separate"
263
+ ]
264
+
265
+ if any(phrase in response_lower for phrase in explicit_mixed_phrases):
266
+ return "Unable to classify"
267
+
268
+ # 2. Language patterns that indicate multiple items/uncertainty about classification
269
+ uncertainty_patterns = [
270
+ "appears to be containers",
271
+ "what appears to be",
272
+ "including what appears",
273
+ "various colors and textures",
274
+ "don't clearly fall into a single",
275
+ "without further detail",
276
+ "not possible to definitively classify",
277
+ "more information",
278
+ "can't determine",
279
+ "difficult to identify",
280
+ "unclear category",
281
+ "mixed materials"
282
  ]
283
 
284
+ if any(pattern in response_lower for pattern in uncertainty_patterns):
285
  return "Unable to classify"
286
 
287
+ # 3. Multiple container/item indicators
288
+ multiple_item_indicators = [
289
+ "containers (", "bottles, cans", "bags, and", "items, including",
290
+ "bottles and", "cans and", "containers and", "bags and",
291
+ "plastic bottles, cans", "various containers"
292
  ]
293
 
294
+ if any(indicator in response_lower for indicator in multiple_item_indicators):
295
+ return "Unable to classify"
296
+
297
+ # 4. Count different item types mentioned
298
+ item_types = [
299
+ "bottle", "can", "container", "bag", "box", "wrapper",
300
+ "jar", "cup", "plate", "bowl", "package"
301
+ ]
302
+
303
+ item_count = sum(1 for item_type in item_types if item_type in response_lower)
304
+ if item_count >= 3: # If 3+ different container types mentioned, it's mixed
305
+ return "Unable to classify"
306
+
307
+ # ONLY EXCEPTION: Single recyclable container with visible food content
308
+ recyclable_container_indicators = ["container", "bottle", "can", "jar", "box", "wrapper"]
309
+ food_content_indicators = [
310
+ "food residue", "food content", "food inside", "visible food",
311
+ "remains", "leftovers", "scraps inside", "not empty", "not rinsed"
312
+ ]
313
+ recyclable_material_indicators = ["plastic", "aluminum", "glass", "metal", "cardboard"]
314
+
315
+ # Check for recycling tip warning
316
+ has_recycling_tip = any(tip in response_lower for tip in [
317
+ "tip: empty and rinse",
318
+ "empty and rinse this container",
319
+ "clean first", "rinse first"
320
+ ])
321
+
322
+ # ONLY allow Food/Kitchen classification for single contaminated container
323
+ has_single_container = any(indicator in response_lower for indicator in recyclable_container_indicators)
324
+ has_food_content = any(indicator in response_lower for indicator in food_content_indicators)
325
+ has_recyclable_material = any(indicator in response_lower for indicator in recyclable_material_indicators)
326
+
327
+ # Must be single item (not multiple) and contaminated
328
+ if (has_single_container and has_food_content and
329
+ (has_recyclable_material or has_recycling_tip) and
330
+ item_count <= 1): # Only single container
331
  return "Food/Kitchen Waste"
332
 
333
+ # Now proceed with normal classification for single, clear items
 
334
  categories = self.knowledge.get_categories()
335
  waste_categories = [cat for cat in categories if cat != "Unable to classify"]
336
 
337
  for category in waste_categories:
338
  if category.lower() in response_lower:
 
339
  category_index = response_lower.find(category.lower())
340
  context_before = response_lower[max(0, category_index - 30):category_index]
341
 
 
342
  if not any(neg in context_before[-10:] for neg in ["not", "cannot", "isn't", "doesn't"]):
343
  return category
344
 
345
+ # Single item material detection
346
+ recyclable_indicators = ["recyclable", "recycle", "aluminum", "plastic", "glass", "metal", "foil", "cardboard",
347
+ "paper"]
 
 
348
 
349
  if any(indicator in response_lower for indicator in recyclable_indicators):
350
+ if not any(cont in response_lower for cont in food_content_indicators):
351
+ return "Recyclable Waste"
352
+
353
+ # Food waste indicators
354
+ food_indicators = ["food", "fruit", "vegetable", "organic", "kitchen waste", "peel", "core", "scraps"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
  if any(indicator in response_lower for indicator in food_indicators):
356
  return "Food/Kitchen Waste"
357
 
358
+ # Hazardous waste indicators
359
+ hazardous_indicators = ["battery", "chemical", "medicine", "paint", "toxic", "hazardous"]
 
 
360
  if any(indicator in response_lower for indicator in hazardous_indicators):
361
  return "Hazardous Waste"
362
 
363
+ # Other waste indicators
364
+ other_waste_indicators = ["cigarette", "ceramic", "dust", "diaper", "tissue"]
 
 
365
  if any(indicator in response_lower for indicator in other_waste_indicators):
366
  return "Other Waste"
367
 
368
+ # Non-garbage detection
369
+ unable_phrases = ["unable to classify", "cannot classify", "not garbage", "not waste"]
 
 
 
 
 
 
370
  if any(phrase in response_lower for phrase in unable_phrases):
371
  return "Unable to classify"
372
 
373
+ non_garbage_indicators = ["person", "people", "human", "face", "living", "animal", "pet"]
 
 
 
 
 
 
374
  if any(indicator in response_lower for indicator in non_garbage_indicators):
375
  return "Unable to classify"
376
 
377
+ # Default fallback
 
 
 
 
 
 
 
 
 
 
378
  return "Unable to classify"
379
 
380
  def _extract_reasoning(self, response: str) -> str:
knowledge_base.py CHANGED
@@ -7,10 +7,11 @@ IMPORTANT: You should ONLY classify items that are actually garbage/waste. If th
7
 
8
  **MIXED GARBAGE HANDLING RULES:**
9
 
10
- 1. **Containers with Food Content**: For any container (bottles, cans, boxes, wrappers) that contains visible food residue or content:
11
  - Classify as "Food/Kitchen Waste" due to contamination risk
12
  - Always include this warning: "⚠️ Tip: Empty and rinse this container first, then it can be recycled!"
13
  - Only completely empty and rinsed containers qualify as "Recyclable Waste"
 
14
 
15
  2. **Multiple Different Garbage Types**: If the image shows multiple different types of garbage mixed together (e.g., electronics with food, batteries with organic waste):
16
  - Classify as "Unable to classify"
@@ -19,12 +20,12 @@ IMPORTANT: You should ONLY classify items that are actually garbage/waste. If th
19
  Garbage classification standards:
20
 
21
  **Recyclable Waste**:
22
- - Paper: newspapers, magazines, books, various packaging papers, office paper, advertising flyers, cardboard boxes, copy paper, etc.
23
- - Plastics: various plastic bags, plastic packaging, disposable plastic food containers and utensils, toothbrushes, cups, water bottles, plastic toys, etc.
24
- - Metals: aluminum cans, tin cans, toothpaste tubes, metal toys, metal stationery, nails, metal sheets, aluminum foil, etc.
25
- - Glass: glass bottles, broken glass pieces, mirrors, light bulbs, vacuum flasks, etc.
26
  - Textiles: old clothing, textile products, shoes, curtains, towels, bags, etc.
27
- - NOTE: Only clean, empty containers qualify. Contaminated containers go to Food/Kitchen Waste.
28
 
29
  **Food/Kitchen Waste**:
30
  - Food scraps: rice, noodles, bread, meat, fish, shrimp shells, crab shells, bones, etc.
@@ -42,6 +43,7 @@ Garbage classification standards:
42
 
43
  **Other Waste**:
44
  - Contaminated non-recyclable paper: toilet paper, diapers, wet wipes, napkins, etc.
 
45
  - Cigarette butts, ceramics, dust, disposable tableware (non-plastic)
46
  - Large bones, hard shells, hard fruit pits (coconut shells, durian shells, walnut shells, corn cobs, etc.)
47
  - Hair, pet waste, cat litter, etc.
 
7
 
8
  **MIXED GARBAGE HANDLING RULES:**
9
 
10
+ 1. **Containers with Food Content**: For any recyclable container (aluminum cans, glass jars, clean plastic bottles, etc.) that contains visible food residue or content:
11
  - Classify as "Food/Kitchen Waste" due to contamination risk
12
  - Always include this warning: "⚠️ Tip: Empty and rinse this container first, then it can be recycled!"
13
  - Only completely empty and rinsed containers qualify as "Recyclable Waste"
14
+ - Non-recyclable containers (styrofoam, wax-coated) with food: classify as "Food/Kitchen Waste" with warning: "⚠️ Tip: Remove food waste for composting, then dispose container in general trash"
15
 
16
  2. **Multiple Different Garbage Types**: If the image shows multiple different types of garbage mixed together (e.g., electronics with food, batteries with organic waste):
17
  - Classify as "Unable to classify"
 
20
  Garbage classification standards:
21
 
22
  **Recyclable Waste**:
23
+ - Paper: newspapers, magazines, books, various packaging papers, office paper, advertising flyers, clean cardboard boxes, copy paper, etc.
24
+ - Plastics: clean plastic bottles (#1 PETE, #2 HDPE), clean plastic containers, plastic bags, toothbrushes, cups, water bottles, plastic toys, etc. (NOT styrofoam #6 or heavily coated containers)
25
+ - Metals: clean aluminum cans, clean tin cans, toothpaste tubes, metal toys, metal stationery, nails, metal sheets, aluminum foil, etc.
26
+ - Glass: clean glass bottles and jars, broken glass pieces, mirrors, light bulbs, vacuum flasks, etc.
27
  - Textiles: old clothing, textile products, shoes, curtains, towels, bags, etc.
28
+ - NOTE: Only clean, empty containers qualify. Contaminated containers go to Food/Kitchen Waste. Wax-coated containers, styrofoam, and multi-material packaging are NOT recyclable.
29
 
30
  **Food/Kitchen Waste**:
31
  - Food scraps: rice, noodles, bread, meat, fish, shrimp shells, crab shells, bones, etc.
 
43
 
44
  **Other Waste**:
45
  - Contaminated non-recyclable paper: toilet paper, diapers, wet wipes, napkins, etc.
46
+ - Non-recyclable containers: styrofoam containers (#6 polystyrene), wax-coated containers, multi-material packaging
47
  - Cigarette butts, ceramics, dust, disposable tableware (non-plastic)
48
  - Large bones, hard shells, hard fruit pits (coconut shells, durian shells, walnut shells, corn cobs, etc.)
49
  - Hair, pet waste, cat litter, etc.