Spaces:

neel692
/

Abusive-Comment-Detection

Sleeping

NeelTA committed on Jun 4, 2023

Commit

e8bae94

1 Parent(s): 65610be

75% to 70%

Files changed (3) hide show

__pycache__/language_detection.cpython-39.pyc CHANGED Viewed

Binary files a/__pycache__/language_detection.cpython-39.pyc and b/__pycache__/language_detection.cpython-39.pyc differ

flagged/log.csv ADDED Viewed


1	+ text,Result,Cleaned text,flag,username,timestamp
2	+ Edit review not workibg please do something,UN,No cleaned text,,,2023-06-04 14:34:38.389497

language_detection.py CHANGED Viewed

@@ -257,9 +257,11 @@ def en_hi_detection(text):
 #         print(f"{word} ({pos}): {lemmatizer.lemmatize(word, pos)}")
             lem_word = lemmatizer.lemmatize(word, pos)
             if lem_word in wordnet.words():
                 count_en+=1
                 break
             elif lem_word in stop_words:
                 count_en+=1
                 break
     #print("total english words found :", count_en)
@@ -282,7 +284,7 @@ def en_hi_detection(text):
             #print(f"Word '{word}' does not contain any Hindi letters.")
     #print(count/len(words)*100, "% Hindi words found")
-    if count_en/len(words)*100>75:
         return "eng"
     elif count/len(words)*100>75:
         return "hi"

 #         print(f"{word} ({pos}): {lemmatizer.lemmatize(word, pos)}")
             lem_word = lemmatizer.lemmatize(word, pos)
             if lem_word in wordnet.words():
+                print("wordnet :",lem_word)
                 count_en+=1
                 break
             elif lem_word in stop_words:
+                print("stop_words :",lem_word)
                 count_en+=1
                 break
     #print("total english words found :", count_en)
             #print(f"Word '{word}' does not contain any Hindi letters.")
     #print(count/len(words)*100, "% Hindi words found")
+    if count_en/len(words)*100>70:
         return "eng"
     elif count/len(words)*100>75:
         return "hi"