Added NLP Utils to remove stopwords for example
This commit is contained in:
parent
17acd5e05d
commit
3bf6737f45
0
skills/test/wikipedia_search/__init__.py
Normal file
0
skills/test/wikipedia_search/__init__.py
Normal file
@ -14,6 +14,8 @@ def get_language():
|
||||
def get_language_full_name(name=None):
|
||||
"""
|
||||
Return, for example, french for fr-fr, english for en-en, etc. (saved in languages.json in the config folder)
|
||||
|
||||
Return english if the language isn't found in the languages.json file
|
||||
"""
|
||||
config_json = json.load(open(path + "/config/languages.json", encoding='utf-8', mode='r'))
|
||||
|
||||
@ -22,3 +24,5 @@ def get_language_full_name(name=None):
|
||||
|
||||
if name in config_json:
|
||||
return config_json.get(name)
|
||||
|
||||
return 'english'
|
||||
|
18
utils/nlp_utils.py
Normal file
18
utils/nlp_utils.py
Normal file
@ -0,0 +1,18 @@
|
||||
from nltk.corpus import stopwords
|
||||
|
||||
from utils.languages_utils import get_language_full_name
|
||||
|
||||
|
||||
def get_text_without_stopwords(sentence, language='english'):
    """
    Return the sentence lower-cased with the stopwords of the given language removed.

    :param sentence: text to filter; it is lower-cased and split on whitespace
    :param language: name handed to nltk's stopwords.words (e.g. 'english'), or a
        hyphenated code such as en-us, which is first converted with get_language_full_name
    :return: the remaining words joined by single spaces
    """
    # a hyphen marks a locale-style code (en-us) rather than a full language name
    if '-' in language:
        language = get_language_full_name(language)

    ignored = set(stopwords.words(language))

    kept_words = []
    for word in sentence.lower().split():
        if word in ignored:
            continue
        kept_words.append(word)
    return " ".join(kept_words)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # manual smoke check: print the filtered form of a sample request
    sample = "Hey give me some info about Elon Musk please"
    print(get_text_without_stopwords(sample))
|
Reference in New Issue
Block a user