Moved everything to jarvis/... and added spacy for stopwords

This commit is contained in:
Mathieu B 2021-07-27 17:44:51 +02:00
parent 3bf6737f45
commit ab86509e61
36 changed files with 45 additions and 37 deletions

2
.gitignore vendored
View File

@ -135,4 +135,4 @@ dmypy.json
/ffmpeg/
/config/secrets.json
/jarvis/config/secrets.json

View File

@ -2,7 +2,7 @@ import nltk
import numpy as np
from nltk.stem.porter import PorterStemmer
from utils import languages_utils
from jarvis.utils import languages_utils
stemmer = PorterStemmer()

View File

@ -3,9 +3,9 @@ import os
import torch
from unidecode import unidecode
import get_path_file
from ia.model import NeuralNet
from ia.nltk_utils import bag_of_words, tokenize
from jarvis import get_path_file
from jarvis.ia.model import NeuralNet
from jarvis.ia.nltk_utils import bag_of_words, tokenize
print("Loading, might take a few seconds...")

View File

@ -5,10 +5,10 @@ import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import get_path_file
from ia.model import NeuralNet
from ia.nltk_utils import bag_of_words, tokenize, stem
from utils import intents_utils
from jarvis import get_path_file
from jarvis.ia.model import NeuralNet
from jarvis.ia.nltk_utils import bag_of_words, tokenize, stem
from jarvis.utils import intents_utils
path = os.path.dirname(get_path_file.__file__)

View File

@ -1,7 +1,6 @@
import flask
from flask import Flask, request, jsonify, Response
import ia.process
from utils import config_utils, flask_utils, intents_utils, utils
app = Flask(__name__)
@ -15,7 +14,7 @@ def process_request():
flask.abort(Response('You must provide a \'sentence\' parameter (not empty aswell)!'))
sentence = data['sentence']
tag_for_request = ia.process.get_tag_for_sentence(sentence)
tag_for_request = src.ia.process.get_tag_for_sentence(sentence)
print("SENTENCE : " + sentence + " /// TAG : " + tag_for_request)

View File

@ -1,7 +1,7 @@
import time
from datetime import datetime
from utils import config_utils, intents_utils
from jarvis.utils import intents_utils, config_utils
def what_time_is_it():

View File

View File

@ -1,6 +1,6 @@
import requests as requests
from utils import languages_utils, config_utils
from jarvis.utils import languages_utils, config_utils
def tell_me_a_joke():

View File

0
jarvis/utils/__init__.py Normal file
View File

View File

@ -3,7 +3,7 @@ import json
import requests
from requests.structures import CaseInsensitiveDict
from utils import config_utils
from jarvis.utils import config_utils
client_url = config_utils.get_in_config("CLIENT_URL")

View File

@ -1,7 +1,7 @@
import json
import os
import get_path_file
from jarvis import get_path_file
path = os.path.dirname(get_path_file.__file__)

View File

@ -3,8 +3,8 @@ import json
import os
import random
import get_path_file
from utils import languages_utils
from jarvis import get_path_file
from jarvis.utils import languages_utils
all_intents = dict()
path = os.path.dirname(get_path_file.__file__)

View File

@ -1,8 +1,8 @@
import json
import os
import get_path_file
from utils import config_utils
from jarvis import get_path_file
from jarvis.utils import config_utils
path = os.path.dirname(get_path_file.__file__)

27
jarvis/utils/nlp_utils.py Normal file
View File

@ -0,0 +1,27 @@
import spacy
from nltk.corpus import stopwords
def get_spacy_nlp():
    """Return the shared spaCy English pipeline.

    Loading ``en_core_web_sm`` is expensive (disk read + model
    initialisation), so the pipeline is loaded once and memoized on the
    function object; every later call returns the same instance.

    :return: the loaded spaCy ``Language`` pipeline
    """
    if not hasattr(get_spacy_nlp, "_cached_nlp"):
        # First call pays the load cost; subsequent calls reuse the model.
        get_spacy_nlp._cached_nlp = spacy.load("en_core_web_sm")
    return get_spacy_nlp._cached_nlp
def get_text_without_stopwords(sentence):
    """Remove spaCy's English stop words from *sentence*.

    The sentence is lower-cased and split on whitespace; any token found
    in spaCy's default stop-word list is dropped.

    :param sentence: raw input text
    :return: the filtered sentence, tokens re-joined with single spaces
    """
    blocked = set(get_spacy_nlp().Defaults.stop_words)
    kept = (token for token in sentence.lower().split() if token not in blocked)
    return " ".join(kept)
def get_text_without_stopwords_nltk(sentence, language='english'):
    """Remove NLTK stop words for *language* from *sentence*.

    The sentence is lower-cased and split on whitespace; tokens present
    in the NLTK stop-word corpus for the given language are dropped.

    :param sentence: raw input text
    :param language: NLTK corpus language name (default ``'english'``)
    :return: the filtered sentence, tokens re-joined with single spaces
    """
    blocked = set(stopwords.words(language))
    return " ".join(tok for tok in sentence.lower().split() if tok not in blocked)

View File

@ -1,18 +0,0 @@
from nltk.corpus import stopwords
from utils.languages_utils import get_language_full_name
def get_text_without_stopwords(sentence, language='english'):
    """Remove NLTK stop words from *sentence*.

    Accepts locale-style codes such as ``en-us``: anything containing a
    dash is first resolved to its full language name (e.g. ``english``)
    before the NLTK stop-word corpus is consulted.

    :param sentence: raw input text
    :param language: language name or locale code (default ``'english'``)
    :return: the filtered sentence, tokens re-joined with single spaces
    """
    # if the language given is something like en-us, get the full variant (english)
    if '-' in language:
        language = get_language_full_name(language)
    blocked = set(stopwords.words(language))
    return " ".join(w for w in sentence.lower().split() if w not in blocked)
# Manual smoke test: run this module directly to see stop-word removal on a sample sentence.
if __name__ == '__main__':
    print(get_text_without_stopwords("Hey give me some info about Elon Musk please"))