From ab86509e612d2318f347d69efd82cae02549bfb9 Mon Sep 17 00:00:00 2001 From: Mathieu B Date: Tue, 27 Jul 2021 17:44:51 +0200 Subject: [PATCH] Moved everything to jarvis/... and added spacy for stopwords --- .gitignore | 2 +- {ia => jarvis}/__init__.py | 0 {config => jarvis/config}/config.json | 0 {config => jarvis/config}/languages.json | 0 get_path_file.py => jarvis/get_path_file.py | 0 .../date_and_time => jarvis/ia}/__init__.py | 0 {ia => jarvis/ia}/model.py | 0 {ia => jarvis/ia}/nltk_utils.py | 2 +- {ia => jarvis/ia}/process.py | 6 ++-- {ia => jarvis/ia}/train.py | 8 +++--- {ia => jarvis/ia}/trained_model.pth | Bin main.py => jarvis/main.py | 3 +- .../jokes => jarvis/skills}/__init__.py | 0 .../skills/daily}/__init__.py | 0 .../skills/daily/date_and_time}/__init__.py | 0 .../skills}/daily/date_and_time/info.json | 0 .../skills}/daily/date_and_time/intent.py | 2 +- .../daily/date_and_time/lang/en-en.json | 0 .../daily/date_and_time/lang/fr-fr.json | 0 jarvis/skills/entertainement/__init__.py | 0 .../skills/entertainement/jokes/__init__.py | 0 .../skills}/entertainement/jokes/info.json | 0 .../skills}/entertainement/jokes/intent.py | 2 +- .../entertainement/jokes/lang/en-en.json | 0 .../entertainement/jokes/lang/fr-fr.json | 0 jarvis/skills/research/__init__.py | 0 .../research/wikipedia_search/__init__.py | 0 jarvis/utils/__init__.py | 0 {utils => jarvis/utils}/client_utils.py | 2 +- {utils => jarvis/utils}/config_utils.py | 2 +- {utils => jarvis/utils}/flask_utils.py | 0 {utils => jarvis/utils}/intents_utils.py | 4 +-- {utils => jarvis/utils}/languages_utils.py | 4 +-- jarvis/utils/nlp_utils.py | 27 ++++++++++++++++++ {utils => jarvis/utils}/utils.py | 0 utils/nlp_utils.py | 18 ------------ 36 files changed, 45 insertions(+), 37 deletions(-) rename {ia => jarvis}/__init__.py (100%) rename {config => jarvis/config}/config.json (100%) rename {config => jarvis/config}/languages.json (100%) rename get_path_file.py => jarvis/get_path_file.py (100%) rename {skills/daily/date_and_time => jarvis/ia}/__init__.py (100%) rename {ia => jarvis/ia}/model.py (100%) rename {ia => jarvis/ia}/nltk_utils.py (97%) rename {ia => jarvis/ia}/process.py (92%) rename {ia => jarvis/ia}/train.py (95%) rename {ia => jarvis/ia}/trained_model.pth (100%) rename main.py => jarvis/main.py (93%) rename {skills/entertainement/jokes => jarvis/skills}/__init__.py (100%) rename {skills/test/wikipedia_search => jarvis/skills/daily}/__init__.py (100%) rename {utils => jarvis/skills/daily/date_and_time}/__init__.py (100%) rename {skills => jarvis/skills}/daily/date_and_time/info.json (100%) rename {skills => jarvis/skills}/daily/date_and_time/intent.py (93%) rename {skills => jarvis/skills}/daily/date_and_time/lang/en-en.json (100%) rename {skills => jarvis/skills}/daily/date_and_time/lang/fr-fr.json (100%) create mode 100644 jarvis/skills/entertainement/__init__.py create mode 100644 jarvis/skills/entertainement/jokes/__init__.py rename {skills => jarvis/skills}/entertainement/jokes/info.json (100%) rename {skills => jarvis/skills}/entertainement/jokes/intent.py (94%) rename {skills => jarvis/skills}/entertainement/jokes/lang/en-en.json (100%) rename {skills => jarvis/skills}/entertainement/jokes/lang/fr-fr.json (100%) create mode 100644 jarvis/skills/research/__init__.py create mode 100644 jarvis/skills/research/wikipedia_search/__init__.py create mode 100644 jarvis/utils/__init__.py rename {utils => jarvis/utils}/client_utils.py (97%) rename {utils => jarvis/utils}/config_utils.py (96%) rename {utils => jarvis/utils}/flask_utils.py (100%) rename {utils => jarvis/utils}/intents_utils.py (96%) rename {utils => jarvis/utils}/languages_utils.py (89%) create mode 100644 jarvis/utils/nlp_utils.py rename {utils => jarvis/utils}/utils.py (100%) delete mode 100644 utils/nlp_utils.py diff --git a/.gitignore b/.gitignore index deefc5c..76d87be 100644 --- a/.gitignore +++ b/.gitignore @@ -135,4 +135,4 @@ dmypy.json /ffmpeg/ -/config/secrets.json +/jarvis/config/secrets.json diff --git a/ia/__init__.py b/jarvis/__init__.py similarity index 100% rename from ia/__init__.py rename to jarvis/__init__.py diff --git a/config/config.json b/jarvis/config/config.json similarity index 100% rename from config/config.json rename to jarvis/config/config.json diff --git a/config/languages.json b/jarvis/config/languages.json similarity index 100% rename from config/languages.json rename to jarvis/config/languages.json diff --git a/get_path_file.py b/jarvis/get_path_file.py similarity index 100% rename from get_path_file.py rename to jarvis/get_path_file.py diff --git a/skills/daily/date_and_time/__init__.py b/jarvis/ia/__init__.py similarity index 100% rename from skills/daily/date_and_time/__init__.py rename to jarvis/ia/__init__.py diff --git a/ia/model.py b/jarvis/ia/model.py similarity index 100% rename from ia/model.py rename to jarvis/ia/model.py diff --git a/ia/nltk_utils.py b/jarvis/ia/nltk_utils.py similarity index 97% rename from ia/nltk_utils.py rename to jarvis/ia/nltk_utils.py index cf344db..b9c5dfb 100644 --- a/ia/nltk_utils.py +++ b/jarvis/ia/nltk_utils.py @@ -2,7 +2,7 @@ import nltk import numpy as np from nltk.stem.porter import PorterStemmer -from utils import languages_utils +from jarvis.utils import languages_utils stemmer = PorterStemmer() diff --git a/ia/process.py b/jarvis/ia/process.py similarity index 92% rename from ia/process.py rename to jarvis/ia/process.py index 4fe34fc..3d0cac7 100644 --- a/ia/process.py +++ b/jarvis/ia/process.py @@ -3,9 +3,9 @@ import os import torch from unidecode import unidecode -import get_path_file -from ia.model import NeuralNet -from ia.nltk_utils import bag_of_words, tokenize +from jarvis import get_path_file +from jarvis.ia.model import NeuralNet +from jarvis.ia.nltk_utils import bag_of_words, tokenize print("Loading, might take a few seconds...") diff --git a/ia/train.py b/jarvis/ia/train.py similarity index 95% rename from ia/train.py rename to jarvis/ia/train.py index 3b2bb30..4487dda 100644 --- a/ia/train.py +++ b/jarvis/ia/train.py @@ -5,10 +5,10 @@ import torch import torch.nn as nn from torch.utils.data import Dataset, DataLoader -import get_path_file -from ia.model import NeuralNet -from ia.nltk_utils import bag_of_words, tokenize, stem -from utils import intents_utils +from jarvis import get_path_file +from jarvis.ia.model import NeuralNet +from jarvis.ia.nltk_utils import bag_of_words, tokenize, stem +from jarvis.utils import intents_utils path = os.path.dirname(get_path_file.__file__) diff --git a/ia/trained_model.pth b/jarvis/ia/trained_model.pth similarity index 100% rename from ia/trained_model.pth rename to jarvis/ia/trained_model.pth diff --git a/main.py b/jarvis/main.py similarity index 93% rename from main.py rename to jarvis/main.py index 19b7b01..d0c0595 100644 --- a/main.py +++ b/jarvis/main.py @@ -1,7 +1,6 @@ import flask from flask import Flask, request, jsonify, Response -import ia.process from utils import config_utils, flask_utils, intents_utils, utils app = Flask(__name__) @@ -15,7 +14,7 @@ def process_request(): flask.abort(Response('You must provide a \'sentence\' parameter (not empty aswell)!')) sentence = data['sentence'] - tag_for_request = ia.process.get_tag_for_sentence(sentence) + tag_for_request = src.ia.process.get_tag_for_sentence(sentence) print("SENTENCE : " + sentence + " /// TAG : " + tag_for_request) diff --git a/skills/entertainement/jokes/__init__.py b/jarvis/skills/__init__.py similarity index 100% rename from skills/entertainement/jokes/__init__.py rename to jarvis/skills/__init__.py diff --git a/skills/test/wikipedia_search/__init__.py b/jarvis/skills/daily/__init__.py similarity index 100% rename from skills/test/wikipedia_search/__init__.py rename to jarvis/skills/daily/__init__.py diff --git a/utils/__init__.py b/jarvis/skills/daily/date_and_time/__init__.py similarity index 100% rename from utils/__init__.py rename to jarvis/skills/daily/date_and_time/__init__.py diff --git a/skills/daily/date_and_time/info.json b/jarvis/skills/daily/date_and_time/info.json similarity index 100% rename from skills/daily/date_and_time/info.json rename to jarvis/skills/daily/date_and_time/info.json diff --git a/skills/daily/date_and_time/intent.py b/jarvis/skills/daily/date_and_time/intent.py similarity index 93% rename from skills/daily/date_and_time/intent.py rename to jarvis/skills/daily/date_and_time/intent.py index 28ea669..a5e5496 100644 --- a/skills/daily/date_and_time/intent.py +++ b/jarvis/skills/daily/date_and_time/intent.py @@ -1,7 +1,7 @@ import time from datetime import datetime -from utils import config_utils, intents_utils +from jarvis.utils import intents_utils, config_utils def what_time_is_it(): diff --git a/skills/daily/date_and_time/lang/en-en.json b/jarvis/skills/daily/date_and_time/lang/en-en.json similarity index 100% rename from skills/daily/date_and_time/lang/en-en.json rename to jarvis/skills/daily/date_and_time/lang/en-en.json diff --git a/skills/daily/date_and_time/lang/fr-fr.json b/jarvis/skills/daily/date_and_time/lang/fr-fr.json similarity index 100% rename from skills/daily/date_and_time/lang/fr-fr.json rename to jarvis/skills/daily/date_and_time/lang/fr-fr.json diff --git a/jarvis/skills/entertainement/__init__.py b/jarvis/skills/entertainement/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/jarvis/skills/entertainement/jokes/__init__.py b/jarvis/skills/entertainement/jokes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/skills/entertainement/jokes/info.json b/jarvis/skills/entertainement/jokes/info.json similarity index 100% rename from skills/entertainement/jokes/info.json rename to jarvis/skills/entertainement/jokes/info.json diff --git a/skills/entertainement/jokes/intent.py b/jarvis/skills/entertainement/jokes/intent.py similarity index 94% rename from skills/entertainement/jokes/intent.py rename to jarvis/skills/entertainement/jokes/intent.py index c5afd26..9c33945 100644 --- a/skills/entertainement/jokes/intent.py +++ b/jarvis/skills/entertainement/jokes/intent.py @@ -1,6 +1,6 @@ import requests as requests -from utils import languages_utils, config_utils +from jarvis.utils import languages_utils, config_utils def tell_me_a_joke(): diff --git a/skills/entertainement/jokes/lang/en-en.json b/jarvis/skills/entertainement/jokes/lang/en-en.json similarity index 100% rename from skills/entertainement/jokes/lang/en-en.json rename to jarvis/skills/entertainement/jokes/lang/en-en.json diff --git a/skills/entertainement/jokes/lang/fr-fr.json b/jarvis/skills/entertainement/jokes/lang/fr-fr.json similarity index 100% rename from skills/entertainement/jokes/lang/fr-fr.json rename to jarvis/skills/entertainement/jokes/lang/fr-fr.json diff --git a/jarvis/skills/research/__init__.py b/jarvis/skills/research/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/jarvis/skills/research/wikipedia_search/__init__.py b/jarvis/skills/research/wikipedia_search/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/jarvis/utils/__init__.py b/jarvis/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/client_utils.py b/jarvis/utils/client_utils.py similarity index 97% rename from utils/client_utils.py rename to jarvis/utils/client_utils.py index 2c89998..f9a906a 100644 --- a/utils/client_utils.py +++ b/jarvis/utils/client_utils.py @@ -3,7 +3,7 @@ import json import requests from requests.structures import CaseInsensitiveDict -from utils import config_utils +from jarvis.utils import config_utils client_url = config_utils.get_in_config("CLIENT_URL") diff --git a/utils/config_utils.py b/jarvis/utils/config_utils.py similarity index 96% rename from utils/config_utils.py rename to jarvis/utils/config_utils.py index cd80f0a..4dd57ea 100644 --- a/utils/config_utils.py +++ b/jarvis/utils/config_utils.py @@ -1,7 +1,7 @@ import json import os -import get_path_file +from jarvis import get_path_file path = os.path.dirname(get_path_file.__file__) diff --git a/utils/flask_utils.py b/jarvis/utils/flask_utils.py similarity index 100% rename from utils/flask_utils.py rename to jarvis/utils/flask_utils.py diff --git a/utils/intents_utils.py b/jarvis/utils/intents_utils.py similarity index 96% rename from utils/intents_utils.py rename to jarvis/utils/intents_utils.py index 5f23a27..2fca0b7 100644 --- a/utils/intents_utils.py +++ b/jarvis/utils/intents_utils.py @@ -3,8 +3,8 @@ import json import os import random -import get_path_file -from utils import languages_utils +from jarvis import get_path_file +from jarvis.utils import languages_utils all_intents = dict() path = os.path.dirname(get_path_file.__file__) diff --git a/utils/languages_utils.py b/jarvis/utils/languages_utils.py similarity index 89% rename from utils/languages_utils.py rename to jarvis/utils/languages_utils.py index e47ab48..01f1694 100644 --- a/utils/languages_utils.py +++ b/jarvis/utils/languages_utils.py @@ -1,8 +1,8 @@ import json import os -import get_path_file -from utils import config_utils +from jarvis import get_path_file +from jarvis.utils import config_utils path = os.path.dirname(get_path_file.__file__) diff --git a/jarvis/utils/nlp_utils.py b/jarvis/utils/nlp_utils.py new file mode 100644 index 0000000..ec79e77 --- /dev/null +++ b/jarvis/utils/nlp_utils.py @@ -0,0 +1,27 @@ +import spacy +from nltk.corpus import stopwords + + +def get_spacy_nlp(): + """ + + :return: spacy + """ + nlp = spacy.load("en_core_web_sm") + return nlp + + +def get_text_without_stopwords(sentence): + stopwords_spacy = get_spacy_nlp().Defaults.stop_words + + stop_words = set(stopwords_spacy) + filtered_sentence = [w for w in sentence.lower().split() if w not in stop_words] + filtered_sentence = " ".join(filtered_sentence) + return filtered_sentence + + +def get_text_without_stopwords_nltk(sentence, language='english'): + stop_words = set(stopwords.words(language)) + filtered_sentence = [w for w in sentence.lower().split() if w not in stop_words] + filtered_sentence = " ".join(filtered_sentence) + return filtered_sentence diff --git a/utils/utils.py b/jarvis/utils/utils.py similarity index 100% rename from utils/utils.py rename to jarvis/utils/utils.py diff --git a/utils/nlp_utils.py b/utils/nlp_utils.py deleted file mode 100644 index 8c244f8..0000000 --- a/utils/nlp_utils.py +++ /dev/null @@ -1,18 +0,0 @@ -from nltk.corpus import stopwords - -from utils.languages_utils import get_language_full_name - - -def get_text_without_stopwords(sentence, language='english'): - # if the language given is something like en-us, get the full variant (english) - if '-' in language: - language = get_language_full_name(language) - - stop_words = set(stopwords.words(language)) - filtered_sentence = [w for w in sentence.lower().split() if w not in stop_words] - filtered_sentence = " ".join(filtered_sentence) - return filtered_sentence - - -if __name__ == '__main__': - print(get_text_without_stopwords("Hey give me some info about Elon Musk please"))