From 6e28b7076ede48cea9ce171f72695788fe2029d2 Mon Sep 17 00:00:00 2001
From: Mathieu B
Date: Fri, 30 Jul 2021 12:37:16 +0200
Subject: [PATCH] (IMPORTANT) Removed old AI training model and old intents

---
 jarvis/ia/__init__.py         |   0
 jarvis/ia/model.py            |  19 -----
 jarvis/ia/nltk_utils.py       |  52 -------------
 jarvis/ia/process.py          |  60 ---------------
 jarvis/ia/train.py            | 138 ----------------------------------
 jarvis/ia/trained_model.pth   | Bin 3607 -> 0 bytes
 jarvis/utils/intents_utils.py |  98 ------------------------
 7 files changed, 367 deletions(-)
 delete mode 100644 jarvis/ia/__init__.py
 delete mode 100644 jarvis/ia/model.py
 delete mode 100644 jarvis/ia/nltk_utils.py
 delete mode 100644 jarvis/ia/process.py
 delete mode 100644 jarvis/ia/train.py
 delete mode 100644 jarvis/ia/trained_model.pth
 delete mode 100644 jarvis/utils/intents_utils.py

diff --git a/jarvis/ia/__init__.py b/jarvis/ia/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/jarvis/ia/model.py b/jarvis/ia/model.py
deleted file mode 100644
index 69415ba..0000000
--- a/jarvis/ia/model.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import torch.nn as nn
-
-
-class NeuralNet(nn.Module):
-    def __init__(self, input_size, hidden_size, num_classes):
-        super(NeuralNet, self).__init__()
-        self.l1 = nn.Linear(input_size, hidden_size)
-        self.l2 = nn.Linear(hidden_size, hidden_size)
-        self.l3 = nn.Linear(hidden_size, num_classes)
-        self.relu = nn.ReLU()
-
-    def forward(self, x):
-        out = self.l1(x)
-        out = self.relu(out)
-        out = self.l2(out)
-        out = self.relu(out)
-        out = self.l3(out)
-        # no activation and no softmax at the end
-        return out
diff --git a/jarvis/ia/nltk_utils.py b/jarvis/ia/nltk_utils.py
deleted file mode 100644
index b9c5dfb..0000000
--- a/jarvis/ia/nltk_utils.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import nltk
-import numpy as np
-from nltk.stem.porter import PorterStemmer
-
-from jarvis.utils import languages_utils
-
-stemmer = PorterStemmer()
-
-
-# TODO : have a look to replace nltk by spacy or the other way (use only one of them)
-
-def tokenize(sentence):
-    """
-    split sentence into array of words/tokens
-    a token can be a word or punctuation character, or number
-    """
-    # English, Danish, Estonian, French, Greek, Norwegian, Portuguese, Spanish, Turkish,
-    # Czech, Dutch, Finnish, German, Italian, Polish, Slovene, and Swedish
-
-    return nltk.word_tokenize(sentence,
-                               language=languages_utils.get_language_full_name())
-
-
-def stem(word):
-    """
-    stemming = find the root form of the word
-    examples:
-    words = ["organize", "organizes", "organizing"]
-    words = [stem(w) for w in words]
-    -> ["organ", "organ", "organ"]
-    """
-    return stemmer.stem(word.lower())
-
-
-def bag_of_words(tokenized_sentence, words):
-    """
-    return bag of words array:
-    1 for each known word that exists in the sentence, 0 otherwise
-    example:
-    sentence = ["hello", "how", "are", "you"]
-    words = ["hi", "hello", "I", "you", "bye", "thank", "cool"]
-    bog = [ 0 , 1 , 0 , 1 , 0 , 0 , 0]
-    """
-    # stem each word
-    sentence_words = [stem(word) for word in tokenized_sentence]
-    # initialize bag with 0 for each word
-    bag = np.zeros(len(words), dtype='float32')
-    for idx, w in enumerate(words):
-        if w in sentence_words:
-            bag[idx] = 1
-
-    return bag
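For reviewers checking what goes away with nltk_utils.py, the sketch below is a minimal, self-contained version of what its tokenize/stem/bag_of_words helpers computed. The sentence and vocabulary are the made-up values from the deleted docstring, not data from this repository, and the default English tokenizer stands in for the removed languages_utils lookup.

import nltk
import numpy as np
from nltk.stem.porter import PorterStemmer

stemmer = PorterStemmer()


def bag_of_words(tokenized_sentence, words):
    # 1.0 where a known word occurs in the (stemmed) sentence, 0.0 otherwise
    sentence_words = [stemmer.stem(w.lower()) for w in tokenized_sentence]
    bag = np.zeros(len(words), dtype="float32")
    for idx, w in enumerate(words):
        if w in sentence_words:
            bag[idx] = 1.0
    return bag


# made-up example mirroring the deleted docstring
sentence = nltk.word_tokenize("hello how are you")  # needs the NLTK 'punkt' tokenizer data
vocab = ["hi", "hello", "i", "you", "bye", "thank", "cool"]
print(bag_of_words(sentence, vocab))  # -> [0. 1. 0. 1. 0. 0. 0.]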
diff --git a/jarvis/ia/process.py b/jarvis/ia/process.py
deleted file mode 100644
index 3d0cac7..0000000
--- a/jarvis/ia/process.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import os
-
-import torch
-from unidecode import unidecode
-
-from jarvis import get_path_file
-from jarvis.ia.model import NeuralNet
-from jarvis.ia.nltk_utils import bag_of_words, tokenize
-
-print("Loading, might take a few seconds...")
-
-path = os.path.dirname(get_path_file.__file__)
-
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-file = path + "/ia/trained_model.pth"
-data = torch.load(file, map_location=device)
-
-input_size = data["input_size"]
-hidden_size = data["hidden_size"]
-output_size = data["output_size"]
-all_words = data['all_words']
-tags = data['tags']
-model_state = data["model_state"]
-
-model = NeuralNet(input_size, hidden_size, output_size).to(device)
-model.load_state_dict(model_state)
-model.eval()
-
-
-def get_tag_for_sentence(input_sentence):
-    """
-    Return the matching tag of the input_sentence given in parameter.
-    It usually is what the STT engine recognise or what the user's type when using no-voice mode
-
-    Parameters
-    ----------
-    input_sentence is your sentence
-
-    Returns tag from the skills.json file
-    -------
-
-    """
-    sentence = unidecode(input_sentence)  # convert accent to better recognition
-    sentence = tokenize(sentence)
-    X = bag_of_words(sentence, all_words)
-    X = X.reshape(1, X.shape[0])
-    X = torch.from_numpy(X).to(device)
-
-    output = model(X)
-    _, predicted = torch.max(output, dim=1)
-
-    tag = tags[predicted.item()]
-
-    probs = torch.softmax(output, dim=1)
-    prob = probs[0][predicted.item()]
-    if prob.item() > 0.75 and len(sentence) > 2:
-        return tag
-    else:
-        return 'dont_understand'
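The deleted process.py only accepted a prediction when the softmax confidence cleared 0.75 and the sentence had more than two tokens; everything else fell back to the 'dont_understand' tag. The fragment below isolates that gate; the scores, tags and sentence are invented for illustration, only the threshold logic mirrors the removed get_tag_for_sentence().

import torch

output = torch.tensor([[0.2, 3.1, -0.5]])    # invented raw scores, shaped (1, num_tags)
tags = ["greeting", "weather", "goodbye"]    # invented tag list
sentence = ["what", "is", "the", "weather"]  # tokenized input, as in the removed code

probs = torch.softmax(output, dim=1)
prob, predicted = torch.max(probs, dim=1)

# accept only confident predictions on non-trivial sentences, as the deleted code did
if prob.item() > 0.75 and len(sentence) > 2:
    tag = tags[predicted.item()]
else:
    tag = "dont_understand"

print(tag, round(prob.item(), 3))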
diff --git a/jarvis/ia/train.py b/jarvis/ia/train.py
deleted file mode 100644
index 4487dda..0000000
--- a/jarvis/ia/train.py
+++ /dev/null
@@ -1,138 +0,0 @@
-import os
-
-import numpy as np
-import torch
-import torch.nn as nn
-from torch.utils.data import Dataset, DataLoader
-
-from jarvis import get_path_file
-from jarvis.ia.model import NeuralNet
-from jarvis.ia.nltk_utils import bag_of_words, tokenize, stem
-from jarvis.utils import intents_utils
-
-path = os.path.dirname(get_path_file.__file__)
-
-
-def train():
-    intents_utils.register_all_intents()  # important
-    all_intents_patterns = intents_utils.get_all_patterns()
-
-    all_words = []
-    tags = []
-    xy = []
-    # loop through each sentence in our skills patterns
-    for intent in all_intents_patterns:
-        tag = intent
-        # add to tag list
-        tags.append(tag)
-
-        for pattern in all_intents_patterns[intent]:
-            # tokenize each word in the sentence
-            w = tokenize(pattern)
-            # add to our words list
-            all_words.extend(w)
-            # add to xy pair
-            xy.append((w, tag))
-
-    # stem and lower each word
-    ignore_words = ['?', '.', '!']
-    all_words = [stem(w) for w in all_words if w not in ignore_words]
-    # remove duplicates and sort
-    all_words = sorted(set(all_words))
-    tags = sorted(set(tags))
-
-    print(len(xy), "patterns")
-    print(len(tags), "tags:", tags)
-    print(len(all_words), "unique stemmed words:", all_words)
-
-    # create training data
-    X_train = []
-    y_train = []
-    for (pattern_sentence, tag) in xy:
-        # X: bag of words for each pattern_sentence
-        bag = bag_of_words(pattern_sentence, all_words)
-        X_train.append(bag)
-        # y: PyTorch CrossEntropyLoss needs only class labels, not one-hot
-        label = tags.index(tag)
-        y_train.append(label)
-
-    X_train = np.array(X_train)
-    y_train = np.array(y_train)
-
-    # Hyper-parameters
-    num_epochs = 1000
-    batch_size = 8
-    learning_rate = 0.001
-    input_size = len(X_train[0])
-    hidden_size = 8
-    output_size = len(tags)
-    print(input_size, output_size)
-
-    class ChatDataset(Dataset):
-
-        def __init__(self):
-            self.n_samples = len(X_train)
-            self.x_data = X_train
-            self.y_data = y_train
-
-        # support indexing such that dataset[i] can be used to get i-th sample
-        def __getitem__(self, index):
-            return self.x_data[index], self.y_data[index]
-
-        # we can call len(dataset) to return the size
-        def __len__(self):
-            return self.n_samples
-
-    dataset = ChatDataset()
-    train_loader = DataLoader(dataset=dataset,
-                              batch_size=batch_size,
-                              shuffle=True,
-                              num_workers=0)
-
-    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-    model = NeuralNet(input_size, hidden_size, output_size).to(device)
-
-    # Loss and optimizer
-    criterion = nn.CrossEntropyLoss()
-    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
-
-    # Train the model
-    for epoch in range(num_epochs):
-        for (words, labels) in train_loader:
-            words = words.to(device)
-            labels = labels.to(dtype=torch.long).to(device)
-
-            # Forward pass
-            outputs = model(words)
-            # if y would be one-hot, we must apply
-            # labels = torch.max(labels, 1)[1]
-            loss = criterion(outputs, labels)
-
-            # Backward and optimize
-            optimizer.zero_grad()
-            loss.backward()
-            optimizer.step()
-
-        if (epoch + 1) % 100 == 0:
-            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')
-
-    print(f'Final loss: {loss.item():.4f}')
-
-    data = {
-        "model_state": model.state_dict(),
-        "input_size": input_size,
-        "hidden_size": hidden_size,
-        "output_size": output_size,
-        "all_words": all_words,
-        "tags": tags
-    }
-
-    file = path + "/ia/trained_model.pth"
-    torch.save(data, file)
-
-    print(f'Training complete. file saved to {file}')
-
-
-if __name__ == '__main__':
-    train()
diff --git a/jarvis/ia/trained_model.pth b/jarvis/ia/trained_model.pth
deleted file mode 100644
index faf82083a0cc9e7eaf12d6a047048083326ec3b1..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3607
[base85 payload of the deleted 3607-byte trained_model.pth omitted]
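trained_model.pth was not a bare state_dict but a torch.save of the dictionary assembled at the end of the deleted train.py. Anyone who still needs the old checkpoint can inspect a pre-removal copy roughly like this; the path is a placeholder for wherever an old clone keeps the file.

import torch

checkpoint_path = "jarvis/ia/trained_model.pth"  # placeholder path to a pre-removal copy

# older PyTorch loads this directly; recent releases may need weights_only=False
data = torch.load(checkpoint_path, map_location="cpu")

# keys written by the deleted train.py
print(data["input_size"], data["hidden_size"], data["output_size"])
print(len(data["all_words"]), "stemmed vocabulary words")
print(data["tags"])
print(list(data["model_state"].keys()))  # layer weights of the deleted NeuralNet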
diff --git a/jarvis/utils/intents_utils.py b/jarvis/utils/intents_utils.py
deleted file mode 100644
index 2fca0b7..0000000
--- a/jarvis/utils/intents_utils.py
+++ /dev/null
@@ -1,98 +0,0 @@
-import glob
-import json
-import os
-import random
-
-from jarvis import get_path_file
-from jarvis.utils import languages_utils
-
-all_intents = dict()
-path = os.path.dirname(get_path_file.__file__)
-
-
-def register_all_intents():
-    global all_intents
-
-    result = {}
-
-    files = glob.glob(path + "/skills/**/info.json", recursive=True)
-    for f in files:
-        with open(f, "rb") as infile:
-            intent_info_json = json.load(infile)
-            intents_in_info = intent_info_json['intents']
-            intent_path = str(f).replace('info.json', '')
-
-            for intent in intents_in_info:
-                result[intent] = intent_path
-
-    all_intents = result
-
-
-def get_all_intents():
-    if len(all_intents) >= 1:
-        return all_intents
-    else:
-        register_all_intents()
-        return get_all_intents()
-
-
-def get_all_patterns():
-    all_patterns = {}
-
-    # need to run register first
-    if not all_intents:
-        print("Warning : No intent found at all, don't forget to register them!")
-        return {}
-
-    for intent in all_intents:
-        all_patterns[intent] = get_patterns(intent)
-
-    return all_patterns
-
-
-def get_patterns(intent_tag):
-    if exists(intent_tag):
-        patterns = get_lang_for_intent(intent_tag).get(intent_tag).get('patterns')
-        return patterns
-    else:
-        return {}
-
-
-def get_path(intent_tag):
-    if exists(intent_tag):
-        return get_all_intents().get(intent_tag)
-
-
-def get_response(intent_tag):
-    if exists(intent_tag):
-        responses = get_responses(intent_tag)
-        return random.choice(responses)
-
-
-def get_responses(intent_tag):
-    if exists(intent_tag):
-        responses = get_lang_for_intent(intent_tag).get(intent_tag).get('responses')
-        return responses
-    else:
-        return {}
-
-
-def get_lang_for_intent(intent_tag):
-    # first we check the intent
-    if exists(intent_tag):
-        lang_path = str(get_all_intents().get(intent_tag))
-        lang_path = lang_path + 'lang/' + languages_utils.get_language() + '.json'
-
-        if os.path.exists(lang_path):
-            lang_file = open(lang_path)
-            json_lang = json.load(lang_file)
-            return json_lang
-    else:
-        return {}
-
-
-def exists(intent_tag):
-    if intent_tag in get_all_intents():
-        return True
-    else:
-        return False
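For context on what any replacement has to cover, the deleted intents_utils.py assumed one directory per skill, each with an info.json listing its intent tags and a lang/<language>.json holding each tag's patterns and responses. The sketch below reproduces that lookup in miniature; the skill root, language code and tag names are placeholders, while the file names and JSON keys ('intents', 'patterns', 'responses') come from the removed code.

import glob
import json
import os

# Assumed layout (names invented, keys taken from the deleted loader):
#   jarvis/skills/<skill>/info.json        -> {"intents": ["hello"]}
#   jarvis/skills/<skill>/lang/en-US.json  -> {"hello": {"patterns": [...], "responses": [...]}}

skills_root = "jarvis/skills"  # placeholder; the old code derived this from get_path_file
language = "en-US"             # placeholder for languages_utils.get_language()

intents = {}
for info_path in glob.glob(os.path.join(skills_root, "**", "info.json"), recursive=True):
    with open(info_path) as f:
        info = json.load(f)
    for tag in info["intents"]:
        intents[tag] = os.path.dirname(info_path)


def get_patterns(tag):
    # patterns (and, with the same shape, responses) live in the owning skill's lang file
    lang_file = os.path.join(intents[tag], "lang", language + ".json")
    with open(lang_file) as f:
        return json.load(f)[tag]["patterns"]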