Browse Source

Initial work on standalone query parsing

mistress
Daniel Muckerman 3 years ago
parent
commit
3a861a8b9e
8 changed files with 64 additions and 1 deletions
  1. +3
    -1
      .gitignore
  2. BIN
      intents.pkl
  3. +1
    -0
      num_words.pkl
  4. BIN
      slots.pkl
  5. BIN
      snips_joint
  6. +60
    -0
      test_query.py
  7. BIN
      word2idx.pkl
  8. BIN
      wordvecs.pkl

+ 3
- 1
.gitignore View File

@ -121,4 +121,6 @@ dmypy.json
# End of https://www.gitignore.io/api/python,visualstudiocode
glove/
venv/
venv/
env/
glove.6B.zip

BIN
intents.pkl View File


+ 1
- 0
num_words.pkl View File

@ -0,0 +1 @@
€Mi/.

BIN
slots.pkl View File


BIN
snips_joint View File


+ 60
- 0
test_query.py View File

@ -0,0 +1,60 @@
import torch
import torch.nn as nn
import numpy as np
import models
import pickle
import time
# Wall-clock start; the elapsed time is printed at the very end of the script.
start_time = time.time()

# Special vocabulary tokens. Each one is looked up in word2idx when a query
# is wrapped and padded.
PAD = "<pad>"
BOS = "<bos>"
EOS = "<eos>"


def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# word2idx is indexed by token string; slots and intents are indexed by the
# integer class ids predicted by the model.
word2idx = _load_pickle("word2idx.pkl")
# Passed straight to the model constructor (presumably pretrained word
# vectors -- confirm against models.CNNJoint).
wordvecs = _load_pickle("wordvecs.pkl")
slots = _load_pickle("slots.pkl")
intents = _load_pickle("intents.pkl")

# Model hyper-parameters handed to models.CNNJoint.
# NOTE(review): these presumably have to match the values used when the
# 'snips_joint' checkpoint was trained -- confirm before changing.
num_words = len(word2idx)
num_intent = 7
num_slot = 72
filter_count = 300
dropout = 0
embedding_dim = 100
def pad_query(sequence, max_len=50):
    """Wrap token ids in BOS/EOS and pad or truncate them to a fixed length.

    Args:
        sequence: List of integer token ids, without special tokens. It must
            be a list because it is concatenated with the BOS/EOS ids.
        max_len: Length of the returned array. Defaults to 50, the value
            that used to be hard-coded.

    Returns:
        A 1-D numpy array of exactly ``max_len`` ids: BOS, the tokens, EOS,
        then PAD ids up to ``max_len``.
    """
    wrapped = [word2idx[BOS]] + sequence + [word2idx[EOS]]
    # Truncation happens after EOS is appended, so an over-long query loses
    # its EOS marker (and any tokens beyond the limit).
    wrapped = wrapped[:max_len]
    return np.pad(
        wrapped,
        (0, max_len - len(wrapped)),
        mode='constant',
        constant_values=(word2idx[PAD],),
    )
# Example query to classify.
query = "What's the weather like in Great Mills right now?"

# Normalise: lower-case, and turn apostrophes and question marks into spaces
# so that whitespace splitting yields the tokens looked up in word2idx.
q = query.lower().replace("'", " ").replace("?", " ").strip()
tokens = q.split()

# Only batch row 0 holds a real query; the other seven rows are padding, so
# their "true" length is 0 and they contribute no slot predictions below.
true_length = [len(tokens), 0, 0, 0, 0, 0, 0, 0]

# A word missing from word2idx raises KeyError here (assuming word2idx is a
# plain dict); there is no unknown-token fallback in this script.
qq = torch.from_numpy(pad_query([word2idx[word] for word in tokens]))

model = models.CNNJoint(num_words, embedding_dim, num_intent, num_slot, (filter_count,), 5, dropout, wordvecs)
model.eval()
# NOTE(review): torch.load unpickles the checkpoint; only load trusted files.
model.load_state_dict(torch.load('snips_joint', map_location=torch.device('cpu')))

# Not used anywhere below; kept so the module-level name stays available.
criterion = torch.nn.CrossEntropyLoss(ignore_index=-1)

# A row containing only BOS, EOS and padding, used to fill the batch.
pad_tensor = torch.from_numpy(pad_query([]))
# NOTE(review): the batch is filled up to 8 rows -- presumably the batch size
# the model was trained with; confirm whether a single row would also work.
batch = torch.stack([qq] + [pad_tensor] * 7)

# Inference only: skip autograd bookkeeping during the forward pass.
with torch.no_grad():
    pred_intent, pred_slots = model(batch)

# Arg-max over dimension 1 gives one slot id per position for every row.
# Position 0 is BOS, so each row's tokens start at index 1.
slot_ids = pred_slots.max(1)[1].tolist()
slt = [
    item
    for batch_num, sublist in enumerate(slot_ids)
    for item in sublist[1:true_length[batch_num] + 1]
]
out_slots = [slots[c] for c in slt]

# Intent prediction for row 0, the real query.
itnt = pred_intent.max(1)[1].tolist()[0]
out_intent = intents[itnt]

print("Input: {}\nIntent: {}\nSlots: {}".format(query, out_intent, out_slots))
print("--- %s seconds ---" % (time.time() - start_time))

BIN
word2idx.pkl View File


BIN
wordvecs.pkl View File


Loading…
Cancel
Save