import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from sklearn.metrics import accuracy_score
from seqeval.metrics import f1_score

import models

kernel_size = 5


def load_model(model_name, num_words, num_intent, num_slot, dropout, wordvecs=None, embedding_dim=100, filter_count=300):
    """Build the requested model: 'intent', 'slot', or 'joint'."""
    if model_name == 'intent':
        model = models.CNNIntent(num_words, embedding_dim, num_intent, (filter_count,), kernel_size, dropout, wordvecs)
    elif model_name == 'slot':
        model = models.CNNSlot(num_words, embedding_dim, num_slot, (filter_count,), kernel_size, dropout, wordvecs)
    elif model_name == 'joint':
        model = models.CNNJoint(num_words, embedding_dim, num_intent, num_slot, (filter_count,), kernel_size, dropout, wordvecs)
    else:
        raise ValueError(f"Unknown model name: {model_name!r}")
    return model

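
# Example (a sketch, not taken from this repository): building a wide teacher
# and a narrow student over the same vocabulary for distillation. The sizes
# and the random embedding matrix below are placeholder values.
#
#   wordvecs = np.random.randn(10000, 100).astype(np.float32)
#   teacher = load_model('joint', 10000, 7, 120, dropout=0.5, wordvecs=wordvecs, filter_count=300)
#   student = load_model('joint', 10000, 7, 120, dropout=0.5, wordvecs=wordvecs, filter_count=64)
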
def rep(seed=None):
    """Seed the RNGs and make cuDNN deterministic for reproducibility; returns the seed used."""
    if seed is None:
        seed = random.randint(0, 10000)

    random.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    # CUDA
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    return seed


def train_intent(model, iter, criterion, optimizer, cuda):
    """Run one training epoch for the intent model; returns (mean loss, intent accuracy)."""
    model.train()
    epoch_loss = 0
    true_intents = []
    pred_intents = []

    for i, batch in enumerate(iter):
        optimizer.zero_grad()
        query = batch[0]
        true_intent = batch[1]

        if cuda:
            query = query.cuda()
            true_intent = true_intent.cuda()

        pred_intent = model(query)

        true_intents += true_intent.tolist()
        pred_intents += pred_intent.max(1)[1].tolist()

        loss = criterion(pred_intent, true_intent)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    return epoch_loss / len(iter), accuracy_score(true_intents, pred_intents)


def distill_intent(teacher, student, temperature, iter, criterion, optimizer, cuda):
    """Distill the teacher's intent predictions into the student for one epoch."""
    teacher.eval()
    student.train()

    true_intents = []
    pred_intents = []

    epoch_loss = 0
    for i, batch in enumerate(iter):
        optimizer.zero_grad()
        query = batch[0]
        true_intent = batch[1]

        if cuda:
            query = query.cuda()
            true_intent = true_intent.cuda()

        with torch.no_grad():
            teacher_pred_intent = teacher(query)

        student_pred_intent = student(query)

        true_intents += true_intent.tolist()
        pred_intents += student_pred_intent.max(1)[1].tolist()

        # Soften both distributions with the temperature; the criterion is expected to
        # take student log-probabilities and teacher probabilities (e.g. a KL divergence).
        loss = criterion(F.log_softmax(student_pred_intent / temperature, dim=-1), F.softmax(teacher_pred_intent / temperature, dim=-1))
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    return epoch_loss / len(iter), accuracy_score(true_intents, pred_intents)

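
# Usage note (an assumption, not confirmed by this file): the criterion passed to
# distill_intent / distill_slot / distill_joint is expected to accept student
# log-probabilities and teacher probabilities, e.g. a KL-divergence loss.
# A hypothetical wiring:
#
#   kd_criterion = nn.KLDivLoss(reduction='batchmean')
#   loss, acc = distill_intent(teacher, student, temperature=2.0, iter=train_loader,
#                              criterion=kd_criterion, optimizer=optimizer, cuda=False)
#
# train_loader, the temperature, and the optimizer here are placeholders.
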
def valid_intent(model, iter, criterion, cuda):
    """Evaluate the intent model; returns (mean loss, intent accuracy)."""
    model.eval()
    epoch_loss = 0
    true_intents = []
    pred_intents = []

    with torch.no_grad():
        for i, batch in enumerate(iter):
            query = batch[0]
            true_intent = batch[1]

            if cuda:
                query = query.cuda()
                true_intent = true_intent.cuda()

            pred_intent = model(query)

            true_intents += true_intent.tolist()
            pred_intents += pred_intent.max(1)[1].tolist()

            loss = criterion(pred_intent, true_intent)
            epoch_loss += loss.item()

    return epoch_loss / len(iter), accuracy_score(true_intents, pred_intents)


def train_slot(model, iter, criterion, optimizer, cuda):
    """Run one training epoch for the slot model; returns (mean loss, slot F1)."""
    model.train()
    epoch_loss = 0
    true_history = []
    pred_history = []

    for i, batch in enumerate(iter):
        optimizer.zero_grad()
        query = batch[0]
        true_slots = batch[2]
        true_length = batch[3]

        if cuda:
            query = query.cuda()
            true_slots = true_slots.cuda()

        pred_slots = model(query).permute(0, 2, 1)  # batch * slots * seq len

        # Collect gold and predicted tags for the real tokens only: the first position
        # is skipped and padding beyond the true length is ignored.
        true_history += [str(item) for batch_num, sublist in enumerate(true_slots.tolist()) for item in sublist[1:true_length[batch_num].item() + 1]]
        pred_history += [str(item) for batch_num, sublist in enumerate(pred_slots.max(1)[1].tolist()) for item in sublist[1:true_length[batch_num].item() + 1]]

        loss = criterion(pred_slots, true_slots)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    return epoch_loss / len(iter), f1_score(true_history, pred_history)


def distill_slot(teacher, student, temperature, iter, criterion, optimizer, cuda):
    """Distill the teacher's slot predictions into the student for one epoch."""
    teacher.eval()
    student.train()

    true_history = []
    pred_history = []

    epoch_loss = 0
    for i, batch in enumerate(iter):
        optimizer.zero_grad()
        query = batch[0]
        true_slots = batch[2]
        true_length = batch[3]

        if cuda:
            query = query.cuda()
            true_slots = true_slots.cuda()
            true_length = true_length.cuda()

        with torch.no_grad():
            teacher_pred_slot = teacher(query).permute(0, 2, 1)  # batch * slot * seq len

        student_pred_slot = student(query).permute(0, 2, 1)

        true_history += [str(item) for batch_num, sublist in enumerate(true_slots.tolist()) for item in sublist[1:true_length[batch_num].item() + 1]]
        pred_history += [str(item) for batch_num, sublist in enumerate(student_pred_slot.max(1)[1].tolist()) for item in sublist[1:true_length[batch_num].item() + 1]]

        # Softmax over the slot dimension (dim=1 after the permute above).
        loss = criterion(F.log_softmax(student_pred_slot / temperature, dim=1), F.softmax(teacher_pred_slot / temperature, dim=1))
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    return epoch_loss / len(iter), f1_score(true_history, pred_history)


def valid_slot(model, iter, criterion, cuda):
    """Evaluate the slot model; returns (mean loss, slot F1)."""
    model.eval()
    epoch_loss = 0
    true_history = []
    pred_history = []

    with torch.no_grad():
        for i, batch in enumerate(iter):
            query = batch[0]
            true_slots = batch[2]
            true_length = batch[3]

            if cuda:
                query = query.cuda()
                true_slots = true_slots.cuda()

            pred_slots = model(query).permute(0, 2, 1)  # batch * slots * seq len

            true_history += [str(item) for batch_num, sublist in enumerate(true_slots.tolist()) for item in sublist[1:true_length[batch_num].item() + 1]]
            pred_history += [str(item) for batch_num, sublist in enumerate(pred_slots.max(1)[1].tolist()) for item in sublist[1:true_length[batch_num].item() + 1]]

            loss = criterion(pred_slots, true_slots)
            epoch_loss += loss.item()

    return epoch_loss / len(iter), f1_score(true_history, pred_history)


def train_joint(model, iter, criterion, optimizer, cuda, alpha):
    """Run one joint training epoch; returns (mean loss, (intent loss, accuracy), (slot loss, F1))."""
    model.train()
    epoch_loss = 0

    epoch_intent_loss = 0
    true_intents = []
    pred_intents = []

    epoch_slot_loss = 0
    true_history = []
    pred_history = []

    for i, batch in enumerate(iter):
        optimizer.zero_grad()
        query = batch[0]
        true_intent = batch[1]
        true_slots = batch[2]
        true_length = batch[3]

        if cuda:
            query = query.cuda()
            true_intent = true_intent.cuda()
            true_slots = true_slots.cuda()
            true_length = true_length.cuda()

        pred_intent, pred_slots = model(query)

        true_intents += true_intent.tolist()
        pred_intents += pred_intent.max(1)[1].tolist()
        intent_loss = criterion(pred_intent, true_intent)
        epoch_intent_loss += intent_loss.item()

        true_history += [str(item) for batch_num, sublist in enumerate(true_slots.tolist()) for item in sublist[1:true_length[batch_num].item() + 1]]
        pred_history += [str(item) for batch_num, sublist in enumerate(pred_slots.max(1)[1].tolist()) for item in sublist[1:true_length[batch_num].item() + 1]]
        slot_loss = criterion(pred_slots, true_slots)
        epoch_slot_loss += slot_loss.item()

        # Weighted combination of the two task losses.
        loss = alpha * intent_loss + (1 - alpha) * slot_loss
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    return (epoch_loss / len(iter),
            (epoch_intent_loss / len(iter), accuracy_score(true_intents, pred_intents)),
            (epoch_slot_loss / len(iter), f1_score(true_history, pred_history)))


def distill_joint(teacher, student, temperature, iter, criterion, optimizer, cuda, alpha):
    """Distill the teacher's intent and slot predictions into the student for one epoch."""
    teacher.eval()
    student.train()

    epoch_loss = 0

    epoch_intent_loss = 0
    true_intents = []
    pred_intents = []

    epoch_slot_loss = 0
    true_history = []
    pred_history = []

    for i, batch in enumerate(iter):
        optimizer.zero_grad()
        query = batch[0]
        true_intent = batch[1]
        true_slots = batch[2]
        true_length = batch[3]

        if cuda:
            query = query.cuda()
            true_intent = true_intent.cuda()
            true_slots = true_slots.cuda()
            true_length = true_length.cuda()

        with torch.no_grad():
            teacher_pred_intent, teacher_pred_slot = teacher(query)

        student_pred_intent, student_pred_slot = student(query)

        true_intents += true_intent.tolist()
        pred_intents += student_pred_intent.max(1)[1].tolist()
        intent_loss = criterion(F.log_softmax(student_pred_intent / temperature, dim=-1), F.softmax(teacher_pred_intent / temperature, dim=-1))
        epoch_intent_loss += intent_loss.item()

        true_history += [str(item) for batch_num, sublist in enumerate(true_slots.tolist()) for item in sublist[1:true_length[batch_num].item() + 1]]
        pred_history += [str(item) for batch_num, sublist in enumerate(student_pred_slot.max(1)[1].tolist()) for item in sublist[1:true_length[batch_num].item() + 1]]
        slot_loss = criterion(F.log_softmax(student_pred_slot / temperature, dim=1), F.softmax(teacher_pred_slot / temperature, dim=1))
        epoch_slot_loss += slot_loss.item()

        loss = alpha * intent_loss + (1 - alpha) * slot_loss
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    return (epoch_loss / len(iter),
            (epoch_intent_loss / len(iter), accuracy_score(true_intents, pred_intents)),
            (epoch_slot_loss / len(iter), f1_score(true_history, pred_history)))


def valid_joint(model, iter, criterion, cuda, alpha):
    """Evaluate the joint model; returns (mean loss, (intent loss, accuracy), (slot loss, F1))."""
    model.eval()
    epoch_loss = 0

    epoch_intent_loss = 0
    true_intents = []
    pred_intents = []

    epoch_slot_loss = 0
    true_history = []
    pred_history = []

    with torch.no_grad():
        for i, batch in enumerate(iter):
            query = batch[0]
            true_intent = batch[1]
            true_slots = batch[2]
            true_length = batch[3]

            if cuda:
                query = query.cuda()
                true_intent = true_intent.cuda()
                true_slots = true_slots.cuda()
                true_length = true_length.cuda()

            pred_intent, pred_slots = model(query)

            true_intents += true_intent.tolist()
            pred_intents += pred_intent.max(1)[1].tolist()
            intent_loss = criterion(pred_intent, true_intent)
            epoch_intent_loss += intent_loss.item()

            true_history += [str(item) for batch_num, sublist in enumerate(true_slots.tolist()) for item in sublist[1:true_length[batch_num].item() + 1]]
            pred_history += [str(item) for batch_num, sublist in enumerate(pred_slots.max(1)[1].tolist()) for item in sublist[1:true_length[batch_num].item() + 1]]
            slot_loss = criterion(pred_slots, true_slots)
            epoch_slot_loss += slot_loss.item()

            loss = alpha * intent_loss + (1 - alpha) * slot_loss
            epoch_loss += loss.item()

    return (epoch_loss / len(iter),
            (epoch_intent_loss / len(iter), accuracy_score(true_intents, pred_intents)),
            (epoch_slot_loss / len(iter), f1_score(true_history, pred_history)))
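

# ---------------------------------------------------------------------------
# Minimal smoke test (a sketch under assumptions). It assumes models.CNNIntent
# behaves as it is used by load_model above: accepts wordvecs=None and returns
# intent logits of shape (batch, num_intent) for an integer query tensor. The
# vocabulary size, label counts, and synthetic batches below are placeholders,
# not values from this project.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from torch.utils.data import DataLoader, TensorDataset

    seed = rep(1234)
    num_words, num_intent, num_slot = 1000, 7, 120

    model = load_model('intent', num_words, num_intent, num_slot, dropout=0.5)

    # Synthetic batches shaped like the real iterator: (query, intent, slots, length).
    queries = torch.randint(0, num_words, (64, 30))
    intents = torch.randint(0, num_intent, (64,))
    slots = torch.randint(0, num_slot, (64, 30))
    lengths = torch.full((64,), 28, dtype=torch.long)
    loader = DataLoader(TensorDataset(queries, intents, slots, lengths), batch_size=16)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    loss, acc = train_intent(model, loader, criterion, optimizer, cuda=False)
    print(f'seed={seed} loss={loss:.4f} accuracy={acc:.4f}')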