Browse Source

Initial commit

mistress
Daniel Muckerman 3 years ago
commit
4332977208
6 changed files with 217 additions and 0 deletions
  1. +5
    -0
      .gitignore
  2. +5
    -0
      download_dict.py
  3. +9
    -0
      download_dict.sh
  4. +10
    -0
      get_data.sh
  5. +3
    -0
      requirements.txt
  6. +185
    -0
      window.py

+ 5
- 0
.gitignore View File

@ -0,0 +1,5 @@
dataset/
.DS_Store
env/
*.tar
merged.dict*

+ 5
- 0
download_dict.py View File

@ -0,0 +1,5 @@
"""Fetch the dictionary file from its public MEGA share link."""
from mega import Mega

# Public share link of the dictionary file.
DICT_URL = 'https://mega.nz/#!yAMyFYCI!o_UmixbiIzosyYk-6O5xRZZDGpFRik_eMrZum-iQuhQ'

# An ephemeral session is created first, so no account credentials are needed.
client = Mega.from_ephemeral()
print("Downloading Dictionary...")
client.download_from_url(DICT_URL)

+ 9
- 0
download_dict.sh View File

@ -0,0 +1,9 @@
#!/bin/sh
# Install the python3-mega client from source, then run download_dict.py.
#
# Abort on the first failing command: without this, a failed clone or `cd`
# would let pip / setup.py run in the wrong directory.
set -e

# Re-use an existing checkout so the script can be run more than once
# (`git clone` refuses to clone into an existing non-empty directory).
if [ ! -d python3-mega ]; then
    git clone https://github.com/jeroenmeulenaar/python3-mega
fi

cd python3-mega
pip install -r requirements.txt
python setup.py install
cd ..

python download_dict.py

+ 10
- 0
get_data.sh View File

@ -0,0 +1,10 @@
#!/bin/sh
# Download the dataset archive, unpack it and create the dataset/in,
# dataset/out and dataset/discard folders.
#
# Abort on the first failing command so that a failed download or extraction
# never reaches the `mv` / `rm -rf` steps below.
set -e

wget -O dataset.tar --content-disposition --user-agent "macintosh" https://cloud.technicalincompetence.club/index.php/s/W8rLGrCKgrFXw8z/download
tar -xvf dataset.tar

# Create the three working folders up front.
mkdir -p dataset/in dataset/out dataset/discard

# The archive unpacks to wavs/out/; its files become the input queue.
mv wavs/out/* dataset/in/
rm -rf wavs

+ 3
- 0
requirements.txt View File

@ -0,0 +1,3 @@
ibm-watson==4.5.0
python-mega==0.1.0
python-vlc==3.0.10114

+ 185
- 0
window.py View File

@ -0,0 +1,185 @@
import tkinter as tk
import vlc
from os import listdir, remove
from os.path import isfile, join
import json
import shutil
# Set up IBM speech to text
from ibm_watson import SpeechToTextV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
import os

# The API key and service URL can be overridden through the environment.
# The literals below are the previously hard-coded values, kept only as
# fallbacks so existing set-ups keep working unchanged.
# NOTE(review): an API key committed to source control should be treated as
# leaked -- rotate it and supply the replacement via SPEECH_TO_TEXT_APIKEY.
authenticator = IAMAuthenticator(
    os.environ.get('SPEECH_TO_TEXT_APIKEY')
    or 'b373X-km7u5pAaz2JoizXigcVFZFEB8CIntgYgWzbCQ4'
)
speech_to_text = SpeechToTextV1(
    authenticator=authenticator
)
speech_to_text.set_service_url(
    os.environ.get('SPEECH_TO_TEXT_URL')
    or 'https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/0e7a3edf-309c-4e64-b345-8251781245e4'
)
# TLS certificate verification now stays enabled by default: turning it off
# would send the API key over a connection whose peer is not authenticated.
# It can still be disabled explicitly (e.g. behind an intercepting proxy).
if os.environ.get('SPEECH_TO_TEXT_DISABLE_SSL', '').lower() in ('1', 'true', 'yes'):
    speech_to_text.set_disable_ssl_verification(True)
# Set up Arpabet dict
def ARPA(text, arpa_dict=None):
    """Replace every known word of *text* with its ARPAbet transcription.

    The text is split on single spaces. For each token, trailing punctuation
    (any run of ``! ? , . ;``) is set aside, the remaining word is looked up
    upper-cased in the pronunciation dictionary, and a hit is emitted as
    ``{PHONEMES}`` followed by the punctuation that was set aside. Unknown
    words are emitted unchanged.

    Args:
        text: The sentence to convert.
        arpa_dict: Mapping from upper-case word to phoneme string. Defaults
            to the module-level ``thisdict``.

    Returns:
        The converted sentence, or ``""`` when there is nothing to convert.
    """
    if len(text) == 0:
        return ""
    if arpa_dict is None:
        arpa_dict = thisdict

    punctuation = "!?,.;"
    out = ''
    for token in text.split(" "):
        # Strip the whole trailing punctuation run at once. The previous
        # character-by-character loop stopped after one pass and could index
        # into an empty string for tokens such as "?!".
        word = token.rstrip(punctuation)
        if word:
            end_chars = token[len(word):]
        else:
            # Empty token or punctuation only: pass it through untouched.
            word, end_chars = token, ''

        word_arpa = arpa_dict.get(word.upper(), '')
        if len(word_arpa) != 0:
            word = "{" + str(word_arpa) + "}"
        out = (out + " " + word + end_chars).strip()

    # NOTE(review): the original ended with a comparison of the last character
    # against an empty string and appended an empty string, which had no effect
    # other than raising IndexError on empty output. If an end-of-line marker
    # (such as the one callback() appends to the plain text) was intended here,
    # confirm and add it explicitly.
    return out
# Pronunciation dictionary: maps the first space-separated field of each line
# (the word) to the rest of the line (its phonemes).
thisdict = {}
with open('merged.dict_1.1.txt', "r") as dict_file:
    dict_lines = dict_file.read().splitlines()
# Iterate bottom-up so that, for a word listed more than once, the entry
# nearest the top of the file is the one that ends up in the dictionary.
for line in reversed(dict_lines):
    fields = line.split(" ", 1)
    if len(fields) != 2:
        # Blank line or a line with no space: nothing to map, skip it
        # instead of failing with IndexError at start-up.
        continue
    thisdict[fields[0]] = fields[1].strip()
# Dataset locations and the queue of files still to be processed.
IDX = 0                  # position in `onlyfiles` of the current track
MYPATH = "./dataset/in"  # folder scanned for input files
OUTPATH = './dataset/'   # base folder for file lists and processed files

# Disabled: would truncate both file lists on every start.
# f = open("{}filelist.txt".format(OUTPATH), "w")
# f.write("")
# f.close()
# f = open("{}filelist_arpa.txt".format(OUTPATH), "w")
# f.write("")
# f.close()

# Keep regular files only; sub-directories of MYPATH are ignored.
onlyfiles = list(filter(lambda name: isfile(join(MYPATH, name)), listdir(MYPATH)))
if onlyfiles:
    img = onlyfiles[0]
def keyEvent(event):
    """Step to the previous/next file on the Left/Right arrow keys.

    The index wraps around at both ends of ``onlyfiles``. After a step the
    file-name label is refreshed and the text field is cleared. Any other
    key is ignored.
    """
    global IDX
    global v
    pressed = event.keysym
    if pressed not in ('Left', 'Right'):
        return

    if pressed == 'Left':
        IDX -= 1
        if IDX < 0:
            IDX = len(onlyfiles) - 1
    else:
        IDX += 1
        if IDX >= len(onlyfiles):
            IDX = 0

    w.config(text=onlyfiles[IDX])
    v.set("")
def comp_s(event):
    """Print the current contents of the text field, then clear it."""
    global v
    typed = v.get()
    print(typed)
    v.set("")
def playTrack(event):
    """Start VLC playback of the file at ``onlyfiles[IDX]``."""
    track_path = "{}/{}".format(MYPATH, onlyfiles[IDX])
    vlc.MediaPlayer(track_path).play()
def transcribeTrack(event):
    """Send the current file to the speech-to-text service and show the result.

    The file at ``onlyfiles[IDX]`` is uploaded as ``audio/wav``. The
    transcript of the first alternative of the first result is written into
    the text field. When the service returns no results, the field is cleared
    instead of raising ``IndexError``.
    """
    global v
    with open("{}/{}".format(MYPATH, onlyfiles[IDX]),
              'rb') as audio_file:
        speech_recognition_results = speech_to_text.recognize(
            audio=audio_file,
            content_type='audio/wav',
            word_alternatives_threshold=0.9,
        ).get_result()
    # print(json.dumps(speech_recognition_results, indent=2))

    # The service may return an empty result list (e.g. no speech detected).
    results = speech_recognition_results.get("results") or []
    if results and results[0].get("alternatives"):
        transcript = results[0]["alternatives"][0]["transcript"]
    else:
        transcript = ""
    v.set(transcript)
def saveTrack(event):
    """Append the current track to both file lists and move it to 'out'.

    One ``wavs/out/<file>|<text>`` line is appended to ``filelist.txt``
    (text taken from the ``o1`` label) and one to ``filelist_arpa.txt``
    (text taken from the ``o2`` label). The audio file is then handed to
    ``move_audio_file``.
    """
    track_name = onlyfiles[IDX]
    entries = (
        ("{}filelist.txt".format(OUTPATH), o1.cget('text')),
        ("{}filelist_arpa.txt".format(OUTPATH), o2.cget('text')),
    )
    for list_path, transcript in entries:
        # UTF-8 is set explicitly: the label text contains non-ASCII
        # characters (the "␤" marker) that a platform-default encoding may
        # be unable to write. `with` closes the file even if write() fails.
        with open(list_path, "a", encoding="utf-8") as f:
            f.write("wavs/out/{}|{}\n".format(track_name, transcript))
    move_audio_file(IDX, 'out')
def discardTrack(event):
    """Move the current track into the 'discard' folder without saving text."""
    move_audio_file(idx=IDX, destination='discard')
def callback(sv):
    """Refresh both preview labels from the text variable *sv*.

    ``o1`` shows the stripped text followed by the "␤" marker; ``o2`` shows
    the ARPA() conversion of the same stripped text.
    """
    typed = sv.get().strip()
    o1.config(text="{}␤".format(typed), wraplength=500)
    o2.config(text=ARPA(typed), wraplength=500)
def move_audio_file(idx, destination):
    """Move one input file into ``OUTPATH/<destination>/`` and step back.

    The file is copied and the original removed; only then is it dropped
    from ``onlyfiles``, so a failed copy leaves the queue untouched. The
    selection moves to the previous entry (wrapping to the last one), the
    label is refreshed and the text field cleared.

    Args:
        idx: Index into ``onlyfiles`` of the file to move.
        destination: Sub-folder name under ``OUTPATH`` ('out' or 'discard'
            in the callers visible in this file).
    """
    global IDX
    global v
    file_path = onlyfiles[idx]
    source = MYPATH + '/' + file_path

    shutil.copyfile(source, OUTPATH + destination + '/' + file_path)
    remove(source)
    onlyfiles.pop(idx)

    idx = idx - 1
    if idx < 0:
        idx = len(onlyfiles) - 1

    if onlyfiles:
        # Keep the global selection in step with the label. Previously IDX
        # was left unchanged, so the label named one file while play /
        # transcribe / save acted on another, and IDX could run past the end.
        IDX = idx
        w.config(text=onlyfiles[idx])
    else:
        # Queue is empty: nothing left to show.
        IDX = 0
        w.config(text="")
    v.set("")
# ---- Main window -----------------------------------------------------------
mw = tk.Tk()
mw.title('Training Data - {} remaining'.format(len(onlyfiles)))
mw.geometry('500x200')
mw.configure(bg='black')

# ---- Toolbar ---------------------------------------------------------------
toolbar = tk.Frame(master=mw, width='500', height='24', borderwidth=2, bg='slategray4', relief='raised')
toolbar.pack()
# The buttons used to have command=None and did nothing when clicked. They
# now call the same handlers as the keyboard shortcuts. The handlers never
# read their event argument, so None is passed in its place.
play_btn = tk.Button(toolbar, text="(C-p) Play track", command=lambda: playTrack(None))
play_btn.pack(side='left')
save_btn = tk.Button(toolbar, text="(C-s) Save track", command=lambda: saveTrack(None))
save_btn.pack(side='left')
transcribe_btn = tk.Button(toolbar, text="(C-t) Transcribe track", command=lambda: transcribeTrack(None))
transcribe_btn.pack(side='left')
discard_btn = tk.Button(toolbar, text="(C-d) Discard track", command=lambda: discardTrack(None))
discard_btn.pack(side='left')

# ---- Content frame (receives the arrow keys and shortcuts) -----------------
back = tk.Frame(master=mw, bg='black')
back.bind('<Key>', keyEvent)
back.bind("<Control-p>", playTrack)
back.bind("<Control-t>", transcribeTrack)
back.bind("<Control-s>", saveTrack)
back.bind("<Control-d>", discardTrack)
back.pack_propagate(0)
back.pack(fill=tk.BOTH, expand=1)
back.focus_set()
# canvas = tk.Canvas(master=back, width='1280', height='720', bg='black', highlightthickness=0)
# canvas.pack()

# Name of the current file; left empty when the input folder has no files
# instead of failing with IndexError before the window appears.
w = tk.Label(master=back, text=onlyfiles[IDX] if onlyfiles else "", bg='black', fg='white')
w.pack()

# Text field; every write to `v` refreshes the two preview labels below.
v = tk.StringVar()
v.trace("w", lambda name, index, mode, sv=v: callback(sv))
e = tk.Entry(master=back, textvariable=v, width='500', bg='black', fg='white', highlightbackground='grey')
e.pack()
e.bind('<Return>', comp_s)
# Repeat the shortcuts on the entry so they also work while typing.
e.bind("<Control-p>", playTrack)
e.bind("<Control-t>", transcribeTrack)
e.bind("<Control-s>", saveTrack)
e.bind("<Control-d>", discardTrack)

# Preview labels: plain text (o1) and ARPAbet conversion (o2).
o1 = tk.Label(master=back, text="text here", bg='black', fg='white')
o1.pack()
o2 = tk.Label(master=back, text="arpabet here", bg='black', fg='white')
o2.pack()

mw.mainloop()

Loading…
Cancel
Save