commit 433297720896628cadd48415f8d2d919d9147018
Author: Daniel Muckerman <danielmuckerman@me.com>
Date:   Fri Jul 3 16:40:28 2020 -0400

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6c03a27
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+dataset/
+.DS_Store
+env/
+*.tar
+merged.dict*
diff --git a/download_dict.py b/download_dict.py
new file mode 100644
index 0000000..91347ad
--- /dev/null
+++ b/download_dict.py
@@ -0,0 +1,5 @@
+from mega import Mega
+# Fetch the dictionary from mega.nz; from_ephemeral() presumably opens a throwaway session (confirm in python3-mega).
+m = Mega.from_ephemeral()
+print("Downloading Dictionary...")
+m.download_from_url('https://mega.nz/#!yAMyFYCI!o_UmixbiIzosyYk-6O5xRZZDGpFRik_eMrZum-iQuhQ')
diff --git a/download_dict.sh b/download_dict.sh
new file mode 100755
index 0000000..03ed537
--- /dev/null
+++ b/download_dict.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+# Install the python3-mega client from source, then run download_dict.py.
+set -e  # abort when a step fails so later steps never run against a missing checkout
+
+[ -d python3-mega ] || git clone https://github.com/jeroenmeulenaar/python3-mega  # reuse an existing checkout on re-runs
+cd python3-mega
+pip install -r requirements.txt
+python setup.py install
+cd .. && python download_dict.py
\ No newline at end of file
diff --git a/get_data.sh b/get_data.sh
new file mode 100755
index 0000000..7001778
--- /dev/null
+++ b/get_data.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Download the dataset tarball and move its wavs/out files into dataset/in; also create dataset/out and dataset/discard.
+set -e  # abort on the first failure so a failed download is never unpacked or moved
+
+wget -O dataset.tar --content-disposition --user-agent "macintosh" https://cloud.technicalincompetence.club/index.php/s/W8rLGrCKgrFXw8z/download
+tar -xvf dataset.tar
+mkdir -p dataset/in
+mv wavs/out/* dataset/in/
+rm -rf wavs
+mkdir -p dataset/out dataset/discard
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3968277
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+ibm-watson==4.5.0
+python-mega==0.1.0
+python-vlc==3.0.10114
\ No newline at end of file
diff --git a/window.py b/window.py
new file mode 100644
index 0000000..16a1347
--- /dev/null
+++ b/window.py
@@ -0,0 +1,185 @@
+import tkinter as tk
+import vlc
+from os import listdir, remove
+from os.path import isfile, join
+import json
+import shutil
+
+# Set up IBM speech to text
+from os import environ
+from ibm_watson import SpeechToTextV1
+from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
+
+# NOTE(review): this API key is committed to source control and should be rotated; set the
+# IBM_STT_APIKEY environment variable to supply a private key without editing this file.
+authenticator = IAMAuthenticator(environ.get('IBM_STT_APIKEY', 'b373X-km7u5pAaz2JoizXigcVFZFEB8CIntgYgWzbCQ4'))
+speech_to_text = SpeechToTextV1(authenticator=authenticator)
+speech_to_text.set_service_url('https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/0e7a3edf-309c-4e64-b345-8251781245e4')
+speech_to_text.set_disable_ssl_verification(True)  # NOTE(review): disables TLS certificate checks - confirm this is intended
+
+# Set up Arpabet dict
+def ARPA(text):
+	"""Return text with every word found in thisdict replaced by "{<ARPAbet>}", ending in "␤".
+
+	Words are split on single spaces and looked up in upper case; trailing "!?,.;" is set
+	aside for the lookup and re-attached afterwards. Unknown words are kept unchanged.
+	"""
+	if len(text) == 0:
+		return "␤"
+	out = ''
+	for token in text.split(" "):
+		# rstrip peels any mix of trailing punctuation; the old if-chain handled only one
+		# order and raised IndexError on punctuation-only tokens such as "?!".
+		word = token.rstrip("!?,.;") or token[:1]
+		end_chars = token[len(word):]
+		word_arpa = thisdict.get(word.upper(), '')
+		if len(word_arpa) != 0: word = "{" + str(word_arpa) + "}"
+		out = (out + " " + word + end_chars).strip()
+	if not out.endswith("␤"): out = out + "␤"   # endswith is safe when out is empty
+	return out
+
+thisdict = {}   # And load it: first space-delimited field -> rest of the line; the earliest line for a repeated key wins
+with open('merged.dict_1.1.txt', "r") as dict_file:   # closes the handle; lines without a space are skipped instead of raising
+    thisdict.update((key, rest.strip()) for key, sep, rest in (ln.partition(" ") for ln in reversed(dict_file.read().splitlines())) if sep)
+
+# Stuff for dataset path
+IDX = 0                   # index into onlyfiles of the track currently shown
+MYPATH = "./dataset/in"   # directory scanned for tracks to label
+OUTPATH = './dataset/'    # parent of the out/ and discard/ folders and of the filelists
+
+# f = open("{}filelist.txt".format(OUTPATH), "w")
+# f.write("")
+# f.close()
+
+# f = open("{}filelist_arpa.txt".format(OUTPATH), "w")
+# f.write("")
+# f.close()
+
+# os.listdir() returns names in arbitrary order, so sort to walk the tracks in a stable order.
+onlyfiles = sorted(f for f in listdir(MYPATH) if isfile(join(MYPATH, f)))
+if onlyfiles: img = onlyfiles[0]
+
+def keyEvent(event):
+	"""Step to the previous/next track on the Left/Right arrow keys.
+
+	The index wraps around at both ends of onlyfiles. After a step the filename
+	label is refreshed and the transcript entry is cleared; other keys are ignored.
+	"""
+	global IDX
+	key = event.keysym
+	if key not in ('Left', 'Right'):
+		return
+	if key == 'Left':
+		IDX -= 1
+		if IDX < 0:
+			IDX = len(onlyfiles) - 1
+	else:
+		IDX += 1
+		if IDX >= len(onlyfiles):
+			IDX = 0
+	w.config(text = onlyfiles[IDX])
+	v.set("")
+
+def comp_s(event):
+	"""Print the text currently held by the entry variable, then clear it (bound to <Return>)."""
+	print(v.get())
+	v.set("")
+
+def playTrack(event):
+	"""Play the currently selected track through a newly created VLC media player."""
+	vlc.MediaPlayer("{}/{}".format(MYPATH, onlyfiles[IDX])).play()
+
+def transcribeTrack(event):
+	"""Send the current track to Watson speech-to-text and put the transcript in the entry box.
+
+	All returned result segments are joined (the original kept only the first), and a reply
+	with no results clears the box instead of raising IndexError.
+	"""
+	with open("{}/{}".format(MYPATH, onlyfiles[IDX]), 'rb') as audio_file:
+		reply = speech_to_text.recognize(
+			audio=audio_file, content_type='audio/wav', word_alternatives_threshold=0.9,
+		).get_result()
+	v.set("".join(seg["alternatives"][0]["transcript"] for seg in reply.get("results", []) if seg.get("alternatives")))
+
+def saveTrack(event):
+	"""Append the current track to filelist.txt and filelist_arpa.txt, then move it to 'out'.
+
+	The files are opened as UTF-8 because the label text carries the "␤" end marker, which
+	locale encodings such as cp1252 cannot encode; `with` closes them even if a write fails.
+	"""
+	for list_name, label in (("filelist.txt", o1), ("filelist_arpa.txt", o2)):
+		with open("{}{}".format(OUTPATH, list_name), "a", encoding="utf-8") as f:
+			f.write("wavs/out/{}|{}\n".format(onlyfiles[IDX], label.cget('text')))
+	move_audio_file(IDX, 'out')
+
+def discardTrack(event):  # Ctrl-d handler: move the current track to the 'discard' folder without writing to the filelists
+	move_audio_file(IDX, 'discard')
+
+def callback(sv):  # write-trace hook for the entry's StringVar: refresh both preview labels
+	o1.config(text = "{}␤".format(sv.get().strip()), wraplength=500)  # plain text plus the "␤" end marker
+	o2.config(text = ARPA(sv.get().strip()), wraplength=500)  # same text run through ARPA()
+
+def move_audio_file(idx, destination):
+	"""Move onlyfiles[idx] from MYPATH into OUTPATH/<destination>/ and show the previous track.
+
+	IDX now follows the label; the original changed only its local idx and left IDX stale."""
+	global IDX
+	file_path = onlyfiles[idx]
+	shutil.copyfile(MYPATH + '/' + file_path, OUTPATH + destination + '/' + file_path)
+	remove(MYPATH + '/' + file_path)
+	onlyfiles.pop(idx)   # forget the entry only after the file was really moved
+	IDX = (idx - 1) % len(onlyfiles) if onlyfiles else 0
+	w.config(text = onlyfiles[IDX] if onlyfiles else "")   # blank label once nothing is left
+	v.set("")
+
+
+mw = tk.Tk()
+# Main window; the title is set once here from the number of tracks found at start-up.
+mw.title('Training Data - {} remaining'.format(len(onlyfiles)))
+mw.geometry('500x200')
+mw.configure(bg='black')
+
+
+toolbar = tk.Frame(master=mw, width='500', height='24', borderwidth=2, bg='slategray4', relief='raised')
+toolbar.pack()
+# Buttons invoke the same handlers as the keyboard shortcuts; the handlers ignore their event
+# argument, so None is passed. (command=None in the original left every button inert.)
+play_btn = tk.Button(toolbar, text="(C-p) Play track", command=lambda: playTrack(None))
+save_btn = tk.Button(toolbar, text="(C-s) Save track", command=lambda: saveTrack(None))
+transcribe_btn = tk.Button(toolbar, text="(C-t) Transcribe track", command=lambda: transcribeTrack(None))
+discard_btn = tk.Button(toolbar, text="(C-d) Discard track", command=lambda: discardTrack(None))
+for toolbar_btn in (play_btn, save_btn, transcribe_btn, discard_btn):
+    toolbar_btn.pack(side='left')
+
+# Frame filling the rest of the window; it is given keyboard focus once everything is bound.
+back = tk.Frame(master=mw, bg='black')
+back.bind('<Key>', keyEvent)
+for sequence, handler in (("<Control-p>", playTrack), ("<Control-t>", transcribeTrack),
+                          ("<Control-s>", saveTrack), ("<Control-d>", discardTrack)):
+    back.bind(sequence, handler)
+back.pack_propagate(0)
+back.pack(fill=tk.BOTH, expand=1)
+back.focus_set()
+
+# Label naming the current track. An empty dataset/in yields a blank label instead of the
+# IndexError that the unguarded onlyfiles[IDX] raised at start-up.
+
+w = tk.Label(master=back, text=onlyfiles[IDX] if onlyfiles else "", bg='black', fg='white')
+w.pack()
+
+# Transcript entry: every write to v re-renders the preview labels through callback().
+v = tk.StringVar()
+v.trace("w", lambda *_trace_args: callback(v))
+e = tk.Entry(master=back, textvariable=v, width='500', bg='black', fg='white', highlightbackground='grey')
+e.pack()
+# Same shortcuts as on the frame, presumably so they also work while the entry has focus.
+for shortcut, action in (('<Return>', comp_s), ("<Control-p>", playTrack), ("<Control-t>", transcribeTrack),
+                         ("<Control-s>", saveTrack), ("<Control-d>", discardTrack)):
+    e.bind(shortcut, action)
+
+o1 = tk.Label(master=back, text="text here", bg='black', fg='white')  # plain-text preview; saveTrack writes its text to filelist.txt
+o1.pack()
+o2 = tk.Label(master=back, text="arpabet here", bg='black', fg='white')  # ARPA() preview; saveTrack writes its text to filelist_arpa.txt
+o2.pack()
+
+mw.mainloop()