Save youtube before it dies
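A small Flask API that backs the archiver: scrape and download requests are pushed onto Redis-backed RQ queues, and discovered video metadata is indexed in a Typesense collection for search.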
import os
import shutil

from dotenv import dotenv_values
from flask import Flask, jsonify, request
from flask_cors import CORS
from redis import Redis
from rq import Queue
import typesense
from typesense.exceptions import ObjectAlreadyExists

from helpers import scrape_url, download_video, fetch_or_create_search_key, get_video_json, hide_videos, request_videos
app = Flask(__name__)
CORS(app)
config = dotenv_values(".env")
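# dotenv_values() reads .env without modifying os.environ; the file must provide
# TYPESENSE_API_KEY for the Typesense client below.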
# Copy a premade database into position if there's no database already
if not os.path.exists("/data/database.sqlite3"):
    shutil.copy("/app/database.sqlite3", "/data/database.sqlite3")
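# /data is presumably a mounted volume, so the seeded SQLite database survives container rebuilds.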
client = typesense.Client({
    'api_key': config["TYPESENSE_API_KEY"],
    'nodes': [{
        'host': 'typesense',
        'port': '8108',
        'protocol': 'http'
    }],
    'connection_timeout_seconds': 5
})
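# The host 'typesense' assumes a service of that name (e.g. from docker-compose) reachable
# on the same network as this app; adjust host/port if Typesense runs elsewhere.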
# Create the typesense collection if it does not already exist
try:
    client.collections.create({
        "name": "discovered_videos",
        "fields": [
            # {"name": "id", "type": "string"},
            {"name": "fulltitle", "type": "string"},
            # 384-dim title embedding used for semantic search (the dimension suggests a
            # MiniLM-style sentence-transformer, but the embedding model isn't shown here)
            {"name": "title_vec", "type": "float[]", "num_dim": 384},
            {"name": "description", "type": "string"},
            {"name": "channel", "type": "string", "facet": True},
            {"name": "channel_follower_count", "type": "int32"},
            {"name": "channel_id", "type": "string"},
            {"name": "playlist_ids", "type": "string[]", "facet": True},
            {"name": "duration", "type": "int32"},
            {"name": "view_count", "type": "int32"},
            {"name": "upload_date", "type": "int64"},
            {"name": "filesize_approx", "type": "int64"},
            {"name": "extractor", "type": "string"},
            {"name": "thumbnail", "type": "string"},
            {"name": "requested_by", "type": "string"},
            {"name": "status", "type": "string", "facet": True},
        ],
        "default_sorting_field": "upload_date"
    })
except ObjectAlreadyExists:
    pass
except Exception as e:
    print(f"ERROR: A fatal error occurred while creating the typesense collection: {e}")
redis_conn = Redis("redis")
scrape_queue = Queue("discover", connection=redis_conn)
download_queue = Queue("download", connection=redis_conn)
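# These queues only hold work; separate RQ worker processes must consume them,
# e.g. `rq worker discover` and `rq worker download` running alongside this app.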
@app.route('/video', methods=["GET"])
def video():
    video_id = request.args.get("id")
    return jsonify(get_video_json(video_id))
@app.route('/typesense_api_key', methods=["GET"])
def typesense_api_key():
    key = fetch_or_create_search_key()
    return jsonify(key)
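# fetch_or_create_search_key() presumably returns a search-only (scoped) Typesense API key,
# letting the frontend query the collection directly without exposing the admin key.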
@app.route('/scrape', methods=["POST"])
def scrape_endpoint():
    data = request.json
    url = data["url"]
    job = scrape_queue.enqueue(scrape_url, url, job_timeout=1000)
    return jsonify({"job_id": job.id})
@app.route('/request', methods=['POST'])
def request_endpoint():
    data = request.json
    ids = data["ids"]
    dl_jobs = []
    # Record the videos as requested, then queue each one for download
    request_videos(ids)
    for video_id in ids:
        job = download_queue.enqueue(download_video, video_id, job_timeout=2500)
        dl_jobs.append({"job_id": job.id})
    return jsonify(dl_jobs)
@app.route('/hide', methods=['POST'])
def hide_endpoint():
    data = request.json
    ids = data["ids"]
    res = hide_videos(ids)
    return jsonify(res)
@app.route('/download', methods=['POST'])
def download_endpoint():
    data = request.json
    ids = data["ids"]
    dl_jobs = []
    # Queue a download job per id and return all of the job ids
    for video_id in ids:
        job = download_queue.enqueue(download_video, video_id, job_timeout=2500)
        dl_jobs.append({"job_id": job.id})
    return jsonify(dl_jobs)
def collect_jobs(queue):
    # Gather id, args, and status for every job the queue's registries still reference
    jobs = []
    registries = [
        queue.scheduled_job_registry,
        queue.started_job_registry,
        queue.finished_job_registry,
        queue.failed_job_registry,
    ]
    for registry in registries:
        for job_id in registry.get_job_ids():
            job = queue.fetch_job(job_id)
            if job is None:
                # A registry can briefly reference a job whose data has already expired
                continue
            jobs.append({
                'job_id': job.id,
                'args': job.args,
                'status': job.get_status()
            })
    return jobs
@app.route('/download_jobs', methods=['GET'])
def get_download_jobs():
    return jsonify(collect_jobs(download_queue))
@app.route('/scrape_jobs', methods=['GET'])
def get_scrape_jobs():
    return jsonify(collect_jobs(scrape_queue))
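# Note: Flask's built-in server with debug=True is only meant for local development;
# a container deployment would normally put a WSGI server such as gunicorn in front
# (an assumption here, since the Dockerfile/compose setup isn't shown).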
if __name__ == '__main__':
    app.run(debug=True)