import os
import shutil

from flask import Flask, jsonify, request
from flask_cors import CORS
from rq import Queue
from redis import Redis

import typesense
from typesense.exceptions import ObjectAlreadyExists

from dotenv import dotenv_values

from helpers import (
    download_video,
    fetch_or_create_search_key,
    get_video_json,
    hide_videos,
    request_videos,
    scrape_url,
)

app = Flask(__name__)
CORS(app)

config = dotenv_values(".env")
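
# dotenv_values reads key/value pairs from .env without mutating the process
# environment; the file is expected to define at least TYPESENSE_API_KEY,
# which is used to build the typesense client below.
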
# Copy a premade database into position if there's no database already
if not os.path.exists("/data/database.sqlite3"):
    shutil.copy("/app/database.sqlite3", "/data/database.sqlite3")

client = typesense.Client({
    'api_key': config["TYPESENSE_API_KEY"],
    'nodes': [{
        'host': 'typesense',
        'port': '8108',
        'protocol': 'http'
    }],
    'connection_timeout_seconds': 5
})
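
# The schema below includes a 384-dimensional vector field (title_vec) for
# semantic search over titles; 384 dims matches common sentence-embedding
# models such as all-MiniLM-L6-v2 (an assumption: the embedding model is
# produced elsewhere in the pipeline).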
# Create the typesense collection, or carry on if it already exists
try:
    create_response = client.collections.create({
        "name": "discovered_videos",
        "fields": [
            # {"name": "id", "type": "string"},
            {"name": "fulltitle", "type": "string"},
            {"name": "title_vec", "type": "float[]", "num_dim": 384},
            {"name": "description", "type": "string"},
            {"name": "channel", "type": "string", "facet": True},
            {"name": "channel_follower_count", "type": "int32"},
            {"name": "channel_id", "type": "string"},
            {"name": "playlist_ids", "type": "string[]", "facet": True},
            {"name": "duration", "type": "int32"},
            {"name": "view_count", "type": "int32"},
            {"name": "upload_date", "type": "int64"},
            {"name": "filesize_approx", "type": "int64"},
            {"name": "extractor", "type": "string"},
            {"name": "thumbnail", "type": "string"},
            {"name": "requested_by", "type": "string"},
            {"name": "status", "type": "string", "facet": True},
        ],
        "default_sorting_field": "upload_date"
    })
except ObjectAlreadyExists:
    pass
except Exception as e:
    print(f"ERROR: A fatal error occurred while creating the typesense collection: {e}")


redis_conn = Redis("redis")
scrape_queue = Queue("discover", connection=redis_conn)
download_queue = Queue("download", connection=redis_conn)
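
# Jobs on these queues are executed by separate RQ worker processes, e.g.
# (a command sketch, assuming the same "redis" host as above):
#   rq worker discover download --url redis://redis:6379


# GET /video?id=<id>: return stored metadata for one video as JSON.
# Example request (assuming the Flask dev server on its default port 5000;
# the id value is hypothetical):
#   curl "http://localhost:5000/video?id=dQw4w9WgXcQ"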
@app.route('/video', methods=["GET"])
def video():
    video_id = request.args.get("id")
    return jsonify(get_video_json(video_id))
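

# GET /typesense_api_key: return a search-scoped API key for the frontend,
# creating one via fetch_or_create_search_key() if none exists yet.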
@app.route('/typesense_api_key', methods=["GET"])
def typesense_api_key():
    key = fetch_or_create_search_key()
    return jsonify(key)
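

# POST /scrape: enqueue a metadata scrape of the given URL on the
# "discover" queue. Example request (the channel URL is hypothetical):
#   curl -X POST -H "Content-Type: application/json" \
#        -d '{"url": "https://www.youtube.com/@example"}' \
#        http://localhost:5000/scrape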
@app.route('/scrape', methods=["POST"])
def scrape_endpoint():
    data = request.json
    url = data["url"]

    job = scrape_queue.enqueue(scrape_url, url, job_timeout=1000)

    return jsonify({"job_id": job.id})
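

# POST /request: mark the given videos as requested, then enqueue one
# download job per id on the "download" queue. Example payload
# (hypothetical ids):
#   {"ids": ["abc123", "def456"]}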
@app.route('/request', methods=['POST'])
def request_endpoint():
    data = request.json
    ids = data["ids"]

    dl_jobs = []

    request_videos(ids)

    for video_id in ids:
        job = download_queue.enqueue(download_video, video_id, job_timeout=2500)
        dl_jobs.append({"job_id": job.id})

    return jsonify(dl_jobs)
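

# POST /hide: mark the given videos as hidden via hide_videos(); takes the
# same {"ids": [...]} payload as /request.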
@app.route('/hide', methods=['POST'])
def hide_endpoint():
    data = request.json
    ids = data["ids"]

    res = hide_videos(ids)

    return jsonify(res)
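

# POST /download: like /request, but enqueues downloads without marking the
# videos as requested first. Returns one {"job_id": ...} entry per job.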
@app.route('/download', methods=['POST'])
def download_endpoint():
    data = request.json
    ids = data["ids"]

    dl_jobs = []

    for video_id in ids:
        job = download_queue.enqueue(download_video, video_id, job_timeout=2500)
        dl_jobs.append({"job_id": job.id})

    return jsonify(dl_jobs)
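

# Both job-listing endpoints report the same snapshot: every job in a
# queue's scheduled, started, finished, and failed registries. The shared
# helper below walks those registries so each route stays small.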
def serialize_jobs(queue):
    """Collect id, args, and status for every job in the queue's
    scheduled, started, finished, and failed registries."""
    all_jobs = []

    registries = [
        queue.scheduled_job_registry,
        queue.started_job_registry,
        queue.finished_job_registry,
        queue.failed_job_registry,
    ]

    for registry in registries:
        for job_id in registry.get_job_ids():
            job = queue.fetch_job(job_id)
            if job is None:  # job data may have expired since it was listed
                continue
            all_jobs.append({
                'job_id': job.id,
                'args': job.args,
                'status': job.get_status()
            })

    return all_jobs


@app.route('/download_jobs', methods=['GET'])
def get_download_jobs():
    return jsonify(serialize_jobs(download_queue))
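

# GET /scrape_jobs: the same report, for the "discover" queue.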
@app.route('/scrape_jobs', methods=['GET'])
def get_scrape_jobs():
    return jsonify(serialize_jobs(scrape_queue))
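

# Run the Flask development server when executed directly; a production
# deployment would typically sit behind a WSGI server instead.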
if __name__ == '__main__':
    app.run(debug=True)