from helpers import (
    scrape_url,
    download_video,
    fetch_or_create_search_key,
    get_video_json,
    hide_videos,
    request_videos,
)
import os
import shutil

from flask import Flask, jsonify, request
from flask_cors import CORS
from rq import Queue
from redis import Redis
import typesense
from typesense.exceptions import ObjectAlreadyExists
from dotenv import dotenv_values

app = Flask(__name__)
CORS(app)

config = dotenv_values(".env")

# Copy a premade database into position if there's no database already
if not os.path.exists("/data/database.sqlite3"):
    shutil.copy("/app/database.sqlite3", "/data/database.sqlite3")

client = typesense.Client({
    'api_key': config["TYPESENSE_API_KEY"],
    'nodes': [{
        'host': 'typesense',
        'port': '8108',
        'protocol': 'http'
    }],
    'connection_timeout_seconds': 5
})

# Create the Typesense collection, ignoring the error if it already exists
try:
    create_response = client.collections.create({
        "name": "discovered_videos",
        "fields": [
            # {"name": "id", "type": "string"},
            {"name": "fulltitle", "type": "string"},
            {"name": "title_vec", "type": "float[]", "num_dim": 384},
            {"name": "description", "type": "string"},
            {"name": "channel", "type": "string", "facet": True},
            {"name": "channel_follower_count", "type": "int32"},
            {"name": "channel_id", "type": "string"},
            {"name": "playlist_ids", "type": "string[]", "facet": True},
            {"name": "duration", "type": "int32"},
            {"name": "view_count", "type": "int32"},
            {"name": "upload_date", "type": "int64"},
            {"name": "filesize_approx", "type": "int64"},
            {"name": "extractor", "type": "string"},
            {"name": "thumbnail", "type": "string"},
            {"name": "requested_by", "type": "string"},
            {"name": "status", "type": "string", "facet": True},
        ],
        "default_sorting_field": "upload_date"
    })
except ObjectAlreadyExists:
    pass
except Exception as e:
    print(f"ERROR: failed to create the Typesense collection: {e}")

# Job queues backed by Redis: one for discovery/scraping, one for downloads
redis_conn = Redis("redis")
scrape_queue = Queue("discover", connection=redis_conn)
download_queue = Queue("download", connection=redis_conn)


def queue_job_info(queue):
    """Return id, args, and status for every job in the queue's RQ registries."""
    registries = [
        queue.scheduled_job_registry,
        queue.started_job_registry,
        queue.finished_job_registry,
        queue.failed_job_registry,
    ]
    jobs = []
    for registry in registries:
        for job_id in registry.get_job_ids():
            job = queue.fetch_job(job_id)
            if job is None:
                # Job data has expired from Redis; skip it
                continue
            jobs.append({
                'job_id': job.id,
                'args': job.args,
                'status': job.get_status()
            })
    return jobs


@app.route('/video', methods=["GET"])
def video():
    # Return the stored metadata for a single video
    video_id = request.args.get("id")
    return jsonify(get_video_json(video_id))


@app.route('/typesense_api_key', methods=["GET"])
def typesense_api_key():
    # Return an API key the frontend can use for Typesense search
    key = fetch_or_create_search_key()
    return jsonify(key)


@app.route('/scrape', methods=["POST"])
def scrape_endpoint():
    # Queue a scrape of the given URL on the discover queue
    data = request.json
    url = data["url"]
    job = scrape_queue.enqueue(scrape_url, url, job_timeout=1000)
    return jsonify({"job_id": job.id})


@app.route('/request', methods=['POST'])
def request_endpoint():
    # Mark the videos as requested, then queue a download job for each id
    data = request.json
    ids = data["ids"]
    dl_jobs = []
    request_videos(ids)
    for video_id in ids:
        job = download_queue.enqueue(download_video, video_id, job_timeout=2500)
        dl_jobs.append({"job_id": job.id})
    return jsonify(dl_jobs)


@app.route('/hide', methods=['POST'])
def hide_endpoint():
    # Hide the given videos
    data = request.json
    ids = data["ids"]
    res = hide_videos(ids)
    return jsonify(res)


@app.route('/download', methods=['POST'])
def download_endpoint():
    # Queue a download job for each id and return all of the job ids
    data = request.json
    ids = data["ids"]
    dl_jobs = []
    for video_id in ids:
        job = download_queue.enqueue(download_video, video_id, job_timeout=2500)
        dl_jobs.append({"job_id": job.id})
    return jsonify(dl_jobs)


@app.route('/download_jobs', methods=['GET'])
def get_download_jobs():
    # Report every known download job and its current status
    return jsonify(queue_job_info(download_queue))


@app.route('/scrape_jobs', methods=['GET'])
def get_scrape_jobs():
    # Report every known scrape job and its current status
    return jsonify(queue_job_info(scrape_queue))


if __name__ == '__main__':
    app.run(debug=True)
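
# Example client usage (a sketch, not part of the app). It assumes the Flask dev
# server is running at its default http://127.0.0.1:5000 and that the Redis,
# Typesense, and worker containers are up; the endpoint paths and payload shapes
# come from the routes above, while the URL and video ids are placeholders.
#
#     import requests
#
#     base = "http://127.0.0.1:5000"
#
#     # Queue a scrape of a channel or playlist URL
#     scrape_job = requests.post(f"{base}/scrape", json={"url": "https://example.com/channel"}).json()
#
#     # Queue downloads for a couple of discovered video ids
#     dl_jobs = requests.post(f"{base}/download", json={"ids": ["abc123", "def456"]}).json()
#
#     # Poll the job registries for progress
#     status = requests.get(f"{base}/download_jobs").json()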