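# Flask API for the video backend: serves video metadata, maintains the
# "discovered_videos" Typesense collection, and enqueues scrape and download
# work on Redis-backed RQ queues.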
from helpers import scrape_url, download_video, fetch_or_create_search_key, get_video_json, hide_videos, request_videos

import os
import shutil

from flask import Flask, jsonify, request
from flask_cors import CORS
from rq import Queue
from redis import Redis

import typesense
from typesense.exceptions import ObjectAlreadyExists
from dotenv import dotenv_values

app = Flask(__name__)
CORS(app)

config = dotenv_values(".env")

# Copy a premade database into position if there's no database already
if not os.path.exists("/data/database.sqlite3"):
    shutil.copy("/app/database.sqlite3", "/data/database.sqlite3")

client = typesense.Client({
    'api_key': config["TYPESENSE_API_KEY"],
    'nodes': [{
        'host': 'typesense',
        'port': '8108',
        'protocol': 'http'
    }],
    'connection_timeout_seconds': 5
})

# Create the Typesense collection for video metadata, ignoring the error if it
# already exists
try:
    client.collections.create({
        "name": "discovered_videos",
        "fields": [
            # "id" is handled implicitly by Typesense, so it is not declared here
            #{"name": "id", "type": "string"},
            {"name": "fulltitle", "type": "string"},
            {"name": "title_vec", "type": "float[]", "num_dim": 384},
            {"name": "description", "type": "string"},
            {"name": "channel", "type": "string", "facet": True},
            {"name": "channel_follower_count", "type": "int32"},
            {"name": "channel_id", "type": "string"},
            {"name": "playlist_ids", "type": "string[]", "facet": True},
            {"name": "duration", "type": "int32"},
            {"name": "view_count", "type": "int32"},
            {"name": "upload_date", "type": "int64"},
            {"name": "filesize_approx", "type": "int64"},
            {"name": "extractor", "type": "string"},
            {"name": "thumbnail", "type": "string"},
            {"name": "requested_by", "type": "string"},
            {"name": "status", "type": "string", "facet": True},
        ],
        "default_sorting_field": "upload_date"
    })
except ObjectAlreadyExists:
    pass
except Exception as e:
    print(f"ERROR: A fatal error occurred while creating the typesense collection: {e}")

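# Redis-backed RQ queues: scrape jobs go on "discover" and download jobs on
# "download"; separate RQ workers are expected to consume them.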
redis_conn = Redis("redis")
scrape_queue = Queue("discover", connection=redis_conn)
download_queue = Queue("download", connection=redis_conn)

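# GET /video?id=<video_id>: return the JSON record for a single video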
@app.route('/video', methods=["GET"])
def video():
    video_id = request.args.get("id")
    return jsonify(get_video_json(video_id))

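# GET /typesense_api_key: fetch (or create, on first use) the Typesense search key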
@app.route('/typesense_api_key', methods=["GET"])
def typesense_api_key():
    key = fetch_or_create_search_key()
    return jsonify(key)

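# POST /scrape {"url": ...}: queue a scrape job for the URL and return its job id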
@app.route('/scrape', methods=["POST"])
def scrape_endpoint():
    data = request.json
    url = data["url"]

    job = scrape_queue.enqueue(scrape_url, url, job_timeout=1000)

    return jsonify({"job_id": job.id})

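# POST /request {"ids": [...]}: record the requested videos and queue a download
# job for each one, returning the new job ids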
@app.route('/request', methods=['POST'])
def request_endpoint():
    data = request.json
    ids = data["ids"]

    dl_jobs = []

    request_videos(ids)

    for video_id in ids:
        job = download_queue.enqueue(download_video, video_id, job_timeout=2500)
        dl_jobs.append({"job_id": job.id})

    return jsonify(dl_jobs)

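# POST /hide {"ids": [...]}: hide the given videos and return the helper's result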
@app.route('/hide', methods=['POST'])
def hide_endpoint():
    data = request.json
    ids = data["ids"]

    res = hide_videos(ids)

    return jsonify(res)

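# POST /download {"ids": [...]}: queue a download job for each video id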
@app.route('/download', methods=['POST'])
def download_endpoint():
    data = request.json
    ids = data["ids"]

    dl_jobs = []

    for video_id in ids:
        job = download_queue.enqueue(download_video, video_id, job_timeout=2500)
        # Collect a JSON-serializable summary rather than the Job object itself
        dl_jobs.append({"job_id": job.id})

    # Return every queued job, not just the last one
    return jsonify(dl_jobs)

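# GET /download_jobs: list scheduled, started, finished and failed download jobs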
@app.route('/download_jobs', methods=['GET'])
def get_download_jobs():
    all_jobs = []

    registries = [
        download_queue.scheduled_job_registry,
        download_queue.started_job_registry,
        download_queue.finished_job_registry,
        download_queue.failed_job_registry,
    ]

    # Walk every registry and report each job's id, arguments and current status
    for registry in registries:
        for job_id in registry.get_job_ids():
            job = download_queue.fetch_job(job_id)
            if job is None:
                # Job data may have expired even though the registry still lists it
                continue
            all_jobs.append({
                'job_id': job.id,
                'args': job.args,
                'status': job.get_status()
            })

    return jsonify(all_jobs)

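# GET /scrape_jobs: same listing as /download_jobs, but for the scrape queue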
@app.route('/scrape_jobs', methods=['GET'])
def get_scrape_jobs():
    all_jobs = []

    registries = [
        scrape_queue.scheduled_job_registry,
        scrape_queue.started_job_registry,
        scrape_queue.finished_job_registry,
        scrape_queue.failed_job_registry,
    ]

    # Walk every registry and report each job's id, arguments and current status
    for registry in registries:
        for job_id in registry.get_job_ids():
            job = scrape_queue.fetch_job(job_id)
            if job is None:
                # Job data may have expired even though the registry still lists it
                continue
            all_jobs.append({
                'job_id': job.id,
                'args': job.args,
                'status': job.get_status()
            })

    return jsonify(all_jobs)

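# Run the Flask development server when this module is executed directly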
if __name__ == '__main__':
    app.run(debug=True)