Save youtube before it dies

from helpers import scrape_url, download_video, fetch_or_create_search_key, get_video_json, hide_videos, request_videos
import os
import shutil
from flask import Flask, jsonify, request
from flask_cors import CORS
from rq import Queue
from redis import Redis
import typesense
from typesense.exceptions import ObjectAlreadyExists
from dotenv import dotenv_values

app = Flask(__name__)
CORS(app)

config = dotenv_values(".env")

# Copy a premade database into position if there's no database already
if not os.path.exists("/data/database.sqlite3"):
    shutil.copy("/app/database.sqlite3", "/data/database.sqlite3")

client = typesense.Client({
    'api_key': config["TYPESENSE_API_KEY"],
    'nodes': [{
        'host': 'typesense',
        'port': '8108',
        'protocol': 'http'
    }],
    'connection_timeout_seconds': 5
})
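# NOTE: the node settings above assume a Typesense instance reachable at host
# 'typesense' on port 8108 (e.g. a Docker Compose service name); adjust them
# for other deployments.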

# Create or ignore the typesense database
try:
    create_response = client.collections.create({
        "name": "discovered_videos",
        "fields": [
            #{"name": "id", "type": "string"},
            {"name": "fulltitle", "type": "string"},
            {"name": "title_vec", "type": "float[]", "num_dim": 384},
            {"name": "description", "type": "string"},
            {"name": "channel", "type": "string", "facet": True},
            {"name": "channel_follower_count", "type": "int32"},
            {"name": "channel_id", "type": "string"},
            {"name": "playlist_ids", "type": "string[]", "facet": True},
            {"name": "duration", "type": "int32"},
            {"name": "view_count", "type": "int32"},
            {"name": "upload_date", "type": "int64"},
            {"name": "filesize_approx", "type": "int64"},
            {"name": "extractor", "type": "string"},
            {"name": "thumbnail", "type": "string"},
            {"name": "requested_by", "type": "string"},
            {"name": "status", "type": "string", "facet": True},
        ],
        "default_sorting_field": "upload_date"
    })
except ObjectAlreadyExists:
    pass
except Exception as e:
    print(f"ERROR: A fatal error occurred while creating the typesense collection: {e}")

redis_conn = Redis("redis")
scrape_queue = Queue("discover", connection=redis_conn)
download_queue = Queue("download", connection=redis_conn)
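# The 'discover' and 'download' queues are drained by separate RQ worker
# processes; with the standard RQ CLI they could be started with, e.g.,
# `rq worker discover` and `rq worker download` pointed at the same Redis
# instance (the exact invocation is an assumption, not defined in this file).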


@app.route('/video', methods=["GET"])
def video():
    video_id = request.args.get("id")
    return jsonify(get_video_json(video_id))


@app.route('/typesense_api_key', methods=["GET"])
def typesense_api_key():
    key = fetch_or_create_search_key()
    return jsonify(key)


@app.route('/scrape', methods=["POST"])
def scrape_endpoint():
    data = request.json
    url = data["url"]
    job = scrape_queue.enqueue(scrape_url, url, job_timeout=1000)
    return jsonify({"job_id": job.id})


@app.route('/request', methods=['POST'])
def request_endpoint():
    data = request.json
    ids = data["ids"]
    dl_jobs = []
    request_videos(ids)
    for video_id in ids:
        job = download_queue.enqueue(download_video, video_id, job_timeout=2500)
        dl_jobs.append({"job_id": job.id})
    return jsonify(dl_jobs)


@app.route('/hide', methods=['POST'])
def hide_endpoint():
    data = request.json
    ids = data["ids"]
    res = hide_videos(ids)
    return jsonify(res)


@app.route('/download', methods=['POST'])
def download_endpoint():
    data = request.json
    ids = data["ids"]
    dl_jobs = []
    for video_id in ids:
        job = download_queue.enqueue(download_video, video_id, job_timeout=2500)
        # Collect a serializable entry per enqueued job so the response covers every id
        dl_jobs.append({"job_id": job.id})
    return jsonify(dl_jobs)


@app.route('/download_jobs', methods=['GET'])
def get_download_jobs():
    all_jobs = []
    # Collect job info from each registry: scheduled, started, finished, and failed
    registries = [
        download_queue.scheduled_job_registry,
        download_queue.started_job_registry,
        download_queue.finished_job_registry,
        download_queue.failed_job_registry,
    ]
    for registry in registries:
        for job_id in registry.get_job_ids():
            job = download_queue.fetch_job(job_id)
            all_jobs.append({
                'job_id': job.id,
                'args': job.args,
                'status': job.get_status()
            })
    return jsonify(all_jobs)


@app.route('/scrape_jobs', methods=['GET'])
def get_scrape_jobs():
    all_jobs = []
    # Collect job info from each registry: scheduled, started, finished, and failed
    registries = [
        scrape_queue.scheduled_job_registry,
        scrape_queue.started_job_registry,
        scrape_queue.finished_job_registry,
        scrape_queue.failed_job_registry,
    ]
    for registry in registries:
        for job_id in registry.get_job_ids():
            job = scrape_queue.fetch_job(job_id)
            all_jobs.append({
                'job_id': job.id,
                'args': job.args,
                'status': job.get_status()
            })
    return jsonify(all_jobs)


if __name__ == '__main__':
    app.run(debug=True)
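
A minimal sketch of how a client could exercise this API, assuming the Flask app is reachable at http://localhost:5000 (the address and the example URL/ids below are assumptions, not taken from this file):

import requests

BASE = "http://localhost:5000"  # assumed address of the Flask app

# Enqueue a scrape job for a channel or playlist URL
resp = requests.post(f"{BASE}/scrape", json={"url": "https://www.youtube.com/@example"})
print(resp.json())  # {"job_id": "..."}

# Mark discovered videos as requested and enqueue their downloads
resp = requests.post(f"{BASE}/request", json={"ids": ["dQw4w9WgXcQ"]})
print(resp.json())  # [{"job_id": "..."}, ...]

# Inspect scheduled/started/finished/failed download jobs
print(requests.get(f"{BASE}/download_jobs").json())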