@@ -0,0 +1,6 @@
.venv
.backup
dump
typesense-data
backend/db/database.sqlite3
__pycache__/
@@ -0,0 +1,38 @@
YouTube Disaster Plan
=====================

YouTube is going to shit and I'd like to archive as many videos as possible before it shits the bed.

This web application lets users scrape the channels/playlists/videos they enjoy and request specific videos to be downloaded (with subs, description, metadata, and even current SponsorBlock info).
To handle creators who have some bangers but don't always bang, you can also mark videos as hidden to exclude them from your search results, so you only have to go through the videos you haven't decided on yet.

TODO
----

- LDAP authentication to give the application multi-user support
- Automatically download metadata for channels and playlists the user specifies
- Automatically download videos which match certain criteria (from a channel/playlist, title matches a search, could get crazy with vector search since each video's title is vectorized; see the sketch below)
- Display current scraping and downloading jobs to give the user some feedback on what is happening
- Not-shit UI
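
Speaking of vector search: here's a rough sketch of what a nearest-neighbour title query could look like. This is not wired into the app yet; it assumes the same embedding model the backend uses, a Typesense server on localhost:8108, and Typesense >= 0.24 (which added `vector_query`).

```python
import typesense
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2")
client = typesense.Client({
    "api_key": "...",  # a search-only key; see /typesense_api_key
    "nodes": [{"host": "localhost", "port": "8108", "protocol": "http"}],
    "connection_timeout_seconds": 2,
})

def similar_videos(text, k=10):
    # Embed the query with the same model that produced title_vec.
    vec = model.encode(text).tolist()
    return client.multi_search.perform({
        "searches": [{
            "collection": "discovered_videos",
            "q": "*",
            "vector_query": f"title_vec:([{','.join(map(str, vec))}], k:{k})",
        }]
    }, {})
```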
How-to
------

1. Make directories for the Docker containers to store their data. By default the docker-compose file uses "dump/backend" and "dump/typesense".
2. Run `docker compose build && docker compose up`
3. In the frontend directory, run `npm install`
4. Now the frontend just needs `npm start` to run

Deploying
---------

- Pray
- Also, you'll need to stop hardcoding the backend URL in the frontend and put both Typesense and the Flask app behind a reverse proxy
Architecture
------------

The backend is a Flask app.
It uses Python RQ + Redis to create worker queues for downloading metadata and downloading videos.
The docker-compose file currently spawns two workers, but you can bump that number up to do more things at the same time.
Search functionality is provided by Typesense.
The frontend is written in Vue and uses InstantSearch.js to provide a good search UI.
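
For a feel of how work flows through the queues, here's a minimal sketch (assuming the service names from docker-compose.yml; it mirrors what the Flask app in backend/main.py does):

```python
from redis import Redis
from rq import Queue

# "redis" is the service hostname from docker-compose.yml.
redis_conn = Redis("redis")
scrape_queue = Queue("discover", connection=redis_conn)

# Enqueueing runs helpers.scrape_url(url) on whichever rq-worker replica picks it up.
job = scrape_queue.enqueue("helpers.scrape_url", "https://www.youtube.com/@SomeChannel")
print(job.id, job.get_status())
```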
@@ -0,0 +1,7 @@
DATABASE_URL="sqlite:/app/database.sqlite3"
# Admin key:
TYPESENSE_API_KEY="jVkJyIqRRJbn5EqzdPQRcSzCurI5V7mt"
# Search-only key:
#TYPESENSE_API_KEY="Q918wGcKZoy81tl45QvBacaHXQAIdciq"
# Prod search-only key:
#TYPESENSE_API_KEY="7cQXW66tf3nG7ARYnCfUGIvhj7XdhPVG"
@@ -0,0 +1,48 @@
FROM python:3.11-bullseye

# We don't want to run our application as root if it is not strictly necessary, even in a container.
# Create a user and a group called 'app' to run the processes.
RUN adduser --disabled-password --gecos "" app

# Copy the dependency list in first so the pip layer can be cached
COPY requirements.txt /app/requirements.txt

# Make the directory the working directory for subsequent commands
WORKDIR /app

# Install from the requirements.txt we copied above
RUN pip install -r requirements.txt

# Download dbmate (migrations) and yt-dlp (scraping/downloading)
RUN mkdir /app/bin
RUN curl -fsSL -o /app/bin/dbmate https://github.com/amacneil/dbmate/releases/latest/download/dbmate-linux-amd64 && \
    chmod +x /app/bin/dbmate
RUN curl -fsSL -o /app/bin/yt-dlp https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp && \
    chmod +x /app/bin/yt-dlp

# Pull ffmpeg from Debian testing, which is newer than the bullseye package
RUN apt-get update && \
    apt-get -y --no-install-recommends install software-properties-common && \
    add-apt-repository "deb http://httpredir.debian.org/debian testing main" && \
    apt-get update && \
    apt-get -t testing install -y --no-install-recommends python3 ffmpeg

# Place the application components in a dir below the root dir
COPY . /app

# Hand everything over to the 'app' user
RUN chown -R app:app /app

# Subsequent commands, either in this Dockerfile or in a
# docker-compose.yml, will run as user 'app'
USER app

# We are done with setting up the image.
# As this image is used for different purposes and processes,
# no CMD or ENTRYPOINT is specified here; that is done in docker-compose.yml.
#RUN mkdir $HOME/.cache
#RUN chown -R app:app $HOME/app

# Apply pending migrations (dbmate reads DATABASE_URL from .env)
RUN bin/dbmate up
@ -0,0 +1,24 @@ | |||||
-- migrate:up | |||||
create table discovered_videos ( | |||||
id TEXT PRIMARY KEY, | |||||
info JSON, | |||||
status TEXT CHECK( status IN ('DISCOVERED', 'HIDDEN', 'REQUESTED', 'DOWNLOADING', "DOWNLOADED") ) NOT NULL, | |||||
requested_by TEXT DEFAULT '' | |||||
); | |||||
create table videos_in_playlists ( | |||||
video_id TEXT NOT NULL, | |||||
playlist_id TEXT NOT NULL, | |||||
playlist_name TEXT, | |||||
PRIMARY KEY (video_id, playlist_id) | |||||
); | |||||
CREATE TABLE typesense_keys ( | |||||
id INTEGER PRIMARY KEY AUTOINCREMENT, | |||||
key TEXT NOT NULL | |||||
); | |||||
-- migrate:down | |||||
drop table discovered_videos; | |||||
drop table videos_in_playlists; | |||||
drop table typesense_keys; |
@ -0,0 +1,19 @@ | |||||
CREATE TABLE IF NOT EXISTS "schema_migrations" (version varchar(128) primary key); | |||||
CREATE TABLE video_states ( | |||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, | |||||
name TEXT | |||||
); | |||||
CREATE TABLE discovered_videos ( | |||||
id TEXT PRIMARY KEY, | |||||
info JSON, | |||||
status INTEGER REFERENCES video_states(id) | |||||
); | |||||
CREATE TABLE videos_in_playlists ( | |||||
video_id TEXT NOT NULL, | |||||
playlist_id TEXT NOT NULL, | |||||
playlist_name TEXT, | |||||
PRIMARY KEY (video_id, playlist_id) | |||||
); | |||||
-- Dbmate schema migrations | |||||
INSERT INTO "schema_migrations" (version) VALUES | |||||
('20230519145608'); |
@@ -0,0 +1,240 @@
import json
import sqlite3
import subprocess

import typesense
from dotenv import dotenv_values
from sentence_transformers import SentenceTransformer

config = dotenv_values(".env")

# Typesense client used by the workers; 'typesense' is the docker-compose hostname.
ts_client = typesense.Client({
    'api_key': config["TYPESENSE_API_KEY"],
    'nodes': [{
        'host': 'typesense',
        'port': '8108',
        'protocol': 'http'
    }],
    'connection_timeout_seconds': 2
})

# Model used to vectorize video titles for semantic search.
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2", cache_folder="/data")
def fetch_or_create_search_key():
    """Return the persisted search-only Typesense key, creating one if needed."""
    conn = sqlite3.connect("/data/database.sqlite3")
    cursor = conn.cursor()
    cursor.execute("SELECT key FROM typesense_keys LIMIT 1")
    rows = cursor.fetchall()
    if len(rows) == 0:
        key = ts_client.keys.create({"description": "Search-only key.", "actions": ["documents:search"], "collections": ["*"]})
        key = key["value"]
        cursor.execute("INSERT INTO typesense_keys (key) VALUES (?)", (key,))
        conn.commit()
    else:
        key = rows[0][0]
    conn.close()
    return key
def get_video_json(video_id):
    """Fetch the stored yt-dlp metadata for one video, with its current status mixed in."""
    conn = sqlite3.connect("/data/database.sqlite3")
    cursor = conn.cursor()
    cursor.execute("""SELECT info, status FROM discovered_videos
                      WHERE id = ? LIMIT 1""", (video_id,))
    result = cursor.fetchone()
    conn.close()
    if result is None:
        return None
    data = json.loads(result[0])
    data["status"] = result[1]
    return data
def hide_videos(video_ids):
    """Mark still-undecided videos as HIDDEN and mirror the change into Typesense."""
    conn = sqlite3.connect("/data/database.sqlite3")
    cursor = conn.cursor()
    cursor.execute(f"""UPDATE OR IGNORE discovered_videos
                       SET status = 'HIDDEN'
                       WHERE status = 'DISCOVERED'
                       AND id IN ({','.join(['?'] * len(video_ids))})
                       RETURNING status""", video_ids)
    rows = cursor.fetchall()
    new_status = rows[0][0] if rows else None
    conn.commit()
    conn.close()
    for video_id in video_ids:
        doc = {
            "id": video_id,
            "status": "HIDDEN"
        }
        ts_client.collections["discovered_videos"].documents[video_id].update(doc)
    return new_status

def request_videos(video_ids):
    """Mark discovered videos as REQUESTED and mirror the change into Typesense."""
    conn = sqlite3.connect("/data/database.sqlite3")
    cursor = conn.cursor()
    cursor.execute(f"""UPDATE OR ABORT discovered_videos
                       SET status = 'REQUESTED'
                       WHERE status = 'DISCOVERED'
                       AND id IN ({','.join(['?'] * len(video_ids))})
                       RETURNING status""", video_ids)
    rows = cursor.fetchall()
    new_status = rows[0][0] if rows else None
    conn.commit()
    conn.close()
    for video_id in video_ids:
        doc = {
            "id": video_id,
            "status": "REQUESTED"
        }
        ts_client.collections["discovered_videos"].documents[video_id].update(doc)
    return new_status
def download_video(video_id):
    """Download one REQUESTED video with yt-dlp, tracking status in SQLite and Typesense."""
    conn = sqlite3.connect("/data/database.sqlite3")
    cursor = conn.cursor()
    cursor.execute("""UPDATE OR FAIL discovered_videos
                      SET status = 'DOWNLOADING'
                      WHERE status = 'REQUESTED'
                      AND id = ?
                      RETURNING json_extract(info, '$.original_url')""", (video_id,))
    original_url = cursor.fetchall()[0][0]
    conn.commit()
    doc = {
        "id": video_id,
        "status": "DOWNLOADING"
    }
    ts_client.collections["discovered_videos"].documents[video_id].update(doc)
    command = [
        "/app/bin/yt-dlp",
        "-S",
        "res:1080",
        "-P",
        "/data",
        "-o",
        "%(extractor)s-store/%(channel)s - %(channel_id)s/%(id)s/%(title)s.%(ext)s",
        "--write-subs",
        "--write-auto-subs",
        "--sub-langs",
        # No extra shell quoting: subprocess passes this as a single argument.
        "en-US,en,en-us,en-gb,en-GB",
        "--write-thumbnail",
        "--continue",
        "--embed-chapters",
        "--embed-subs",
        original_url
    ]
    returncode = subprocess.run(command).returncode
    if returncode != 0:
        # Roll the video back to REQUESTED so the download can be retried.
        cursor.execute("""UPDATE OR FAIL discovered_videos
                          SET status = 'REQUESTED'
                          WHERE status = 'DOWNLOADING'
                          AND id = ?""", (video_id,))
        conn.commit()
        conn.close()
        doc = {
            "id": video_id,
            "status": "REQUESTED"
        }
        ts_client.collections["discovered_videos"].documents[video_id].update(doc)
        raise Exception(f"Download failed for URL: {original_url}")
    cursor.execute("""UPDATE OR FAIL discovered_videos
                      SET status = 'DOWNLOADED'
                      WHERE status = 'DOWNLOADING'
                      AND id = ?""", (video_id,))
    conn.commit()
    conn.close()
    doc = {
        "id": video_id,
        "status": "DOWNLOADED"
    }
    ts_client.collections["discovered_videos"].documents[video_id].update(doc)
def scrape_url(url):
    """Scrape metadata for every video behind `url` (video/playlist/channel) into SQLite and Typesense."""
    command = [
        "/app/bin/yt-dlp",
        "--dump-json",
        "--write-subs",
        "--sponsorblock-mark",
        "all",
        "--sub-langs",
        # No extra shell quoting: subprocess passes this as a single argument.
        "en-US,en,en-us,en-gb,en-GB",
        url
    ]
    output = subprocess.check_output(command).decode("utf-8")
    # With --dump-json, yt-dlp emits one JSON object per line, one per video.
    for line in output.splitlines():
        video = json.loads(line)
        # Drop the bulky keys we never query; pop() tolerates keys that are absent.
        for key in ("formats", "requested_formats", "thumbnails", "automatic_captions"):
            video.pop(key, None)
        data = json.dumps(video)
        video_id = video["id"]
        conn = sqlite3.connect("/data/database.sqlite3")
        cursor = conn.cursor()
        cursor.execute("INSERT INTO discovered_videos (id, info, status) VALUES (?, ?, 'DISCOVERED') ON CONFLICT(id) DO UPDATE SET info = excluded.info", (video_id, data))
        channel_id = video["channel_id"]
        playlist_id = video.get("playlist_id", None)
        if playlist_id and channel_id != playlist_id:
            cursor.execute("INSERT INTO videos_in_playlists (video_id, playlist_id, playlist_name) VALUES (?, ?, ?) ON CONFLICT(video_id, playlist_id) DO NOTHING", (video_id, playlist_id, video["playlist_name"]))
        conn.commit()
        cursor.execute("SELECT DISTINCT playlist_id FROM videos_in_playlists WHERE video_id = ?", (video_id,))
        rows = cursor.fetchall()
        playlist_ids = [row[0] for row in rows]
        conn.close()
        embeddings = embedding_model.encode(video["fulltitle"])
        document = {
            "id": video["id"],
            "fulltitle": video["fulltitle"],
            "title_vec": embeddings.tolist(),
            "description": video["description"],
            "channel": video["channel"],
            "channel_follower_count": video["channel_follower_count"],
            "channel_id": video["channel_id"],
            "duration": video["duration"],
            "view_count": video["view_count"],
            "upload_date": int(video["upload_date"]),
            "filesize_approx": video["filesize_approx"],
            "extractor": video["extractor"],
            "thumbnail": video["thumbnail"],
            "status": "DISCOVERED",
            "requested_by": "",
            "playlist_ids": playlist_ids,
        }
        ts_client.collections["discovered_videos"].documents.upsert(document)
@@ -0,0 +1,219 @@
import os
import shutil

import typesense
from dotenv import dotenv_values
from flask import Flask, jsonify, request
from flask_cors import CORS
from redis import Redis
from rq import Queue
from typesense.exceptions import ObjectAlreadyExists

from helpers import scrape_url, download_video, fetch_or_create_search_key, get_video_json, hide_videos, request_videos

app = Flask(__name__)
CORS(app)
config = dotenv_values(".env")

# Copy a premade database into position if there's no database already
if not os.path.exists("/data/database.sqlite3"):
    shutil.copy("/app/database.sqlite3", "/data/database.sqlite3")

client = typesense.Client({
    'api_key': config["TYPESENSE_API_KEY"],
    'nodes': [{
        'host': 'typesense',
        'port': '8108',
        'protocol': 'http'
    }],
    'connection_timeout_seconds': 5
})
# Create the typesense collection, ignoring the error if it already exists
try:
    create_response = client.collections.create({
        "name": "discovered_videos",
        "fields": [
            #{"name": "id", "type": "string"},
            {"name": "fulltitle", "type": "string"},
            {"name": "title_vec", "type": "float[]", "num_dim": 384},
            {"name": "description", "type": "string"},
            {"name": "channel", "type": "string", "facet": True},
            {"name": "channel_follower_count", "type": "int32"},
            {"name": "channel_id", "type": "string"},
            {"name": "playlist_ids", "type": "string[]", "facet": True},
            {"name": "duration", "type": "int32"},
            {"name": "view_count", "type": "int32"},
            {"name": "upload_date", "type": "int64"},
            {"name": "filesize_approx", "type": "int64"},
            {"name": "extractor", "type": "string"},
            {"name": "thumbnail", "type": "string"},
            {"name": "requested_by", "type": "string"},
            {"name": "status", "type": "string", "facet": True},
        ],
        "default_sorting_field": "upload_date"
    })
except ObjectAlreadyExists:
    pass
except Exception as e:
    print(f"ERROR: Some fatal error occurred while creating the typesense collection: {e}")
redis_conn = Redis("redis")
scrape_queue = Queue("discover", connection=redis_conn)
download_queue = Queue("download", connection=redis_conn)
@app.route('/video', methods=["GET"])
def video():
    video_id = request.args.get("id")
    return jsonify(get_video_json(video_id))

@app.route('/typesense_api_key', methods=["GET"])
def typesense_api_key():
    key = fetch_or_create_search_key()
    return jsonify(key)

@app.route('/scrape', methods=["POST"])
def scrape_endpoint():
    data = request.json
    url = data["url"]
    job = scrape_queue.enqueue(scrape_url, url, job_timeout=1000)
    return jsonify({"job_id": job.id})

@app.route('/request', methods=['POST'])
def request_endpoint():
    data = request.json
    ids = data["ids"]
    dl_jobs = []
    request_videos(ids)
    for video_id in ids:
        job = download_queue.enqueue(download_video, video_id, job_timeout=2500)
        dl_jobs.append({"job_id": job.id})
    return jsonify(dl_jobs)

@app.route('/hide', methods=['POST'])
def hide_endpoint():
    data = request.json
    ids = data["ids"]
    res = hide_videos(ids)
    return jsonify(res)

@app.route('/download', methods=['POST'])
def download_endpoint():
    data = request.json
    ids = data["ids"]
    dl_jobs = []
    for video_id in ids:
        job = download_queue.enqueue(download_video, video_id, job_timeout=2500)
        dl_jobs.append({"job_id": job.id})
    return jsonify(dl_jobs)
def jobs_in_queue(queue):
    """Collect id/args/status for the jobs in every registry of `queue`."""
    registries = [
        queue.scheduled_job_registry,
        queue.started_job_registry,
        queue.finished_job_registry,
        queue.failed_job_registry,
    ]
    all_jobs = []
    for registry in registries:
        # Iterate over each job and retrieve relevant information
        for job_id in registry.get_job_ids():
            job = queue.fetch_job(job_id)
            if job is None:  # job data may have expired from redis
                continue
            all_jobs.append({
                'job_id': job.id,
                'args': job.args,
                'status': job.get_status()
            })
    return all_jobs

@app.route('/download_jobs', methods=['GET'])
def get_download_jobs():
    return jsonify(jobs_in_queue(download_queue))

@app.route('/scrape_jobs', methods=['GET'])
def get_scrape_jobs():
    return jsonify(jobs_in_queue(scrape_queue))
if __name__ == '__main__':
    app.run(debug=True)
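
# A hypothetical smoke test for the endpoints above (not part of the app):
# assumes the docker-compose stack is up and `requests` is installed client-side.
#
#   import requests
#   requests.post("http://localhost:5000/scrape",
#                 json={"url": "https://www.youtube.com/watch?v=<video-id>"})
#   requests.post("http://localhost:5000/request", json={"ids": ["<video-id>"]})
#   print(requests.get("http://localhost:5000/download_jobs").json())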
@@ -0,0 +1,8 @@
Flask==2.3.2
python-dotenv==1.0.0
redis==4.5.5
rq==1.14.1
sentence_transformers==2.2.2
typesense==0.15.1
gunicorn==20.1.0
Flask-Cors==3.0.10
@ -0,0 +1,42 @@ | |||||
version: '3' | |||||
services: | |||||
web: | |||||
build: ./backend | |||||
image: master-image | |||||
environment: | |||||
- TRANSFORMERS_CACHE=/app/.cache | |||||
ports: | |||||
- 5000:5000 | |||||
command: /usr/local/bin/gunicorn -b :5000 main:app | |||||
volumes: | |||||
- ./dump/backend:/data | |||||
depends_on: | |||||
- redis | |||||
- typesense | |||||
rq-worker: | |||||
image: master-image | |||||
depends_on: | |||||
- redis | |||||
command: rq worker -u redis://redis:6379 discover download | |||||
volumes: | |||||
- ./dump/backend:/data | |||||
deploy: | |||||
replicas: 2 | |||||
redis: | |||||
image: redis:alpine | |||||
ports: | |||||
- 6379:6379 | |||||
typesense: | |||||
image: typesense/typesense:0.24.1 | |||||
environment: | |||||
TYPESENSE_DATA_DIR: /data | |||||
TYPESENSE_API_KEY: jVkJyIqRRJbn5EqzdPQRcSzCurI5V7mt | |||||
TYPESENSE_SEARCH_ONLY_API_KEY: Q918wGcKZoy81tl45QvBacaHXQAIdciq | |||||
volumes: | |||||
- ./dump/typesense:/data | |||||
ports: | |||||
- 8108:8108 |
@ -0,0 +1,9 @@ | |||||
root = true | |||||
[*] | |||||
charset = utf-8 | |||||
indent_style = space | |||||
indent_size = 2 | |||||
end_of_line = lf | |||||
insert_final_newline = true | |||||
trim_trailing_whitespace = true |
@ -0,0 +1,23 @@ | |||||
# Logs | |||||
logs | |||||
*.log | |||||
npm-debug.log* | |||||
yarn-debug.log* | |||||
yarn-error.log* | |||||
pnpm-debug.log* | |||||
lerna-debug.log* | |||||
node_modules | |||||
dist | |||||
dist-ssr | |||||
*.local | |||||
# Editor directories and files | |||||
.vscode | |||||
.idea | |||||
.DS_Store | |||||
*.suo | |||||
*.ntvs* | |||||
*.njsproj | |||||
*.sln | |||||
*.sw? |
@ -0,0 +1,5 @@ | |||||
{ | |||||
"singleQuote": true, | |||||
"proseWrap": "never", | |||||
"trailingComma": "es5" | |||||
} |
@@ -0,0 +1,21 @@
# yt-archive

_This project was generated with [create-instantsearch-app](https://github.com/algolia/instantsearch/tree/master/packages/create-instantsearch-app) by [Algolia](https://algolia.com)._

## Get started

To run this project locally, install the dependencies and run the local server:

```sh
npm install
npm start
```

Alternatively, you may use [Yarn](https://yarnpkg.com/):

```sh
yarn
yarn start
```

Open http://localhost:3000 to see your app.
@ -0,0 +1,59 @@ | |||||
<!DOCTYPE html> | |||||
<html lang="en"> | |||||
<head> | |||||
<meta charset="UTF-8" /> | |||||
<meta http-equiv="X-UA-Compatible" content="ie=edge"> | |||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> | |||||
<link rel="icon" href="/favicon.png" /> | |||||
<!-- | |||||
Do not use @7 in production, use a complete version like x.x.x, see website for latest version: | |||||
https://www.algolia.com/doc/guides/building-search-ui/installation/react/#load-the-style | |||||
--> | |||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/instantsearch.css@7/themes/satellite-min.css"> | |||||
<title>yt-archive</title> | |||||
<style> | |||||
.header { | |||||
display: flex; | |||||
align-items: center; | |||||
min-height: 50px; | |||||
padding: 0.5rem 1rem; | |||||
background-image: linear-gradient(to right, #4dba87, #2f9088); | |||||
color: #fff; | |||||
margin-bottom: 1rem; | |||||
} | |||||
.header a { | |||||
color: #fff; | |||||
text-decoration: none; | |||||
padding: 0 0.5rem; | |||||
} | |||||
.header-title { | |||||
font-size: 1.2rem; | |||||
font-weight: normal; | |||||
} | |||||
.header-title::after { | |||||
content: ' ▸ '; | |||||
padding: 0 0.5rem; | |||||
} | |||||
.header-subtitle { | |||||
font-size: 1.2rem; | |||||
} | |||||
</style> | |||||
</head> | |||||
<body> | |||||
<div id="app"> | |||||
<header class="header"> | |||||
<h1 class="header-title"> | |||||
YouTube Disaster Plan | |||||
</h1> | |||||
<router-link class="navlink" to="/">Search/Request</router-link> | |||||
<router-link class="navlink" to="/scrape">Scrape Video Metadata</router-link> | |||||
</header> | |||||
<router-view></router-view> | |||||
</div> | |||||
<script type="module" src="/src/main.js"></script> | |||||
</body> | |||||
</html> |
@ -0,0 +1,24 @@ | |||||
{ | |||||
"name": "yt-archive", | |||||
"version": "1.0.0", | |||||
"private": true, | |||||
"scripts": { | |||||
"dev": "vite", | |||||
"start": "vite", | |||||
"build": "vite build", | |||||
"preview": "vite preview" | |||||
}, | |||||
"dependencies": { | |||||
"algoliasearch": "4.12.1", | |||||
"instantsearch.js": "^4.7.0", | |||||
"typesense-instantsearch-adapter": "^2.6.0", | |||||
"vue": "3.2.25", | |||||
"vue-instantsearch": "4.8.8", | |||||
"vue-router": "^4.2.1" | |||||
}, | |||||
"devDependencies": { | |||||
"@vitejs/plugin-vue": "2.2.0", | |||||
"@vitejs/plugin-vue-jsx": "1.3.7", | |||||
"vite": "2.8.0" | |||||
} | |||||
} |
@@ -0,0 +1,247 @@
<template>
  <div>
    <div class="container">
      <ais-instant-search
        :search-client="searchClient"
        index-name="discovered_videos"
        :routing="routing"
      >
        <ais-configure :hits-per-page.camel="8" />
        <div class="search-panel">
          <div class="search-panel__filters">
            <h3>Channels</h3>
            <ais-refinement-list attribute="channel" searchable />
            <h3>Statuses</h3>
            <ais-refinement-list attribute="status" :sortBy="['name']" />
          </div>
          <div class="search-panel__results">
            <div class="searchbox">
              <ais-search-box placeholder="" />
            </div>
            <ais-hits>
              <template v-slot:item="{ item }">
                <article>
                  <router-link :to="{ name: 'video', params: { id: item.id }}">
                    <img :src="item.thumbnail" style="max-width: 100%;max-height: 100px"/>
                    <h1>
                      <ais-highlight
                        :hit="item"
                        attribute="fulltitle"
                      />
                    </h1>
                  </router-link>
                  <p>
                    {{item.channel}}
                  </p>
                  <!--<p>
                    <ais-highlight :hit="item" attribute="description" />
                  </p>-->
                  <div class="buttonlist">
                    <button style="background-color: gray;color: white" :hidden="item.status!=='DISCOVERED'" @click="handleHide(item)">Hide</button>
                    <button style="background-color: green;color: white" :hidden="item.status!=='DISCOVERED'" @click="handleRequest(item)">Request</button>
                  </div>
                </article>
              </template>
            </ais-hits>
            <div class="pagination">
              <ais-pagination />
            </div>
          </div>
        </div>
      </ais-instant-search>
    </div>
  </div>
</template>
<script setup>
async function handleHide(item) {
  // Optimistically flip the local status to match the backend so the buttons hide immediately.
  item.status = "HIDDEN"
  console.log("Hide: " + item.id)
  let data = {
    ids: [item.id]
  }
  let response = await fetch("http://localhost:5000/hide", {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json'
    },
    body: JSON.stringify(data)
  })
  if (response.ok) {
    const jsonValue = await response.json();
    console.log("Hiding: " + jsonValue)
  } else {
    console.log("Hide failed: " + response)
  }
}

async function handleRequest(item) {
  // Optimistically flip the local status to match the backend so the buttons hide immediately.
  item.status = "REQUESTED"
  console.log("Requesting: " + item.fulltitle + " (" + item.id + ")")
  let data = {
    ids: [item.id]
  }
  let response = await fetch("http://localhost:5000/request", {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json'
    },
    body: JSON.stringify(data)
  })
  if (response.ok) {
    const jsonValue = await response.json();
    console.log("Requesting: " + jsonValue)
  } else {
    console.log("Request failed: " + response)
  }
}
</script>
<script>
import TypesenseInstantSearchAdapter from "typesense-instantsearch-adapter";
import { history } from 'instantsearch.js/es/lib/routers';
import { simple } from 'instantsearch.js/es/lib/stateMappings';

// Fetch the search-only Typesense key from the backend
// (hardcoded URL for now; see "Deploying" in the README).
async function fetchAPIKey() {
  let response = await fetch("http://localhost:5000/typesense_api_key");
  if (response.ok) {
    const jsonValue = await response.json();
    console.log(jsonValue);
    return jsonValue;
  } else {
    console.log("Couldn't fetch typesense API key");
  }
}

const key = await fetchAPIKey();

const typesenseInstantsearchAdapter = new TypesenseInstantSearchAdapter({
  server: {
    apiKey: key,
    nodes: [
      {
        host: "localhost",
        port: 8108,
        protocol: "http"
      }
    ]
  },
  // The following parameters are directly passed to Typesense's search API endpoint.
  // So you can pass any parameters supported by the search endpoint below.
  // queryBy is required.
  additionalSearchParameters: {
    query_by: "fulltitle",
    sort_by: "upload_date:desc"
  }
});

export default {
  data() {
    return {
      searchClient: typesenseInstantsearchAdapter.searchClient,
      routing: {
        router: history(),
        stateMappings: simple()
      }
    };
  },
};
</script>
<style>
body,
h1 {
  margin: 0;
  padding: 0;
}

body {
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica,
    Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
}

h1 {
  font-size: 1rem;
}

em {
  background: cyan;
  font-style: normal;
}

.header {
  display: flex;
  align-items: center;
  min-height: 50px;
  padding: 0.5rem 1rem;
  background-image: linear-gradient(to right, #4dba87, #2f9088);
  color: #fff;
  margin-bottom: 1rem;
}

.header a {
  color: #fff;
  text-decoration: none;
}

.header-title {
  font-size: 1.2rem;
  font-weight: normal;
}

.header-title::after {
  content: ' ▸ ';
  padding: 0 0.5rem;
}

.header-subtitle {
  font-size: 1.2rem;
}

.container {
  max-width: 1200px;
  margin: 0 auto;
  padding: 1rem;
}

.search-panel {
  display: flex;
}

.search-panel__filters {
  flex: 1;
}

.search-panel__results {
  flex: 3;
}

.searchbox {
  margin-bottom: 2rem;
}

.pagination {
  margin: 2rem auto;
  text-align: center;
}

.buttonlist {
  display: flex;
  flex-wrap: wrap;
  justify-content: space-between;
  max-width: 100%;
}
</style>
@@ -0,0 +1,37 @@
<template>
  <h1>Channel/Playlist/Video to scrape metadata</h1>
  <form @submit.prevent="scrapeVideos">
    <label>URL:</label> <input type="text" name="scrape" v-model="scrapeUrl">
    <button type="submit">Scrape Video(s)</button>
  </form>
</template>

<script setup>
import { ref } from 'vue';

const scrapeUrl = ref("")

async function scrapeVideos() {
  const url = scrapeUrl.value
  scrapeUrl.value = ""
  console.log("Scraping: " + url)
  let data = {
    url
  }
  let response = await fetch("http://localhost:5000/scrape", {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json'
    },
    body: JSON.stringify(data)
  })
  if (response.ok) {
    const jsonValue = await response.json();
    console.log("Scraping: " + jsonValue)
  } else {
    console.log("Failure: " + response)
  }
}
</script>
@@ -0,0 +1,61 @@
<template>
  <h1>
    Title: {{ video_json.value.fulltitle }}
  </h1>
  <img :src="video_json.value.thumbnail"/>
  <table>
    <tr>
      <td>Channel</td>
      <td>{{ video_json.value.channel }}</td>
    </tr>
    <tr>
      <td>Views</td>
      <td>{{ video_json.value.view_count }}</td>
    </tr>
    <tr>
      <td>Duration</td>
      <td>{{ video_json.value.duration_string }}</td>
    </tr>
    <tr>
      <td>Like count</td>
      <td>{{ video_json.value.like_count }}</td>
    </tr>
    <tr>
      <td>Resolution</td>
      <td>{{ video_json.value.resolution }}</td>
    </tr>
    <tr>
      <td>Upload Date</td>
      <td>{{ video_json.value.upload_date }}</td>
    </tr>
    <tr>
      <td>Follower count</td>
      <td>{{ video_json.value.channel_follower_count }}</td>
    </tr>
  </table>
</template>

<script setup>
import { reactive } from 'vue';
import { useRoute } from 'vue-router'

const route = useRoute()
// Start with an empty object so the template can render before the fetch resolves.
const video_json = reactive({ value: {} });
let video_id = route.params.id;

async function getData(video_id) {
  const res = await fetch("http://localhost:5000/video?id=" + video_id);
  video_json.value = await res.json();
}

getData(video_id)
</script>
@@ -0,0 +1,26 @@
import { createApp } from 'vue/dist/vue.esm-bundler';
import { createRouter, createWebHistory } from 'vue-router';
import InstantSearch from 'vue-instantsearch/vue3/es';

import Scrape from './Scrape.vue';
import Request from './Request.vue';
import Video from './Video.vue';

const routes = [
  { path: '/', component: Request },
  { path: '/scrape', component: Scrape },
  { path: '/video/:id', name: "video", component: Video }
]

// Create the router instance, using the HTML5 history API for clean URLs.
const router = createRouter({
  history: createWebHistory(),
  routes,
})

const app = createApp({});

app.use(InstantSearch);
app.use(router);
app.mount('#app');
@@ -0,0 +1,8 @@
import { defineConfig } from 'vite';
import vue from '@vitejs/plugin-vue';
import vueJsx from '@vitejs/plugin-vue-jsx';

// https://vitejs.dev/config/
export default defineConfig({
  plugins: [vue(), vueJsx()],
});