From 1debeb77f7986de1b659dcfdc442de6415e1d9f5 Mon Sep 17 00:00:00 2001
From: Benedikt Peetz
Date: Wed, 21 Aug 2024 10:49:23 +0200
Subject: chore: Initial Commit

This repository was migrated out of my nixos-config.
---
 yt_dlp/src/wrapper/info_json.rs      | 526 +++++++++++++++++++++++++++++++++++
 yt_dlp/src/wrapper/mod.rs            |  12 +
 yt_dlp/src/wrapper/yt_dlp_options.rs |  62 +++++
 3 files changed, 600 insertions(+)
 create mode 100644 yt_dlp/src/wrapper/info_json.rs
 create mode 100644 yt_dlp/src/wrapper/mod.rs
 create mode 100644 yt_dlp/src/wrapper/yt_dlp_options.rs

(limited to 'yt_dlp/src/wrapper')

diff --git a/yt_dlp/src/wrapper/info_json.rs b/yt_dlp/src/wrapper/info_json.rs
new file mode 100644
index 0000000..aceeeb8
--- /dev/null
+++ b/yt_dlp/src/wrapper/info_json.rs
@@ -0,0 +1,526 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2024 Benedikt Peetz
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+use std::{collections::HashMap, path::PathBuf};
+
+use pyo3::{types::PyDict, Bound, PyResult, Python};
+use serde::{Deserialize, Deserializer, Serialize};
+use serde_json::Value;
+use url::Url;
+
+use crate::json_loads_str;
+
+// Placeholder alias; presumably marks fields whose final type is still undecided — TODO confirm.
+type Todo = String;
+
+// TODO: Change this to map `_type` to a structure of values, instead of the options <2024-05-27>
+// And replace all the strings with better types (enums or urls)
+/// Typed representation of an info JSON object.
+///
+/// Every field is optional. Because of `deny_unknown_fields`, deserialization fails on any
+/// key that is not listed here.
+///
+/// NOTE(review): in this copy of the patch the generic arguments of most field types
+/// (`Option`, `Vec`, `HashMap`) are missing — they look like they were stripped as HTML tags —
+/// so the declarations below do not compile as shown. Restore them from the original commit.
+#[derive(Debug, Deserialize, Serialize, PartialEq)]
+#[serde(deny_unknown_fields)]
+pub struct InfoJson {
+    pub __last_playlist_index: Option,
+    pub __post_extractor: Option,
+    pub __x_forwarded_for_ip: Option,
+    pub _filename: Option,
+    pub _format_sort_fields: Option>,
+    pub _has_drm: Option,
+    pub _type: Option,
+    pub _version: Option,
+    pub abr: Option,
+    pub acodec: Option,
+    pub age_limit: Option,
+    pub aspect_ratio: Option,
+    pub asr: Option,
+    pub audio_channels: Option,
+    pub audio_ext: Option,
+    pub automatic_captions: Option>>,
+    pub availability: Option,
+    pub average_rating: Option,
+    pub categories: Option>,
+    pub channel: Option,
+    pub channel_follower_count: Option,
+    pub channel_id: Option,
+    pub channel_is_verified: Option,
+    pub channel_url: Option,
+    pub chapters: Option>,
+    pub comment_count: Option,
+    pub comments: Option>,
+    pub concurrent_view_count: Option,
+    pub description: Option,
+    pub display_id: Option,
+    pub downloader_options: Option,
+    pub duration: Option,
+    pub duration_string: Option,
+    pub dynamic_range: Option,
+    pub entries: Option>,
+    pub episode: Option,
+    pub episode_number: Option,
+    pub epoch: Option,
+    pub ext: Option,
+    pub extractor: Option,
+    pub extractor_key: Option,
+    pub filename: Option,
+    pub filesize: Option,
+    pub filesize_approx: Option,
+    pub format: Option,
+    pub format_id: Option,
+    pub format_note: Option,
+    pub formats: Option>,
+    pub fps: Option,
+    pub fulltitle: Option,
+    pub has_drm: Option,
+    pub heatmap: Option>,
+    pub height: Option,
+    pub http_headers: Option,
+    pub id: Option,
+    pub ie_key: Option,
+    pub is_live: Option,
+    pub language: Option,
+    pub language_preference: Option,
+    pub license: Option,
+    pub like_count: Option,
+    pub live_status: Option,
+    pub location: Option,
+    pub modified_date: Option,
+    pub n_entries: Option,
+    pub original_url: Option,
+    pub playable_in_embed: Option,
+    pub playlist: Option,
+    pub playlist_autonumber: Option,
+    pub playlist_channel: Option,
+    pub playlist_channel_id: Option,
+    pub playlist_count: Option,
+    pub playlist_id: Option,
+    pub playlist_index: Option,
+    pub playlist_title: Option,
+    pub playlist_uploader: Option,
+    pub playlist_uploader_id: Option,
+    pub preference: Option,
+    pub protocol: Option,
+    pub quality: Option,
+    pub release_date: Option,
+    pub release_timestamp: Option,
+    pub release_year: Option,
+    pub requested_downloads: Option>,
+    pub requested_entries: Option>,
+    pub requested_formats: Option>,
+    pub requested_subtitles: Option>,
+    pub resolution: Option,
+    pub season: Option,
+    pub season_number: Option,
+    pub series: Option,
+    pub source_preference: Option,
+    pub sponsorblock_chapters: Option>,
+    pub stretched_ratio: Option,
+    pub subtitles: Option>>,
+    pub tags: Option>,
+    pub tbr: Option,
+    pub thumbnail: Option,
+    pub thumbnails: Option>,
+    pub timestamp: Option,
+    pub title: Option,
+    pub upload_date: Option,
+    pub uploader: Option,
+    pub uploader_id: Option,
+    pub uploader_url: Option,
+    pub url: Option,
+    pub vbr: Option,
+    pub vcodec: Option,
+    pub video_ext: Option,
+    pub view_count: Option,
+    pub was_live: Option,
+    pub webpage_url: Option,
+    pub webpage_url_basename: Option,
+    pub webpage_url_domain: Option,
+    pub width: Option,
+}
+
+/// One download record; presumably the element type of `InfoJson::requested_downloads`
+/// (the element type is not visible in this copy — TODO confirm).
+///
+/// Unlike `InfoJson`, most fields here are required, so a missing key is a deserialization
+/// error. Unknown keys are rejected as well.
+#[derive(Debug, Deserialize, Serialize, PartialEq)]
+#[serde(deny_unknown_fields)]
+pub struct RequestedDownloads {
+    pub __files_to_merge: Option>,
+    pub __finaldir: PathBuf,
+    pub __infojson_filename: PathBuf,
+    pub __postprocessors: Vec,
+    pub __real_download: bool,
+    pub __write_download_archive: bool,
+    pub _filename: PathBuf,
+    pub _type: InfoType,
+    pub _version: Version,
+    pub abr: f64,
+    pub acodec: String,
+    pub aspect_ratio: f64,
+    pub asr: u32,
+    pub audio_channels: u32,
+    pub chapters: Option>,
+    pub duration: Option,
+    pub dynamic_range: String,
+    pub ext: String,
+    pub filename: PathBuf,
+    pub filepath: PathBuf,
+    pub filesize_approx: u64,
+    pub format: String,
+    pub format_id: String,
+    pub format_note: String,
+    pub fps: f64,
+    pub height: u32,
+    pub infojson_filename: PathBuf,
+    pub language: Option,
+    pub protocol: String,
+    pub requested_formats: Vec,
+    pub resolution: String,
+    pub tbr: f64,
+    pub vbr: f64,
+    pub vcodec: String,
+    pub width: u32,
+}
+
+/// A subtitle entry with all four fields required: format, file path, name and URL.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub struct Subtitle {
+    pub ext: SubtitleExt,
+    pub filepath: PathBuf,
+    pub name: String,
+    pub url: Url,
+}
+
+/// Subtitle/caption file formats.
+///
+/// Each variant can be deserialized from its lowercase `alias` as well as from its Rust name.
+/// NOTE(review): `alias` only affects deserialization, so serializing emits the Rust variant
+/// name (e.g. `Vtt`, not `vtt`). Confirm that is intended for values that are serialized
+/// again (the sibling options file uses `rename(serialize = ...)` instead).
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub enum SubtitleExt {
+    #[serde(alias = "vtt")]
+    Vtt,
+
+    #[serde(alias = "json")]
+    Json,
+    #[serde(alias = "json3")]
+    Json3,
+
+    #[serde(alias = "ttml")]
+    Ttml,
+
+    #[serde(alias = "srv1")]
+    Srv1,
+    #[serde(alias = "srv2")]
+    Srv2,
+    #[serde(alias = "srv3")]
+    Srv3,
+}
+
+/// A caption entry; only `ext` and `url` are required. Note that `url` is a plain `String`
+/// here, whereas `Subtitle::url` is a parsed `Url`.
+#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)]
+#[serde(deny_unknown_fields)]
+pub struct Caption {
+    pub ext: SubtitleExt,
+    pub name: Option,
+    pub protocol: Option,
+    pub url: String,
+    pub filepath: Option,
+    pub video_id: Option,
+}
+
+/// A titled chapter delimited by `start_time` and `end_time`
+/// (presumably seconds — TODO confirm the unit).
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)]
+#[serde(deny_unknown_fields)]
+pub struct Chapter {
+    pub end_time: f64,
+    pub start_time: f64,
+    pub title: String,
+}
+
+/// A chapter entry as found under `sponsorblock_chapters`; only the two times and the
+/// title are required.
+#[derive(Debug, Deserialize, Serialize, PartialEq)]
+#[serde(deny_unknown_fields)]
+pub struct SponsorblockChapter {
+    /// This is an utterly useless field, and should thus be ignored
+    pub _categories: Option>>,
+
+    pub categories: Option>,
+    pub category: Option,
+    pub category_names: Option>,
+    pub end_time: f64,
+    pub name: Option,
+    pub r#type: Option,
+    pub start_time: f64,
+    pub title: String,
+}
+
+/// Deserializer helper that ignores its input and always returns `Ok(None)`.
+///
+/// It is not referenced by any `deserialize_with` attribute in the visible code.
+/// NOTE(review): the return type reads `Result, D::Error>` in this copy; presumably it was
+/// `Result<Option<T>, D::Error>` before the generics were stripped — confirm.
+pub fn get_none<'de, D, T>(_: D) -> Result, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    Ok(None)
+}
+
+/// Kind of a sponsorblock chapter; accepts the lowercase alias or the variant name.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub enum SponsorblockChapterType {
+    #[serde(alias = "skip")]
+    Skip,
+
+    #[serde(alias = "chapter")]
+    Chapter,
+}
+/// Category of a sponsorblock chapter; accepts the lowercase alias or the variant name.
+/// Any other category string fails to deserialize.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub enum SponsorblockChapterCategory {
+    #[serde(alias = "filler")]
+    Filler,
+
+    #[serde(alias = "sponsor")]
+    Sponsor,
+
+    #[serde(alias = "selfpromo")]
+    SelfPromo,
+
+    #[serde(alias = "chapter")]
+    Chapter,
+
+    #[serde(alias = "intro")]
+    Intro,
+
+    #[serde(alias = "outro")]
+    Outro,
+}
+
+/// One heatmap sample: a time range and the value attached to it.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)]
+#[serde(deny_unknown_fields)]
+pub struct HeatMapEntry {
+    pub start_time: f64,
+    pub end_time: f64,
+    pub value: f64,
+}
+
+/// Known extractor names, in the spelling accepted by the aliases below.
+/// Any extractor not listed here fails to deserialize.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub enum Extractor {
+    #[serde(alias = "generic")]
+    Generic,
+
+    #[serde(alias = "SVTSeries")]
+    SVTSeries,
+
+    #[serde(alias = "youtube")]
+    YouTube,
+
+    #[serde(alias = "youtube:tab")]
+    YouTubeTab,
+}
+
+/// Known extractor keys. These mirror the `Extractor` variants but use different alias
+/// spellings (`Generic`, `Youtube`, `YoutubeTab`).
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub enum ExtractorKey {
+    #[serde(alias = "Generic")]
+    Generic,
+
+    #[serde(alias = "SVTSeries")]
+    SVTSeries,
+
+    #[serde(alias = "Youtube")]
+    YouTube,
+
+    #[serde(alias = "YoutubeTab")]
+    YouTubeTab,
+}
+
+/// Value of a `_type` key: playlist, url or video.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub enum InfoType {
+    #[serde(alias = "playlist")]
+    Playlist,
+
+    #[serde(alias = "url")]
+    Url,
+
+    #[serde(alias = "video")]
+    Video,
+}
+
+#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)]
+#[serde(deny_unknown_fields)]
+/// Version record with the release git head, repository and version string.
+pub struct Version {
+    // NOTE(review): the generic argument was missing in this copy of the patch;
+    // `Option<String>` is assumed from the sibling `release_git_head: String` — confirm.
+    pub current_git_head: Option<String>,
+    pub release_git_head: String,
+    pub repository: String,
+    pub version: String,
+}
+
+/// Parent of a comment: either the top level (`Root`) or the id of another comment.
+///
+/// Deserialized from a plain string through `From<String>`.
+#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)]
+#[serde(from = "String")]
+#[serde(deny_unknown_fields)]
+pub enum Parent {
+    Root,
+    Id(String),
+}
+
+impl Parent {
+    /// Returns the parent comment id, or `None` for [`Parent::Root`].
+    pub fn id(&self) -> Option<&str> {
+        match self {
+            Self::Id(id) => Some(id),
+            Self::Root => None,
+        }
+    }
+}
+
+impl From<String> for Parent {
+    /// Maps the literal string `"root"` to [`Parent::Root`]; any other string is kept as an id.
+    fn from(value: String) -> Self {
+        if value == "root" {
+            Self::Root
+        } else {
+            Self::Id(value)
+        }
+    }
+}
+
+/// Comment id, reduced to the part after the last `.` of the raw string.
+#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)]
+#[serde(from = "String")]
+#[serde(deny_unknown_fields)]
+pub struct Id {
+    pub id: String,
+}
+impl From<String> for Id {
+    fn from(value: String) -> Self {
+        Self {
+            // Take the last element if the string is split with dots, otherwise take the full id.
+            // `rsplit` reaches that element directly instead of walking every segment; it always
+            // yields at least one item, so the fallback is purely defensive.
+            id: value.rsplit('.').next().unwrap_or(&value).to_owned(),
+        }
+    }
+}
+
+/// A single comment.
+///
+/// `like_count` falls back to `0` and `author` to the value of `unknown()` when the key is
+/// missing. `edited` can also be read from the `_time_text` key (a string) and is `true`
+/// when that string contains `" (edited)"`.
+#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)]
+#[serde(deny_unknown_fields)]
+pub struct Comment {
+    pub id: Id,
+    pub text: String,
+    #[serde(default = "zero")]
+    pub like_count: u32,
+    pub is_pinned: bool,
+    pub author_id: String,
+    #[serde(default = "unknown")]
+    pub author: String,
+    pub author_is_verified: bool,
+    pub author_thumbnail: Url,
+    pub parent: Parent,
+    #[serde(deserialize_with = "edited_from_time_text", alias = "_time_text")]
+    pub edited: bool,
+    // Can't also be deserialized, as it's already used in 'edited'
+    // _time_text: String,
+    pub timestamp: i64,
+    pub author_url: Url,
+    pub author_is_uploader: bool,
+    pub is_favorited: bool,
+}
+/// Default for `Comment::author` when the key is absent.
+// NOTE(review): the literal is empty in this copy of the patch; an angle-bracketed
+// placeholder may have been stripped along with the generics — confirm against the commit.
+fn unknown() -> String {
+    "".to_string()
+}
+/// Default for `Comment::like_count` when the key is absent.
+fn zero() -> u32 {
+    0
+}
+/// Deserializes a string and reports whether it contains the marker `" (edited)"`.
+///
+/// # Errors
+/// Fails when the input is not a string.
+fn edited_from_time_text<'de, D>(d: D) -> Result<bool, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let time_text = String::deserialize(d)?;
+    Ok(time_text.contains(" (edited)"))
+}
+
+#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)]
+#[serde(deny_unknown_fields)]
+/// A thumbnail entry; only `url` is required.
+///
+/// NOTE(review): the generic arguments of the `Option` fields are missing in this copy of
+/// the patch (apparently stripped as HTML tags); restore them from the original commit.
+pub struct ThumbNail {
+    pub id: Option,
+    pub preference: Option,
+    /// in the form of "[`height`]x[`width`]"
+    pub resolution: Option,
+    pub url: Url,
+    pub width: Option,
+    pub height: Option,
+}
+
+/// A single format entry. `ext`, `format_id`, `url` and `vcodec` are required; every other
+/// field is optional. Unknown keys are rejected.
+///
+/// NOTE(review): the generic arguments of the `Option` fields are missing in this copy.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)]
+#[serde(deny_unknown_fields)]
+pub struct Format {
+    pub __needs_testing: Option,
+    pub __working: Option,
+    pub abr: Option,
+    pub acodec: Option,
+    pub aspect_ratio: Option,
+    pub asr: Option,
+    pub audio_channels: Option,
+    pub audio_ext: Option,
+    pub columns: Option,
+    pub container: Option,
+    pub downloader_options: Option,
+    pub dynamic_range: Option,
+    pub ext: String,
+    pub filepath: Option,
+    pub filesize: Option,
+    pub filesize_approx: Option,
+    pub format: Option,
+    pub format_id: String,
+    pub format_index: Option,
+    pub format_note: Option,
+    pub fps: Option,
+    pub fragment_base_url: Option,
+    pub fragments: Option>,
+    pub has_drm: Option,
+    pub height: Option,
+    pub http_headers: Option,
+    pub is_dash_periods: Option,
+    pub language: Option,
+    pub language_preference: Option,
+    pub manifest_stream_number: Option,
+    pub manifest_url: Option,
+    pub preference: Option,
+    pub protocol: Option,
+    pub quality: Option,
+    pub resolution: Option,
+    pub rows: Option,
+    pub source_preference: Option,
+    pub tbr: Option,
+    pub url: Url,
+    pub vbr: Option,
+    pub vcodec: String,
+    pub video_ext: Option,
+    pub width: Option,
+}
+
+/// Downloader options; the single field is private, so it is only reachable through
+/// (de)serialization or from inside this module.
+#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)]
+#[serde(deny_unknown_fields)]
+pub struct DownloaderOptions {
+    http_chunk_size: u64,
+}
+
+/// HTTP headers of a request. Each field accepts the canonical header spelling via `alias`
+/// in addition to the snake_case field name. Headers other than these four are rejected.
+#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)]
+#[serde(deny_unknown_fields)]
+pub struct HttpHeader {
+    #[serde(alias = "User-Agent")]
+    pub user_agent: Option,
+    #[serde(alias = "Accept")]
+    pub accept: Option,
+    #[serde(alias = "Accept-Language")]
+    pub accept_language: Option,
+    #[serde(alias = "Sec-Fetch-Mode")]
+    pub sec_fetch_mode: Option,
+}
+
+/// One fragment of a fragmented format; all fields are optional.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)]
+#[serde(deny_unknown_fields)]
+pub struct Fragment {
+    pub url: Option,
+    pub duration: Option,
+    pub path: Option,
+}
+
+impl InfoJson {
+    /// Consumes `self` and converts it through the crate helper `json_loads_str`,
+    /// propagating its error.
+    ///
+    /// NOTE(review): the return type and the local annotation lost their generic arguments
+    /// in this copy; presumably `PyResult<Bound<PyDict>>` / `Bound<PyDict>`, since `PyDict`
+    /// is imported and otherwise unused — confirm.
+    pub fn to_py_dict(self, py: Python) -> PyResult> {
+        let output: Bound = json_loads_str(py, self)?;
+        Ok(output)
+    }
+}
diff --git a/yt_dlp/src/wrapper/mod.rs b/yt_dlp/src/wrapper/mod.rs
new file mode 100644
index 0000000..3fe3247
--- /dev/null
+++ b/yt_dlp/src/wrapper/mod.rs
@@ -0,0 +1,12 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2024 Benedikt Peetz
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+pub mod info_json;
+// pub mod yt_dlp_options;
diff --git a/yt_dlp/src/wrapper/yt_dlp_options.rs b/yt_dlp/src/wrapper/yt_dlp_options.rs
new file mode 100644
index 0000000..c2a86df
--- /dev/null
+++ b/yt_dlp/src/wrapper/yt_dlp_options.rs
@@ -0,0 +1,62 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2024 Benedikt Peetz
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+use pyo3::{types::PyDict, Bound, PyResult, Python};
+use serde::Serialize;
+
+use crate::json_loads;
+
+/// Subset of options that is serialized to JSON and handed to Python as a dict.
+///
+/// Field names are emitted verbatim as the JSON keys. The commented-out fields are
+/// options that are not wired up yet.
+#[derive(Serialize, Clone)]
+pub struct YtDlpOptions {
+    pub playliststart: u32,
+    pub playlistend: u32,
+    pub noplaylist: bool,
+    pub extract_flat: ExtractFlat,
+    // pub extractor_args: ExtractorArgs,
+    // pub format: String,
+    // pub fragment_retries: u32,
+    // #[serde(rename(serialize = "getcomments"))]
+    // pub get_comments: bool,
+    // #[serde(rename(serialize = "ignoreerrors"))]
+    // pub ignore_errors: bool,
+    // pub retries: u32,
+    // #[serde(rename(serialize = "writeinfojson"))]
+    // pub write_info_json: bool,
+    // pub postprocessors: Vec>,
+}
+
+/// Value of the `extract_flat` option; serialized as `in_playlist` or `discard_in_playlist`.
+#[derive(Serialize, Copy, Clone)]
+pub enum ExtractFlat {
+    #[serde(rename(serialize = "in_playlist"))]
+    InPlaylist,
+
+    #[serde(rename(serialize = "discard_in_playlist"))]
+    DiscardInPlaylist,
+}
+
+/// Per-extractor arguments; currently referenced only from a commented-out field of
+/// `YtDlpOptions`.
+#[derive(Serialize, Clone)]
+pub struct ExtractorArgs {
+    pub youtube: YoutubeExtractorArgs,
+}
+
+/// Arguments for the YouTube extractor.
+#[derive(Serialize, Clone)]
+pub struct YoutubeExtractorArgs {
+    // NOTE(review): the element types were missing in this copy of the patch; `String` is
+    // assumed because yt-dlp extractor arguments are lists of strings — confirm.
+    comment_sort: Vec<String>,
+    max_comments: Vec<String>,
+}
+
+impl YtDlpOptions {
+    /// Serializes the options to a JSON string and converts it with the crate helper
+    /// `json_loads`.
+    ///
+    /// # Errors
+    /// Propagates the error returned by `json_loads`.
+    ///
+    /// # Panics
+    /// Panics if `serde_json` fails to serialize `self`.
+    pub fn to_py_dict(self, py: Python) -> PyResult<Bound<PyDict>> {
+        let string = serde_json::to_string(&self).expect("This should always work");
+
+        let output: Bound<PyDict> = json_loads(py, string)?;
+        Ok(output)
+    }
+}
-- cgit 1.4.1