From 52e99b38eae6b4f3cb991342ff9ba9abbab9e42c Mon Sep 17 00:00:00 2001 From: Benedikt Peetz Date: Fri, 23 Aug 2024 13:06:00 +0200 Subject: refactor(yt_dlp): Also move the `crates` subdirectory --- crates/yt_dlp/src/duration.rs | 71 ++++ crates/yt_dlp/src/lib.rs | 412 +++++++++++++++++++++ crates/yt_dlp/src/logging.rs | 125 +++++++ crates/yt_dlp/src/main.rs | 96 +++++ crates/yt_dlp/src/wrapper/info_json.rs | 550 ++++++++++++++++++++++++++++ crates/yt_dlp/src/wrapper/mod.rs | 12 + crates/yt_dlp/src/wrapper/yt_dlp_options.rs | 62 ++++ 7 files changed, 1328 insertions(+) create mode 100644 crates/yt_dlp/src/duration.rs create mode 100644 crates/yt_dlp/src/lib.rs create mode 100644 crates/yt_dlp/src/logging.rs create mode 100644 crates/yt_dlp/src/main.rs create mode 100644 crates/yt_dlp/src/wrapper/info_json.rs create mode 100644 crates/yt_dlp/src/wrapper/mod.rs create mode 100644 crates/yt_dlp/src/wrapper/yt_dlp_options.rs (limited to 'crates/yt_dlp/src') diff --git a/crates/yt_dlp/src/duration.rs b/crates/yt_dlp/src/duration.rs new file mode 100644 index 0000000..cd7454b --- /dev/null +++ b/crates/yt_dlp/src/duration.rs @@ -0,0 +1,71 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see . + +// TODO: This file should be de-duplicated with the same file in the 'yt' crate <2024-06-25> +pub struct Duration { + time: u32, +} + +impl From<&str> for Duration { + fn from(v: &str) -> Self { + let buf: Vec<_> = v.split(':').take(2).collect(); + Self { + time: (buf[0].parse::().expect("Should be a number") * 60) + + buf[1].parse::().expect("Should be a number"), + } + } +} + +impl From> for Duration { + fn from(value: Option) -> Self { + Self { + time: value.unwrap_or(0.0).ceil() as u32, + } + } +} + +impl std::fmt::Display for Duration { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + const SECOND: u32 = 1; + const MINUTE: u32 = 60 * SECOND; + const HOUR: u32 = 60 * MINUTE; + + let base_hour = self.time - (self.time % HOUR); + let base_min = (self.time % HOUR) - ((self.time % HOUR) % MINUTE); + let base_sec = (self.time % HOUR) % MINUTE; + + let h = base_hour / HOUR; + let m = base_min / MINUTE; + let s = base_sec / SECOND; + + if self.time == 0 { + write!(f, "0s") + } else if h > 0 { + write!(f, "{h}h {m}m") + } else { + write!(f, "{m}m {s}s") + } + } +} +#[cfg(test)] +mod test { + use super::Duration; + + #[test] + fn test_display_duration_1h() { + let dur = Duration { time: 60 * 60 }; + assert_eq!("[1h 0m]".to_owned(), dur.to_string()); + } + #[test] + fn test_display_duration_30min() { + let dur = Duration { time: 60 * 30 }; + assert_eq!("[30m 0s]".to_owned(), dur.to_string()); + } +} diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs new file mode 100644 index 0000000..37d0945 --- /dev/null +++ b/crates/yt_dlp/src/lib.rs @@ -0,0 +1,412 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see . + +// use std::{fs::File, io::Write}; + +use std::{path::PathBuf, sync::Once}; + +use crate::{duration::Duration, logging::setup_logging, wrapper::info_json::InfoJson}; + +use bytes::Bytes; +use log::{info, warn}; +use pyo3::types::{PyString, PyTuple, PyTupleMethods}; +use pyo3::{ + pyfunction, + types::{PyAnyMethods, PyDict, PyDictMethods, PyList, PyListMethods, PyModule}, + wrap_pyfunction_bound, Bound, PyAny, PyResult, Python, +}; +use serde::Serialize; +use serde_json::{Map, Value}; +use url::Url; + +pub mod duration; +pub mod logging; +pub mod wrapper; + +/// Synchronisation helper, to ensure that we don't setup the logger multiple times +static SYNC_OBJ: Once = Once::new(); + +/// Add a logger to the yt-dlp options. +/// If you have an logger set (i.e. for rust), than this will log to rust +pub fn add_logger_and_sig_handler<'a>( + opts: Bound<'a, PyDict>, + py: Python, +) -> PyResult> { + setup_logging(py, "yt_dlp")?; + + let logging = PyModule::import_bound(py, "logging")?; + let ytdl_logger = logging.call_method1("getLogger", ("yt_dlp",))?; + + // Ensure that all events are logged by setting the log level to NOTSET (we filter on rust's side) + // Also use this static, to ensure that we don't configure the logger every time + SYNC_OBJ.call_once(|| { + // Disable the SIGINT (Ctrl+C) handler, python installs. + // This allows the user to actually stop the application with Ctrl+C. + // This is here because it can only be run in the main thread and this was here already. + py.run_bound( + r#" +import signal +signal.signal(signal.SIGINT, signal.SIG_DFL) + "#, + None, + None, + ) + .expect("This code should always work"); + + let config_opts = PyDict::new_bound(py); + config_opts + .set_item("level", 0) + .expect("Setting this item should always work"); + + logging + .call_method("basicConfig", (), Some(&config_opts)) + .expect("This method exists"); + }); + + // This was taken from `ytcc`, I don't think it is still applicable + // ytdl_logger.setattr("propagate", false)?; + // let logging_null_handler = logging.call_method0("NullHandler")?; + // ytdl_logger.setattr("addHandler", logging_null_handler)?; + + opts.set_item("logger", ytdl_logger).expect("Should work"); + + Ok(opts) +} + +#[pyfunction] +pub fn progress_hook<'a>(py: Python, input: Bound<'_, PyDict>) -> PyResult<()> { + let input: serde_json::Map = serde_json::from_str(&json_dumps( + py, + input + .downcast::() + .expect("Will always work") + .to_owned(), + )?) + .expect("Python should always produce valid json"); + + macro_rules! get { + (@interrogate $item:ident, $type_fun:ident, $get_fun:ident, $name:expr) => {{ + let a = $item.get($name).expect(concat!( + "The field '", + stringify!($name), + "' should exist." + )); + + if a.$type_fun() { + a.$get_fun().expect( + "The should have been checked in the if guard, so unpacking here is fine", + ) + } else { + panic!( + "Value {} => \n{}\n is not of type: {}", + $name, + a, + stringify!($type_fun) + ); + } + }}; + + ($type_fun:ident, $get_fun:ident, $name1:expr, $name2:expr) => {{ + let a = get! {@interrogate input, is_object, as_object, $name1}; + let b = get! {@interrogate a, $type_fun, $get_fun, $name2}; + b + }}; + + ($type_fun:ident, $get_fun:ident, $name:expr) => {{ + get! {@interrogate input, $type_fun, $get_fun, $name} + }}; + } + + macro_rules! default_get { + (@interrogate $item:ident, $default:expr, $get_fun:ident, $name:expr) => {{ + let a = if let Some(field) = $item.get($name) { + field.$get_fun().unwrap_or($default) + } else { + $default + }; + a + }}; + + ($get_fun:ident, $default:expr, $name1:expr, $name2:expr) => {{ + let a = get! {@interrogate input, is_object, as_object, $name1}; + let b = default_get! {@interrogate a, $default, $get_fun, $name2}; + b + }}; + + ($get_fun:ident, $default:expr, $name:expr) => {{ + default_get! {@interrogate input, $default, $get_fun, $name} + }}; + } + + macro_rules! c { + ($color:expr, $format:expr) => { + format!("\x1b[{}m{}\x1b[0m", $color, $format) + }; + } + + fn format_bytes(bytes: u64) -> String { + let bytes = Bytes::new(bytes); + bytes.to_string() + } + + fn format_speed(speed: f64) -> String { + let bytes = Bytes::new(speed.floor() as u64); + format!("{}/s", bytes) + } + + let get_title = |add_extension: bool| -> String { + match get! {is_string, as_str, "info_dict", "ext"} { + "vtt" => { + format!( + "Subtitles ({})", + get! {is_string, as_str, "info_dict", "name"} + ) + } + title_extension @ ("webm" | "mp4" | "m4a") => { + if add_extension { + format!( + "{} ({})", + default_get! { as_str, "", "info_dict", "title"}, + title_extension + ) + } else { + default_get! { as_str, "", "info_dict", "title"}.to_owned() + } + } + other => panic!("The extension '{}' is not yet implemented", other), + } + }; + + match get! {is_string, as_str, "status"} { + "downloading" => { + let elapsed = default_get! {as_f64, 0.0f64, "elapsed"}; + let eta = default_get! {as_f64, 0.0, "eta"}; + let speed = default_get! {as_f64, 0.0, "speed"}; + + let downloaded_bytes = get! {is_u64, as_u64, "downloaded_bytes"}; + let total_bytes = { + let total_bytes = default_get!(as_u64, 0, "total_bytes"); + if total_bytes == 0 { + let estimate = default_get!(as_u64, 0, "total_bytes_estimate"); + warn!( + "The video does not have a total_byte count, using an estimate of '{}'", + estimate + ); + estimate + } else { + total_bytes + } + }; + let percent: f64 = { + if total_bytes == 0 { + 100.0 + } else { + (downloaded_bytes as f64 / total_bytes as f64) * 100.0 + } + }; + + print!("\x1b[1F"); // Move one line up, to allow the `println` after it to print a newline + print!("\x1b[2K"); // Clear whole line. + print!("\x1b[1G"); // Move cursor to column 1. + + println!( + "'{}' [{}/{} at {}] -> [{}/{} {}]", + c!("34;1", get_title(true)), + c!("33;1", Duration::from(Some(elapsed))), + c!("33;1", Duration::from(Some(eta))), + c!("32;1", format_speed(speed)), + c!("31;1", format_bytes(downloaded_bytes)), + c!("31;1", format_bytes(total_bytes)), + c!("36;1", format!("{:.02}%", percent)) + ); + } + "finished" => { + println!("Finished downloading: '{}'", c!("34;1", get_title(false))) + } + "error" => { + panic!("Error whilst downloading: {}", get_title(true)) + } + other => panic!("{} is not a valid state!", other), + }; + + Ok(()) +} + +pub fn add_hooks<'a>(opts: Bound<'a, PyDict>, py: Python) -> PyResult> { + if let Some(hooks) = opts.get_item("progress_hooks")? { + let hooks = hooks.downcast::()?; + hooks.append(wrap_pyfunction_bound!(progress_hook, py)?)?; + + opts.set_item("progress_hooks", hooks)?; + } else { + // No hooks are set yet + let hooks_list = PyList::new_bound(py, &[wrap_pyfunction_bound!(progress_hook, py)?]); + + opts.set_item("progress_hooks", hooks_list)?; + } + + Ok(opts) +} + +/// `extract_info(self, url, download=True, ie_key=None, extra_info=None, process=True, force_generic_extractor=False)` +/// +/// Extract and return the information dictionary of the URL +/// +/// Arguments: +/// @param url URL to extract +/// +/// Keyword arguments: +/// @param download Whether to download videos +/// @param process Whether to resolve all unresolved references (URLs, playlist items). +/// Must be True for download to work +/// @param ie_key Use only the extractor with this key +/// +/// @param extra_info Dictionary containing the extra values to add to the info (For internal use only) +/// @force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic') +pub async fn extract_info( + yt_dlp_opts: &Map, + url: &Url, + download: bool, + process: bool, +) -> PyResult { + Python::with_gil(|py| { + let opts = json_map_to_py_dict(yt_dlp_opts, py)?; + + let instance = get_yt_dlp(py, opts)?; + let args = (url.as_str(),); + + let kwargs = PyDict::new_bound(py); + kwargs.set_item("download", download)?; + kwargs.set_item("process", process)?; + + let result = instance.call_method("extract_info", args, Some(&kwargs))?; + + // Remove the ``, by setting it to null + if !process { + result.set_item("entries", ())?; + } + + let result_str = json_dumps(py, result)?; + + //let mut file = File::create("output.info.json").unwrap(); + //write!(file, "{}", result_str).unwrap(); + + Ok(serde_json::from_str(&result_str) + .expect("Python should be able to produce correct json")) + }) +} + +pub fn unsmuggle_url(smug_url: Url) -> PyResult { + Python::with_gil(|py| { + let utils = get_yt_dlp_utils(py)?; + let url = utils + .call_method1("unsmuggle_url", (smug_url.as_str(),))? + .downcast::()? + .get_item(0)?; + + let url: Url = url + .downcast::()? + .to_string() + .parse() + .expect("Python should be able to return a valid url"); + + Ok(url) + }) +} + +/// Download a given list of URLs. +/// Returns the paths they were downloaded to. +pub async fn download( + urls: &[Url], + download_options: &Map, +) -> PyResult> { + let mut out_paths = Vec::with_capacity(urls.len()); + + for url in urls { + info!("Started downloading url: '{}'", url); + let info_json = extract_info(download_options, url, true, true).await?; + + // Try to work around yt-dlp type weirdness + let result_string = if let Some(filename) = info_json.filename { + filename + } else { + (&info_json.requested_downloads.expect("This must exist")[0].filename).to_owned() + }; + + out_paths.push(result_string); + info!("Finished downloading url: '{}'", url); + } + + Ok(out_paths) +} + +fn json_map_to_py_dict<'a>( + map: &Map, + py: Python<'a>, +) -> PyResult> { + let json_string = serde_json::to_string(&map).expect("This must always work"); + + let python_dict = json_loads(py, json_string)?; + + Ok(python_dict) +} + +fn json_dumps(py: Python, input: Bound) -> PyResult { + // json.dumps(yt_dlp.sanitize_info(input)) + + let yt_dlp = get_yt_dlp(py, PyDict::new_bound(py))?; + let sanitized_result = yt_dlp.call_method1("sanitize_info", (input,))?; + + let json = PyModule::import_bound(py, "json")?; + let dumps = json.getattr("dumps")?; + + let output = dumps.call1((sanitized_result,))?; + + let output_str = output.extract::()?; + + Ok(output_str) +} + +fn json_loads_str(py: Python, input: T) -> PyResult> { + let string = serde_json::to_string(&input).expect("Correct json must be pased"); + + json_loads(py, string) +} + +fn json_loads(py: Python, input: String) -> PyResult> { + // json.loads(input) + + let json = PyModule::import_bound(py, "json")?; + let dumps = json.getattr("loads")?; + + let output = dumps.call1((input,))?; + + Ok(output + .downcast::() + .expect("This should always be a PyDict") + .clone()) +} + +fn get_yt_dlp_utils<'a>(py: Python<'a>) -> PyResult> { + let yt_dlp = PyModule::import_bound(py, "yt_dlp")?; + let utils = yt_dlp.getattr("utils")?; + + Ok(utils) +} +fn get_yt_dlp<'a>(py: Python<'a>, opts: Bound<'a, PyDict>) -> PyResult> { + // Unconditionally set a logger + let opts = add_logger_and_sig_handler(opts, py)?; + let opts = add_hooks(opts, py)?; + + let yt_dlp = PyModule::import_bound(py, "yt_dlp")?; + let youtube_dl = yt_dlp.call_method1("YoutubeDL", (opts,))?; + + Ok(youtube_dl) +} diff --git a/crates/yt_dlp/src/logging.rs b/crates/yt_dlp/src/logging.rs new file mode 100644 index 0000000..cca917c --- /dev/null +++ b/crates/yt_dlp/src/logging.rs @@ -0,0 +1,125 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see . + +// This file is taken from: https://github.com/dylanbstorey/pyo3-pylogger/blob/d89e0d6820ebc4f067647e3b74af59dbc4941dd5/src/lib.rs +// It is licensed under the Apache 2.0 License, copyright up to 2024 by Dylan Storey +// It was modified by Benedikt Peetz 2024 + +use log::{logger, Level, MetadataBuilder, Record}; +use pyo3::{ + prelude::{PyAnyMethods, PyListMethods, PyModuleMethods}, + pyfunction, wrap_pyfunction, Bound, PyAny, PyResult, Python, +}; + +/// Consume a Python `logging.LogRecord` and emit a Rust `Log` instead. +#[pyfunction] +fn host_log<'a>(record: Bound<'a, PyAny>, rust_target: &str) -> PyResult<()> { + let level = record.getattr("levelno")?; + let message = record.getattr("getMessage")?.call0()?.to_string(); + let pathname = record.getattr("pathname")?.to_string(); + let lineno = record + .getattr("lineno")? + .to_string() + .parse::() + .expect("This should always be a u32"); + + let logger_name = record.getattr("name")?.to_string(); + + let full_target: Option = if logger_name.trim().is_empty() || logger_name == "root" { + None + } else { + // Libraries (ex: tracing_subscriber::filter::Directive) expect rust-style targets like foo::bar, + // and may not deal well with "." as a module separator: + let logger_name = logger_name.replace(".", "::"); + Some(format!("{rust_target}::{logger_name}")) + }; + + let target = full_target + .as_ref() + .map(|x| x.as_str()) + .unwrap_or(rust_target); + + // error + let error_metadata = if level.ge(40u8)? { + MetadataBuilder::new() + .target(target) + .level(Level::Error) + .build() + } else if level.ge(30u8)? { + MetadataBuilder::new() + .target(target) + .level(Level::Warn) + .build() + } else if level.ge(20u8)? { + MetadataBuilder::new() + .target(target) + .level(Level::Info) + .build() + } else if level.ge(10u8)? { + MetadataBuilder::new() + .target(target) + .level(Level::Debug) + .build() + } else { + MetadataBuilder::new() + .target(target) + .level(Level::Trace) + .build() + }; + + logger().log( + &Record::builder() + .metadata(error_metadata) + .args(format_args!("{}", &message)) + .line(Some(lineno)) + .file(None) + .module_path(Some(&pathname)) + .build(), + ); + + Ok(()) +} + +/// Registers the host_log function in rust as the event handler for Python's logging logger +/// This function needs to be called from within a pyo3 context as early as possible to ensure logging messages +/// arrive to the rust consumer. +pub fn setup_logging(py: Python, target: &str) -> PyResult<()> { + let logging = py.import_bound("logging")?; + + logging.setattr("host_log", wrap_pyfunction!(host_log, &logging)?)?; + + py.run_bound( + format!( + r#" +class HostHandler(Handler): + def __init__(self, level=0): + super().__init__(level=level) + + def emit(self, record): + host_log(record,"{}") + +oldBasicConfig = basicConfig +def basicConfig(*pargs, **kwargs): + if "handlers" not in kwargs: + kwargs["handlers"] = [HostHandler()] + return oldBasicConfig(*pargs, **kwargs) +"#, + target + ) + .as_str(), + Some(&logging.dict()), + None, + )?; + + let all = logging.index()?; + all.append("HostHandler")?; + + Ok(()) +} diff --git a/crates/yt_dlp/src/main.rs b/crates/yt_dlp/src/main.rs new file mode 100644 index 0000000..c40ddc3 --- /dev/null +++ b/crates/yt_dlp/src/main.rs @@ -0,0 +1,96 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see . + +use std::{env::args, fs}; + +use yt_dlp::wrapper::info_json::InfoJson; + +#[cfg(test)] +mod test { + use url::Url; + use yt_dlp::wrapper::yt_dlp_options::{ExtractFlat, YtDlpOptions}; + + const YT_OPTS: YtDlpOptions = YtDlpOptions { + playliststart: 1, + playlistend: 10, + noplaylist: false, + extract_flat: ExtractFlat::InPlaylist, + }; + + #[test] + fn test_extract_info_video() { + let info = yt_dlp::extract_info( + YT_OPTS, + &Url::parse("https://www.youtube.com/watch?v=dbjPnXaacAU").expect("Is valid."), + false, + false, + false, + ) + .map_err(|err| format!("Encountered error: '{}'", err)) + .unwrap(); + + println!("{:#?}", info); + } + + #[test] + fn test_extract_info_url() { + let err = yt_dlp::extract_info( + YT_OPTS, + &Url::parse("https://google.com").expect("Is valid."), + false, + false, + false, + ) + .map_err(|err| format!("Encountered error: '{}'", err)) + .unwrap(); + + println!("{:#?}", err); + } + + #[test] + fn test_extract_info_playlist() { + let err = yt_dlp::extract_info( + YT_OPTS, + &Url::parse("https://www.youtube.com/@TheGarriFrischer/videos").expect("Is valid."), + false, + false, + true, + ) + .map_err(|err| format!("Encountered error: '{}'", err)) + .unwrap(); + + println!("{:#?}", err); + } + #[test] + fn test_extract_info_playlist_full() { + let err = yt_dlp::extract_info( + YT_OPTS, + &Url::parse("https://www.youtube.com/@NixOS-Foundation/videos").expect("Is valid."), + false, + false, + true, + ) + .map_err(|err| format!("Encountered error: '{}'", err)) + .unwrap(); + + println!("{:#?}", err); + } +} + +fn main() { + let input_file: &str = &args().take(2).collect::>()[1]; + + let input = fs::read_to_string(input_file).unwrap(); + + let output: InfoJson = + serde_json::from_str(&input).expect("Python should be able to produce correct json"); + + println!("{:#?}", output); +} diff --git a/crates/yt_dlp/src/wrapper/info_json.rs b/crates/yt_dlp/src/wrapper/info_json.rs new file mode 100644 index 0000000..9c0d464 --- /dev/null +++ b/crates/yt_dlp/src/wrapper/info_json.rs @@ -0,0 +1,550 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see . + +use std::{collections::HashMap, path::PathBuf}; + +use pyo3::{types::PyDict, Bound, PyResult, Python}; +use serde::{Deserialize, Deserializer, Serialize}; +use serde_json::Value; +use url::Url; + +use crate::json_loads_str; + +type Todo = String; + +// TODO: Change this to map `_type` to a structure of values, instead of the options <2024-05-27> +// And replace all the strings with better types (enums or urls) +#[derive(Debug, Deserialize, Serialize, PartialEq)] +#[serde(deny_unknown_fields)] +pub struct InfoJson { + pub __last_playlist_index: Option, + pub __post_extractor: Option, + pub __x_forwarded_for_ip: Option, + pub _filename: Option, + pub _format_sort_fields: Option>, + pub _has_drm: Option, + pub _type: Option, + pub _version: Option, + pub abr: Option, + pub acodec: Option, + pub age_limit: Option, + pub aspect_ratio: Option, + pub asr: Option, + pub audio_channels: Option, + pub audio_ext: Option, + pub automatic_captions: Option>>, + pub availability: Option, + pub average_rating: Option, + pub categories: Option>, + pub channel: Option, + pub channel_follower_count: Option, + pub channel_id: Option, + pub channel_is_verified: Option, + pub channel_url: Option, + pub chapters: Option>, + pub comment_count: Option, + pub comments: Option>, + pub concurrent_view_count: Option, + pub description: Option, + pub display_id: Option, + pub downloader_options: Option, + pub duration: Option, + pub duration_string: Option, + pub dynamic_range: Option, + pub entries: Option>, + pub episode: Option, + pub episode_number: Option, + pub epoch: Option, + pub ext: Option, + pub extractor: Option, + pub extractor_key: Option, + pub filename: Option, + pub filesize: Option, + pub filesize_approx: Option, + pub format: Option, + pub format_id: Option, + pub format_index: Option, + pub format_note: Option, + pub formats: Option>, + pub fps: Option, + pub fulltitle: Option, + pub has_drm: Option, + pub heatmap: Option>, + pub height: Option, + pub http_headers: Option, + pub id: Option, + pub ie_key: Option, + pub is_live: Option, + pub language: Option, + pub language_preference: Option, + pub license: Option, + pub like_count: Option, + pub live_status: Option, + pub location: Option, + pub manifest_url: Option, + pub modified_date: Option, + pub n_entries: Option, + pub original_url: Option, + pub playable_in_embed: Option, + pub playlist: Option, + pub playlist_autonumber: Option, + pub playlist_channel: Option, + pub playlist_channel_id: Option, + pub playlist_count: Option, + pub playlist_id: Option, + pub playlist_index: Option, + pub playlist_title: Option, + pub playlist_uploader: Option, + pub playlist_uploader_id: Option, + pub preference: Option, + pub protocol: Option, + pub quality: Option, + pub release_date: Option, + pub release_timestamp: Option, + pub release_year: Option, + pub requested_downloads: Option>, + pub requested_entries: Option>, + pub requested_formats: Option>, + pub requested_subtitles: Option>, + pub resolution: Option, + pub season: Option, + pub season_number: Option, + pub series: Option, + pub source_preference: Option, + pub sponsorblock_chapters: Option>, + pub stretched_ratio: Option, + pub subtitles: Option>>, + pub tags: Option>, + pub tbr: Option, + pub thumbnail: Option, + pub thumbnails: Option>, + pub timestamp: Option, + pub title: Option, + pub upload_date: Option, + pub uploader: Option, + pub uploader_id: Option, + pub uploader_url: Option, + pub url: Option, + pub vbr: Option, + pub vcodec: Option, + pub video_ext: Option, + pub view_count: Option, + pub was_live: Option, + pub webpage_url: Option, + pub webpage_url_basename: Option, + pub webpage_url_domain: Option, + pub width: Option, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq)] +#[serde(deny_unknown_fields)] +pub struct RequestedDownloads { + pub __files_to_merge: Option>, + pub __finaldir: PathBuf, + pub __infojson_filename: PathBuf, + pub __postprocessors: Vec, + pub __real_download: bool, + pub __write_download_archive: bool, + pub _filename: PathBuf, + pub _type: InfoType, + pub _version: Version, + pub abr: f64, + pub acodec: String, + pub aspect_ratio: f64, + pub asr: u32, + pub audio_channels: u32, + pub chapters: Option>, + pub duration: Option, + pub dynamic_range: String, + pub ext: String, + pub filename: PathBuf, + pub filepath: PathBuf, + pub filesize_approx: u64, + pub format: String, + pub format_id: String, + pub format_note: String, + pub fps: f64, + pub height: u32, + pub infojson_filename: PathBuf, + pub language: Option, + pub protocol: String, + pub requested_formats: Vec, + pub resolution: String, + pub tbr: f64, + pub vbr: f64, + pub vcodec: String, + pub width: u32, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub struct Subtitle { + pub ext: SubtitleExt, + pub filepath: PathBuf, + pub name: String, + pub url: Url, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum SubtitleExt { + #[serde(alias = "vtt")] + Vtt, + + #[serde(alias = "json")] + Json, + #[serde(alias = "json3")] + Json3, + + #[serde(alias = "ttml")] + Ttml, + + #[serde(alias = "srv1")] + Srv1, + #[serde(alias = "srv2")] + Srv2, + #[serde(alias = "srv3")] + Srv3, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct Caption { + pub ext: SubtitleExt, + pub name: Option, + pub protocol: Option, + pub url: String, + pub filepath: Option, + pub video_id: Option, + pub manifest_url: Option, + pub filesize: Option, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[serde(deny_unknown_fields)] +pub struct Chapter { + pub end_time: f64, + pub start_time: f64, + pub title: String, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq)] +#[serde(deny_unknown_fields)] +pub struct SponsorblockChapter { + /// This is an utterly useless field, and should thus be ignored + pub _categories: Option>>, + + pub categories: Option>, + pub category: Option, + pub category_names: Option>, + pub end_time: f64, + pub name: Option, + pub r#type: Option, + pub start_time: f64, + pub title: String, +} + +pub fn get_none<'de, D, T>(_: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + Ok(None) +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum SponsorblockChapterType { + #[serde(alias = "skip")] + Skip, + + #[serde(alias = "chapter")] + Chapter, + + #[serde(alias = "poi")] + Poi, +} +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum SponsorblockChapterCategory { + #[serde(alias = "filler")] + Filler, + + #[serde(alias = "interaction")] + Interaction, + + #[serde(alias = "poi_highlight")] + PoiHighlight, + + #[serde(alias = "preview")] + Preview, + + #[serde(alias = "sponsor")] + Sponsor, + + #[serde(alias = "selfpromo")] + SelfPromo, + + #[serde(alias = "chapter")] + Chapter, + + #[serde(alias = "intro")] + Intro, + + #[serde(alias = "outro")] + Outro, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[serde(deny_unknown_fields)] +pub struct HeatMapEntry { + pub start_time: f64, + pub end_time: f64, + pub value: f64, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum Extractor { + #[serde(alias = "generic")] + Generic, + + #[serde(alias = "SVTSeries")] + SVTSeries, + #[serde(alias = "SVTPlay")] + SVTPlay, + + #[serde(alias = "youtube")] + YouTube, + #[serde(alias = "youtube:tab")] + YouTubeTab, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum ExtractorKey { + #[serde(alias = "Generic")] + Generic, + + #[serde(alias = "SVTSeries")] + SVTSeries, + #[serde(alias = "SVTPlay")] + SVTPlay, + + #[serde(alias = "Youtube")] + YouTube, + #[serde(alias = "YoutubeTab")] + YouTubeTab, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum InfoType { + #[serde(alias = "playlist")] + Playlist, + + #[serde(alias = "url")] + Url, + + #[serde(alias = "video")] + Video, +} + +#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct Version { + pub current_git_head: Option, + pub release_git_head: String, + pub repository: String, + pub version: String, +} + +#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)] +#[serde(from = "String")] +#[serde(deny_unknown_fields)] +pub enum Parent { + Root, + Id(String), +} + +impl Parent { + pub fn id(&self) -> Option<&str> { + if let Self::Id(id) = self { + Some(id) + } else { + None + } + } +} + +impl From for Parent { + fn from(value: String) -> Self { + if value == "root" { + Self::Root + } else { + Self::Id(value) + } + } +} + +#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)] +#[serde(from = "String")] +#[serde(deny_unknown_fields)] +pub struct Id { + pub id: String, +} +impl From for Id { + fn from(value: String) -> Self { + Self { + // Take the last element if the string is split with dots, otherwise take the full id + id: value.split('.').last().unwrap_or(&value).to_owned(), + } + } +} + +#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct Comment { + pub id: Id, + pub text: String, + #[serde(default = "zero")] + pub like_count: u32, + pub is_pinned: bool, + pub author_id: String, + #[serde(default = "unknown")] + pub author: String, + pub author_is_verified: bool, + pub author_thumbnail: Url, + pub parent: Parent, + #[serde(deserialize_with = "edited_from_time_text", alias = "_time_text")] + pub edited: bool, + // Can't also be deserialized, as it's already used in 'edited' + // _time_text: String, + pub timestamp: i64, + pub author_url: Url, + pub author_is_uploader: bool, + pub is_favorited: bool, +} +fn unknown() -> String { + "".to_string() +} +fn zero() -> u32 { + 0 +} +fn edited_from_time_text<'de, D>(d: D) -> Result +where + D: Deserializer<'de>, +{ + let s = String::deserialize(d)?; + if s.contains(" (edited)") { + Ok(true) + } else { + Ok(false) + } +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct ThumbNail { + pub id: Option, + pub preference: Option, + /// in the form of "[`height`]x[`width`]" + pub resolution: Option, + pub url: Url, + pub width: Option, + pub height: Option, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[serde(deny_unknown_fields)] +pub struct Format { + pub __needs_testing: Option, + pub __working: Option, + pub abr: Option, + pub acodec: Option, + pub aspect_ratio: Option, + pub asr: Option, + pub audio_channels: Option, + pub audio_ext: Option, + pub columns: Option, + pub container: Option, + pub downloader_options: Option, + pub dynamic_range: Option, + pub ext: String, + pub filepath: Option, + pub filesize: Option, + pub filesize_approx: Option, + pub format: Option, + pub format_id: String, + pub format_index: Option, + pub format_note: Option, + pub fps: Option, + pub fragment_base_url: Option, + pub fragments: Option>, + pub has_drm: Option, + pub height: Option, + pub http_headers: Option, + pub is_dash_periods: Option, + pub language: Option, + pub language_preference: Option, + pub manifest_stream_number: Option, + pub manifest_url: Option, + pub preference: Option, + pub protocol: Option, + pub quality: Option, + pub resolution: Option, + pub rows: Option, + pub source_preference: Option, + pub tbr: Option, + pub url: Url, + pub vbr: Option, + pub vcodec: String, + pub video_ext: Option, + pub width: Option, +} + +#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct DownloaderOptions { + http_chunk_size: u64, +} + +#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct HttpHeader { + #[serde(alias = "User-Agent")] + pub user_agent: Option, + + #[serde(alias = "Accept")] + pub accept: Option, + + #[serde(alias = "X-Forwarded-For")] + pub x_forwarded_for: Option, + + #[serde(alias = "Accept-Language")] + pub accept_language: Option, + + #[serde(alias = "Sec-Fetch-Mode")] + pub sec_fetch_mode: Option, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[serde(deny_unknown_fields)] +pub struct Fragment { + pub url: Option, + pub duration: Option, + pub path: Option, +} + +impl InfoJson { + pub fn to_py_dict(self, py: Python) -> PyResult> { + let output: Bound = json_loads_str(py, self)?; + Ok(output) + } +} diff --git a/crates/yt_dlp/src/wrapper/mod.rs b/crates/yt_dlp/src/wrapper/mod.rs new file mode 100644 index 0000000..3fe3247 --- /dev/null +++ b/crates/yt_dlp/src/wrapper/mod.rs @@ -0,0 +1,12 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see . + +pub mod info_json; +// pub mod yt_dlp_options; diff --git a/crates/yt_dlp/src/wrapper/yt_dlp_options.rs b/crates/yt_dlp/src/wrapper/yt_dlp_options.rs new file mode 100644 index 0000000..c2a86df --- /dev/null +++ b/crates/yt_dlp/src/wrapper/yt_dlp_options.rs @@ -0,0 +1,62 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see . + +use pyo3::{types::PyDict, Bound, PyResult, Python}; +use serde::Serialize; + +use crate::json_loads; + +#[derive(Serialize, Clone)] +pub struct YtDlpOptions { + pub playliststart: u32, + pub playlistend: u32, + pub noplaylist: bool, + pub extract_flat: ExtractFlat, + // pub extractor_args: ExtractorArgs, + // pub format: String, + // pub fragment_retries: u32, + // #[serde(rename(serialize = "getcomments"))] + // pub get_comments: bool, + // #[serde(rename(serialize = "ignoreerrors"))] + // pub ignore_errors: bool, + // pub retries: u32, + // #[serde(rename(serialize = "writeinfojson"))] + // pub write_info_json: bool, + // pub postprocessors: Vec>, +} + +#[derive(Serialize, Copy, Clone)] +pub enum ExtractFlat { + #[serde(rename(serialize = "in_playlist"))] + InPlaylist, + + #[serde(rename(serialize = "discard_in_playlist"))] + DiscardInPlaylist, +} + +#[derive(Serialize, Clone)] +pub struct ExtractorArgs { + pub youtube: YoutubeExtractorArgs, +} + +#[derive(Serialize, Clone)] +pub struct YoutubeExtractorArgs { + comment_sort: Vec, + max_comments: Vec, +} + +impl YtDlpOptions { + pub fn to_py_dict(self, py: Python) -> PyResult> { + let string = serde_json::to_string(&self).expect("This should always work"); + + let output: Bound = json_loads(py, string)?; + Ok(output) + } +} -- cgit 1.4.1