about summary refs log tree commit diff stats
path: root/crates/yt_dlp/src/lib.rs
diff options
context:
space:
mode:
authorBenedikt Peetz <benedikt.peetz@b-peetz.de>2024-08-23 13:06:00 +0200
committerBenedikt Peetz <benedikt.peetz@b-peetz.de>2024-08-23 13:06:00 +0200
commit52e99b38eae6b4f3cb991342ff9ba9abbab9e42c (patch)
tree9fa6f0582dfb8b6dc7b49bbd6206ab4b533ff900 /crates/yt_dlp/src/lib.rs
parentrefactor(cli): Replace the byte parser with the one from the `bytes` crate (diff)
downloadyt-52e99b38eae6b4f3cb991342ff9ba9abbab9e42c.tar.gz
yt-52e99b38eae6b4f3cb991342ff9ba9abbab9e42c.zip
refactor(yt_dlp): Also move the `crates` subdirectory
Diffstat (limited to 'crates/yt_dlp/src/lib.rs')
-rw-r--r--crates/yt_dlp/src/lib.rs412
1 files changed, 412 insertions, 0 deletions
diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs
new file mode 100644
index 0000000..37d0945
--- /dev/null
+++ b/crates/yt_dlp/src/lib.rs
@@ -0,0 +1,412 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+// use std::{fs::File, io::Write};
+
+use std::{path::PathBuf, sync::Once};
+
+use crate::{duration::Duration, logging::setup_logging, wrapper::info_json::InfoJson};
+
+use bytes::Bytes;
+use log::{info, warn};
+use pyo3::types::{PyString, PyTuple, PyTupleMethods};
+use pyo3::{
+    pyfunction,
+    types::{PyAnyMethods, PyDict, PyDictMethods, PyList, PyListMethods, PyModule},
+    wrap_pyfunction_bound, Bound, PyAny, PyResult, Python,
+};
+use serde::Serialize;
+use serde_json::{Map, Value};
+use url::Url;
+
+pub mod duration;
+pub mod logging;
+pub mod wrapper;
+
+/// Guards the one-time setup in `add_logger_and_sig_handler` (SIGINT reset, `logging.basicConfig`).
+static SYNC_OBJ: Once = Once::new();
+
+/// Add Python's `yt_dlp` logger to the yt-dlp options; if a Rust logger is set, it logs to Rust.
+/// The first call also resets SIGINT to its default action and runs `logging.basicConfig(level=0)`.
+pub fn add_logger_and_sig_handler<'a>(
+    opts: Bound<'a, PyDict>,
+    py: Python,
+) -> PyResult<Bound<'a, PyDict>> {
+    setup_logging(py, "yt_dlp")?;
+
+    let logging = PyModule::import_bound(py, "logging")?;
+    let ytdl_logger = logging.call_method1("getLogger", ("yt_dlp",))?;
+
+    // Ensure that all events are logged by setting the log level to NOTSET (we filter on rust's side)
+    // Also use this static, to ensure that we don't configure the logger every time
+    SYNC_OBJ.call_once(|| {
+        // Disable the SIGINT (Ctrl+C) handler, python installs.
+        // This allows the user to actually stop the application with Ctrl+C.
+        // This is here because it can only be run in the main thread and this was here already.
+        py.run_bound(
+            r#"
+import signal
+signal.signal(signal.SIGINT, signal.SIG_DFL)
+        "#,
+            None,
+            None,
+        )
+        .expect("This code should always work");
+
+        let config_opts = PyDict::new_bound(py);
+        config_opts
+            .set_item("level", 0)
+            .expect("Setting this item should always work");
+
+        logging
+            .call_method("basicConfig", (), Some(&config_opts))
+            .expect("This method exists");
+    });
+
+    // This was taken from `ytcc`, I don't think it is still applicable
+    // ytdl_logger.setattr("propagate", false)?;
+    // let logging_null_handler = logging.call_method0("NullHandler")?;
+    // ytdl_logger.setattr("addHandler", logging_null_handler)?;
+
+    // Hand a failed insert back to the caller instead of panicking.
+    opts.set_item("logger", ytdl_logger)?;
+    Ok(opts)
+}
+
+#[pyfunction] // `add_hooks` registers this in the `progress_hooks` option; it prints the status to stdout.
+pub fn progress_hook<'a>(py: Python, input: Bound<'_, PyDict>) -> PyResult<()> {
+    let input: serde_json::Map<String, Value> = serde_json::from_str(&json_dumps(
+        py,
+        input
+            .downcast::<PyAny>()
+            .expect("Will always work")
+            .to_owned(),
+    )?)
+    .expect("Python should always produce valid json"); // Python dict -> JSON text -> serde_json map
+
+    macro_rules! get { // Required field of a fixed JSON type; panics if it is missing or of another type.
+        (@interrogate $item:ident, $type_fun:ident, $get_fun:ident, $name:expr) => {{
+            let a = $item.get($name).expect(concat!(
+                "The field '",
+                stringify!($name),
+                "' should exist."
+            ));
+
+            if a.$type_fun() {
+                a.$get_fun().expect(
+                    "The should have been checked in the if guard, so unpacking here is fine",
+                )
+            } else {
+                panic!(
+                    "Value {} => \n{}\n is not of type: {}",
+                    $name,
+                    a,
+                    stringify!($type_fun)
+                );
+            }
+        }};
+
+        ($type_fun:ident, $get_fun:ident, $name1:expr, $name2:expr) => {{ // `$name2` inside the object at `$name1`
+            let a = get! {@interrogate input, is_object, as_object, $name1};
+            let b = get! {@interrogate a, $type_fun, $get_fun, $name2};
+            b
+        }};
+
+        ($type_fun:ident, $get_fun:ident, $name:expr) => {{ // Top-level field of `input`
+            get! {@interrogate input, $type_fun, $get_fun, $name}
+        }};
+    }
+
+    macro_rules! default_get { // Optional field: `$default` if it is missing or `$get_fun` yields `None`.
+        (@interrogate $item:ident, $default:expr, $get_fun:ident, $name:expr) => {{
+            let a = if let Some(field) = $item.get($name) {
+                field.$get_fun().unwrap_or($default)
+            } else {
+                $default
+            };
+            a
+        }};
+
+        ($get_fun:ident, $default:expr, $name1:expr, $name2:expr) => {{
+            let a = get! {@interrogate input, is_object, as_object, $name1}; // The outer object is still required.
+            let b = default_get! {@interrogate a, $default, $get_fun, $name2};
+            b
+        }};
+
+        ($get_fun:ident, $default:expr, $name:expr) => {{
+            default_get! {@interrogate input, $default, $get_fun, $name}
+        }};
+    }
+
+    macro_rules! c { // Wrap `$format` in the ANSI escape for `$color`, then reset the style.
+        ($color:expr, $format:expr) => {
+            format!("\x1b[{}m{}\x1b[0m", $color, $format)
+        };
+    }
+
+    fn format_bytes(bytes: u64) -> String {
+        let bytes = Bytes::new(bytes);
+        bytes.to_string()
+    }
+
+    fn format_speed(speed: f64) -> String {
+        let bytes = Bytes::new(speed.floor() as u64); // `as` saturates: a negative or NaN speed becomes 0.
+        format!("{}/s", bytes)
+    }
+
+    let get_title = |add_extension: bool| -> String { // Display name, chosen by the `ext` field of `info_dict`.
+        match get! {is_string, as_str, "info_dict", "ext"} {
+            "vtt" => {
+                format!(
+                    "Subtitles ({})",
+                    get! {is_string, as_str, "info_dict", "name"}
+                )
+            }
+            title_extension @ ("webm" | "mp4" | "m4a") => {
+                if add_extension {
+                    format!(
+                        "{} ({})",
+                        default_get! { as_str, "<No title>", "info_dict", "title"},
+                        title_extension
+                    )
+                } else {
+                    default_get! { as_str, "<No title>", "info_dict", "title"}.to_owned()
+                }
+            }
+            other => panic!("The extension '{}' is not yet implemented", other), // Every other extension aborts.
+        }
+    };
+
+    match get! {is_string, as_str, "status"} {
+        "downloading" => {
+            let elapsed = default_get! {as_f64, 0.0f64, "elapsed"};
+            let eta = default_get! {as_f64, 0.0, "eta"};
+            let speed = default_get! {as_f64, 0.0, "speed"};
+
+            let downloaded_bytes = get! {is_u64, as_u64, "downloaded_bytes"};
+            let total_bytes = {
+                let total_bytes = default_get!(as_u64, 0, "total_bytes");
+                if total_bytes == 0 { // Missing, non-u64 or zero: fall back to the estimate.
+                    let estimate = default_get!(as_u64, 0, "total_bytes_estimate");
+                    warn!( // Logged on every hook call that takes this branch.
+                        "The video does not have a total_byte count, using an estimate of '{}'",
+                        estimate
+                    );
+                    estimate
+                } else {
+                    total_bytes
+                }
+            };
+            let percent: f64 = {
+                if total_bytes == 0 {
+                    100.0 // No usable total: avoid dividing by zero.
+                } else {
+                    (downloaded_bytes as f64 / total_bytes as f64) * 100.0
+                }
+            };
+
+            print!("\x1b[1F"); // Move one line up, to allow the `println` after it to print a newline
+            print!("\x1b[2K"); // Clear whole line.
+            print!("\x1b[1G"); // Move cursor to column 1.
+
+            println!( // 'title' [elapsed/eta at speed] -> [downloaded/total percent]
+                "'{}' [{}/{} at {}] -> [{}/{} {}]",
+                c!("34;1", get_title(true)),
+                c!("33;1", Duration::from(Some(elapsed))),
+                c!("33;1", Duration::from(Some(eta))),
+                c!("32;1", format_speed(speed)),
+                c!("31;1", format_bytes(downloaded_bytes)),
+                c!("31;1", format_bytes(total_bytes)),
+                c!("36;1", format!("{:.02}%", percent))
+            );
+        }
+        "finished" => {
+            println!("Finished downloading: '{}'", c!("34;1", get_title(false)))
+        }
+        "error" => {
+            panic!("Error whilst downloading: {}", get_title(true)) // A reported error aborts via panic.
+        }
+        other => panic!("{} is not a valid state!", other),
+    };
+
+    Ok(())
+}
+
+/// Append [`progress_hook`] to `opts["progress_hooks"]`, creating the list if it is missing.
+pub fn add_hooks<'a>(opts: Bound<'a, PyDict>, py: Python) -> PyResult<Bound<'a, PyDict>> {
+    match opts.get_item("progress_hooks")? {
+        Some(existing) => {
+            let list = existing.downcast::<PyList>()?;
+            list.append(wrap_pyfunction_bound!(progress_hook, py)?)?;
+            opts.set_item("progress_hooks", list)?;
+        }
+        None => {
+            let fresh = PyList::new_bound(py, &[wrap_pyfunction_bound!(progress_hook, py)?]);
+            opts.set_item("progress_hooks", fresh)?;
+        }
+    }
+    Ok(opts)
+}
+
+/// `extract_info(self, url, download=True, ie_key=None, extra_info=None, process=True, force_generic_extractor=False)`
+///
+/// Extract and return the information dictionary of the URL
+///
+/// Arguments:
+/// @param url          URL to extract
+///
+/// Keyword arguments:
+/// @param download     Whether to download videos
+/// @param process      Whether to resolve all unresolved references (URLs, playlist items).
+///                     Must be True for download to work
+/// Only `url`, `download` and `process` are passed on; the other Python parameters keep their defaults.
+///
+/// # Errors
+/// Propagates Python exceptions; a result that does not fit `InfoJson` becomes a `ValueError`.
+pub async fn extract_info(
+    yt_dlp_opts: &Map<String, Value>,
+    url: &Url,
+    download: bool,
+    process: bool,
+) -> PyResult<InfoJson> {
+    Python::with_gil(|py| {
+        let opts = json_map_to_py_dict(yt_dlp_opts, py)?;
+
+        let instance = get_yt_dlp(py, opts)?;
+        let args = (url.as_str(),);
+        let kwargs = PyDict::new_bound(py);
+        kwargs.set_item("download", download)?;
+        kwargs.set_item("process", process)?;
+
+        let result = instance.call_method("extract_info", args, Some(&kwargs))?;
+
+        // Remove the `<generator at 0xsome_hex>` by overwriting the entry.
+        // NOTE(review): pyo3 seems to turn `()` into an empty tuple (JSON `[]`), not `None` - confirm.
+        if !process {
+            result.set_item("entries", ())?;
+        }
+
+        let result_str = json_dumps(py, result)?;
+
+        // The text is valid JSON, but it may still not fit `InfoJson`; report that to the
+        // caller as a Python `ValueError` instead of panicking.
+        serde_json::from_str(&result_str).map_err(|err| {
+            pyo3::exceptions::PyValueError::new_err(format!("Unexpected info json: {}", err))
+        })
+    })
+}
+
+/// Python equivalent: `yt_dlp.utils.unsmuggle_url(smug_url)[0]`, parsed back into a [`Url`].
+pub fn unsmuggle_url(smug_url: Url) -> PyResult<Url> {
+    Python::with_gil(|py| {
+        let unsmuggled = get_yt_dlp_utils(py)?
+            .call_method1("unsmuggle_url", (smug_url.as_str(),))?
+            .downcast::<PyTuple>()?
+            .get_item(0)?;
+
+        // A non-`str` first element is returned as an error; an unparsable one panics.
+        let plain = unsmuggled.downcast::<PyString>()?.to_string();
+        let parsed = plain
+            .parse::<Url>()
+            .expect("Python should be able to return a valid url");
+
+        Ok(parsed)
+    })
+}
+
+/// Download every URL in `urls` in order, passing `download_options` to yt-dlp.
+/// Returns one path per URL; the first failing extraction ends the call with that error.
+pub async fn download(
+    urls: &[Url],
+    download_options: &Map<String, Value>,
+) -> PyResult<Vec<PathBuf>> {
+    let mut out_paths = Vec::with_capacity(urls.len());
+    for url in urls {
+        info!("Started downloading url: '{}'", url);
+        let info_json = extract_info(download_options, url, true, true).await?;
+
+        // Try to work around yt-dlp type weirdness: prefer the top-level `filename` and
+        // fall back to the one of the first requested download.
+        let path = match info_json.filename {
+            Some(filename) => filename,
+            None => {
+                let requested = info_json.requested_downloads.expect("This must exist");
+                requested[0].filename.to_owned()
+            }
+        };
+        out_paths.push(path);
+        info!("Finished downloading url: '{}'", url);
+    }
+    Ok(out_paths)
+}
+
+/// Convert a `serde_json` map into a Python `dict` by way of its JSON text.
+fn json_map_to_py_dict<'a>(
+    map: &Map<String, Value>,
+    py: Python<'a>,
+) -> PyResult<Bound<'a, PyDict>> {
+    // Serialise on the Rust side, then let `json_loads` parse the text on the Python side.
+    let serialised = serde_json::to_string(map).expect("This must always work");
+
+    json_loads(py, serialised)
+}
+
+/// Serialise a Python object to JSON text after passing it through yt-dlp's `sanitize_info`.
+/// Python equivalent: `json.dumps(yt_dlp.YoutubeDL({..}).sanitize_info(input))`
+fn json_dumps(py: Python, input: Bound<PyAny>) -> PyResult<String> {
+    // NOTE(review): this builds a whole `YoutubeDL` object (logger + hooks) on every call,
+    // including each `progress_hook` invocation - check whether that is intended.
+    let sanitized = get_yt_dlp(py, PyDict::new_bound(py))?
+        .call_method1("sanitize_info", (input,))?;
+
+    // If `dumps` returns something that is not a `str`, `extract` reports it as an error.
+    PyModule::import_bound(py, "json")?
+        .getattr("dumps")?
+        .call1((sanitized,))?
+        .extract::<String>()
+
+}
+
+/// Serialise `input` to JSON in Rust and load that text as a Python `dict` via `json_loads`.
+fn json_loads_str<T: Serialize>(py: Python, input: T) -> PyResult<Bound<PyDict>> {
+    let serialised = serde_json::to_string(&input).expect("Correct json must be pased");
+    json_loads(py, serialised)
+}
+
+fn json_loads(py: Python, input: String) -> PyResult<Bound<PyDict>> {
+    //     json.loads(input)
+
+    let json = PyModule::import_bound(py, "json")?;
+    let loads = json.getattr("loads")?;
+
+    let output = loads.call1((input,))?;
+
+    // `json.loads` yields a list, string, number, ... for non-object documents; hand that
+    // back to the caller as a Python error rather than panicking.
+    let dict = output.downcast::<PyDict>()?;
+    Ok(dict.clone())
+}
+
+/// Python equivalent: `yt_dlp.utils`
+fn get_yt_dlp_utils<'a>(py: Python<'a>) -> PyResult<Bound<'a, PyAny>> {
+    // Import the package first, then read its `utils` attribute.
+    let package = PyModule::import_bound(py, "yt_dlp")?;
+    package.getattr("utils")
+}
+/// Build a `yt_dlp.YoutubeDL` instance from `opts`.
+/// The logger and the progress hook are always added to `opts` first.
+fn get_yt_dlp<'a>(py: Python<'a>, opts: Bound<'a, PyDict>) -> PyResult<Bound<'a, PyAny>> {
+    // Unconditionally set a logger
+    let with_logger = add_logger_and_sig_handler(opts, py)?;
+    let with_hooks = add_hooks(with_logger, py)?;
+
+    // Python equivalent: `yt_dlp.YoutubeDL(with_hooks)`
+    PyModule::import_bound(py, "yt_dlp")?.call_method1("YoutubeDL", (with_hooks,))
+}