about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorBenedikt Peetz <benedikt.peetz@b-peetz.de>2024-08-22 14:22:13 +0200
committerBenedikt Peetz <benedikt.peetz@b-peetz.de>2024-08-22 14:22:13 +0200
commit6bfc7ee06dc1a598014dd5bec659b14a3aa87bbd (patch)
treef12b4892214fd9cd0fbbd206abd6929179f75d2b
parenttest(benches/update): Init (diff)
downloadyt-6bfc7ee06dc1a598014dd5bec659b14a3aa87bbd.tar.gz
yt-6bfc7ee06dc1a598014dd5bec659b14a3aa87bbd.zip
feat(download): Support limiting the downloader by maximal cache size
-rw-r--r--src/cli.rs47
-rw-r--r--src/constants.rs14
-rw-r--r--src/download/download_options.rs2
-rw-r--r--src/download/mod.rs96
-rw-r--r--src/main.rs10
-rw-r--r--src/storage/video_database/extractor_hash.rs2
6 files changed, 153 insertions, 18 deletions
diff --git a/src/cli.rs b/src/cli.rs
index f3f4b7e..a61a57e 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -10,6 +10,7 @@
 
 use std::path::PathBuf;
 
+use anyhow::{bail, Error};
 use chrono::NaiveDate;
 use clap::{ArgAction, Args, Parser, Subcommand};
 use url::Url;
@@ -47,6 +48,11 @@ pub enum Command {
         /// Forcefully re-download all cached videos (i.e. delete the cache path, then download).
         #[arg(short, long)]
         force: bool,
+
+        /// The maximum size the download dir should have. Accepts a whole number with an optional
+        /// unit suffix (e.g. `3 GiB`, `500 MB`); a bare number is interpreted as bytes.
+        #[arg(short, long, default_value = "3 GiB", value_parser = byte_parser)]
+        max_cache_size: u64,
     },
 
     /// Watch the already cached (and selected) videos
@@ -102,6 +108,47 @@ pub enum Command {
     },
 }
 
+fn byte_parser(s: &str) -> Result<u64, Error> {
+    const B: u64 = 1;
+
+    const KIB: u64 = 1024 * B;
+    const MIB: u64 = 1024 * KIB;
+    const GIB: u64 = 1024 * MIB;
+
+    const KB: u64 = 1000 * B;
+    const MB: u64 = 1000 * KB;
+    const GB: u64 = 1000 * MB;
+
+    let s = s
+        .chars()
+        .filter(|elem| !elem.is_whitespace())
+        .collect::<String>();
+
+    let number: u64 = s
+        .chars()
+        .take_while(|x| x.is_numeric())
+        .collect::<String>()
+        .parse()?;
+    let extension = s.chars().skip_while(|x| x.is_numeric()).collect::<String>();
+
+    let output = match extension.to_lowercase().as_str() {
+        "" => number,
+        "b" => number * B,
+        "kib" => number * KIB,
+        "mib" => number * MIB,
+        "gib" => number * GIB,
+        "kb" => number * KB,
+        "mb" => number * MB,
+        "gb" => number * GB,
+        other => bail!(
+            "Your extension '{}' is not yet supported. Only KB,MB,GB or KiB,MiB,GiB are supported",
+            other
+        ),
+    };
+
+    Ok(output)
+}
+
 impl Default for Command {
     fn default() -> Self {
         Self::Select {
diff --git a/src/constants.rs b/src/constants.rs
index f4eef3d..00919ce 100644
--- a/src/constants.rs
+++ b/src/constants.rs
@@ -8,9 +8,9 @@
 // You should have received a copy of the License along with this program.
 // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
 
-use std::{env::temp_dir, path::PathBuf};
+use std::{env::temp_dir, fs, path::PathBuf};
 
-use anyhow::Context;
+use anyhow::{Context, Result};
 
 pub const HELP_STR: &str = include_str!("./select/selection_file/help.str");
 pub const LOCAL_COMMENTS_LENGTH: usize = 1000;
@@ -21,9 +21,15 @@ pub const DEFAULT_SUBTITLE_LANGS: &str = "en";
 
 pub const CONCURRENT_DOWNLOADS: u32 = 5;
 // We download to the temp dir to avoid taxing the disk
-pub fn download_dir() -> PathBuf {
+pub fn download_dir(create: bool) -> Result<PathBuf> {
     const DOWNLOAD_DIR: &str = "/tmp/yt";
-    PathBuf::from(DOWNLOAD_DIR)
+    let dir = PathBuf::from(DOWNLOAD_DIR);
+
+    if !dir.exists() && create {
+        fs::create_dir_all(&dir).context("Failed to create the download directory")?
+    }
+
+    Ok(dir)
 }
 
 const PREFIX: &str = "yt";
diff --git a/src/download/download_options.rs b/src/download/download_options.rs
index 17cf66c..04c1600 100644
--- a/src/download/download_options.rs
+++ b/src/download/download_options.rs
@@ -50,7 +50,7 @@ pub fn download_opts(additional_opts: YtDlpOptions) -> serde_json::Map<String, s
       "writeautomaticsub": true,
 
       "outtmpl": {
-        "default": constants::download_dir().join("%(channel)s/%(title)s.%(ext)s"),
+        "default": constants::download_dir(false).expect("We're not creating this dir, thus this function can't error").join("%(channel)s/%(title)s.%(ext)s"),
         "chapter": "%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s"
       },
       "compat_opts": {},
diff --git a/src/download/mod.rs b/src/download/mod.rs
index 3785876..c3d79b7 100644
--- a/src/download/mod.rs
+++ b/src/download/mod.rs
@@ -8,22 +8,24 @@
 // You should have received a copy of the License along with this program.
 // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
 
-use std::{sync::Arc, time::Duration};
+use std::{collections::HashMap, sync::Arc, time::Duration};
 
 use crate::{
     app::App,
+    constants::download_dir,
     download::download_options::download_opts,
     storage::video_database::{
         downloader::{get_next_uncached_video, set_video_cache_path},
         extractor_hash::ExtractorHash,
         getters::get_video_yt_dlp_opts,
-        Video,
+        Video, YtDlpOptions,
     },
 };
 
-use anyhow::{Context, Result};
-use log::{debug, info};
-use tokio::{task::JoinHandle, time};
+use anyhow::{bail, Context, Result};
+use futures::{future::BoxFuture, FutureExt};
+use log::{debug, info, warn};
+use tokio::{fs, task::JoinHandle, time};
 
 mod download_options;
 
@@ -53,12 +55,14 @@ impl CurrentDownload {
 
 pub struct Downloader {
     current_download: Option<CurrentDownload>,
+    video_size_cache: HashMap<ExtractorHash, u64>,
 }
 
 impl Downloader {
     pub fn new() -> Self {
         Self {
             current_download: None,
+            video_size_cache: HashMap::new(),
         }
     }
 
@@ -67,7 +71,20 @@ impl Downloader {
     /// change which videos it downloads.
     /// This will run, until the database doesn't contain any watchable videos
     pub async fn consume(&mut self, app: Arc<App>, max_cache_size: u64) -> Result<()> {
-        while let Some(next_video) = get_next_uncached_video(app).await? {
+        while let Some(next_video) = get_next_uncached_video(&app).await? {
+            if Self::get_current_cache_allocation().await?
+                + self.get_approx_video_size(&next_video).await?
+                >= max_cache_size
+            {
+                warn!(
+                    "Can't download video: '{}' as it's too large for the cache.",
+                    next_video.title
+                );
+                // Wait in the hope that a sufficiently large video gets deleted from the cache.
+                time::sleep(Duration::from_secs(10)).await;
+                continue;
+            }
+
             if let Some(_) = &self.current_download {
                 let current_download = self.current_download.take().expect("Is Some");
 
@@ -99,7 +116,6 @@ impl Downloader {
                     );
                     // Reset the taken value
                     self.current_download = Some(current_download);
-                    time::sleep(Duration::new(1, 0)).await;
                 }
             } else {
                 info!(
@@ -111,15 +127,75 @@ impl Downloader {
                 self.current_download = Some(new_current_download);
             }
 
-            // if get_allocated_cache().await? < CONCURRENT {
-            //     .cache_video(next_video).await?;
-            // }
+            time::sleep(Duration::new(1, 0)).await;
         }
 
         info!("Finished downloading!");
         Ok(())
     }
 
+    async fn get_current_cache_allocation() -> Result<u64> {
+        fn dir_size(mut dir: fs::ReadDir) -> BoxFuture<'static, Result<u64>> {
+            async move {
+                let mut acc = 0;
+                while let Some(entry) = dir.next_entry().await? {
+                    let size = match entry.metadata().await? {
+                        data if data.is_dir() => {
+                            let path = entry.path();
+                            let read_dir = fs::read_dir(path).await?;
+
+                            dir_size(read_dir).await?
+                        }
+                        data => data.len(),
+                    };
+                    acc += size;
+                }
+                Ok(acc)
+            }
+            .boxed()
+        }
+
+        let val = dir_size(fs::read_dir(download_dir(true)?).await?).await;
+        if let Ok(val) = val.as_ref() {
+            info!("Cache dir has a size of '{}'", val);
+        }
+        val
+    }
+
+    async fn get_approx_video_size(&mut self, video: &Video) -> Result<u64> {
+        if let Some(value) = self.video_size_cache.get(&video.extractor_hash) {
+            Ok(*value)
+        } else {
+            // The subtitle file's size should be negligible compared to the video itself.
+            let add_opts = YtDlpOptions {
+                subtitle_langs: "".to_owned(),
+            };
+            let opts = &download_opts(add_opts);
+
+            let result = yt_dlp::extract_info(&opts, &video.url, false, true)
+                .await
+                .with_context(|| {
+                    format!("Failed to extract video information: '{}'", video.title)
+                })?;
+
+            let size = if let Some(val) = result.filesize {
+                val
+            } else if let Some(val) = result.filesize_approx {
+                val
+            } else {
+                bail!("Failed to find a filesize for video: '{}'", video.title);
+            };
+
+            assert_eq!(
+                self.video_size_cache
+                    .insert(video.extractor_hash.clone(), size),
+                None
+            );
+
+            Ok(size)
+        }
+    }
+
     async fn actually_cache_video(app: &App, video: &Video) -> Result<()> {
         debug!("Download started: {}", &video.title);
 
diff --git a/src/main.rs b/src/main.rs
index c223140..ebbb45f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -8,13 +8,14 @@
 // You should have received a copy of the License along with this program.
 // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
 
-use std::{collections::HashMap, fs};
+use std::{collections::HashMap, fs, sync::Arc};
 
 use anyhow::{bail, Context, Result};
 use app::App;
 use cache::invalidate;
 use clap::Parser;
 use cli::{CacheCommand, CheckCommand, SelectCommand, SubscriptionCommand};
+use log::info;
 use select::cmds::handle_select_cmd;
 use tokio::{
     fs::File,
@@ -56,7 +57,12 @@ async fn main() -> Result<()> {
     let app = App::new(args.db_path.unwrap_or(constants::database()?)).await?;
 
     match args.command.unwrap_or(Command::default()) {
-        Command::Download { force } => {
+        Command::Download {
+            force,
+            max_cache_size,
+        } => {
+            info!("max cache size: '{}'", max_cache_size);
+
             if force {
                 invalidate(&app, true).await?;
             }
diff --git a/src/storage/video_database/extractor_hash.rs b/src/storage/video_database/extractor_hash.rs
index 3af4f60..593b5c4 100644
--- a/src/storage/video_database/extractor_hash.rs
+++ b/src/storage/video_database/extractor_hash.rs
@@ -19,7 +19,7 @@ use crate::{app::App, storage::video_database::getters::get_all_hashes};
 
 static EXTRACTOR_HASH_LENGTH: OnceCell<usize> = OnceCell::const_new();
 
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct ExtractorHash {
     hash: Hash,
 }