// yt - A fully featured command line YouTube client
//
// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de>
// SPDX-License-Identifier: GPL-3.0-or-later
//
// This file is part of Yt.
//
// You should have received a copy of the License along with this program.
// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.

use std::{collections::HashMap, process::Stdio, str::FromStr};

use anyhow::{Context, Ok, Result};
use chrono::{DateTime, Utc};
use log::{error, info, warn};
use tokio::{
    io::{AsyncBufReadExt, BufReader},
    process::Command,
};
use url::Url;
use yt_dlp::{unsmuggle_url, wrapper::info_json::InfoJson};

use crate::{
app::App,
storage::{
subscriptions::{get_subscriptions, Subscription},
video_database::{
extractor_hash::ExtractorHash, getters::get_all_hashes, setters::add_video, Video,
VideoStatus,
},
},
};
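
/// Fetch new videos for all (or only the specified) subscriptions.
///
/// This spawns the `raw_update.py` helper script, hands it the subscription
/// URLs together with the hashes of all videos that are already stored, and
/// then reads the newly discovered videos from its line-delimited JSON output.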
pub async fn update(
app: &App,
max_backlog: u32,
subs_to_update: Vec<String>,
_concurrent_processes: usize,
) -> Result<()> {
    let subscriptions = get_subscriptions(app).await?;
let mut back_subs: HashMap<Url, Subscription> = HashMap::new();
let mut urls: Vec<String> = vec![];
for (name, sub) in subscriptions.0 {
if subs_to_update.contains(&name) || subs_to_update.is_empty() {
urls.push(sub.url.to_string());
back_subs.insert(sub.url.clone(), sub);
} else {
info!(
"Not updating subscription '{}' as it was not specified",
name
);
}
}
    // We can get away with not re-fetching the hashes on every line of output,
    // as the returned videos should not contain duplicates.
let hashes = get_all_hashes(app).await?;
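
    // The helper script expects its arguments in this order: the maximum
    // backlog, the number of subscription URLs, the URLs themselves, and
    // finally the hashes of all videos that are already in the database.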
    let mut child = Command::new("raw_update.py")
        .arg(max_backlog.to_string())
        .arg(urls.len().to_string())
        .args(&urls)
        .args(hashes.iter().map(|hash| hash.to_string()))
        .stdout(Stdio::piped())
        .stderr(Stdio::null())
        .stdin(Stdio::null())
        .spawn()
        .context("Failed to spawn the raw_update.py helper script")?;
let mut out = BufReader::new(
child
.stdout
.take()
.expect("Should be able to take child stdout"),
)
.lines();
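
    // The script emits one JSON object per line, mapping a subscription URL to
    // the info JSON of one newly found video.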
while let Some(line) = out.next_line().await? {
        let output_json: HashMap<Url, InfoJson> = serde_json::from_str(&line)
            .context("Failed to parse a line of the update script output as JSON")?;

        for (url, value) in output_json {
            let sub = back_subs.get(&url).expect("This was stored before");
            process_subscription(app, sub, value, &hashes).await?;
        }
}

    let status = child.wait().await?;
    if !status.success() {
        error!("The raw_update.py invocation failed for all subscriptions.");
    }
Ok(())
}
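
/// Convert a single `InfoJson` entry returned by the update script into a
/// [`Video`] and store it in the database.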
async fn process_subscription(
    app: &App,
    sub: &Subscription,
    entry: InfoJson,
    hashes: &[blake3::Hash],
) -> Result<()> {
    // Bail out of this function with a descriptive error if the given `Option` is `None`.
    macro_rules! unwrap_option {
        ($option:expr) => {
            match $option {
                Some(x) => x,
                None => anyhow::bail!(concat!(
                    "Expected a value, but '",
                    stringify!($option),
                    "' is None!"
                )),
            }
        };
    }

    let publish_date = if let Some(date) = &entry.upload_date {
        // yt-dlp reports the upload date as a `YYYYMMDD` string.
        let year: u32 = date
            .chars()
            .take(4)
            .collect::<String>()
            .parse()
            .expect("The year should be parseable");
        let month: u32 = date
            .chars()
            .skip(4)
            .take(2)
            .collect::<String>()
            .parse()
            .expect("The month should be parseable");
        let day: u32 = date
            .chars()
            .skip(6)
            .take(2)
            .collect::<String>()
            .parse()
            .expect("The day should be parseable");

        let date_string = format!("{year:04}-{month:02}-{day:02}T00:00:00Z");
        Some(
            DateTime::<Utc>::from_str(&date_string)
                .expect("The constructed date string should always be valid RFC 3339")
                .timestamp(),
        )
    } else {
        warn!(
            "The video '{}' lacks its upload date!",
            unwrap_option!(&entry.title)
        );
        None
    };

    let thumbnail_url = match (&entry.thumbnails, &entry.thumbnail) {
        (None, None) => None,
        (None, Some(thumbnail)) => Some(thumbnail.to_owned()),
        // TODO: Simply taking the first thumbnail is not exactly the best algorithm <2024-05-28>
        (Some(thumbnails), None) => Some(
            thumbnails
                .first()
                .expect("At least one thumbnail should exist")
                .url
                .clone(),
        ),
        (Some(_), Some(thumbnail)) => Some(thumbnail.to_owned()),
    };

    let url = {
        // yt-dlp sometimes "smuggles" extra data into the webpage URL; strip
        // it off again to get the plain URL.
        let smug_url: url::Url = unwrap_option!(entry.webpage_url.clone());
        unsmuggle_url(smug_url)?
    };

    // The blake3 hash of the extractor id uniquely identifies the video.
    let extractor_hash = blake3::hash(unwrap_option!(entry.id).as_bytes());
    if hashes.contains(&extractor_hash) {
        // We have already stored this video's information.
        unreachable!("The python update script should never have provided us with a duplicated video");
} else {
let video = Video {
cache_path: None,
description: entry.description.clone(),
duration: entry.duration,
extractor_hash: ExtractorHash::from_hash(extractor_hash),
last_status_change: Utc::now().timestamp(),
parent_subscription_name: Some(sub.name.clone()),
priority: 0,
publish_date,
status: VideoStatus::Pick,
status_change: false,
thumbnail_url,
title: unwrap_option!(entry.title.clone()),
url,
};
println!("{}", video.to_color_display());
add_video(app, video).await?;
Ok(())
}
}