1use std::time::SystemTime;
9
10use ipaddress::IPAddress;
11use serde::Serialize;
12use tuwunel_core::{Err, Result, debug, err, implement};
13use url::Url;
14
15use super::Service;
16
17#[derive(Default, Serialize)]
18pub struct UrlPreviewData {
19 #[serde(
20 skip_serializing_if = "Option::is_none",
21 rename(serialize = "og:title")
22 )]
23 pub title: Option<String>,
24 #[serde(
25 skip_serializing_if = "Option::is_none",
26 rename(serialize = "og:description")
27 )]
28 pub description: Option<String>,
29 #[serde(
30 skip_serializing_if = "Option::is_none",
31 rename(serialize = "og:image")
32 )]
33 pub image: Option<String>,
34 #[serde(
35 skip_serializing_if = "Option::is_none",
36 rename(serialize = "matrix:image:size")
37 )]
38 pub image_size: Option<usize>,
39 #[serde(
40 skip_serializing_if = "Option::is_none",
41 rename(serialize = "og:image:width")
42 )]
43 pub image_width: Option<u32>,
44 #[serde(
45 skip_serializing_if = "Option::is_none",
46 rename(serialize = "og:image:height")
47 )]
48 pub image_height: Option<u32>,
49 #[serde(
50 skip_serializing_if = "Option::is_none",
51 rename(serialize = "og:video")
52 )]
53 pub video: Option<String>,
54 #[serde(
55 skip_serializing_if = "Option::is_none",
56 rename(serialize = "matrix:video:size")
57 )]
58 pub video_size: Option<usize>,
59 #[serde(
60 skip_serializing_if = "Option::is_none",
61 rename(serialize = "og:video:width")
62 )]
63 pub video_width: Option<u32>,
64 #[serde(
65 skip_serializing_if = "Option::is_none",
66 rename(serialize = "og:video:height")
67 )]
68 pub video_height: Option<u32>,
69 #[serde(
70 skip_serializing_if = "Option::is_none",
71 rename(serialize = "og:audio")
72 )]
73 pub audio: Option<String>,
74 #[serde(
75 skip_serializing_if = "Option::is_none",
76 rename(serialize = "matrix:audio:size")
77 )]
78 pub audio_size: Option<usize>,
79 #[serde(
80 skip_serializing_if = "Option::is_none",
81 rename(serialize = "og:type")
82 )]
83 pub og_type: Option<String>,
84 #[serde(
85 skip_serializing_if = "Option::is_none",
86 rename(serialize = "og:url")
87 )]
88 pub og_url: Option<String>,
89}
90
/// Removes the stored preview for `url` by delegating to
/// `self.db.remove_url_preview`, returning that call's result unchanged.
#[implement(Service)]
pub fn remove_url_preview(&self, url: &str) -> Result {
	self.db.remove_url_preview(url)
}
96
97#[implement(Service)]
98pub fn set_url_preview(&self, url: &str, data: &UrlPreviewData) -> Result {
99 let now = SystemTime::now()
100 .duration_since(SystemTime::UNIX_EPOCH)
101 .expect("valid system time");
102 self.db.set_url_preview(url, data, now)
103}
104
105#[implement(Service)]
106pub async fn get_url_preview(&self, url: &Url) -> Result<UrlPreviewData> {
107 if let Ok(preview) = self.db.get_url_preview(url.as_str()).await {
108 return Ok(preview);
109 }
110
111 let _request_lock = self.url_preview_mutex.lock(url.as_str()).await;
113
114 match self.db.get_url_preview(url.as_str()).await {
115 | Ok(preview) => Ok(preview),
116 | Err(_) => self.request_url_preview(url).await,
117 }
118}
119
120#[implement(Service)]
121async fn request_url_preview(&self, url: &Url) -> Result<UrlPreviewData> {
122 if let Ok(ip) = IPAddress::parse(url.host_str().expect("URL previously validated"))
123 && !self.services.client.valid_cidr_range(&ip)
124 {
125 return Err!(Request(Forbidden("Requesting from this address is forbidden")));
126 }
127
128 let client = &self.services.client.url_preview;
129 let response = client.get(url.as_str()).send().await?;
130
131 debug!(?url, "URL preview response headers: {:?}", response.headers());
132
133 if let Some(remote_addr) = response.remote_addr() {
134 debug!(?url, "URL preview response remote address: {:?}", remote_addr);
135
136 if let Ok(ip) = IPAddress::parse(remote_addr.ip().to_string())
137 && !self.services.client.valid_cidr_range(&ip)
138 {
139 return Err!(Request(Forbidden("Requesting from this address is forbidden")));
140 }
141 }
142
143 let content_type = response
144 .headers()
145 .get(reqwest::header::CONTENT_TYPE)
146 .ok_or_else(|| err!(Request(Unknown("Missing Content-Type header"))))?
147 .to_str()
148 .map_err(|e| err!(Request(Unknown("Invalid Content-Type header: {e}"))))?
149 .to_owned();
150
151 let data = match content_type.as_str() {
152 | html if html.starts_with("text/html") => self.download_html(url, response).await?,
153 | img if img.starts_with("image/") => self.download_image(response).await?,
154 | _ => return Err!(Request(Unknown("Unsupported Content-Type"))),
155 };
156
157 self.set_url_preview(url.as_str(), &data)?;
158
159 Ok(data)
160}
161
/// Downloads the body of `response`, stores it as media under a freshly
/// generated MXC, and returns preview data describing the image.
///
/// The returned data carries the MXC string, the byte length of the body and,
/// when the format can be guessed and the dimensions read, the width and
/// height. Failing to determine dimensions is not an error; both are then
/// `None`.
///
/// # Errors
///
/// Returns an error if the body cannot be downloaded or the media cannot be
/// stored.
#[cfg(feature = "url_preview")]
#[implement(Service)]
pub async fn download_image(&self, response: reqwest::Response) -> Result<UrlPreviewData> {
	use image::ImageReader;
	use ruma::Mxc;
	use tuwunel_core::utils::random_string;

	let image = response.bytes().await?;

	let media_id = random_string(super::MXC_LENGTH);
	let mxc = Mxc {
		server_name: self.services.globals.server_name(),
		media_id: &media_id,
	};

	self.create(&mxc, None, None, None, &image)
		.await?;

	// Dimensions are best-effort: any failure along the way yields `None`.
	let (image_width, image_height) = ImageReader::new(std::io::Cursor::new(&image))
		.with_guessed_format()
		.ok()
		.and_then(|reader| reader.into_dimensions().ok())
		.unzip();

	Ok(UrlPreviewData {
		image: Some(mxc.to_string()),
		image_size: Some(image.len()),
		image_width,
		image_height,
		..Default::default()
	})
}
195
/// Variant compiled when the `url_preview` feature is disabled: ignores the
/// response and always returns a `FeatureDisabled("url_preview")` error.
#[cfg(not(feature = "url_preview"))]
#[implement(Service)]
#[expect(clippy::unused_async)]
pub async fn download_image(&self, _response: reqwest::Response) -> Result<UrlPreviewData> {
	Err!(FeatureDisabled("url_preview"))
}
202
203#[cfg(feature = "url_preview")]
204#[implement(Service)]
205async fn download_html(
206 &self,
207 url: &Url,
208 mut response: reqwest::Response,
209) -> Result<UrlPreviewData> {
210 use webpage::HTML;
211
212 let mut bytes: Vec<u8> = Vec::new();
213 while let Some(chunk) = response.chunk().await? {
214 bytes.extend_from_slice(&chunk);
215 if bytes.len() > self.services.config.url_preview_max_spider_size {
216 debug!(
217 "Response body from URL {} exceeds url_preview_max_spider_size ({}), not \
218 processing the rest of the response body and assuming our necessary data is in \
219 this range.",
220 url, self.services.config.url_preview_max_spider_size
221 );
222 break;
223 }
224 }
225 let body = String::from_utf8_lossy(&bytes);
226 let Ok(html) = HTML::from_string(body.to_string(), Some(url.to_string())) else {
227 return Err!(Request(Unknown("Failed to parse HTML")));
228 };
229
230 let client = &self.services.client.url_preview;
233 let mut data = match html.opengraph.images.first() {
234 | None => UrlPreviewData::default(),
235 | Some(obj) => {
236 let image_url = url
237 .join(&obj.url)
238 .map_err(|e| err!(Request(Unknown("Invalid og:image URL: {e}"))))?;
239 let image_response = client.get(image_url.as_str()).send().await?;
240 self.download_image(image_response).await?
241 },
242 };
243
244 let props = html.opengraph.properties;
245
246 data.title = props.get("title").cloned().or(html.title);
248 data.description = props
249 .get("description")
250 .cloned()
251 .or(html.description);
252 data.og_type = Some(html.opengraph.og_type);
253 data.og_url = props.get("url").cloned();
254
255 Ok(data)
256}
257
/// Variant compiled when the `url_preview` feature is disabled: ignores both
/// arguments and always returns a `FeatureDisabled("url_preview")` error.
#[cfg(not(feature = "url_preview"))]
#[implement(Service)]
#[expect(clippy::unused_async)]
async fn download_html(
	&self,
	_url: &Url,
	_response: reqwest::Response,
) -> Result<UrlPreviewData> {
	Err!(FeatureDisabled("url_preview"))
}
268
269#[implement(Service)]
270pub fn url_preview_allowed(&self, url: &Url) -> bool {
271 if ["http", "https"]
272 .iter()
273 .all(|&scheme| !scheme.eq_ignore_ascii_case(url.scheme()))
274 {
275 debug!("Ignoring non-HTTP/HTTPS URL to preview: {}", url);
276 return false;
277 }
278
279 let host = match url.host_str() {
280 | None => {
281 debug!("Ignoring URL preview for a URL that does not have a host (?): {}", url);
282 return false;
283 },
284 | Some(h) => h.to_owned(),
285 };
286
287 let allowlist_domain_contains = &self
288 .services
289 .config
290 .url_preview_domain_contains_allowlist;
291 let allowlist_domain_explicit = &self
292 .services
293 .config
294 .url_preview_domain_explicit_allowlist;
295 let denylist_domain_explicit = &self
296 .services
297 .config
298 .url_preview_domain_explicit_denylist;
299 let allowlist_url_contains = &self
300 .services
301 .config
302 .url_preview_url_contains_allowlist;
303
304 if allowlist_domain_contains.contains(&"*".to_owned())
305 || allowlist_domain_explicit.contains(&"*".to_owned())
306 || allowlist_url_contains.contains(&"*".to_owned())
307 {
308 debug!("Config key contains * which is allowing all URL previews. Allowing URL {}", url);
309 return true;
310 }
311
312 if !host.is_empty() {
313 if denylist_domain_explicit.contains(&host) {
314 debug!(
315 "Host {} is not allowed by url_preview_domain_explicit_denylist (check 1/4)",
316 &host
317 );
318 return false;
319 }
320
321 if allowlist_domain_explicit.contains(&host) {
322 debug!(
323 "Host {} is allowed by url_preview_domain_explicit_allowlist (check 2/4)",
324 &host
325 );
326 return true;
327 }
328
329 if allowlist_domain_contains
330 .iter()
331 .any(|domain_s| domain_s.contains(&host.clone()))
332 {
333 debug!(
334 "Host {} is allowed by url_preview_domain_contains_allowlist (check 3/4)",
335 &host
336 );
337 return true;
338 }
339
340 if allowlist_url_contains
341 .iter()
342 .any(|url_s| url.to_string().contains(url_s))
343 {
344 debug!("URL {} is allowed by url_preview_url_contains_allowlist (check 4/4)", &host);
345 return true;
346 }
347
348 if self.services.config.url_preview_check_root_domain {
350 debug!("Checking root domain");
351 match host.split_once('.') {
352 | None => return false,
353 | Some((_, root_domain)) => {
354 if denylist_domain_explicit.contains(&root_domain.to_owned()) {
355 debug!(
356 "Root domain {} is not allowed by \
357 url_preview_domain_explicit_denylist (check 1/3)",
358 &root_domain
359 );
360 return true;
361 }
362
363 if allowlist_domain_explicit.contains(&root_domain.to_owned()) {
364 debug!(
365 "Root domain {} is allowed by url_preview_domain_explicit_allowlist \
366 (check 2/3)",
367 &root_domain
368 );
369 return true;
370 }
371
372 if allowlist_domain_contains
373 .iter()
374 .any(|domain_s| domain_s.contains(&root_domain.to_owned()))
375 {
376 debug!(
377 "Root domain {} is allowed by url_preview_domain_contains_allowlist \
378 (check 3/3)",
379 &root_domain
380 );
381 return true;
382 }
383 },
384 }
385 }
386 }
387
388 false
389}