1use std::{net::IpAddr, time::SystemTime};
9
10use ipaddress::IPAddress;
11use serde::Serialize;
12use tuwunel_core::{Err, Result, debug, err, implement};
13use url::{Host, Url};
14
15use super::Service;
16
17#[derive(Default, Serialize)]
18pub struct UrlPreviewData {
19 #[serde(
20 skip_serializing_if = "Option::is_none",
21 rename(serialize = "og:title")
22 )]
23 pub title: Option<String>,
24 #[serde(
25 skip_serializing_if = "Option::is_none",
26 rename(serialize = "og:description")
27 )]
28 pub description: Option<String>,
29 #[serde(
30 skip_serializing_if = "Option::is_none",
31 rename(serialize = "og:image")
32 )]
33 pub image: Option<String>,
34 #[serde(
35 skip_serializing_if = "Option::is_none",
36 rename(serialize = "matrix:image:size")
37 )]
38 pub image_size: Option<usize>,
39 #[serde(
40 skip_serializing_if = "Option::is_none",
41 rename(serialize = "og:image:width")
42 )]
43 pub image_width: Option<u32>,
44 #[serde(
45 skip_serializing_if = "Option::is_none",
46 rename(serialize = "og:image:height")
47 )]
48 pub image_height: Option<u32>,
49 #[serde(
50 skip_serializing_if = "Option::is_none",
51 rename(serialize = "og:video")
52 )]
53 pub video: Option<String>,
54 #[serde(
55 skip_serializing_if = "Option::is_none",
56 rename(serialize = "matrix:video:size")
57 )]
58 pub video_size: Option<usize>,
59 #[serde(
60 skip_serializing_if = "Option::is_none",
61 rename(serialize = "og:video:width")
62 )]
63 pub video_width: Option<u32>,
64 #[serde(
65 skip_serializing_if = "Option::is_none",
66 rename(serialize = "og:video:height")
67 )]
68 pub video_height: Option<u32>,
69 #[serde(
70 skip_serializing_if = "Option::is_none",
71 rename(serialize = "og:audio")
72 )]
73 pub audio: Option<String>,
74 #[serde(
75 skip_serializing_if = "Option::is_none",
76 rename(serialize = "matrix:audio:size")
77 )]
78 pub audio_size: Option<usize>,
79 #[serde(
80 skip_serializing_if = "Option::is_none",
81 rename(serialize = "og:type")
82 )]
83 pub og_type: Option<String>,
84 #[serde(
85 skip_serializing_if = "Option::is_none",
86 rename(serialize = "og:url")
87 )]
88 pub og_url: Option<String>,
89}
90
91#[implement(Service)]
92pub fn remove_url_preview(&self, url: &str) -> Result {
93 self.db.remove_url_preview(url)
95}
96
97#[implement(Service)]
98pub fn set_url_preview(&self, url: &str, data: &UrlPreviewData) -> Result {
99 let now = SystemTime::now()
100 .duration_since(SystemTime::UNIX_EPOCH)
101 .expect("valid system time");
102 self.db.set_url_preview(url, data, now)
103}
104
105#[implement(Service)]
106pub async fn get_url_preview(&self, url: &Url) -> Result<UrlPreviewData> {
107 if let Ok(preview) = self.db.get_url_preview(url.as_str()).await {
108 return Ok(preview);
109 }
110
111 let _request_lock = self.url_preview_mutex.lock(url.as_str()).await;
113
114 match self.db.get_url_preview(url.as_str()).await {
115 | Ok(preview) => Ok(preview),
116 | Err(_) => self.request_url_preview(url).await,
117 }
118}
119
120#[implement(Service)]
121pub async fn request_url_preview(&self, url: &Url) -> Result<UrlPreviewData> {
122 self.check_url_host(url)?;
123
124 let client = &self.services.client.url_preview;
125 let response = client.get(url.as_str()).send().await?;
126
127 debug!(?url, "URL preview response headers: {:?}", response.headers());
128
129 if let Some(remote_addr) = response.remote_addr() {
130 debug!(?url, "URL preview response remote address: {:?}", remote_addr);
131
132 if let Ok(ip) = IPAddress::parse(remote_addr.ip().to_string())
133 && !self.services.client.valid_cidr_range(&ip)
134 {
135 return Err!(Request(Forbidden("Requesting from this address is forbidden")));
136 }
137 }
138
139 let content_type = response
140 .headers()
141 .get(reqwest::header::CONTENT_TYPE)
142 .ok_or_else(|| err!(Request(Unknown("Missing Content-Type header"))))?
143 .to_str()
144 .map_err(|e| err!(Request(Unknown("Invalid Content-Type header: {e}"))))?
145 .to_owned();
146
147 let data = match content_type.as_str() {
148 | html if html.starts_with("text/html") => self.download_html(url, response).await?,
149 | img if img.starts_with("image/") => self.download_image(response).await?,
150 | _ => return Err!(Request(Unknown("Unsupported Content-Type"))),
151 };
152
153 self.set_url_preview(url.as_str(), &data)?;
154
155 Ok(data)
156}
157
/// Turns an image response into preview data.
///
/// The body is read through `read_response_capped` using the configured
/// `max_response_size`, then handed to `self.create` under a freshly
/// generated MXC identifier on this server's name. The returned data carries
/// that MXC URI, the byte length of the body and, when the image format and
/// dimensions can be determined, the width and height.
///
/// # Errors
///
/// Fails if reading the body or `self.create` fails. Failure to determine
/// the image dimensions is not an error; width and height are simply left
/// unset.
#[cfg(feature = "url_preview")]
#[implement(Service)]
pub async fn download_image(&self, response: reqwest::Response) -> Result<UrlPreviewData> {
    use image::ImageReader;
    use ruma::Mxc;
    use tuwunel_core::utils::random_string;

    let limit = self.services.config.max_response_size;
    let image = crate::client::read_response_capped(response, limit).await?;

    let media_id = random_string(super::MXC_LENGTH);
    let mxc = Mxc {
        server_name: self.services.globals.server_name(),
        media_id: &media_id,
    };

    self.create(&mxc, None, None, None, &image)
        .await?;

    // Any failure while sniffing the format or reading the header just means
    // "dimensions unknown".
    let dimensions = ImageReader::new(std::io::Cursor::new(&image))
        .with_guessed_format()
        .ok()
        .and_then(|reader| reader.into_dimensions().ok());

    Ok(UrlPreviewData {
        image: Some(mxc.to_string()),
        image_size: Some(image.len()),
        image_width: dimensions.map(|(width, _)| width),
        image_height: dimensions.map(|(_, height)| height),
        ..Default::default()
    })
}
192
193#[cfg(not(feature = "url_preview"))]
194#[implement(Service)]
195#[expect(clippy::unused_async)]
196pub async fn download_image(&self, _response: reqwest::Response) -> Result<UrlPreviewData> {
197 Err!(FeatureDisabled("url_preview"))
198}
199
200#[cfg(feature = "url_preview")]
201#[implement(Service)]
202async fn download_html(
203 &self,
204 url: &Url,
205 mut response: reqwest::Response,
206) -> Result<UrlPreviewData> {
207 use webpage::HTML;
208
209 let mut bytes: Vec<u8> = Vec::new();
210 while let Some(chunk) = response.chunk().await? {
211 bytes.extend_from_slice(&chunk);
212 if bytes.len() > self.services.config.url_preview_max_spider_size {
213 debug!(
214 "Response body from URL {} exceeds url_preview_max_spider_size ({}), not \
215 processing the rest of the response body and assuming our necessary data is in \
216 this range.",
217 url, self.services.config.url_preview_max_spider_size
218 );
219 break;
220 }
221 }
222 let body = String::from_utf8_lossy(&bytes);
223 let Ok(html) = HTML::from_string(body.to_string(), Some(url.to_string())) else {
224 return Err!(Request(Unknown("Failed to parse HTML")));
225 };
226
227 let client = &self.services.client.url_preview;
230 let mut data = match html.opengraph.images.first() {
231 | None => UrlPreviewData::default(),
232 | Some(obj) => {
233 let image_url = url
234 .join(&obj.url)
235 .map_err(|e| err!(Request(Unknown("Invalid og:image URL: {e}"))))?;
236
237 self.check_url_host(&image_url)?;
238 let image_response = client.get(image_url.as_str()).send().await?;
239
240 if let Some(remote_addr) = image_response.remote_addr() {
241 debug!(?image_url, ?remote_addr, "og:image remote address");
242
243 if let Ok(ip) = IPAddress::parse(remote_addr.ip().to_string())
244 && !self.services.client.valid_cidr_range(&ip)
245 {
246 return Err!(Request(Forbidden("Requesting from this address is forbidden")));
247 }
248 }
249
250 self.download_image(image_response).await?
251 },
252 };
253
254 let props = html.opengraph.properties;
255
256 data.title = props.get("title").cloned().or(html.title);
258 data.description = props
259 .get("description")
260 .cloned()
261 .or(html.description);
262 data.og_type = Some(html.opengraph.og_type);
263 data.og_url = props.get("url").cloned();
264
265 Ok(data)
266}
267
268#[cfg(not(feature = "url_preview"))]
269#[implement(Service)]
270#[expect(clippy::unused_async)]
271async fn download_html(
272 &self,
273 _url: &Url,
274 _response: reqwest::Response,
275) -> Result<UrlPreviewData> {
276 Err!(FeatureDisabled("url_preview"))
277}
278
279#[implement(Service)]
280pub(super) fn check_url_host(&self, url: &Url) -> Result {
281 let host = url
282 .host()
283 .ok_or_else(|| err!(Request(Unknown("URL has no host"))))?;
284
285 let ip = match host {
286 | Host::Domain(_) => return Ok(()),
287 | Host::Ipv4(v4) => IpAddr::V4(v4),
288 | Host::Ipv6(v6) => IpAddr::V6(v6),
289 };
290
291 if !self.services.client.valid_cidr_range_ip(ip) {
292 return Err!(Request(Forbidden("Requesting from this address is forbidden")));
293 }
294
295 Ok(())
296}
297
298#[implement(Service)]
299pub fn url_preview_allowed(&self, url: &Url) -> bool {
300 if ["http", "https"]
301 .iter()
302 .all(|&scheme| !scheme.eq_ignore_ascii_case(url.scheme()))
303 {
304 debug!("Ignoring non-HTTP/HTTPS URL to preview: {}", url);
305 return false;
306 }
307
308 let host = match url.host_str() {
309 | None => {
310 debug!("Ignoring URL preview for a URL that does not have a host (?): {}", url);
311 return false;
312 },
313 | Some(h) => h.to_owned(),
314 };
315
316 let allowlist_domain_contains = &self
317 .services
318 .config
319 .url_preview_domain_contains_allowlist;
320 let allowlist_domain_explicit = &self
321 .services
322 .config
323 .url_preview_domain_explicit_allowlist;
324 let denylist_domain_explicit = &self
325 .services
326 .config
327 .url_preview_domain_explicit_denylist;
328 let allowlist_url_contains = &self
329 .services
330 .config
331 .url_preview_url_contains_allowlist;
332
333 if allowlist_domain_contains.contains(&"*".to_owned())
334 || allowlist_domain_explicit.contains(&"*".to_owned())
335 || allowlist_url_contains.contains(&"*".to_owned())
336 {
337 debug!("Config key contains * which is allowing all URL previews. Allowing URL {}", url);
338 return true;
339 }
340
341 if !host.is_empty() {
342 if denylist_domain_explicit.contains(&host) {
343 debug!(
344 "Host {} is not allowed by url_preview_domain_explicit_denylist (check 1/4)",
345 &host
346 );
347 return false;
348 }
349
350 if allowlist_domain_explicit.contains(&host) {
351 debug!(
352 "Host {} is allowed by url_preview_domain_explicit_allowlist (check 2/4)",
353 &host
354 );
355 return true;
356 }
357
358 if allowlist_domain_contains
359 .iter()
360 .any(|domain_s| domain_s.contains(&host.clone()))
361 {
362 debug!(
363 "Host {} is allowed by url_preview_domain_contains_allowlist (check 3/4)",
364 &host
365 );
366 return true;
367 }
368
369 if allowlist_url_contains
370 .iter()
371 .any(|url_s| url.to_string().contains(url_s))
372 {
373 debug!("URL {} is allowed by url_preview_url_contains_allowlist (check 4/4)", &host);
374 return true;
375 }
376
377 if self.services.config.url_preview_check_root_domain {
379 debug!("Checking root domain");
380 match host.split_once('.') {
381 | None => return false,
382 | Some((_, root_domain)) => {
383 if denylist_domain_explicit.contains(&root_domain.to_owned()) {
384 debug!(
385 "Root domain {} is not allowed by \
386 url_preview_domain_explicit_denylist (check 1/3)",
387 &root_domain
388 );
389 return false;
390 }
391
392 if allowlist_domain_explicit.contains(&root_domain.to_owned()) {
393 debug!(
394 "Root domain {} is allowed by url_preview_domain_explicit_allowlist \
395 (check 2/3)",
396 &root_domain
397 );
398 return true;
399 }
400
401 if allowlist_domain_contains
402 .iter()
403 .any(|domain_s| domain_s.contains(&root_domain.to_owned()))
404 {
405 debug!(
406 "Root domain {} is allowed by url_preview_domain_contains_allowlist \
407 (check 3/3)",
408 &root_domain
409 );
410 return true;
411 }
412 },
413 }
414 }
415 }
416
417 false
418}