diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..91f736e
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "rustwitness"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+anyhow = "1.0.102"
+clap = {version = "4.6.1", features = ["derive"]}
+headless_chrome = "1.0.21"
+rayon = "1.12.0"
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..46281b8
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,111 @@
+use anyhow::{Context, Result};
+use clap::Parser;
+use headless_chrome::{Browser, LaunchOptions};
+use rayon::prelude::*;
+use std::fs::{self, File};
+use std::io::{BufRead, BufReader};
+use std::path::PathBuf;
+
+// Command-line arguments; the `///` comments on fields are shown by clap in `--help`.
+#[derive(Parser, Debug)]
+#[command(author, version, about = "Parallel web screenshot tool")]
+struct Args {
+    /// Text file with one URL per line; blank lines are skipped
+    #[arg(short, long)]
+    input: PathBuf,
+
+    /// Directory the PNG screenshots are written to (created if missing)
+    #[arg(short, long)]
+    output: PathBuf,
+
+    /// Proxy passed to Chrome as `--proxy-server=<PROXY>`
+    #[arg(short, long)]
+    proxy: Option<String>,
+
+    /// Thread count for the rayon pool that runs the captures
+    #[arg(short, long, default_value_t = 4)]
+    workers: usize,
+}
+
+/// Reads the URL list from the input file and captures each URL on a rayon thread pool.
+///
+/// A failed capture is reported on stderr and does not stop the remaining ones.
+fn main() -> Result<()> {
+    let args = Args::parse();
+
+    // `create_dir_all` succeeds when the directory already exists, so no `exists()` check.
+    fs::create_dir_all(&args.output).context("Failed to create output directory")?;
+
+    rayon::ThreadPoolBuilder::new()
+        .num_threads(args.workers)
+        .build_global()
+        .context("Failed to configure worker thread pool")?;
+
+    let file = File::open(&args.input).context("Failed to open input file")?;
+    let mut urls: Vec<String> = Vec::new();
+    for line in BufReader::new(file).lines() {
+        // Propagate read errors instead of silently dropping the affected lines.
+        let line = line.context("Failed to read input file")?;
+        let url = line.trim();
+        if !url.is_empty() {
+            urls.push(url.to_string());
+        }
+    }
+
+    println!(
+        "Starting capture of {} URLs using {} workers...",
+        urls.len(),
+        args.workers
+    );
+
+    urls.par_iter().enumerate().for_each(|(index, url)| {
+        if let Err(e) = capture_screenshot(url, index, &args) {
+            eprintln!("Failed to process {}: {:?}", url, e);
+        }
+    });
+
+    println!("All tasks completed.");
+    Ok(())
+}
+
+/// Launches headless Chrome, loads `url`, and saves a PNG of the page as
+/// `screenshot_NNN.png` in the output directory, where `NNN` is `index + 1` zero-padded.
+///
+/// # Errors
+///
+/// Fails if the browser or tab cannot be started, navigation or capture fails, or the file
+/// cannot be written.
+fn capture_screenshot(url: &str, index: usize, args: &Args) -> Result<()> {
+    // Keep the flag in a local declared before `launch_options`: the options only borrow
+    // it, so nothing has to be leaked to obtain a `'static` string.
+    let proxy_arg = args
+        .proxy
+        .as_ref()
+        .map(|proxy_url| format!("--proxy-server={}", proxy_url));
+
+    let mut launch_options = LaunchOptions::default();
+    if let Some(arg) = proxy_arg.as_deref() {
+        launch_options.args.push(std::ffi::OsStr::new(arg));
+    }
+
+    let browser = Browser::new(launch_options).context("Browser launch failed")?;
+    let tab = browser.new_tab().context("Failed to open tab")?;
+    tab.navigate_to(url).context("Navigation failed")?;
+    tab.wait_until_navigated()
+        .context("Waiting for load failed")?;
+    let mut file_path = args.output.clone();
+    file_path.push(format!("screenshot_{:03}.png", index + 1));
+    let png_data = tab
+        .capture_screenshot(
+            headless_chrome::protocol::cdp::Page::CaptureScreenshotFormatOption::Png,
+            None,
+            None,
+            true,
+        )
+        .context("Screenshot capture failed")?;
+
+    fs::write(&file_path, png_data).context("Failed to write file")?;
+    println!("Successfully captured: {}", url);
+
+    Ok(())
+}