use anyhow::{Context, Result}; use clap::Parser; use headless_chrome::{Browser, LaunchOptions}; use rayon::prelude::*; use std::fs::{self, File, read_to_string}; use std::io::{BufRead, BufReader}; use std::path::PathBuf; #[derive(Parser, Debug)] #[command(author, version, about = "Parallel web screenshot tool")] struct Args { #[arg(short, long)] input: PathBuf, #[arg(short, long)] output: PathBuf, #[arg(short, long)] proxy: Option, #[arg(short, long, default_value_t = 4)] workers: usize, } fn main() -> Result<()> { let args = Args::parse(); if !args.output.exists() { fs::create_dir_all(&args.output).context("Failed to create output directory")?; } let mut log_path = args.output.clone(); log_path.push("log.txt"); let mut resume = None; if log_path.exists() { let log = read_to_string(log_path)?; if let Some(last_line) = log.lines().last() { resume = Some(last_line.to_string()) } }; rayon::ThreadPoolBuilder::new() .num_threads(args.workers) .build_global() .unwrap(); let mut urls = Vec::new(); if resume.is_some() { let url_resume = resume.clone().unwrap(); let url_text = read_to_string(&args.input)?; let mut resume_met = false; for line in url_text.lines() { if !resume_met { if line.trim().to_string() == url_resume { resume_met = true; } } else { urls.push(line.trim().to_string()); } } } if resume.is_none() { let file = File::open(&args.input).context("Failed to open input file")?; let urls: Vec = BufReader::new(file) .lines() .filter_map(|line| line.ok()) .map(|s| s.trim().to_string()) .filter(|s| !s.is_empty()) .collect(); println!( "Starting capture of {} URLs using {} workers...", urls.len(), args.workers ); urls.par_iter().enumerate().for_each(|(_index, url)| { if let Err(e) = capture_screenshot(url, &args) { eprintln!("Failed to process {}: {:?}", url, e); } }); println!("All tasks completed."); } else { urls.par_iter().enumerate().for_each(|(_index, url)| { if let Err(e) = capture_screenshot(url, &args) { eprintln!("failed to process {}: {:?}", url, e); } }); } Ok(()) } fn capture_screenshot(url: &str, args: &Args) -> Result<()> { let mut launch_options = LaunchOptions::default(); let file_name = url.split("://").collect::>()[1].to_string(); if let Some(ref proxy_url) = args.proxy { let proxy_arg = format!("--proxy-server={}", proxy_url); launch_options .args .push(std::ffi::OsStr::new(Box::leak(proxy_arg.into_boxed_str()))); } let browser = Browser::new(launch_options).context("Browser launch failed")?; let tab = browser.new_tab().context("Failed to open tab")?; tab.navigate_to(url).context("Navigation failed")?; tab.wait_until_navigated() .context("Waiting for load failed")?; tab.wait_for_element("body")?; tab.evaluate("document.readyState === 'complete'", false)?; let mut file_path = args.output.clone(); let mut log_path = args.output.clone(); log_path.push("log.txt"); file_path.push(format!("{}.png", file_name)); let png_data = tab .capture_screenshot( headless_chrome::protocol::cdp::Page::CaptureScreenshotFormatOption::Png, None, None, true, ) .context("Screenshot capture failed")?; fs::write(&file_path, png_data) .context("Failed to write file") .context(format!( "failed to write png file! {}", &file_path.display() ))?; println!("Successfully captured: {}", url); fs::write(&log_path, format!("{}\n", url).as_bytes()).context("failed to write log file!")?; Ok(()) }