import axios from 'axios';
import * as cheerio from 'cheerio';
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import path from 'path';
import fs from 'fs';
// Use the StealthPlugin to avoid detection by websites
puppeteer.use(StealthPlugin());
// Configuration parameters for the request
const requestParams = {
baseURL: `https://www.google.com`, // Base URL used to build the Google Maps search URL
query: "Sarafgarh+Dam,+5QM6%2B2F6,+Odisha+770012", // Example query string (not used below; queries are built per place)
coordinates: "", // Optional GPS coordinates (currently unused)
hl: "en", // Language parameter for the search
};
// Utility function to introduce a delay
const delay = ms => new Promise(resolve => setTimeout(resolve, ms));
/**
* Clicks an element on the page identified by the given selector.
* @param {Page} page - The Puppeteer page object.
* @param {string} selector - The CSS selector of the element to click.
*/
async function clickElement(page, selector) {
// Wait for the element to be visible
await page.waitForSelector(selector, { visible: true });
// Click the element
await page.click(selector);
}
/**
* Opens the "All" photos tab by clicking the button labelled "All" and waits for the content to load.
* @param {Page} page - The Puppeteer page object.
*/
async function openAllPhotosTab(page) {
// Click the button with aria-label="All"
await page.waitForSelector('button[aria-label="All"]', { visible: true });
await page.click('button[aria-label="All"]');
// Wait for any additional loading after clicking the button
await delay(2000);
}
/**
* Downloads an image from the given URL and saves it to the specified file path.
* @param {string} url - The URL of the image to download.
* @param {string} filePath - The path where the image will be saved.
* @returns {Promise} - A promise that resolves when the image is downloaded.
*/
async function downloadImage(url, filePath) {
const response = await axios({
url,
responseType: 'stream',
});
const writer = fs.createWriteStream(filePath);
response.data.pipe(writer);
return new Promise((resolve, reject) => {
// Resolve only once the file has been fully written to disk
writer.on('finish', resolve);
writer.on('error', (err) => reject(err));
response.data.on('error', (err) => reject(err));
});
}
/**
* Scrolls a container on the page a specified number of times.
* @param {Page} page - The Puppeteer page object.
* @param {string} scrollContainerSelector - The CSS selector of the container to scroll.
* @param {number} loops - The number of times to scroll the container.
*/
async function scrollContainer(page, scrollContainerSelector, loops) {
for (let i = 0; i < loops; i++) {
// Scroll the container to the bottom
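// Note: console.log calls inside page.evaluate run in the browser context, not in Node;
// attach page.on('console', msg => console.log(msg.text())) if you want to see them in the terminal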
await page.evaluate((selector) => {
const container = document.querySelector(selector);
if (container) {
// Log the current scroll position and scroll height
console.log(`Before scroll - Scroll Top: ${container.scrollTop}, Scroll Height: ${container.scrollHeight}`);
// Scroll to the bottom
container.scrollTo(0, container.scrollHeight);
// Log the new scroll position after scrolling
console.log(`After scroll - Scroll Top: ${container.scrollTop}, Scroll Height: ${container.scrollHeight}`);
}
}, scrollContainerSelector);
await delay(1000); // Wait for content to load
}
}
/**
* Scrapes images from the page, saves them to the specified directory, and downloads them.
* @param {Page} page - The Puppeteer page object.
* @param {string} scrollContainerSelector - The CSS selector of the container to scroll.
* @param {number} loops - The number of times to scroll the container.
* @param {string} district - The district name, used as the folder name for the saved images.
* @param {string} distId - The ID of the place record, used to prefix the saved file names.
*/
async function scrapeAndSaveImages(page, scrollContainerSelector, loops, district, distId) {
// Scroll the container to load more images
await scrollContainer(page, scrollContainerSelector, loops);
const htmlContent = await page.content();
const $ = cheerio.load(htmlContent);
// Select elements with inline background-image styles
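// Note: '.U39Pmb' is an obfuscated Google Maps class name; it may change without notice and break this selector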
const imageUrls = $('.U39Pmb')
.map((_, element) => {
const style = $(element).attr('style');
if (style) {
// Extract URL from style attribute
const urlMatch = style.match(/url\(["']?(.*?)["']?\)/);
if (urlMatch && urlMatch[1]) {
const url = urlMatch[1];
// Remove everything after '=' in the URL (if needed)
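// Google-hosted photo URLs usually append size directives after '=' (e.g. '=w203-h152-k-no'), so this keeps only the base image URL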
const cleanedUrl = url.split('=')[0];
// Filter out invalid URLs
if (cleanedUrl && !cleanedUrl.startsWith('//:0') && cleanedUrl !== '""') {
return cleanedUrl;
}
}
}
return null;
})
.get() // Convert to an array
.filter((url) => url !== null); // Remove null values
console.log(imageUrls);
// Create a directory to save images
const dir = `./downloaded_images/${district}`;
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir, { recursive: true }); // Create parent directories as needed
}
// Download and save images
for (let i = 0; i < imageUrls.length; i++) {
const url = imageUrls[i];
const filePath = path.join(dir, `${distId}_image_${i + 1}.jpg`);
try {
await downloadImage(url, filePath);
console.log(`Downloaded: ${filePath}`);
} catch (err) {
console.error(`Failed to download ${url}: ${err.message}`);
}
}
}
/**
* Scrapes Google Maps place information for each district and saves the images.
* @param {Array} dists - District records, each with at least an `id` field.
*/
async function getLocalPlacesInfo(dists) {
// Launch a headless browser instance
const browser = await puppeteer.launch({
headless: true,
args: ["--no-sandbox", "--disable-setuid-sandbox"],
});
const page = await browser.newPage();
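// Present a regular desktop Chrome user agent to reduce the chance of bot detection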
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36');
for (let i = 0; i < dists.length; i++) {
const dist = dists[i];
// Fetch the place data for this district from the blogger info page
const datas = await getDataFromBloggerInfo(dist.id.toLowerCase());
for (let j = 0; j < datas.length; j++) {
const data = datas[j];
try {
const location = data.detailMap['ସ୍ଥାନ']; // 'ସ୍ଥାନ' means "place" in Odia; the key must match the source data exactly
const id = data.id;
const district = data.district;
const locate = location.replace(/ /g, "+").replace(/,/g, "");
const URL = `${requestParams.baseURL}/maps/search/${locate}?hl=${requestParams.hl}`;
// Navigate to the Google Maps URL
await page.setDefaultNavigationTimeout(60000);
await page.goto(URL);
await delay(2000);
// Click on the first result
const selector = '.hfpxzc'; // Class name of the element to click
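// Note: '.hfpxzc' is the obfuscated class Google Maps currently applies to result links; expect it to change over time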
await clickElement(page, selector);
// Open the "All" photos tab
await openAllPhotosTab(page);
// Scroll and scrape images
const scrollContainerSelector = '.m6QErb[style]';
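// Note: '.m6QErb[style]' targets the scrollable photo panel; like the other class names, it is internal to Google Maps and may break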
const loops = 50; // Number of times to scroll
await scrapeAndSaveImages(page, scrollContainerSelector, loops, district, id);
} catch (err) {
console.error(`Failed to process ${data.id}: ${err.message}`);
}
}
}
// Close the browser
await browser.close();
}
/**
* Fetches the tourist-place data for a district from the blogger info page and parses it.
* @param {string} place - The lowercase district id used in the post URL.
* @returns {Promise<Array>} - A promise that resolves to an array of place data objects.
*/
async function getDataFromBloggerInfo(place) {
try {
const response = await axios.get(
`https://calenderapp.blogspot.com/2025/01/touristplaces-${place}.html`
);
const $ = cheerio.load(response.data);
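// The post body is expected to contain a JSON array of place records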
const div = $(".post-body").text();
return JSON.parse(div);
} catch (error) {
console.error("Error fetching decoding parameters:", error);
throw error;
}
}
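/**
* Fetches the list of districts from the blogger info page and parses it.
* @returns {Promise<Array>} - A promise that resolves to an array of district objects.
*/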
async function getDistricts() {
try {
const response = await axios.get(
`https://calenderapp.blogspot.com/2024/12/touristplace-districts.html`
);
const $ = cheerio.load(response.data);
const div = $(".post-body").text();
return JSON.parse(div);
} catch (error) {
console.error("Error fetching decoding parameters:", error);
throw error;
}
}
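/**
* Entry point: fetches the district list and scrapes images for every district's places.
*/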
async function getImages() {
try {
const datas = await getDistricts();
await getLocalPlacesInfo(datas);
} catch (error) {
console.error("Error fetching decoding parameters:", error);
}
}
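// Note: the top-level await below requires an ES module context (e.g. "type": "module" in package.json or a .mjs file)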
// Execute the main entry point
await getImages();