Skip to content

Loop through pages to return all the product links

The goal is to obtain all product links from every page in the pagination. So far I have managed to print the links to the console with console.log(links). However, since I am completely new to this field and have no experience, I don't understand how to pass a value back to the caller with the return statement, i.e. return links.

With console.log(links) I get a warning: getLinks is not iterable

const puppeteer = require('puppeteer')

/**
 * Opens a visible browser, loads the first category page and clicks through
 * the pagination, logging the product links found on each loop iteration.
 *
 * As written, the function body contains no `return` statement, so the
 * promise it returns resolves to `undefined`.
 */
async function getLinks(){
    const browser = await puppeteer.launch({headless: false, defaultViewport: null});
    const page = await browser.newPage();

    const url = "https://example.com/product-category?p=1&nidx"
    

    await page.goto(url)

    // Keep clicking the first button inside `.change-country-buttons` for as
    // long as it is present (presumably a country-selection prompt — confirm).
    while(await page.$('.change-country-buttons > button:nth-child(1)')){
        await page.waitForTimeout(2000);
        await page.keyboard.press('ArrowDown');
        await page.waitForSelector('.change-country-buttons');
        await page.waitForTimeout(2000);
        await page.click('.change-country-buttons > button:nth-child(1)');
        await page.waitForTimeout(2000);
    }
    // Repeat for as long as a "next page" control exists on the current page.
    while(await page.$(".pagination .pagination--next")){
        await page.waitForTimeout(2000);
        // Scroll the "next" control into view before clicking it.
        await page.evaluate(() => {
            document.querySelector(".pagination .pagination--next").scrollIntoView();
    });
    await page.waitForTimeout(1000);
    await page.waitForSelector(".pagination .pagination--next")
    await page.waitForTimeout(500);
    await page.click('.pagination .pagination--next')
    // `links` is block-scoped to this loop body and is re-created on every
    // iteration, so nothing accumulates across pages.
    const links = await page.$$eval('.item__info > .mtc-link:nth-child(2)', (allAs) => { return allAs.map((a) => a.href) });
    await page.waitForTimeout(1500);
    console.log(links)
    }
}
// NOTE: this statement sits after the function's closing brace, where the
// loop-scoped `links` constant is not in scope.
return links // Is returning links only from the first page and then the loop stops

I tried something with Promise.all(), but it wasn't entirely clear to me how to use it here.

Please help and be gentle as I am just starting to learn the basics

Answer

You need to create an array and push all the helmet links from each page onto it.

This tested successfully for me.

const puppeteer = require('puppeteer')

/**
 * Collects the product links from every page of a paginated product listing.
 *
 * Each page is scraped first and the "next" control is clicked afterwards, so
 * the page that is currently displayed is always recorded, including the last
 * one, which has no "next" control. (Clicking first and scraping immediately
 * afterwards makes the result depend on how quickly the listing updates.)
 *
 * @param {string} [startUrl] - URL of the first listing page. Defaults to the
 *     listing used in the original example.
 * @returns {Promise<string[]>} The `href` of every matched product anchor, in
 *     the order the pages were visited.
 * @throws Re-throws any Puppeteer error (navigation, missing element, ...)
 *     after the browser has been closed.
 */
async function getLinks(startUrl = "https://www.motocard.com/en/motorcycle-road-gear/helmets/precio_150-3200/full-face?p=1&nidx"){
    const countryButton = '.change-country-buttons > button:nth-child(1)';
    const nextButton = '.pagination .pagination--next';
    const productLink = '.item__info > .mtc-link:nth-child(2)';

    // `page.waitForTimeout` is no longer available in recent Puppeteer
    // releases; a plain timer behaves the same in every version.
    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    const browser = await puppeteer.launch({headless: false, defaultViewport: null});
    try {
        const page = await browser.newPage();
        await page.goto(startUrl);

        // Keep clicking the first button inside `.change-country-buttons` for
        // as long as it is present, so it cannot block the listing.
        while (await page.$(countryButton)) {
            await pause(2000);
            await page.keyboard.press('ArrowDown');
            await page.waitForSelector('.change-country-buttons');
            await pause(2000);
            await page.click(countryButton);
            await pause(2000);
        }

        const allLinks = [];
        for (;;) {
            // Give the current page time to render before reading it.
            await pause(2000);
            const links = await page.$$eval(productLink, (anchors) => anchors.map((a) => a.href));
            allLinks.push(...links);

            // No "next" control means this was the last page.
            if (!(await page.$(nextButton))) {
                break;
            }

            // Scroll the control into view before clicking it.
            await page.evaluate((selector) => {
                document.querySelector(selector).scrollIntoView();
            }, nextButton);
            await pause(1000);
            await page.waitForSelector(nextButton);
            await pause(500);
            await page.click(nextButton);
            await pause(1500);
        }
        return allLinks;
    } finally {
        // Always release the browser, otherwise the Node process cannot exit.
        await browser.close();
    }
}

// Entry point: collect every product link and print the result. Failures are
// reported explicitly and signalled through the exit code instead of being
// left as an unhandled promise rejection.
(async () => {
    try {
        const links = await getLinks();
        console.log('done');
        console.log(links);
    } catch (err) {
        console.error('Failed to collect product links:', err);
        process.exitCode = 1;
    }
})();