Fetch URLs concurrently
parent e044b7d9f0
commit ca13a5440d
requirements.txt
@@ -2,4 +2,5 @@ loguru
 pretty_errors
 colorama
 pyyaml
 beautifulsoup4
+aiohttp[speedups]
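(The speedups extra installs aiohttp's optional accelerators, such as aiodns for asynchronous DNS resolution and Brotli for compressed responses, which helps a scraper that fans out many requests at once.)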
scraper.py (24 changed lines)
@@ -1,3 +1,8 @@
 from loguru import logger
 
+import aiohttp
+import asyncio
+
 from bs4 import BeautifulSoup
+
+
@@ -11,5 +16,22 @@ class Scraper:
         self.rps_min = rps_min
         self.rps_max = rps_max
 
+        self.parse_tasks = set()
+
+    async def fetch(self, url: str):
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url) as resp:
+                return await resp.text()
+
+    async def parse(self, url: str) -> str:
+        resp = await self.fetch(url)
+        logger.debug(f"Received response from {url}!")
+        return resp
+
     async def scrape(self):
-        ...
+        for domain in self.domains:
+            url = f"https://{domain}"
+            parse_task = asyncio.create_task(self.parse(url))
+            self.parse_tasks.add(parse_task)
+
+        done, _pending = await asyncio.wait(self.parse_tasks)
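A minimal sketch of how the new concurrent scrape() could be driven. The constructor arguments below (domains, rps_min, rps_max) are assumptions inferred from the attributes visible in the diff, not the actual Scraper signature:

import asyncio

from scraper import Scraper

async def main():
    # Hypothetical arguments, inferred from self.domains / self.rps_min /
    # self.rps_max in the diff above; adjust to the real constructor.
    scraper = Scraper(domains=["example.com", "example.org"], rps_min=1, rps_max=5)
    await scraper.scrape()

if __name__ == "__main__":
    asyncio.run(main())

Note that fetch() opens a fresh ClientSession per request; creating one session in scrape() and reusing it across fetches would let aiohttp pool connections, which its documentation recommends.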