from loguru import logger
import aiohttp
import asyncio
from bs4 import BeautifulSoup
class Scraper:
|
|
def __init__(
|
|
self, domains: list[str], rps: int, proxy: list[str] = None
|
|
):
|
|
self.domains = domains
|
|
self.proxy = proxy
|
|
self.rps = rps
|
|
|
|
self.parse_tasks = set()
|
|
|
|
async def fetch(self, url: str):
|
|
async with aiohttp.ClientSession() as session:
|
|
async with session.get(url) as resp:
|
|
return await resp.text()
|
|
|
|
async def parse(self, url: str) -> str:
|
|
resp = await self.fetch(url)
|
|
logger.debug(f"Received response from {url}!")
|
|
return resp
|
|
|
|
async def scrape(self):
|
|
for domain in self.domains:
|
|
url = f"https://{domain}"
|
|
parse_task = asyncio.create_task(self.parse(url))
|
|
self.parse_tasks.add(parse_task)
|
|
|
|
done, _pending = await asyncio.wait(self.parse_tasks)
|