利用python异步提取奥拉星海报
哔哩哔哩直达
import re
import requests
import aiohttp
import asyncio
import time
import aiofiles
import os
from PIL import Image
#不进行图像处理,采用h5照片本来的格式 1600*900 需要20秒
#进行图像处理,变为1920*1080 需要70秒
#图像处理这部分没办法开启异步操作,所以我将这个问题交给用户自己选择
def get_source():
    """Download the game's JS bundle and extract the skin-poster entries.

    Returns an iterator of ``re.Match`` objects whose ``data`` named group
    holds the raw constructor fragment for one skin, e.g.
    ``["default"](0,"[YES.I.DO]至臻羲和``.
    """
    url = "http://aola.100bt.com/h5/js/gamemain~30220415014450.js"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                             "Chrome/53.0.2785.101 Safari/537.36"}
    resp = requests.get(url, headers=headers)
    resp.encoding = "utf-8"
    # Step 1: pre-filter the bundle down to the region that holds the
    # poster table, so the per-entry regex below scans far less text.
    region = re.search(
        r"PetSkinConfig.DATAS =(.*?)PetSkinConfig.SAME_SKIN_ARRAY = ",
        resp.text,
        re.S,
    ).group()
    # Step 2: pull out every `new _PetSkinInfo...(<id>,"<name>"` entry.
    entry_pattern = re.compile(
        r'new _PetSkinInfo__WEBPACK_IMPORTED_MODULE_0__(?P<data>.*?)",',
        re.S,
    )
    return entry_pattern.finditer(region)

def get_urls_names(data):
    """Convert regex matches from ``get_source()`` into (name, url) pairs.

    Args:
        data: iterable of ``re.Match`` objects exposing a ``data`` group,
            as produced by ``get_source()``.

    Returns:
        A ``zip`` of ``(name, url)`` tuples for every plain poster entry.
        Entries whose name contains a bracket or "皮肤" are skipped — those
        ids have no matching background image on the CDN.
    """
    urls = []
    names = []
    for match in data:
        # Strip the webpack constructor noise so only `id,name` remains,
        # e.g. `new s.default(0,"[YES.I.DO]至臻羲和"` -> `0,[YES.I.DO]至臻羲和`.
        entry = (
            match.group("data")
            .replace('new _PetSkinInfo__WEBPACK_IMPORTED_MODULE_0__["default"](', '')
            .replace('"', '')
            .replace('[default]', '')
            .replace(' ', '')
            .replace('(', '')
        )
        if "[" in entry or "皮肤" in entry:
            continue
        fields = entry.split(",")  # split once instead of twice
        if len(fields) < 2:
            continue  # malformed entry — skip instead of raising IndexError
        skin_id, name = fields[0], fields[1]  # avoid shadowing builtin `id`
        urls.append(
            f"http://aola.100bt.com/h5/pet/petskin/background/bg/img_petskinbackground_{skin_id}.png"
        )
        names.append(name)
    return zip(names, urls)


async def fetch(session, url):
    """GET *url* through *session*; return the body bytes, or None on a 404."""
    async with session.get(url) as response:
        return None if response.status == 404 else await response.read()


async def save_response(url, name, response):
    """Write the downloaded poster bytes to pictures/<name>.png.

    A *response* of None (a 404 body) is silently skipped. *url* is unused
    here; it is kept so the signature mirrors ``save_response_img``.
    """
    if response is None:
        return
    async with aiofiles.open(f"pictures/{name}.png", mode="wb") as f:
        await f.write(response)
        print(f"{name}下载完成")

async def save_response_img(url, name, response):
    """Save the poster to pictures/<name>.png, then upscale it to 1920x1080.

    Args:
        url: source URL (unused; kept for interface symmetry with
            ``save_response``).
        name: poster name, used as the file name.
        response: raw image bytes, or None for a 404 that should be skipped.

    NOTE(review): the PIL resize is synchronous and blocks the event loop;
    per the module comments the author accepted that trade-off deliberately.
    """
    if response is None:
        return
    path = f"pictures/{name}.png"
    async with aiofiles.open(path, mode="wb") as f:
        await f.write(response)
    # BUG FIX: Image.open previously ran *inside* the aiofiles context,
    # i.e. before the write handle was closed/flushed, so PIL could read a
    # truncated file. Reopen only after the write handle is closed.
    with Image.open(path) as img:
        # Force pixel data into memory so the read handle can be released.
        img.load()
        # Upscale with LANCZOS resampling for the best quality.
        img_resized = img.resize((1920, 1080), Image.Resampling.LANCZOS)
        # Overwrite the original file with the resized image.
        new_file_path = os.path.join(os.getcwd() + "/pictures", f"{name}.png")
        img_resized.save(new_file_path)
    print(f"{name}下载完成")



async def main(id):
    """Concurrently download every poster.

    Args:
        id: mode flag (str or int) — 1 saves the raw 1600x900 files,
            2 additionally resizes each poster to 1920x1080.
    """
    # Regex-extract the entries carrying each poster's name and id.
    data = get_source()
    names = []
    urls = []
    for name, url in get_urls_names(data):
        names.append(name)
        urls.append(url)
    # BUG FIX: the dispatch below used to read the global `d`, which only
    # exists when the `c == 1` branch of the script ran; use our own
    # parameter instead so main() works for any caller.
    mode = int(id)
    timeout = aiohttp.ClientTimeout(total=800)  # overall timeout: 800 seconds
    connector = aiohttp.TCPConnector(limit=30)  # cap concurrency at 30 connections
    async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
        # fetch() returns None for 404 pages, so missing ids are detected here.
        tasks = [asyncio.create_task(fetch(session, url)) for url in urls]
        responses = await asyncio.gather(*tasks)
        for url, name, response in zip(urls, names, responses):
            if response is None:
                print(f"{url} returned 404")
            elif mode == 1:
                await save_response(url, name, response)
            elif mode == 2:
                await save_response_img(url, name, response)


if __name__ == '__main__':
    # Interactive entry point: mode 1 downloads every poster, mode 2
    # downloads a single poster chosen by id.
    print("下载全部海报请输入1,指定海报请输入2,然后回车")
    c = input()
    if int(c) == 1:
        print("海报会自动下载到脚本同级目录的pictures文件夹中")
        print("海报默认为1600*900,优化后为1920*1080,优化需要多耗时一分钟")
        print("默认请输入1,优化请输入2,然后回车")
        d = input()
        print("请等待1-2分钟")
        # Create the output folder; no-op if it already exists.
        os.makedirs(os.getcwd() + "/pictures", exist_ok=True)
        t1 = time.time()
        asyncio.run(main(d))
        t2 = time.time()
        print(f"下载完成,总共用时{t2 - t1}秒")
    if int(c) == 2:
        data = get_source()
        # Print every plain `id,name` entry so the user can pick an id.
        for g in data:
            i = g.group("data").replace('new _PetSkinInfo__WEBPACK_IMPORTED_MODULE_0__["default"](', '') \
                .replace('"', '').replace('[default]', '').replace(' ', '').replace('(', '')
            if "[" not in i and "皮肤" not in i:
                print(i)
        poster_id = input("请输入对应海报编号并回车,编号如上")  # renamed: `id` shadowed the builtin
        url = f"http://aola.100bt.com/h5/pet/petskin/background/bg/img_petskinbackground_{poster_id}.png"
        # BUG FIX: the image was previously fetched twice (once as .text
        # into an unused variable, once as .content); download it once.
        image_bytes = requests.get(url).content
        out_path = f"{poster_id}.png"
        with open(out_path, mode="wb") as f:
            f.write(image_bytes)
        # BUG FIX: reopen with PIL only after the write handle is closed —
        # the old code called Image.open while the buffered write handle
        # was still open, so PIL could see a partially flushed file.
        with Image.open(out_path) as img:
            # Force pixel data into memory so the read handle can be released.
            img.load()
            # Upscale with LANCZOS resampling for the best quality.
            img_resized = img.resize((1920, 1080), Image.Resampling.LANCZOS)
            new_file_path = os.path.join(os.getcwd(), f"{poster_id}.png")
            img_resized.save(new_file_path)
        print(f"下载完成")
# 打包方式
# pyinstaller -F -i 1.ico 海报异步1.0.py
暂无评论

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇