绅士漫画( 优+++ )

https://www.wnacg.ru/

yurun-zhang (11648)1天前

修复了此前因漫画目录页过多,导致长篇漫画爬取被截断、中间内容缺失的问题。本人是书源新手,还在学习中。
二维码导入
{
    "bookSourceComment": "漫画发布页:https:\/\/wn01.link\/\n1⃣️无需翻墙,国内直连\n2⃣️漫画没加载出来尝试刷新一下\n3⃣️脸黑🌚刷不出来的,别用wifi,用流量看。",
    "bookSourceGroup": "漫画",
    "bookSourceName": "绅士漫画( 优+++ )",
    "bookSourceType": 2,
    "bookSourceUrl": "https:\/\/www.wnacg.ru\/",
    "customOrder": 453,
    "enabled": true,
    "enabledCookieJar": true,
    "enabledExplore": true,
    "exploreUrl": "[\n  {\n    \"title\": \"🔖排行🔖\",\n    \"url\": \"\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 1\n    }\n    },\n  {\n    \"title\": \"月榜\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-favorite_ranking-page-{{page}}-type-month.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  },\n  {\n    \"title\": \"周榜\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-favorite_ranking-page-{{page}}-type-week.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  },\n {\n    \"title\": \"日榜\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-favorite_ranking-page-{{page}}-type-day.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  },\n    {\n    \"title\": \"🔖同人志🔖\",\n    \"url\": \"\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 1\n    }\n    },\n  {\n    \"title\": \"创建时间\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-1.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  },\n  {\n    \"title\": \"上传时间\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-1.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  },\n {\n    \"title\": \"图片数\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-1.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  },\n  {\n    \"title\": \"🔖韩漫🔖\",\n    \"url\": \"\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 1\n    }\n    },\n  {\n    \"title\": \"创建时间\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-20.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      
\"layout_flexBasisPercent\": 0.2\n    }\n  },\n  {\n    \"title\": \"上传时间\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-20.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  },\n {\n    \"title\": \"图片数\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-20.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  },\n\n  {\n    \"title\": \"🔖单行本🔖\",\n    \"url\": \"\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 1\n    }\n    },\n  {\n    \"title\": \"创建时间\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-9.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  },\n  {\n    \"title\": \"上传时间\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-9.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  },\n {\n    \"title\": \"图片数\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-9.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  },\n  {\n    \"title\": \"🔖杂志&短篇🔖\",\n    \"url\": \"\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 1\n    }\n    },\n  {\n    \"title\": \"创建时间\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-10.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  },\n  {\n    \"title\": \"上传时间\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-10.html\",\n    \"style\": {\n      \"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  },\n {\n    \"title\": \"图片数\",\n    \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-10.html\",\n    \"style\": {\n      
\"layout_flexGrow\": 1,\n      \"layout_flexBasisPercent\": 0.2\n    }\n  }\n]",
    "header": "{\n  \"User-Agent\": \"Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/142.0.0.0 Safari\/537.36\",\n  \"Referer\": \"https:\/\/www.wnacg.ru\/\",\n  \"Accept-Language\": \"zh-CN,zh;q=0.9\"\n}",
    "lastUpdateTime": "1764665472889",
    "respondTime": 180000,
    "ruleBookInfo": {
        "coverUrl": "\/\/div[@class='asTB']\/div[@class='asTBcell uwthumb']\/img\/@src##\/\/\/\/##https:\/\/",
        "downloadUrls": "\/\/div[@id='ads']\/a[1]\/@href",
        "intro": "\/\/div[@class='asTBcell uwconn']\/p\/text()@js:result.split('\\n').join('')",
        "kind": "\/\/div[@class='addtags']\/a[@class='tagshow']\/text()",
        "wordCount": "\/\/div[@class='asTBcell uwconn']\/label\/text()@js:\nconst infoSplit =  result.split('\\n')\nconst info = []\nfor(var i  = 0;i<infoSplit .length;i++){\ninfo.push(infoSplit[i].split(':')[1])\n}\ninfo.join('  ')"
    },
    "ruleContent": {
        "content": "\/\/span[@id='imgarea']\/a\/img\/@src##\/\/##https:\/\/@js:\n\n\/\/ 获取前缀URL\nconst regexStr = \"^https:\/\/img5\\\\.qy0\\\\.ru\/data\/\\\\d+\/\\\\d+\/\";\nconst regex = new RegExp(regexStr, \"g\");\nconst prefixUrlMatch = result.match(regex);\nconst prefixUrl = prefixUrlMatch ? prefixUrlMatch[0] : \"\";\n\n\/\/ 解析图片信息数组(容错:数组为空时设为[])\nconst imgInfoList = JSON.parse(java.get('imgInfoList') || '[]');\n\n\/\/ 拼接完整图片链接(直接使用原始imgName)\nconst imgUrlList = [];\nif (prefixUrl) {\n    imgInfoList.forEach(item => {\n        \/\/ 仅做空值容错,不修改imgName原始值\n        const imgName = item.imgName || '';\n        const imgExtension = item.imgExtension || 'jpg';\n        const fullImgUrl = prefixUrl + imgName + \".\" + imgExtension;\n        imgUrlList.push(fullImgUrl);\n    });\n}\n\n\/\/ 规则输出拼接后的img标签\nimgUrlList.map(x=>'<img src=\\\"'+x+'\\\">')\n.join(\"\\n\")",
        "imageStyle": "FULL"
    },
    "ruleExplore": {},
    "ruleSearch": {
        "bookList": "\/\/div[@class='gallary_wrap']\/ul\/li",
        "bookUrl": "\/\/div[@class='info']\/div[@class='title']\/a\/@href,{\"webView\":true}",
        "checkKeyWord": "元魔法",
        "coverUrl": "\/\/li\/div[1]\/a\/img\/@src",
        "intro": "\/\/li\/div[@class='info']\/div[@class='info_col']\/text()@js:\nvar timeStr = result.split(\",\")[1] || \"\";\nvar updateTime = timeStr.match(\/創建於(\\d{4}-\\d{2}-\\d{2}\\s\\d{2}:\\d{2}:\\d{2})\/)?.[1] || \"无时间\";\n'更新时间:  ' + updateTime;",
        "kind": "\/\/li\/div[1]\/@class@js:\nif (result[0].indexOf('cate-1') !== -1) { \n    '同人志\/汉化'; \/\/ 加return,否则无返回值\n} else if (result[0].indexOf('cate-12') !== -1) {\n    '同人志\/日语'; \n} else if (result[0].indexOf('cate-9') !== -1) {\n    '单行本\/汉化';\n} else if (result[0].indexOf('cate-13') !== -1) {\n    '单行本\/日语';\n} else if (result[0].indexOf('cate-10') !== -1) {\n    '杂志&短篇\/汉化';\n} else if (result[0].indexOf('cate-14') !== -1) {\n    '杂志&短篇\/日语';\n} else if (result[0].indexOf('cate-18') !== -1) {\n     '杂志&短篇\/English';\n} else if (result[0].indexOf('cate-20') !== -1) {\n    '韩漫\/汉化';\n} else if (result[0].indexOf('cate-21') !== -1) {\n    '韩漫\/生肉';\n} else {\n    ''; \n}",
        "name": "\/\/div[@class='info']\/div[@class='title']\/a\/\/text()@js:\nconst bookname= result.split('\\n').join('').replace(\/^.*?]\/g, '')\nbookname",
        "wordCount": "\/\/li\/div[@class='info']\/div[@class='info_col']\/text()@js:\nvar imgNum = result.split(\",\")[0].match(\/\\d+(?=張圖片)\/g)[0]\nimgNum+'页'"
    },
    "ruleToc": {
        "chapterList": "\/\/div[@class='gallary_wrap tb']\/ul\/li[1]@js:\nvar imgInfoList = [];\nvar nextPageUrl = [];\n\n\/\/ === 自动生成完整分页列表 ===\n\n\/\/ 1. 获取分页区的所有链接字符串\nvar paginatorHrefs = java.getString(\"\/\/div[@class='f_left paginator']\/\/a\/@href\") || \"\";\n\n\/\/ 2. 初始化变量\nvar maxPage = 1;\nvar aid = \"\";\nvar urlPattern = \/page-(\\d+)-aid-(\\d+)\/; \/\/ 匹配 page-数字-aid-数字\n\n\/\/ 3. 遍历链接找到“最大页码”和“相册ID”\n\/\/ 分割字符串并过滤空行\nvar links = paginatorHrefs.split('\\n');\nfor (var i = 0; i < links.length; i++) {\n    var link = links[i].trim();\n    var match = link.match(urlPattern);\n    \n    if (match) {\n        var pageNum = parseInt(match[1]); \/\/ 提取页码\n        var currentAid = match[2];        \/\/ 提取AID\n        \n        \/\/ 更新最大页码\n        if (pageNum > maxPage) {\n            maxPage = pageNum;\n        }\n        \/\/ 提取AID(如果还没提取到)\n        if (!aid) {\n            aid = currentAid;\n        }\n    }\n}\n\n\/\/ 4. 根据获取到的 maxPage 和 aid 生成完整的 URL 列表\nif (aid && maxPage > 1) {\n    for (var i = 1; i <= maxPage; i++) {\n        \/\/ 拼接标准URL格式:photos-index-page-{页码}-aid-{ID}.html\n        nextPageUrl.push(\"https:\/\/www.wnacg.ru\/photos-index-page-\" + i + \"-aid-\" + aid + \".html\");\n    }\n} else {\n    \/\/ 如果没有找到分页(只有一页的情况),直接使用当前页面\n    nextPageUrl.push(baseUrl);\n}\n\njava.log(\"生成的总页数: \" + nextPageUrl.length);\n\n\njava.log(\"nexturl\"+nextPageUrl.join(\"\\n\"))\n\/\/ 2. 核心工具函数:预处理HTML(移除注释+清理空格)\nfunction cleanHtml(html) {\n    if (!html) return \"\";\n    \/\/ 步骤1:移除所有HTML注释(<!-- ... -->),彻底消除干扰\n    html = html.replace(\/<!--[\\s\\S]*?-->\/g, \"\");\n    \/\/ 步骤2:移除多余空白字符(换行、制表符等),简化匹配\n    html = html.replace(\/\\s+\/g, \" \");\n    return html;\n}\n\n\/\/ 3. 
核心工具函数:提取单页的“序号数组”和“图片src数组”\nfunction extractPageData(cleanedHtml) {\n    var names = []; \/\/ 序号数组(001、002、003...)\n    var srcs = [];  \/\/ 图片src数组(与序号一一对应)\n\n    \/\/ 提取所有序号(<span class=\"name tb\">xxx<\/span>)\n    var nameRegex = \/<span class=\"name tb\">([\\d]+)<\\\/span>\/g;\n    var nameMatch;\n    while ((nameMatch = nameRegex.exec(cleanedHtml)) !== null) {\n        names.push(nameMatch[1]);\n    }\n\n    \/\/ 提取所有有效图片src(与序号对应的img,alt包含“_序号”)\n    var srcRegex = \/<img src=\"([^\"]+)\" alt=\"[^\"]+_([\\d]+)\"\/g;\n    var srcMatch;\n    while ((srcMatch = srcRegex.exec(cleanedHtml)) !== null) {\n        \/\/ 确保src对应的序号在names数组中(避免无关图片)\n        if (names.includes(srcMatch[2])) {\n            srcs.push(srcMatch[1]);\n        }\n    }\n\n    return { names: names, srcs: srcs };\n}\n\n\/\/ 4. 执行多页提取\ntry {\n    for (var p = 0; p < nextPageUrl.length; p++) {\n        var pageUrl = nextPageUrl[p];\n        var pageNum = p + 1;\n\n        \/\/ 步骤1:获取并清理HTML(移除注释+多余空格)\n        var rawHtml = new String(java.ajax(pageUrl)) || \"\";\n        var cleanedHtml = cleanHtml(rawHtml);\n\n        \/\/ 步骤2:提取当前页的序号和src数组\n        var pageData = extractPageData(cleanedHtml);\n        var pageNames = pageData.names;\n        var pageSrcs = pageData.srcs;\n\n        \/\/ 步骤3:按索引配对(序号和src一一对应),避免漏配\n        var addedNum = 0;\n        for (var i = 0; i < pageNames.length; i++) {\n            var imgName = pageNames[i];\n            var imgSrc = pageSrcs[i] || \"\";\n            var imgExtension = \"\";\n\n            \/\/ 提取扩展名\n            if (imgSrc) {\n                var dotIndex = imgSrc.lastIndexOf(\".\");\n                if (dotIndex !== -1) {\n                    imgExtension = imgSrc.slice(dotIndex + 1).toLowerCase().split(\/[?#]\/)[0];\n                }\n            }\n\n            \/\/ 去重并添加到数组\n            if (imgName && imgExtension && !imgInfoList.some(item => item.imgName === imgName)) {\n                imgInfoList.push({ imgName: imgName, imgExtension: 
imgExtension });\n                addedNum++;\n            }\n        }\n\n    }\n} catch (e) {\n    java.log(\"提取报错:\" + e.message);\n}\n\nimgInfoList.sort((a, b) => parseInt(a.imgName) - parseInt(b.imgName));\n\n\/\/ 返回原结果,不影响目录列表\njava.put('imgInfoList',JSON.stringify(imgInfoList))\nresult;",
        "chapterName": "\/\/li\/text()@js:'全话阅读'",
        "chapterUrl": "\/\/li\/\/a\/@href,{\"webView\":true}",
        "updateTime": "\/\/li\/div[@class='info']\/div[@class='info_col']\/text()"
    },
    "searchUrl": "https:\/\/www.wnacg.ru\/search\/?q={{key}}&f=_all&s=create_time_DESC&syn=yes",
    "weight": 500
}
广告