绅士漫画( 优+++ )
https://www.wnacg.ru/
yurun-zhang (11648)1天前
修复了此前因漫画目录页过多而导致长篇漫画爬取被截断、中间内容缺失的问题。本人是书源新手,仍在学习中。
{
"bookSourceComment": "漫画发布页:https:\/\/wn01.link\/\n1⃣️无需翻墙,国内直连\n2⃣️漫画没加载出来尝试刷新一下\n3⃣️脸黑🌚刷不出来的,别用wifi,用流量看。",
"bookSourceGroup": "漫画",
"bookSourceName": "绅士漫画( 优+++ )",
"bookSourceType": 2,
"bookSourceUrl": "https:\/\/www.wnacg.ru\/",
"customOrder": 453,
"enabled": true,
"enabledCookieJar": true,
"enabledExplore": true,
"exploreUrl": "[\n {\n \"title\": \"🔖排行🔖\",\n \"url\": \"\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 1\n }\n },\n {\n \"title\": \"月榜\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-favorite_ranking-page-{{page}}-type-month.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n {\n \"title\": \"周榜\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-favorite_ranking-page-{{page}}-type-week.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n {\n \"title\": \"日榜\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-favorite_ranking-page-{{page}}-type-day.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n {\n \"title\": \"🔖同人志🔖\",\n \"url\": \"\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 1\n }\n },\n {\n \"title\": \"创建时间\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-1.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n {\n \"title\": \"上传时间\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-1.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n {\n \"title\": \"图片数\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-1.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n {\n \"title\": \"🔖韩漫🔖\",\n \"url\": \"\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 1\n }\n },\n {\n \"title\": \"创建时间\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-20.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n {\n \"title\": \"上传时间\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-20.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n {\n \"title\": \"图片数\",\n 
\"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-20.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n\n {\n \"title\": \"🔖单行本🔖\",\n \"url\": \"\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 1\n }\n },\n {\n \"title\": \"创建时间\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-9.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n {\n \"title\": \"上传时间\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-9.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n {\n \"title\": \"图片数\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-9.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n {\n \"title\": \"🔖杂志&短篇🔖\",\n \"url\": \"\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 1\n }\n },\n {\n \"title\": \"创建时间\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-10.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n {\n \"title\": \"上传时间\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-10.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n },\n {\n \"title\": \"图片数\",\n \"url\": \"https:\/\/www.wnacg.ru\/albums-index-page-{{page}}-cate-10.html\",\n \"style\": {\n \"layout_flexGrow\": 1,\n \"layout_flexBasisPercent\": 0.2\n }\n }\n]",
"header": "{\n \"User-Agent\": \"Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/142.0.0.0 Safari\/537.36\",\n \"Referer\": \"https:\/\/www.wnacg.ru\/\",\n \"Accept-Language\": \"zh-CN,zh;q=0.9\"\n}",
"lastUpdateTime": "1764665472889",
"respondTime": 180000,
"ruleBookInfo": {
"coverUrl": "\/\/div[@class='asTB']\/div[@class='asTBcell uwthumb']\/img\/@src##\/\/\/\/##https:\/\/",
"downloadUrls": "\/\/div[@id='ads']\/a[1]\/@href",
"intro": "\/\/div[@class='asTBcell uwconn']\/p\/text()@js:result.split('\\n').join('')",
"kind": "\/\/div[@class='addtags']\/a[@class='tagshow']\/text()",
"wordCount": "\/\/div[@class='asTBcell uwconn']\/label\/text()@js:\nconst infoSplit = result.split('\\n')\nconst info = []\nfor(var i = 0;i<infoSplit .length;i++){\ninfo.push(infoSplit[i].split(':')[1])\n}\ninfo.join(' ')"
},
"ruleContent": {
"content": "\/\/span[@id='imgarea']\/a\/img\/@src##\/\/##https:\/\/@js:\n\n\/\/ 获取前缀URL\nconst regexStr = \"^https:\/\/img5\\\\.qy0\\\\.ru\/data\/\\\\d+\/\\\\d+\/\";\nconst regex = new RegExp(regexStr, \"g\");\nconst prefixUrlMatch = result.match(regex);\nconst prefixUrl = prefixUrlMatch ? prefixUrlMatch[0] : \"\";\n\n\/\/ 解析图片信息数组(容错:数组为空时设为[])\nconst imgInfoList = JSON.parse(java.get('imgInfoList') || '[]');\n\n\/\/ 拼接完整图片链接(直接使用原始imgName)\nconst imgUrlList = [];\nif (prefixUrl) {\n imgInfoList.forEach(item => {\n \/\/ 仅做空值容错,不修改imgName原始值\n const imgName = item.imgName || '';\n const imgExtension = item.imgExtension || 'jpg';\n const fullImgUrl = prefixUrl + imgName + \".\" + imgExtension;\n imgUrlList.push(fullImgUrl);\n });\n}\n\n\/\/ 规则输出拼接后的img标签\nimgUrlList.map(x=>'<img src=\\\"'+x+'\\\">')\n.join(\"\\n\")",
"imageStyle": "FULL"
},
"ruleExplore": {},
"ruleSearch": {
"bookList": "\/\/div[@class='gallary_wrap']\/ul\/li",
"bookUrl": "\/\/div[@class='info']\/div[@class='title']\/a\/@href,{\"webView\":true}",
"checkKeyWord": "元魔法",
"coverUrl": "\/\/li\/div[1]\/a\/img\/@src",
"intro": "\/\/li\/div[@class='info']\/div[@class='info_col']\/text()@js:\nvar timeStr = result.split(\",\")[1] || \"\";\nvar updateTime = timeStr.match(\/創建於(\\d{4}-\\d{2}-\\d{2}\\s\\d{2}:\\d{2}:\\d{2})\/)?.[1] || \"无时间\";\n'更新时间: ' + updateTime;",
"kind": "\/\/li\/div[1]\/@class@js:\n\/\/ Map the exact category id to its label. A plain indexOf('cate-1') substring test also matches\n\/\/ 'cate-10', 'cate-12', 'cate-13', 'cate-14' and 'cate-18', so those categories were mislabelled.\nvar kindById = {\n '1': '同人志\/汉化',\n '12': '同人志\/日语',\n '9': '单行本\/汉化',\n '13': '单行本\/日语',\n '10': '杂志&短篇\/汉化',\n '14': '杂志&短篇\/日语',\n '18': '杂志&短篇\/English',\n '20': '韩漫\/汉化',\n '21': '韩漫\/生肉'\n};\n\/\/ (\\d+) is greedy, so the whole numeric id is captured.\nvar cateMatch = String(result[0]).match(\/cate-(\\d+)\/);\n\/\/ The last expression is the rule's value; unknown or missing categories yield ''.\n(cateMatch && kindById[cateMatch[1]]) || '';",
"name": "\/\/div[@class='info']\/div[@class='title']\/a\/\/text()@js:\nconst bookname= result.split('\\n').join('').replace(\/^.*?]\/g, '')\nbookname",
"wordCount": "\/\/li\/div[@class='info']\/div[@class='info_col']\/text()@js:\n\/\/ Guard the match: indexing a null match result threw when the text had no page count.\nvar pageCountMatch = String(result).split(\",\")[0].match(\/\\d+(?=張圖片)\/);\n\/\/ Fall back to an empty string instead of failing the whole rule.\npageCountMatch ? pageCountMatch[0] + '页' : '';"
},
"ruleToc": {
"chapterList": "\/\/div[@class='gallary_wrap tb']\/ul\/li[1]@js:\nvar imgInfoList = [];\nvar nextPageUrl = [];\n\n\/\/ === 自动生成完整分页列表 ===\n\n\/\/ 1. 获取分页区的所有链接字符串\nvar paginatorHrefs = java.getString(\"\/\/div[@class='f_left paginator']\/\/a\/@href\") || \"\";\n\n\/\/ 2. 初始化变量\nvar maxPage = 1;\nvar aid = \"\";\nvar urlPattern = \/page-(\\d+)-aid-(\\d+)\/; \/\/ 匹配 page-数字-aid-数字\n\n\/\/ 3. 遍历链接找到“最大页码”和“相册ID”\n\/\/ 分割字符串并过滤空行\nvar links = paginatorHrefs.split('\\n');\nfor (var i = 0; i < links.length; i++) {\n var link = links[i].trim();\n var match = link.match(urlPattern);\n \n if (match) {\n var pageNum = parseInt(match[1]); \/\/ 提取页码\n var currentAid = match[2]; \/\/ 提取AID\n \n \/\/ 更新最大页码\n if (pageNum > maxPage) {\n maxPage = pageNum;\n }\n \/\/ 提取AID(如果还没提取到)\n if (!aid) {\n aid = currentAid;\n }\n }\n}\n\n\/\/ 4. 根据获取到的 maxPage 和 aid 生成完整的 URL 列表\nif (aid && maxPage > 1) {\n for (var i = 1; i <= maxPage; i++) {\n \/\/ 拼接标准URL格式:photos-index-page-{页码}-aid-{ID}.html\n nextPageUrl.push(\"https:\/\/www.wnacg.ru\/photos-index-page-\" + i + \"-aid-\" + aid + \".html\");\n }\n} else {\n \/\/ 如果没有找到分页(只有一页的情况),直接使用当前页面\n nextPageUrl.push(baseUrl);\n}\n\njava.log(\"生成的总页数: \" + nextPageUrl.length);\n\n\njava.log(\"nexturl\"+nextPageUrl.join(\"\\n\"))\n\/\/ 2. 核心工具函数:预处理HTML(移除注释+清理空格)\nfunction cleanHtml(html) {\n if (!html) return \"\";\n \/\/ 步骤1:移除所有HTML注释(<!-- ... -->),彻底消除干扰\n html = html.replace(\/<!--[\\s\\S]*?-->\/g, \"\");\n \/\/ 步骤2:移除多余空白字符(换行、制表符等),简化匹配\n html = html.replace(\/\\s+\/g, \" \");\n return html;\n}\n\n\/\/ 3. 
核心工具函数:提取单页的“序号数组”和“图片src数组”\nfunction extractPageData(cleanedHtml) {\n var names = []; \/\/ 序号数组(001、002、003...)\n var srcs = []; \/\/ 图片src数组(与序号一一对应)\n\n \/\/ 提取所有序号(<span class=\"name tb\">xxx<\/span>)\n var nameRegex = \/<span class=\"name tb\">([\\d]+)<\\\/span>\/g;\n var nameMatch;\n while ((nameMatch = nameRegex.exec(cleanedHtml)) !== null) {\n names.push(nameMatch[1]);\n }\n\n \/\/ 提取所有有效图片src(与序号对应的img,alt包含“_序号”)\n var srcRegex = \/<img src=\"([^\"]+)\" alt=\"[^\"]+_([\\d]+)\"\/g;\n var srcMatch;\n while ((srcMatch = srcRegex.exec(cleanedHtml)) !== null) {\n \/\/ 确保src对应的序号在names数组中(避免无关图片)\n if (names.includes(srcMatch[2])) {\n srcs.push(srcMatch[1]);\n }\n }\n\n return { names: names, srcs: srcs };\n}\n\n\/\/ 4. 执行多页提取\ntry {\n for (var p = 0; p < nextPageUrl.length; p++) {\n var pageUrl = nextPageUrl[p];\n var pageNum = p + 1;\n\n \/\/ 步骤1:获取并清理HTML(移除注释+多余空格)\n var rawHtml = new String(java.ajax(pageUrl)) || \"\";\n var cleanedHtml = cleanHtml(rawHtml);\n\n \/\/ 步骤2:提取当前页的序号和src数组\n var pageData = extractPageData(cleanedHtml);\n var pageNames = pageData.names;\n var pageSrcs = pageData.srcs;\n\n \/\/ 步骤3:按索引配对(序号和src一一对应),避免漏配\n var addedNum = 0;\n for (var i = 0; i < pageNames.length; i++) {\n var imgName = pageNames[i];\n var imgSrc = pageSrcs[i] || \"\";\n var imgExtension = \"\";\n\n \/\/ 提取扩展名\n if (imgSrc) {\n var dotIndex = imgSrc.lastIndexOf(\".\");\n if (dotIndex !== -1) {\n imgExtension = imgSrc.slice(dotIndex + 1).toLowerCase().split(\/[?#]\/)[0];\n }\n }\n\n \/\/ 去重并添加到数组\n if (imgName && imgExtension && !imgInfoList.some(item => item.imgName === imgName)) {\n imgInfoList.push({ imgName: imgName, imgExtension: imgExtension });\n addedNum++;\n }\n }\n\n }\n} catch (e) {\n java.log(\"提取报错:\" + e.message);\n}\n\nimgInfoList.sort((a, b) => parseInt(a.imgName) - parseInt(b.imgName));\n\n\/\/ 返回原结果,不影响目录列表\njava.put('imgInfoList',JSON.stringify(imgInfoList))\nresult;",
"chapterName": "\/\/li\/text()@js:'全话阅读'",
"chapterUrl": "\/\/li\/\/a\/@href,{\"webView\":true}",
"updateTime": "\/\/li\/div[@class='info']\/div[@class='info_col']\/text()"
},
"searchUrl": "https:\/\/www.wnacg.ru\/search\/?q={{key}}&f=_all&s=create_time_DESC&syn=yes",
"weight": 500
}