Skip to content

Commit 9993274

Browse files
Rojikku and error7404 authored
fix: Novelfire Scraping (#2000)
* fix: Novelfire theoretical scrape fix
* Novelfire: Add old paged
* Novelfire: Add monopage option
  Co-authored-by: Justin COLLON <48163201+error7404@users.noreply.github.com>
* Novelfire: Implement new deSlash in old function
* fix: Novelfire search repeats
---------
Co-authored-by: Justin COLLON <48163201+error7404@users.noreply.github.com>
1 parent a8e63d6 commit 9993274

1 file changed

Lines changed: 142 additions & 7 deletions

File tree

plugins/english/novelfire.ts

Lines changed: 142 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,27 @@ import { Plugin } from '@/types/plugin';
44
import { NovelStatus } from '@libs/novelStatus';
55
import { Filters, FilterTypes } from '@libs/filterInputs';
66
import { defaultCover } from '@/types/constants';
7+
import { storage } from '@libs/storage';
78

89
class NovelFire implements Plugin.PluginBase {
910
id = 'novelfire';
1011
name = 'Novel Fire';
11-
version = '1.1.5';
12+
version = '1.1.7';
1213
icon = 'src/en/novelfire/icon.png';
1314
site = 'https://novelfire.net/';
1415

16+
/**
 * Paths of the novels already returned by the current listing.
 * Cleared when page 1 is requested and consulted while parsing results so
 * that a novel the site repeats is only returned once.
 */
novelList: string[] = [];

/** Value stored under the 'singlePage' key, read once when the plugin instance is created. */
singlePage = storage.get('singlePage');

/** Settings declared by this plugin; `singlePage` is a switch controlling single-page chapter loading. */
pluginSettings = {
  singlePage: {
    value: '',
    label:
      'Force load all chapters on a single page (Slower & use more data)',
    type: 'Switch',
  },
};
27+
1528
async getCheerio(url: string, search: boolean): Promise<CheerioAPI> {
1629
const r = await fetchApi(url);
1730
if (!r.ok && search != true)
@@ -34,6 +47,9 @@ class NovelFire implements Plugin.PluginBase {
3447
filters,
3548
}: Plugin.PopularNovelsOptions<typeof this.filters>,
3649
): Promise<Plugin.NovelItem[]> {
50+
if (pageNo == 1) {
51+
this.novelList = [];
52+
}
3753
let url = this.site + 'search-adv';
3854
if (showLatestNovels) {
3955
url += `?ctgcon=and&totalchapter=0&ratcon=min&rating=0&status=-1&sort=date&tagcon=and&page=${pageNo}`;
@@ -76,6 +92,12 @@ class NovelFire implements Plugin.PluginBase {
7692

7793
if (!novelPath) return;
7894

95+
if (this.novelList.includes(novelPath)) {
96+
return;
97+
} else {
98+
this.novelList.push(novelPath);
99+
}
100+
79101
return {
80102
name: novelName,
81103
cover: novelCover,
@@ -127,15 +149,81 @@ class NovelFire implements Plugin.PluginBase {
127149
return sortedChapters;
128150
}
129151

130-
async parseNovel(novelPathRaw: string): Promise<Plugin.SourceNovel> {
152+
/**
 * Loads every chapter-list page of a novel and returns all chapters as one flat list.
 *
 * Pages 1..`pages` are fetched through `parsePage` in chunks: the pages of a
 * chunk are requested in parallel, the chunks one after another. When a chunk
 * fails with `NovelFireThrottlingError` the whole chunk is retried after a
 * pause; any other error is rethrown immediately.
 *
 * @param novelPath Novel path, passed to `parsePage` unchanged.
 * @param pages Number of chapter-list pages to load.
 * @returns The chapters of all pages, in page order.
 * @throws NovelFireThrottlingError when a chunk is still throttled on its last allowed attempt.
 */
async getAllChaptersForce(
  novelPath: string,
  pages: number,
): Promise<Plugin.ChapterItem[]> {
  const pagesArray = Array.from({ length: pages }, (_, i) => i + 1);

  // When pages > ~30, we get rate limited. To mitigate, split into chunks and retry chunk on rate limit with delay.
  const chunkSize = 5; // 5 pages per chunk was tested to be a good balance between speed and rate limiting.
  const retryCount = 10; // Total number of attempts per chunk (first try included).
  const sleepTime = 3.5; // Seconds. Rate limit seems to be around ~10s, so usually 3 retries should be enough for another ~30 pages.

  // parsePage resolves to `{ chapters }` objects, so keep them as pages and flatten at the end.
  const parsedPages: Plugin.SourcePage[] = [];

  for (let i = 0; i < pagesArray.length; i += chunkSize) {
    const pagesArrayChunk = pagesArray.slice(i, i + chunkSize);

    const firstPage = pagesArrayChunk[0];
    const lastPage = pagesArrayChunk[pagesArrayChunk.length - 1];

    for (let attempt = 1; ; attempt++) {
      try {
        // Parse all pages in chunk in parallel
        const parsedChunk = await Promise.all(
          pagesArrayChunk.map(page =>
            this.parsePage(novelPath, page.toString()),
          ),
        );

        parsedPages.push(...parsedChunk);
        break;
      } catch (err) {
        // Only throttling is retried, and only while attempts remain;
        // give up without announcing a retry that will never happen.
        if (!(err instanceof NovelFireThrottlingError) || attempt >= retryCount) {
          throw err;
        }

        console.warn(
          `[pages=${firstPage}-${lastPage}] Novel Fire is rate limiting requests. Retry attempt ${attempt} of ${retryCount - 1} in ${sleepTime} seconds...`,
        );

        // Sleep before retrying the same chunk
        await new Promise<void>(resolve =>
          setTimeout(resolve, sleepTime * 1000),
        );
      }
    }
  }

  // Merge the chapters of every page into a single array
  const allChapters: Plugin.ChapterItem[] = [];
  for (const parsedPage of parsedPages) {
    allChapters.push(...parsedPage.chapters);
  }
  return allChapters;
}
214+
215+
async parseNovel(
216+
novelPathRaw: string,
217+
): Promise<Plugin.SourceNovel & { totalPages: number }> {
131218
const novelPath = deSlash(novelPathRaw);
132219
const $ = await this.getCheerio(this.site + novelPath, false);
133220
const baseUrl = this.site;
134221

135222
let post_id = '0';
136223

137-
const novel: Partial<Plugin.SourceNovel> = {
224+
const novel: Partial<Plugin.SourceNovel & { totalPages: number }> = {
138225
path: novelPath,
226+
totalPages: 1,
139227
};
140228

141229
novel.name =
@@ -185,20 +273,67 @@ class NovelFire implements Plugin.PluginBase {
185273

186274
post_id = $('#novel-report').attr('report-post_id') || '0';
187275

188-
novel.chapters = await this.getAllChapters(novelPath, post_id);
276+
try {
277+
novel.chapters = await this.getAllChapters(novelPath, post_id);
278+
} catch (error) {
279+
const totalChapters = $('.header-stats .icon-book-open')
280+
.parent()
281+
.text()
282+
.trim();
283+
novel.totalPages = Math.ceil(parseInt(totalChapters) / 100);
284+
if (this.singlePage) {
285+
novel.chapters = await this.getAllChaptersForce(
286+
novelPath,
287+
novel.totalPages,
288+
);
289+
if (novel.totalPages > 1 && novel.chapters.length > 100) {
290+
novel.totalPages = 1;
291+
}
292+
}
293+
}
294+
295+
return novel as Plugin.SourceNovel & { totalPages: number };
296+
}
297+
298+
/**
 * Fetches one page of a novel's chapter list and extracts its chapters.
 *
 * Requests `<site><novelPath>/chapters?page=<page>` and reads every
 * `.chapter-list li` entry. Entries whose link has no `href` are left out;
 * a missing `title` attribute falls back to 'No Title Found'.
 *
 * @param novelPath Novel path appended to the site URL.
 * @param page Page number of the chapter list, as a string.
 * @returns An object holding the chapters found on that page.
 */
async parsePage(novelPath: string, page: string): Promise<Plugin.SourcePage> {
  const response = await fetchApi(
    `${this.site}${novelPath}/chapters?page=${page}`,
  );
  const $ = load(await response.text());

  const chapters: Plugin.ChapterItem[] = [];
  for (const item of $('.chapter-list li').toArray()) {
    const link = $(item).find('a');
    const href = link.attr('href');

    // Entries without a target cannot be opened, so they are dropped.
    if (!href) continue;

    chapters.push({
      name: link.attr('title') || 'No Title Found',
      path: deSlash(href.replace(this.site, '')),
    });
  }

  return { chapters };
}
192325

193326
async parseChapter(chapterPath: string): Promise<string> {
194327
const url = this.site + chapterPath;
195328
const loadedCheerio = await this.getCheerio(url, false);
196329

197330
const chapterText = loadedCheerio('#content');
198-
const odds = chapterText.find(':not(p, h1, span, i, b, u, img, a, div)');
331+
const odds = chapterText.find(
332+
':not(p, h1, span, i, b, u, img, a, div, strong)',
333+
);
199334
for (const ele of odds.toArray()) {
200335
const tag = ele.name.toString();
201-
if (tag.length > 5) {
336+
if (tag.length > 5 && ele.name.toString().substring(0, 1) == 'nf') {
202337
loadedCheerio(ele).remove();
203338
}
204339
}

0 commit comments

Comments
 (0)