Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,18 @@ import io.aatricks.novelscraper.util.TextUtils
* @property chapterNumber Optional chapter number for ordering
* @property nextChapterUrl Optional URL to the next chapter for navigation
* @property previousChapterUrl Optional URL to the previous chapter for navigation
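* @property preCalculatedTextCount Optional pre-computed number of text elements; when non-null, getTextCount() returns it instead of iterating over paragraphs, so lazily loaded content is not forced
* @property preCalculatedImageCount Optional pre-computed number of images; when non-null, getImageCount() returns it instead of iterating over paragraphs, so lazily loaded content is not forced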
*/
data class ChapterContent(
val paragraphs: List<ContentElement>,
val title: String? = null,
val url: String,
val chapterNumber: Int? = null,
val nextChapterUrl: String? = null,
val previousChapterUrl: String? = null
val previousChapterUrl: String? = null,
private val preCalculatedTextCount: Int? = null,
private val preCalculatedImageCount: Int? = null
) {
init {
require(url.isNotBlank()) { "URL cannot be blank" }
Expand All @@ -33,12 +37,12 @@ data class ChapterContent(
/**
* Returns the number of text elements in the chapter
*/
fun getTextCount(): Int = paragraphs.count { it is ContentElement.Text }
fun getTextCount(): Int = preCalculatedTextCount ?: paragraphs.count { it is ContentElement.Text }

/**
* Returns the number of image elements in the chapter
*/
fun getImageCount(): Int = paragraphs.sumOf {
fun getImageCount(): Int = preCalculatedImageCount ?: paragraphs.sumOf {
when (it) {
is ContentElement.Image -> 1
is ContentElement.ImageGroup -> it.images.size
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,13 @@ class ContentRepository @Inject constructor(
private val epubBookCache = mutableMapOf<String, EpubBook>()

sealed class ContentResult {
data class Success(val elements: List<ContentElement>, val title: String? = null, val url: String) : ContentResult()
data class Success(
val elements: List<ContentElement>,
val title: String? = null,
val url: String,
val textCount: Int? = null,
val imageCount: Int? = null
) : ContentResult()
data class Error(val message: String, val exception: Exception? = null) : ContentResult()
}

Expand Down Expand Up @@ -401,41 +407,75 @@ class ContentRepository @Inject constructor(
}.getOrNull()
}

private suspend fun loadPdfContent(filePath: String): ContentResult = withContext(Dispatchers.IO) {
runCatching {
val paragraphs = mutableListOf<String>()
/**
 * Read-only list view over a PDF in which element `i` is the extracted text of page `i + 1`.
 *
 * Page text is produced on demand by [loadPdfPageText], which opens the document again on
 * every call. To keep repeated access cheap, each successfully extracted page is memoized,
 * so any given page is parsed at most once for the lifetime of this list.
 *
 * Note that operations inherited from [AbstractList] that walk the elements
 * (`equals`, `hashCode`, `toString`, iteration) will request every page.
 *
 * @property filePath Path or `content://` URI string handed to [loadPdfPageText]
 * @property totalPages Number of pages in the document; reported as the list [size]
 */
private inner class PdfLazyList(
    private val filePath: String,
    private val totalPages: Int
) : AbstractList<ContentElement>() {
    // Extracted pages keyed by zero-based index. A concurrent map is used because the list
    // is built inside withContext(Dispatchers.IO) and may be read from another thread.
    private val pageCache = java.util.concurrent.ConcurrentHashMap<Int, ContentElement>()

    override val size: Int get() = totalPages

    /**
     * Returns the text element for the page at [index] (zero-based).
     *
     * @throws IndexOutOfBoundsException if [index] is outside `0 until size`
     */
    override fun get(index: Int): ContentElement {
        if (index < 0 || index >= size) throw IndexOutOfBoundsException("Index: $index, Size: $size")

        pageCache[index]?.let { return it }

        // PDF page numbers are 1-based.
        val text = loadPdfPageText(filePath, index + 1)
        val element = ContentElement.Text(text)

        // loadPdfPageText yields "" when extraction fails, so empty results are not cached:
        // a later call gets another chance to read the page.
        if (text.isNotEmpty()) pageCache[index] = element
        return element
    }
}

private fun loadPdfPageText(filePath: String, pageNum: Int): String {
return runCatching {
val pdfDoc = if (filePath.startsWith("content://")) {
val uri = Uri.parse(filePath)
context.contentResolver.openInputStream(uri)?.use {
PdfDocument(PdfReader(it))
} ?: throw Exception("PDF not found")
context.contentResolver.openInputStream(uri)?.use {
PdfDocument(PdfReader(it))
} ?: return ""
} else {
val file = File(filePath)
if (!file.exists()) throw Exception("PDF not found")
if (!file.exists()) return ""
PdfDocument(PdfReader(file))
}

pdfDoc.use { doc ->
for (i in 1..doc.numberOfPages) {
PdfTextExtractor.getTextFromPage(doc.getPage(i)).lines()
.filterNot { it.trim().matches(Regex("^\\d+$")) }
.joinToString("\n")
.split(Regex("\n\\s*\\n"))
.map { it.trim() }
.filter { it.length > 20 }
.forEach { paragraphs.add(it) }
}
if (pageNum > doc.numberOfPages) return@use ""
val rawText = PdfTextExtractor.getTextFromPage(doc.getPage(pageNum))

rawText.lines()
.filterNot { it.trim().matches(Regex("^\\d+$")) }
.joinToString("\n")
.split(Regex("\\n\\s*\\n"))
.map { it.trim() }
.filter { it.length > 20 }
.joinToString("\n\n")
}

if (paragraphs.isEmpty()) throw Exception("No text in PDF")

}.getOrDefault("")
}

private suspend fun loadPdfContent(filePath: String): ContentResult = withContext(Dispatchers.IO) {
runCatching {
val pageCount = if (filePath.startsWith("content://")) {
val uri = Uri.parse(filePath)
context.contentResolver.openInputStream(uri)?.use {
PdfDocument(PdfReader(it)).use { doc -> doc.numberOfPages }
} ?: throw Exception("PDF not found")
} else {
val file = File(filePath)
if (!file.exists()) throw Exception("PDF not found")
PdfDocument(PdfReader(file)).use { doc -> doc.numberOfPages }
}

if (pageCount == 0) throw Exception("No text in PDF")

val title = if (filePath.startsWith("content://")) {
Uri.parse(filePath).lastPathSegment ?: "PDF"
} else {
File(filePath).nameWithoutExtension
}

ContentResult.Success(paragraphs.map { ContentElement.Text(it) }, title, filePath)

ContentResult.Success(
elements = PdfLazyList(filePath, pageCount),
title = title,
url = filePath,
textCount = pageCount,
imageCount = 0
)
}.getOrElse { e ->
ContentResult.Error("PDF Error: ${e.message}")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,9 @@ class ReaderViewModel @Inject constructor(
title = result.title,
url = result.url,
nextChapterUrl = contentRepository.incrementChapterUrl(result.url),
previousChapterUrl = contentRepository.decrementChapterUrl(result.url)
previousChapterUrl = contentRepository.decrementChapterUrl(result.url),
preCalculatedTextCount = result.textCount,
preCalculatedImageCount = result.imageCount
)

val libraryItem = effectiveLibraryItemId?.let { libraryRepository.getItemById(it) }
Expand Down