Skip to content

Commit c8b5f50

Browse files
authored
Use archive entry length instead of original length for EPUB positions (#162)
1 parent 7adfc05 commit c8b5f50

File tree

4 files changed

+148
-48
lines changed

4 files changed

+148
-48
lines changed

readium/streamer/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ All notable changes to this project will be documented in this file.
1515
### Changed
1616

1717
* Upgraded to Kotlin 1.5.21 and Gradle 7.1.1
18+
* The default EPUB positions service now uses the archive entry length when available. [This is similar to how Adobe RMSDK generates page numbers](https://github.com/readium/architecture/issues/123).
19+
* To use the former strategy, create the `Streamer` with: `Streamer(parsers = listOf(EpubParser(reflowablePositionsStrategy = EpubPositionsService.ReflowableStrategy.OriginalLength(pageLength = 1024))))`
1820

1921
### Fixed
2022

readium/streamer/r2-streamer/src/main/java/org/readium/r2/streamer/parser/epub/EpubParser.kt

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,13 @@ object EPUBConstant {
7878

7979
/**
8080
* Parses a Publication from an EPUB publication.
81+
*
82+
* @param reflowablePositionsStrategy Strategy used to calculate the number of positions in a
83+
* reflowable resource.
8184
*/
82-
class EpubParser : PublicationParser, org.readium.r2.streamer.parser.PublicationParser {
85+
class EpubParser(
86+
private val reflowablePositionsStrategy: EpubPositionsService.ReflowableStrategy = EpubPositionsService.ReflowableStrategy.recommended
87+
) : PublicationParser, org.readium.r2.streamer.parser.PublicationParser {
8388

8489
override suspend fun parse(asset: PublicationAsset, fetcher: Fetcher, warnings: WarningLogger?): Publication.Builder? =
8590
_parse(asset, fetcher, asset.name)
@@ -113,7 +118,7 @@ class EpubParser : PublicationParser, org.readium.r2.streamer.parser.Publication
113118
manifest = manifest,
114119
fetcher = fetcher,
115120
servicesBuilder = Publication.ServicesBuilder(
116-
positions = (EpubPositionsService)::create,
121+
positions = EpubPositionsService.createFactory(reflowablePositionsStrategy),
117122
search = StringSearchService.createDefaultFactory(),
118123
)
119124
)

readium/streamer/r2-streamer/src/main/java/org/readium/r2/streamer/parser/epub/EpubPositionsService.kt

Lines changed: 73 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
11
/*
2-
* Module: r2-streamer-kotlin
3-
* Developers: Mickaël Menu
4-
*
5-
* Copyright (c) 2020. Readium Foundation. All rights reserved.
6-
* Use of this source code is governed by a BSD-style license which is detailed in the
7-
* LICENSE file present in the project repository where this source code is maintained.
2+
* Copyright 2020 Readium Foundation. All rights reserved.
3+
* Use of this source code is governed by the BSD-style license
4+
* available in the top-level LICENSE file of the project.
85
*/
96

107
package org.readium.r2.streamer.parser.epub
118

129
import org.readium.r2.shared.fetcher.Fetcher
10+
import org.readium.r2.shared.fetcher.Resource
1311
import org.readium.r2.shared.publication.Link
1412
import org.readium.r2.shared.publication.Locator
1513
import org.readium.r2.shared.publication.Publication
14+
import org.readium.r2.shared.publication.archive.archive
1615
import org.readium.r2.shared.publication.encryption.encryption
1716
import org.readium.r2.shared.publication.epub.EpubLayout
1817
import org.readium.r2.shared.publication.epub.layoutOf
@@ -30,17 +29,75 @@ import kotlin.math.ceil
3029
*
3130
* https://github.com/readium/architecture/blob/master/models/locators/best-practices/format.md#epub
3231
* https://github.com/readium/architecture/issues/101
33-
*
34-
* @param reflowablePositionLength Length in bytes of a position in a reflowable resource. This is
35-
* used to split a single reflowable resource into several positions.
3632
*/
37-
internal class EpubPositionsService(
33+
class EpubPositionsService(
3834
private val readingOrder: List<Link>,
3935
private val presentation: Presentation,
4036
private val fetcher: Fetcher,
41-
private val reflowablePositionLength: Long
37+
private val reflowableStrategy: ReflowableStrategy
4238
) : PositionsService {
4339

40+
companion object {
41+
42+
fun createFactory(reflowableStrategy: ReflowableStrategy = ReflowableStrategy.recommended): (Publication.Service.Context) -> EpubPositionsService =
43+
{ context ->
44+
EpubPositionsService(
45+
readingOrder = context.manifest.readingOrder,
46+
presentation = context.manifest.metadata.presentation,
47+
fetcher = context.fetcher,
48+
reflowableStrategy = reflowableStrategy
49+
)
50+
}
51+
}
52+
53+
/**
54+
* Strategy used to calculate the number of positions in a reflowable resource.
55+
*
56+
* Note that a fixed-layout resource always has a single position.
57+
*/
58+
sealed class ReflowableStrategy {
59+
/** Returns the number of positions in the given [resource] according to the strategy. */
60+
abstract suspend fun positionCount(resource: Resource): Int
61+
62+
/**
63+
* Use the original length of each resource (before compression and encryption) and split it
64+
* by the given [pageLength].
65+
*/
66+
data class OriginalLength(val pageLength: Int) : ReflowableStrategy() {
67+
override suspend fun positionCount(resource: Resource): Int {
68+
val length = resource.link().properties.encryption?.originalLength
69+
?: resource.length().getOrNull()
70+
?: 0
71+
return ceil(length.toDouble() / pageLength.toDouble()).toInt()
72+
.coerceAtLeast(1)
73+
}
74+
}
75+
76+
/**
77+
* Use the archive entry length (whether it is compressed or stored) and split it by the
78+
* given [pageLength].
79+
*/
80+
data class ArchiveEntryLength(val pageLength: Int) : ReflowableStrategy() {
81+
override suspend fun positionCount(resource: Resource): Int {
82+
val length = resource.link().properties.archive?.entryLength
83+
?: resource.length().getOrNull()
84+
?: 0
85+
return ceil(length.toDouble() / pageLength.toDouble()).toInt()
86+
.coerceAtLeast(1)
87+
}
88+
}
89+
90+
companion object {
91+
/**
92+
* Recommended historical strategy: archive entry length split into 1024-byte pages.
93+
*
94+
* This strategy is used by Adobe RMSDK as well.
95+
* See https://github.com/readium/architecture/issues/123
96+
*/
97+
val recommended = ArchiveEntryLength(pageLength = 1024)
98+
}
99+
}
100+
44101
override suspend fun positionsByReadingOrder(): List<List<Locator>> {
45102
if (!::_positions.isInitialized)
46103
_positions = computePositions()
@@ -93,18 +150,13 @@ internal class EpubPositionsService(
93150
)
94151

95152
private suspend fun createReflowable(link: Link, startPosition: Int, fetcher: Fetcher): List<Locator> {
96-
// If the resource is encrypted, we use the `originalLength` declared in `encryption.xml`
97-
// instead of the ZIP entry length.
98-
val length = link.properties.encryption?.originalLength
99-
?: fetcher.get(link).use { it.length().getOrNull() }
100-
?: return emptyList()
101-
102-
val pageCount = ceil(length / reflowablePositionLength.toDouble()).toInt()
103-
.coerceAtLeast(1)
153+
val positionCount = fetcher.get(link).use { resource ->
154+
reflowableStrategy.positionCount(resource)
155+
}
104156

105-
return (1..pageCount).map { position ->
157+
return (1..positionCount).map { position ->
106158
createLocator(link,
107-
progression = (position - 1) / pageCount.toDouble(),
159+
progression = (position - 1) / positionCount.toDouble(),
108160
position = startPosition + position
109161
)
110162
}
@@ -119,17 +171,4 @@ internal class EpubPositionsService(
119171
position = position
120172
)
121173
)
122-
123-
companion object {
124-
125-
fun create(context: Publication.Service.Context): EpubPositionsService {
126-
return EpubPositionsService(
127-
readingOrder = context.manifest.readingOrder,
128-
presentation = context.manifest.metadata.presentation,
129-
fetcher = context.fetcher,
130-
reflowablePositionLength = 1024L
131-
)
132-
}
133-
134-
}
135174
}

readium/streamer/r2-streamer/src/test/java/org/readium/r2/streamer/parser/epub/EpubPositionsServiceTest.kt

Lines changed: 66 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ class EpubPositionsServiceTest {
189189
Pair(51L, Link(href = "chap4")),
190190
Pair(120L, Link(href = "chap5"))
191191
),
192-
reflowablePositionLength = 50L
192+
reflowableStrategy = EpubPositionsService.ReflowableStrategy.ArchiveEntryLength(pageLength = 50)
193193
)
194194

195195
assertEquals(
@@ -280,7 +280,7 @@ class EpubPositionsServiceTest {
280280
readingOrder = listOf(
281281
Pair(60L, Link(href = "chap1"))
282282
),
283-
reflowablePositionLength = 50L
283+
reflowableStrategy = EpubPositionsService.ReflowableStrategy.ArchiveEntryLength(pageLength = 50)
284284
)
285285

286286
assertEquals(
@@ -317,7 +317,7 @@ class EpubPositionsServiceTest {
317317
Pair(60L, Link(href = "chap2", properties = createProperties(layout = EpubLayout.REFLOWABLE))),
318318
Pair(20000L, Link(href = "chap3", properties = createProperties(layout = EpubLayout.FIXED)))
319319
),
320-
reflowablePositionLength = 50L
320+
reflowableStrategy = EpubPositionsService.ReflowableStrategy.ArchiveEntryLength(pageLength = 50)
321321
)
322322

323323
assertEquals(
@@ -364,14 +364,63 @@ class EpubPositionsServiceTest {
364364
}
365365

366366
@Test
367-
fun `Use the encrypted {originalLength} if available, instead of the {Container}'s file length`() {
367+
fun `Use the {ArchiveEntryLength} reflowable strategy`() {
368368
val service = createService(
369369
layout = EpubLayout.REFLOWABLE,
370370
readingOrder = listOf(
371-
Pair(60L, Link(href = "chap1", properties = createProperties(encryptedOriginalLength = 20L))),
371+
Pair(60L, Link(href = "chap1", properties = createProperties(archiveEntryLength = 20L))),
372372
Pair(60L, Link(href = "chap2"))
373373
),
374-
reflowablePositionLength = 50L
374+
reflowableStrategy = EpubPositionsService.ReflowableStrategy.ArchiveEntryLength(pageLength = 50)
375+
)
376+
377+
assertEquals(
378+
listOf(
379+
listOf(
380+
Locator(
381+
href = "chap1",
382+
type = "text/html",
383+
locations = Locator.Locations(
384+
progression = 0.0,
385+
position = 1,
386+
totalProgression = 0.0
387+
)
388+
),
389+
),
390+
listOf(
391+
Locator(
392+
href = "chap2",
393+
type = "text/html",
394+
locations = Locator.Locations(
395+
progression = 0.0,
396+
position = 2,
397+
totalProgression = 1.0/3.0
398+
)
399+
),
400+
Locator(
401+
href = "chap2",
402+
type = "text/html",
403+
locations = Locator.Locations(
404+
progression = 0.5,
405+
position = 3,
406+
totalProgression = 2.0/3.0
407+
)
408+
)
409+
)
410+
),
411+
runBlocking { service.positionsByReadingOrder() }
412+
)
413+
}
414+
415+
@Test
416+
fun `Use the {OriginalLength} reflowable strategy`() {
417+
val service = createService(
418+
layout = EpubLayout.REFLOWABLE,
419+
readingOrder = listOf(
420+
Pair(60L, Link(href = "chap1", properties = createProperties(originalLength = 20L))),
421+
Pair(60L, Link(href = "chap2"))
422+
),
423+
reflowableStrategy = EpubPositionsService.ReflowableStrategy.OriginalLength(pageLength = 50)
375424
)
376425

377426
assertEquals(
@@ -411,7 +460,7 @@ class EpubPositionsServiceTest {
411460
private fun createService(
412461
layout: EpubLayout? = null,
413462
readingOrder: List<Pair<Long, Link>>,
414-
reflowablePositionLength: Long = 50L
463+
reflowableStrategy: EpubPositionsService.ReflowableStrategy = EpubPositionsService.ReflowableStrategy.ArchiveEntryLength(pageLength = 50)
415464
) = EpubPositionsService(
416465
readingOrder = readingOrder.map { it.second },
417466
fetcher = object : Fetcher {
@@ -436,21 +485,26 @@ class EpubPositionsServiceTest {
436485
override suspend fun close() {}
437486
},
438487
presentation = Presentation(layout = layout),
439-
reflowablePositionLength = reflowablePositionLength
488+
reflowableStrategy = reflowableStrategy
440489
)
441490

442-
private fun createProperties(layout: EpubLayout? = null, encryptedOriginalLength: Long? = null): Properties {
491+
private fun createProperties(layout: EpubLayout? = null, archiveEntryLength: Long? = null, originalLength: Long? = null): Properties {
443492
val properties = mutableMapOf<String, Any>()
444493
if (layout != null) {
445494
properties["layout"] = layout.value
446495
}
447-
if (encryptedOriginalLength != null) {
496+
if (originalLength != null) {
448497
properties["encrypted"] = mapOf(
449498
"algorithm" to "algo",
450-
"originalLength" to encryptedOriginalLength
499+
"originalLength" to originalLength
500+
)
501+
}
502+
if (archiveEntryLength != null) {
503+
properties["archive"] = mapOf(
504+
"entryLength" to archiveEntryLength,
505+
"isEntryCompressed" to true
451506
)
452507
}
453-
454508
return Properties(otherProperties = properties)
455509
}
456510

0 commit comments

Comments
 (0)