From 2096498c4883ae6bf2c79dcc240789df1235e802 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C9=91rry=20Shiv=C9=91m?= Date: Mon, 3 Jun 2024 21:27:33 +0530 Subject: [PATCH] fix: epub-parser: Fallback to spine based parsing method if `navMap` is empty (#180) Signed-off-by: starry-shivam --- app/build.gradle | 2 +- .../java/com/starry/myne/MainViewModel.kt | 2 +- .../java/com/starry/myne/epub/EpubParser.kt | 253 +++++++++--------- .../starry/myne/epub/EpubParserException.kt | 8 + .../java/com/starry/myne/epub/EpubUtils.kt | 4 +- .../starry/myne/helpers/Preferencesutils.kt | 55 +++- .../starry/myne/ui/screens/main/MainScreen.kt | 6 - .../reader/activities/ReaderActivity.kt | 3 +- .../java/com/starry/myne/EpubParserTest.kt | 2 +- 9 files changed, 194 insertions(+), 141 deletions(-) create mode 100644 app/src/main/java/com/starry/myne/epub/EpubParserException.kt diff --git a/app/build.gradle b/app/build.gradle index 81816a18..4718ab51 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -147,7 +147,7 @@ dependencies { // Crash Handler. implementation 'cat.ereza:customactivityoncrash:2.4.0' // Kotlin reflect API. - implementation "org.jetbrains.kotlin:kotlin-reflect:1.9.22" + implementation "org.jetbrains.kotlin:kotlin-reflect:1.9.24" // Testing components. testImplementation 'junit:junit:4.13.2' testImplementation "com.google.truth:truth:1.1.3" diff --git a/app/src/main/java/com/starry/myne/MainViewModel.kt b/app/src/main/java/com/starry/myne/MainViewModel.kt index 92bb6090..ee4fb2be 100644 --- a/app/src/main/java/com/starry/myne/MainViewModel.kt +++ b/app/src/main/java/com/starry/myne/MainViewModel.kt @@ -50,7 +50,7 @@ class MainViewModel @Inject constructor(private val welcomeDataStore: WelcomeDat _startDestination.value = Screens.WelcomeScreen.route } - delay(180) + delay(150) _isLoading.value = false } } diff --git a/app/src/main/java/com/starry/myne/epub/EpubParser.kt b/app/src/main/java/com/starry/myne/epub/EpubParser.kt index f06b7534..711e78fb 100644 --- a/app/src/main/java/com/starry/myne/epub/EpubParser.kt +++ b/app/src/main/java/com/starry/myne/epub/EpubParser.kt @@ -38,8 +38,22 @@ import java.util.zip.ZipInputStream */ class EpubParser { + + /** + * Represents an EPUB document. + * + * @param metadata The metadata of the document. + * @param manifest The manifest of the document. + * @param spine The spine of the document. + * @param opfFilePath The file path of the OPF file. + */ + data class EpubDocument( + val metadata: Node, val manifest: Node, val spine: Node, val opfFilePath: String + ) + /** * Represents an item in the EPUB manifest. + * * @param id The ID of the item. * @param absPath The absolute path of the item. * @param mediaType The media type of the item. @@ -51,6 +65,7 @@ class EpubParser { /** * Represents a temporary EPUB chapter. + * * @param url The URL of the chapter. * @param title The title of the chapter. * @param body The body of the chapter. @@ -62,6 +77,7 @@ class EpubParser { /** * Represents an EPUB file. + * * @param absPath The absolute path of the file. * @param data The file data. */ @@ -112,102 +128,111 @@ class EpubParser { inputStream.use { return parseAndCreateEbook(it, shouldUseToc) } } + // Parses the EPUB file and creates an EpubBook object. private suspend fun parseAndCreateEbook( - inputStream: InputStream, - shouldUseToc: Boolean - ): EpubBook = - withContext(Dispatchers.IO) { - val files = getZipFiles(inputStream) - - val container = files["META-INF/container.xml"] - ?: throw Exception("META-INF/container.xml file missing") - - val opfFilePath = parseXMLFile(container.data) - ?.selectFirstTag("rootfile") - ?.getAttributeValue("full-path") - ?.decodedURL ?: throw Exception("Invalid container.xml file") - - val opfFile = files[opfFilePath] ?: throw Exception(".opf file missing") - - val document = parseXMLFile(opfFile.data) - ?: throw Exception(".opf file failed to parse data") - val metadata = document.selectFirstTag("metadata") - ?: throw Exception(".opf file metadata section missing") - val manifest = document.selectFirstTag("manifest") - ?: throw Exception(".opf file manifest section missing") - val spine = document.selectFirstTag("spine") - ?: throw Exception(".opf file spine section missing") - - val metadataTitle = metadata.selectFirstChildTag("dc:title")?.textContent - ?: "Unknown Title" - val metadataAuthor = metadata.selectFirstChildTag("dc:creator")?.textContent - ?: "Unknown Author" - val metadataLanguage = metadata.selectFirstChildTag("dc:language")?.textContent - ?: "en" - - val metadataCoverId = metadata - .selectChildTag("meta") - .find { it.getAttributeValue("name") == "cover" } - ?.getAttributeValue("content") - - val hrefRootPath = File(opfFilePath).parentFile ?: File("") - - val manifestItems = manifest.selectChildTag("item").map { - EpubManifestItem( - id = it.getAttribute("id"), - absPath = it.getAttribute("href").decodedURL.hrefAbsolutePath(hrefRootPath), - mediaType = it.getAttribute("media-type"), - properties = it.getAttribute("properties") - ) - }.associateBy { it.id } - - // Find the table of contents (toc.ncx) file. - val tocFileItem = manifestItems.values.firstOrNull { - it.absPath.endsWith(".ncx", ignoreCase = true) - } + inputStream: InputStream, shouldUseToc: Boolean + ): EpubBook = withContext(Dispatchers.IO) { + val files = getZipFiles(inputStream) + val document = createEpubDocument(files) + + val metadataTitle = + document.metadata.selectFirstChildTag("dc:title")?.textContent ?: "Unknown Title" + val metadataAuthor = + document.metadata.selectFirstChildTag("dc:creator")?.textContent ?: "Unknown Author" + val metadataLanguage = + document.metadata.selectFirstChildTag("dc:language")?.textContent ?: "en" + + val metadataCoverId = document.metadata.selectChildTag("meta") + .find { it.getAttributeValue("name") == "cover" }?.getAttributeValue("content") + + val hrefRootPath = File(document.opfFilePath).parentFile ?: File("") + + val manifestItems = document.manifest.selectChildTag("item").map { + EpubManifestItem( + id = it.getAttribute("id"), + absPath = it.getAttribute("href").decodedURL.hrefAbsolutePath(hrefRootPath), + mediaType = it.getAttribute("media-type"), + properties = it.getAttribute("properties") + ) + }.associateBy { it.id } - /** - * Parse chapters based on the table of contents (toc.ncx) file. - * If it is not present, fallback to parsing with the spine logic. - */ - val chapters = if (tocFileItem != null && shouldUseToc) { - Log.d(TAG, "Parsing based on ToC file") - parseUsingTocFile(tocFileItem, files, hrefRootPath) - } else { - Log.d(TAG, "Parsing based on spine; shouldUseToc: $shouldUseToc") - parseUsingSpine(spine, manifestItems, files) - } + // Find the table of contents (toc.ncx) file. + val tocFileItem = manifestItems.values.firstOrNull { + it.absPath.endsWith(".ncx", ignoreCase = true) + } - Log.d(TAG, "Parsing images") - val images = parseImages(manifestItems, files) - Log.d(TAG, "Parsing cover image") - val coverImage = parseCoverImage(metadataCoverId, manifestItems, files) - - Log.d(TAG, "EpubBook created") - return@withContext EpubBook( - fileName = metadataTitle.asFileName(), - title = metadataTitle, - author = metadataAuthor, - language = metadataLanguage, - coverImage = coverImage, - chapters = chapters, - images = images - ) + // Find the nested navPoints in the table of contents (toc.ncx) file. + val tocNavPoints = tocFileItem?.let { navItem -> + val tocFile = files[navItem.absPath] + val tocDocument = tocFile?.let { parseXMLFile(it.data) } + findNestedNavPoints((tocDocument?.selectFirstTag("navMap") as Element?)) + } + // Determine the method of parsing chapters based on the presence of ToC and + // the shouldUseToc flag. If tocNavPoints is not null or empty and shouldUseToc + // is true, use the ToC file for parsing. Otherwise, parse using the spine. + val chapters = if (!tocNavPoints.isNullOrEmpty() && shouldUseToc) { + Log.d(TAG, "Parsing based on ToC file") + parseUsingTocFile(tocNavPoints, files, hrefRootPath) + } else { + Log.d(TAG, "Parsing based on spine; shouldUseToc: $shouldUseToc") + parseUsingSpine(document.spine, manifestItems, files) } + Log.d(TAG, "Parsing images") + val images = parseImages(manifestItems, files) + Log.d(TAG, "Parsing cover image") + val coverImage = parseCoverImage(metadataCoverId, manifestItems, files) + + Log.d(TAG, "EpubBook created") + return@withContext EpubBook( + fileName = metadataTitle.asFileName(), + title = metadataTitle, + author = metadataAuthor, + language = metadataLanguage, + coverImage = coverImage, + chapters = chapters, + images = images + ) + + } + + // Get all of the files located in the EPUB archive. private suspend fun getZipFiles( inputStream: InputStream ): Map = withContext(Dispatchers.IO) { ZipInputStream(inputStream).let { zipInputStream -> - zipInputStream - .entries() - .filterNot { it.isDirectory } + zipInputStream.entries().filterNot { it.isDirectory } .map { EpubFile(absPath = it.name, data = zipInputStream.readBytes()) } .associateBy { it.absPath } } } + // Create an EpubDocument object from the EPUB files. + @Throws(EpubParserException::class) + private fun createEpubDocument(files: Map): EpubDocument { + val container = files["META-INF/container.xml"] + ?: throw EpubParserException("META-INF/container.xml file missing") + + val opfFilePath = parseXMLFile(container.data)?.selectFirstTag("rootfile") + ?.getAttributeValue("full-path")?.decodedURL + ?: throw EpubParserException("Invalid container.xml file") + + val opfFile = files[opfFilePath] ?: throw EpubParserException(".opf file missing") + + val document = parseXMLFile(opfFile.data) + ?: throw EpubParserException(".opf file failed to parse data") + val metadata = document.selectFirstTag("metadata") + ?: throw EpubParserException(".opf file metadata section missing") + val manifest = document.selectFirstTag("manifest") + ?: throw EpubParserException(".opf file manifest section missing") + val spine = document.selectFirstTag("spine") + ?: throw EpubParserException(".opf file spine section missing") + + return EpubDocument(metadata, manifest, spine, opfFilePath) + } + + // Find all nested navPoints in the table of contents (ToC) file. private fun findNestedNavPoints(element: Element?): List { val navPoints = mutableListOf() if (element == null) { @@ -223,21 +248,14 @@ class EpubParser { return navPoints } + // Parse chapters based on the table of contents (ToC) file. private fun parseUsingTocFile( - tocFileItem: EpubManifestItem, - files: Map, - hrefRootPath: File + tocNavPoints: List, files: Map, hrefRootPath: File ): List { - val tocFile = tocFileItem.let { files[it.absPath] } - val tocDocument = tocFile?.let { parseXMLFile(it.data) } - val tocNavPoints = - findNestedNavPoints((tocDocument?.selectFirstTag("navMap") as Element?)) - // Parse each chapter entry. return tocNavPoints.flatMap { navPoint -> val title = - navPoint.selectFirstChildTag("navLabel") - ?.selectFirstChildTag("text")?.textContent + navPoint.selectFirstChildTag("navLabel")?.selectFirstChildTag("text")?.textContent val chapterSrc = navPoint.selectFirstChildTag("content")?.getAttributeValue("src") ?.hrefAbsolutePath(hrefRootPath) @@ -271,9 +289,7 @@ class EpubParser { if (res != null) { listOf( EpubChapter( - absPath = chapterSrc, - title = title ?: "", - body = res.body + absPath = chapterSrc, title = title ?: "", body = res.body ) ) } else { @@ -285,6 +301,8 @@ class EpubParser { }.filter { it.body.isNotBlank() }.toList() } + // Parse chapters based on the spine of the epub document. + // This is the fallback method if the ToC file is not available or shouldUseToc is false. private fun parseUsingSpine( spine: Node, manifestItems: Map, @@ -292,16 +310,12 @@ class EpubParser { ): List { var chapterIndex = 0 val chapterExtensions = listOf("xhtml", "xml", "html", "htm").map { ".$it" } - return spine - .selectChildTag("itemref") - .mapNotNull { manifestItems[it.getAttribute("idref")] } - .filter { item -> + return spine.selectChildTag("itemref") + .mapNotNull { manifestItems[it.getAttribute("idref")] }.filter { item -> chapterExtensions.any { item.absPath.endsWith(it, ignoreCase = true) } || item.mediaType.startsWith("image/") - } - .mapNotNull { files[it.absPath]?.let { file -> it to file } } - .map { (item, file) -> + }.mapNotNull { files[it.absPath]?.let { file -> it to file } }.map { (item, file) -> val parser = EpubXMLFileParser(file.absPath, file.data, files) if (item.mediaType.startsWith("image/")) { TempEpubChapter( @@ -316,8 +330,7 @@ class EpubParser { // try to merge them and extract the main title of each one. // Is is not perfect but better than nothing. val chapterTitle = res.title ?: if (chapterIndex == 0) "" else null - if (chapterTitle != null) - chapterIndex += 1 + if (chapterTitle != null) chapterIndex += 1 TempEpubChapter( url = file.absPath, @@ -329,36 +342,29 @@ class EpubParser { }.groupBy { it.chapterIndex }.map { (index, list) -> - EpubChapter( - absPath = list.first().url, + EpubChapter(absPath = list.first().url, title = list.first().title ?: "Chapter $index", - body = list.joinToString("\n\n") { it.body } - ) + body = list.joinToString("\n\n") { it.body }) }.filter { it.body.isNotBlank() } } private fun parseImages( - manifestItems: Map, - files: Map + manifestItems: Map, files: Map ): List { val imageExtensions = listOf("png", "gif", "raw", "png", "jpg", "jpeg", "webp", "svg").map { ".$it" } - val unlistedImages = files - .asSequence() - .filter { (_, file) -> - imageExtensions.any { file.absPath.endsWith(it, ignoreCase = true) } - } - .map { (_, file) -> - EpubImage(absPath = file.absPath, image = file.data) - } + val unlistedImages = files.asSequence().filter { (_, file) -> + imageExtensions.any { file.absPath.endsWith(it, ignoreCase = true) } + }.map { (_, file) -> + EpubImage(absPath = file.absPath, image = file.data) + } - val listedImages = manifestItems.asSequence() - .map { it.value } - .filter { it.mediaType.startsWith("image") } - .mapNotNull { files[it.absPath] } - .map { EpubImage(absPath = it.absPath, image = it.data) } + val listedImages = + manifestItems.asSequence().map { it.value }.filter { it.mediaType.startsWith("image") } + .mapNotNull { files[it.absPath] } + .map { EpubImage(absPath = it.absPath, image = it.data) } return (listedImages + unlistedImages).distinctBy { it.absPath }.toList() } @@ -368,8 +374,7 @@ class EpubParser { manifestItems: Map, files: Map ): Bitmap? { - val coverImage = manifestItems[metadataCoverId] - ?.let { files[it.absPath] } + val coverImage = manifestItems[metadataCoverId]?.let { files[it.absPath] } ?.let { EpubImage(absPath = it.absPath, image = it.data) } return if (coverImage?.image != null) { diff --git a/app/src/main/java/com/starry/myne/epub/EpubParserException.kt b/app/src/main/java/com/starry/myne/epub/EpubParserException.kt new file mode 100644 index 00000000..22c1d24a --- /dev/null +++ b/app/src/main/java/com/starry/myne/epub/EpubParserException.kt @@ -0,0 +1,8 @@ +package com.starry.myne.epub + +/** + * Exception thrown when an error occurs while parsing an EPUB file. + * + * @param message The error message. + */ +class EpubParserException(message: String) : Exception(message) \ No newline at end of file diff --git a/app/src/main/java/com/starry/myne/epub/EpubUtils.kt b/app/src/main/java/com/starry/myne/epub/EpubUtils.kt index 6718d648..5e01d332 100644 --- a/app/src/main/java/com/starry/myne/epub/EpubUtils.kt +++ b/app/src/main/java/com/starry/myne/epub/EpubUtils.kt @@ -60,8 +60,8 @@ fun Node.getAttributeValue(attribute: String): String? = val NodeList.elements get() = (0..length).asSequence().mapNotNull { item(it) as? Element } val Node.childElements get() = childNodes.elements -fun JsoupNode.nextSiblingNodes(): List { - val siblings = mutableListOf() +fun JsoupNode.nextSiblingNodes(): List { + val siblings = mutableListOf() var nextSibling = nextSibling() while (nextSibling != null) { siblings.add(nextSibling) diff --git a/app/src/main/java/com/starry/myne/helpers/Preferencesutils.kt b/app/src/main/java/com/starry/myne/helpers/Preferencesutils.kt index 8c4e0d65..b17d5031 100644 --- a/app/src/main/java/com/starry/myne/helpers/Preferencesutils.kt +++ b/app/src/main/java/com/starry/myne/helpers/Preferencesutils.kt @@ -19,6 +19,11 @@ package com.starry.myne.helpers import android.content.Context import android.content.SharedPreferences +/** + * A helper class to manage shared preferences + * + * @param context The context to use + */ class PreferenceUtil(context: Context) { companion object { @@ -38,40 +43,80 @@ class PreferenceUtil(context: Context) { const val LIBRARY_SWIPE_TOOLTIP_BOOL = "show_library_tooltip" } - private var prefs: SharedPreferences - - init { - prefs = context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) - } + // + private var prefs: SharedPreferences = + context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) + /** + * Check if a key exists in the preferences + * + * @param key The key to check + * @return True if the key exists, false otherwise + */ fun keyExists(key: String): Boolean = prefs.contains(key) + /** + * Insert a string value into the preferences + * + * @param key The key to insert the value into + * @param value The value to insert + */ fun putString(key: String, value: String) { val prefsEditor = prefs.edit() prefsEditor.putString(key, value) prefsEditor.apply() } + /** + * Insert an integer value into the preferences + * + * @param key The key to insert the value into + * @param value The value to insert + */ fun putInt(key: String, value: Int) { val prefsEditor = prefs.edit() prefsEditor.putInt(key, value) prefsEditor.apply() } + /** + * Insert a boolean value into the preferences + * + * @param key The key to insert the value into + * @param value The value to insert + */ fun putBoolean(key: String, value: Boolean) { val prefsEditor = prefs.edit() prefsEditor.putBoolean(key, value) prefsEditor.apply() } + /** + * Get a string value from the preferences + * + * @param key The key to get the value from + * @param defValue The default value to return if the key does not exist + */ fun getString(key: String, defValue: String): String? { return prefs.getString(key, defValue) } + /** + * Get an integer value from the preferences + * + * @param key The key to get the value from + * @param defValue The default value to return if the key does not exist + */ fun getInt(key: String, defValue: Int): Int { return prefs.getInt(key, defValue) } + /** + * Get a boolean value from the preferences + * + * @param key The key to get the value from + * @param defValue The default value to return if the key does not exist + */ fun getBoolean(key: String, defValue: Boolean): Boolean { return prefs.getBoolean(key, defValue) } diff --git a/app/src/main/java/com/starry/myne/ui/screens/main/MainScreen.kt b/app/src/main/java/com/starry/myne/ui/screens/main/MainScreen.kt index 4bfe4207..1f63a825 100644 --- a/app/src/main/java/com/starry/myne/ui/screens/main/MainScreen.kt +++ b/app/src/main/java/com/starry/myne/ui/screens/main/MainScreen.kt @@ -17,15 +17,11 @@ package com.starry.myne.ui.screens.main import android.annotation.SuppressLint -import androidx.activity.SystemBarStyle -import androidx.activity.enableEdgeToEdge -import androidx.appcompat.app.AppCompatActivity import androidx.compose.animation.AnimatedVisibility import androidx.compose.animation.slideInVertically import androidx.compose.animation.slideOutVertically import androidx.compose.foundation.background import androidx.compose.foundation.clickable -import androidx.compose.foundation.isSystemInDarkTheme import androidx.compose.foundation.layout.Arrangement import androidx.compose.foundation.layout.Box import androidx.compose.foundation.layout.Row @@ -41,7 +37,6 @@ import androidx.compose.material3.Scaffold import androidx.compose.material3.Text import androidx.compose.material3.surfaceColorAtElevation import androidx.compose.runtime.Composable -import androidx.compose.runtime.LaunchedEffect import androidx.compose.runtime.getValue import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier @@ -59,7 +54,6 @@ import androidx.navigation.compose.rememberNavController import com.starry.myne.helpers.NetworkObserver import com.starry.myne.ui.navigation.BottomBarScreen import com.starry.myne.ui.navigation.NavGraph -import com.starry.myne.ui.screens.settings.viewmodels.ThemeMode import com.starry.myne.ui.theme.figeronaFont /** diff --git a/app/src/main/java/com/starry/myne/ui/screens/reader/activities/ReaderActivity.kt b/app/src/main/java/com/starry/myne/ui/screens/reader/activities/ReaderActivity.kt index c3e52e72..6f76d7f6 100644 --- a/app/src/main/java/com/starry/myne/ui/screens/reader/activities/ReaderActivity.kt +++ b/app/src/main/java/com/starry/myne/ui/screens/reader/activities/ReaderActivity.kt @@ -75,7 +75,8 @@ class ReaderActivity : AppCompatActivity() { // Fullscreen mode that ignores any cutout, notch etc. WindowCompat.setDecorFitsSystemWindows(window, false) val controller = WindowInsetsControllerCompat(window, window.decorView) - controller.systemBarsBehavior = WindowInsetsControllerCompat.BEHAVIOR_SHOW_TRANSIENT_BARS_BY_SWIPE + controller.systemBarsBehavior = + WindowInsetsControllerCompat.BEHAVIOR_SHOW_TRANSIENT_BARS_BY_SWIPE controller.hide(WindowInsetsCompat.Type.systemBars()) controller.hide(WindowInsetsCompat.Type.displayCutout()) diff --git a/app/src/test/java/com/starry/myne/EpubParserTest.kt b/app/src/test/java/com/starry/myne/EpubParserTest.kt index 95ca8b88..58843f5d 100644 --- a/app/src/test/java/com/starry/myne/EpubParserTest.kt +++ b/app/src/test/java/com/starry/myne/EpubParserTest.kt @@ -33,7 +33,7 @@ import java.util.zip.ZipOutputStream import kotlin.random.Random @RunWith(RobolectricTestRunner::class) -@Config(manifest=Config.NONE, sdk=[33]) // Run on Android 13 +@Config(manifest = Config.NONE, sdk = [33]) // Run on Android 13 class EpubParserTest { private lateinit var epubParser: EpubParser