From 567652578bb36db4157e7526a6eb362230093064 Mon Sep 17 00:00:00 2001 From: talosross Date: Thu, 2 May 2024 19:57:20 +0200 Subject: [PATCH] Added support for summarizing documents and images. --- app/build.gradle.kts | 10 +- .../com/talosross/summaryyou/MainActivity.kt | 236 ++++++++++++------ app/src/main/python/youtube.py | 12 +- app/src/main/res/values-cs/strings.xml | 55 ++-- build.gradle.kts | 2 +- 5 files changed, 205 insertions(+), 110 deletions(-) diff --git a/app/build.gradle.kts b/app/build.gradle.kts index 5aa29ad..c23977d 100644 --- a/app/build.gradle.kts +++ b/app/build.gradle.kts @@ -12,7 +12,7 @@ android { } defaultConfig { applicationId = "com.talosross.summaryyou" - minSdk = 24 + minSdk = 26 targetSdk = 34 versionCode = 2024040714 versionName = "1.2.1" @@ -88,7 +88,7 @@ chaquopy { dependencies { implementation("androidx.core:core-ktx:1.12.0") implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.6.2") - implementation("androidx.activity:activity-compose:1.8.1") + implementation("androidx.activity:activity-compose:1.9.0") implementation(platform("androidx.compose:compose-bom:2023.10.01")) implementation("androidx.compose.ui:ui") implementation("androidx.compose.ui:ui-graphics") @@ -109,4 +109,10 @@ dependencies { implementation("io.coil-kt:coil-compose:2.4.0") implementation("io.coil-kt:coil-gif:2.1.0") implementation("com.google.mlkit:text-recognition:16.0.0") + implementation("androidx.activity:activity-ktx:1.9.0") + implementation("androidx.fragment:fragment-ktx:1.4.0") + implementation("org.apache.poi:poi-ooxml:5.2.5") + implementation("org.apache.poi:poi:5.2.2") + implementation("org.jetbrains.kotlinx:kotlinx-coroutines-android:1.6.4") + implementation("org.jetbrains.kotlinx:kotlinx-coroutines-play-services:1.6.4") } \ No newline at end of file diff --git a/app/src/main/java/com/talosross/summaryyou/MainActivity.kt b/app/src/main/java/com/talosross/summaryyou/MainActivity.kt index a4ed168..0237be5 100644 --- a/app/src/main/java/com/talosross/summaryyou/MainActivity.kt +++ b/app/src/main/java/com/talosross/summaryyou/MainActivity.kt @@ -1,15 +1,19 @@ package com.talosross.summaryyou +import android.app.Activity import android.content.ClipData import android.content.ClipboardManager import android.content.Context import android.content.Intent import android.content.SharedPreferences import android.content.res.Resources +import android.graphics.Bitmap +import android.graphics.pdf.PdfRenderer import android.net.Uri import android.os.Build import android.os.Build.VERSION.SDK_INT import android.os.Bundle +import android.provider.OpenableColumns import android.speech.tts.TextToSpeech import android.speech.tts.UtteranceProgressListener import android.util.Log @@ -18,6 +22,7 @@ import androidx.activity.ComponentActivity import androidx.activity.compose.rememberLauncherForActivityResult import androidx.activity.compose.setContent import androidx.activity.result.contract.ActivityResultContracts +import androidx.appcompat.app.AppCompatActivity import androidx.compose.animation.AnimatedVisibility import androidx.compose.foundation.ExperimentalFoundationApi import androidx.compose.foundation.Image @@ -48,6 +53,7 @@ import androidx.compose.material.icons.outlined.Share import androidx.compose.material3.Button import androidx.compose.material3.ButtonDefaults import androidx.compose.material3.Card +import androidx.compose.material3.CircularProgressIndicator import androidx.compose.material3.ExperimentalMaterial3Api import androidx.compose.material3.FilledTonalButton import androidx.compose.material3.FloatingActionButton @@ -121,9 +127,14 @@ import com.google.mlkit.vision.common.InputImage import com.google.mlkit.vision.text.TextRecognition import com.google.mlkit.vision.text.latin.TextRecognizerOptions import com.talosross.summaryyou.ui.theme.SummaryYouTheme +import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.launch +import kotlinx.coroutines.tasks.await import kotlinx.coroutines.withContext +import org.apache.poi.xwpf.usermodel.XWPFDocument +import java.io.IOException +import java.io.InputStream import java.util.Locale import java.util.UUID @@ -368,6 +379,7 @@ fun homeScreen(modifier: Modifier = Modifier, navController: NavHostController, var title by remember { mutableStateOf(null) } var author by remember { mutableStateOf(null) } var isLoading by remember { mutableStateOf(false) } // For Loading-Animation + var isExtracting by remember { mutableStateOf(false) } // For Loading-Animation var url by remember { mutableStateOf(initialUrl ?: "") } val scope = rememberCoroutineScope() // Python needs asynchronous call val context = LocalContext.current // Clipboard @@ -380,45 +392,68 @@ fun homeScreen(modifier: Modifier = Modifier, navController: NavHostController, var isError by remember { mutableStateOf(false) } val scrollBehavior = TopAppBarDefaults.enterAlwaysScrollBehavior(rememberTopAppBarState()) val key: String = APIKeyLibrary.getAPIKey() - var selectedFileUri by remember { mutableStateOf(null) } - + var isDocument by remember { mutableStateOf(false) } + var textDocument by remember { mutableStateOf(null) } val clipboardManager = ContextCompat.getSystemService( context, ClipboardManager::class.java ) as ClipboardManager - /* - fun extractTextFromDocument(uri: Uri) { - val inputStream = context.contentResolver.openInputStream(uri) - val fileData = inputStream?.readBytes() - - val recognizer = TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS) - val image = InputImage.fromByteArray(fileData, 0, fileData.size, 0, InputImage.IMAGE_FORMAT_JPEG) - recognizer.process(image) - .addOnSuccessListener { visionText -> - val extractedText = visionText.text - Log.d("TextExtraction", "Extracted Text: $extractedText") - // Verarbeite den extrahierten Text weiter - } - .addOnFailureListener { e -> - Log.e("TextExtraction", "Text extraction failed", e) - // Behandle den Fehler - // ... - } - } - */ + val result = remember { mutableStateOf(null) } val launcher = rememberLauncherForActivityResult(ActivityResultContracts.OpenDocument()) { uri -> - selectedFileUri = uri - //selectedFileUri?.let { fileUri -> - // extractTextFromDocument(fileUri) - //} + isExtracting = true + result.value = uri + if (uri != null) { + val mimeType = context.contentResolver.getType(uri) + + scope.launch { + isDocument = true + url = getFileName(context, uri) + textDocument = if (mimeType == "application/pdf") { + extractTextFromPdf(context, uri) + } else if (mimeType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document") { + extractTextFromDocx(context, uri) + } else { + extractTextFromImage(context, uri) + } + isExtracting = false + } + } } - fun openFilePicker() { - launcher.launch(arrayOf("*/*")) + fun summarize() { + focusManager.clearFocus() + isLoading = true // Start Loading-Animation + if(isError){transcriptResult = ""} + isError = false // No error + scope.launch { + if (!isDocument) { + title = getTitel(url) + author = getAuthor(url) + val (result, error) = summarize(url, selectedIndex, viewModel) + transcriptResult = result + isError = error + }else { + title = url + author = "" + var text = "Document: " + textDocument + val (result, error) = summarize(text, selectedIndex, viewModel) + transcriptResult = result + isError = error + } + isLoading = false // Stop Loading-Animation + if(!isError){ + if (isYouTubeLink(url)) { + viewModel.addTextSummary(title, author, transcriptResult, true) // Add to history + }else{ + viewModel.addTextSummary(title, author, transcriptResult, false) // Add to history + } + } + } } + Box { Scaffold( modifier = Modifier @@ -531,6 +566,7 @@ fun homeScreen(modifier: Modifier = Modifier, navController: NavHostController, transcriptResult = null isError = false // No error focusRequester.requestFocus() + isDocument = false } ) { Icon( @@ -546,17 +582,28 @@ fun homeScreen(modifier: Modifier = Modifier, navController: NavHostController, .padding(top = 20.dp) .focusRequester(focusRequester) ) - /* Spacer(modifier = Modifier.width(16.dp)) OutlinedButton( - onClick = { openFilePicker() }, + onClick = { launcher.launch(arrayOf("application/pdf", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "image/png", "image/jpeg", "image/jpg")) }, modifier = modifier .height(72.dp) .padding(top = 15.dp) ) { - Icon(Icons.Filled.AddCircle, "Floating action button.") + Box { + if (isExtracting) { + CircularProgressIndicator( + modifier = Modifier + .size(24.dp) + .align(Alignment.Center) + ) + } + Icon( + Icons.Filled.AddCircle, + contentDescription = "Floating action button", + modifier = Modifier.align(Alignment.Center) + ) + } } - */ } Box( modifier = if (isError) { @@ -778,28 +825,7 @@ fun homeScreen(modifier: Modifier = Modifier, navController: NavHostController, ) { Button( onClick = { - focusManager.clearFocus() - isLoading = true // Start Loading-Animation - isError = false // No error - scope.launch { - title = getTitel(url) - author = getAuthor(url) - val (result, error) = summarize( - url, - selectedIndex, - viewModel - ) - transcriptResult = result - isError = error - isLoading = false // Stop Loading-Animation - if(!isError){ - if (isYouTubeLink(url)) { - viewModel.addTextSummary(title, author, transcriptResult, true) // Add to history - }else{ - viewModel.addTextSummary(title, author, transcriptResult, false) // Add to history - } - } - } + summarize() }, contentPadding = ButtonDefaults.ButtonWithIconContentPadding ) { @@ -844,25 +870,7 @@ fun homeScreen(modifier: Modifier = Modifier, navController: NavHostController, } FloatingActionButton( onClick = { - focusManager.clearFocus() - isLoading = true // Start Loading-Animation - if(isError){transcriptResult = ""} - isError = false // No error - scope.launch { - title = getTitel(url) - author = getAuthor(url) - val (result, error) = summarize(url, selectedIndex, viewModel) - transcriptResult = result - isError = error - isLoading = false // Stop Loading-Animation - if(!isError){ - if (isYouTubeLink(url)) { - viewModel.addTextSummary(title, author, transcriptResult, true) // Add to history - }else{ - viewModel.addTextSummary(title, author, transcriptResult, false) // Add to history - } - } - } + summarize() }, modifier = modifier.padding(bottom = 60.dp, end = 15.dp) ) { @@ -940,5 +948,87 @@ fun isYouTubeLink(input: String): Boolean { return youtubePattern.matches(input) } +suspend fun extractTextFromPdf(context: Context, selectedPdfUri: Uri): String { + // Open the PDF file + val pdfRenderer = PdfRenderer(context.contentResolver.openFileDescriptor(selectedPdfUri, "r")!!) + // Initialize the text recognizer + val textRecognizer = TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS) + // Initialize the StringBuilder + val extractedText = StringBuilder() + + // Iterate through the pages of the PDF + for (pageNumber in 0 until pdfRenderer.pageCount) { + // Get the page as an image + val page = pdfRenderer.openPage(pageNumber) + val pageImage = Bitmap.createBitmap(page.width, page.height, Bitmap.Config.ARGB_8888) + page.render(pageImage, null, null, PdfRenderer.Page.RENDER_MODE_FOR_DISPLAY) + + // Create an input image from the page image + val inputImage = InputImage.fromBitmap(pageImage, 0) + + // Recognize text from the page image + val result = textRecognizer.process(inputImage).await() + extractedText.append(result.text) + + // Close the page + page.close() + } + + // Close the PDF file + pdfRenderer.close() + + // Return the extracted text + return extractedText.toString() +} + + +fun extractTextFromDocx(context: Context, selectedDocxUri: Uri): String { + // Öffne die DOCX-Datei als InputStream + val inputStream: InputStream? = context.contentResolver.openInputStream(selectedDocxUri) + + // Erstelle ein XWPFDocument mit dem InputStream + val doc = XWPFDocument(inputStream) + + // Initialisiere den StringBuilder für den extrahierten Text + val extractedText = StringBuilder() + + // Gehe durch alle Paragraphen des Dokuments + doc.paragraphs.forEach { paragraph -> + // Füge den Text des Paragraphen zum StringBuilder hinzu + extractedText.append(paragraph.text).append("\n") + } + + // Schließe das Dokument und den InputStream + doc.close() + inputStream?.close() + + // Gebe den extrahierten Text zurück + return extractedText.toString() +} + +suspend fun extractTextFromImage(context: Context, selectedImageUri: Uri): String = withContext(Dispatchers.IO) { + // Initialize the text recognizer + val textRecognizer = TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS) + + // Load the image from the URI + val inputImage = InputImage.fromFilePath(context, selectedImageUri) + + // Recognize text from the image + val result = textRecognizer.process(inputImage).await() + + // Return the extracted text + result.text +} + +fun getFileName(context: Context, uri: Uri): String { + var name = "" + val cursor = context.contentResolver.query(uri, null, null, null, null) + cursor?.let { + it.moveToFirst() + name = cursor.getString(it.getColumnIndex(OpenableColumns.DISPLAY_NAME)) + it.close() + } + return name +} \ No newline at end of file diff --git a/app/src/main/python/youtube.py b/app/src/main/python/youtube.py index 8350dd6..7bac571 100644 --- a/app/src/main/python/youtube.py +++ b/app/src/main/python/youtube.py @@ -149,10 +149,10 @@ def generate_summary(text: str, length: int, type: str, language: str, title: st # Prompts promptVideo0 = f"You will be provided with a transcript of the video{title}, and your task is to generate a very short, concise summary with a maximum of 20 words of the transcript using only 3 bullet points introduced by dashes. Every bullet point should be a maximum of 5 words, start with a hyphen and not be a full sentences. Please answer in {language}!!!!!!!!!!" #Done promptVideo1 = f"You will be provided with a transcript of the video{title}, and your task is to generate a very short, concise and compact summary with a maximum of 40 words of the transcript. If it includes a conclusion or key takeaway, make sure to include that in the end. Your answer should be in {language}!!!!!!!!!!!!1" #Done - just working with "1" - promptVideo3 = f"You will be provided with a transcript of the video with{title}, and your task is to generate a compact summary of the transcript in 130 words. If it includes a conclusion or key takeaway, make sure to include that in the end. Don't use the prefix 'summary' or 'conclusion'. Your answer should be in {language}!!!!!!!!!!!!1" #Done + promptVideo3 = f"You will be provided with a transcript of the video with{title}, and your task is to generate a summary of the transcript in 130 words. If it includes a conclusion or key takeaway, make sure to include that in the end. Don't use the prefix 'summary' or 'conclusion'. Your answer should be in {language}!!!!!!!!!!!!1" #Done promptArticle0 = f"You will be provided with the article{title}, and your task is to summarize it in 3 very short and concise bullet points. Every bullet point should be a maximum of 5 words, start with a hyphen and not be a full sentences. Summarize it in {language}!" #Done in German promptArticle1 = f"You will be provided with the article{title}, and your task is to generate a very short, concise and compact summary with a maximum of 50 words of the text. If it includes a conclusion or key takeaway, make sure to include that in the end. Summarize it in {language}!" #Done in German - promptArticle3 = f"You will be provided with the article{title}, and your task is to generate a compact summary of the text in {language}. If it includes a conclusion or key takeaway, make sure to include that in the end." #Done in German + promptArticle3 = f"You will be provided with the article{title}, and your task is to generate a summary of the text in {language}. If it includes a conclusion or key takeaway, make sure to include that in the end." #Done in German promptText0 = f"You will be provided with a text and your task is to generate a very short, concise summary with a maximum of 20 word of the text in {language} using only 3 bullet points." promptText1 = f"You will be provided with a text and your task is to generate a very short, concise summary with a maximum of 60 words of the text in {language}. If it includes a conclusion or key takeaway, make sure to include that in the end." promptText3 = f"You will be provided with a text and your task is to generate a summary of the text in {language}. If it includes a conclusion or key takeaway, make sure to include that in the end." @@ -177,7 +177,7 @@ def generate_summary(text: str, length: int, type: str, language: str, title: st max_tokens = 200 else: instructions = promptVideo3 - max_tokens = 400 + max_tokens = 600 elif type == "article": if language == "the same language as the ": language = language + "article" @@ -189,7 +189,7 @@ def generate_summary(text: str, length: int, type: str, language: str, title: st max_tokens = 200 else: instructions = promptArticle3 - max_tokens = 400 + max_tokens = 600 elif type == "text": if language == "the same language as the ": language = language + "text" @@ -201,7 +201,7 @@ def generate_summary(text: str, length: int, type: str, language: str, title: st max_tokens = 200 else: instructions = promptText3 - max_tokens = 400 + max_tokens = 600 elif type == "document": if language == "the same language as the ": language = language + "document" @@ -213,7 +213,7 @@ def generate_summary(text: str, length: int, type: str, language: str, title: st max_tokens = 200 else: instructions = promptDocument3 - max_tokens = 400 + max_tokens = 600 try: if model == "OpenAI" or model == "Groq": diff --git a/app/src/main/res/values-cs/strings.xml b/app/src/main/res/values-cs/strings.xml index bcd73f9..2651e73 100644 --- a/app/src/main/res/values-cs/strings.xml +++ b/app/src/main/res/values-cs/strings.xml @@ -1,39 +1,38 @@ + Summary You Krátké Střední Dlouhé Nastavení Historie Regenerovat - Neplatný odkaz: Shrnutí není možné - Transkript nenalezen - Žádný obsah - Neplatný vstup - Žádné připojení k internetu - Zjištěna paywall, shrnutí není možné. - Chyba serveru, musíte aktualizovat aplikaci. - Nesprávný klíč API - Chyba serveru, musíte zadat klíč API. + Neplatný odkaz – Shrnutí není možné + Nenalezen žádný přepis + Nenalezen žádný obsah/kontext + Nejste připojeni k internetu + Paywall detekován, nelze shrnout + Chyba s OpenAI, je nutné aktualizovat aplikaci. + Neplatný API klíč + Chyba s OpenAI, musíte definovat vlastní API klíč. Poskytnuté médium je příliš dlouhé. - Server je přetížen, zkuste to později. - Původní jazyk - Použít původní jazyk místo vašeho jazyka. + Výchozí jazyk + Použít výchozí jazyk místo vašeho jazyka. Jazyk - Vyberte jazyk + Zvolte jazyk: Otevřený zdrojový kód - Náš repozitář na GitHubu - Vytvořeno s ❤ od talosross - Použít Ultra Dark - Použít Ultra Dark režim s opravdově černou barvou + Náš GitHub repositář + Vyrobeno s ❤ uživatelem talosross + Použít Ultra Tmavý režim + Použít Ultra Tmavý režim s černým pozadím Více řádků pro URL - Použít více řádků v poli URL - Nastavit API klíč - Nastavit svůj API klíč od OpenAI + Použít více řádků v URL poli + Definovat API klíč + Nastavit svůj OpenAI API klíč Design Zrušit OK - Restartovat aplikaci pro použití nových změn. + Restartujte aplikaci pro aplikování změn. Vyžadován restart aplikace Systém Světlý @@ -42,15 +41,15 @@ Hledat Nic nenalezeno Vítejte v Summary You! - Shrňte webové stránky, články a videa na YouTube - Jednoduše vložte odkaz na webovou stránku, článek nebo video na YouTube pro vygenerování shrnutí. - Nebo jednoduše sdílejte odkaz s aplikací pro vygenerování shrnutí. - Přístup ke svým minulým shrnutím v sekci Historie. + Shrňte stránky, články a YouTube videa + Jednoduše vložte odkaz na webovou stránku, článek nebo YouTube video pro vygenerování shrnutí. + Nebo jednoduše sdilejte odkaz s aplikaci pro vygenerování shrnutí. + Zobrazte vaše staré shrnutí v sekci Historie. Dokončit Pokračovat Obchod Google Play Ohodnoťte nás - Návod - Znovu přehrát návod - Zatím jste nic neshrnuli. + Tutoriál + Znovu spustit tutoriál + diff --git a/build.gradle.kts b/build.gradle.kts index 42cfb19..ff07d7d 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,6 +1,6 @@ // Top-level build file where you can add configuration options common to all sub-projects/modules. plugins { - id("com.android.application") version "8.3.0" apply false + id("com.android.application") version "8.4.0" apply false id("org.jetbrains.kotlin.android") version "1.8.10" apply false id("com.chaquo.python") version "15.0.0" apply false } \ No newline at end of file