From b7a64c57e4f1758d65193aafff943b1432ccca68 Mon Sep 17 00:00:00 2001 From: Louis Beaumont Date: Thu, 9 Jan 2025 16:47:35 -0800 Subject: [PATCH] docs: update all and cleanup warnings --- content/docs/pages/docs/_meta.js | 5 +- content/docs/pages/docs/api-reference.mdx | 192 ++++++++++++++++- content/docs/pages/docs/examples.mdx | 220 -------------------- content/docs/pages/docs/faq.mdx | 45 +++- content/docs/pages/docs/getting-started.mdx | 13 +- content/docs/pages/docs/integrations.mdx | 76 ------- content/docs/pages/docs/plugins.mdx | 167 +-------------- content/docs/pages/docs/server.mdx | 20 -- content/docs/pages/index.mdx | 17 +- screenpipe-audio/src/stt.rs | 2 +- screenpipe-vision/src/core.rs | 4 +- 11 files changed, 265 insertions(+), 496 deletions(-) delete mode 100644 content/docs/pages/docs/examples.mdx delete mode 100644 content/docs/pages/docs/integrations.mdx delete mode 100644 content/docs/pages/docs/server.mdx diff --git a/content/docs/pages/docs/_meta.js b/content/docs/pages/docs/_meta.js index ae6c39d95..a1f679147 100644 --- a/content/docs/pages/docs/_meta.js +++ b/content/docs/pages/docs/_meta.js @@ -1,11 +1,8 @@ export default { "getting-started": "getting started", "plugins": "plugins (pipes)", - "examples": "examples & use cases", "architecture": "architecture overview", "api-reference": "api reference", "contributing": "contributing", - "integrations": "integrations", - "server": "use screenpipe on a server", "faq": "faq" -}; \ No newline at end of file +}; diff --git a/content/docs/pages/docs/api-reference.mdx b/content/docs/pages/docs/api-reference.mdx index d0ed4cef6..7c632fca7 100644 --- a/content/docs/pages/docs/api-reference.mdx +++ b/content/docs/pages/docs/api-reference.mdx @@ -351,4 +351,194 @@ curl "http://localhost:3030/stream/frames?start_time=2024-03-10T12:00:00Z&end_ti } ``` - \ No newline at end of file + + + + +### experimental api + +#### merge frames +- **endpoint**: `/experimental/frames/merge` +- **method**: `post` +- 
**description**: merges multiple video frames into a single video + +##### request body: +```json +{ + "video_paths": ["path/to/video1.mp4", "path/to/video2.mp4"] +} +``` + +##### sample response: +```json +{ + "video_path": "/path/to/merged/video.mp4" +} +``` + +#### validate media +- **endpoint**: `/experimental/validate/media` +- **method**: `get` +- **description**: validates media file format and integrity + +##### query parameters: +- `file_path` (string): path to media file to validate + +##### sample response: +```json +{ + "status": "valid media file" +} +``` + +#### input control (experimental feature) +- **endpoint**: `/experimental/input_control` +- **method**: `post` +- **description**: control keyboard and mouse input programmatically + +##### request body: +```json +{ + "action": { + "type": "KeyPress", + "data": "enter" + } +} +``` +or +```json +{ + "action": { + "type": "MouseMove", + "data": { + "x": 100, + "y": 200 + } + } +} +``` +or +```json +{ + "action": { + "type": "MouseClick", + "data": "left" + } +} +``` +or +```json +{ + "action": { + "type": "WriteText", + "data": "hello world" + } +} +``` + + + + + +### database api + +#### execute raw sql +- **endpoint**: `/raw_sql` +- **method**: `post` +- **description**: execute raw SQL queries against the database (use with caution) + +##### request body: +```json +{ + "query": "SELECT * FROM frames LIMIT 5" +} +``` + +#### add content +- **endpoint**: `/add` +- **method**: `post` +- **description**: add new content (frames or transcriptions) to the database + +##### request body: +```json +{ + "device_name": "device1", + "content": { + "content_type": "frames", + "data": { + "frames": [ + { + "file_path": "/path/to/frame.png", + "timestamp": "2024-03-10T12:00:00Z", + "app_name": "chrome", + "window_name": "meeting", + "ocr_results": [ + { + "text": "detected text", + "text_json": "{\"additional\": \"metadata\"}", + "ocr_engine": "tesseract", + "focused": true + } + ], + "tags": ["meeting", 
"important"] + } + ] + } + } +} +``` +or +```json +{ + "device_name": "microphone1", + "content": { + "content_type": "transcription", + "data": { + "transcription": "transcribed text", + "transcription_engine": "whisper" + } + } +} +``` + + + + + +### realtime streaming api + +#### transcription stream +- **endpoint**: `/sse/transcriptions` +- **method**: `get` +- **description**: stream real-time transcriptions using server-sent events (SSE) + +##### sample event data: +```json +{ + "transcription": "live transcribed text", + "timestamp": "2024-03-10T12:00:00Z", + "device": "microphone1" +} +``` + +#### vision stream +- **endpoint**: `/sse/vision` +- **method**: `get` +- **description**: stream real-time vision events using server-sent events (SSE) + +##### query parameters: +- `images` (bool, optional): include base64 encoded images in events + +##### sample event data: +```json +{ + "type": "Ocr", + "text": "detected text", + "timestamp": "2024-03-10T12:00:00Z", + "image": "base64_encoded_image_data", + "app_name": "chrome", + "window_name": "meeting" +} +``` + + diff --git a/content/docs/pages/docs/examples.mdx b/content/docs/pages/docs/examples.mdx deleted file mode 100644 index 2f3393f9f..000000000 --- a/content/docs/pages/docs/examples.mdx +++ /dev/null @@ -1,220 +0,0 @@ -import MotionDiv from '../../components/motion-div' - -### use case examples - - - -screenpipe is a versatile tool that can be applied to a wide range of use cases, from personal productivity to business automation. below are some example workflows that highlight its functionality. - - - - - -### 1. ai-powered crm enrichment - - - -### scenario: -you want to automatically enrich your crm (customer relationship management) system with data from your screen interactions. - -### workflow: -1. **screen capture**: screenpipe continuously monitors your screen activity. -2. **ai analysis**: an ai model analyzes the captured screen data in real-time. -3. 
**crm enrichment**: based on the ai analysis, relevant information is automatically added to your crm (e.g., salesforce, notion). -4. **data organization**: the ai categorizes and organizes the information within your crm for easy access and retrieval. - -### benefits: -- save time on manual data entry. -- ensure your crm is always up-to-date with the latest customer interactions. -- improve the accuracy and completeness of your customer data. - - - - - -### 2. ai-powered daily summary - - - -### scenario: -you want an ai assistant to monitor your screen activity and provide a daily summary of your work and activities. - -### workflow: -1. **24/7 screen monitoring**: screenpipe captures your screen activity throughout the day. -2. **ai analysis**: llama 3.2 (or another ai model) processes and analyzes the captured screen data. -3. **summary generation**: the ai generates a comprehensive summary of your daily activities. -4. **email delivery**: an automated email containing the summary is sent to you at the end of each day. - -### benefits: -- gain insights into your daily productivity and time allocation. -- easily track and review your work progress. -- identify areas for improvement in your daily routine. - - - - - -### 3. private transcriptions & meeting summaries - - - -### scenario: -you need to transcribe and summarize meetings while maintaining privacy and using local resources. - -### workflow: -1. **meeting capture**: screenpipe records the audio and video of your meetings. -2. **local transcription**: solar pro 22b (running on ollama) transcribes the meeting audio locally on your macbook. -3. **summary generation**: the ai model generates a concise summary of the meeting content. -4. **secure storage**: transcriptions and summaries are stored locally, ensuring privacy and data security. - -### benefits: -- maintain privacy by processing all data locally. -- generate accurate transcriptions and summaries without relying on cloud services. 
-- save time by automating the meeting documentation process. - - - - - -### 4. meeting transcriptions and summaries - - - -### scenario: - -you regularly attend virtual meetings and want to transcribe and summarize them automatically. - -### workflow: - -1. **capture the meeting**: screenpipe records both your screen and audio during the meeting. -2. **transcribe the audio**: a pipe automatically runs speech-to-text on the recorded audio to generate a transcription. -3. **summarize the meeting**: another pipe processes the transcription using an ai model (like openai or ollama) to summarize the key points. -4. **export to notes**: the summarized notes are automatically exported to a note-taking app like notion or evernote for future reference. - -### benefits: - -- save time by not manually transcribing and summarizing meetings. -- keep an organized archive of all meetings. -- easily search for specific points from past meetings. - - - - - -### 5. productivity tracking - - - -### scenario: - -you want to analyze how much time you spend on specific tasks or applications throughout the day to improve productivity. - -### workflow: - -1. **capture screen activity**: screenpipe captures your screen activity 24/7, logging which applications you're using and what content is displayed. -2. **tag tasks**: a custom pipe tags different activities based on the application and the content on the screen (e.g., work-related apps like excel, emails, etc.). -3. **generate reports**: at the end of the day or week, the system generates a detailed report showing how much time you spent on different tasks or applications. -4. **visualize data**: the report can be visualized through charts or exported to a tool like excel or google sheets for further analysis. - -### benefits: - -- understand where your time goes. -- identify unproductive patterns and improve focus. -- set personal productivity goals and track progress. - - - - - -### 6. 
automatic documentation creation - -### scenario: - -you work in software development and need to maintain documentation of the development process and decisions made during coding sessions. - -### workflow: - -1. **capture coding sessions**: screenpipe captures your screen as you code, including any terminal commands or browser searches. -2. **tag key moments**: a custom pipe tags important activities such as code commits, test runs, or bug fixes. -3. **generate documentation**: another pipe compiles this information and generates a detailed development log, including screenshots, code snippets, and command outputs. -4. **export to documentation platform**: the generated documentation is automatically exported to a platform like confluence or github wiki. - -### benefits: - -- automatically document your development process. -- ensure nothing gets missed during coding or debugging. -- simplify the process of creating detailed technical reports. - - - - - -### 7. compliance and security monitoring - - -### scenario: - -your company needs to ensure that employees are following compliance protocols and that potential security risks are being monitored. - -### workflow: - -1. **capture screen activity**: screenpipe records employee screen activity, focusing on specific applications or workflows that require monitoring (e.g., sensitive data handling). -2. **flag security risks**: a pipe automatically scans the captured data for potential security risks, such as the display of sensitive data or the use of unauthorized software. -3. **generate alerts**: if any compliance violations or security risks are detected, the system generates alerts that are sent to the it or compliance team. -4. **create audit reports**: screenpipe generates audit reports showing compliance with security policies over time. - -### benefits: - -- ensure that employees adhere to compliance protocols. -- detect potential security threats in real-time. 
-- generate detailed audit logs to support regulatory requirements. - - - - - -### 8. educational material organization - - -### scenario: - -you're a student or researcher who regularly watches video lectures or webinars and wants to easily organize and reference the content. - -### workflow: - -1. **capture screen and audio**: screenpipe captures both the video and audio from the lecture or webinar. -2. **transcribe and tag**: a custom pipe transcribes the audio and tags important sections, such as key concepts or references to additional resources. -3. **summarize content**: another pipe summarizes the lecture, highlighting the main points and key takeaways. -4. **organize and search**: all captured and summarized content is saved in a searchable archive, allowing you to quickly reference specific topics or concepts from previous lectures. - -### benefits: - -- quickly reference important points from past lectures. -- organize educational material in a way that is easy to search and review. -- automatically create summaries to save time when studying. - - - - - -### 9. real-time collaboration and annotation - - -### scenario: - -you work in a remote team and want to collaborate on screen captures and videos in real-time. - -### workflow: - -1. **capture and share screen**: screenpipe records your screen activity and allows you to share the captured data with teammates in real-time. -2. **annotate screen captures**: a custom pipe allows team members to annotate shared screen captures, making comments or suggestions directly on the recorded content. -3. **collaborate on edits**: the annotated content is shared back, and team members can collaborate on the next steps (e.g., editing a document, fixing a bug, etc.). -4. **track changes**: all changes and annotations are tracked, ensuring that everyone is on the same page. - -### benefits: - -- improve collaboration with real-time screen sharing and annotation. 
-- keep a detailed record of team discussions and decisions. -- increase efficiency by sharing visual content with annotations. - - diff --git a/content/docs/pages/docs/faq.mdx b/content/docs/pages/docs/faq.mdx index 0f7670be3..fd7b6ffc9 100644 --- a/content/docs/pages/docs/faq.mdx +++ b/content/docs/pages/docs/faq.mdx @@ -1,11 +1,9 @@
- What's the difference with adept.ai and rewind.ai? - - - adept.ai is a closed product, focused on automation while we are open and focused on enabling tooling & infra for a wide range of applications like adept - - rewind.ai is a closed product, focused on a single use case (they only focus on meetings now), not customisable, your data is owned by them, and not extendable by developers + What's the difference with rewind.ai? + screenpipe is for developers to build apps like rewind.ai.
@@ -35,3 +33,42 @@ - etc. - We're constantly exploring new use cases and welcome community input!
+ +
+ Can I run screenpipe on remote/virtual machines? + + - yes! screenpipe works seamlessly with remote desktop solutions + - microsoft remote desktop: works out of the box, capturing both screen and audio + - other remote solutions: generally compatible as long as they support audio/video forwarding + - setup: connect to the machine with your remote desktop client, then run screenpipe there; it picks up the monitor and remote audio by default +
+ +
+ How resource-intensive is screenpipe? + + - designed to be lightweight and efficient + - typical cpu usage: 1-2% on modern machines + - memory footprint: ~100-200mb baseline + - storage usage varies based on your capture settings and activity + - optimized for 24/7 operation +
+ +
+ Can I build custom plugins/pipes? + + - yes! screenpipe is designed to be extensible + - write plugins in typescript + bun + - full access to captured screen/audio data + - integrate with any ai model or external service + - check our [plugin development guide](/docs/plugins) for details +
+ +
+ Is screenpipe open source? + + - yes! core functionality is open source under the MIT license + - built with rust + tauri for the core + - plugin system in typescript + bun + - community contributions welcome + - find us on [github](https://github.com/mediar-ai/screenpipe) +
diff --git a/content/docs/pages/docs/getting-started.mdx b/content/docs/pages/docs/getting-started.mdx index 40f0dda73..1e158f7d0 100644 --- a/content/docs/pages/docs/getting-started.mdx +++ b/content/docs/pages/docs/getting-started.mdx @@ -29,31 +29,26 @@ for non technical users or those who want to get started quickly, we recommend g -then open the simple timeline UI: +then stream the OCR data (requires `jq`): ```bash copy - git clone https://github.com/mediar-ai/screenpipe - open screenpipe-server/examples/timeline_ui_simple/index.html + curl -N "http://localhost:3030/sse/vision" | while read -r line; do echo "$line" | sed 's/^data: //' | jq; done ``` ```bash copy - git clone https://github.com/mediar-ai/screenpipe - xdg-open screenpipe-server/examples/timeline_ui_simple/index.html + curl -N "http://localhost:3030/sse/vision" | while read -r line; do echo "$line" | sed 's/^data: //' | jq; done ``` ```powershell copy - git clone https://github.com/mediar-ai/screenpipe - start screenpipe-server/examples/timeline_ui_simple/index.html + curl.exe -N "http://localhost:3030/sse/vision" | ForEach-Object { if ($_ -match '^data: (.*)$') { $Matches[1] | jq } } ``` -this will open a rewind.ai alike timeline powered by a single html file :) that you can tweak around! - now download the [desktop app](https://screenpi.pe) and use pipes (plugins) to add more features! 
diff --git a/content/docs/pages/docs/integrations.mdx b/content/docs/pages/docs/integrations.mdx deleted file mode 100644 index b357f5218..000000000 --- a/content/docs/pages/docs/integrations.mdx +++ /dev/null @@ -1,76 +0,0 @@ - - - -# integrations - -### mcp / anthropic app - -https://github.com/mediar-ai/screenpipe/blob/main/screenpipe-integrations/screenpipe-mcp - -### file organizer 2000 (obsidian plugin) - -https://github.com/different-ai/file-organizer-2000 - -### notion - -https://github.com/mediar-ai/screenpipe/tree/main/pipes/pipe-notion-table-logs - -### ollama - -screenpipe embeds ollama in the app, so you don't have to run it yourself (you can still do) - -just go to settings and enable it - -### openwebui - -https://github.com/TanGentleman/screenpipe-python-client/blob/main/INSTRUCTIONS.md - -### Omi AI Friend wearable - -looking for help to make it stable! - -https://github.com/mediar-ai/screenpipe/issues/249 - -### iphone screen mirroring - -make sure to update your iOS versions, then search for "iphone mirroring" in macos and follow the instructions - -screenpipe will record the screen so you can search or do anything with it on the screenpipe app or using the screenpipe api - -### iphone microphone recording - -just enable bluetooth and select iphone in audio device list - - -#### experimental integrations - -we have a bunch of experimental integrations available here: - -https://github.com/mediar-ai/screenpipe/tree/main/pipes - -keep in mind it's quite easy to integrate with screenpipe: -1. get some data from screenpipe API -2. send to a LLM -3. 
call another API - -the best code will autonomously handle these steps by the LLM (just provide tools and infinite round trips until completion of goal) - - -### linear - -### google sheets - -### reddit - -### obsidian - -### anthropic computer use - -### exa-ai - -### gmail - -### twitter - - - diff --git a/content/docs/pages/docs/plugins.mdx b/content/docs/pages/docs/plugins.mdx index 2ca7aa0da..03d6cd82c 100644 --- a/content/docs/pages/docs/plugins.mdx +++ b/content/docs/pages/docs/plugins.mdx @@ -6,35 +6,34 @@ import { Tabs, Tab } from 'nextra/components' screenpipe is built for extensibility through plugins that interact with captured screen and audio data. whether you need to tag activities, generate summaries, or send data to third-party services, plugins let you build powerful workflows. - plugins come in two flavors: - - **pipes**: native plugins that run within screenpipe's sandboxed environment. written in typescript/javascript. pipes can be either: - - UI-based: desktop native apps with NextJS for user interaction (e.g. think of screenpipe as a local Vercel powered by your 24/7 context) - - Headless (deprecated): running in the background without a visual interface with cron, etc. + plugins run within screenpipe's sandboxed environment. written in typescript/javascript and nextjs. ### why build pipes? 🚀 -think of pipes like a local Zapier which costs 10x & 10x less friction - no auth needed, with full context of your screen and audio data. +agents are quite dumb without context. screenpipe is the bridge between human context and AI understanding. 
#### for developers -- **zero infrastructure**: run locally, no servers or complex setups, access to your auth tokens (unlike Zapier) -- **typescript + bun**: blazing fast development +- **zero infrastructure**: run locally, no servers or complex setups, access to your auth tokens (unlike cloud agents) +- **typescript + rust + bun**: blazing fast environment - **full context**: rich OCR, desktop scrapping, keyboard/mouse, and audio transcription APIs -- **bounty program**: earn $100+ for building pipes or promoting screenpipe - **open source**: contribute to augmenting collective human intelligence - **monetization ready**: Stripe integration to monetize your pipes - **no lock-in**: ship a startup in 1h in screenpipe's store and export it later as a desktop native app using screenpipe as a library (we will even help you with that) #### killer features -- **ai flexibility**: OpenAI, local LLMs (ollama), or any provider +- **ai flexibility**: OpenAI, local LLMs (ollama), Anthropic, Gemini, etc. 
- **rich APIs**: - - `pipe.inbox` for AI/human-in-the-loop messaging - - `pipe.scheduler` for cron jobs - `pipe.input` for keyboard/mouse control - `pipe.queryScreenpipe` for context + - `pipe.streamTranscriptions` for audio transcription streaming + - `pipe.streamVision` for OCR/Accessibility streaming + - `pipe.settings` for app settings - **sandboxed & cross-platform**: safe execution on all OS - **real-time**: process screen & audio as it happens +- **cron jobs**: schedule your pipes to run at specific times, same API as Vercel +- **nextjs**: build desktop native apps with NextJS - no native hell @@ -88,7 +87,7 @@ the CLI will guide you through setting up your pipe | **speaker identification** | automatically identify and label different speakers using AI | [link](https://github.com/mediar-ai/screenpipe/tree/main/pipes/identify-speakers) | | **obsidian logs** | automate your second brain by logging activities to obsidian | [link](https://github.com/mediar-ai/screenpipe/tree/main/pipes/obsidian) | | **meeting assistant** | organize and summarize meetings with AI - get transcripts and insights | [link](https://github.com/mediar-ai/screenpipe/tree/main/pipes/meeting) | -| **linkedin ai assistant** | automate business development on linkedin | [link](https://github.com/mediar-ai/screenpipe/tree/main/pipes/linkedin_ai_assistant) | +| **linkedin ai agent** | automate business development on linkedin | [link](https://github.com/mediar-ai/screenpipe/tree/main/pipes/linkedin_ai_assistant) | | **loom** | generate looms from your screenpipe data | [link](https://github.com/mediar-ai/screenpipe/tree/main/pipes/pipe-for-loom) | @@ -102,150 +101,6 @@ screenpipe pipe download https://github.com/mediar-ai/screenpipe/tree/main/pipes screenpipe pipe enable pipe-obsidian-time-logs ``` -### pipe configuration - - - -we use `pipe.json` to configure your pipe through UI/CLI: - -```json -{ - "fields": [ - { - "default": 60, - "description": "Interval in seconds to process screen 
data", - "name": "interval", - "type": "number" - }, - { - "default": "daily", - "description": "Summary frequency: 'daily' or 'hourly:X'", - "name": "summaryFrequency", - "type": "string" - } - ] -} -``` - -this will render in the screenpipe UI. - -### screenpipe-js SDK - -key features: -- `pipe.inbox.send`: AI messages with user confirmation -- `pipe.sendDesktopNotification`: system notifications -- `pipe.queryScreenpipe`: query screen/audio data -- `pipe.input`: programmatic UI control (use your keyboard/mouse) -- `pipe.settings`: get/set app settings (e.g. AI model, port, etc.) -- (experimental) [vercel-like crons](https://vercel.com/docs/cron-jobs/manage-cron-jobs) - just replace `vercel.json` by `pipe.json` (only work for nextjs pipes) -[JS implementation (ask AI)](https://github.com/mediar-ai/screenpipe/blob/main/screenpipe-js/main.ts) -[Rust implementation (ask AI)](https://github.com/mediar-ai/screenpipe/blob/main/screenpipe-core/src/pipes.rs) - -### bounties & monetization - -- send PRs with your pipes, we'll review and merge! 
-- earn $100+ for new pipes or social media promotion -- early access to pipe store with Stripe integration -- [louis@screenpi.pe](mailto:louis@screenpi.pe) - - -### examples - - - -#### simple hourly summary with ollama - -this example gets the last hour of screen/audio data and generates a summary using your local ollama model: - -```typescript copy -import { generateText } from 'ai'; -import { ollama } from 'ollama-ai-provider'; -import { pipe } from '@screenpipe/sdk'; - -async function generateHourlySummary() { - // get settings & last hour data - const settings = await pipe.settings.getAll(); - const lastHour = await pipe.queryScreenpipe({ - contentType: 'all', - startTime: new Date(Date.now() - 60 * 60 * 1000).toISOString(), - limit: 10000 - }); - - // format data for context - const context = lastHour?.data.map(item => { - if (item.type === 'OCR') { - return `[${item.content.appName}] ${item.content.text}`; - } - return `[audio] ${item.content.transcription}`; - }).join('\n'); - - if (settings.aiProviderType === 'native-ollama') { - const { text } = await generateText({ - model: ollama(settings.aiModel), - system: 'you are a helpful assistant that summarizes activity data into markdown', - prompt: `summarize this activity data:\n${context}`, - }); - - // write to obsidian / markdown file - fs.writeFileSync(`/tmp/hourly-summary-${new Date().toISOString()}.md`, text); - } -} - -generateHourlySummary(); -``` - -#### smart meeting assistant with ollama - -this example monitors for meeting apps and provides real-time AI assistance: - -```typescript copy -import { streamText } from 'ai'; -import { ollama } from 'ollama-ai-provider'; -import { pipe } from '@screenpipe/sdk'; - - -async function meetingAssistant() { - const settings = await pipe.settings.getAll(); - - - - if (settings.aiProviderType === 'native-ollama') { - // get last 60 min context - const context = await pipe.queryScreenpipe({ - contentType: 'all', - startTime: new Date(Date.now() - 60 * 60 * 
1000).toISOString(), - limit: 10000 - }); - - const { text } = await generateText({ - model: ollama(settings.aiModel), - system: 'you are a meeting assistant that provides real-time insights', - prompt: `analyze this meeting context and suggest helpful insights:\n${JSON.stringify(context)}` - }); - - // send insights to app AI inbox with actions - await pipe.inbox.send({ - title: 'meeting insights', - body: text, - actions: [{ - label: 'save to notes', - callback: async () => { - // save to your note system - console.log('saving insights...'); - } - }] - }); - } -} - -``` - -these examples show how to: -- query screen/audio data with `pipe.queryScreenpipe` -- use local AI with ollama (you can also use OpenAI, Anthropic, or any other provider) -- send interactive notifications with `pipe.inbox` - - diff --git a/content/docs/pages/docs/server.mdx b/content/docs/pages/docs/server.mdx deleted file mode 100644 index c0cdb8191..000000000 --- a/content/docs/pages/docs/server.mdx +++ /dev/null @@ -1,20 +0,0 @@ - - -### microsoft remote desktop server - -1. connect to your server using microsoft remote desktop -2. run screenpipe - -it works by default (will pick your monitor and remote audio) - -should work out for any client and also if the server is linux - -### custom business integration - -want to integrate screenpipe with your business workflows? - -check out our [custom integrations](https://screenpi.pe/businesses) docs - - - - diff --git a/content/docs/pages/index.mdx b/content/docs/pages/index.mdx index d8be8f739..a3ec0569b 100644 --- a/content/docs/pages/index.mdx +++ b/content/docs/pages/index.mdx @@ -4,7 +4,16 @@ import MotionDiv from '../components/motion-div'
-screenpipe is an open-source, 24/7 AI screen & mic capture app & lib. it provides a reliable pipeline for developers, businesses and end-users to manage build powerful AI workflows from screen recordings, OCR, audio inputs, and transcriptions. built using rust, with cross-platform support (windows, macos, and linux), it is versatile and secure, allowing users to own their data while integrating personalized ai tools. + +context is the dark matter of intelligence. + +screenpipe empowers developers to build context-aware AI tools by: +- capturing screen & audio 24/7 +- processing everything locally for privacy +- providing clean APIs for AI integration +- supporting all major platforms + +built in rust for reliability, it's the bridge between human context and AI understanding.
@@ -14,10 +23,10 @@ screenpipe is an open-source, 24/7 AI screen & mic capture app & lib. it provide - **24/7 media capture**: captures screen and audio data continuously, storing it locally. - **personalized ai**: enables ai models to be powered by your captured data. -- **open source & secure**: your data stays private, with complete control over storage and processing. +- **open source & secure**: your data stays private, 100% local, with complete control over storage and processing. - **cross-platform**: works on windows, macos, and linux. - **multi-device support**: supports multiple monitors & audio devices for comprehensive data capture. -- **plugins & pipes**: allows the creation and use of plugins (pipes) in TypeScript, running within a JS runtime to extend functionality. +- **plugins (pipes)**: allows the creation and use of plugins (pipes) in NextJS, running within a sandboxed runtime to extend functionality. @@ -25,7 +34,7 @@ screenpipe is an open-source, 24/7 AI screen & mic capture app & lib. it provide ## target audience -screenpipe is suitable for developers, AI businesses, and anyone interested in automating data capture and creating ai-powered workflows. users can use it for tasks such as meeting transcription, screen activity logging, or thousands of other use cases. +screenpipe is suitable for developers, AI businesses, and anyone interested in automating data capture and creating desktop context-aware ai agents. ## what's next? 
diff --git a/screenpipe-audio/src/stt.rs b/screenpipe-audio/src/stt.rs index 715046b6f..1da8caf98 100644 --- a/screenpipe-audio/src/stt.rs +++ b/screenpipe-audio/src/stt.rs @@ -252,9 +252,9 @@ pub async fn create_whisper_channel( while let Some(segment) = segments.recv().await { let path = path.clone(); let transcription_result = if cfg!(target_os = "macos") { - let timestamp = timestamp + segment.start.round() as u64; #[cfg(target_os = "macos")] { + let timestamp = timestamp + segment.start.round() as u64; autoreleasepool(|| { run_stt(segment, audio.device.clone(), &mut whisper_model, audio_transcription_engine.clone(), deepgram_api_key.clone(), languages.clone(), path, timestamp) }) diff --git a/screenpipe-vision/src/core.rs b/screenpipe-vision/src/core.rs index 90e236273..45d490ac1 100644 --- a/screenpipe-vision/src/core.rs +++ b/screenpipe-vision/src/core.rs @@ -25,7 +25,6 @@ use serde_json; use std::sync::Arc; use std::{ collections::HashMap, - sync::OnceLock, time::{Duration, Instant, UNIX_EPOCH}, }; use tokio::fs::File; @@ -36,6 +35,9 @@ use tokio::time::sleep; #[cfg(target_os = "macos")] use xcap_macos::Monitor; +#[cfg(target_os = "macos")] +use std::sync::OnceLock; + #[cfg(not(target_os = "macos"))] use xcap::Monitor;