// Background message handler that reports the browser's open tabs and,
// on request, a short text summary of each tab's page content.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  // "getTabs": reply with the raw tab list for the Chrome Tabs connection.
  if (request.action === "getTabs") {
    chrome.tabs.query({}, (tabs) => {
      if (chrome.runtime.lastError) {
        sendResponse({ error: chrome.runtime.lastError.message });
      } else {
        sendResponse({ tabs: tabs });
      }
    });
    // Returning true keeps the message channel open for the async reply.
    return true;
  }

  // "getTabsWithContent": reply with every tab plus a `pageContent` summary.
  if (request.action === "getTabsWithContent") {
    chrome.tabs.query({}, async (tabs) => {
      if (chrome.runtime.lastError) {
        sendResponse({ error: chrome.runtime.lastError.message });
        return;
      }

      let tabsWithContent;
      try {
        tabsWithContent = await Promise.all(
          tabs.map(async (tab) => {
            const tabData = { ...tab, pageContent: '' }; // Add pageContent property

            try {
              // Browser-internal pages cannot be scripted, so they keep an empty summary.
              if (tab.id && tab.url && !tab.url.startsWith('chrome://') && !tab.url.startsWith('chrome-extension://')) {
                const results = await chrome.scripting.executeScript({
                  target: { tabId: tab.id },
                  func: getPageContent, // Use 'func' instead of 'function'
                });

                if (results && results[0] && results[0].result) {
                  tabData.pageContent = results[0].result;
                }
              }
            } catch (error) {
              console.error(`Could not get content for tab ${tab.id}:`, error);

              // Parsing the URL may itself throw; never let the fallback text
              // reject this tab's promise (that would drop the whole reply).
              let host = 'unknown site';
              try {
                if (tab.url) host = new URL(tab.url).hostname;
              } catch {
                // keep the 'unknown site' default
              }
              tabData.pageContent = `Content from ${host} - unable to read page content`;
            }

            return tabData;
          })
        );
      } catch (error) {
        // Always answer the sender, otherwise it waits on a channel that never replies.
        sendResponse({ error: error instanceof Error ? error.message : String(error) });
        return;
      }

      sendResponse({ tabs: tabsWithContent });
    });
    return true;
  }

  // Don't interfere with other message handlers
  return false;
});

/**
 * TreeWalker filter used by the page-content extraction: accepts only text
 * nodes that are visible and carry meaningful text.
 *
 * NOTE(review): getPageContent is handed to chrome.scripting.executeScript as
 * `func`, which serializes the function; module-level helpers such as this one
 * are presumably not available in the injected page context. Confirm this
 * helper is reachable from where it is used.
 *
 * @param node Text node under consideration.
 * @param excludedTags Lower-case parent tag names whose text is ignored.
 * @param minTextLength Minimum trimmed length for a node to be accepted.
 * @returns NodeFilter.FILTER_ACCEPT or NodeFilter.FILTER_REJECT.
 */
const acceptNode = (node: Node, excludedTags = ['script', 'style', 'noscript', 'iframe', 'object'], minTextLength = 3) => {
  const parent = node.parentElement;
  if (!parent) return NodeFilter.FILTER_REJECT;

  // Skip script, style, and other non-visible content
  if (excludedTags.includes(parent.tagName.toLowerCase())) {
    return NodeFilter.FILTER_REJECT;
  }

  // Skip if parent is hidden
  const style = window.getComputedStyle(parent);
  if (style.display === 'none' || style.visibility === 'hidden') {
    return NodeFilter.FILTER_REJECT;
  }

  // Only accept text nodes with meaningful content
  const text = node.textContent?.trim() || '';
  if (text.length < minTextLength) return NodeFilter.FILTER_REJECT;

  return NodeFilter.FILTER_ACCEPT;
};

// This gets the page content from a tab.
/**
 * Produces a short plain-text summary ("<title>. <first ~300 chars of text>")
 * of the current document.
 *
 * This function is passed as `func` to chrome.scripting.executeScript, which
 * serializes it and runs it inside the page. It must therefore be fully
 * self-contained: every helper it needs is declared inside its own body and
 * it references only page globals (document, window, NodeFilter).
 *
 * @returns The summary, or a generic "Content from <domain> ..." string when
 *          no meaningful text could be extracted.
 */
function getPageContent(): string {
  try {
    const title = document.title || '';
    const url = window.location.href;
    const domain = window.location.hostname;

    // Method 1: rendered text of the body (innerText already excludes markup).
    let allText = '';
    if (document.body) {
      allText = document.body.innerText || document.body.textContent || '';
    }

    // Method 2: if the body yielded little or nothing, walk the text nodes.
    if (!allText || allText.length < 100) {
      const excludedTags = ['script', 'style', 'noscript', 'iframe', 'object'];
      const minTextLength = 3;

      // Declared locally on purpose: a module-level filter would not exist in
      // the page context once this function has been serialized and injected.
      const acceptNode = (node: Node): number => {
        const parent = node.parentElement;
        if (!parent) return NodeFilter.FILTER_REJECT;

        // Skip script, style, and other non-visible content
        if (excludedTags.includes(parent.tagName.toLowerCase())) {
          return NodeFilter.FILTER_REJECT;
        }

        // Skip text whose parent element is hidden
        const style = window.getComputedStyle(parent);
        if (style.display === 'none' || style.visibility === 'hidden') {
          return NodeFilter.FILTER_REJECT;
        }

        // Only accept text nodes with meaningful content
        const text = node.textContent?.trim() || '';
        return text.length < minTextLength ? NodeFilter.FILTER_REJECT : NodeFilter.FILTER_ACCEPT;
      };

      const walker = document.createTreeWalker(
        document.body || document.documentElement,
        NodeFilter.SHOW_TEXT,
        { acceptNode }
      );

      const textNodes: string[] = [];
      let node: Node | null;
      while ((node = walker.nextNode())) {
        const text = node.textContent?.trim();
        if (text && text.length > 2) {
          textNodes.push(text);
        }
      }

      allText = textNodes.join(' ');
    }

    // Collapse runs of whitespace, then keep a reasonable sample (first 300 chars).
    allText = allText.replace(/\s+/g, ' ').trim();
    const textSample = allText.substring(0, 300);

    // Combine title and text content
    let result = '';
    if (title && title.trim()) {
      result += `${title.trim()}. `;
    }

    if (textSample && textSample.length > 10) {
      // Remove the title from the content if the page text repeats it up front.
      let contentText = textSample;
      if (title && textSample.toLowerCase().startsWith(title.toLowerCase())) {
        contentText = textSample.substring(title.length).trim();
        if (contentText.startsWith('.') || contentText.startsWith('-')) {
          contentText = contentText.substring(1).trim();
        }
      }

      if (contentText.length > 10) {
        result += contentText;
      }
    }

    // Generic fallback if no meaningful content found
    if (!result.trim() || result.trim().length < 20) {
      result = `Content from ${domain} - ${title || url.split('/').pop() || 'webpage'}`;
    }

    return result || `Page from ${domain}`;
  } catch (error) {
    console.error('Error extracting page content:', error);

    // Simple fallback
    const domain = window.location.hostname;
    const title = document.title || '';

    return title || `Content from ${domain}`;
  }
}
GoogleConnection, PubmedConnection, GoogleDocsConnection, GoogleScholarConnection,LinkedInConnection, ChromeTabsConnection]; export const searchConnections = (url: string, ) => { const connections = CONNECTIONS.filter(connection => connection.trigger(url)); diff --git a/src/connections/Linkedin/connection.tsx b/src/connections/Linkedin/connection.tsx index 63422d8..4917301 100644 --- a/src/connections/Linkedin/connection.tsx +++ b/src/connections/Linkedin/connection.tsx @@ -3,7 +3,7 @@ import { GenerationProgress } from "../types"; import { getSpacePortal, registerAuthCookies, reqSpaceCreation } from "../../driver"; import wikiIcon from "data-base64:../../../assets/wiki.png"; -import { v4 as uuidv4 } from 'uuid'; +import { getUuidV4 } from "../../driver"; @@ -65,7 +65,7 @@ const createSpace = async ( const company = row[companyIdx]; const url = linkIdx !== -1 ? row[linkIdx] : ""; result.push({ - uuid: uuidv4(), + uuid: getUuidV4(), title: `Applied Job: ${title}`, text: `Applied to ${title} at ${company}`, link: url, @@ -122,7 +122,7 @@ const createSpace = async ( const name = document.querySelector("h1.text-heading-xlarge")?.textContent?.trim() || "Unknown Name"; const headline = document.querySelector(".text-body-medium.break-words")?.textContent?.trim() || ""; extractedData.push({ - uuid: uuidv4(), + uuid: getUuidV4(), title: name, text: sanitize(headline), link: window.location.href, @@ -139,7 +139,7 @@ const createSpace = async ( const about = aboutSection?.innerText?.trim(); if (about) { extractedData.push({ - uuid: uuidv4(), + uuid: getUuidV4(), title: "About", text: sanitize(about), link: window.location.href, @@ -161,7 +161,7 @@ const createSpace = async ( const description = entry.innerText?.trim(); if (jobTitle && description) { extractedData.push({ - uuid: uuidv4(), + uuid: getUuidV4(), title: `Experience: ${jobTitle}`, text: sanitize(description), link: window.location.href, @@ -184,7 +184,7 @@ const createSpace = async ( const eduDetails = 
import type { MantisConnection, injectUIType, onMessageType, registerListenersType, setProgressType, establishLogSocketType } from "../types";
import { GenerationProgress } from "../types";

import chromeIcon from "data-base64:../../../assets/chrome.png";
import { getSpacePortal, registerAuthCookies, reqSpaceCreation } from "../../driver";


/** A browser tab as returned by the background script, optionally with a text summary of its page. */
interface TabWithContent extends chrome.tabs.Tab {
    pageContent?: string;
}

/** One row of the dataset sent to space creation. */
type ExtractedTab = { title: string; semantic_title: string; link: string; snippet: string; };

/** Raised when the collected rows are too few for a space to be created. */
class DatasetTooSmallError extends Error {
    constructor(public dataCount: number, message?: string) {
        super(message || `Dataset too small: ${dataCount} items`);
        this.name = 'DatasetTooSmallError';
    }
}

/** Raised when the background script reports no tabs at all. */
class NoTabsFoundError extends Error {
    constructor(message?: string) {
        super(message || 'No tabs found');
        this.name = 'NoTabsFoundError';
    }
}

/**
 * Decides whether this connection is offered for the given page URL.
 * NOTE(review): this matches Google search result pages only — confirm that is
 * the intended trigger for a tab-inventory connection.
 */
const trigger = (url: string) => {
    return url.includes("google.com/search");
}
const MAX_RETRIES = 5;
const RETRY_DELAY_MS = 3000;

/**
 * Extracts a readable message from anything caught in a `catch` clause.
 * Objects carrying a non-empty string `message` (Error or plain object) yield
 * that message; everything else, including null/undefined, is stringified.
 */
const describeError = (error: unknown): string => {
    const message = (error as { message?: unknown } | null | undefined)?.message;
    return typeof message === 'string' && message ? message : String(error);
};

/** True when the message matches the server's "dataset too small" wording. */
const isDatasetTooSmallMessage = (message: string): boolean =>
    message.includes('Dataset too small') ||
    message.includes('minimum 100 rows are required');

/**
 * Asks the background script for all open tabs together with their page summaries.
 *
 * @returns The tabs reported by the background script (empty when none are reported).
 * @throws Error when the runtime reports a messaging failure or the reply carries an `error`.
 */
const getTabsWithContentViaMessage = (): Promise<TabWithContent[]> => {
    return new Promise((resolve, reject) => {
        chrome.runtime.sendMessage({ action: "getTabsWithContent" }, (response) => {
            if (chrome.runtime.lastError) {
                reject(new Error(chrome.runtime.lastError.message));
            } else if (response?.error) {
                reject(new Error(response.error));
            } else {
                // `response` can be undefined when no handler answered; treat that
                // as "no tabs" instead of throwing inside the callback, which would
                // leave this promise unsettled forever.
                resolve(response?.tabs ?? []);
            }
        });
    });
};


/**
 * Collects every open tab, turns them into dataset rows, requests a space for
 * them and injects the UI.
 *
 * @returns `{ spaceId, createdWidget }` on success, or `null` after a
 *          user-facing notification was shown for a known failure
 *          (no tabs / dataset too small).
 * @throws Any other error raised while gathering tabs or creating the space.
 */
const createSpace = async (injectUI: injectUIType, setProgress: setProgressType, onMessage: onMessageType, registerListeners: registerListenersType, establishLogSocket: establishLogSocketType) => {
    setProgress(GenerationProgress.GATHERING_DATA);

    const extractedData: ExtractedTab[] = [];

    try {
        // Get tabs via message passing
        const tabs = await getTabsWithContentViaMessage();

        if (!tabs || tabs.length === 0) {
            throw new NoTabsFoundError();
        }

        // Process each tab (no duplication, no domain grouping)
        tabs.forEach((tab, index) => {
            if (!tab.title || !tab.url) return;

            let domain: string;
            try {
                domain = new URL(tab.url).hostname;
            } catch {
                domain = 'unknown';
            }

            // Prefer the extracted page summary; otherwise describe the tab by its domain.
            const pageContent = tab.pageContent ? tab.pageContent : `Page from ${domain}`;

            extractedData.push({
                title: tab.title,
                semantic_title: `${tab.active ? 'Active' : 'Background'} tab: ${tab.title}`,
                link: tab.url,
                snippet: `Tab ${index + 1}: ${pageContent}`
            });
        });

        setProgress(GenerationProgress.CREATING_SPACE);

        // Use automatic retry for space creation
        const spaceData = await createSpaceWithAutoRetry(extractedData, establishLogSocket, `Chrome Tabs Space (${tabs.length} tabs)`);

        setProgress(GenerationProgress.INJECTING_UI);

        const spaceId = spaceData.space_id;
        const createdWidget = await injectUI(spaceId, onMessage, registerListeners);

        setProgress(GenerationProgress.COMPLETED);

        return { spaceId, createdWidget };

    } catch (error) {
        console.error('Error in Chrome Tabs connection:', error);

        // Handle custom errors with instanceof
        if (error instanceof DatasetTooSmallError) {
            showDatasetTooSmallError(error.dataCount);
            return null;
        }

        if (error instanceof NoTabsFoundError) {
            showNoTabsError();
            return null;
        }

        // Server-side dataset errors arrive as plain messages from the external
        // API, so they still have to be recognised by their text.
        if (isDatasetTooSmallMessage(describeError(error))) {
            showDatasetTooSmallError(extractedData.length);
            return null;
        }

        throw error;
    }
}

/**
 * Requests space creation, retrying when the server answers with a gateway timeout.
 *
 * @param extractedData Rows to upload.
 * @param establishLogSocket Passed through to `reqSpaceCreation`.
 * @param title Title of the space.
 * @param maxRetries Total number of attempts (not additional retries).
 * @returns Whatever `reqSpaceCreation` resolves to.
 * @throws DatasetTooSmallError when the server rejects the dataset as too small.
 * @throws The last error when it is not a timeout or no attempts remain.
 * @throws Error when `maxRetries` is below 1, so no attempt could be made.
 */
const createSpaceWithAutoRetry = async (extractedData: { title: string; semantic_title: string; link: string; snippet: string; }[], establishLogSocket: establishLogSocketType, title: string, maxRetries = MAX_RETRIES) => {
    for (let attempt = 1; attempt <= maxRetries; attempt++) {
        try {

            if (attempt > 1) {
                // Wait for server to finish background processing
                await new Promise(resolve => setTimeout(resolve, RETRY_DELAY_MS));
            }

            return await reqSpaceCreation(extractedData, {
                "title": "title",
                "semantic_title": "semantic",
                "link": "links",
                "snippet": "semantic"
            }, establishLogSocket, title);

        } catch (error) {
            const errorMessage = describeError(error);

            // Check if it's a server-side dataset error and convert to custom error
            if (isDatasetTooSmallMessage(errorMessage)) {
                throw new DatasetTooSmallError(extractedData.length, errorMessage);
            }

            // Check if it's a timeout error and we have retries left
            const isTimeout =
                errorMessage.includes('504') ||
                errorMessage.includes('timeout') ||
                errorMessage.includes('Gateway Time-out');
            if (isTimeout && attempt < maxRetries) {
                continue; // Try again
            }

            // If it's not a timeout or we're out of retries, throw the error
            throw error;
        }
    }

    // Only reachable when maxRetries < 1: fail loudly rather than hand the
    // caller `undefined` in place of the space data.
    throw new Error(`Space creation was not attempted (maxRetries=${maxRetries})`);
};

// Error handlers
/** Base inline styles shared by all error notifications. */
const getBaseErrorStyles = () => ({
    container: `
        position: fixed;
        top: 20px;
        right: 20px;
        color: white;
        padding: 20px;
        border-radius: 12px;
        box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3);
        z-index: 10000;
        max-width: 400px;
        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    `,
    header: 'display: flex; align-items: center; margin-bottom: 12px;',
    title: 'font-size: 16px;',
    message: 'margin: 0 0 12px 0; line-height: 1.4; font-size: 14px;',
    button: `
        background: rgba(255, 255, 255, 0.2);
        border: 1px solid rgba(255, 255, 255, 0.3);
        color: white;
        padding: 8px 16px;
        border-radius: 6px;
        cursor: pointer;
    `
});

/**
 * Builds a dismissible notification card and appends it to the page body.
 * All text is assigned through `textContent`, so it is never parsed as HTML.
 */
const createErrorNotification = (config: {
    background: string;
    title: string;
    message: string;
    buttonText: string;
}) => {
    const styles = getBaseErrorStyles();
    const errorDiv = document.createElement('div');

    errorDiv.style.cssText = `
        ${styles.container}
        background: ${config.background};
    `;

    // Create header container
    const headerDiv = document.createElement('div');
    headerDiv.style.cssText = styles.header;

    const title = document.createElement('strong');
    title.style.cssText = styles.title;
    title.textContent = config.title;
    headerDiv.appendChild(title);

    // Create message paragraph
    const message = document.createElement('p');
    message.style.cssText = styles.message;
    message.textContent = config.message;

    // Create button that dismisses the notification
    const button = document.createElement('button');
    button.style.cssText = styles.button;
    button.textContent = config.buttonText;
    button.addEventListener('click', () => errorDiv.remove());

    // Assemble the error div
    errorDiv.appendChild(headerDiv);
    errorDiv.appendChild(message);
    errorDiv.appendChild(button);

    document.body.appendChild(errorDiv);
};

/** Tells the user that too few tabs were collected to build a space. */
const showDatasetTooSmallError = (dataCount: number) => {
    createErrorNotification({
        background: 'linear-gradient(135deg, #ff6b6b, #ee5a52)',
        title: 'Not Enough Data',
        message: `We found ${dataCount} tabs, but need more to create a meaningful space (recommended: ~70-100).`,
        buttonText: 'Got it'
    });
};

/** Tells the user that no tab information could be gathered. */
const showNoTabsError = () => {
    createErrorNotification({
        background: 'linear-gradient(135deg, #ff9500, #ff6b35)',
        title: 'No Tabs Found',
        message: 'Unable to gather enough tab information. Please ensure the extension has permissions and that you have at least 3 tabs open.',
        buttonText: 'OK'
    });
};

/** This connection injects no page UI; it always resolves to null. */
const injectUI = async (space_id: string, onMessage: onMessageType, registerListeners: registerListenersType) => {
    return null;
}

export const ChromeTabsConnection: MantisConnection = {
    name: "Chrome Tabs",
    description: "Analyzes all your currently open browser tabs",
    icon: chromeIcon,
    trigger: trigger,
    createSpace: createSpace,
    injectUI: injectUI,
}