Skip to content

Commit 885870b

Browse files
authored
Merge pull request #405 from unitagain/feature/baidu-search-integration
feat: add Baidu AI Search integration
2 parents fa39176 + a479fea commit 885870b

File tree

12 files changed

+669
-1
lines changed

12 files changed

+669
-1
lines changed
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
"""Add BAIDU_SEARCH_API to searchsourceconnectortype enum
2+
3+
Revision ID: 30
4+
Revises: 29
5+
6+
Changes:
7+
1. Add BAIDU_SEARCH_API value to searchsourceconnectortype enum
8+
2. Add BAIDU_SEARCH_API value to documenttype enum for consistency
9+
"""
10+
11+
from collections.abc import Sequence
12+
13+
from alembic import op
14+
15+
# revision identifiers, used by Alembic.
revision: str = "30"  # this migration's identifier
down_revision: str | None = "29"  # migration applied immediately before this one
branch_labels: str | Sequence[str] | None = None  # no branch labels for this revision
depends_on: str | Sequence[str] | None = None  # no cross-branch dependencies
20+
21+
22+
def upgrade() -> None:
    """Add BAIDU_SEARCH_API to searchsourceconnectortype and documenttype enums.

    Both enums get the same new label so connector type and document type
    stay consistent. The guarded ``ALTER TYPE ... ADD VALUE`` makes the
    migration idempotent: re-running it against a database that already has
    the label is a no-op instead of an error.
    """
    for enum_type in ("searchsourceconnectortype", "documenttype"):
        _add_enum_value_if_missing(enum_type, "BAIDU_SEARCH_API")


def _add_enum_value_if_missing(enum_type: str, label: str) -> None:
    """Append *label* to the PostgreSQL enum *enum_type* unless already present.

    Uses a DO block because ``ALTER TYPE ... ADD VALUE`` has no native
    ``IF NOT EXISTS`` guard on older PostgreSQL versions; existence is
    checked via pg_type/pg_enum first.
    """
    op.execute(
        f"""
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT 1 FROM pg_type t
                JOIN pg_enum e ON t.oid = e.enumtypid
                WHERE t.typname = '{enum_type}' AND e.enumlabel = '{label}'
            ) THEN
                ALTER TYPE {enum_type} ADD VALUE '{label}';
            END IF;
        END
        $$;
        """
    )
58+
59+
60+
def downgrade() -> None:
    """No-op: removing PostgreSQL enum values is unsafe, so nothing is undone.

    Dropping a label from a PostgreSQL enum would require deleting every
    row that references BAIDU_SEARCH_API, recreating the enum type without
    the label, and re-adding all other labels. Because that can break or
    destroy existing data, this downgrade is intentionally left empty.
    """
    return None
73+

surfsense_backend/app/agents/researcher/nodes.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,32 @@ async def fetch_relevant_documents(
10371037
}
10381038
)
10391039

1040+
elif connector == "BAIDU_SEARCH_API":
1041+
(
1042+
source_object,
1043+
baidu_chunks,
1044+
) = await connector_service.search_baidu(
1045+
user_query=reformulated_query,
1046+
user_id=user_id,
1047+
search_space_id=search_space_id,
1048+
top_k=top_k,
1049+
)
1050+
1051+
# Add to sources and raw documents
1052+
if source_object:
1053+
all_sources.append(source_object)
1054+
all_raw_documents.extend(baidu_chunks)
1055+
1056+
# Stream found document count
1057+
if streaming_service and writer:
1058+
writer(
1059+
{
1060+
"yield_value": streaming_service.format_terminal_info_delta(
1061+
f"🇨🇳 Found {len(baidu_chunks)} Baidu Search results related to your query"
1062+
)
1063+
}
1064+
)
1065+
10401066
elif connector == "DISCORD_CONNECTOR":
10411067
(
10421068
source_object,

surfsense_backend/app/agents/researcher/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def get_connector_emoji(connector_name: str) -> str:
4848
"DISCORD_CONNECTOR": "🗨️",
4949
"TAVILY_API": "🔍",
5050
"LINKUP_API": "🔗",
51+
"BAIDU_SEARCH_API": "🇨🇳",
5152
"GOOGLE_CALENDAR_CONNECTOR": "📅",
5253
"AIRTABLE_CONNECTOR": "🗃️",
5354
"LUMA_CONNECTOR": "✨",
@@ -72,6 +73,7 @@ def get_connector_friendly_name(connector_name: str) -> str:
7273
"DISCORD_CONNECTOR": "Discord",
7374
"TAVILY_API": "Tavily Search",
7475
"LINKUP_API": "Linkup Search",
76+
"BAIDU_SEARCH_API": "Baidu Search",
7577
"AIRTABLE_CONNECTOR": "Airtable",
7678
"LUMA_CONNECTOR": "Luma",
7779
}

surfsense_backend/app/db.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class SearchSourceConnectorType(str, Enum):
5757
TAVILY_API = "TAVILY_API"
5858
SEARXNG_API = "SEARXNG_API"
5959
LINKUP_API = "LINKUP_API"
60+
BAIDU_SEARCH_API = "BAIDU_SEARCH_API" # Baidu AI Search API for Chinese web search
6061
SLACK_CONNECTOR = "SLACK_CONNECTOR"
6162
NOTION_CONNECTOR = "NOTION_CONNECTOR"
6263
GITHUB_CONNECTOR = "GITHUB_CONNECTOR"

surfsense_backend/app/services/connector_service.py

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,226 @@ def _format_list(value: Any) -> str | None:
560560

561561
return result_object, documents
562562

563+
async def search_baidu(
    self,
    user_query: str,
    user_id: str,
    search_space_id: int,
    top_k: int = 20,
) -> tuple:
    """
    Search using Baidu AI Search API and return both sources and documents.

    Baidu AI Search performs a web search plus automatic summarization; this
    method extracts the raw search results ("references") from the API
    response rather than the generated summary text.

    Args:
        user_query: User's search query.
        user_id: ID of the user who owns the connector.
        search_space_id: Search space the connector belongs to.
        top_k: Maximum number of results to request (Baidu v2 caps at 20
            per resource type).

    Returns:
        tuple: (sources_info_dict, documents_list). On any failure the
        sources dict carries an empty "sources" list and documents_list
        is empty, so callers never need special-case error handling.
    """

    def _empty_result() -> tuple:
        # Single fallback used by every error path so the returned shape
        # stays identical everywhere.
        return {
            "id": 12,
            "name": "Baidu Search",
            "type": "BAIDU_SEARCH_API",
            "sources": [],
        }, []

    # Get Baidu connector configuration
    baidu_connector = await self.get_connector_by_type(
        user_id, SearchSourceConnectorType.BAIDU_SEARCH_API, search_space_id
    )
    if not baidu_connector:
        return _empty_result()

    config = baidu_connector.config or {}
    api_key = config.get("BAIDU_API_KEY")
    if not api_key:
        # Log only the key NAMES, never the values: connector configs can
        # hold credentials and must not leak into logs.
        print("ERROR: Baidu connector is missing BAIDU_API_KEY configuration")
        print(f"Connector config keys: {sorted(config)}")
        return _empty_result()

    # Optional configuration parameters with Baidu's documented defaults.
    model = config.get("BAIDU_MODEL", "ernie-3.5-8k")
    search_source = config.get("BAIDU_SEARCH_SOURCE", "baidu_search_v2")
    enable_deep_search = config.get("BAIDU_ENABLE_DEEP_SEARCH", False)

    # Baidu AI Search API endpoint
    baidu_endpoint = "https://qianfan.baidubce.com/v2/ai_search/chat/completions"

    # Baidu uses X-Appbuilder-Authorization instead of the standard
    # Authorization header.
    headers = {
        "X-Appbuilder-Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # Baidu v2 supports at most 20 results per resource type.
    max_per_type = min(top_k, 20)
    payload = {
        "messages": [{"role": "user", "content": user_query}],
        "model": model,
        "search_source": search_source,
        "resource_type_filter": [
            {"type": "web", "top_k": max_per_type},
            {"type": "video", "top_k": max(1, max_per_type // 4)},  # fewer videos
        ],
        "stream": False,  # non-streaming for simpler processing
        "enable_deep_search": enable_deep_search,
        "enable_corner_markers": True,  # enable reference markers
    }

    try:
        # Search + summarization can be slow; allow up to 90 seconds.
        async with httpx.AsyncClient(timeout=90.0) as client:
            response = await client.post(
                baidu_endpoint,
                headers=headers,
                json=payload,
            )
            response.raise_for_status()
    except httpx.TimeoutException as exc:
        print(f"ERROR: Baidu API request timeout after 90s: {exc!r}")
        print(f"Endpoint: {baidu_endpoint}")
        return _empty_result()
    except httpx.HTTPStatusError as exc:
        print(f"ERROR: Baidu API HTTP Status Error: {exc.response.status_code}")
        print(f"Response text: {exc.response.text[:500]}")
        print(f"Request URL: {exc.request.url}")
        return _empty_result()
    except httpx.RequestError as exc:
        print(f"ERROR: Baidu API Request Error: {type(exc).__name__}: {exc!r}")
        print(f"Endpoint: {baidu_endpoint}")
        return _empty_result()
    except Exception as exc:
        # Last-resort boundary catch: log context and degrade gracefully.
        print(f"ERROR: Unexpected error calling Baidu API: {type(exc).__name__}: {exc!r}")
        print(f"Endpoint: {baidu_endpoint}")
        print(f"Payload: {payload}")
        return _empty_result()

    try:
        data = response.json()
    except ValueError as e:
        print(f"ERROR: Failed to decode JSON response from Baidu AI Search: {e}")
        print(f"Response status: {response.status_code}")
        print(f"Response text: {response.text[:500]}")  # first 500 chars
        return _empty_result()

    # Extract references (search results) from the response.
    baidu_references = data.get("references", [])

    # Surface any embedded API error; the response may still carry results.
    if "code" in data or "message" in data:
        print(
            f"WARNING: Baidu API returned error - "
            f"Code: {data.get('code')}, Message: {data.get('message')}"
        )

    if not baidu_references:
        print("WARNING: No references found in Baidu API response")
        print(f"Response keys: {list(data.keys())}")
        return _empty_result()

    sources_list: list[dict[str, Any]] = []
    documents: list[dict[str, Any]] = []

    # source_id_counter is shared across connector searches — hold the lock
    # while assigning IDs so concurrent searches never collide.
    async with self.counter_lock:
        for reference in baidu_references:
            # Extract basic fields with safe defaults.
            title = reference.get("title", "Baidu Search Result")
            url = reference.get("url", "")
            content = reference.get("content", "")
            date = reference.get("date", "")
            ref_type = reference.get("type", "web")  # web, image, or video

            sources_list.append(
                {
                    "id": self.source_id_counter,
                    "title": title,
                    "description": content[:300] if content else "",  # cap length
                    "url": url,
                }
            )

            metadata = {
                "url": url,
                "date": date,
                "type": ref_type,
                "source": "BAIDU_SEARCH_API",
                "web_anchor": reference.get("web_anchor", ""),
                "website": reference.get("website", ""),
            }
            # Attach type-specific payloads when present.
            if ref_type == "image" and reference.get("image"):
                metadata["image"] = reference["image"]
            elif ref_type == "video" and reference.get("video"):
                metadata["video"] = reference["video"]

            documents.append(
                {
                    "chunk_id": self.source_id_counter,
                    "content": content,
                    "score": 1.0,  # Baidu does not provide relevance scores
                    "document": {
                        "id": self.source_id_counter,
                        "title": title,
                        "document_type": "BAIDU_SEARCH_API",
                        "metadata": metadata,
                    },
                }
            )
            self.source_id_counter += 1

    result_object = {
        "id": 12,
        "name": "Baidu Search",
        "type": "BAIDU_SEARCH_API",
        "sources": sources_list,
    }

    return result_object, documents
782+
563783
async def search_slack(
564784
self,
565785
user_query: str,

surfsense_backend/app/utils/validators.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,15 @@ def validate_list_field(key: str, field_name: str) -> None:
434434
},
435435
},
436436
"LINKUP_API": {"required": ["LINKUP_API_KEY"], "validators": {}},
437+
"BAIDU_SEARCH_API": {
438+
"required": ["BAIDU_API_KEY"],
439+
"optional": [
440+
"BAIDU_MODEL",
441+
"BAIDU_SEARCH_SOURCE",
442+
"BAIDU_ENABLE_DEEP_SEARCH",
443+
],
444+
"validators": {},
445+
},
437446
"SLACK_CONNECTOR": {"required": ["SLACK_BOT_TOKEN"], "validators": {}},
438447
"NOTION_CONNECTOR": {
439448
"required": ["NOTION_INTEGRATION_TOKEN"],

surfsense_web/.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ yarn-error.log*
3232

3333
# env files (can opt-in for committing if needed)
3434
.env
35+
.env.local
36+
.env*.local
37+
.env.development.local
38+
.env.test.local
39+
.env.production.local
3540

3641
# vercel
3742
.vercel

0 commit comments

Comments
 (0)