@@ -560,6 +560,226 @@ def _format_list(value: Any) -> str | None:
560560
561561 return result_object , documents
562562
async def search_baidu(
    self,
    user_query: str,
    user_id: str,
    search_space_id: int,
    top_k: int = 20,
) -> tuple:
    """
    Search using Baidu AI Search API and return both sources and documents.

    Baidu AI Search provides intelligent search with automatic summarization.
    Only the raw search results (the ``references`` array of the response)
    are extracted; the generated summary is ignored.

    Every failure mode (missing connector, missing API key, transport or
    HTTP error, undecodable or unexpected response body, no references) is
    logged and reported as an empty result rather than raised, so one broken
    connector does not abort a multi-source search.

    Args:
        user_query: User's search query
        user_id: User ID
        search_space_id: Search space ID
        top_k: Requested number of web results. Capped at 20 (the per-type
            limit this code uses for the v2 API); roughly a quarter as many
            video results are requested on top. A value <= 0 returns an
            empty result without contacting the API.

    Returns:
        tuple: (sources_info_dict, documents_list)
    """

    def empty_result() -> tuple:
        # Build fresh objects each time so callers may mutate the result safely.
        return {
            "id": 12,
            "name": "Baidu Search",
            "type": "BAIDU_SEARCH_API",
            "sources": [],
        }, []

    # Nothing was requested; avoid sending an invalid top_k to the API.
    if top_k <= 0:
        return empty_result()

    # Get Baidu connector configuration
    baidu_connector = await self.get_connector_by_type(
        user_id, SearchSourceConnectorType.BAIDU_SEARCH_API, search_space_id
    )

    if not baidu_connector:
        return empty_result()

    config = baidu_connector.config or {}
    api_key = config.get("BAIDU_API_KEY")

    if not api_key:
        print("ERROR: Baidu connector is missing BAIDU_API_KEY configuration")
        # Log only the key names: the config values may hold other credentials.
        print(f"Connector config keys: {sorted(config)}")
        return empty_result()

    # Optional configuration parameters
    model = config.get("BAIDU_MODEL", "ernie-3.5-8k")
    search_source = config.get("BAIDU_SEARCH_SOURCE", "baidu_search_v2")
    enable_deep_search = config.get("BAIDU_ENABLE_DEEP_SEARCH", False)

    # Baidu AI Search API endpoint
    baidu_endpoint = "https://qianfan.baidubce.com/v2/ai_search/chat/completions"

    # Note: Baidu uses X-Appbuilder-Authorization instead of the standard
    # Authorization header.
    headers = {
        "X-Appbuilder-Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # Baidu v2 supports max 20 per type
    max_per_type = min(top_k, 20)

    payload = {
        "messages": [{"role": "user", "content": user_query}],
        "model": model,
        "search_source": search_source,
        "resource_type_filter": [
            {"type": "web", "top_k": max_per_type},
            {"type": "video", "top_k": max(1, max_per_type // 4)},  # Fewer videos
        ],
        "stream": False,  # Non-streaming for simpler processing
        "enable_deep_search": enable_deep_search,
        "enable_corner_markers": True,  # Enable reference markers
    }

    try:
        # Baidu AI Search performs search + summarization, so allow a generous
        # 90 second timeout.
        async with httpx.AsyncClient(timeout=90.0) as client:
            response = await client.post(
                baidu_endpoint,
                headers=headers,
                json=payload,
            )
            response.raise_for_status()
    except httpx.TimeoutException as exc:
        print(f"ERROR: Baidu API request timeout after 90s: {exc!r}")
        print(f"Endpoint: {baidu_endpoint}")
        return empty_result()
    except httpx.HTTPStatusError as exc:
        print(f"ERROR: Baidu API HTTP Status Error: {exc.response.status_code}")
        print(f"Response text: {exc.response.text[:500]}")
        print(f"Request URL: {exc.request.url}")
        return empty_result()
    except httpx.RequestError as exc:
        print(f"ERROR: Baidu API Request Error: {type(exc).__name__}: {exc!r}")
        print(f"Endpoint: {baidu_endpoint}")
        return empty_result()
    except Exception as exc:
        # Deliberate best-effort boundary: a search connector must never
        # take down the caller.
        print(f"ERROR: Unexpected error calling Baidu API: {type(exc).__name__}: {exc!r}")
        print(f"Endpoint: {baidu_endpoint}")
        print(f"Payload: {payload}")
        return empty_result()

    try:
        data = response.json()
    except ValueError as e:
        print(f"ERROR: Failed to decode JSON response from Baidu AI Search: {e}")
        print(f"Response status: {response.status_code}")
        print(f"Response text: {response.text[:500]}")  # First 500 chars
        return empty_result()

    # Valid JSON is not necessarily an object; guard before calling .get().
    if not isinstance(data, dict):
        print(
            "ERROR: Unexpected JSON payload from Baidu AI Search: "
            f"expected object, got {type(data).__name__}"
        )
        return empty_result()

    if "code" in data or "message" in data:
        print(
            f"WARNING: Baidu API returned error - Code: {data.get('code')}, "
            f"Message: {data.get('message')}"
        )

    # Extract references (search results) from the response
    baidu_references = data.get("references")

    if not isinstance(baidu_references, list) or not baidu_references:
        print("WARNING: No references found in Baidu API response")
        print(f"Response keys: {list(data.keys())}")
        return empty_result()

    sources_list: list[dict[str, Any]] = []
    documents: list[dict[str, Any]] = []

    # Hold the lock for the whole batch so the assigned IDs are contiguous.
    async with self.counter_lock:
        for reference in baidu_references:
            # Skip malformed entries instead of crashing on .get().
            if not isinstance(reference, dict):
                continue

            # `or` fallbacks also cover keys that are present but null.
            title = reference.get("title") or "Baidu Search Result"
            url = reference.get("url") or ""
            content = reference.get("content") or ""
            date = reference.get("date", "")
            ref_type = reference.get("type", "web")  # web, image, video

            sources_list.append(
                {
                    "id": self.source_id_counter,
                    "title": title,
                    "description": content[:300],  # Limit description length
                    "url": url,
                }
            )

            metadata = {
                "url": url,
                "date": date,
                "type": ref_type,
                "source": "BAIDU_SEARCH_API",
                "web_anchor": reference.get("web_anchor", ""),
                "website": reference.get("website", ""),
            }

            # Add type-specific metadata
            if ref_type == "image" and reference.get("image"):
                metadata["image"] = reference["image"]
            elif ref_type == "video" and reference.get("video"):
                metadata["video"] = reference["video"]

            documents.append(
                {
                    "chunk_id": self.source_id_counter,
                    "content": content,
                    "score": 1.0,  # Baidu doesn't provide relevance scores
                    "document": {
                        "id": self.source_id_counter,
                        "title": title,
                        "document_type": "BAIDU_SEARCH_API",
                        "metadata": metadata,
                    },
                }
            )
            self.source_id_counter += 1

    result_object = {
        "id": 12,
        "name": "Baidu Search",
        "type": "BAIDU_SEARCH_API",
        "sources": sources_list,
    }

    return result_object, documents
782+
563783 async def search_slack (
564784 self ,
565785 user_query : str ,
0 commit comments