Add detailed logging for DeepSeek model responses and fix default score issue

kratos06 · Arcadia822 · commit a1702a4ab933 · 2025-04-25T15:42:45.000+08:00
diff --git a/codedog/utils/code_evaluator.py b/codedog/utils/code_evaluator.py
@@ -697,6 +697,9 @@ async def _evaluate_single_diff(self, diff_content: str) -> Dict[str, Any]:
     def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]:
         """Validate and normalize scores with enhanced format handling."""
         try:
+            # 记录原始结果
+            logger.info(f"Validating scores from result: {result}")
+
             # 检查并处理不同格式的评分结果
             normalized_result = {}
 
@@ -706,6 +709,13 @@ def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]:
                 "error_handling", "documentation", "code_style", "overall_score", "comments", "estimated_hours"
             ]
 
+            # 记录是否所有字段都存在
+            missing_fields = [field for field in required_fields if field not in result]
+            if missing_fields:
+                logger.warning(f"Missing fields in result: {missing_fields}")
+            else:
+                logger.info("All required fields are present in the result")
+
             # 处理可能的不同格式
             # 格式1: {"readability": 8, "efficiency": 7, ...}
             # 格式2: {"score": {"readability": 8, "efficiency": 7, ...}}
@@ -910,7 +920,14 @@ def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]:
 
     def _generate_default_scores(self, error_message: str) -> Dict[str, Any]:
         """Generate default scores when evaluation fails."""
-        return {
+        logger.warning(f"Generating default scores due to error: {error_message[:200]}...")
+
+        # 记录调用栈，以便了解是从哪里调用的
+        import traceback
+        stack_trace = traceback.format_stack()
+        logger.debug(f"Default scores generated from:\n{''.join(stack_trace[-5:-1])}")
+
+        default_scores = {
             "readability": 5,
             "efficiency": 5,
             "security": 5,
@@ -923,6 +940,9 @@ def _generate_default_scores(self, error_message: str) -> Dict[str, Any]:
             "comments": error_message
         }
 
+        logger.info(f"Default scores generated: {default_scores}")
+        return default_scores
+
     def _estimate_default_hours(self, additions: int, deletions: int) -> float:
         """Estimate default working hours based on additions and deletions.
 
@@ -1120,9 +1140,13 @@ def _extract_json(self, text: str) -> str:
             return ""
 
         # 打印原始文本的类型和长度
+        logger.info(f"Response type: {type(text)}, length: {len(text)}")
         print(f"DEBUG: Response type: {type(text)}, length: {len(text)}")
         print(f"DEBUG: First 100 chars: '{text[:100]}'")
 
+        # 记录完整响应用于调试
+        logger.debug(f"Complete model response: {text}")
+
         # &#26816;&#26597;&#26159;&#21542;&#21253;&#21547;&#26080;&#27861;&#35780;&#20272;&#30340;&#25552;&#31034;&#65288;&#22914;Base64&#32534;&#30721;&#20869;&#23481;&#65289;
         unevaluable_patterns = [
             r'Base64&#32534;&#30721;',
@@ -1276,6 +1300,7 @@ def _fix_malformed_json(self, json_str: str) -> str:
                 "estimated_hours": 0.0,
                 "comments": "API&#36820;&#22238;&#31354;&#21709;&#24212;&#65292;&#26174;&#31034;&#40664;&#35748;&#20998;&#25968;&#12290;"
             }
+            logger.warning("Returning default scores due to empty response")
             return json.dumps(default_scores)
 
         # &#26816;&#26597;&#26159;&#21542;&#26159;&#38169;&#35823;&#28040;&#24687;&#32780;&#19981;&#26159;JSON
@@ -1679,10 +1704,19 @@ async def _evaluate_diff_chunk(self, chunk: str) -> Dict[str, Any]:
                     retry_count += 1
                     if retry_count >= 2:  # &#21482;&#37325;&#35797;&#20004;&#27425;
                         logger.error(f"DeepSeek API error after 2 retries, abandoning evaluation: {error_message}")
-                        return self._generate_default_scores(f"DeepSeek API&#38169;&#35823;&#65292;&#25918;&#24323;&#35780;&#20272;: {error_message}")
+                        logger.error(f"Original error: {e}")
+                        logger.error(f"Last response (if any): {generated_text[:500] if generated_text else 'No response'}")
+
+                        # 创建一个详细的错误消息
+                        error_detail = f"DeepSeek API&#38169;&#35823;&#65292;&#25918;&#24323;&#35780;&#20272;: {error_message}\n"
+                        error_detail += f"&#21407;&#22987;&#38169;&#35823;: {e}\n"
+                        error_detail += f"&#26368;&#21518;&#21709;&#24212;: {generated_text[:200] if generated_text else '&#26080;&#21709;&#24212;'}"
+
+                        return self._generate_default_scores(error_detail)
                     # 使用较短的等待时间
                     wait_time = 3  # &#22266;&#23450;3&#31186;&#31561;&#24453;&#26102;&#38388;
                     logger.warning(f"DeepSeek API error, retrying in {wait_time}s (attempt {retry_count}/2)")
+                    logger.warning(f"Error details: {error_message}")
                     await asyncio.sleep(wait_time)
                 else:
                     # 其他错误直接返回
diff --git a/codedog/utils/langchain_utils.py b/codedog/utils/langchain_utils.py
@@ -263,11 +263,17 @@ async def _agenerate(
                             # 提取消息内容
                             message = response_data["choices"][0]["message"]["content"]
 
+                            # &#35760;&#24405;&#23436;&#25972;&#30340;&#21709;&#24212;&#20869;&#23481;&#29992;&#20110;&#35843;&#35797;
+                            logger.info(f"DeepSeek API response received successfully")
+                            logger.debug(f"DeepSeek API complete response: {json.dumps(response_data, ensure_ascii=False)}")
+                            logger.debug(f"DeepSeek API message content: {message}")
+
                             # &#26356;&#26032;&#20196;&#29260;&#20351;&#29992;&#21644;&#25104;&#26412;
                             if "usage" in response_data:
                                 tokens = response_data["usage"].get("total_tokens", 0)
                                 self.total_tokens += tokens
                                 self.total_cost += self._calculate_cost(tokens)
+                                logger.info(f"DeepSeek API token usage: {tokens}, total cost: ${self.total_cost:.6f}")
 
                             # &#21019;&#24314;&#24182;&#36820;&#22238; ChatResult
                             generation = ChatGeneration(message=AIMessage(content=message))