Skip to content

Commit a1702a4

Browse files
kratos06 authored and Arcadia822 committed
Add detailed logging for DeepSeek model responses and fix default score issue
1 parent da1a5cf commit a1702a4

File tree

2 files changed

+42
-2
lines changed

2 files changed

+42
-2
lines changed

codedog/utils/code_evaluator.py

+36 -2
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,9 @@ async def _evaluate_single_diff(self, diff_content: str) -> Dict[str, Any]:
697697
def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]:
698698
"""Validate and normalize scores with enhanced format handling."""
699699
try:
700+
# 记录原始结果
701+
logger.info(f"Validating scores from result: {result}")
702+
700703
# 检查并处理不同格式的评分结果
701704
normalized_result = {}
702705

@@ -706,6 +709,13 @@ def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]:
706709
"error_handling", "documentation", "code_style", "overall_score", "comments", "estimated_hours"
707710
]
708711

712+
# 记录是否所有字段都存在
713+
missing_fields = [field for field in required_fields if field not in result]
714+
if missing_fields:
715+
logger.warning(f"Missing fields in result: {missing_fields}")
716+
else:
717+
logger.info("All required fields are present in the result")
718+
709719
# 处理可能的不同格式
710720
# 格式1: {"readability": 8, "efficiency": 7, ...}
711721
# 格式2: {"score": {"readability": 8, "efficiency": 7, ...}}
@@ -910,7 +920,14 @@ def _validate_scores(self, result: Dict[str, Any]) -> Dict[str, Any]:
910920

911921
def _generate_default_scores(self, error_message: str) -> Dict[str, Any]:
912922
"""Generate default scores when evaluation fails."""
913-
return {
923+
logger.warning(f"Generating default scores due to error: {error_message[:200]}...")
924+
925+
# 记录调用栈,以便了解是从哪里调用的
926+
import traceback
927+
stack_trace = traceback.format_stack()
928+
logger.debug(f"Default scores generated from:\n{''.join(stack_trace[-5:-1])}")
929+
930+
default_scores = {
914931
"readability": 5,
915932
"efficiency": 5,
916933
"security": 5,
@@ -923,6 +940,9 @@ def _generate_default_scores(self, error_message: str) -> Dict[str, Any]:
923940
"comments": error_message
924941
}
925942

943+
logger.info(f"Default scores generated: {default_scores}")
944+
return default_scores
945+
926946
def _estimate_default_hours(self, additions: int, deletions: int) -> float:
927947
"""Estimate default working hours based on additions and deletions.
928948
@@ -1120,9 +1140,13 @@ def _extract_json(self, text: str) -> str:
11201140
return ""
11211141

11221142
# 打印原始文本的类型和长度
1143+
logger.info(f"Response type: {type(text)}, length: {len(text)}")
11231144
print(f"DEBUG: Response type: {type(text)}, length: {len(text)}")
11241145
print(f"DEBUG: First 100 chars: '{text[:100]}'")
11251146

1147+
# 记录完整响应用于调试
1148+
logger.debug(f"Complete model response: {text}")
1149+
11261150
# 检查是否包含无法评估的提示(如Base64编码内容)
11271151
unevaluable_patterns = [
11281152
r'Base64编码',
@@ -1276,6 +1300,7 @@ def _fix_malformed_json(self, json_str: str) -> str:
12761300
"estimated_hours": 0.0,
12771301
"comments": "API返回空响应,显示默认分数。"
12781302
}
1303+
logger.warning("Returning default scores due to empty response")
12791304
return json.dumps(default_scores)
12801305

12811306
# 检查是否是错误消息而不是JSON
@@ -1679,10 +1704,19 @@ async def _evaluate_diff_chunk(self, chunk: str) -> Dict[str, Any]:
16791704
retry_count += 1
16801705
if retry_count >= 2: # 只重试两次
16811706
logger.error(f"DeepSeek API error after 2 retries, abandoning evaluation: {error_message}")
1682-
return self._generate_default_scores(f"DeepSeek API错误,放弃评估: {error_message}")
1707+
logger.error(f"Original error: {e}")
1708+
logger.error(f"Last response (if any): {generated_text[:500] if generated_text else 'No response'}")
1709+
1710+
# 创建一个详细的错误消息
1711+
error_detail = f"DeepSeek API错误,放弃评估: {error_message}\n"
1712+
error_detail += f"原始错误: {e}\n"
1713+
error_detail += f"最后响应: {generated_text[:200] if generated_text else '无响应'}"
1714+
1715+
return self._generate_default_scores(error_detail)
16831716
# 使用较短的等待时间
16841717
wait_time = 3 # 固定3秒等待时间
16851718
logger.warning(f"DeepSeek API error, retrying in {wait_time}s (attempt {retry_count}/2)")
1719+
logger.warning(f"Error details: {error_message}")
16861720
await asyncio.sleep(wait_time)
16871721
else:
16881722
# 其他错误直接返回

codedog/utils/langchain_utils.py

+6
Original file line numberDiff line numberDiff line change
@@ -263,11 +263,17 @@ async def _agenerate(
263263
# 提取消息内容
264264
message = response_data["choices"][0]["message"]["content"]
265265

266+
# 记录完整的响应内容用于调试
267+
logger.info(f"DeepSeek API response received successfully")
268+
logger.debug(f"DeepSeek API complete response: {json.dumps(response_data, ensure_ascii=False)}")
269+
logger.debug(f"DeepSeek API message content: {message}")
270+
266271
# 更新令牌使用和成本
267272
if "usage" in response_data:
268273
tokens = response_data["usage"].get("total_tokens", 0)
269274
self.total_tokens += tokens
270275
self.total_cost += self._calculate_cost(tokens)
276+
logger.info(f"DeepSeek API token usage: {tokens}, total cost: ${self.total_cost:.6f}")
271277

272278
# 创建并返回 ChatResult
273279
generation = ChatGeneration(message=AIMessage(content=message))

0 commit comments

Comments
 (0)