Skip to content

Commit 6ce0811

Browse files
committed
feat: update to langchain 0.2
1 parent c47f341 commit 6ce0811

27 files changed

+2293
-3714
lines changed

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,5 @@ settings:
118118
| AZURE_OPENAI_API_BASE | No | | Azure OpenAI API base |
119119
| AZURE_OPENAI_DEPLOYMENT_ID | No | | Azure OpenAI deployment ID for GPT-3.5 |
120120
| AZURE_OPENAI_GPT4_DEPLOYMENT_ID| No | | Azure OpenAI deployment ID for GPT-4 |
121+
122+
# How to release

codedog/chains/code_review/base.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33
from itertools import zip_longest
44
from typing import Any, Dict, List, Optional
55

6-
from langchain import BasePromptTemplate, LLMChain
76
from langchain.base_language import BaseLanguageModel
87
from langchain.callbacks.manager import (
98
AsyncCallbackManagerForChainRun,
109
CallbackManagerForChainRun,
1110
)
11+
from langchain.chains import LLMChain
1212
from langchain.chains.base import Chain
13+
from langchain_core.prompts import BasePromptTemplate
1314
from pydantic import Field
1415

1516
from codedog.chains.code_review.prompts import CODE_REVIEW_PROMPT

codedog/chains/code_review/prompts.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
# TODO: Localization
2-
from langchain import PromptTemplate
2+
from langchain_core.prompts import PromptTemplate
33

44
from codedog.templates import grimoire_en
55

66
CODE_REVIEW_PROMPT = PromptTemplate(
7-
template=grimoire_en.CODE_SUGGESTION, input_variables=["name", "language", "content"]
7+
template=grimoire_en.CODE_SUGGESTION,
8+
input_variables=["name", "language", "content"],
89
)

codedog/chains/code_review/translate_code_review_chain.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
from itertools import zip_longest
44
from typing import List
55

6-
from langchain import BasePromptTemplate, LLMChain
76
from langchain.base_language import BaseLanguageModel
7+
from langchain.chains import LLMChain
8+
from langchain_core.prompts import BasePromptTemplate
89
from pydantic import Field
910

1011
from codedog.chains.code_review.base import CodeReviewChain

codedog/chains/pr_summary/base.py

+57-23
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,16 @@
22

33
from typing import Any, Dict, List, Optional
44

5-
from langchain import BasePromptTemplate, LLMChain
65
from langchain.base_language import BaseLanguageModel
76
from langchain.callbacks.manager import (
87
AsyncCallbackManagerForChainRun,
98
CallbackManagerForChainRun,
109
)
10+
from langchain.chains import LLMChain
1111
from langchain.chains.base import Chain
1212
from langchain.output_parsers import OutputFixingParser, PydanticOutputParser
1313
from langchain.schema import BaseOutputParser
14+
from langchain_core.prompts import BasePromptTemplate
1415
from pydantic import Extra, Field
1516

1617
from codedog.chains.pr_summary.prompts import CODE_SUMMARY_PROMPT, PR_SUMMARY_PROMPT
@@ -20,6 +21,8 @@
2021
PullRequestProcessor,
2122
)
2223

24+
processor = PullRequestProcessor.build()
25+
2326

2427
class PRSummaryChain(Chain):
2528
"""Summarize a pull request.
@@ -32,17 +35,13 @@ class PRSummaryChain(Chain):
3235
- code_summaries(Dict[str, str]): changed code file summarizations, key is file path.
3336
"""
3437

35-
# TODO: input keys validation
36-
3738
code_summary_chain: LLMChain = Field(exclude=True)
3839
"""Chain to use to summarize code change."""
3940
pr_summary_chain: LLMChain = Field(exclude=True)
4041
"""Chain to use to summarize PR."""
4142

4243
parser: BaseOutputParser = Field(exclude=True)
4344
"""Parse pr summarized result to PRSummary object."""
44-
processor: PullRequestProcessor = Field(exclude=True, default_factory=PullRequestProcessor.build)
45-
"""PR data process."""
4645

4746
_input_keys: List[str] = ["pull_request"]
4847
_output_keys: List[str] = ["pr_summary", "code_summaries"]
@@ -78,15 +77,21 @@ def review(self, inputs, _run_manager) -> Dict[str, Any]:
7877

7978
code_summary_inputs = self._process_code_summary_inputs(pr)
8079
code_summary_outputs = (
81-
self.code_summary_chain.apply(code_summary_inputs, callbacks=_run_manager.get_child(tag="CodeSummary"))
80+
self.code_summary_chain.apply(
81+
code_summary_inputs, callbacks=_run_manager.get_child(tag="CodeSummary")
82+
)
8283
if code_summary_inputs
8384
else []
8485
)
8586

86-
code_summaries = self.processor.build_change_summaries(code_summary_inputs, code_summary_outputs)
87+
code_summaries = processor.build_change_summaries(
88+
code_summary_inputs, code_summary_outputs
89+
)
8790

8891
pr_summary_input = self._process_pr_summary_input(pr, code_summaries)
89-
pr_summary_output = self.pr_summary_chain(pr_summary_input, callbacks=_run_manager.get_child(tag="PRSummary"))
92+
pr_summary_output = self.pr_summary_chain(
93+
pr_summary_input, callbacks=_run_manager.get_child(tag="PRSummary")
94+
)
9095

9196
return self._process_result(pr_summary_output, code_summaries)
9297

@@ -95,26 +100,38 @@ async def areview(self, inputs, _run_manager) -> Dict[str, Any]:
95100

96101
code_summary_inputs = self._process_code_summary_inputs(pr)
97102
code_summary_outputs = (
98-
await self.code_summary_chain.aapply(code_summary_inputs, callbacks=_run_manager.get_child())
103+
await self.code_summary_chain.aapply(
104+
code_summary_inputs, callbacks=_run_manager.get_child()
105+
)
99106
if code_summary_inputs
100107
else []
101108
)
102109

103-
code_summaries = self.processor.build_change_summaries(code_summary_inputs, code_summary_outputs)
110+
code_summaries = processor.build_change_summaries(
111+
code_summary_inputs, code_summary_outputs
112+
)
104113

105114
pr_summary_input = self._process_pr_summary_input(pr, code_summaries)
106-
pr_summary_output = await self.pr_summary_chain.acall(pr_summary_input, callbacks=_run_manager.get_child())
115+
pr_summary_output = await self.pr_summary_chain.ainvoke(
116+
pr_summary_input, callbacks=_run_manager.get_child()
117+
)
107118

108119
return await self._aprocess_result(pr_summary_output, code_summaries)
109120

110-
def _call(self, inputs: Dict[str, Any], run_manager: Optional[CallbackManagerForChainRun] = None) -> Dict[str, Any]:
121+
def _call(
122+
self,
123+
inputs: Dict[str, Any],
124+
run_manager: Optional[CallbackManagerForChainRun] = None,
125+
) -> Dict[str, Any]:
111126
_run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
112127
_run_manager.on_text(inputs["pull_request"].json() + "\n")
113128

114129
return self.review(inputs, _run_manager)
115130

116131
async def _acall(
117-
self, inputs: Dict[str, Any], run_manager: Optional[AsyncCallbackManagerForChainRun] = None
132+
self,
133+
inputs: Dict[str, Any],
134+
run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
118135
) -> Dict[str, Any]:
119136
_run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
120137
await _run_manager.on_text(inputs["pull_request"].json() + "\n")
@@ -123,28 +140,36 @@ async def _acall(
123140

124141
def _process_code_summary_inputs(self, pr: PullRequest) -> List[Dict[str, str]]:
125142
input_data = []
126-
code_files = self.processor.get_diff_code_files(pr)
143+
code_files = processor.get_diff_code_files(pr)
127144
for code_file in code_files:
128145
input_item = {
129-
"content": code_file.diff_content.content[:2000], # TODO: handle long diff
146+
"content": code_file.diff_content.content[
147+
:2000
148+
], # TODO: handle long diff
130149
"name": code_file.full_name,
131150
"language": SUFFIX_LANGUAGE_MAPPING.get(code_file.suffix, ""),
132151
}
133152
input_data.append(input_item)
134153

135154
return input_data
136155

137-
def _process_pr_summary_input(self, pr: PullRequest, code_summaries: List[ChangeSummary]) -> Dict[str, str]:
138-
change_files_material: str = self.processor.gen_material_change_files(pr.change_files)
139-
code_summaries_material = self.processor.gen_material_code_summaries(code_summaries)
140-
pr_metadata_material = self.processor.gen_material_pr_metadata(pr)
156+
def _process_pr_summary_input(
157+
self, pr: PullRequest, code_summaries: List[ChangeSummary]
158+
) -> Dict[str, str]:
159+
change_files_material: str = processor.gen_material_change_files(
160+
pr.change_files
161+
)
162+
code_summaries_material = processor.gen_material_code_summaries(code_summaries)
163+
pr_metadata_material = processor.gen_material_pr_metadata(pr)
141164
return {
142165
"change_files": change_files_material,
143166
"code_summaries": code_summaries_material,
144167
"metadata": pr_metadata_material,
145168
}
146169

147-
def _process_result(self, pr_summary_output: Dict[str, Any], code_summaries: List[ChangeSummary]) -> Dict[str, Any]:
170+
def _process_result(
171+
self, pr_summary_output: Dict[str, Any], code_summaries: List[ChangeSummary]
172+
) -> Dict[str, Any]:
148173
return {
149174
"pr_summary": pr_summary_output["text"],
150175
"code_summaries": code_summaries,
@@ -167,7 +192,16 @@ def from_llm(
167192
pr_summary_prompt: BasePromptTemplate = PR_SUMMARY_PROMPT,
168193
**kwargs,
169194
) -> PRSummaryChain:
170-
parser = OutputFixingParser.from_llm(llm=pr_summary_llm, parser=PydanticOutputParser(pydantic_object=PRSummary))
195+
parser = OutputFixingParser.from_llm(
196+
llm=pr_summary_llm, parser=PydanticOutputParser(pydantic_object=PRSummary)
197+
)
171198
code_summary_chain = LLMChain(llm=code_summary_llm, prompt=code_summary_prompt)
172-
pr_summary_chain = LLMChain(llm=pr_summary_llm, prompt=pr_summary_prompt, output_parser=parser)
173-
return cls(code_summary_chain=code_summary_chain, pr_summary_chain=pr_summary_chain, parser=parser, **kwargs)
199+
pr_summary_chain = LLMChain(
200+
llm=pr_summary_llm, prompt=pr_summary_prompt, output_parser=parser
201+
)
202+
return cls(
203+
code_summary_chain=code_summary_chain,
204+
pr_summary_chain=pr_summary_chain,
205+
parser=parser,
206+
**kwargs,
207+
)

codedog/chains/pr_summary/prompts.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from langchain import PromptTemplate
21
from langchain.output_parsers import PydanticOutputParser
2+
from langchain_core.prompts import PromptTemplate
33

44
from codedog.models import PRSummary
55
from codedog.templates import grimoire_en
@@ -11,4 +11,6 @@
1111
input_variables=["metadata", "change_files", "code_summaries"],
1212
partial_variables={"format_instructions": parser.get_format_instructions()},
1313
)
14-
CODE_SUMMARY_PROMPT = PromptTemplate(template=grimoire_en.CODE_SUMMARY, input_variables=["name", "language", "content"])
14+
CODE_SUMMARY_PROMPT = PromptTemplate(
15+
template=grimoire_en.CODE_SUMMARY, input_variables=["name", "language", "content"]
16+
)

codedog/chains/pr_summary/translate_pr_summary_chain.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@
33
from itertools import zip_longest
44
from typing import Any, Dict, List
55

6-
from langchain import BasePromptTemplate, LLMChain
76
from langchain.base_language import BaseLanguageModel
7+
from langchain.chains import LLMChain
88
from langchain.output_parsers import OutputFixingParser, PydanticOutputParser
9+
from langchain_core.prompts import BasePromptTemplate
910
from pydantic import Field
1011

1112
from codedog.chains.pr_summary.base import PRSummaryChain
@@ -116,7 +117,7 @@ def _translate_code_summaries(
116117
return code_summaries
117118

118119
async def _atranslate_summary(self, summary: PRSummary) -> PRSummary:
119-
response = await self.translate_chain.acall(
120+
response = await self.translate_chain.ainvoke(
120121
{
121122
"language": self.language,
122123
"description": "Changed file brief summary (must in single line!).",

codedog/chains/prompts.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
from langchain import PromptTemplate
1+
from langchain_core.prompts import PromptTemplate
22

33
from codedog.templates import grimoire_en
44

55
TRANSLATE_PROMPT = PromptTemplate(
6-
template=grimoire_en.TRANSLATE_PR_REVIEW, input_variables=["language", "description", "content"]
6+
template=grimoire_en.TRANSLATE_PR_REVIEW,
7+
input_variables=["language", "description", "content"],
78
)

codedog/models/change_file.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,5 @@ class ChangeFile(BaseModel):
6262
diff_content: DiffContent = Field(default="", exclude=True)
6363
"""The diff content of this file."""
6464

65-
_raw: Optional[object] = Field(default=None, exclude=True)
65+
raw: Optional[object] = Field(default=None, exclude=True)
6666
"""Raw object generated by client api of this change file."""

codedog/models/commit.py

+3-13
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from typing import Any
22

3-
from pydantic import BaseModel, Field, validator
4-
from pydantic.fields import ModelField
3+
from pydantic import BaseModel, Field
54

65

76
class Commit(BaseModel):
@@ -15,15 +14,6 @@ class Commit(BaseModel):
1514
message: str = Field(default="")
1615
"""Commit message."""
1716

18-
_raw: object = Field(default=None, exclude=True)
17+
raw: object = Field(default=None, exclude=True)
18+
"""git commit raw object"""
1919
"""git commit raw object"""
20-
21-
@validator("*", pre=True, allow_reuse=True)
22-
def none_to_default(value: Any, field: ModelField):
23-
if value is not None:
24-
return value
25-
if field.default:
26-
return field.default
27-
if field.default_factory:
28-
return (field.default_factory)()
29-
raise ValueError(f"Field {field.name} is None.")

codedog/models/diff.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from typing import Optional
22

3-
from pydantic import BaseModel, Field
3+
from pydantic import BaseModel, ConfigDict, Field
44
from unidiff import PatchedFile
55

66

@@ -22,6 +22,8 @@ class DiffSegment(BaseModel):
2222

2323

2424
class DiffContent(BaseModel):
25+
model_config = ConfigDict(arbitrary_types_allowed=True)
26+
2527
add_count: int = Field()
2628
"""Added lines count."""
2729
remove_count: int = Field()

codedog/models/issue.py

+2-13
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from typing import Any
22

3-
from pydantic import BaseModel, Field, validator
4-
from pydantic.fields import ModelField
3+
from pydantic import BaseModel, Field
54

65

76
class Issue(BaseModel):
@@ -15,15 +14,5 @@ class Issue(BaseModel):
1514
url: str = Field(default="")
1615
"""Issue url."""
1716

18-
_raw: object = Field(default=None, exclude=True)
17+
raw: object = Field(default=None, exclude=True)
1918
"""git issue raw object"""
20-
21-
@validator("*", pre=True, allow_reuse=True)
22-
def none_to_default(value: Any, field: ModelField):
23-
if value is not None or field.type_ not in [str, int]:
24-
return value
25-
if field.default:
26-
return field.default
27-
if field.default_factory:
28-
return (field.default_factory)()
29-
raise ValueError(f"Field {field.name} is None.")

codedog/models/pull_request.py

+2-13
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from typing import Any
22

3-
from pydantic import BaseModel, Field, validator
4-
from pydantic.fields import ModelField
3+
from pydantic import BaseModel, Field
54

65
from codedog.models.change_file import ChangeFile
76
from codedog.models.issue import Issue
@@ -32,15 +31,5 @@ class PullRequest(BaseModel):
3231
"""git PR target repository"""
3332
source_repository: Repository = Field(default=None, exclude=True)
3433
"""git PR source repository"""
35-
_raw: object = Field(default=None, exclude=True)
34+
raw: object = Field(default=None, exclude=True)
3635
"""git PR raw object"""
37-
38-
@validator("*", pre=True, allow_reuse=True)
39-
def none_to_default(value: Any, field: ModelField):
40-
if value is not None or field.type_ not in [str, int, float, bool, list, dict]:
41-
return value
42-
if field.default:
43-
return field.default
44-
if field.default_factory:
45-
return (field.default_factory)()
46-
raise ValueError(f"Field {field.name} is None.")

codedog/models/repository.py

+2-13
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from typing import Any
22

3-
from pydantic import BaseModel, Field, validator
4-
from pydantic.fields import ModelField
3+
from pydantic import BaseModel, Field
54

65

76
class Repository(BaseModel):
@@ -15,15 +14,5 @@ class Repository(BaseModel):
1514
repository_url: str = Field(default="")
1615
"""Repository url this pull request belongs to."""
1716

18-
_raw: object = Field(default=None, exclude=True)
17+
raw: object = Field(default=None, exclude=True)
1918
"""git repository raw object"""
20-
21-
@validator("*", pre=True, allow_reuse=True)
22-
def none_to_default(value: Any, field: ModelField):
23-
if value is not None or field.type_ not in [str, int, float, bool, list, dict]:
24-
return value
25-
if field.default:
26-
return field.default
27-
if field.default_factory:
28-
return (field.default_factory)()
29-
raise ValueError(f"Field {field.name} is None.")

0 commit comments

Comments
 (0)