Skip to content

Commit 6b14a79

Browse files
committed
docs: add similar chunk logic
1 parent d68e37a commit 6b14a79

File tree

1 file changed

+122
-0
lines changed

1 file changed

+122
-0
lines changed

README.md

+122
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,128 @@ AutoDev 早期采用的是 OpenAI API,其模型能力较强,因此在指令
684684
685685
而基于 ArchGuard 中所提供的丰富代码质量和架构质量分析能力,诸如 OpenAPI、SCA(软件依赖分析)能力,我们也在思考未来是否也加入相关的设计。
686686
687+
```kotlin
688+
val codeDir = GitUtil
689+
.checkoutCode(config.url, config.branch, tempGitDir, config.gitDepth)
690+
.toFile().canonicalFile
691+
692+
logger.info("start walk $codeDir")
693+
694+
val languageWorker = LanguageWorker()
695+
val workerManager = WorkerManager(
696+
WorkerContext(
697+
config.codeContextStrategies,
698+
config.codeQualityTypes,
699+
config.insOutputConfig,
700+
pureDataFileName = config.pureDataFileName(),
701+
config.completionTypes,
702+
config.maxCompletionEachFile,
703+
config.completionTypeSize,
704+
qualityThreshold = InsQualityThreshold(
705+
complexity = InsQualityThreshold.MAX_COMPLEXITY,
706+
fileSize = InsQualityThreshold.MAX_FILE_SIZE,
707+
maxLineInCode = config.maxLineInCode,
708+
maxCharInCode = config.maxCharInCode,
709+
maxTokenLength = config.maxTokenLength,
710+
)
711+
)
712+
)
713+
workerManager.init(codeDir, config.language)
714+
```
715+
716+
随后是根据不同的质量门禁,来进行不同的质量检查:
717+
718+
```kotlin
719+
fun filterByThreshold(job: InstructionFileJob) {
720+
val summary = job.fileSummary
721+
if (!supportedExtensions.contains(summary.extension)) {
722+
return
723+
}
724+
725+
// limit by complexity
726+
if (summary.complexity > context.qualityThreshold.complexity) {
727+
logger.info("skip file ${summary.location} for complexity ${summary.complexity}")
728+
return
729+
}
730+
731+
// like js minified file
732+
if (summary.binary || summary.generated || summary.minified) {
733+
return
734+
}
735+
736+
// if the file size is too large, we just try 64k
737+
if (summary.bytes > context.qualityThreshold.fileSize) {
738+
logger.info("skip file ${summary.location} for size ${summary.bytes}")
739+
return
740+
}
741+
742+
// limit by token length
743+
val encoded = enc.encode(job.code)
744+
val length = encoded.size
745+
if (length > context.qualityThreshold.maxTokenLength) {
746+
logger.info("skip file ${summary.location} for over ${context.qualityThreshold.maxTokenLength} tokens")
747+
println("| filename: ${summary.filename} | tokens: $length | complexity: ${summary.complexity} | code: ${summary.lines} | size: ${summary.bytes} | location: ${summary.location} |")
748+
return
749+
}
750+
751+
val language = SupportedLang.from(summary.language)
752+
val worker = workers[language] ?: return
753+
worker.addJob(job)
754+
}
755+
```
756+
757+
在过滤之后,我们就可以由不同语言的 Worker 来进行处理,诸如 JavaWorker、PythonWorker 等。
758+
759+
```kotlin
760+
val lists = jobs.map { job ->
761+
val jobContext = JobContext(
762+
job,
763+
context.qualityTypes,
764+
fileTree,
765+
context.insOutputConfig,
766+
context.completionTypes,
767+
context.maxCompletionInOneFile,
768+
project = ProjectContext(
769+
compositionDependency = context.compositionDependency,
770+
),
771+
context.qualityThreshold
772+
)
773+
774+
context.codeContextStrategies.map { type ->
775+
val codeStrategyBuilder = type.builder(jobContext)
776+
codeStrategyBuilder.build()
777+
}.flatten()
778+
}.flatten()
779+
```
780+
781+
根据用户选择的上下文策略,我们就可以构建出不同的上下文,如:相关上下文、相似上下文等。
782+
783+
SimilarChunksStrategyBuilder 的主要逻辑:
784+
785+
1. 使用配置中指定的规则检查以识别存在问题的数据结构。
786+
2. 收集所有结构相似的数据结构。
787+
3. 为每个被识别的数据结构中的函数构建补全构建器。
788+
4. 过滤掉前置光标和后置光标内容为空的补全构建器。
789+
5. 使用 JavaSimilarChunker 计算块补全的相似块。
790+
6. 为每个补全构建器创建 SimilarChunkIns 对象,包括语言、前置光标、相似块、后置光标、输出和类型的相关信息。
791+
7. 返回生成的 SimilarChunkIns 对象的列表。
792+
793+
在规则检查里,我们可以通过不同的规则来检查不同的代码质量问题,如:代码坏味道、测试坏味道、API 设计味道等。
794+
795+
```kotlin
796+
fun create(types: List<CodeQualityType>, thresholds: Map<String, Int> = mapOf()): List<QualityAnalyser> {
797+
return types.map { type ->
798+
when (type) {
799+
CodeQualityType.BadSmell -> BadsmellAnalyser(thresholds)
800+
CodeQualityType.TestBadSmell -> TestBadsmellAnalyser(thresholds)
801+
CodeQualityType.JavaController -> JavaControllerAnalyser(thresholds)
802+
CodeQualityType.JavaRepository -> JavaRepositoryAnalyser(thresholds)
803+
CodeQualityType.JavaService -> JavaServiceAnalyser(thresholds)
804+
}
805+
}
806+
}
807+
```
808+
687809
## 附:相关资源
688810
689811
TODO

0 commit comments

Comments
 (0)