18
18
*/
19
19
package co .elastic .clients .rag .article ;
20
20
21
- import org .apache .tika .exception .TikaException ;
22
- import org .apache .tika .metadata .Metadata ;
23
- import org .apache .tika .parser .AutoDetectParser ;
24
- import org .apache .tika .parser .ParseContext ;
25
- import org .apache .tika .parser .Parser ;
26
- import org .apache .tika .parser .pdf .PDFParserConfig ;
27
- import org .springframework .ai .chat .messages .Message ;
28
- import org .springframework .ai .chat .messages .UserMessage ;
29
- import org .springframework .ai .chat .model .ChatModel ;
30
- import org .springframework .ai .chat .model .ChatResponse ;
31
- import org .springframework .ai .chat .prompt .Prompt ;
32
- import org .springframework .ai .chat .prompt .SystemPromptTemplate ;
21
+ import org .springframework .ai .chat .client .ChatClient ;
33
22
import org .springframework .ai .document .Document ;
23
+ import org .springframework .ai .reader .pdf .PagePdfDocumentReader ;
34
24
import org .springframework .ai .transformer .splitter .TokenTextSplitter ;
35
25
import org .springframework .ai .vectorstore .ElasticsearchVectorStore ;
36
26
import org .springframework .ai .vectorstore .SearchRequest ;
37
- import org .springframework .beans .factory .annotation .Autowired ;
38
27
import org .springframework .stereotype .Service ;
39
- import org .xml .sax .SAXException ;
40
28
41
- import java .io .FileInputStream ;
42
- import java .io .IOException ;
43
- import java .util .ArrayList ;
44
- import java .util .HashMap ;
45
29
import java .util .List ;
46
- import java .util .Map ;
47
30
import java .util .stream .Collectors ;
48
31
49
32
@ Service
50
33
public class RagService {
51
34
35
+ // Both beans autowired from default configuration
52
36
private ElasticsearchVectorStore vectorStore ;
53
- private ChatModel chatModel ;
37
+ private ChatClient chatClient ;
54
38
55
- @ Autowired
56
- public RagService (ElasticsearchVectorStore vectorStore , ChatModel model ) {
39
+ public RagService (ElasticsearchVectorStore vectorStore , ChatClient .Builder clientBuilder ) {
57
40
this .vectorStore = vectorStore ;
58
- this .chatModel = model ;
41
+ this .chatClient = clientBuilder . build () ;
59
42
}
60
43
61
- public void ingestPDF (String path ) throws IOException , TikaException , SAXException {
62
- // Initializing the PDF parser
63
- // Keep in mind that AutoDetectParser is not thread safe
64
- Parser parser = new AutoDetectParser ();
65
- // Using our custom single page handler class
66
- PageContentHandler handler = new PageContentHandler ();
44
+ public void ingestPDF (String path ) {
67
45
68
- // No need for any other specific PDF configuration
69
- ParseContext parseContext = new ParseContext ();
70
- parseContext .set (PDFParserConfig .class , new PDFParserConfig ());
71
-
72
- // The metadata contain information such as creation date, creation tool used, etc... which we
73
- // don't need
74
- Metadata metadata = new Metadata ();
75
-
76
- // Reading the file
77
- try (FileInputStream stream = new FileInputStream (path )) {
78
- parser .parse (stream , handler , metadata , parseContext );
79
- }
80
-
81
- // Getting the result as a list of Strings with the content of the pages
82
- List <String > allPages = handler .getPages ();
83
- List <Document > docbatch = new ArrayList <>();
84
-
85
- // Converting pages to Documents
86
- for (int i = 0 ; i < allPages .size (); i ++) {
87
- Map <String , Object > docMetadata = new HashMap <>();
88
- // The page number will be used in the response
89
- docMetadata .put ("page" , i + 1 );
90
-
91
- Document doc = new Document (allPages .get (i ), docMetadata );
92
- docbatch .add (doc );
93
- }
46
+ // Spring AI utility class to read a PDF file page by page
47
+ PagePdfDocumentReader pdfReader = new PagePdfDocumentReader (path );
48
+ List <Document > docbatch = pdfReader .read ();
94
49
95
50
// Sending batch of documents to vector store
96
51
// applying tokenizer
@@ -109,31 +64,31 @@ public String queryLLM(String question) {
109
64
.map (Document ::getContent )
110
65
.collect (Collectors .joining (System .lineSeparator ()));
111
66
112
- // Setting the prompt
113
- String basePrompt = """
67
+ // Setting the prompt with the context
68
+ String prompt = """
114
69
You're assisting with providing the rules of the tabletop game Runewars.
115
- Use the information from the DOCUMENTS section to provide accurate answers.
70
+ Use the information from the DOCUMENTS section to provide accurate answers to the
71
+ question in the QUESTION section.
116
72
If unsure, simply state that you don't know.
117
73
118
74
DOCUMENTS:
119
- {documents}
120
- """ ;
121
-
122
- // Preparing the question for the LLM
123
- SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate (basePrompt );
124
- Message systemMessage = systemPromptTemplate .createMessage (Map .of ("documents" , documents ));
75
+ """ + documents
76
+ + """
77
+ QUESTION:
78
+ """ + question ;
125
79
126
- UserMessage userMessage = new UserMessage (question );
127
80
128
- Prompt prompt = new Prompt (List .of (systemMessage , userMessage ));
129
81
// Calling the chat model with the question
130
- ChatResponse response = chatModel .call (prompt );
82
+ String response = chatClient .prompt ()
83
+ .user (prompt )
84
+ .call ()
85
+ .content ();
131
86
132
- return response . getResult (). getOutput (). getContent () +
87
+ return response +
133
88
System .lineSeparator () +
134
89
"Found at page: " +
135
90
// Retrieving the first ranked page number from the document metadata
136
- vectorStoreResult .get (0 ).getMetadata ().get ("page" ) +
91
+ vectorStoreResult .get (0 ).getMetadata ().get (PagePdfDocumentReader . METADATA_START_PAGE_NUMBER ) +
137
92
" of the manual" ;
138
93
}
139
94
}
0 commit comments