Commit e017ee2 (1 parent: a98328c)

update readme and readibility of the code

File tree: 5 files changed, +29 −40 lines

- README.md (+2 −29)
- scrapegraphai/utils/cleanup_code.py (+3)
- scrapegraphai/utils/cleanup_html.py (+1 −1)
- scrapegraphai/utils/code_error_analysis.py (+12 −5)
- scrapegraphai/utils/code_error_correction.py (+11 −5)

README.md (+2 −29)

````diff
@@ -113,7 +113,6 @@ The output will be a dictionary like the following:
     "contact_email": "contact@scrapegraphai.com"
 }
 ```
-
 There are other pipelines that can be used to extract information from multiple pages, generate Python scripts, or even generate audio files.
 
 | Pipeline Name | Description |
@@ -125,6 +124,8 @@ There are other pipelines that can be used to extract information from multiple
 | SmartScraperMultiGraph | Multi-page scraper that extracts information from multiple pages given a single prompt and a list of sources. |
 | ScriptCreatorMultiGraph | Multi-page scraper that generates a Python script for extracting information from multiple pages and sources. |
 
+For each of these graphs there is the multi version. It allows to make calls of the LLM in parallel.
+
 It is possible to use different LLM through APIs, such as **OpenAI**, **Groq**, **Azure** and **Gemini**, or local models using **Ollama**.
 
 Remember to have [Ollama](https://ollama.com/) installed and download the models using the **ollama pull** command, if you want to use local models.
@@ -167,34 +168,6 @@ Please see the [contributing guidelines](https://github.com/VinciGit00/Scrapegra
 [![My Skills](https://skillicons.dev/icons?i=linkedin)](https://www.linkedin.com/company/scrapegraphai/)
 [![My Skills](https://skillicons.dev/icons?i=twitter)](https://twitter.com/scrapegraphai)
 
-## 🗺️ Roadmap
-
-We are working on the following features! If you are interested in collaborating right-click on the feature and open in a new tab to file a PR. If you have doubts and wanna discuss them with us, just contact us on [discord](https://discord.gg/uJN7TYcpNa) or open a [Discussion](https://github.com/VinciGit00/Scrapegraph-ai/discussions) here on Github!
-
-```mermaid
-%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#5C4B9B', 'edgeLabelBackground':'#ffffff', 'tertiaryColor': '#ffffff', 'primaryBorderColor': '#5C4B9B', 'fontFamily': 'Arial', 'fontSize': '16px', 'textColor': '#5C4B9B' }}}%%
-graph LR
-A[DeepSearch Graph] --> F[Use Existing Chromium Instances]
-F --> B[Page Caching]
-B --> C[Screenshot Scraping]
-C --> D[Handle Dynamic Content]
-D --> E[New Webdrivers]
-
-style A fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10
-style F fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10
-style B fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10
-style C fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10
-style D fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10
-style E fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10
-
-click A href "https://github.com/VinciGit00/Scrapegraph-ai/issues/260" "Open DeepSearch Graph Issue"
-click F href "https://github.com/VinciGit00/Scrapegraph-ai/issues/329" "Open Chromium Instances Issue"
-click B href "https://github.com/VinciGit00/Scrapegraph-ai/issues/197" "Open Page Caching Issue"
-click C href "https://github.com/VinciGit00/Scrapegraph-ai/issues/197" "Open Screenshot Scraping Issue"
-click D href "https://github.com/VinciGit00/Scrapegraph-ai/issues/279" "Open Handle Dynamic Content Issue"
-click E href "https://github.com/VinciGit00/Scrapegraph-ai/issues/171" "Open New Webdrivers Issue"
-```
-
 ## 📈 Telemetry
 We collect anonymous usage metrics to enhance our package's quality and user experience. The data helps us prioritize improvements and ensure compatibility. If you wish to opt-out, set the environment variable SCRAPEGRAPHAI_TELEMETRY_ENABLED=false. For more information, please refer to the documentation [here](https://scrapegraph-ai.readthedocs.io/en/latest/scrapers/telemetry.html).
````
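The added README sentence says the "multi" graph variants fan a single prompt out over several sources by making LLM calls in parallel. A minimal sketch of that fan-out idea in plain asyncio — `fake_llm_call` and the example URLs are stand-ins for illustration, not ScrapeGraphAI APIs:

```python
import asyncio

async def fake_llm_call(source: str) -> str:
    # Stand-in for a real LLM request; sleep simulates network latency.
    await asyncio.sleep(0.01)
    return f"summary of {source}"

async def scrape_many(sources: list[str]) -> list[str]:
    # Fan one prompt out over many sources concurrently; gather
    # preserves the input order in its result list.
    return await asyncio.gather(*(fake_llm_call(s) for s in sources))

results = asyncio.run(scrape_many(["https://a.example", "https://b.example"]))
print(results)  # → ['summary of https://a.example', 'summary of https://b.example']
```

Because the calls overlap instead of running back to back, total latency is roughly that of the slowest single call rather than the sum.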

scrapegraphai/utils/cleanup_code.py (+3)

````diff
@@ -4,6 +4,9 @@
 import re
 
 def extract_code(code: str) -> str:
+    """
+    Module for extracting code
+    """
     pattern = r'```(?:python)?\n(.*?)```'
 
     match = re.search(pattern, code, re.DOTALL)
````
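The regex in the hunk above pulls the body out of a Markdown code fence, with or without a `python` language tag. A self-contained sketch of that extraction; the no-match fallback of returning the input unchanged is my assumption, since the diff does not show the function's return path:

```python
import re

def extract_code(code: str) -> str:
    # Capture the body of a fenced block; re.DOTALL lets `.` span newlines,
    # and the lazy `(.*?)` stops at the first closing fence.
    pattern = r'```(?:python)?\n(.*?)```'
    match = re.search(pattern, code, re.DOTALL)
    # Assumed fallback: pass the text through untouched if no fence is found.
    return match.group(1) if match else code

reply = "Here you go:\n```python\nprint('hi')\n```\nDone."
print(extract_code(reply))  # → print('hi')
```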

scrapegraphai/utils/cleanup_html.py (+1 −1)

````diff
@@ -101,7 +101,7 @@ def reduce_html(html, reduction):
         for attr in list(tag.attrs):
             if attr not in attrs_to_keep:
                 del tag[attr]
-
+
     if reduction == 1:
         return minify_html(str(soup))
````

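The hunk above only normalizes a whitespace line, but the surrounding loop is worth noting: `reduce_html` walks each tag and deletes every attribute not on an allow-list, iterating over a `list(...)` snapshot so deleting keys during iteration is safe. A minimal sketch of that loop; the `Tag` class is a tiny stand-in for a BeautifulSoup tag, and the allow-list contents are assumptions for illustration:

```python
class Tag:
    """Tiny stand-in for a BeautifulSoup tag: just a dict of attributes."""
    def __init__(self, attrs):
        self.attrs = dict(attrs)
    def __delitem__(self, key):
        del self.attrs[key]

attrs_to_keep = {"class", "id", "href"}  # assumed allow-list

tag = Tag({"href": "/docs", "style": "color:red", "onclick": "x()"})
# Same shape as reduce_html: iterate over a snapshot of the keys so
# deleting entries does not invalidate the iteration.
for attr in list(tag.attrs):
    if attr not in attrs_to_keep:
        del tag[attr]
print(tag.attrs)  # → {'href': '/docs'}
```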
scrapegraphai/utils/code_error_analysis.py (+12 −5)

````diff
@@ -2,24 +2,27 @@
 This module contains the functions that are used to generate the prompts for the code error analysis.
 """
 from typing import Any, Dict
+import json
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import StrOutputParser
-import json
 from ..prompts import (
     TEMPLATE_SYNTAX_ANALYSIS, TEMPLATE_EXECUTION_ANALYSIS,
     TEMPLATE_VALIDATION_ANALYSIS, TEMPLATE_SEMANTIC_ANALYSIS
 )
 
 def syntax_focused_analysis(state: dict, llm_model) -> str:
-    prompt = PromptTemplate(template=TEMPLATE_SYNTAX_ANALYSIS, input_variables=["generated_code", "errors"])
+    prompt = PromptTemplate(template=TEMPLATE_SYNTAX_ANALYSIS,
+                            input_variables=["generated_code", "errors"])
     chain = prompt | llm_model | StrOutputParser()
     return chain.invoke({
         "generated_code": state["generated_code"],
         "errors": state["errors"]["syntax"]
     })
 
 def execution_focused_analysis(state: dict, llm_model) -> str:
-    prompt = PromptTemplate(template=TEMPLATE_EXECUTION_ANALYSIS, input_variables=["generated_code", "errors", "html_code", "html_analysis"])
+    prompt = PromptTemplate(template=TEMPLATE_EXECUTION_ANALYSIS,
+                            input_variables=["generated_code", "errors",
+                                             "html_code", "html_analysis"])
     chain = prompt | llm_model | StrOutputParser()
     return chain.invoke({
         "generated_code": state["generated_code"],
@@ -29,7 +32,9 @@ def execution_focused_analysis(state: dict, llm_model) -> str:
     })
 
 def validation_focused_analysis(state: dict, llm_model) -> str:
-    prompt = PromptTemplate(template=TEMPLATE_VALIDATION_ANALYSIS, input_variables=["generated_code", "errors", "json_schema", "execution_result"])
+    prompt = PromptTemplate(template=TEMPLATE_VALIDATION_ANALYSIS,
+                            input_variables=["generated_code", "errors",
+                                             "json_schema", "execution_result"])
     chain = prompt | llm_model | StrOutputParser()
     return chain.invoke({
         "generated_code": state["generated_code"],
@@ -39,7 +44,9 @@ def validation_focused_analysis(state: dict, llm_model) -> str:
     })
 
 def semantic_focused_analysis(state: dict, comparison_result: Dict[str, Any], llm_model) -> str:
-    prompt = PromptTemplate(template=TEMPLATE_SEMANTIC_ANALYSIS, input_variables=["generated_code", "differences", "explanation"])
+    prompt = PromptTemplate(template=TEMPLATE_SEMANTIC_ANALYSIS,
+                            input_variables=["generated_code",
+                                             "differences", "explanation"])
     chain = prompt | llm_model | StrOutputParser()
     return chain.invoke({
         "generated_code": state["generated_code"],
````

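Every function in this diff follows the same LCEL-style shape: build a `PromptTemplate`, pipe it through the model and a `StrOutputParser` with `|`, then `.invoke()` with a dict keyed by the `input_variables`. A dependency-free sketch of that pipe pattern; `Prompt`, `Chain`, and `fake_llm` are illustrative stand-ins, not LangChain APIs:

```python
class Prompt:
    """Stand-in for PromptTemplate: fills named slots in a template string."""
    def __init__(self, template, input_variables):
        self.template = template
        self.input_variables = input_variables
    def __or__(self, nxt):
        # The | operator builds a chain, mimicking the LCEL pipe syntax.
        return Chain([self, nxt])
    def run(self, inputs):
        return self.template.format(**{k: inputs[k] for k in self.input_variables})

class Chain:
    def __init__(self, steps):
        self.steps = steps
    def __or__(self, nxt):
        return Chain(self.steps + [nxt])
    def invoke(self, inputs):
        # Feed each step's output into the next, like chain.invoke() above.
        out = inputs
        for step in self.steps:
            out = step.run(out) if hasattr(step, "run") else step(out)
        return out

TEMPLATE = "Analyze these syntax errors:\n{errors}\nin:\n{generated_code}"
fake_llm = lambda text: f"LLM says: {text.splitlines()[0]}"   # echoes first line
strip_parser = lambda text: text.strip()                      # StrOutputParser stand-in

chain = Prompt(TEMPLATE, ["generated_code", "errors"]) | fake_llm | strip_parser
print(chain.invoke({"generated_code": "print(", "errors": "unexpected EOF"}))
```

The design point is that the prompt, model, and parser stay independently swappable: each of the four analysis functions only changes the template and the variable names, never the chain shape.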
scrapegraphai/utils/code_error_correction.py (+11 −5)

````diff
@@ -10,32 +10,38 @@
 )
 
 def syntax_focused_code_generation(state: dict, analysis: str, llm_model) -> str:
-    prompt = PromptTemplate(template=TEMPLATE_SYNTAX_CODE_GENERATION, input_variables=["analysis", "generated_code"])
+    prompt = PromptTemplate(template=TEMPLATE_SYNTAX_CODE_GENERATION,
+                            input_variables=["analysis", "generated_code"])
     chain = prompt | llm_model | StrOutputParser()
     return chain.invoke({
         "analysis": analysis,
         "generated_code": state["generated_code"]
     })
 
 def execution_focused_code_generation(state: dict, analysis: str, llm_model) -> str:
-    prompt = PromptTemplate(template=TEMPLATE_EXECUTION_CODE_GENERATION, input_variables=["analysis", "generated_code"])
+    prompt = PromptTemplate(template=TEMPLATE_EXECUTION_CODE_GENERATION,
+                            input_variables=["analysis", "generated_code"])
     chain = prompt | llm_model | StrOutputParser()
     return chain.invoke({
         "analysis": analysis,
         "generated_code": state["generated_code"]
     })
 
 def validation_focused_code_generation(state: dict, analysis: str, llm_model) -> str:
-    prompt = PromptTemplate(template=TEMPLATE_VALIDATION_CODE_GENERATION, input_variables=["analysis", "generated_code", "json_schema"])
+    prompt = PromptTemplate(template=TEMPLATE_VALIDATION_CODE_GENERATION,
+                            input_variables=["analysis", "generated_code",
+                                             "json_schema"])
     chain = prompt | llm_model | StrOutputParser()
     return chain.invoke({
         "analysis": analysis,
         "generated_code": state["generated_code"],
         "json_schema": state["json_schema"]
     })
-
+
 def semantic_focused_code_generation(state: dict, analysis: str, llm_model) -> str:
-    prompt = PromptTemplate(template=TEMPLATE_SEMANTIC_CODE_GENERATION, input_variables=["analysis", "generated_code", "generated_result", "reference_result"])
+    prompt = PromptTemplate(template=TEMPLATE_SEMANTIC_CODE_GENERATION,
+                            input_variables=["analysis", "generated_code",
+                                             "generated_result", "reference_result"])
     chain = prompt | llm_model | StrOutputParser()
     return chain.invoke({
         "analysis": analysis,
````

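These `*_code_generation` helpers are the second half of an analyze-then-fix loop: each one consumes the text produced by the matching `*_analysis` function in `code_error_analysis.py` and asks the LLM for corrected code. A toy sketch of that two-step pairing with the LLM calls stubbed out; the stub names and the hard-coded fix are illustrative only:

```python
def focused_analysis(state: dict) -> str:
    # Stub for an *_analysis helper: an LLM would describe what is wrong.
    return f"missing closing parenthesis in: {state['generated_code']}"

def focused_code_generation(state: dict, analysis: str) -> str:
    # Stub for a *_code_generation helper: an LLM would emit fixed code
    # conditioned on the analysis text.
    if "missing closing parenthesis" in analysis:
        return state["generated_code"] + ")"
    return state["generated_code"]

state = {"generated_code": "print('hi'"}
analysis = focused_analysis(state)
state["generated_code"] = focused_code_generation(state, analysis)
print(state["generated_code"])  # → print('hi')
```

Splitting diagnosis and repair into separate prompts keeps each prompt focused, at the cost of an extra LLM round-trip per correction attempt.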