remove spell checking feature

2023-11-16 21:48:41 +01:00 · 2023-11-16 21:48:41 +01:00 · 9bf8f7ab6c
parent 4cfaa86f04
commit 9bf8f7ab6c
8 changed files with 31 additions and 137 deletions
--- a/.gitignore
+++ b/.gitignore
@ -183,6 +183,6 @@ output/
 # VSCode
 .vscode/
-Jeffrey_Paul_Goldberg_CV.yaml
+
-pyvenv.cfg
+# Personal CVs
-bin/
+SinaAtalay_CV.yaml
--- a/README.md
+++ b/README.md
@ -58,7 +58,7 @@ cv:
          drawing using finite element analysis with
          open-source software called CalculiX.
 ```
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, giving a warning if there are any spelling mistakes, etc.
+- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, etc.
 - Then, it creates a $\LaTeX$ file.
 - Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/).
--- a/docs/index.md
+++ b/docs/index.md
@ -58,7 +58,7 @@ cv:
          drawing using finite element analysis with
          open-source software called CalculiX.
 ```
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, giving a warning if there are any spelling mistakes, etc.
+- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, etc.
 - Then, it creates a $\LaTeX$ file.
 - Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/).
--- a/pyproject.toml
+++ b/pyproject.toml
@ -13,7 +13,6 @@ dependencies = [
    'pydantic-extra-types==2.1.0',
    'pydantic_core==2.10.1',
    'typing_extensions==4.8.0',
    'pyspellchecker==0.7.2',
    'ruamel.yaml==0.17.35',
    'email-validator==2.0.0.post2',
    'typer[all]==0.9.0',
--- a/rendercv/init.py
+++ b/rendercv/init.py
@ -1,7 +1,7 @@
 """RenderCV package.
-It parses the user input YAML/JSON file and validates the data (checking spelling
+It parses the user input YAML/JSON file and validates the data (checks if the
-mistakes, whether the dates are consistent, etc.). Then, with the data, it creates a
+dates are consistent, if the URLs are valid, etc.). Then, with the data, it creates a
 $\\LaTeX$ file and renders it with [TinyTeX](https://yihui.org/tinytex/).
 """
 import logging
--- a/rendercv/data_model.py
+++ b/rendercv/data_model.py
@ -32,89 +32,10 @@ from pydantic.functional_validators import AfterValidator
 from pydantic_extra_types.phone_numbers import PhoneNumber
 from pydantic_extra_types.color import Color
 from ruamel.yaml import YAML
 from spellchecker import SpellChecker
 # Module-level logger, named after this module.
 logger = logging.getLogger(__name__)
 # don't give spelling warnings for these words:
 # (check_spelling lowercases the sentence before looking words up here, so every
 # entry is written in lower case)
 dictionary = [
    "aerostructures",
    "sportsperson",
    "cern",
    "mechatronics",
    "calculix",
    "microcontroller",
    "ansys",
    "nx",
    "aselsan",
    "hrjet",
    "simularge",
    "siemens",
    "dynamometer",
    "dc",
    "grammarly",
    "css",
    "html",
    "markdown",
    "ubuntu",
    "matlab",
    "lua",
    "premake",
    "javascript",
 ]
 # Single pyspellchecker instance shared by every check_spelling call.
 spell = SpellChecker()
 # Words flagged by check_spelling are accumulated here; the
 # print_all_the_misspeled_words validator reads this set and logs its contents in
 # one warning.
 all_misspelled_words = set()
 def check_spelling(sentence: str) -> str:
    """Record the words of a sentence that pyspellchecker does not recognize.
    The sentence is lowercased, runs of hyphens are replaced with a space, and every
    character that is not `a-z`, whitespace, a hyphen or an apostrophe is removed.
    The remaining words are looked up with
    [pyspellchecker](https://github.com/barrust/pyspellchecker). Each unknown word
    that is longer than one character and is not listed in `dictionary` is added to
    the module-level set `all_misspelled_words`. This function itself does not log
    anything and does not suggest corrections.
    Example:
        ```python
        check_spelling("An interesting sentence is akways good.")
        ```
        adds `akways` to `all_misspelled_words` if pyspellchecker does not know it.
    Args:
        sentence (str): The sentence to check.
    Returns:
        str: The same sentence, unmodified.
    """
    # Normalize a lower-cased copy; the caller's sentence is returned untouched.
    normalized = re.sub(r"\-+", " ", sentence.lower())  # hyphens become spaces
    normalized = re.sub(r"[^a-z\s\-']", "", normalized)  # drop special characters
    unknown_words = spell.unknown(normalized.split())
    # Single letters and the words whitelisted in `dictionary` are not reported.
    all_misspelled_words.update(
        word
        for word in unknown_words
        if len(word) != 1 and word not in dictionary
    )
    return sentence
 def escape_latex_characters(sentence: str) -> str:
    """Escape LaTeX characters in a sentence.
@ -129,18 +50,29 @@ def escape_latex_characters(sentence: str) -> str:
    # Dictionary of escape characters:
    escape_characters = {
        "#": r"\#",
-        "$": r"\$",
+        # "$": r"\$", # Don't escape $ as it is used for math mode
        "%": r"\%",
        "&": r"\&",
        "~": r"\textasciitilde{}",
-        "_": r"\_",
+        # "_": r"\_", # Don't escape _ as it is used for math mode
-        "^": r"\textasciicircum{}",
+        # "^": r"\textasciicircum{}", # Don't escape ^ as it is used for math mode
    }
    # Don't escape links as hyperref will do it automatically:
    # Find all the links in the sentence:
    links = re.findall(r"\[.*?\]\(.*?\)", sentence)
    # Replace the links with a placeholder:
    for link in links:
        sentence = sentence.replace(link, "!!-link-!!")
    # Handle backslash and curly braces separately because the other characters are
    # escaped with backslash and curly braces:
    sentence = sentence.replace("{", ">>{")
    sentence = sentence.replace("}", ">>}")
-    sentence = sentence.replace("\\", "\\textbackslash{}")
+    # don't escape backslash as it is used heavily in LaTeX:
    # sentence = sentence.replace("\\", "\\textbackslash{}")
    sentence = sentence.replace(">>{", "\\{")
    sentence = sentence.replace(">>}", "\\}")
@ -151,6 +83,10 @@ def escape_latex_characters(sentence: str) -> str:
        if character in escape_characters:
            sentence = sentence.replace(character, escape_characters[character])
    # Replace the links with the original links:
    for link in links:
        sentence = sentence.replace("!!-link-!!", link)
    return sentence
@ -641,7 +577,6 @@ class Design(BaseModel):
 # ======================================================================================
 LaTeXString = Annotated[str, AfterValidator(escape_latex_characters)]
 SpellCheckedString = Annotated[LaTeXString, AfterValidator(check_spelling)]
 PastDate = Annotated[
    str,
    Field(pattern=r"\d{4}-?(\d{2})?-?(\d{2})?"),
@ -683,7 +618,7 @@ class Event(BaseModel):
        ),
        examples=["2020-09-24", "My Custom Date"],
    )
-    highlights: Optional[list[SpellCheckedString]] = Field(
+    highlights: Optional[list[LaTeXString]] = Field(
        default=[],
        title="Highlights",
        description=(
@ -850,7 +785,7 @@ class Event(BaseModel):
    @computed_field
    @cached_property
-    def highlight_strings(self) -> list[SpellCheckedString]:
+    def highlight_strings(self) -> list[LaTeXString]:
        highlight_strings = []
        if self.highlights is not None:
            highlight_strings.extend(self.highlights)
@ -906,7 +841,7 @@ class OneLineEntry(Event):
        title="Name",
        description="The name of the entry. It will be shown as bold text.",
    )
-    details: SpellCheckedString = Field(
+    details: LaTeXString = Field(
        title="Details",
        description="The details of the entry. It will be shown as normal text.",
    )
@ -968,7 +903,7 @@ class EducationEntry(Event):
    @computed_field
    @cached_property
-    def highlight_strings(self) -> list[SpellCheckedString]:
+    def highlight_strings(self) -> list[LaTeXString]:
        highlight_strings = []
        if self.gpa is not None:
@ -1386,35 +1321,6 @@ class CurriculumVitae(BaseModel):
        return model
    @model_validator(mode="after")
    @classmethod
    def print_all_the_misspeled_words(cls, model):
        """Log, in a single warning, the words collected in `all_misspelled_words`.
        Nothing is logged when the set is empty. The model is returned unchanged in
        both cases.
        """
        if len(all_misspelled_words) == 0:
            return model
        flagged_words = list(all_misspelled_words)
        # Lay the words out five per line, comma-separated; the lines are joined
        # with a newline followed by a two-space indent:
        rows = [
            ", ".join(flagged_words[start : start + 5])
            for start in range(0, len(flagged_words), 5)
        ]
        header = (
            "The following words might be misspelled (according to pyspellchecker):"
        )
        word_block = "\n  ".join(rows)
        # Emit the header and the indented word list as one log record:
        logger.warning("\n".join([header, f"  {word_block}"]))
        return model
    @computed_field
    @cached_property
    def connections(self) -> list[Connection]:
--- a/run_rendercv.py
+++ b/run_rendercv.py
@ -2,7 +2,7 @@ from rendercv.__main__ import render
 from rendercv.data_model import generate_json_schema
 import os
-input_file_path = "John_Doe_CV.yaml"
+input_file_path = "SinaAtalay_CV.yaml"
 render(input_file_path) # type: ignore
 # This script is equivalent to running the following command in the terminal:
--- a/tests/test_data_model.py
+++ b/tests/test_data_model.py
@ -9,17 +9,6 @@ from pydantic import ValidationError, HttpUrl
 class TestDataModel(unittest.TestCase):
    def test_check_spelling(self):
        # Smoke test: check_spelling only has to run without raising on plain text,
        # on text full of special characters, and on text with digits, escaped
        # braces and brackets. No return value or warning is asserted.
        samples = (
            "This is a sentence.",
            "This is a sentance with special characters &@#&^@*#&)((!@#_)()).",
            r"12312309 Thisdf sdfsd is a sentence *safds\{\}[[[]]]",
        )
        for sentence in samples:
            with self.subTest(sentence=sentence):
                data_model.check_spelling(sentence)
    def test_escape_latex_characters(self):
        str_without_latex_characters = "This is a string without LaTeX characters."
        expected = str_without_latex_characters