remove spell checking feature

2023-11-16 21:48:41 +01:00 · 2023-11-16 21:48:41 +01:00 · 9bf8f7ab6c
parent 4cfaa86f04
commit 9bf8f7ab6c
8 changed files with 31 additions and 137 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -183,6 +183,6 @@ output/

 # VSCode
 .vscode/
-Jeffrey_Paul_Goldberg_CV.yaml
-pyvenv.cfg
-bin/
+
+# Personal CVs
+SinaAtalay_CV.yaml
--- a/README.md
+++ b/README.md
@@ -58,7 +58,7 @@ cv:
          drawing using finite element analysis with
          open-source software called CalculiX.
 ```
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, giving a warning if there are any spelling mistakes, etc.
+- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, etc.
 - Then, it creates a $\LaTeX$ file.
 - Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/).

--- a/docs/index.md
+++ b/docs/index.md
@@ -58,7 +58,7 @@ cv:
          drawing using finite element analysis with
          open-source software called CalculiX.
 ```
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, giving a warning if there are any spelling mistakes, etc.
+- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, etc.
 - Then, it creates a $\LaTeX$ file.
 - Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/).

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,6 @@ dependencies = [
    'pydantic-extra-types==2.1.0',
    'pydantic_core==2.10.1',
    'typing_extensions==4.8.0',
-    'pyspellchecker==0.7.2',
    'ruamel.yaml==0.17.35',
    'email-validator==2.0.0.post2',
    'typer[all]==0.9.0',
--- a/rendercv/__init__.py
+++ b/rendercv/__init__.py
@@ -1,7 +1,7 @@
 """RenderCV package.

-It parses the user input YAML/JSON file and validates the data (checking spelling
-mistakes, whether the dates are consistent, etc.). Then, with the data, it creates a
+It parses the user input YAML/JSON file and validates the data (checks if the
+dates are consistent, if the URLs are valid, etc.). Then, with the data, it creates a
 $\\LaTeX$ file and renders it with [TinyTeX](https://yihui.org/tinytex/).
 """
 import logging
--- a/rendercv/data_model.py
+++ b/rendercv/data_model.py
@@ -32,89 +32,10 @@ from pydantic.functional_validators import AfterValidator
 from pydantic_extra_types.phone_numbers import PhoneNumber
 from pydantic_extra_types.color import Color
 from ruamel.yaml import YAML
-from spellchecker import SpellChecker

 logger = logging.getLogger(__name__)


-# don't give spelling warnings for these words:
-dictionary = [
-    "aerostructures",
-    "sportsperson",
-    "cern",
-    "mechatronics",
-    "calculix",
-    "microcontroller",
-    "ansys",
-    "nx",
-    "aselsan",
-    "hrjet",
-    "simularge",
-    "siemens",
-    "dynamometer",
-    "dc",
-    "grammarly",
-    "css",
-    "html",
-    "markdown",
-    "ubuntu",
-    "matlab",
-    "lua",
-    "premake",
-    "javascript",
-]
-spell = SpellChecker()
-all_misspelled_words = set()
-
-
-def check_spelling(sentence: str) -> str:
-    """Check the spelling of a sentence and give warnings if there are any misspelled
-    words.
-
-    It uses [pyspellchecker](https://github.com/barrust/pyspellchecker). It can also
-    guess the correct version of the misspelled word, but it is not used because it is
-    very slow.
-
-    Example:
-        ```python
-        check_spelling("An interesting sentence is akways good.")
-        ```
-
-        will print the following warning:
-
-        `WARNING - The word "akways" might be misspelled according to the pyspellchecker.`
-
-    Args:
-        sentence (str): The sentence to check.
-
-    Returns:
-        str: The same sentence.
-    """
-    modified_sentence = sentence.lower()  # convert to lower case
-    modified_sentence = re.sub(
-        r"\-+", " ", modified_sentence
-    )  # replace hyphens with spaces
-    modified_sentence = re.sub(
-        r"[^a-z\s\-']", "", modified_sentence
-    )  # remove all the special characters
-    words = modified_sentence.split()  # split sentence into a list of words
-    misspelled = spell.unknown(words)  # find misspelled words
-
-    if len(misspelled) > 0:
-        for word in misspelled:
-            if len(word) == 1:
-                continue
-
-            # for each misspelled word, check if it is in the dictionary and otherwise
-            # give a warning
-            if word in dictionary:
-                continue
-
-            all_misspelled_words.add(word)
-
-    return sentence
-
-
 def escape_latex_characters(sentence: str) -> str:
    """Escape LaTeX characters in a sentence.

@@ -129,18 +50,29 @@ def escape_latex_characters(sentence: str) -> str:
    # Dictionary of escape characters:
    escape_characters = {
        "#": r"\#",
-        "$": r"\$",
+        # "$": r"\$", # Don't escape $ as it is used for math mode
        "%": r"\%",
        "&": r"\&",
        "~": r"\textasciitilde{}",
-        "_": r"\_",
-        "^": r"\textasciicircum{}",
+        # "_": r"\_", # Don't escape _ as it is used for math mode
+        # "^": r"\textasciicircum{}", # Don't escape ^ as it is used for math mode
    }
+
+    # Don't escape links as hyperref will do it automatically:
+
+    # Find all the links in the sentence:
+    links = re.findall(r"\[.*?\]\(.*?\)", sentence)
+
+    # Replace the links with a placeholder:
+    for link in links:
+        sentence = sentence.replace(link, "!!-link-!!")
+
    # Handle backslash and curly braces separately because the other characters are
    # escaped with backslash and curly braces:
    sentence = sentence.replace("{", ">>{")
    sentence = sentence.replace("}", ">>}")
-    sentence = sentence.replace("\\", "\\textbackslash{}")
+    # don't escape backslash as it is used heavily in LaTeX:
+    # sentence = sentence.replace("\\", "\\textbackslash{}")
    sentence = sentence.replace(">>{", "\\{")
    sentence = sentence.replace(">>}", "\\}")

@@ -151,6 +83,10 @@ def escape_latex_characters(sentence: str) -> str:
        if character in escape_characters:
            sentence = sentence.replace(character, escape_characters[character])

+    # Replace the links with the original links:
+    for link in links:
+        sentence = sentence.replace("!!-link-!!", link)
+
    return sentence


@@ -641,7 +577,6 @@ class Design(BaseModel):
 # ======================================================================================

 LaTeXString = Annotated[str, AfterValidator(escape_latex_characters)]
-SpellCheckedString = Annotated[LaTeXString, AfterValidator(check_spelling)]
 PastDate = Annotated[
    str,
    Field(pattern=r"\d{4}-?(\d{2})?-?(\d{2})?"),
@@ -683,7 +618,7 @@ class Event(BaseModel):
        ),
        examples=["2020-09-24", "My Custom Date"],
    )
-    highlights: Optional[list[SpellCheckedString]] = Field(
+    highlights: Optional[list[LaTeXString]] = Field(
        default=[],
        title="Highlights",
        description=(
@@ -850,7 +785,7 @@ class Event(BaseModel):

    @computed_field
    @cached_property
-    def highlight_strings(self) -> list[SpellCheckedString]:
+    def highlight_strings(self) -> list[LaTeXString]:
        highlight_strings = []
        if self.highlights is not None:
            highlight_strings.extend(self.highlights)
@@ -906,7 +841,7 @@ class OneLineEntry(Event):
        title="Name",
        description="The name of the entry. It will be shown as bold text.",
    )
-    details: SpellCheckedString = Field(
+    details: LaTeXString = Field(
        title="Details",
        description="The details of the entry. It will be shown as normal text.",
    )
@@ -968,7 +903,7 @@ class EducationEntry(Event):

    @computed_field
    @cached_property
-    def highlight_strings(self) -> list[SpellCheckedString]:
+    def highlight_strings(self) -> list[LaTeXString]:
        highlight_strings = []

        if self.gpa is not None:
@@ -1386,35 +1321,6 @@ class CurriculumVitae(BaseModel):

        return model

-    @model_validator(mode="after")
-    @classmethod
-    def print_all_the_misspeled_words(cls, model):
-        """Print all the words that are misspelled according to pyspellchecker."""
-        if len(all_misspelled_words) > 0:
-            messages = []
-            messages.append(
-                "The following words might be misspelled (according to pyspellchecker):"
-            )
-
-            misspelled_words = list(all_misspelled_words)
-
-            # Make misspeled_words a list of lists where each list contains 5:
-            misspelled_words = [
-                misspelled_words[i : i + 5] for i in range(0, len(misspelled_words), 5)
-            ]
-
-            # Join the words in each list with a comma, and join the lists with a new
-            # line:
-            misspelled_words = "\n  ".join(
-                [", ".join(words) for words in misspelled_words]
-            )
-            messages.append(f"  {misspelled_words}")
-
-            # Print the messages:
-            logger.warning("\n".join(messages))
-
-        return model
-
    @computed_field
    @cached_property
    def connections(self) -> list[Connection]:
--- a/run_rendercv.py
+++ b/run_rendercv.py
@@ -2,7 +2,7 @@ from rendercv.__main__ import render
 from rendercv.data_model import generate_json_schema
 import os

-input_file_path = "John_Doe_CV.yaml"
+input_file_path = "SinaAtalay_CV.yaml"
 render(input_file_path) # type: ignore

 # This script is equivalent to running the following command in the terminal:
--- a/tests/test_data_model.py
+++ b/tests/test_data_model.py
@@ -9,17 +9,6 @@ from pydantic import ValidationError, HttpUrl


 class TestDataModel(unittest.TestCase):
-    def test_check_spelling(self):
-        sentences = [
-            "This is a sentence.",
-            "This is a sentance with special characters &@#&^@*#&)((!@#_)()).",
-            r"12312309 Thisdf sdfsd is a sentence *safds\{\}[[[]]]",
-        ]
-
-        for sentence in sentences:
-            with self.subTest(sentence=sentence):
-                data_model.check_spelling(sentence)
-
    def test_escape_latex_characters(self):
        str_without_latex_characters = "This is a string without LaTeX characters."
        expected = str_without_latex_characters