remove spell checking feature

This commit is contained in:
Sina Atalay 2023-11-16 21:48:41 +01:00 committed by Jeffrey Goldberg
parent 4cfaa86f04
commit 9bf8f7ab6c
8 changed files with 31 additions and 137 deletions

6
.gitignore vendored
View File

@ -183,6 +183,6 @@ output/
# VSCode # VSCode
.vscode/ .vscode/
Jeffrey_Paul_Goldberg_CV.yaml
pyvenv.cfg # Personal CVs
bin/ SinaAtalay_CV.yaml

View File

@ -58,7 +58,7 @@ cv:
drawing using finite element analysis with drawing using finite element analysis with
open-source software called CalculiX. open-source software called CalculiX.
``` ```
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, giving a warning if there are any spelling mistakes, etc. - Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, etc.
- Then, it creates a $\LaTeX$ file. - Then, it creates a $\LaTeX$ file.
- Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/). - Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/).

View File

@ -58,7 +58,7 @@ cv:
drawing using finite element analysis with drawing using finite element analysis with
open-source software called CalculiX. open-source software called CalculiX.
``` ```
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, giving a warning if there are any spelling mistakes, etc. - Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, etc.
- Then, it creates a $\LaTeX$ file. - Then, it creates a $\LaTeX$ file.
- Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/). - Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/).

View File

@ -13,7 +13,6 @@ dependencies = [
'pydantic-extra-types==2.1.0', 'pydantic-extra-types==2.1.0',
'pydantic_core==2.10.1', 'pydantic_core==2.10.1',
'typing_extensions==4.8.0', 'typing_extensions==4.8.0',
'pyspellchecker==0.7.2',
'ruamel.yaml==0.17.35', 'ruamel.yaml==0.17.35',
'email-validator==2.0.0.post2', 'email-validator==2.0.0.post2',
'typer[all]==0.9.0', 'typer[all]==0.9.0',

View File

@ -1,7 +1,7 @@
"""RenderCV package. """RenderCV package.
It parses the user input YAML/JSON file and validates the data (checking spelling It parses the user input YAML/JSON file and validates the data (checks if the
mistakes, whether the dates are consistent, etc.). Then, with the data, it creates a dates are consistent, if the URLs are valid, etc.). Then, with the data, it creates a
$\\LaTeX$ file and renders it with [TinyTeX](https://yihui.org/tinytex/). $\\LaTeX$ file and renders it with [TinyTeX](https://yihui.org/tinytex/).
""" """
import logging import logging

View File

@ -32,89 +32,10 @@ from pydantic.functional_validators import AfterValidator
from pydantic_extra_types.phone_numbers import PhoneNumber from pydantic_extra_types.phone_numbers import PhoneNumber
from pydantic_extra_types.color import Color from pydantic_extra_types.color import Color
from ruamel.yaml import YAML from ruamel.yaml import YAML
from spellchecker import SpellChecker
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Words that must never be reported as misspelled, even though pyspellchecker
# does not know them. Entries are lower-case because check_spelling lower-cases
# the sentence before looking its words up in this list.
dictionary = [
"aerostructures",
"sportsperson",
"cern",
"mechatronics",
"calculix",
"microcontroller",
"ansys",
"nx",
"aselsan",
"hrjet",
"simularge",
"siemens",
"dynamometer",
"dc",
"grammarly",
"css",
"html",
"markdown",
"ubuntu",
"matlab",
"lua",
"premake",
"javascript",
]
# Module-wide checker instance; check_spelling calls spell.unknown() on it.
spell = SpellChecker()
# Module-wide accumulator: check_spelling adds every flagged word to this set,
# so it grows across calls and duplicates are collapsed.
all_misspelled_words = set()
def check_spelling(sentence: str) -> str:
    """Collect the words of a sentence that pyspellchecker does not recognize.

    The sentence is normalized (lower-cased, hyphens turned into spaces, every
    character other than a-z, whitespace, hyphen and apostrophe dropped) and
    split on whitespace. Each resulting word that
    [pyspellchecker](https://github.com/barrust/pyspellchecker) reports as
    unknown is added to the module-level `all_misspelled_words` set, unless it
    is a single character or is listed in the module-level `dictionary`.

    This function does not emit any warning itself; it only records the words.
    pyspellchecker can also suggest corrections, but that is not used here
    because it is very slow.

    Example:
        ```python
        check_spelling("An interesting sentence is akways good.")
        ```

        adds `"akways"` to `all_misspelled_words` (assuming pyspellchecker does
        not know that word).

    Args:
        sentence (str): The sentence to check.

    Returns:
        str: The same sentence, unmodified.
    """
    # Normalize on a copy so that the caller gets the original text back.
    normalized = re.sub(r"\-+", " ", sentence.lower())
    normalized = re.sub(r"[^a-z\s\-']", "", normalized)

    for candidate in spell.unknown(normalized.split()):
        # Single letters and whitelisted words are never reported.
        if len(candidate) != 1 and candidate not in dictionary:
            all_misspelled_words.add(candidate)

    return sentence
def escape_latex_characters(sentence: str) -> str: def escape_latex_characters(sentence: str) -> str:
"""Escape LaTeX characters in a sentence. """Escape LaTeX characters in a sentence.
@ -129,18 +50,29 @@ def escape_latex_characters(sentence: str) -> str:
# Dictionary of escape characters: # Dictionary of escape characters:
escape_characters = { escape_characters = {
"#": r"\#", "#": r"\#",
"$": r"\$", # "$": r"\$", # Don't escape $ as it is used for math mode
"%": r"\%", "%": r"\%",
"&": r"\&", "&": r"\&",
"~": r"\textasciitilde{}", "~": r"\textasciitilde{}",
"_": r"\_", # "_": r"\_", # Don't escape _ as it is used for math mode
"^": r"\textasciicircum{}", # "^": r"\textasciicircum{}", # Don't escape ^ as it is used for math mode
} }
# Don't escape links as hyperref will do it automatically:
# Find all the links in the sentence:
links = re.findall(r"\[.*?\]\(.*?\)", sentence)
# Replace the links with a placeholder:
for link in links:
sentence = sentence.replace(link, "!!-link-!!")
# Handle backslash and curly braces separately because the other characters are # Handle backslash and curly braces separately because the other characters are
# escaped with backslash and curly braces: # escaped with backslash and curly braces:
sentence = sentence.replace("{", ">>{") sentence = sentence.replace("{", ">>{")
sentence = sentence.replace("}", ">>}") sentence = sentence.replace("}", ">>}")
sentence = sentence.replace("\\", "\\textbackslash{}") # don't escape backslash as it is used heavily in LaTeX:
# sentence = sentence.replace("\\", "\\textbackslash{}")
sentence = sentence.replace(">>{", "\\{") sentence = sentence.replace(">>{", "\\{")
sentence = sentence.replace(">>}", "\\}") sentence = sentence.replace(">>}", "\\}")
@ -151,6 +83,10 @@ def escape_latex_characters(sentence: str) -> str:
if character in escape_characters: if character in escape_characters:
sentence = sentence.replace(character, escape_characters[character]) sentence = sentence.replace(character, escape_characters[character])
# Replace the links with the original links:
for link in links:
sentence = sentence.replace("!!-link-!!", link)
return sentence return sentence
@ -641,7 +577,6 @@ class Design(BaseModel):
# ====================================================================================== # ======================================================================================
LaTeXString = Annotated[str, AfterValidator(escape_latex_characters)] LaTeXString = Annotated[str, AfterValidator(escape_latex_characters)]
SpellCheckedString = Annotated[LaTeXString, AfterValidator(check_spelling)]
PastDate = Annotated[ PastDate = Annotated[
str, str,
Field(pattern=r"\d{4}-?(\d{2})?-?(\d{2})?"), Field(pattern=r"\d{4}-?(\d{2})?-?(\d{2})?"),
@ -683,7 +618,7 @@ class Event(BaseModel):
), ),
examples=["2020-09-24", "My Custom Date"], examples=["2020-09-24", "My Custom Date"],
) )
highlights: Optional[list[SpellCheckedString]] = Field( highlights: Optional[list[LaTeXString]] = Field(
default=[], default=[],
title="Highlights", title="Highlights",
description=( description=(
@ -850,7 +785,7 @@ class Event(BaseModel):
@computed_field @computed_field
@cached_property @cached_property
def highlight_strings(self) -> list[SpellCheckedString]: def highlight_strings(self) -> list[LaTeXString]:
highlight_strings = [] highlight_strings = []
if self.highlights is not None: if self.highlights is not None:
highlight_strings.extend(self.highlights) highlight_strings.extend(self.highlights)
@ -906,7 +841,7 @@ class OneLineEntry(Event):
title="Name", title="Name",
description="The name of the entry. It will be shown as bold text.", description="The name of the entry. It will be shown as bold text.",
) )
details: SpellCheckedString = Field( details: LaTeXString = Field(
title="Details", title="Details",
description="The details of the entry. It will be shown as normal text.", description="The details of the entry. It will be shown as normal text.",
) )
@ -968,7 +903,7 @@ class EducationEntry(Event):
@computed_field @computed_field
@cached_property @cached_property
def highlight_strings(self) -> list[SpellCheckedString]: def highlight_strings(self) -> list[LaTeXString]:
highlight_strings = [] highlight_strings = []
if self.gpa is not None: if self.gpa is not None:
@ -1386,35 +1321,6 @@ class CurriculumVitae(BaseModel):
return model return model
@model_validator(mode="after")
@classmethod
def print_all_the_misspeled_words(cls, model):
    """Log a single warning listing every word that was flagged as misspelled.

    Reads the module-level `all_misspelled_words` set. When it is empty nothing
    is logged. Otherwise the words are laid out five per line, comma separated,
    under a one-line header, and the whole text is sent to `logger.warning`.

    Args:
        model: The validated model instance handed over by the validator.

    Returns:
        The same model, unchanged.
    """
    if not all_misspelled_words:
        return model

    flagged = list(all_misspelled_words)

    # Five words per row keeps the warning readable for long lists.
    rows = [
        ", ".join(flagged[start : start + 5]) for start in range(0, len(flagged), 5)
    ]
    listing = "\n ".join(rows)

    header = "The following words might be misspelled (according to pyspellchecker):"
    logger.warning("\n".join((header, f" {listing}")))

    return model
@computed_field @computed_field
@cached_property @cached_property
def connections(self) -> list[Connection]: def connections(self) -> list[Connection]:

View File

@ -2,7 +2,7 @@ from rendercv.__main__ import render
from rendercv.data_model import generate_json_schema from rendercv.data_model import generate_json_schema
import os import os
input_file_path = "John_Doe_CV.yaml" input_file_path = "SinaAtalay_CV.yaml"
render(input_file_path) # type: ignore render(input_file_path) # type: ignore
# This script is equivalent to running the following command in the terminal: # This script is equivalent to running the following command in the terminal:

View File

@ -9,17 +9,6 @@ from pydantic import ValidationError, HttpUrl
class TestDataModel(unittest.TestCase): class TestDataModel(unittest.TestCase):
def test_check_spelling(self):
    """check_spelling must accept arbitrary text and return it unchanged.

    The inputs cover a clean sentence, a sentence with a deliberate
    misspelling plus punctuation noise, and a raw string containing digits,
    backslashes, braces and brackets.
    """
    sentences = [
        "This is a sentence.",
        "This is a sentance with special characters &@#&^@*#&)((!@#_)()).",
        r"12312309 Thisdf sdfsd is a sentence *safds\{\}[[[]]]",
    ]

    for sentence in sentences:
        with self.subTest(sentence=sentence):
            # Flagged words are recorded as a side effect only; the return
            # value is the untouched input, so assert that explicitly instead
            # of merely checking that no exception is raised.
            self.assertEqual(data_model.check_spelling(sentence), sentence)
def test_escape_latex_characters(self): def test_escape_latex_characters(self):
str_without_latex_characters = "This is a string without LaTeX characters." str_without_latex_characters = "This is a string without LaTeX characters."
expected = str_without_latex_characters expected = str_without_latex_characters