remove spell checking feature

This commit is contained in:
Sina Atalay 2023-11-16 21:48:41 +01:00 committed by Jeffrey Goldberg
parent 4cfaa86f04
commit 9bf8f7ab6c
8 changed files with 31 additions and 137 deletions

6
.gitignore vendored
View File

@ -183,6 +183,6 @@ output/
# VSCode
.vscode/
Jeffrey_Paul_Goldberg_CV.yaml
pyvenv.cfg
bin/
# Personal CVs
SinaAtalay_CV.yaml

View File

@ -58,7 +58,7 @@ cv:
drawing using finite element analysis with
open-source software called CalculiX.
```
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, giving a warning if there are any spelling mistakes, etc.
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, etc.
- Then, it creates a $\LaTeX$ file.
- Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/).

View File

@ -58,7 +58,7 @@ cv:
drawing using finite element analysis with
open-source software called CalculiX.
```
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, giving a warning if there are any spelling mistakes, etc.
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, etc.
- Then, it creates a $\LaTeX$ file.
- Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/).

View File

@ -13,7 +13,6 @@ dependencies = [
'pydantic-extra-types==2.1.0',
'pydantic_core==2.10.1',
'typing_extensions==4.8.0',
'pyspellchecker==0.7.2',
'ruamel.yaml==0.17.35',
'email-validator==2.0.0.post2',
'typer[all]==0.9.0',

View File

@ -1,7 +1,7 @@
"""RenderCV package.
It parses the user input YAML/JSON file and validates the data (checking spelling
mistakes, whether the dates are consistent, etc.). Then, with the data, it creates a
It parses the user input YAML/JSON file and validates the data (checks if the
dates are consistent, if the URLs are valid, etc.). Then, with the data, it creates a
$\\LaTeX$ file and renders it with [TinyTeX](https://yihui.org/tinytex/).
"""
import logging

View File

@ -32,89 +32,10 @@ from pydantic.functional_validators import AfterValidator
from pydantic_extra_types.phone_numbers import PhoneNumber
from pydantic_extra_types.color import Color
from ruamel.yaml import YAML
from spellchecker import SpellChecker
logger = logging.getLogger(__name__)
# Allow-list for the spell checker: words that pyspellchecker reports as unknown
# but that are legitimate (product names, technical terms, abbreviations, ...).
# check_spelling lower-cases every sentence before looking words up, so the
# entries here must be lower-case as well.
dictionary = [
"aerostructures",
"sportsperson",
"cern",
"mechatronics",
"calculix",
"microcontroller",
"ansys",
"nx",
"aselsan",
"hrjet",
"simularge",
"siemens",
"dynamometer",
"dc",
"grammarly",
"css",
"html",
"markdown",
"ubuntu",
"matlab",
"lua",
"premake",
"javascript",
]
# Module-level pyspellchecker instance shared by every check_spelling call.
spell = SpellChecker()
# Accumulates every word flagged by check_spelling across all validated strings;
# the print_all_the_misspeled_words validator reads it to emit one combined warning.
all_misspelled_words = set()
def check_spelling(sentence: str) -> str:
    """Record the words of a sentence that pyspellchecker does not recognise.

    The sentence is normalised (lower-cased, hyphens turned into spaces, every
    character other than ``a-z``, whitespace and apostrophes dropped), split into
    words and passed to [pyspellchecker](https://github.com/barrust/pyspellchecker).
    Unknown words that are longer than one character and are not listed in the
    module-level ``dictionary`` are added to ``all_misspelled_words``. Nothing is
    logged here; the collected words are reported later in a single warning.

    pyspellchecker can also suggest corrections, but that is not used because it
    is very slow.

    Example:
        ```python
        check_spelling("An interesting sentence is akways good.")
        ```

        adds `akways` to `all_misspelled_words`.

    Args:
        sentence (str): The sentence to check.

    Returns:
        str: The same sentence, unmodified (so the function can be used as a
            validator).
    """
    # Lower-case first, then turn runs of hyphens into a single space so that
    # hyphenated compounds are checked word by word.
    normalized = re.sub(r"\-+", " ", sentence.lower())
    # Strip digits, punctuation and any other special characters.
    normalized = re.sub(r"[^a-z\s\-']", "", normalized)

    for candidate in spell.unknown(normalized.split()):
        # Single letters and allow-listed words are never reported.
        if len(candidate) != 1 and candidate not in dictionary:
            all_misspelled_words.add(candidate)

    return sentence
def escape_latex_characters(sentence: str) -> str:
"""Escape LaTeX characters in a sentence.
@ -129,18 +50,29 @@ def escape_latex_characters(sentence: str) -> str:
# Dictionary of escape characters:
escape_characters = {
"#": r"\#",
"$": r"\$",
# "$": r"\$", # Don't escape $ as it is used for math mode
"%": r"\%",
"&": r"\&",
"~": r"\textasciitilde{}",
"_": r"\_",
"^": r"\textasciicircum{}",
# "_": r"\_", # Don't escape _ as it is used for math mode
# "^": r"\textasciicircum{}", # Don't escape ^ as it is used for math mode
}
# Don't escape links as hyperref will do it automatically:
# Find all the links in the sentence:
links = re.findall(r"\[.*?\]\(.*?\)", sentence)
# Replace the links with a placeholder:
for link in links:
sentence = sentence.replace(link, "!!-link-!!")
# Handle backslash and curly braces separately because the other characters are
# escaped with backslash and curly braces:
sentence = sentence.replace("{", ">>{")
sentence = sentence.replace("}", ">>}")
sentence = sentence.replace("\\", "\\textbackslash{}")
# don't escape backslash as it is used heavily in LaTeX:
# sentence = sentence.replace("\\", "\\textbackslash{}")
sentence = sentence.replace(">>{", "\\{")
sentence = sentence.replace(">>}", "\\}")
@ -151,6 +83,10 @@ def escape_latex_characters(sentence: str) -> str:
if character in escape_characters:
sentence = sentence.replace(character, escape_characters[character])
# Replace the links with the original links:
for link in links:
sentence = sentence.replace("!!-link-!!", link)
return sentence
@ -641,7 +577,6 @@ class Design(BaseModel):
# ======================================================================================
LaTeXString = Annotated[str, AfterValidator(escape_latex_characters)]
SpellCheckedString = Annotated[LaTeXString, AfterValidator(check_spelling)]
PastDate = Annotated[
str,
Field(pattern=r"\d{4}-?(\d{2})?-?(\d{2})?"),
@ -683,7 +618,7 @@ class Event(BaseModel):
),
examples=["2020-09-24", "My Custom Date"],
)
highlights: Optional[list[SpellCheckedString]] = Field(
highlights: Optional[list[LaTeXString]] = Field(
default=[],
title="Highlights",
description=(
@ -850,7 +785,7 @@ class Event(BaseModel):
@computed_field
@cached_property
def highlight_strings(self) -> list[SpellCheckedString]:
def highlight_strings(self) -> list[LaTeXString]:
highlight_strings = []
if self.highlights is not None:
highlight_strings.extend(self.highlights)
@ -906,7 +841,7 @@ class OneLineEntry(Event):
title="Name",
description="The name of the entry. It will be shown as bold text.",
)
details: SpellCheckedString = Field(
details: LaTeXString = Field(
title="Details",
description="The details of the entry. It will be shown as normal text.",
)
@ -968,7 +903,7 @@ class EducationEntry(Event):
@computed_field
@cached_property
def highlight_strings(self) -> list[SpellCheckedString]:
def highlight_strings(self) -> list[LaTeXString]:
highlight_strings = []
if self.gpa is not None:
@ -1386,35 +1321,6 @@ class CurriculumVitae(BaseModel):
return model
@model_validator(mode="after")
@classmethod
def print_all_the_misspeled_words(cls, model):
    """Log one warning listing every word pyspellchecker flagged.

    Reads the module-level ``all_misspelled_words`` set; when it is empty nothing
    is logged. The words are laid out five per line, comma-separated.

    Args:
        model: The validated model instance.

    Returns:
        The same model, unchanged.
    """
    if not all_misspelled_words:
        return model

    flagged = list(all_misspelled_words)
    # One comma-separated row for every group of (at most) five words.
    rows = [
        ", ".join(flagged[start : start + 5]) for start in range(0, len(flagged), 5)
    ]
    listing = "\n ".join(rows)

    header = "The following words might be misspelled (according to pyspellchecker):"
    logger.warning("\n".join([header, f" {listing}"]))

    return model
@computed_field
@cached_property
def connections(self) -> list[Connection]:

View File

@ -2,7 +2,7 @@ from rendercv.__main__ import render
from rendercv.data_model import generate_json_schema
import os
input_file_path = "John_Doe_CV.yaml"
input_file_path = "SinaAtalay_CV.yaml"
render(input_file_path) # type: ignore
# This script is equivalent to running the following command in the terminal:

View File

@ -9,17 +9,6 @@ from pydantic import ValidationError, HttpUrl
class TestDataModel(unittest.TestCase):
def test_check_spelling(self):
    """Smoke test: check_spelling must not raise on plain or noisy input."""
    samples = (
        "This is a sentence.",
        "This is a sentance with special characters &@#&^@*#&)((!@#_)()).",
        r"12312309 Thisdf sdfsd is a sentence *safds\{\}[[[]]]",
    )
    for sentence in samples:
        # Each sample is reported separately if it fails.
        with self.subTest(sentence=sentence):
            data_model.check_spelling(sentence)
def test_escape_latex_characters(self):
str_without_latex_characters = "This is a string without LaTeX characters."
expected = str_without_latex_characters