mirror of https://github.com/eyhc1/rendercv.git
remove spell checking feature
This commit is contained in:
parent
4cfaa86f04
commit
9bf8f7ab6c
|
@ -183,6 +183,6 @@ output/
|
||||||
|
|
||||||
# VSCode
|
# VSCode
|
||||||
.vscode/
|
.vscode/
|
||||||
Jeffrey_Paul_Goldberg_CV.yaml
|
|
||||||
pyvenv.cfg
|
# Personal CVs
|
||||||
bin/
|
SinaAtalay_CV.yaml
|
|
@ -58,7 +58,7 @@ cv:
|
||||||
drawing using finite element analysis with
|
drawing using finite element analysis with
|
||||||
open-source software called CalculiX.
|
open-source software called CalculiX.
|
||||||
```
|
```
|
||||||
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, giving a warning if there are any spelling mistakes, etc.
|
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, etc.
|
||||||
- Then, it creates a $\LaTeX$ file.
|
- Then, it creates a $\LaTeX$ file.
|
||||||
- Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/).
|
- Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/).
|
||||||
|
|
||||||
|
|
|
@ -58,7 +58,7 @@ cv:
|
||||||
drawing using finite element analysis with
|
drawing using finite element analysis with
|
||||||
open-source software called CalculiX.
|
open-source software called CalculiX.
|
||||||
```
|
```
|
||||||
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, giving a warning if there are any spelling mistakes, etc.
|
- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, etc.
|
||||||
- Then, it creates a $\LaTeX$ file.
|
- Then, it creates a $\LaTeX$ file.
|
||||||
- Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/).
|
- Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/).
|
||||||
|
|
||||||
|
|
|
@ -13,7 +13,6 @@ dependencies = [
|
||||||
'pydantic-extra-types==2.1.0',
|
'pydantic-extra-types==2.1.0',
|
||||||
'pydantic_core==2.10.1',
|
'pydantic_core==2.10.1',
|
||||||
'typing_extensions==4.8.0',
|
'typing_extensions==4.8.0',
|
||||||
'pyspellchecker==0.7.2',
|
|
||||||
'ruamel.yaml==0.17.35',
|
'ruamel.yaml==0.17.35',
|
||||||
'email-validator==2.0.0.post2',
|
'email-validator==2.0.0.post2',
|
||||||
'typer[all]==0.9.0',
|
'typer[all]==0.9.0',
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
"""RenderCV package.
|
"""RenderCV package.
|
||||||
|
|
||||||
It parses the user input YAML/JSON file and validates the data (checking spelling
|
It parses the user input YAML/JSON file and validates the data (checks if the
|
||||||
mistakes, whether the dates are consistent, etc.). Then, with the data, it creates a
|
dates are consistent, if the URLs are valid, etc.). Then, with the data, it creates a
|
||||||
$\\LaTeX$ file and renders it with [TinyTeX](https://yihui.org/tinytex/).
|
$\\LaTeX$ file and renders it with [TinyTeX](https://yihui.org/tinytex/).
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
|
|
|
@ -32,89 +32,10 @@ from pydantic.functional_validators import AfterValidator
|
||||||
from pydantic_extra_types.phone_numbers import PhoneNumber
|
from pydantic_extra_types.phone_numbers import PhoneNumber
|
||||||
from pydantic_extra_types.color import Color
|
from pydantic_extra_types.color import Color
|
||||||
from ruamel.yaml import YAML
|
from ruamel.yaml import YAML
|
||||||
from spellchecker import SpellChecker
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# don't give spelling warnings for these words:
|
|
||||||
dictionary = [
|
|
||||||
"aerostructures",
|
|
||||||
"sportsperson",
|
|
||||||
"cern",
|
|
||||||
"mechatronics",
|
|
||||||
"calculix",
|
|
||||||
"microcontroller",
|
|
||||||
"ansys",
|
|
||||||
"nx",
|
|
||||||
"aselsan",
|
|
||||||
"hrjet",
|
|
||||||
"simularge",
|
|
||||||
"siemens",
|
|
||||||
"dynamometer",
|
|
||||||
"dc",
|
|
||||||
"grammarly",
|
|
||||||
"css",
|
|
||||||
"html",
|
|
||||||
"markdown",
|
|
||||||
"ubuntu",
|
|
||||||
"matlab",
|
|
||||||
"lua",
|
|
||||||
"premake",
|
|
||||||
"javascript",
|
|
||||||
]
|
|
||||||
spell = SpellChecker()
|
|
||||||
all_misspelled_words = set()
|
|
||||||
|
|
||||||
|
|
||||||
def check_spelling(sentence: str) -> str:
|
|
||||||
"""Check the spelling of a sentence and give warnings if there are any misspelled
|
|
||||||
words.
|
|
||||||
|
|
||||||
It uses [pyspellchecker](https://github.com/barrust/pyspellchecker). It can also
|
|
||||||
guess the correct version of the misspelled word, but it is not used because it is
|
|
||||||
very slow.
|
|
||||||
|
|
||||||
Example:
|
|
||||||
```python
|
|
||||||
check_spelling("An interesting sentence is akways good.")
|
|
||||||
```
|
|
||||||
|
|
||||||
will print the following warning:
|
|
||||||
|
|
||||||
`WARNING - The word "akways" might be misspelled according to the pyspellchecker.`
|
|
||||||
|
|
||||||
Args:
|
|
||||||
sentence (str): The sentence to check.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: The same sentence.
|
|
||||||
"""
|
|
||||||
modified_sentence = sentence.lower() # convert to lower case
|
|
||||||
modified_sentence = re.sub(
|
|
||||||
r"\-+", " ", modified_sentence
|
|
||||||
) # replace hyphens with spaces
|
|
||||||
modified_sentence = re.sub(
|
|
||||||
r"[^a-z\s\-']", "", modified_sentence
|
|
||||||
) # remove all the special characters
|
|
||||||
words = modified_sentence.split() # split sentence into a list of words
|
|
||||||
misspelled = spell.unknown(words) # find misspelled words
|
|
||||||
|
|
||||||
if len(misspelled) > 0:
|
|
||||||
for word in misspelled:
|
|
||||||
if len(word) == 1:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# for each misspelled word, check if it is in the dictionary and otherwise
|
|
||||||
# give a warning
|
|
||||||
if word in dictionary:
|
|
||||||
continue
|
|
||||||
|
|
||||||
all_misspelled_words.add(word)
|
|
||||||
|
|
||||||
return sentence
|
|
||||||
|
|
||||||
|
|
||||||
def escape_latex_characters(sentence: str) -> str:
|
def escape_latex_characters(sentence: str) -> str:
|
||||||
"""Escape LaTeX characters in a sentence.
|
"""Escape LaTeX characters in a sentence.
|
||||||
|
|
||||||
|
@ -129,18 +50,29 @@ def escape_latex_characters(sentence: str) -> str:
|
||||||
# Dictionary of escape characters:
|
# Dictionary of escape characters:
|
||||||
escape_characters = {
|
escape_characters = {
|
||||||
"#": r"\#",
|
"#": r"\#",
|
||||||
"$": r"\$",
|
# "$": r"\$", # Don't escape $ as it is used for math mode
|
||||||
"%": r"\%",
|
"%": r"\%",
|
||||||
"&": r"\&",
|
"&": r"\&",
|
||||||
"~": r"\textasciitilde{}",
|
"~": r"\textasciitilde{}",
|
||||||
"_": r"\_",
|
# "_": r"\_", # Don't escape _ as it is used for math mode
|
||||||
"^": r"\textasciicircum{}",
|
# "^": r"\textasciicircum{}", # Don't escape ^ as it is used for math mode
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Don't escape links as hyperref will do it automatically:
|
||||||
|
|
||||||
|
# Find all the links in the sentence:
|
||||||
|
links = re.findall(r"\[.*?\]\(.*?\)", sentence)
|
||||||
|
|
||||||
|
# Replace the links with a placeholder:
|
||||||
|
for link in links:
|
||||||
|
sentence = sentence.replace(link, "!!-link-!!")
|
||||||
|
|
||||||
# Handle backslash and curly braces separately because the other characters are
|
# Handle backslash and curly braces separately because the other characters are
|
||||||
# escaped with backslash and curly braces:
|
# escaped with backslash and curly braces:
|
||||||
sentence = sentence.replace("{", ">>{")
|
sentence = sentence.replace("{", ">>{")
|
||||||
sentence = sentence.replace("}", ">>}")
|
sentence = sentence.replace("}", ">>}")
|
||||||
sentence = sentence.replace("\\", "\\textbackslash{}")
|
# don't escape backslash as it is used heavily in LaTeX:
|
||||||
|
# sentence = sentence.replace("\\", "\\textbackslash{}")
|
||||||
sentence = sentence.replace(">>{", "\\{")
|
sentence = sentence.replace(">>{", "\\{")
|
||||||
sentence = sentence.replace(">>}", "\\}")
|
sentence = sentence.replace(">>}", "\\}")
|
||||||
|
|
||||||
|
@ -151,6 +83,10 @@ def escape_latex_characters(sentence: str) -> str:
|
||||||
if character in escape_characters:
|
if character in escape_characters:
|
||||||
sentence = sentence.replace(character, escape_characters[character])
|
sentence = sentence.replace(character, escape_characters[character])
|
||||||
|
|
||||||
|
# Replace the links with the original links:
|
||||||
|
for link in links:
|
||||||
|
sentence = sentence.replace("!!-link-!!", link)
|
||||||
|
|
||||||
return sentence
|
return sentence
|
||||||
|
|
||||||
|
|
||||||
|
@ -641,7 +577,6 @@ class Design(BaseModel):
|
||||||
# ======================================================================================
|
# ======================================================================================
|
||||||
|
|
||||||
LaTeXString = Annotated[str, AfterValidator(escape_latex_characters)]
|
LaTeXString = Annotated[str, AfterValidator(escape_latex_characters)]
|
||||||
SpellCheckedString = Annotated[LaTeXString, AfterValidator(check_spelling)]
|
|
||||||
PastDate = Annotated[
|
PastDate = Annotated[
|
||||||
str,
|
str,
|
||||||
Field(pattern=r"\d{4}-?(\d{2})?-?(\d{2})?"),
|
Field(pattern=r"\d{4}-?(\d{2})?-?(\d{2})?"),
|
||||||
|
@ -683,7 +618,7 @@ class Event(BaseModel):
|
||||||
),
|
),
|
||||||
examples=["2020-09-24", "My Custom Date"],
|
examples=["2020-09-24", "My Custom Date"],
|
||||||
)
|
)
|
||||||
highlights: Optional[list[SpellCheckedString]] = Field(
|
highlights: Optional[list[LaTeXString]] = Field(
|
||||||
default=[],
|
default=[],
|
||||||
title="Highlights",
|
title="Highlights",
|
||||||
description=(
|
description=(
|
||||||
|
@ -850,7 +785,7 @@ class Event(BaseModel):
|
||||||
|
|
||||||
@computed_field
|
@computed_field
|
||||||
@cached_property
|
@cached_property
|
||||||
def highlight_strings(self) -> list[SpellCheckedString]:
|
def highlight_strings(self) -> list[LaTeXString]:
|
||||||
highlight_strings = []
|
highlight_strings = []
|
||||||
if self.highlights is not None:
|
if self.highlights is not None:
|
||||||
highlight_strings.extend(self.highlights)
|
highlight_strings.extend(self.highlights)
|
||||||
|
@ -906,7 +841,7 @@ class OneLineEntry(Event):
|
||||||
title="Name",
|
title="Name",
|
||||||
description="The name of the entry. It will be shown as bold text.",
|
description="The name of the entry. It will be shown as bold text.",
|
||||||
)
|
)
|
||||||
details: SpellCheckedString = Field(
|
details: LaTeXString = Field(
|
||||||
title="Details",
|
title="Details",
|
||||||
description="The details of the entry. It will be shown as normal text.",
|
description="The details of the entry. It will be shown as normal text.",
|
||||||
)
|
)
|
||||||
|
@ -968,7 +903,7 @@ class EducationEntry(Event):
|
||||||
|
|
||||||
@computed_field
|
@computed_field
|
||||||
@cached_property
|
@cached_property
|
||||||
def highlight_strings(self) -> list[SpellCheckedString]:
|
def highlight_strings(self) -> list[LaTeXString]:
|
||||||
highlight_strings = []
|
highlight_strings = []
|
||||||
|
|
||||||
if self.gpa is not None:
|
if self.gpa is not None:
|
||||||
|
@ -1386,35 +1321,6 @@ class CurriculumVitae(BaseModel):
|
||||||
|
|
||||||
return model
|
return model
|
||||||
|
|
||||||
@model_validator(mode="after")
|
|
||||||
@classmethod
|
|
||||||
def print_all_the_misspeled_words(cls, model):
|
|
||||||
"""Print all the words that are misspelled according to pyspellchecker."""
|
|
||||||
if len(all_misspelled_words) > 0:
|
|
||||||
messages = []
|
|
||||||
messages.append(
|
|
||||||
"The following words might be misspelled (according to pyspellchecker):"
|
|
||||||
)
|
|
||||||
|
|
||||||
misspelled_words = list(all_misspelled_words)
|
|
||||||
|
|
||||||
# Make misspeled_words a list of lists where each list contains 5:
|
|
||||||
misspelled_words = [
|
|
||||||
misspelled_words[i : i + 5] for i in range(0, len(misspelled_words), 5)
|
|
||||||
]
|
|
||||||
|
|
||||||
# Join the words in each list with a comma, and join the lists with a new
|
|
||||||
# line:
|
|
||||||
misspelled_words = "\n ".join(
|
|
||||||
[", ".join(words) for words in misspelled_words]
|
|
||||||
)
|
|
||||||
messages.append(f" {misspelled_words}")
|
|
||||||
|
|
||||||
# Print the messages:
|
|
||||||
logger.warning("\n".join(messages))
|
|
||||||
|
|
||||||
return model
|
|
||||||
|
|
||||||
@computed_field
|
@computed_field
|
||||||
@cached_property
|
@cached_property
|
||||||
def connections(self) -> list[Connection]:
|
def connections(self) -> list[Connection]:
|
||||||
|
|
|
@ -2,7 +2,7 @@ from rendercv.__main__ import render
|
||||||
from rendercv.data_model import generate_json_schema
|
from rendercv.data_model import generate_json_schema
|
||||||
import os
|
import os
|
||||||
|
|
||||||
input_file_path = "John_Doe_CV.yaml"
|
input_file_path = "SinaAtalay_CV.yaml"
|
||||||
render(input_file_path) # type: ignore
|
render(input_file_path) # type: ignore
|
||||||
|
|
||||||
# This script is equivalent to running the following command in the terminal:
|
# This script is equivalent to running the following command in the terminal:
|
||||||
|
|
|
@ -9,17 +9,6 @@ from pydantic import ValidationError, HttpUrl
|
||||||
|
|
||||||
|
|
||||||
class TestDataModel(unittest.TestCase):
|
class TestDataModel(unittest.TestCase):
|
||||||
def test_check_spelling(self):
|
|
||||||
sentences = [
|
|
||||||
"This is a sentence.",
|
|
||||||
"This is a sentance with special characters &@#&^@*#&)((!@#_)()).",
|
|
||||||
r"12312309 Thisdf sdfsd is a sentence *safds\{\}[[[]]]",
|
|
||||||
]
|
|
||||||
|
|
||||||
for sentence in sentences:
|
|
||||||
with self.subTest(sentence=sentence):
|
|
||||||
data_model.check_spelling(sentence)
|
|
||||||
|
|
||||||
def test_escape_latex_characters(self):
|
def test_escape_latex_characters(self):
|
||||||
str_without_latex_characters = "This is a string without LaTeX characters."
|
str_without_latex_characters = "This is a string without LaTeX characters."
|
||||||
expected = str_without_latex_characters
|
expected = str_without_latex_characters
|
||||||
|
|
Loading…
Reference in New Issue