diff --git a/.gitignore b/.gitignore index 520baeb..b2e576e 100644 --- a/.gitignore +++ b/.gitignore @@ -183,6 +183,6 @@ output/ # VSCode .vscode/ -Jeffrey_Paul_Goldberg_CV.yaml -pyvenv.cfg -bin/ + +# Personal CVs +SinaAtalay_CV.yaml \ No newline at end of file diff --git a/README.md b/README.md index b58a5f0..120accf 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ cv: drawing using finite element analysis with open-source software called CalculiX. ``` -- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, giving a warning if there are any spelling mistakes, etc. +- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, etc. - Then, it creates a $\LaTeX$ file. - Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/). diff --git a/docs/index.md b/docs/index.md index a67eabb..5e57365 100644 --- a/docs/index.md +++ b/docs/index.md @@ -58,7 +58,7 @@ cv: drawing using finite element analysis with open-source software called CalculiX. ``` -- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, giving a warning if there are any spelling mistakes, etc. +- Then, it validates the input, such as checking if the dates are consistent, checking if the URLs are correct, etc. - Then, it creates a $\LaTeX$ file. - Finally, it renders the $\LaTeX$ file to generate the PDF, and you don't need $\LaTeX$ installed on your PC because RenderCV comes with [TinyTeX](https://yihui.org/tinytex/). diff --git a/pyproject.toml b/pyproject.toml index 5225381..61931ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,6 @@ dependencies = [ 'pydantic-extra-types==2.1.0', 'pydantic_core==2.10.1', 'typing_extensions==4.8.0', - 'pyspellchecker==0.7.2', 'ruamel.yaml==0.17.35', 'email-validator==2.0.0.post2', 'typer[all]==0.9.0', diff --git a/rendercv/__init__.py b/rendercv/__init__.py index 4d544bf..690490c 100644 --- a/rendercv/__init__.py +++ b/rendercv/__init__.py @@ -1,7 +1,7 @@ """RenderCV package. -It parses the user input YAML/JSON file and validates the data (checking spelling -mistakes, whether the dates are consistent, etc.). Then, with the data, it creates a +It parses the user input YAML/JSON file and validates the data (checks if the +dates are consistent, if the URLs are valid, etc.). Then, with the data, it creates a $\\LaTeX$ file and renders it with [TinyTeX](https://yihui.org/tinytex/). """ import logging diff --git a/rendercv/data_model.py b/rendercv/data_model.py index a956dca..b365ce4 100644 --- a/rendercv/data_model.py +++ b/rendercv/data_model.py @@ -32,89 +32,10 @@ from pydantic.functional_validators import AfterValidator from pydantic_extra_types.phone_numbers import PhoneNumber from pydantic_extra_types.color import Color from ruamel.yaml import YAML -from spellchecker import SpellChecker logger = logging.getLogger(__name__) -# don't give spelling warnings for these words: -dictionary = [ - "aerostructures", - "sportsperson", - "cern", - "mechatronics", - "calculix", - "microcontroller", - "ansys", - "nx", - "aselsan", - "hrjet", - "simularge", - "siemens", - "dynamometer", - "dc", - "grammarly", - "css", - "html", - "markdown", - "ubuntu", - "matlab", - "lua", - "premake", - "javascript", -] -spell = SpellChecker() -all_misspelled_words = set() - - -def check_spelling(sentence: str) -> str: - """Check the spelling of a sentence and give warnings if there are any misspelled - words. - - It uses [pyspellchecker](https://github.com/barrust/pyspellchecker). It can also - guess the correct version of the misspelled word, but it is not used because it is - very slow. - - Example: - ```python - check_spelling("An interesting sentence is akways good.") - ``` - - will print the following warning: - - `WARNING - The word "akways" might be misspelled according to the pyspellchecker.` - - Args: - sentence (str): The sentence to check. - - Returns: - str: The same sentence. - """ - modified_sentence = sentence.lower() # convert to lower case - modified_sentence = re.sub( - r"\-+", " ", modified_sentence - ) # replace hyphens with spaces - modified_sentence = re.sub( - r"[^a-z\s\-']", "", modified_sentence - ) # remove all the special characters - words = modified_sentence.split() # split sentence into a list of words - misspelled = spell.unknown(words) # find misspelled words - - if len(misspelled) > 0: - for word in misspelled: - if len(word) == 1: - continue - - # for each misspelled word, check if it is in the dictionary and otherwise - # give a warning - if word in dictionary: - continue - - all_misspelled_words.add(word) - - return sentence - - def escape_latex_characters(sentence: str) -> str: """Escape LaTeX characters in a sentence. @@ -129,18 +50,29 @@ def escape_latex_characters(sentence: str) -> str: # Dictionary of escape characters: escape_characters = { "#": r"\#", - "$": r"\$", + # "$": r"\$", # Don't escape $ as it is used for math mode "%": r"\%", "&": r"\&", "~": r"\textasciitilde{}", - "_": r"\_", - "^": r"\textasciicircum{}", + # "_": r"\_", # Don't escape _ as it is used for math mode + # "^": r"\textasciicircum{}", # Don't escape ^ as it is used for math mode } + + # Don't escape links as hyperref will do it automatically: + + # Find all the links in the sentence: + links = re.findall(r"\[.*?\]\(.*?\)", sentence) + + # Replace the links with a placeholder: + for link in links: + sentence = sentence.replace(link, "!!-link-!!") + # Handle backslash and curly braces separately because the other characters are # escaped with backslash and curly braces: sentence = sentence.replace("{", ">>{") sentence = sentence.replace("}", ">>}") - sentence = sentence.replace("\\", "\\textbackslash{}") + # don't escape backslash as it is used heavily in LaTeX: + # sentence = sentence.replace("\\", "\\textbackslash{}") sentence = sentence.replace(">>{", "\\{") sentence = sentence.replace(">>}", "\\}") @@ -151,6 +83,10 @@ def escape_latex_characters(sentence: str) -> str: if character in escape_characters: sentence = sentence.replace(character, escape_characters[character]) + # Replace the links with the original links: + for link in links: + sentence = sentence.replace("!!-link-!!", link) + return sentence @@ -641,7 +577,6 @@ class Design(BaseModel): # ====================================================================================== LaTeXString = Annotated[str, AfterValidator(escape_latex_characters)] -SpellCheckedString = Annotated[LaTeXString, AfterValidator(check_spelling)] PastDate = Annotated[ str, Field(pattern=r"\d{4}-?(\d{2})?-?(\d{2})?"), @@ -683,7 +618,7 @@ class Event(BaseModel): ), examples=["2020-09-24", "My Custom Date"], ) - highlights: Optional[list[SpellCheckedString]] = Field( + highlights: Optional[list[LaTeXString]] = Field( default=[], title="Highlights", description=( @@ -850,7 +785,7 @@ class Event(BaseModel): @computed_field @cached_property - def highlight_strings(self) -> list[SpellCheckedString]: + def highlight_strings(self) -> list[LaTeXString]: highlight_strings = [] if self.highlights is not None: highlight_strings.extend(self.highlights) @@ -906,7 +841,7 @@ class OneLineEntry(Event): title="Name", description="The name of the entry. It will be shown as bold text.", ) - details: SpellCheckedString = Field( + details: LaTeXString = Field( title="Details", description="The details of the entry. It will be shown as normal text.", ) @@ -968,7 +903,7 @@ class EducationEntry(Event): @computed_field @cached_property - def highlight_strings(self) -> list[SpellCheckedString]: + def highlight_strings(self) -> list[LaTeXString]: highlight_strings = [] if self.gpa is not None: @@ -1386,35 +1321,6 @@ class CurriculumVitae(BaseModel): return model - @model_validator(mode="after") - @classmethod - def print_all_the_misspeled_words(cls, model): - """Print all the words that are misspelled according to pyspellchecker.""" - if len(all_misspelled_words) > 0: - messages = [] - messages.append( - "The following words might be misspelled (according to pyspellchecker):" - ) - - misspelled_words = list(all_misspelled_words) - - # Make misspeled_words a list of lists where each list contains 5: - misspelled_words = [ - misspelled_words[i : i + 5] for i in range(0, len(misspelled_words), 5) - ] - - # Join the words in each list with a comma, and join the lists with a new - # line: - misspelled_words = "\n ".join( - [", ".join(words) for words in misspelled_words] - ) - messages.append(f" {misspelled_words}") - - # Print the messages: - logger.warning("\n".join(messages)) - - return model - @computed_field @cached_property def connections(self) -> list[Connection]: diff --git a/run_rendercv.py b/run_rendercv.py index ec7ce23..c78ff9a 100644 --- a/run_rendercv.py +++ b/run_rendercv.py @@ -2,7 +2,7 @@ from rendercv.__main__ import render from rendercv.data_model import generate_json_schema import os -input_file_path = "John_Doe_CV.yaml" +input_file_path = "SinaAtalay_CV.yaml" render(input_file_path) # type: ignore # This script is equivalent to running the following command in the terminal: diff --git a/tests/test_data_model.py b/tests/test_data_model.py index b255bde..b097de7 100644 --- a/tests/test_data_model.py +++ b/tests/test_data_model.py @@ -9,17 +9,6 @@ from pydantic import ValidationError, HttpUrl class TestDataModel(unittest.TestCase): - def test_check_spelling(self): - sentences = [ - "This is a sentence.", - "This is a sentance with special characters &@#&^@*#&)((!@#_)()).", - r"12312309 Thisdf sdfsd is a sentence *safds\{\}[[[]]]", - ] - - for sentence in sentences: - with self.subTest(sentence=sentence): - data_model.check_spelling(sentence) - def test_escape_latex_characters(self): str_without_latex_characters = "This is a string without LaTeX characters." expected = str_without_latex_characters