Source code for pynblint.nb_linting

"""Linting functions for notebooks."""
import ast
import re
from typing import List, Pattern

from . import lint_register as register
from .cell import Cell, CellType
from .config import settings
from .lint import LintDefinition, LintLevel
from .notebook import Notebook

# ============== #
# NOTEBOOK LEVEL #
# ============== #


def non_linear_execution(notebook: Notebook) -> bool:
    """Tell whether the code cells were executed out of top-to-bottom order.

    Code cells whose execution counter is falsy (e.g. ``None``) are left out
    of the comparison.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        bool: ``True`` if the execution counters, read in notebook order,
        are not sorted in non-decreasing order; ``False`` otherwise.
    """
    counters: List[int] = []
    for code_cell in notebook.code_cells:
        if code_cell.exec_count:
            counters.append(code_cell.exec_count)
    # A linearly executed notebook already lists its counters in sorted order.
    return sorted(counters) != counters
def notebook_too_long(notebook: Notebook) -> bool:
    """Tell whether the notebook contains too many cells.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        bool: ``True`` if ``len(notebook)`` is strictly greater than
        ``settings.max_cells_in_notebook``; ``False`` otherwise.
    """
    cell_count = len(notebook)
    return cell_count > settings.max_cells_in_notebook
def untitled_notebook(notebook: Notebook) -> bool:
    """Check whether the notebook is untitled.

    I.e., the notebook still has the default title:
    ``Untitled[<number>].ipynb``.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        bool: ``True`` if the notebook was left with the default creation
        title; ``False`` otherwise.
    """
    # The dot is escaped and the whole filename must match: an unescaped "."
    # with a prefix-only match would also accept names such as
    # "Untitled1xipynb" or "Untitled.ipynb.bak".
    default_title: Pattern[str] = re.compile(r"Untitled\d*\.ipynb")
    return default_title.fullmatch(notebook.path.name) is not None
def notebook_named_with_unrestricted_charset(notebook: Notebook) -> bool:
    """Check if the notebook filename contains characters outside ``[A-Za-z0-9_.-]``.

    To be supported by all popular operating systems, notebook names should be
    restricted to the ``[A-Za-z0-9_.-]`` charset.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        bool: ``True`` if the filename is empty or contains at least one
        character outside the portable charset; ``False`` otherwise.
    """
    # ``fullmatch`` is used instead of ``^...$`` because ``$`` also matches
    # right before a trailing newline, which would let a name ending in "\n"
    # pass as portable.
    return re.fullmatch(r"[A-Za-z0-9_.-]+", notebook.path.name) is None
def long_filename(notebook: Notebook) -> bool:
    """Tell whether the notebook filename exceeds the configured length limit.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        bool: ``True`` if ``settings.filename_max_length`` is set (truthy) and
        the filename is strictly longer than it; ``False`` otherwise,
        including when no limit is configured.
    """
    max_length = settings.filename_max_length
    if not max_length:
        # A falsy limit disables this check.
        return False
    return len(notebook.path.name) > max_length
def imports_beyond_first_cell(notebook: Notebook) -> bool:
    """Check if import statements are used beyond the first code cell.

    ``notebook.script`` is split into cells on the lines starting with
    ``# In[`` (the cell markers, presumably emitted by nbconvert). Everything
    before the first marker and the whole first code cell are ignored; every
    following cell, including the last one, is parsed on its own and inspected
    for top-level ``import ...`` and ``from ... import ...`` statements.

    Cells that cannot be parsed are skipped, so a syntax error in one cell
    does not hide imports located in the other cells.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        bool: ``True`` if at least one code cell after the first contains a
        top-level import statement; ``False`` otherwise.
    """
    # chunks[0] is the preamble before the first marker, chunks[1] is the
    # first code cell, chunks[2:] are the cells that must be import-free.
    chunks: List[List[str]] = [[]]
    for line in notebook.script.split("\n"):
        if line.startswith("# In["):
            chunks.append([])
        else:
            chunks[-1].append(line)

    for cell_lines in chunks[2:]:
        try:
            tree = ast.parse("\n".join(cell_lines))
        except SyntaxError:
            # Best effort: invalid syntax is reported by a dedicated lint.
            continue
        if any(isinstance(stmt, (ast.Import, ast.ImportFrom)) for stmt in tree.body):
            return True
    return False
def missing_h1_md_heading(notebook: Notebook) -> bool:
    """Tell whether the initial cells lack an H1 Markdown heading.

    The sources of the Markdown cells found in ``notebook.initial_cells`` are
    scanned line by line. A line counts as an H1 heading when it consists of
    optional leading whitespace, a single ``#`` and text containing no
    further ``#`` characters.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        bool: ``True`` if none of the scanned lines is an H1 heading;
        ``False`` otherwise.
    """
    h1_pattern = re.compile(r"^\s*#\s*[^#\n]*$")
    markdown_sources = [
        candidate.cell_source
        for candidate in notebook.initial_cells
        if candidate.cell_type == CellType.MARKDOWN
    ]
    markdown_text = "\n".join(markdown_sources)
    for row in markdown_text.splitlines():
        if h1_pattern.match(row):
            return False
    return True
def missing_opening_MD_text(notebook: Notebook) -> bool:
    """Tell whether the initial cells lack descriptive Markdown text.

    Markdown cells flagged as headings (``cell.is_heading``) do not count as
    descriptive text.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        bool: ``True`` if ``notebook.initial_cells`` contains no non-heading
        Markdown cell; ``False`` otherwise.
    """
    for cell in notebook.initial_cells:
        if cell.cell_type == CellType.MARKDOWN and not cell.is_heading:
            # One descriptive Markdown cell is enough.
            return False
    return True
def missing_closing_MD_text(notebook: Notebook) -> bool:
    """Tell whether the final cells lack descriptive Markdown text.

    Markdown cells flagged as headings (``cell.is_heading``) do not count as
    descriptive text.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        bool: ``True`` if ``notebook.final_cells`` contains no non-heading
        Markdown cell; ``False`` otherwise.
    """
    for cell in notebook.final_cells:
        if cell.cell_type == CellType.MARKDOWN and not cell.is_heading:
            # One descriptive Markdown cell is enough.
            return False
    return True
def duplicate_notebook_not_renamed(notebook: Notebook) -> bool:
    """Check if the duplicate notebook has not been renamed.

    I.e., if it was left with the default title:
    ``<source-notebook-name>-Copy<copy-number>.ipynb``.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        bool: ``True`` if the notebook was left with the default title;
        ``False`` otherwise.
    """
    # The dot is escaped and the whole filename must match: an unescaped "."
    # with a prefix-only match would also accept names such as
    # "report-Copy1xipynb" or "report-Copy1.ipynb.bak".
    copy_title: Pattern[str] = re.compile(r".*-Copy\d+\.ipynb")
    return copy_title.fullmatch(notebook.path.name) is not None
def too_few_MD_cells(notebook: Notebook) -> bool:
    """Tell whether the notebook has too few Markdown cells for its code cells.

    The number of Markdown cells is divided by the number of code cells and
    the result is compared with ``settings.min_md_code_ratio``.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        bool: ``True`` if the Markdown/code ratio is strictly below the
        configured minimum; ``False`` otherwise, including when the notebook
        has no code cells.
    """
    md_count = len(notebook.markdown_cells)
    code_count = len(notebook.code_cells)
    if not code_count:
        # No code cells: the ratio is undefined, so nothing is reported.
        return False
    return md_count / code_count < settings.min_md_code_ratio
def invalid_python_syntax(notebook: Notebook) -> bool:
    """Tell whether the notebook contains invalid Python syntax.

    The answer is read from ``notebook.has_invalid_python_syntax``; no parsing
    is performed here.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        bool: the value of ``notebook.has_invalid_python_syntax``.
    """
    has_syntax_errors = notebook.has_invalid_python_syntax
    return has_syntax_errors
# ========== #
# CELL LEVEL #
# ========== #
def non_executed_cells(notebook: Notebook) -> List[Cell]:
    """Collect the code cells that have not been executed.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        List[Cell]: the code cells whose ``non_executed`` flag is truthy, in
        notebook order (empty if there are none).
    """
    pending: List[Cell] = []
    for code_cell in notebook.code_cells:
        if code_cell.non_executed:
            pending.append(code_cell)
    return pending
def empty_cells(notebook: Notebook) -> List[Cell]:
    """Collect the empty code cells of the notebook.

    Only ``notebook.code_cells`` is inspected.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        List[Cell]: the code cells whose ``empty`` flag is truthy, in
        notebook order (empty if there are none).
    """
    blanks: List[Cell] = []
    for code_cell in notebook.code_cells:
        if code_cell.empty:
            blanks.append(code_cell)
    return blanks
def cells_too_long(notebook: Notebook) -> List[Cell]:
    """Collect the code cells that exceed the configured line limit.

    The length of a cell is the number of ``"\\n"``-separated lines in its
    ``cell_source``.

    Args:
        notebook (Notebook): the notebook to be analyzed.

    Returns:
        List[Cell]: the code cells with strictly more lines than
        ``settings.max_lines_in_code_cell``, in notebook order.
    """
    oversized: List[Cell] = []
    for code_cell in notebook.code_cells:
        line_count = len(code_cell.cell_source.split("\n"))
        if line_count > settings.max_lines_in_code_cell:
            oversized.append(code_cell)
    return oversized
# ================= #
# LINT REGISTRATION #
# ================= #

# Definitions of the lints whose linting function returns a single boolean
# for the whole notebook.
# NOTE: the f-strings below are evaluated once, when this module is imported,
# so the thresholds shown in the descriptions reflect the value of `settings`
# at import time.
notebook_level_lints: List[LintDefinition] = [
    LintDefinition(
        slug="non-linear-execution",
        description="Notebook cells have been executed in a non-linear order.",
        recommendation="Re-run your notebook top to bottom to ensure it is "
        "reproducible.",
        linting_function=non_linear_execution,
    ),
    LintDefinition(
        slug="notebook-too-long",
        description="The notebook is too long: the total number of cells exceeds "
        f"the fixed threshold ({settings.max_cells_in_notebook}).",
        recommendation="Split this notebook into two or more notebooks.",
        linting_function=notebook_too_long,
    ),
    LintDefinition(
        slug="untitled-notebook",
        description="The notebook still has the default title: "
        "Untitled[<number>].ipynb",
        recommendation="Give it a meaningful title to make it easy to recognize.",
        linting_function=untitled_notebook,
    ),
    LintDefinition(
        slug="non-portable-chars-in-nb-name",
        description="The notebook filename contains non-portable characters "
        "(i.e., characters outside the [A-Za-z0-9_.-] charset).",
        recommendation="Rename your notebook by using characters contained "
        "in the following portable charset: [A-Za-z0-9_.-].",
        linting_function=notebook_named_with_unrestricted_charset,
    ),
    LintDefinition(
        slug="notebook-name-too-long",
        description="The notebook filename is too long (i.e., it exceeds the "
        f"fixed threshold of {settings.filename_max_length} characters).",
        recommendation="Use a shorter filename and leverage Markdown titles to convey "
        "detailed information.",
        linting_function=long_filename,
    ),
    LintDefinition(
        slug="imports-beyond-first-cell",
        description="Import statements found beyond the first cell of the notebook.",
        recommendation="Move import statements to the first code cell to make "
        "your notebook dependencies more explicit.",
        linting_function=imports_beyond_first_cell,
    ),
    LintDefinition(
        slug="missing-h1-MD-heading",
        description="An H1 Markdown heading is missing from the initial cells "
        "of the notebook.",
        recommendation="Clarify the notebook subject by writing an H1 Markdown heading "
        "in one of the initial cells of your notebook.",
        linting_function=missing_h1_md_heading,
    ),
    LintDefinition(
        slug="missing-opening-MD-text",
        description="The initial notebook cells "
        f"(i.e., the first {settings.initial_cells} cells in the notebook) "
        "contain no Markdown text.",
        recommendation="Begin your notebook by describing what you intend to do "
        "in one or more introductory Markdown cells.",
        linting_function=missing_opening_MD_text,
    ),
    LintDefinition(
        slug="missing-closing-MD-text",
        description="The final notebook cells "
        f"(i.e., the last {settings.final_cells} cells in the notebook) "
        "contain no Markdown text.",
        recommendation="Conclude your notebook by describing what you have accomplished"
        " in one or more concluding Markdown cells.",
        linting_function=missing_closing_MD_text,
    ),
    LintDefinition(
        slug="too-few-MD-cells",
        description="The notebook contains too few Markdown cells compared to code "
        "cells (the ratio is below the fixed threshold of "
        f"{settings.min_md_code_ratio*100}%).",
        recommendation="Describe the steps of your computation by adding "
        "a few more Markdown cells.",
        linting_function=too_few_MD_cells,
    ),
    LintDefinition(
        slug="duplicate-notebook-not-renamed",
        description="The duplicate notebook still has the default title: "
        "<source-notebook-name>-Copy<copy-number>.ipynb",
        recommendation="Give it a meaningful title to make it easy to recognize.",
        linting_function=duplicate_notebook_not_renamed,
    ),
    LintDefinition(
        slug="invalid-python-syntax",
        description="One or more notebook cells contain invalid Python syntax.",
        recommendation="Fix syntax errors in the notebook code cells.",
        linting_function=invalid_python_syntax,
    ),
]

# Definitions of the lints whose linting function returns the list of
# affected cells.
cell_level_lints: List[LintDefinition] = [
    LintDefinition(
        slug="non-executed-cells",
        description="Non-executed cells are present in the notebook.",
        recommendation="Re-run your notebook top to bottom to ensure that all cells "
        "are executed.",
        linting_function=non_executed_cells,
    ),
    LintDefinition(
        slug="empty-cells",
        description="Empty cells are present in the notebook.",
        recommendation="Keep your notebook clean by deleting unused cells.",
        linting_function=empty_cells,
        show_details=False,
    ),
    LintDefinition(
        slug="cell-too-long",
        description="One or more code cells in this notebook are too long "
        "(i.e., they exceed the fixed threshold "
        f"of {settings.max_lines_in_code_cell} lines).",
        recommendation="Consider consolidating your code outside the notebook "
        "by moving utility functions to a structured and tested codebase.\n"
        "Use notebooks to display results, not to compute them.",
        linting_function=cells_too_long,
    ),
]


def initialize() -> None:
    """Register the notebook-level and cell-level lints defined in this module.

    Each list is passed to ``register.register_lints`` together with its
    ``LintLevel`` (``NOTEBOOK`` first, then ``CELL``).
    """
    register.register_lints(LintLevel.NOTEBOOK, notebook_level_lints)
    register.register_lints(LintLevel.CELL, cell_level_lints)