# Source code for pynblint.repo_linting

"""Linting functions for repositories containing notebooks"""

from pathlib import Path
from typing import List

from . import lint_register as register
from .lint import LintDefinition, LintLevel
from .repository import Repository

# ============= #
# PROJECT LEVEL #
# ============= #


def repository_not_versioned(repo: Repository) -> bool:
    """Tell whether the repository is not under git version control.

    Args:
        repo (Repository): The repository to be analyzed.

    Returns:
        bool: ``True`` when ``repo.is_git_repository`` is falsy,
            ``False`` otherwise.
    """
    is_versioned = repo.is_git_repository
    return not is_versioned
def dependencies_unmanaged(repo: Repository) -> bool:
    """Tell whether no dependency-management configuration file is present.

    Only the root of the repository (``repo.path``) is inspected; nested
    folders are not searched.

    Args:
        repo (Repository): The repository to be analyzed.

    Returns:
        bool: ``True`` if none of the known configuration files exists in
            the repository root, ``False`` as soon as one is found.
    """
    config_filenames = (
        "requirements.txt",
        "pyproject.toml",
        "environment.yml",
        "setup.py",
        "Pipfile",
    )
    for config_filename in config_filenames:
        # A single hit is enough to consider dependencies managed.
        if (repo.path / config_filename).exists():
            return False
    return True
# ========== #
# PATH LEVEL #
# ========== #
def duplicate_notebook_filename(repo: Repository) -> List[Path]:
    """Check the existence of notebooks sharing a filename within a repository.

    Notebooks are compared by the final component of their path
    (``notebook.path.name``), regardless of the folder they live in.

    Args:
        repo (Repository): The repository to be analyzed.

    Returns:
        List[Path]: the paths of all notebooks whose filename is used by at
            least one other notebook. Paths are grouped by filename, in the
            order in which each filename was first found to be duplicated;
            each path is reported exactly once, even when a filename occurs
            three or more times.
    """
    # Filename -> paths of every notebook carrying that filename.
    paths_by_filename: dict = {}
    # Filenames seen more than once, in order of detection.
    duplicate_filenames: List[str] = []

    for notebook in repo.notebooks:
        filename = notebook.path.name
        same_name_paths = paths_by_filename.setdefault(filename, [])
        same_name_paths.append(notebook.path)
        # Record the filename only on its second occurrence, so further
        # repetitions do not cause its paths to be reported again.
        if len(same_name_paths) == 2:
            duplicate_filenames.append(filename)

    return [
        path
        for filename in duplicate_filenames
        for path in paths_by_filename[filename]
    ]
def unversioned_large_data_files(repo: Repository) -> List[Path]:
    """Check the presence of unversioned large data files.

    Check whether large data files are versioned with a data version control
    system. Currently, the only data VCS that Pynblint detects is DVC
    (https://dvc.org), recognized by a ``.dvc`` directory in the root of the
    repository. Alternative solutions will be added soon.

    Args:
        repo (Repository): The repository to be analyzed.

    Returns:
        List[Path]: the large data files reported by
            ``repo.large_file_paths`` when no ``.dvc`` directory exists;
            an empty list otherwise.
    """
    candidates = repo.large_file_paths
    if not candidates:
        # Nothing large enough to report: no need to look for DVC at all.
        return []

    dvc_dir = Path(repo.path, ".dvc")
    if dvc_dir.is_dir():
        return []

    return candidates
# ================= #
# LINT REGISTRATION #
# ================= #

# Definitions registered under ``LintLevel.PROJECT`` by ``initialize()``.
project_level_lints: List[LintDefinition] = [
    LintDefinition(
        slug="repository-not-versioned",
        description="This repository is not version controlled.",
        recommendation="Put the repository under version control using a VCS like git.",
        linting_function=repository_not_versioned,
    ),
    LintDefinition(
        slug="dependencies-unmanaged",
        description="No dependency-management tools appear to be used in this project.",
        recommendation="If you are using `pip`, declare your dependencies in a "
        "`requirements.txt` file.\nYou can do so by running the following command: "
        "`pip freeze > requirements.txt`.",
        linting_function=dependencies_unmanaged,
    ),
]

# Definitions registered under ``LintLevel.PATH`` by ``initialize()``.
path_level_lints: List[LintDefinition] = [
    LintDefinition(
        slug="duplicate-notebook-filename",
        description="Two or more notebooks with the same filename exist in this "
        "repository.",
        recommendation="Use different filenames to make notebooks easy to recognize; "
        "possibly stick to a naming convention.",
        linting_function=duplicate_notebook_filename,
    ),
    LintDefinition(
        slug="large-data-file-not-versioned",
        description="Your repository contains one or more data files that are not "
        "versioned.",
        recommendation="Use DVC (https://dvc.org) to put your data under "
        "version control;\nmake your data available by pushing it to one of the "
        "supported remotes.",
        linting_function=unversioned_large_data_files,
    ),
]


def initialize() -> None:
    """Register the lint definitions of this module with the lint register.

    Project-level definitions are registered first, followed by the
    path-level ones.
    """
    registrations = (
        (LintLevel.PROJECT, project_level_lints),
        (LintLevel.PATH, path_level_lints),
    )
    for level, definitions in registrations:
        register.register_lints(level, definitions)