Source code for pynblint.repository

import os
import tempfile
import zipfile
from abc import ABC
from pathlib import Path
from typing import List, Optional

import git

from .config import settings
from .notebook import Notebook


class Repository(ABC):
    """Store data about a code repository rooted at a filesystem path."""

    # Directory names that are never descended into while traversing the tree.
    _IGNORED_DIRS = (".ipynb_checkpoints",)

    def __init__(self, path: Path):
        """Initialize the repository.

        Args:
            path (Path): root directory of the repository on disk.
        """
        # Repository info
        self.path = path

        # Extracted content
        self.notebooks: List[Notebook] = []  # List of Notebook objects

    def retrieve_notebooks(self):
        """Collect every ``.ipynb`` file found under ``self.path``.

        Each matching file is wrapped in a ``Notebook`` object and appended
        to ``self.notebooks``. Directories listed in ``_IGNORED_DIRS`` are
        not traversed.
        """
        for root, dirs, files in os.walk(self.path):
            # Slice assignment mutates `dirs` in place, which is what tells
            # `os.walk` not to descend into the removed directories.
            dirs[:] = [d for d in dirs if d not in self._IGNORED_DIRS]
            for f in files:
                if f.endswith(".ipynb"):
                    self.notebooks.append(Notebook(Path(root) / Path(f)))

    @property
    def is_git_repository(self) -> bool:
        """Tell whether a ``.git`` directory exists anywhere under the root.

        Directories listed in ``_IGNORED_DIRS`` are not traversed.

        Returns:
            bool: ``True`` if a directory named ``.git`` is found.
        """
        for _, dirs, _ in os.walk(self.path):
            if ".git" in dirs:
                # Stop at the first hit: the answer cannot change any more
                # and walking the contents of `.git` itself is wasted work.
                return True
            # Prune ignored directories in place so `os.walk` skips them.
            dirs[:] = [d for d in dirs if d not in self._IGNORED_DIRS]
        return False

    @property
    def large_file_paths(self) -> List[Path]:
        """Return the list of files whose size is above the fixed threshold.

        The threshold size for data files is defined in the settings
        (``settings.max_data_file_size``). Files whose size cannot be read
        are skipped.

        Returns:
            List[Path]: the list of large files.
        """
        max_size = settings.max_data_file_size
        large_files: List[Path] = []
        for dirpath, _, filenames in os.walk(self.path):
            for filename in filenames:
                file_path = Path(dirpath) / filename
                try:
                    size = os.path.getsize(file_path)
                except OSError:
                    # E.g. a dangling symlink or a file removed during the
                    # walk: it has no measurable size, so it cannot be
                    # "large" and must not abort the whole scan.
                    continue
                if size > max_size:
                    large_files.append(file_path)
        return large_files
class LocalRepository(Repository):
    """Store data about a local code repository.

    The `source_path` can point either to a local directory or a zip archive.
    A zip archive is extracted into a temporary directory, which is kept for
    as long as this object is alive so that the path-based properties of the
    repository keep working after construction.
    """

    def __init__(self, source_path: Path):
        """Load the repository found at `source_path` and collect its notebooks.

        Args:
            source_path (Path): a local directory or a ``.zip`` archive.

        Raises:
            ValueError: if `source_path` is neither a ``.zip`` file nor a
                directory.
        """
        self.source_path = source_path
        tmp_dir: Optional[tempfile.TemporaryDirectory] = None

        # Handle .zip archives
        if self.source_path.suffix == ".zip":
            # Create temp directory
            tmp_dir = tempfile.TemporaryDirectory()
            repo_path: Path = Path(tmp_dir.name)

            # Extract the zip file into the temp folder
            try:
                with zipfile.ZipFile(self.source_path, "r") as zip_file:
                    zip_file.extractall(repo_path)
            except Exception:
                # Do not leave a half-extracted tree behind on failure.
                tmp_dir.cleanup()
                raise

        # Handle local folders
        elif self.source_path.is_dir():
            repo_path = self.source_path

        else:
            # NOTE(review): the message mentions notebooks although this class
            # accepts directories and zip archives; the text is kept verbatim
            # because callers may match on it.
            raise ValueError(
                "The file at the specified path is neither a notebook (.ipynb) "
                "nor a compressed archive."
            )

        # Keep a reference to the temp directory (if any) instead of deleting
        # it right away: `self.path` points inside it, so removing it here
        # would make every later walk of `self.path` see a missing directory.
        # `TemporaryDirectory` removes the folder when the object is finalized.
        self._tmp_dir = tmp_dir

        super().__init__(repo_path)
        self.retrieve_notebooks()
class GitHubRepository(Repository):
    """Store data about a GitHub repository.

    The repository is shallow-cloned into a temporary directory, which is
    kept for as long as this object is alive so that the path-based
    properties of the repository keep working after construction.
    """

    def __init__(self, github_url: str):
        """Clone the repository at `github_url` and collect its notebooks.

        Args:
            github_url (str): URL of the repository to clone.
        """
        self.url = github_url

        # Derive the repository name from the last URL segment, tolerating a
        # trailing slash and an optional ".git" suffix.
        repo_name = github_url.rstrip("/").split("/")[-1]
        if repo_name.endswith(".git"):
            repo_name = repo_name[: -len(".git")]

        # Clone the repo in a temp directory. The clone explicitly targets
        # `<tmp>/<repo_name>` so that the path handed to the base class is
        # the directory that actually contains the working tree.
        tmp_dir = tempfile.TemporaryDirectory()
        clone_path = Path(tmp_dir.name) / repo_name
        try:
            git.Repo.clone_from(  # type: ignore
                url=github_url, to_path=str(clone_path), depth=1
            )
        except Exception:
            # Do not leave a partial clone behind on failure.
            tmp_dir.cleanup()
            raise

        # Keep a reference to the temp directory instead of deleting it right
        # away: `self.path` points inside it, so removing it here would make
        # every later walk of `self.path` see a missing directory.
        # `TemporaryDirectory` removes the folder when the object is finalized.
        self._tmp_dir = tmp_dir

        super().__init__(clone_path)

        # Analyze the repo
        self.retrieve_notebooks()