Source code for flake8_nb.parsers.cell_parsers

"""Module containing parsers for notebook cells.

This also includes parsers for the cell and inline tags.
It heavily utilizes the mutability of lists.
"""

from __future__ import annotations

import re
import warnings
from typing import Dict
from typing import List

from flake8_nb.parsers import CellId
from flake8_nb.parsers import NotebookCell

# Matches a complete cell tag in one of four forms and exposes its parts as
# named groups:
#   'flake8-noqa-cell-<rules>'            -> ``cell_rules``
#   'flake8-noqa-line-<line_nr>-<rules>'  -> ``line_nr`` and ``line_rules``
#   'flake8-noqa-cell'                    -> ``ignore_cell``
#   'flake8-noqa-line-<line_nr>'          -> ``ignore_line_nr``
# ``<rules>`` is one or more ``\w+\d+`` codes, each optionally followed by '-',
# which means a single trailing '-' is also accepted by the pattern.
FLAKE8_TAG_PATTERN = re.compile(
    r"^flake8-noqa-(cell-(?P<cell_rules>(\w+\d+-?)+)"
    r"|line-(?P<line_nr>\d+)-(?P<line_rules>(\w+\d+-?)+))$"
    r"|^(?P<ignore_cell>flake8-noqa-cell)$"
    r"|^flake8-noqa-line-(?P<ignore_line_nr>\d+)$"
)

# Matches a source line that ends with a '#' comment made up solely of one or
# more 'flake8-noqa-cell[-<rule>...]' / 'flake8-noqa-line-<nr>[-<rule>...]' tags
# (optionally followed by whitespace). Everything after that '#' is captured in
# the ``flake8_inline_tags`` group. Tags may be separated by any whitespace.
# ``re.DOTALL`` lets the leading ``.*?`` also run across newline characters.
FLAKE8_INLINE_TAG_PATTERN = re.compile(
    r"^.*?(#(?P<flake8_inline_tags>(\s*flake8-noqa-(cell(-\w+\d+)*|line-\d+(-\w+\d+)*))+))\s*$",
    re.DOTALL,
)

# Matches a line with at least one character in front of a regular flake8
# '# noqa' comment. Two alternatives:
#   '<code> # noqa: <rules>' -> ``flake8_noqa_rules`` holds the rule list
#                               (``\w+\d+`` codes, each optionally followed by
#                               a comma and/or whitespace)
#   '<code> # noqa' or '<code> # noqa:' -> ``has_flake8_noqa_all`` is set
FLAKE8_NOQA_INLINE_PATTERN = re.compile(
    r"^.+?\s*[#]\s*noqa\s*[:]"
    r"(?P<flake8_noqa_rules>(\s*\w+\d+[,]?\s*)+)$"
    r"|^.+?\s*(?P<has_flake8_noqa_all>[#]\s*noqa\s*[:]?\s*)$"
)

# Splits a line into the code part (``source_code``) and a trailing
# '# noqa...' comment (``flake8_noqa``); the whitespace between both parts
# belongs to neither group, so substituting ``\g<source_code>`` strips the
# comment together with the whitespace in front of it.
FLAKE8_NOQA_INLINE_REPLACE_PATTERN = re.compile(
    r"^(?P<source_code>.+?)\s*(?P<flake8_noqa>[#]\s*noqa\s*[:]?.*)$"
)

# Mapping from a target to the list of flake8 rules to ignore for it. The
# parsers in this module use the key 'cell' for cell wide rules and the line
# number (as string) for line rules; the rule 'noqa' stands for "ignore all".
RulesDict = Dict[str, List[str]]


class InvalidFlake8TagWarning(UserWarning):
    """Warning thrown when a tag is badly formatted.

    Meant for tags which start with 'flake8-noqa-' but don't match
    the pattern needed for cell or line tags.
    This is used to show users that they have a typo in their tags.
    """

    def __init__(self, flake8_tag: str):
        """Create InvalidFlake8TagWarning.

        Parameters
        ----------
        flake8_tag : str
            Used improperly formatted flake8-nb tag
        """
        # The "ignore everything on a line" form takes a line number, not a
        # rule, so the hint has to read 'flake8-noqa-line-<line_nr>'.
        super().__init__(
            "flake8-noqa-line/cell-tags should be of form "
            "'flake8-noqa-cell-<rule1>-<rule2>'|'flake8-noqa-cell'/"
            "'flake8-noqa-line-<line_nr>-<rule1>-<rule2>'|'flake8-noqa-line-<line_nr>', "
            f"you used: '{flake8_tag}'"
        )
def extract_flake8_tags(notebook_cell: NotebookCell) -> list[str]:
    """Collect the cell tags which start with 'flake8-noqa-'.

    Parameters
    ----------
    notebook_cell : NotebookCell
        Dict representation of a notebook cell as parsed from JSON.

    Returns
    -------
    list[str]
        Tags from the cell metadata starting with 'flake8-noqa-', in their
        original order. Empty if the metadata has no 'tags' entry.
    """
    flake8_tags = []
    for cell_tag in notebook_cell["metadata"].get("tags", []):
        if not cell_tag.startswith("flake8-noqa-"):
            continue
        flake8_tags.append(cell_tag)
    return flake8_tags
def extract_flake8_inline_tags(notebook_cell: NotebookCell) -> list[str]:
    """Extract flake8-tags which were used as comment in a cell.

    Parameters
    ----------
    notebook_cell : NotebookCell
        Dict representation of a notebook cell as parsed from JSON.

    Returns
    -------
    list[str]
        List of all inline tags in the given cell, which matched
        ``FLAKE8_INLINE_TAG_PATTERN``, in order of appearance.
    """
    flake8_inline_tags: list[str] = []
    for source_line in notebook_cell["source"]:
        match = FLAKE8_INLINE_TAG_PATTERN.match(source_line)
        if match and match["flake8_inline_tags"]:
            # The pattern allows any whitespace between tags, so split on all
            # whitespace (not only on " "); this also drops empty chunks.
            flake8_inline_tags.extend(match["flake8_inline_tags"].split())
    return flake8_inline_tags
def extract_inline_flake8_noqa(source_line: str) -> list[str]:
    """Extract flake8 noqa rules from normal flake8 comments.

    Parameters
    ----------
    source_line : str
        Single line of sourcecode from a cell.

    Returns
    -------
    list[str]
        List of flake8 rules, ``["noqa"]`` if the comment ignores all rules
        and an empty list if the line has no flake8 noqa comment.
    """
    match = FLAKE8_NOQA_INLINE_PATTERN.match(source_line)
    if match is None:
        return []
    flake8_noqa_rules_str = match["flake8_noqa_rules"]
    if flake8_noqa_rules_str:
        stripped_rules = (rule.strip() for rule in flake8_noqa_rules_str.split(","))
        # A trailing comma (e.g. '# noqa: E501,') is accepted by the pattern
        # and would otherwise leave an empty rule name in the result.
        return [rule for rule in stripped_rules if rule]
    if match["has_flake8_noqa_all"]:  # pragma: no branch
        return ["noqa"]
    return []
def flake8_tag_to_rules_dict(flake8_tag: str) -> RulesDict:
    """Parse a flake8 tag to a ``rules_dict``.

    ``rules_dict`` contains lists of rules, depending on if the tag is
    a cell or a line tag.
    A tag which doesn't match ``FLAKE8_TAG_PATTERN`` triggers an
    ``InvalidFlake8TagWarning`` and results in an empty dict.

    Parameters
    ----------
    flake8_tag : str
        String of a flake8-tag.

    Returns
    -------
    RulesDict
        Dict with cell and line rules. Line rules have the line number as key
        and cell rules have 'cell' as key.

    See Also
    --------
    get_flake8_rules_dict
    """
    match = FLAKE8_TAG_PATTERN.match(flake8_tag)
    if match:
        # The rule groups may end with a '-' (accepted by the pattern), hence
        # empty chunks are dropped after splitting.
        if match["cell_rules"]:
            cell_rules = [rule for rule in match["cell_rules"].split("-") if rule]
            return {"cell": cell_rules}
        elif match["ignore_cell"]:
            return {"cell": ["noqa"]}
        elif match["line_nr"] and match["line_rules"]:
            line_nr = str(match["line_nr"])
            line_rules = [rule for rule in match["line_rules"].split("-") if rule]
            return {line_nr: line_rules}
        elif match["ignore_line_nr"]:  # pragma: no branch
            line_nr = str(match["ignore_line_nr"])
            return {line_nr: ["noqa"]}
    warnings.warn(InvalidFlake8TagWarning(flake8_tag))
    return {}
def update_rules_dict(total_rules_dict: RulesDict, new_rules_dict: RulesDict) -> None:
    """Update the rules dict ``total_rules_dict`` with ``new_rules_dict``.

    ``total_rules_dict`` is modified in place. If any entry of a key is
    'noqa' (ignore all), the rules will be set to be only 'noqa'. Otherwise
    the old and new rules are merged into a sorted list without duplicates.

    Parameters
    ----------
    total_rules_dict : RulesDict
        ``rules_dict`` which should be updated.
    new_rules_dict : RulesDict
        ``rules_dict`` which should be used to update ``total_rules_dict``.

    See Also
    --------
    flake8_tag_to_rules_dict, get_flake8_rules_dict
    """
    for key, new_rules in new_rules_dict.items():
        merged_rules = set(total_rules_dict.get(key, []))
        merged_rules.update(new_rules)
        if "noqa" in merged_rules:
            total_rules_dict[key] = ["noqa"]
        else:
            # Sorting keeps the result independent of the set iteration order.
            total_rules_dict[key] = sorted(merged_rules)
def get_flake8_rules_dict(notebook_cell: NotebookCell) -> RulesDict:
    """Build the combined ``rules_dict`` of a cell from all its flake8 tags.

    Both the tags from the cell metadata and the inline tags from the cell
    source are taken into account; each distinct tag is parsed once.

    Parameters
    ----------
    notebook_cell : NotebookCell
        Dict representation of a notebook cell as parsed from JSON.

    Returns
    -------
    RulesDict
        Dict with all cell and line rules. Line rules have the line number as
        key and cell rules have 'cell' as key.

    See Also
    --------
    flake8_tag_to_rules_dict, update_rules_dict
    """
    merged_rules: RulesDict = {}
    unique_tags = {
        *extract_flake8_tags(notebook_cell),
        *extract_flake8_inline_tags(notebook_cell),
    }
    for unique_tag in unique_tags:
        update_rules_dict(merged_rules, flake8_tag_to_rules_dict(unique_tag))
    return merged_rules
def generate_rules_list(source_index: int, rules_dict: RulesDict) -> list[str]:
    """Collect the rules from ``rules_dict`` that apply to one source line.

    Parameters
    ----------
    source_index : int
        Zero based index of the source code line; the line rules are looked
        up with the one based line number ``source_index + 1``.
    rules_dict : RulesDict
        Dict containing lists of rules, depending on if the tag is
        a cell or a line tag.

    Returns
    -------
    list[str]
        New list with the line rules followed by the cell rules.

    See Also
    --------
    flake8_tag_to_rules_dict, get_flake8_rules_dict
    """
    line_key = str(source_index + 1)
    return [*rules_dict.get(line_key, []), *rules_dict.get("cell", [])]
def update_inline_flake8_noqa(source_line: str, rules_list: list[str]) -> str:
    """Update ``source_line`` with flake8 noqa comments.

    The rules of an already present inline flake8 noqa comment are merged
    with ``rules_list`` and written back as a single noqa comment.

    Parameters
    ----------
    source_line : str
        Single line of sourcecode from a cell.
    rules_list : list[str]
        List of rules which should be applied to ``source_line``.

    Returns
    -------
    str
        ``source_line`` with flake8 noqa comments, terminated by a newline.

    See Also
    --------
    generate_rules_list
    """
    # Needs to run on the unmodified line, before the newline is stripped.
    inline_flake8_noqa = extract_inline_flake8_noqa(source_line)
    source_line = source_line.rstrip("\n")
    if inline_flake8_noqa:
        # Drop the existing comment, it gets rebuilt from the merged rules.
        source_line = re.sub(FLAKE8_NOQA_INLINE_REPLACE_PATTERN, r"\g<source_code>", source_line)
    # Always deduplicate: the same rule can be given as cell and as line rule.
    merged_rules = sorted({*inline_flake8_noqa, *rules_list})
    if not merged_rules:
        return f"{source_line}\n"
    # 'noqa' means ignore all, which is written as a comment without rules.
    noqa_str = "" if "noqa" in merged_rules else ", ".join(merged_rules)
    return f"{source_line} # noqa: {noqa_str}\n"
def notebook_cell_to_intermediate_dict(
    notebook_cell: NotebookCell,
) -> dict[str, CellId | str | int]:
    r"""Convert ``notebook_cell`` to the dict used for the intermediate file.

    Each source line gets the flake8 noqa comment resulting from the cell
    tags, inline tags and already present noqa comments.

    Parameters
    ----------
    notebook_cell : NotebookCell
        Dict representation of a notebook cell as parsed from JSON.

    Returns
    -------
    dict[str, CellId | str | int]
        Dict with the keys 'code', 'input_id' and 'lines_of_code'.
        ``code`` is the cell source preceded by an
        ``INTERMEDIATE_CELL_SEPARATOR`` comment, ``input_id`` is the
        ``CellId`` built from the code cells ``In[\d\*]`` number and the cell
        counters, and ``lines_of_code`` is the number of source lines plus the
        5 lines taken by the separator comment and the surrounding blank lines.

    See Also
    --------
    update_inline_flake8_noqa
    flake8_nb.parsers.notebook_parsers.create_intermediate_py_file
    """
    execution_count = notebook_cell["execution_count"]
    total_cell_nr = notebook_cell["total_cell_nr"]
    code_cell_nr = notebook_cell["code_cell_nr"]
    rules_dict = get_flake8_rules_dict(notebook_cell)
    updated_source_lines = [
        update_inline_flake8_noqa(source_line, generate_rules_list(line_index, rules_dict))
        for line_index, source_line in enumerate(notebook_cell["source"])
    ]
    # Cells which were not executed have no execution count.
    input_nr = " " if execution_count is None else execution_count
    return {
        "code": (
            f"# INTERMEDIATE_CELL_SEPARATOR ({input_nr},{code_cell_nr},{total_cell_nr})\n\n\n"
            f"{''.join(updated_source_lines)}\n\n"
        ),
        "input_id": CellId(str(input_nr), code_cell_nr, total_cell_nr),
        "lines_of_code": len(updated_source_lines) + 5,
    }