Source code for historia._add_to_project

import datetime
import json
import os
import pathlib
import typing
import warnings

import beartype
import requests
import tqdm


def _parse_project_url(project_url: str, /) -> tuple[str, str, int]:
    """
    Parse owner type, owner login, and project number from a GitHub Project URL.

    Handles three URL formats:

    - ``https://github.com/users/{login}/projects/{number}``
    - ``https://github.com/orgs/{login}/projects/{number}``
    - ``https://github.com/{login}/projects/{number}``  (treated as a user project)

    Trailing slashes, extra path segments after the project number (such as
    ``/views/1``), and any query string or fragment are ignored.

    Parameters
    ----------
    project_url : str
        The URL of the GitHub Project v2.

    Returns
    -------
    tuple[str, str, int]
        A tuple of (owner_type, owner_login, project_number).

    Raises
    ------
    ValueError
        If the URL does not match one of the supported formats.

    """
    message = (
        f"Could not parse GitHub Project URL `{project_url}`. Expected a URL of the form "
        "`https://github.com/users/{login}/projects/{number}`, "
        "`https://github.com/orgs/{login}/projects/{number}`, or "
        "`https://github.com/{login}/projects/{number}`."
    )

    # Drop any query string or fragment so they cannot corrupt the project number.
    url_path = project_url.split("#", 1)[0].split("?", 1)[0]

    # The first three pieces are the scheme, the empty segment after `//`, and the host.
    path_parts = url_path.rstrip("/").split("/")[3:]

    if path_parts and path_parts[0] in ("users", "orgs"):
        # Long format: https://github.com/users/{login}/projects/{number}
        owner_type = path_parts[0]
        path_parts = path_parts[1:]
    else:
        # Short user format: https://github.com/{login}/projects/{number}
        owner_type = "users"

    if len(path_parts) < 3 or not path_parts[0] or path_parts[1] != "projects":
        raise ValueError(message)

    owner_login = path_parts[0]
    try:
        project_number = int(path_parts[2])
    except ValueError as exception:
        raise ValueError(message) from exception
    return owner_type, owner_login, project_number


@beartype.beartype
def add_to_project(
    *,
    directory: pathlib.Path,
    project_url: str,
    status: str | None = None,
    end_date_placeholder_days: int = 180,
    assign_members: bool = False,
) -> None:
    """
    Add all unique URLs from the derivatives directory to a GitHub Project (v2).

    Items that are already present in the project are automatically skipped.

    For each new item:

    - If ``status`` is provided, all items are assigned that status value.
    - Otherwise, the status is derived from the item type and state:

      - If the Issue or PR is closed, it is given the 'Done' status.
      - If the item is an open PR, it is given the 'In Progress' status.
      - If the item is an open Issue, it is given the 'Todo' status.

    - The start date is set to the item's creation date.
    - The end date is set to the item's closed date (if closed), or to
      ``end_date_placeholder_days`` days after the creation date otherwise.

    Parameters
    ----------
    directory : pathlib.Path
        The directory containing the derivatives JSON files.
        Should be a specific username subdirectory, e.g.,
        ``/path/to/version-1/username-codycbakerphd``.
        If a parent directory with multiple ``version-*`` subdirectories is provided,
        only the latest version is used and a warning is emitted.
    project_url : str
        The URL of the GitHub Project v2 to add items to, e.g.,
        ``https://github.com/users/username/projects/1`` or
        ``https://github.com/orgs/orgname/projects/1``.
    status : str or None, optional
        A custom status value to apply uniformly to all items added to the project.
        If ``None`` (the default), the status is derived from each item's type and state.
    end_date_placeholder_days : int, optional
        Number of days after the item's creation date to use as the placeholder end date
        when the item has not yet been closed.
        Default is 180 (approximately 6 months).
    assign_members : bool, optional
        When ``True``, update the project's custom ``Members`` text field using usernames
        inferred from ``username-*`` directory names in the data tree.
        Items already in the project get their ``Members`` value merged with the inferred usernames.

    Raises
    ------
    ValueError
        If the ``GITHUB_TOKEN`` environment variable is not set, or if ``assign_members``
        is ``True`` and the project has no ``Members`` field.
    """
    token = os.getenv("GITHUB_TOKEN")
    if token is None:
        message = "\nPlease set the `GITHUB_TOKEN` environment variable with a valid GitHub Personal Access Token!\n\n"
        raise ValueError(message)
    headers = {"Authorization": f"token {token}"}

    data_directory = _resolve_latest_version_data_directory(directory)

    # Gather the candidate URLs; with member assignment we also need the URL -> usernames mapping.
    if assign_members:
        url_to_members = _collect_url_member_usernames(data_directory)
        all_urls = list(url_to_members)
    else:
        url_to_members = {}
        all_urls = _collect_unique_urls(data_directory)

    if not all_urls:
        warnings.warn(message=f"No URLs found in directory `{data_directory}`.", stacklevel=2)
        return

    # Resolve the project node ID together with the Status / date / Members field IDs.
    project_info = _get_project_info(project_url=project_url, headers=headers)
    project_id, status_field_id, status_options, start_date_field_id, end_date_field_id, members_field_id = project_info

    owner_type, owner_login, project_number = _parse_project_url(project_url)

    validated_members_field_id: str | None = None
    if assign_members:
        if members_field_id is None:
            message = f"No 'Members' field found in project `{project_url}`."
            raise ValueError(message)
        validated_members_field_id = members_field_id

        existing_items = _list_project_items_with_member_values(
            owner_type=owner_type,
            owner_login=owner_login,
            project_number=project_number,
            members_field_id=validated_members_field_id,
            headers=headers,
        )

        # Refresh the Members field on items that are already in the project.
        for known_url in all_urls:
            if known_url not in existing_items:
                continue
            known_item = existing_items[known_url]
            stored_members = known_item["members"]
            merged_members = _merge_member_values(
                current_value=stored_members,
                usernames=url_to_members.get(known_url, set()),
            )
            if merged_members is None:
                continue
            if merged_members == _normalize_member_value(stored_members):
                # Nothing new to record; avoid a redundant mutation.
                continue
            _set_item_text(
                project_id=project_id,
                item_id=typing.cast("str", known_item["item_id"]),
                field_id=typing.cast("str", validated_members_field_id),
                text=merged_members,
                headers=headers,
            )
    else:
        # Only membership is needed here, so the per-item details are placeholders.
        existing_items = {
            content_url: {"item_id": "", "members": None}
            for content_url in _list_project_item_content_urls(
                owner_type=owner_type,
                owner_login=owner_login,
                project_number=project_number,
                headers=headers,
            )
        }

    urls_to_add = [candidate for candidate in all_urls if candidate not in existing_items]

    progress = tqdm.tqdm(iterable=urls_to_add, desc="Adding items to project", unit="items", dynamic_ncols=True)
    for url in progress:
        # Look up the node ID, type, state, and dates for the URL.
        item_info = _get_item_info(url=url, headers=headers)
        if item_info is None:
            continue
        item_node_id, item_type, item_state, created_at, closed_at = item_info

        item_id = _add_item_to_project(project_id=project_id, content_id=item_node_id, headers=headers)
        if item_id is None:
            continue

        item_values = {
            "url": url,
            "item_id": item_id,
            "item_type": item_type,
            "item_state": item_state,
            "created_at": created_at,
            "closed_at": closed_at,
        }
        project_values = {
            "project_id": project_id,
            "status": status,
            "status_field_id": status_field_id,
            "status_options": status_options,
            "start_date_field_id": start_date_field_id,
            "end_date_field_id": end_date_field_id,
            "end_date_placeholder_days": end_date_placeholder_days,
            # Stays ``None`` unless member assignment was requested and validated above.
            "members_field_id": validated_members_field_id,
            "member_usernames": url_to_members.get(url, set()),
        }
        _set_initial_project_item_fields(
            item_values=item_values,
            project_values=project_values,
            headers=headers,
        )
def _collect_unique_urls(directory: pathlib.Path, /) -> list[str]:
    """
    Collect all unique URLs from GraphQL JSON files under the given directory.

    Only JSON files whose top-level value is a list are considered, and only the
    string entries of those lists are kept. The result is sorted so that the
    processing order is reproducible between runs.
    """
    all_urls: set[str] = set()
    for info_file_path in directory.rglob(pattern="*.json"):
        # JSON is UTF-8 by specification; do not depend on the locale encoding.
        with info_file_path.open(mode="r", encoding="utf-8") as file_stream:
            info = json.load(file_stream)
        if not isinstance(info, list):
            continue
        all_urls.update(value for value in info if isinstance(value, str))
    return sorted(all_urls)


def _resolve_latest_version_data_directory(directory: pathlib.Path, /) -> pathlib.Path:
    """
    Return the directory to scan for project population data files.

    Parameters
    ----------
    directory : pathlib.Path
        User-supplied base directory for project population.

    Returns
    -------
    pathlib.Path
        ``directory`` by default. If ``directory`` contains more than one numeric
        ``version-*`` subdirectory, this returns only the highest version directory.

    Warns
    -----
    UserWarning
        Emitted when multiple numeric ``version-*`` directories are detected and only
        the highest version directory is selected.
    """
    if not directory.exists() or not directory.is_dir():
        return directory

    version_directories: list[tuple[int, pathlib.Path]] = []
    for child in directory.iterdir():
        if not child.is_dir() or not child.name.startswith("version-"):
            continue
        version_number = child.name.removeprefix("version-")
        if version_number.isdigit():
            version_directories.append((int(version_number), child))

    if len(version_directories) <= 1:
        return directory

    # Compare numerically so that `version-10` outranks `version-9`.
    latest_version_entry = max(version_directories, key=lambda entry: entry[0])
    latest_version_directory = latest_version_entry[1]
    warnings.warn(
        message="Incompatible database versions detected! Using only the latest - please run database migration.",
        stacklevel=2,
    )
    return latest_version_directory


def _collect_url_member_usernames(directory: pathlib.Path, /) -> dict[str, set[str]]:
    """Map each URL under ``directory`` to the set of usernames inferred from its path."""
    url_to_members: dict[str, set[str]] = {}
    for info_file_path in directory.rglob(pattern="*.json"):
        # JSON is UTF-8 by specification; do not depend on the locale encoding.
        with info_file_path.open(mode="r", encoding="utf-8") as file_stream:
            info = json.load(file_stream)
        if not isinstance(info, list):
            continue
        username = _infer_username_from_data_path(directory=directory, info_file_path=info_file_path)
        for value in info:
            if not isinstance(value, str):
                continue
            url_to_members.setdefault(value, set()).add(username)
    return url_to_members


def _infer_username_from_data_path(*, directory: pathlib.Path, info_file_path: pathlib.Path) -> str:
    """
    Infer a username from the first ``username-*`` path segment for a data file.

    If no non-empty ``username-*`` segment exists in the file path relative to ``directory``,
    this falls back to ``directory.name`` (with any ``username-`` prefix stripped when the
    remainder is non-empty).
    """
    relative_parts = info_file_path.relative_to(directory).parts
    for part in relative_parts:
        if part.startswith("username-"):
            username = part.removeprefix("username-")
            if username:
                return username
    if directory.name.startswith("username-"):
        username = directory.name.removeprefix("username-")
        if username:
            return username
    return directory.name


def _merge_member_values(*, current_value: str | None, usernames: set[str]) -> str | None:
    """
    Merge existing member text with username values into a deduplicated list.

    Parameters
    ----------
    current_value : str or None
        Existing comma-separated value currently stored in the project's Members field.
    usernames : set[str]
        Usernames inferred from the local history directory structure.

    Returns
    -------
    str or None
        Sorted comma-separated member names, or ``None`` if no non-empty values are present.
    """
    values = {value.strip() for value in (current_value or "").split(",") if value.strip()}
    values.update({username.strip() for username in usernames if username.strip()})
    if not values:
        return None
    return ",".join(sorted(values))


def _normalize_member_value(current_value: str | None, /) -> str | None:
    """Return a normalized comma-separated member string for a current field value."""
    return _merge_member_values(current_value=current_value, usernames=set())


def _set_initial_project_item_fields(
    *,
    item_values: dict[str, str | None],
    project_values: dict[str, object],
    headers: dict[str, str],
) -> None:
    """
    Set status/date/member fields for a newly added project item.

    Parameters
    ----------
    item_values : dict[str, str | None]
        Must contain the keys ``url``, ``item_id``, ``item_type``, ``item_state``,
        ``created_at``, and ``closed_at``.
    project_values : dict[str, object]
        Must contain the keys ``project_id``, ``status``, ``status_field_id``,
        ``status_options``, ``start_date_field_id``, ``end_date_field_id``,
        ``end_date_placeholder_days``, ``members_field_id``, and ``member_usernames``.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Notes
    -----
    If the status cannot be resolved, or the resolved status is not an option of the
    project's Status field, a warning is emitted and the date and member fields are
    left untouched as well.
    """
    url = item_values["url"]
    item_id = item_values["item_id"]
    item_type = item_values["item_type"]
    item_state = item_values["item_state"]
    created_at = item_values["created_at"]
    if url is None or item_id is None or item_type is None or item_state is None or created_at is None:
        return

    status_options = typing.cast("dict[str, str]", project_values["status_options"])
    status = typing.cast("str | None", project_values["status"])
    status_name = _resolve_status_name(
        url=url,
        item_type=item_type,
        item_state=item_state,
        status=status,
    )
    if status_name is None:
        return

    option_id = status_options.get(status_name)
    if option_id is None:
        message = (
            f"Status option `{status_name}` not found in project. Available options: {list(status_options.keys())}."
        )
        warnings.warn(message=message, stacklevel=3)
        return

    _set_item_status(
        project_id=typing.cast("str", project_values["project_id"]),
        item_id=item_id,
        field_id=typing.cast("str", project_values["status_field_id"]),
        option_id=option_id,
        headers=headers,
    )
    _set_item_dates_for_content(
        item_values=item_values,
        project_values=project_values,
        headers=headers,
    )

    members_field_id = typing.cast("str | None", project_values["members_field_id"])
    member_usernames = typing.cast("set[str]", project_values["member_usernames"])
    if members_field_id is not None:
        initial_members = _merge_member_values(
            current_value=None,
            usernames=member_usernames,
        )
        if initial_members is not None:
            _set_item_text(
                project_id=typing.cast("str", project_values["project_id"]),
                item_id=item_id,
                field_id=members_field_id,
                text=initial_members,
                headers=headers,
            )


def _resolve_status_name(*, url: str, item_type: str, item_state: str, status: str | None) -> str | None:
    """
    Resolve status name from explicit override or item type/state mapping.

    Returns ``status`` unchanged when it is not ``None``. Otherwise maps the
    ``(item_type, item_state)`` pair to 'Done', 'In Progress', or 'Todo'; an
    unrecognised pair emits a warning and yields ``None``.
    """
    if status is not None:
        return status

    status_name = {
        ("PullRequest", "closed"): "Done",
        ("PullRequest", "merged"): "Done",
        ("Issue", "closed"): "Done",
        ("PullRequest", "open"): "In Progress",
        ("Issue", "open"): "Todo",
    }.get((item_type, item_state))
    if status_name is None:
        message = f"Unrecognised item type/state combination `({item_type}, {item_state})` for `{url}`; skipping."
        warnings.warn(message=message, stacklevel=3)
    return status_name


def _set_item_dates_for_content(
    *,
    item_values: dict[str, str | None],
    project_values: dict[str, object],
    headers: dict[str, str],
) -> None:
    """
    Set start and end date fields for a project item when date fields exist.

    Reads ``item_id``, ``created_at``, and ``closed_at`` from ``item_values`` and
    ``project_id``, ``start_date_field_id``, ``end_date_field_id``, and
    ``end_date_placeholder_days`` from ``project_values``. Does nothing when the item
    ID or creation date is missing.
    """
    item_id = item_values["item_id"]
    created_at = item_values["created_at"]
    closed_at = item_values["closed_at"]
    if item_id is None or created_at is None:
        return

    project_id = typing.cast("str", project_values["project_id"])
    start_date_field_id = typing.cast("str | None", project_values["start_date_field_id"])
    end_date_field_id = typing.cast("str | None", project_values["end_date_field_id"])
    end_date_placeholder_days = typing.cast("int", project_values["end_date_placeholder_days"])

    if start_date_field_id is not None:
        start_date = created_at[:10]  # Extract YYYY-MM-DD from ISO datetime
        _set_item_date(
            project_id=project_id,
            item_id=item_id,
            field_id=start_date_field_id,
            date=start_date,
            headers=headers,
        )

    if end_date_field_id is None:
        return

    if closed_at is not None:
        end_date = closed_at[:10]
    else:
        # Still open: use a placeholder end date relative to the creation date.
        creation_date = datetime.date.fromisoformat(created_at[:10])
        end_date = (creation_date + datetime.timedelta(days=end_date_placeholder_days)).isoformat()
    _set_item_date(
        project_id=project_id,
        item_id=item_id,
        field_id=end_date_field_id,
        date=end_date,
        headers=headers,
    )


def _check_graphql_response(*, response: requests.Response, context: str) -> dict:
    """
    Validate a GraphQL API response and raise on errors.

    Parameters
    ----------
    response : requests.Response
        The HTTP response from the GitHub GraphQL API.
    context : str
        A descriptive label used in error/warning messages (e.g., the URL or mutation name).

    Returns
    -------
    dict
        The parsed JSON result from the response.

    Warns
    -----
    UserWarning
        Emitted (in addition to raising) when the status code is 403.

    Raises
    ------
    RuntimeError
        If the body is not valid JSON, the status code is not 200 (including 403),
        the payload is not a JSON object, or the payload contains an ``errors`` entry.
        Callers that want to tolerate a 403 inspect ``response.status_code`` after
        catching the exception.
    """
    status = response.status_code
    try:
        result = response.json()
    except requests.exceptions.JSONDecodeError as exception:
        response_body = response.text.strip() or "<empty response body>"
        message = f"{context}\nStatus code {status}: GitHub GraphQL API response was not valid JSON: {response_body}"
        raise RuntimeError(message) from exception

    message = f"{context}\nStatus code {status}: {result}"
    if status == 403:
        warnings.warn(message=message, stacklevel=3)
        raise RuntimeError(message)
    if status != 200:
        raise RuntimeError(message)

    try:
        errors = result.get("errors")
    except AttributeError as exception:
        unexpected_payload_message = (
            f"{context}\nStatus code {status}: GitHub GraphQL API returned unexpected JSON payload: {result}"
        )
        raise RuntimeError(unexpected_payload_message) from exception
    if errors is not None:
        raise RuntimeError(message)
    return result


def _is_missing_project_item_error(*, response: requests.Response, item_id: str) -> bool:
    """
    Return ``True`` when a GraphQL response indicates the project item no longer exists.

    This is called from ``except`` handlers, so it never raises on a malformed
    body: a non-JSON or non-object payload simply yields ``False``.
    """
    if response.status_code != 200:
        return False
    try:
        result = response.json()
    except ValueError:
        # JSON decoding errors raised by `requests` derive from ValueError.
        return False
    if not isinstance(result, dict):
        return False

    errors = result.get("errors")
    if not isinstance(errors, list):
        return False
    for error in errors:
        if not isinstance(error, dict):
            continue
        if error.get("type") != "NOT_FOUND":
            continue
        message = error.get("message")
        if isinstance(message, str) and f"'{item_id}'" in message:
            return True
    return False


def _get_project_info(
    *,
    project_url: str,
    headers: dict[str, str],
) -> tuple[str, str, dict[str, str], str | None, str | None, str | None]:
    """
    Retrieve project node ID, Status field ID, Status option name-to-ID mapping, and Start/End date field IDs.

    Parameters
    ----------
    project_url : str
        The URL of the GitHub Project v2.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    tuple[str, str, dict[str, str], str | None, str | None, str | None]
        A tuple of (project_id, status_field_id, status_options, start_date_field_id,
        end_date_field_id, members_field_id) where status_options maps
        option name → option ID, and start_date_field_id / end_date_field_id are the IDs of
        the project's "Start date" and "End date" date fields respectively (or None if not present).
        ``members_field_id`` is the ID of the project's "Members" field (or None if not present).

    Raises
    ------
    ValueError
        If the project has no "Status" field.
    RuntimeError
        If the GraphQL request fails.
    """
    # Expected URL formats:
    #   https://github.com/users/{login}/projects/{number}
    #   https://github.com/orgs/{login}/projects/{number}
    #   https://github.com/{login}/projects/{number}
    owner_type, owner_login, project_number = _parse_project_url(project_url)

    # User and organization projects differ only in the root field of the query.
    owner_field = "user" if owner_type == "users" else "organization"

    # Request 100 fields (the GraphQL page maximum) so that Status / date / Members
    # fields are not silently missed on projects with many custom fields.
    query = """
    query GetProject($login: String!, $number: Int!) {
      OWNER_FIELD(login: $login) {
        projectV2(number: $number) {
          id
          fields(first: 100) {
            nodes {
              ... on ProjectV2SingleSelectField {
                id
                name
                options {
                  id
                  name
                }
              }
              ... on ProjectV2Field {
                id
                name
                dataType
              }
            }
          }
        }
      }
    }
    """.replace("OWNER_FIELD", owner_field)
    variables = {"login": owner_login, "number": project_number}
    data_path = ["data", owner_field, "projectV2"]

    response = requests.post(
        url="https://api.github.com/graphql",
        json={"query": query, "variables": variables},
        headers=headers,
        timeout=30,
    )
    result = _check_graphql_response(response=response, context=f"Failed to retrieve project info for `{project_url}`.")

    project_data = result
    for key in data_path:
        project_data = project_data[key]

    project_id = project_data["id"]

    # Find the Status, Start date, End date, and Members fields.
    status_field_id = None
    status_options: dict[str, str] = {}
    start_date_field_id = None
    end_date_field_id = None
    members_field_id = None
    for field in project_data["fields"]["nodes"]:
        # Field kinds not covered by the fragments above come back as empty objects.
        if not field:
            continue
        field_name = field.get("name", "")
        if field_name == "Status":
            status_field_id = field["id"]
            for option in field.get("options", []):
                status_options[option["name"]] = option["id"]
        elif field.get("dataType") == "DATE" and field_name == "Start date":
            start_date_field_id = field["id"]
        elif field.get("dataType") == "DATE" and field_name == "End date":
            end_date_field_id = field["id"]
        elif field_name == "Members":
            members_field_id = field["id"]

    if status_field_id is None:
        message = f"No 'Status' field found in project `{project_url}`."
        raise ValueError(message)

    return project_id, status_field_id, status_options, start_date_field_id, end_date_field_id, members_field_id


def _get_item_info(*, url: str, headers: dict[str, str]) -> tuple[str, str, str, str, str | None] | None:
    """
    Fetch the node ID, type (PullRequest or Issue), state, creation date, and closed date for the given URL.

    Parameters
    ----------
    url : str
        The GitHub URL of the PR or Issue.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    tuple[str, str, str, str, str | None] or None
        A tuple of (node_id, item_type, item_state, created_at, closed_at) where
        item_type is 'PullRequest' or 'Issue', item_state is the lower-cased GitHub
        state ('open', 'closed', or 'merged'), created_at is an ISO 8601 datetime
        string, and closed_at is an ISO 8601 datetime string or None if not closed.
        Returns None if the URL does not resolve to a PR or Issue.
    """
    query = """
    query GetItem($url: URI!) {
      resource(url: $url) {
        ... on PullRequest {
          id
          state
          createdAt
          closedAt
        }
        ... on Issue {
          id
          state
          createdAt
          closedAt
        }
      }
    }
    """
    variables = {"url": url}

    response = requests.post(
        url="https://api.github.com/graphql",
        json={"query": query, "variables": variables},
        headers=headers,
        timeout=30,
    )
    result = _check_graphql_response(response=response, context=f"Failed to retrieve item info for URL `{url}`.")

    resource = result["data"]["resource"]
    # `resource` is None when the URL resolves to nothing, and an empty object when it
    # resolves to something other than a PR or Issue (neither fragment matches).
    if not resource or "id" not in resource:
        return None

    node_id = resource["id"]
    item_state = resource["state"].lower()
    created_at: str = resource["createdAt"]
    closed_at: str | None = resource.get("closedAt")

    # Determine the type based on the URL path
    item_type = "PullRequest" if "/pull/" in url else "Issue"

    return node_id, item_type, item_state, created_at, closed_at


def _add_item_to_project(*, project_id: str, content_id: str, headers: dict[str, str]) -> str | None:
    """
    Add an item to a GitHub Project v2 by its content node ID.

    Parameters
    ----------
    project_id : str
        The global node ID of the GitHub Project v2.
    content_id : str
        The global node ID of the PR or Issue to add.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    str or None
        The project item ID if successful, or None if the API answered with status 403.

    Raises
    ------
    RuntimeError
        If the request fails for any reason other than a 403 response.
    """
    mutation = """
    mutation AddItem($projectId: ID!, $contentId: ID!) {
      addProjectV2ItemById(input: {projectId: $projectId, contentId: $contentId}) {
        item {
          id
        }
      }
    }
    """
    variables = {"projectId": project_id, "contentId": content_id}

    response = requests.post(
        url="https://api.github.com/graphql",
        json={"query": mutation, "variables": variables},
        headers=headers,
        timeout=30,
    )
    try:
        result = _check_graphql_response(
            response=response,
            context=f"Failed to add item `{content_id}` to project `{project_id}`.",
        )
    except RuntimeError:
        # A 403 has already been reported as a warning; skip this item.
        if response.status_code == 403:
            return None
        raise

    return result["data"]["addProjectV2ItemById"]["item"]["id"]


def _set_item_status(
    *,
    project_id: str,
    item_id: str,
    field_id: str,
    option_id: str,
    headers: dict[str, str],
) -> None:
    """
    Set the Status field of a project item.

    Parameters
    ----------
    project_id : str
        The global node ID of the GitHub Project v2.
    item_id : str
        The project item ID.
    field_id : str
        The global node ID of the Status field.
    option_id : str
        The option ID for the desired status value.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Raises
    ------
    RuntimeError
        If the request fails for any reason other than a 403 response.
    """
    mutation = """
    mutation SetStatus($projectId: ID!, $itemId: ID!, $fieldId: ID!, $optionId: String!) {
      updateProjectV2ItemFieldValue(
        input: {
          projectId: $projectId
          itemId: $itemId
          fieldId: $fieldId
          value: { singleSelectOptionId: $optionId }
        }
      ) {
        projectV2Item {
          id
        }
      }
    }
    """
    variables = {
        "projectId": project_id,
        "itemId": item_id,
        "fieldId": field_id,
        "optionId": option_id,
    }

    response = requests.post(
        url="https://api.github.com/graphql",
        json={"query": mutation, "variables": variables},
        headers=headers,
        timeout=30,
    )
    try:
        _check_graphql_response(
            response=response,
            context=f"Failed to set status for item `{item_id}` in project `{project_id}`.",
        )
    except RuntimeError:
        # A 403 has already been reported as a warning; anything else is fatal.
        if response.status_code != 403:
            raise


def _set_item_date(
    *,
    project_id: str,
    item_id: str,
    field_id: str,
    date: str,
    headers: dict[str, str],
) -> None:
    """
    Set a date field of a project item.

    Parameters
    ----------
    project_id : str
        The global node ID of the GitHub Project v2.
    item_id : str
        The project item ID.
    field_id : str
        The global node ID of the date field.
    date : str
        The date value in ISO format (``YYYY-MM-DD``).
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Warns
    -----
    UserWarning
        Emitted when the project item no longer exists; the update is skipped.

    Raises
    ------
    RuntimeError
        If the request fails for any reason other than a 403 response or a missing item.
    """
    mutation = """
    mutation SetDate($projectId: ID!, $itemId: ID!, $fieldId: ID!, $date: Date!) {
      updateProjectV2ItemFieldValue(
        input: {
          projectId: $projectId
          itemId: $itemId
          fieldId: $fieldId
          value: { date: $date }
        }
      ) {
        projectV2Item {
          id
        }
      }
    }
    """
    variables = {
        "projectId": project_id,
        "itemId": item_id,
        "fieldId": field_id,
        "date": date,
    }

    response = requests.post(
        url="https://api.github.com/graphql",
        json={"query": mutation, "variables": variables},
        headers=headers,
        timeout=30,
    )
    try:
        _check_graphql_response(
            response=response,
            context=f"Failed to set date for item `{item_id}` in project `{project_id}`.",
        )
    except RuntimeError:
        # A 403 has already been reported as a warning.
        if response.status_code == 403:
            return
        if _is_missing_project_item_error(response=response, item_id=item_id):
            warnings.warn(
                message=f"Skipping date update for missing project item `{item_id}` in project `{project_id}`.",
                stacklevel=2,
            )
            return
        raise


def _set_item_text(
    *,
    project_id: str,
    item_id: str,
    field_id: str,
    text: str,
    headers: dict[str, str],
) -> None:
    """
    Set a text field value on a project item.

    Parameters
    ----------
    project_id : str
        The global node ID of the GitHub Project v2.
    item_id : str
        The project item ID.
    field_id : str
        The global node ID of the text field.
    text : str
        The text value to store.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Raises
    ------
    RuntimeError
        If the request fails for any reason other than a 403 response.
    """
    mutation = """
    mutation SetText($projectId: ID!, $itemId: ID!, $fieldId: ID!, $text: String!) {
      updateProjectV2ItemFieldValue(
        input: {
          projectId: $projectId
          itemId: $itemId
          fieldId: $fieldId
          value: { text: $text }
        }
      ) {
        projectV2Item {
          id
        }
      }
    }
    """
    variables = {
        "projectId": project_id,
        "itemId": item_id,
        "fieldId": field_id,
        "text": text,
    }

    response = requests.post(
        url="https://api.github.com/graphql",
        json={"query": mutation, "variables": variables},
        headers=headers,
        timeout=30,
    )
    try:
        _check_graphql_response(
            response=response,
            context=f"Failed to set text for item `{item_id}` in project `{project_id}`.",
        )
    except RuntimeError:
        # A 403 has already been reported as a warning; anything else is fatal.
        if response.status_code != 403:
            raise
@beartype.beartype
def update_project_item_dates(
    *,
    project_url: str,
    end_date_placeholder_days: int = 180,
) -> None:
    """
    Update the start and end date fields on all items already added to a GitHub Project (v2).

    For each item in the project:

    - The start date field ("Start date") is set to the item's creation date.
    - The end date field ("End date") is set to the item's closed date (if closed),
      or to ``end_date_placeholder_days`` days after the creation date otherwise.

    If the project does not have a "Start date" or "End date" field, those updates are skipped.
    Items without a creation date are skipped.

    Parameters
    ----------
    project_url : str
        The URL of the GitHub Project v2, e.g.,
        ``https://github.com/users/username/projects/1`` or
        ``https://github.com/orgs/orgname/projects/1``.
    end_date_placeholder_days : int, optional
        Number of days after the item's creation date to use as the placeholder end date
        when the item has not yet been closed.
        Default is 180 (approximately 6 months).

    Raises
    ------
    ValueError
        If the ``GITHUB_TOKEN`` environment variable is not set.
    """
    github_token = os.getenv("GITHUB_TOKEN")
    if github_token is None:
        message = "\nPlease set the `GITHUB_TOKEN` environment variable with a valid GitHub Personal Access Token!\n\n"
        raise ValueError(message)
    headers = {"Authorization": f"token {github_token}"}

    project_id, _status_field_id, _status_options, start_date_field_id, end_date_field_id, _members_field_id = (
        _get_project_info(
            project_url=project_url,
            headers=headers,
        )
    )

    if start_date_field_id is None and end_date_field_id is None:
        warnings.warn(
            message=(
                f"Project `{project_url}` has no 'Start date' or 'End date' fields. No date updates were performed."
            ),
            stacklevel=2,
        )
        return

    # Parse owner type, login, and number from URL for the items query
    owner_type, owner_login, project_number = _parse_project_url(project_url)

    # Collect all project items with their content dates (paginated)
    all_items = _list_project_items_with_dates(
        owner_type=owner_type,
        owner_login=owner_login,
        project_number=project_number,
        headers=headers,
    )

    # The project-level values are the same for every item, so build them once.
    project_values: dict[str, object] = {
        "project_id": project_id,
        "start_date_field_id": start_date_field_id,
        "end_date_field_id": end_date_field_id,
        "end_date_placeholder_days": end_date_placeholder_days,
    }

    for item in tqdm.tqdm(iterable=all_items, desc="Updating item dates", unit="items", dynamic_ncols=True):
        # Reuse the same start/end-date logic that is applied to newly added items.
        _set_item_dates_for_content(
            item_values={
                "item_id": item["id"],
                "created_at": item["createdAt"],
                "closed_at": item.get("closedAt"),
            },
            project_values=project_values,
            headers=headers,
        )
@beartype.beartype
def update_project_item_members(*, project_url: str) -> None:
    """
    Update the Members text field on all items already added to a GitHub Project (v2).

    Member attribution is inferred from assignee usernames and pull request reviewer usernames.
    Items without assignees or reviewers are skipped.

    Parameters
    ----------
    project_url : str
        The URL of the GitHub Project v2, e.g.,
        ``https://github.com/users/username/projects/1`` or
        ``https://github.com/orgs/orgname/projects/1``.

    Raises
    ------
    ValueError
        If the ``GITHUB_TOKEN`` environment variable is not set.
    """
    token = os.getenv("GITHUB_TOKEN")
    if token is None:
        message = "\nPlease set the `GITHUB_TOKEN` environment variable with a valid GitHub Personal Access Token!\n\n"
        raise ValueError(message)
    headers = {"Authorization": f"token {token}"}

    # Only the project ID and the Members field ID are needed here.
    project_info = _get_project_info(project_url=project_url, headers=headers)
    project_id = project_info[0]
    members_field_id = project_info[5]

    if members_field_id is None:
        warnings.warn(
            message=f"Project `{project_url}` has no 'Members' field. No member updates were performed.",
            stacklevel=2,
        )
        return

    owner_type, owner_login, project_number = _parse_project_url(project_url)
    items_with_usernames = _list_project_items_with_member_usernames(
        owner_type=owner_type,
        owner_login=owner_login,
        project_number=project_number,
        headers=headers,
    )

    progress = tqdm.tqdm(
        iterable=items_with_usernames,
        desc="Updating item members",
        unit="items",
        dynamic_ncols=True,
    )
    for item_id, member_usernames in progress:
        members_text = _merge_member_values(current_value=None, usernames=member_usernames)
        if members_text is not None:
            _set_item_text(
                project_id=project_id,
                item_id=item_id,
                field_id=members_field_id,
                text=members_text,
                headers=headers,
            )
def _iter_project_item_nodes(
    *,
    owner_type: str,
    owner_login: str,
    project_number: int,
    headers: dict[str, str],
    operation_name: str,
    node_selection: str,
    context: str,
) -> typing.Iterator[dict]:
    """
    Yield every item node of a GitHub Project (v2), following pagination.

    Builds a single GraphQL query around ``node_selection`` and requests pages of
    100 items until ``pageInfo.hasNextPage`` is false.

    Parameters
    ----------
    owner_type : str
        ``'users'`` selects the ``user`` root field; any other value selects ``organization``.
    owner_login : str
        The GitHub login of the project owner.
    project_number : int
        The number of the GitHub Project v2.
    headers : dict[str, str]
        HTTP headers including the Authorization token.
    operation_name : str
        The GraphQL operation name to use for the query.
    node_selection : str
        The GraphQL selection set (without the outer braces) requested for each item node.
    context : str
        Error context passed to ``_check_graphql_response`` for every page request.

    Yields
    ------
    dict
        One decoded item node per project item. Null entries in a page are skipped.

    """
    owner_field = "user" if owner_type == "users" else "organization"
    query = (
        f"query {operation_name}($login: String!, $number: Int!, $after: String) {{\n"
        f"  {owner_field}(login: $login) {{\n"
        "    projectV2(number: $number) {\n"
        "      items(first: 100, after: $after) {\n"
        f"        nodes {{ {node_selection} }}\n"
        "        pageInfo { hasNextPage endCursor }\n"
        "      }\n"
        "    }\n"
        "  }\n"
        "}\n"
    )

    after_cursor = None
    while True:
        variables = {"login": owner_login, "number": project_number, "after": after_cursor}
        response = requests.post(
            url="https://api.github.com/graphql",
            json={"query": query, "variables": variables},
            headers=headers,
            timeout=30,
        )
        result = _check_graphql_response(response=response, context=context)
        items_data = result["data"][owner_field]["projectV2"]["items"]

        for node in items_data["nodes"]:
            if node is not None:
                yield node

        page_info = items_data["pageInfo"]
        if not page_info["hasNextPage"]:
            break
        after_cursor = page_info["endCursor"]


def _list_project_item_content_urls(
    *,
    owner_type: str,
    owner_login: str,
    project_number: int,
    headers: dict[str, str],
) -> set[str]:
    """
    Return the set of content URLs for all items already in the project.

    Parameters
    ----------
    owner_type : str
        Either ``'users'`` or ``'orgs'``.
    owner_login : str
        The GitHub login of the project owner.
    project_number : int
        The number of the GitHub Project v2.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    set[str]
        A set of content URLs (PR or Issue URLs) for all items currently in the project.
        Items whose content exposes no ``url`` are omitted.

    """
    nodes = _iter_project_item_nodes(
        owner_type=owner_type,
        owner_login=owner_login,
        project_number=project_number,
        headers=headers,
        operation_name="GetItemUrls",
        node_selection="content { ... on PullRequest { url } ... on Issue { url } }",
        context=f"Failed to list project item URLs for project {project_number}.",
    )

    existing_urls: set[str] = set()
    for node in nodes:
        content = node.get("content")
        if content and "url" in content:
            existing_urls.add(content["url"])
    return existing_urls


def _list_project_items_with_dates(
    *,
    owner_type: str,
    owner_login: str,
    project_number: int,
    headers: dict[str, str],
) -> list[dict]:
    """
    Return all project items with their content creation and close dates.

    Parameters
    ----------
    owner_type : str
        Either ``'users'`` or ``'orgs'``.
    owner_login : str
        The GitHub login of the project owner.
    project_number : int
        The number of the GitHub Project v2.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    list[dict]
        A list of dicts, each with keys ``id``, ``createdAt``, and ``closedAt``
        (``None`` when the content reports no close date). Items whose content has
        no ``createdAt`` are omitted.

    """
    nodes = _iter_project_item_nodes(
        owner_type=owner_type,
        owner_login=owner_login,
        project_number=project_number,
        headers=headers,
        operation_name="GetItems",
        node_selection=(
            "id content { ... on PullRequest { createdAt closedAt } ... on Issue { createdAt closedAt } }"
        ),
        context=f"Failed to list project items for project {project_number}.",
    )

    all_items = []
    for node in nodes:
        content = node.get("content")
        if content and "createdAt" in content:
            all_items.append(
                {
                    "id": node["id"],
                    "createdAt": content["createdAt"],
                    "closedAt": content.get("closedAt"),
                },
            )
    return all_items


def _list_project_items_with_member_values(
    *,
    owner_type: str,
    owner_login: str,
    project_number: int,
    members_field_id: str,
    headers: dict[str, str],
) -> dict[str, dict[str, str | None]]:
    """
    Return project item IDs and current Members text values indexed by content URL.

    Parameters
    ----------
    owner_type : str
        Either ``'users'`` or ``'orgs'``.
    owner_login : str
        The GitHub login of the project owner.
    project_number : int
        The number of the GitHub Project v2.
    members_field_id : str
        The ID of the text field whose value is reported as ``members``.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    dict[str, dict[str, str | None]]
        Maps each content URL to ``{"item_id": ..., "members": ...}``, where ``members``
        is ``None`` if no value for ``members_field_id`` was found among the first
        20 field values. Items whose content exposes no ``url`` are omitted.

    """
    nodes = _iter_project_item_nodes(
        owner_type=owner_type,
        owner_login=owner_login,
        project_number=project_number,
        headers=headers,
        operation_name="GetItemsWithMembers",
        node_selection=(
            "id "
            "content { ... on PullRequest { url } ... on Issue { url } } "
            "fieldValues(first: 20) { nodes { "
            "... on ProjectV2ItemFieldTextValue { text field { ... on ProjectV2Field { id } } } "
            "} }"
        ),
        context=f"Failed to list project member values for project {project_number}.",
    )

    items_by_url: dict[str, dict[str, str | None]] = {}
    for node in nodes:
        content = node.get("content")
        if not content or "url" not in content:
            continue

        members_value = None
        for field_value in node["fieldValues"]["nodes"]:
            # Skip null/empty field values, matching _list_project_items_with_status.
            if not field_value:
                continue
            field = field_value.get("field")
            if field and field.get("id") == members_field_id:
                members_value = field_value.get("text")
                break

        items_by_url[content["url"]] = {"item_id": node["id"], "members": members_value}
    return items_by_url


def _list_project_items_with_member_usernames(
    *,
    owner_type: str,
    owner_login: str,
    project_number: int,
    headers: dict[str, str],
) -> list[tuple[str, set[str]]]:
    """
    Return project item IDs and inferred member usernames from assignees/reviewers.

    Parameters
    ----------
    owner_type : str
        Either ``'users'`` or ``'orgs'``.
    owner_login : str
        The GitHub login of the project owner.
    project_number : int
        The number of the GitHub Project v2.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    list[tuple[str, set[str]]]
        ``(item_id, usernames)`` pairs. Usernames come from up to 20 assignees and,
        for pull requests, up to 20 requested reviewers that expose a ``login``.
        Items with no usernames are omitted.

    """
    nodes = _iter_project_item_nodes(
        owner_type=owner_type,
        owner_login=owner_login,
        project_number=project_number,
        headers=headers,
        operation_name="GetItemsWithMembers",
        node_selection=(
            "id content { "
            "... on PullRequest { "
            "assignees(first: 20) { nodes { login } } "
            "reviewRequests(first: 20) { nodes { requestedReviewer { ... on User { login } } } } "
            "} "
            "... on Issue { assignees(first: 20) { nodes { login } } } "
            "}"
        ),
        context=f"Failed to list project members for project {project_number}.",
    )

    items_with_members: list[tuple[str, set[str]]] = []
    for node in nodes:
        content = node.get("content")
        if content is None:
            continue

        member_usernames: set[str] = set()
        for assignee in content.get("assignees", {}).get("nodes", []):
            if not assignee:
                continue
            assignee_login = assignee.get("login")
            if assignee_login is not None:
                member_usernames.add(assignee_login)

        for review_request in content.get("reviewRequests", {}).get("nodes", []):
            if not review_request:
                continue
            requested_reviewer = review_request.get("requestedReviewer")
            if requested_reviewer is None:
                continue
            reviewer_login = requested_reviewer.get("login")
            if reviewer_login is not None:
                member_usernames.add(reviewer_login)

        if member_usernames:
            items_with_members.append((node["id"], member_usernames))
    return items_with_members


def _list_project_items_with_status(
    *,
    owner_type: str,
    owner_login: str,
    project_number: int,
    status_field_id: str,
    headers: dict[str, str],
) -> list[dict]:
    """
    Return all project items together with their current status option ID.

    Parameters
    ----------
    owner_type : str
        Either ``'users'`` or ``'orgs'``.
    owner_login : str
        The GitHub login of the project owner.
    project_number : int
        The number of the GitHub Project v2.
    status_field_id : str
        The global node ID of the Status field.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    list[dict]
        A list of dicts, each with keys ``id`` and ``status_option_id``.
        Items whose status field value cannot be determined have
        ``status_option_id`` set to ``None``.

    """
    nodes = _iter_project_item_nodes(
        owner_type=owner_type,
        owner_login=owner_login,
        project_number=project_number,
        headers=headers,
        operation_name="GetItemsWithStatus",
        node_selection=(
            "id fieldValues(first: 20) { nodes { "
            "... on ProjectV2ItemFieldSingleSelectValue { "
            "optionId field { ... on ProjectV2SingleSelectField { id } } "
            "} "
            "} }"
        ),
        context=f"Failed to list project items with status for project {project_number}.",
    )

    all_items = []
    for node in nodes:
        status_option_id = None
        for field_value in node.get("fieldValues", {}).get("nodes", []):
            if not field_value:
                continue
            field = field_value.get("field", {})
            if field and field.get("id") == status_field_id:
                status_option_id = field_value.get("optionId")
                break
        all_items.append({"id": node["id"], "status_option_id": status_option_id})
    return all_items
@beartype.beartype
def transition_status(*, project_url: str, current_status: str, new_status: str) -> None:
    """
    Reassign every project item holding one Status value to another Status value.

    Both status names are resolved to option IDs before any item is listed or
    modified, so an unknown name fails without touching the project items.

    Parameters
    ----------
    project_url : str
        The URL of the GitHub Project v2, e.g.,
        ``https://github.com/users/username/projects/1`` or
        ``https://github.com/orgs/orgname/projects/1``.
    current_status : str
        The current Status value to match.
    new_status : str
        The destination Status value to apply.

    Raises
    ------
    ValueError
        If the ``GITHUB_TOKEN`` environment variable is not set, or if the project
        does not have one of the specified status options.

    """
    token = os.getenv("GITHUB_TOKEN")
    if token is None:
        message = "\nPlease set the `GITHUB_TOKEN` environment variable with a valid GitHub Personal Access Token!\n\n"
        raise ValueError(message)
    headers = {"Authorization": f"token {token}"}

    project_info = _get_project_info(project_url=project_url, headers=headers)
    project_id, status_field_id, status_options, _start_id, _end_id, _members_id = project_info

    # Resolve the source status first, then the destination, so the error for an
    # unknown source status takes precedence.
    resolved_option_ids = []
    for status_name in (current_status, new_status):
        option_id = status_options.get(status_name)
        if option_id is None:
            message = (
                f"Status option '{status_name}' not found in project `{project_url}`. "
                f"Available options: {list(status_options.keys())}."
            )
            raise ValueError(message)
        resolved_option_ids.append(option_id)
    current_option_id, new_option_id = resolved_option_ids

    owner_type, owner_login, project_number = _parse_project_url(project_url)
    items_with_status = _list_project_items_with_status(
        owner_type=owner_type,
        owner_login=owner_login,
        project_number=project_number,
        status_field_id=status_field_id,
        headers=headers,
    )

    # Materialize the matches as a list so the progress bar knows its total.
    matching_items = [entry for entry in items_with_status if entry["status_option_id"] == current_option_id]
    progress = tqdm.tqdm(
        iterable=matching_items,
        desc=f"Moving items from {current_status} to {new_status}",
        unit="items",
        dynamic_ncols=True,
    )
    for entry in progress:
        _set_item_status(
            project_id=project_id,
            item_id=entry["id"],
            field_id=status_field_id,
            option_id=new_option_id,
            headers=headers,
        )