import datetime
import json
import os
import pathlib
import typing
import warnings
import beartype
import requests
import tqdm
def _parse_project_url(project_url: str, /) -> tuple[str, str, int]:
    """
    Parse owner type, owner login, and project number from a GitHub Project URL.

    Handles three URL formats:

    - ``https://github.com/users/{login}/projects/{number}``
    - ``https://github.com/orgs/{login}/projects/{number}``
    - ``https://github.com/{login}/projects/{number}`` (treated as a user project)

    Trailing slashes, trailing path segments (e.g. ``/views/2``), query strings, and
    fragments are ignored.

    Parameters
    ----------
    project_url : str
        The URL of the GitHub Project v2.

    Returns
    -------
    tuple[str, str, int]
        A tuple of (owner_type, owner_login, project_number), where ``owner_type`` is
        either ``'users'`` or ``'orgs'``.

    Raises
    ------
    ValueError
        If the URL has too few path segments or the project number is not an integer.
    """
    # Drop any query string / fragment so that `.../projects/1?pane=info` still parses.
    path_only = project_url.split("?", 1)[0].split("#", 1)[0]
    parts = path_only.rstrip("/").split("/")
    try:
        if parts[3] in ("users", "orgs"):
            # Long format: https://github.com/users/{login}/projects/{number}
            owner_type, owner_login, raw_number = parts[3], parts[4], parts[6]
        else:
            # Short user format: https://github.com/{login}/projects/{number}
            owner_type, owner_login, raw_number = "users", parts[3], parts[5]
        project_number = int(raw_number)
    except (IndexError, ValueError) as exception:
        message = (
            f"Could not parse GitHub Project URL `{project_url}`. Expected a URL such as "
            "`https://github.com/users/{login}/projects/{number}` or "
            "`https://github.com/orgs/{login}/projects/{number}`."
        )
        raise ValueError(message) from exception
    return owner_type, owner_login, project_number
[docs]
@beartype.beartype
def add_to_project(
    *,
    directory: pathlib.Path,
    project_url: str,
    status: str | None = None,
    end_date_placeholder_days: int = 180,
    assign_members: bool = False,
) -> None:
    """
    Populate a GitHub Project (v2) with every unique URL found in a derivatives directory.

    URLs whose content is already an item of the project are not added again. When
    ``assign_members`` is ``True``, the ``Members`` text field of those existing items is
    still refreshed so that it also lists the locally inferred usernames.

    Each newly added item is initialised as follows:

    - Status: ``status`` when provided; otherwise derived from the item type and state
      (closed or merged items → 'Done', open PRs → 'In Progress', open Issues → 'Todo').
    - Start date: the item's creation date.
    - End date: the item's closed date when closed; otherwise
      ``end_date_placeholder_days`` days after the creation date.
    - Members (only when ``assign_members`` is ``True``): the usernames inferred for the URL.

    URLs for which no item information or no project item ID could be obtained are skipped.

    Parameters
    ----------
    directory : pathlib.Path
        The directory containing the derivatives JSON files.
        Should be a specific username subdirectory,
        e.g., ``/path/to/version-1/username-codycbakerphd``.
        If a parent directory with multiple ``version-*`` subdirectories is provided,
        only the latest version is used and a warning is emitted.
    project_url : str
        The URL of the GitHub Project v2 to add items to,
        e.g., ``https://github.com/users/username/projects/1``
        or ``https://github.com/orgs/orgname/projects/1``.
    status : str or None, optional
        A custom status value to apply uniformly to all items added to the project.
        If ``None`` (the default), the status is derived from each item's type and state.
    end_date_placeholder_days : int, optional
        Number of days after the item's creation date to use as the placeholder end date
        when the item has not yet been closed. Default is 180 (approximately 6 months).
    assign_members : bool, optional
        When ``True``, update the project's custom ``Members`` text field using usernames
        inferred from ``username-*`` directory names in the data tree.

    Raises
    ------
    ValueError
        If the ``GITHUB_TOKEN`` environment variable is not set, or if ``assign_members``
        is ``True`` but the project has no ``Members`` field.

    Warns
    -----
    UserWarning
        If no URLs are found in the resolved data directory (nothing is added in that case).
    """
    token = os.getenv("GITHUB_TOKEN")
    if token is None:
        message = "\nPlease set the `GITHUB_TOKEN` environment variable with a valid GitHub Personal Access Token!\n\n"
        raise ValueError(message)
    headers = {"Authorization": f"token {token}"}

    data_directory = _resolve_latest_version_data_directory(directory)

    # Gather the candidate URLs; the member mapping is only needed when assigning members.
    if assign_members:
        url_to_members = _collect_url_member_usernames(data_directory)
        all_urls = list(url_to_members)
    else:
        url_to_members = {}
        all_urls = _collect_unique_urls(data_directory)

    if not all_urls:
        warnings.warn(message=f"No URLs found in directory `{data_directory}`.", stacklevel=2)
        return

    # Project node ID plus the IDs of the Status / date / Members fields.
    project_info = _get_project_info(project_url=project_url, headers=headers)
    project_id, status_field_id, status_options, start_date_field_id, end_date_field_id, members_field_id = (
        project_info
    )

    owner_type, owner_login, project_number = _parse_project_url(project_url)

    # Stays `None` unless members are being assigned and the project has a Members field.
    validated_members_field_id: str | None = None
    if not assign_members:
        known_urls = _list_project_item_content_urls(
            owner_type=owner_type,
            owner_login=owner_login,
            project_number=project_number,
            headers=headers,
        )
        existing_items = {known_url: {"item_id": "", "members": None} for known_url in known_urls}
    else:
        if members_field_id is None:
            message = f"No 'Members' field found in project `{project_url}`."
            raise ValueError(message)
        validated_members_field_id = members_field_id
        existing_items = _list_project_items_with_member_values(
            owner_type=owner_type,
            owner_login=owner_login,
            project_number=project_number,
            members_field_id=validated_members_field_id,
            headers=headers,
        )

    if assign_members:
        # Refresh the Members field on items that are already part of the project.
        for url in all_urls:
            if url not in existing_items:
                continue
            item_record = existing_items[url]
            stored_members = item_record["members"]
            merged_members = _merge_member_values(
                current_value=stored_members,
                usernames=url_to_members.get(url, set()),
            )
            # Nothing to write when there are no members or the stored value is already equivalent.
            if merged_members is None or merged_members == _normalize_member_value(stored_members):
                continue
            _set_item_text(
                project_id=project_id,
                item_id=typing.cast("str", item_record["item_id"]),
                field_id=typing.cast("str", validated_members_field_id),
                text=merged_members,
                headers=headers,
            )

    pending_urls = [url for url in all_urls if url not in existing_items]
    progress = tqdm.tqdm(iterable=pending_urls, desc="Adding items to project", unit="items", dynamic_ncols=True)
    for url in progress:
        # Look up the content node, its type/state, and its dates.
        item_info = _get_item_info(url=url, headers=headers)
        if item_info is None:
            continue
        item_node_id, item_type, item_state, created_at, closed_at = item_info

        item_id = _add_item_to_project(project_id=project_id, content_id=item_node_id, headers=headers)
        if item_id is None:
            continue

        item_values = {
            "url": url,
            "item_id": item_id,
            "item_type": item_type,
            "item_state": item_state,
            "created_at": created_at,
            "closed_at": closed_at,
        }
        project_values = {
            "project_id": project_id,
            "status": status,
            "status_field_id": status_field_id,
            "status_options": status_options,
            "start_date_field_id": start_date_field_id,
            "end_date_field_id": end_date_field_id,
            "end_date_placeholder_days": end_date_placeholder_days,
            # `validated_members_field_id` is only ever non-None when `assign_members` is True.
            "members_field_id": validated_members_field_id,
            "member_usernames": url_to_members.get(url, set()),
        }
        _set_initial_project_item_fields(
            item_values=item_values,
            project_values=project_values,
            headers=headers,
        )
def _collect_unique_urls(directory: pathlib.Path, /) -> list[str]:
    """
    Collect all unique URLs from JSON files under the given directory.

    Every ``*.json`` file below ``directory`` (searched recursively) is parsed. Files whose
    top-level JSON value is a list contribute each of their string entries; any other
    content is ignored.

    Parameters
    ----------
    directory : pathlib.Path
        The root directory to search.

    Returns
    -------
    list[str]
        The unique URLs, sorted so that the processing order is reproducible between runs.
    """
    unique_urls: set[str] = set()
    for info_file_path in directory.rglob(pattern="*.json"):
        # JSON is UTF-8 by specification; do not rely on the platform's locale encoding.
        with info_file_path.open(mode="r", encoding="utf-8") as file_stream:
            info = json.load(file_stream)
        if not isinstance(info, list):
            continue
        unique_urls.update(value for value in info if isinstance(value, str))
    return sorted(unique_urls)
def _resolve_latest_version_data_directory(directory: pathlib.Path, /) -> pathlib.Path:
    """
    Choose which directory should be scanned for project population data files.

    Parameters
    ----------
    directory : pathlib.Path
        User-supplied base directory for project population.

    Returns
    -------
    pathlib.Path
        ``directory`` itself when it does not exist, is not a directory, or holds at most
        one immediate subdirectory named ``version-<digits>``. Otherwise, the
        ``version-<digits>`` subdirectory with the highest number.

    Warns
    -----
    UserWarning
        Emitted when more than one numeric ``version-*`` subdirectory is found and only
        the highest one is selected.
    """
    if not (directory.exists() and directory.is_dir()):
        return directory

    prefix = "version-"
    numbered_children: list[tuple[int, pathlib.Path]] = []
    for child in directory.iterdir():
        if child.is_dir() and child.name.startswith(prefix):
            suffix = child.name.removeprefix(prefix)
            # Only purely numeric suffixes count as versions (e.g. `version-x` is ignored).
            if suffix.isdigit():
                numbered_children.append((int(suffix), child))

    if len(numbered_children) <= 1:
        return directory

    _, newest_directory = max(numbered_children, key=lambda pair: pair[0])
    warnings.warn(
        message="Incompatible database versions detected! Using only the latest - please run database migration.",
        stacklevel=2,
    )
    return newest_directory
def _collect_url_member_usernames(directory: pathlib.Path, /) -> dict[str, set[str]]:
    """
    Map each URL under ``directory`` to the set of usernames inferred from its file path.

    Every ``*.json`` file below ``directory`` (searched recursively) is parsed. Files whose
    top-level JSON value is a list contribute each of their string entries as a URL; the
    username associated with a file is inferred by ``_infer_username_from_data_path``.

    Parameters
    ----------
    directory : pathlib.Path
        The root directory to search.

    Returns
    -------
    dict[str, set[str]]
        A mapping of URL → usernames of every data file in which the URL appears.
    """
    url_to_members: dict[str, set[str]] = {}
    for info_file_path in directory.rglob(pattern="*.json"):
        # JSON is UTF-8 by specification; do not rely on the platform's locale encoding.
        with info_file_path.open(mode="r", encoding="utf-8") as file_stream:
            info = json.load(file_stream)
        if not isinstance(info, list):
            continue
        username = _infer_username_from_data_path(directory=directory, info_file_path=info_file_path)
        for value in info:
            if isinstance(value, str):
                url_to_members.setdefault(value, set()).add(username)
    return url_to_members
def _infer_username_from_data_path(*, directory: pathlib.Path, info_file_path: pathlib.Path) -> str:
    """
    Infer the username a data file belongs to from ``username-*`` path segments.

    The segments of ``info_file_path`` relative to ``directory`` are inspected first, in
    order, followed by the name of ``directory`` itself. The first segment that starts with
    ``username-`` and has a non-empty remainder wins, and that remainder is returned.
    If no such segment exists, ``directory.name`` is returned unchanged.

    Parameters
    ----------
    directory : pathlib.Path
        The base data directory.
    info_file_path : pathlib.Path
        Path of a data file located under ``directory``.

    Returns
    -------
    str
        The inferred username, or ``directory.name`` as a last resort.
    """
    prefix = "username-"
    # The directory's own name acts as the final candidate after all relative segments.
    candidate_segments = (*info_file_path.relative_to(directory).parts, directory.name)
    for segment in candidate_segments:
        if not segment.startswith(prefix):
            continue
        remainder = segment.removeprefix(prefix)
        if remainder:
            return remainder
    return directory.name
def _merge_member_values(*, current_value: str | None, usernames: set[str]) -> str | None:
"""
Merge existing member text with username values into a deduplicated list.
Parameters
----------
current_value : str or None
Existing comma-separated value currently stored in the project's Members field.
usernames : set[str]
Usernames inferred from the local history directory structure.
Returns
-------
str or None
Sorted comma-separated member names, or ``None`` if no non-empty values are present.
"""
values = {value.strip() for value in (current_value or "").split(",") if value.strip()}
values.update({username.strip() for username in usernames if username.strip()})
if not values:
return None
return ",".join(sorted(values))
def _normalize_member_value(current_value: str | None, /) -> str | None:
    """
    Return the canonical form of a stored Members field value.

    This is ``_merge_member_values`` applied to ``current_value`` with no additional
    usernames, so the result can be compared directly against a freshly merged value.
    """
    no_extra_usernames: set[str] = set()
    return _merge_member_values(current_value=current_value, usernames=no_extra_usernames)
def _set_initial_project_item_fields(
    *,
    item_values: dict[str, str | None],
    project_values: dict[str, object],
    headers: dict[str, str],
) -> None:
    """
    Initialise the status, date, and member fields of a freshly added project item.

    Nothing is written when any of ``url``, ``item_id``, ``item_type``, ``item_state`` or
    ``created_at`` is ``None``, when no status name can be resolved, or when the resolved
    status name is not one of the project's status options (a warning is emitted for the
    latter). Otherwise the status is set first, then the dates, and finally - if a
    ``members_field_id`` is supplied and there is at least one non-empty username - the
    Members text field.

    Parameters
    ----------
    item_values : dict[str, str | None]
        Item data with keys ``url``, ``item_id``, ``item_type``, ``item_state``,
        ``created_at`` and ``closed_at``.
    project_values : dict[str, object]
        Project data with keys ``project_id``, ``status``, ``status_field_id``,
        ``status_options``, ``start_date_field_id``, ``end_date_field_id``,
        ``end_date_placeholder_days``, ``members_field_id`` and ``member_usernames``.
    headers : dict[str, str]
        HTTP headers including the Authorization token.
    """
    url = item_values["url"]
    item_id = item_values["item_id"]
    item_type = item_values["item_type"]
    item_state = item_values["item_state"]
    created_at = item_values["created_at"]
    required_values = (url, item_id, item_type, item_state, created_at)
    if any(value is None for value in required_values):
        return

    available_options = typing.cast("dict[str, str]", project_values["status_options"])
    requested_status = typing.cast("str | None", project_values["status"])
    resolved_status = _resolve_status_name(
        url=url,
        item_type=item_type,
        item_state=item_state,
        status=requested_status,
    )
    if resolved_status is None:
        return

    selected_option_id = available_options.get(resolved_status)
    if selected_option_id is None:
        message = (
            f"Status option `{resolved_status}` not found in project. "
            f"Available options: {list(available_options.keys())}."
        )
        warnings.warn(message=message, stacklevel=3)
        return

    project_id = typing.cast("str", project_values["project_id"])
    _set_item_status(
        project_id=project_id,
        item_id=item_id,
        field_id=typing.cast("str", project_values["status_field_id"]),
        option_id=selected_option_id,
        headers=headers,
    )
    _set_item_dates_for_content(
        item_values=item_values,
        project_values=project_values,
        headers=headers,
    )

    members_field_id = typing.cast("str | None", project_values["members_field_id"])
    member_usernames = typing.cast("set[str]", project_values["member_usernames"])
    if members_field_id is None:
        return
    members_text = _merge_member_values(current_value=None, usernames=member_usernames)
    if members_text is None:
        return
    _set_item_text(
        project_id=project_id,
        item_id=item_id,
        field_id=members_field_id,
        text=members_text,
        headers=headers,
    )
def _resolve_status_name(*, url: str, item_type: str, item_state: str, status: str | None) -> str | None:
"""Resolve status name from explicit override or item type/state mapping."""
if status is not None:
return status
status_name = {
("PullRequest", "closed"): "Done",
("PullRequest", "merged"): "Done",
("Issue", "closed"): "Done",
("PullRequest", "open"): "In Progress",
("Issue", "open"): "Todo",
}.get((item_type, item_state))
if status_name is None:
message = f"Unrecognised item type/state combination `({item_type}, {item_state})` for `{url}`; skipping."
warnings.warn(message=message, stacklevel=3)
return status_name
def _set_item_dates_for_content(
    *,
    item_values: dict[str, str | None],
    project_values: dict[str, object],
    headers: dict[str, str],
) -> None:
    """
    Write the start and end dates of a project item to whichever date fields exist.

    The start date is the item's creation date. The end date is the item's closed date
    when available, otherwise ``end_date_placeholder_days`` days after the creation date.
    Nothing is written when ``item_id`` or ``created_at`` is ``None``; each date is only
    written when the corresponding field ID is not ``None``.

    Parameters
    ----------
    item_values : dict[str, str | None]
        Item data; the keys ``item_id``, ``created_at`` and ``closed_at`` are read.
    project_values : dict[str, object]
        Project data; the keys ``project_id``, ``start_date_field_id``,
        ``end_date_field_id`` and ``end_date_placeholder_days`` are read.
    headers : dict[str, str]
        HTTP headers including the Authorization token.
    """
    item_id = item_values["item_id"]
    created_at = item_values["created_at"]
    closed_at = item_values["closed_at"]
    if item_id is None or created_at is None:
        return

    project_id = typing.cast("str", project_values["project_id"])
    start_field = typing.cast("str | None", project_values["start_date_field_id"])
    end_field = typing.cast("str | None", project_values["end_date_field_id"])
    placeholder_days = typing.cast("int", project_values["end_date_placeholder_days"])

    # The first ten characters of an ISO 8601 datetime are its `YYYY-MM-DD` date.
    created_day = created_at[:10]

    if start_field is not None:
        _set_item_date(
            project_id=project_id,
            item_id=item_id,
            field_id=start_field,
            date=created_day,
            headers=headers,
        )

    if end_field is not None:
        if closed_at is None:
            placeholder = datetime.date.fromisoformat(created_day) + datetime.timedelta(days=placeholder_days)
            end_day = placeholder.isoformat()
        else:
            end_day = closed_at[:10]
        _set_item_date(
            project_id=project_id,
            item_id=item_id,
            field_id=end_field,
            date=end_day,
            headers=headers,
        )
def _check_graphql_response(*, response: requests.Response, context: str) -> dict:
    """
    Validate a GitHub GraphQL API response and return its parsed JSON payload.

    Parameters
    ----------
    response : requests.Response
        The HTTP response from the GitHub GraphQL API.
    context : str
        A descriptive label used in error/warning messages (e.g., the URL or mutation name).

    Returns
    -------
    dict
        The parsed JSON result from the response.

    Raises
    ------
    RuntimeError
        If the body is not valid JSON, if the status code is not 200, if the payload does
        not support ``.get`` (i.e. is not a mapping), or if the payload carries a
        non-``None`` ``errors`` entry.

    Warns
    -----
    UserWarning
        Emitted, in addition to the ``RuntimeError``, when the status code is 403.
    """
    status = response.status_code

    try:
        result = response.json()
    except requests.exceptions.JSONDecodeError as exception:
        response_body = response.text.strip() or "<empty response body>"
        message = f"{context}\nStatus code {status}: GitHub GraphQL API response was not valid JSON: {response_body}"
        raise RuntimeError(message) from exception

    message = f"{context}\nStatus code {status}: {result}"

    if status != 200:
        # A 403 is additionally surfaced as a warning so it stays visible when callers swallow the error.
        if status == 403:
            warnings.warn(message=message, stacklevel=3)
        raise RuntimeError(message)

    try:
        reported_errors = result.get("errors")
    except AttributeError as exception:
        unexpected_payload_message = (
            f"{context}\nStatus code {status}: GitHub GraphQL API returned unexpected JSON payload: {result}"
        )
        raise RuntimeError(unexpected_payload_message) from exception

    if reported_errors is None:
        return result
    raise RuntimeError(message)
def _is_missing_project_item_error(*, response: requests.Response, item_id: str) -> bool:
    """
    Return ``True`` when a GraphQL response indicates the project item no longer exists.

    The response qualifies when its status code is 200 and its ``errors`` list contains an
    entry of type ``NOT_FOUND`` whose message mentions ``item_id`` in single quotes.

    This predicate never raises on a malformed payload: a body that is not valid JSON, or
    whose top-level value is not an object, simply yields ``False``. That matters because
    it is evaluated while another error is being handled, and a secondary exception here
    would hide the original, more informative one.

    Parameters
    ----------
    response : requests.Response
        The HTTP response from the GitHub GraphQL API.
    item_id : str
        The project item ID that the failed operation targeted.

    Returns
    -------
    bool
        Whether the response reports that ``item_id`` could not be found.
    """
    if response.status_code != 200:
        return False
    try:
        result = response.json()
    except requests.exceptions.JSONDecodeError:
        return False
    if not isinstance(result, dict):
        return False
    errors = result.get("errors")
    if not isinstance(errors, list):
        return False

    quoted_item_id = f"'{item_id}'"
    for error in errors:
        if not isinstance(error, dict) or error.get("type") != "NOT_FOUND":
            continue
        message = error.get("message")
        if isinstance(message, str) and quoted_item_id in message:
            return True
    return False
def _get_project_info(
    *,
    project_url: str,
    headers: dict[str, str],
) -> tuple[str, str, dict[str, str], str | None, str | None, str | None]:
    """
    Retrieve the project node ID and the IDs of the fields this module writes to.

    Parameters
    ----------
    project_url : str
        The URL of the GitHub Project v2.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    tuple[str, str, dict[str, str], str | None, str | None, str | None]
        A tuple of (project_id, status_field_id, status_options, start_date_field_id,
        end_date_field_id, members_field_id) where status_options maps option name → option ID, and
        start_date_field_id / end_date_field_id are the IDs of the project's "Start date"
        and "End date" date fields respectively (or None if not present). ``members_field_id``
        is the ID of the project's "Members" field (or None if not present).

    Raises
    ------
    ValueError
        If the project has no "Status" field.
    RuntimeError
        If the GraphQL request fails (raised by ``_check_graphql_response``).
    """
    # Supported URL formats:
    #   https://github.com/users/{login}/projects/{number}
    #   https://github.com/orgs/{login}/projects/{number}
    #   https://github.com/{login}/projects/{number}
    owner_type, owner_login, project_number = _parse_project_url(project_url)

    # User- and organization-owned projects differ only in the root field of the query.
    owner_field = "user" if owner_type == "users" else "organization"

    # Request the maximum page size (100): with a smaller limit, projects with many custom
    # fields would have their field list silently truncated, and the Status, date or
    # Members fields could be reported as missing even though they exist.
    query = """
    query GetProject($login: String!, $number: Int!) {
      __OWNER_FIELD__(login: $login) {
        projectV2(number: $number) {
          id
          fields(first: 100) {
            nodes {
              ... on ProjectV2SingleSelectField {
                id
                name
                options {
                  id
                  name
                }
              }
              ... on ProjectV2Field {
                id
                name
                dataType
              }
            }
          }
        }
      }
    }
    """.replace("__OWNER_FIELD__", owner_field)
    variables = {"login": owner_login, "number": project_number}

    response = requests.post(
        url="https://api.github.com/graphql",
        json={"query": query, "variables": variables},
        headers=headers,
        timeout=30,
    )
    result = _check_graphql_response(response=response, context=f"Failed to retrieve project info for `{project_url}`.")
    project_data = result["data"][owner_field]["projectV2"]
    project_id = project_data["id"]

    # Locate the Status, Start date, End date, and Members fields.
    status_field_id = None
    status_options: dict[str, str] = {}
    start_date_field_id = None
    end_date_field_id = None
    members_field_id = None
    for field in project_data["fields"]["nodes"]:
        # Field kinds not covered by the inline fragments come back empty.
        if not field:
            continue
        field_name = field.get("name", "")
        is_date_field = field.get("dataType") == "DATE"
        if field_name == "Status":
            status_field_id = field["id"]
            for option in field.get("options", []):
                status_options[option["name"]] = option["id"]
        elif is_date_field and field_name == "Start date":
            start_date_field_id = field["id"]
        elif is_date_field and field_name == "End date":
            end_date_field_id = field["id"]
        elif field_name == "Members":
            members_field_id = field["id"]

    if status_field_id is None:
        message = f"No 'Status' field found in project `{project_url}`."
        raise ValueError(message)
    return project_id, status_field_id, status_options, start_date_field_id, end_date_field_id, members_field_id
def _get_item_info(*, url: str, headers: dict[str, str]) -> tuple[str, str, str, str, str | None] | None:
    """
    Fetch the node ID, type (PullRequest or Issue), state, creation date, and closed date for the given URL.

    Parameters
    ----------
    url : str
        The GitHub URL of the PR or Issue.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    tuple[str, str, str, str, str | None] or None
        A tuple of (node_id, item_type, item_state, created_at, closed_at) where item_type is
        'PullRequest' or 'Issue', item_state is the lower-cased state reported by the API
        (e.g. 'open', 'closed', or 'merged'), created_at is an ISO 8601 datetime string, and
        closed_at is an ISO 8601 datetime string or None if not closed.
        Returns None if the URL does not resolve to a PR or Issue.

    Raises
    ------
    RuntimeError
        If the GraphQL request fails (raised by ``_check_graphql_response``).
    """
    query = """
    query GetItem($url: URI!) {
      resource(url: $url) {
        ... on PullRequest {
          id
          state
          createdAt
          closedAt
        }
        ... on Issue {
          id
          state
          createdAt
          closedAt
        }
      }
    }
    """
    variables = {"url": url}
    response = requests.post(
        url="https://api.github.com/graphql",
        json={"query": query, "variables": variables},
        headers=headers,
        timeout=30,
    )
    result = _check_graphql_response(response=response, context=f"Failed to retrieve item info for URL `{url}`.")
    resource = result["data"]["resource"]
    # `resource` is None when the URL resolves to nothing, and an empty object when it
    # resolves to something matched by neither inline fragment (e.g. a repository URL).
    # Both cases mean "not a PR or Issue".
    if not resource:
        return None
    node_id = resource["id"]
    item_state = resource["state"].lower()
    created_at: str = resource["createdAt"]
    closed_at: str | None = resource.get("closedAt")
    # Determine the type based on the URL path
    item_type = "PullRequest" if "/pull/" in url else "Issue"
    return node_id, item_type, item_state, created_at, closed_at
def _add_item_to_project(*, project_id: str, content_id: str, headers: dict[str, str]) -> str | None:
    """
    Attach a PR or Issue to a GitHub Project v2 via its content node ID.

    Parameters
    ----------
    project_id : str
        The global node ID of the GitHub Project v2.
    content_id : str
        The global node ID of the PR or Issue to add.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    str or None
        The ID of the created project item, or ``None`` when the API answered with
        status code 403.

    Raises
    ------
    RuntimeError
        For any other failure reported by ``_check_graphql_response``.
    """
    payload = {
        "query": """
    mutation AddItem($projectId: ID!, $contentId: ID!) {
      addProjectV2ItemById(input: {projectId: $projectId, contentId: $contentId}) {
        item {
          id
        }
      }
    }
    """,
        "variables": {"projectId": project_id, "contentId": content_id},
    }
    response = requests.post(
        url="https://api.github.com/graphql",
        json=payload,
        headers=headers,
        timeout=30,
    )
    try:
        result = _check_graphql_response(
            response=response,
            context=f"Failed to add item `{content_id}` to project `{project_id}`.",
        )
    except RuntimeError:
        # Only a 403 is tolerated; every other failure propagates.
        if response.status_code != 403:
            raise
        return None
    created_item = result["data"]["addProjectV2ItemById"]["item"]
    return created_item["id"]
def _set_item_status(
    *,
    project_id: str,
    item_id: str,
    field_id: str,
    option_id: str,
    headers: dict[str, str],
) -> None:
    """
    Assign a single-select option to the Status field of a project item.

    A response with status code 403 is tolerated silently (apart from the warning emitted
    by ``_check_graphql_response``); any other failure raises ``RuntimeError``.

    Parameters
    ----------
    project_id : str
        The global node ID of the GitHub Project v2.
    item_id : str
        The project item ID.
    field_id : str
        The global node ID of the Status field.
    option_id : str
        The option ID for the desired status value.
    headers : dict[str, str]
        HTTP headers including the Authorization token.
    """
    payload = {
        "query": """
    mutation SetStatus($projectId: ID!, $itemId: ID!, $fieldId: ID!, $optionId: String!) {
      updateProjectV2ItemFieldValue(
        input: {
          projectId: $projectId
          itemId: $itemId
          fieldId: $fieldId
          value: { singleSelectOptionId: $optionId }
        }
      ) {
        projectV2Item {
          id
        }
      }
    }
    """,
        "variables": {
            "projectId": project_id,
            "itemId": item_id,
            "fieldId": field_id,
            "optionId": option_id,
        },
    }
    response = requests.post(
        url="https://api.github.com/graphql",
        json=payload,
        headers=headers,
        timeout=30,
    )
    try:
        _check_graphql_response(
            response=response,
            context=f"Failed to set status for item `{item_id}` in project `{project_id}`.",
        )
    except RuntimeError:
        if response.status_code == 403:
            return
        raise
def _set_item_date(
    *,
    project_id: str,
    item_id: str,
    field_id: str,
    date: str,
    headers: dict[str, str],
) -> None:
    """
    Write a value to a date field of a project item.

    Two failure modes are tolerated: a response with status code 403, and a response
    reporting that the project item no longer exists (a warning is emitted for the latter).
    Any other failure raises ``RuntimeError``.

    Parameters
    ----------
    project_id : str
        The global node ID of the GitHub Project v2.
    item_id : str
        The project item ID.
    field_id : str
        The global node ID of the date field.
    date : str
        The date value in ISO format (``YYYY-MM-DD``).
    headers : dict[str, str]
        HTTP headers including the Authorization token.
    """
    payload = {
        "query": """
    mutation SetDate($projectId: ID!, $itemId: ID!, $fieldId: ID!, $date: Date!) {
      updateProjectV2ItemFieldValue(
        input: {
          projectId: $projectId
          itemId: $itemId
          fieldId: $fieldId
          value: { date: $date }
        }
      ) {
        projectV2Item {
          id
        }
      }
    }
    """,
        "variables": {
            "projectId": project_id,
            "itemId": item_id,
            "fieldId": field_id,
            "date": date,
        },
    }
    response = requests.post(
        url="https://api.github.com/graphql",
        json=payload,
        headers=headers,
        timeout=30,
    )
    try:
        _check_graphql_response(
            response=response,
            context=f"Failed to set date for item `{item_id}` in project `{project_id}`.",
        )
    except RuntimeError:
        if response.status_code != 403:
            # The item may have been removed from the project in the meantime; skip it with a warning.
            if not _is_missing_project_item_error(response=response, item_id=item_id):
                raise
            warnings.warn(
                message=f"Skipping date update for missing project item `{item_id}` in project `{project_id}`.",
                stacklevel=2,
            )
def _set_item_text(
    *,
    project_id: str,
    item_id: str,
    field_id: str,
    text: str,
    headers: dict[str, str],
) -> None:
    """
    Write a value to a text field of a project item.

    A response with status code 403 is tolerated silently (apart from the warning emitted
    by ``_check_graphql_response``); any other failure raises ``RuntimeError``.

    Parameters
    ----------
    project_id : str
        The global node ID of the GitHub Project v2.
    item_id : str
        The project item ID.
    field_id : str
        The global node ID of the text field.
    text : str
        The text value to store.
    headers : dict[str, str]
        HTTP headers including the Authorization token.
    """
    payload = {
        "query": """
    mutation SetText($projectId: ID!, $itemId: ID!, $fieldId: ID!, $text: String!) {
      updateProjectV2ItemFieldValue(
        input: {
          projectId: $projectId
          itemId: $itemId
          fieldId: $fieldId
          value: { text: $text }
        }
      ) {
        projectV2Item {
          id
        }
      }
    }
    """,
        "variables": {
            "projectId": project_id,
            "itemId": item_id,
            "fieldId": field_id,
            "text": text,
        },
    }
    response = requests.post(
        url="https://api.github.com/graphql",
        json=payload,
        headers=headers,
        timeout=30,
    )
    try:
        _check_graphql_response(
            response=response,
            context=f"Failed to set text for item `{item_id}` in project `{project_id}`.",
        )
    except RuntimeError:
        if response.status_code == 403:
            return
        raise
[docs]
@beartype.beartype
def update_project_item_dates(
    *,
    project_url: str,
    end_date_placeholder_days: int = 180,
) -> None:
    """
    Refresh the start and end date fields of every item already in a GitHub Project (v2).

    For each item returned by ``_list_project_items_with_dates``:

    - The "Start date" field is set to the item's creation date.
    - The "End date" field is set to the item's closed date (if closed),
      or to ``end_date_placeholder_days`` days after the creation date otherwise.

    A date field that the project does not have is skipped. When the project has neither
    field, a warning is emitted and no items are fetched or updated.

    Parameters
    ----------
    project_url : str
        The URL of the GitHub Project v2,
        e.g., ``https://github.com/users/username/projects/1``
        or ``https://github.com/orgs/orgname/projects/1``.
    end_date_placeholder_days : int, optional
        Number of days after the item's creation date to use as the placeholder end date
        when the item has not yet been closed. Default is 180 (approximately 6 months).

    Raises
    ------
    ValueError
        If the ``GITHUB_TOKEN`` environment variable is not set.
    """
    token = os.getenv("GITHUB_TOKEN")
    if token is None:
        message = "\nPlease set the `GITHUB_TOKEN` environment variable with a valid GitHub Personal Access Token!\n\n"
        raise ValueError(message)
    headers = {"Authorization": f"token {token}"}

    project_info = _get_project_info(project_url=project_url, headers=headers)
    project_id, _status_field_id, _status_options, start_date_field_id, end_date_field_id, _members_field_id = (
        project_info
    )

    has_start_field = start_date_field_id is not None
    has_end_field = end_date_field_id is not None
    if not (has_start_field or has_end_field):
        warnings.warn(
            message=(
                f"Project `{project_url}` has no 'Start date' or 'End date' fields. No date updates were performed."
            ),
            stacklevel=2,
        )
        return

    # The paginated item listing needs the owner and project number from the URL.
    owner_type, owner_login, project_number = _parse_project_url(project_url)
    project_items = _list_project_items_with_dates(
        owner_type=owner_type,
        owner_login=owner_login,
        project_number=project_number,
        headers=headers,
    )

    for item in tqdm.tqdm(iterable=project_items, desc="Updating item dates", unit="items", dynamic_ncols=True):
        item_id = item["id"]
        created_at: str = item["createdAt"]
        closed_at: str | None = item.get("closedAt")
        # The first ten characters of an ISO 8601 datetime are its `YYYY-MM-DD` date.
        created_day = created_at[:10]

        if has_start_field:
            _set_item_date(
                project_id=project_id,
                item_id=item_id,
                field_id=start_date_field_id,
                date=created_day,
                headers=headers,
            )

        if has_end_field:
            if closed_at is None:
                placeholder = datetime.date.fromisoformat(created_day) + datetime.timedelta(
                    days=end_date_placeholder_days
                )
                end_day = placeholder.isoformat()
            else:
                end_day = closed_at[:10]
            _set_item_date(
                project_id=project_id,
                item_id=item_id,
                field_id=end_date_field_id,
                date=end_day,
                headers=headers,
            )
[docs]
@beartype.beartype
def update_project_item_members(*, project_url: str) -> None:
    """
    Refresh the Members text field of every item already in a GitHub Project (v2).

    The usernames for each item are supplied by ``_list_project_items_with_member_usernames``
    as ``(item_id, usernames)`` pairs. They are normalised into a sorted, comma-separated
    string that replaces the field's current value. Items for which no non-empty username
    is available are skipped.

    When the project has no 'Members' field, a warning is emitted and nothing is updated.

    Parameters
    ----------
    project_url : str
        The URL of the GitHub Project v2,
        e.g., ``https://github.com/users/username/projects/1``
        or ``https://github.com/orgs/orgname/projects/1``.

    Raises
    ------
    ValueError
        If the ``GITHUB_TOKEN`` environment variable is not set.
    """
    token = os.getenv("GITHUB_TOKEN")
    if token is None:
        message = "\nPlease set the `GITHUB_TOKEN` environment variable with a valid GitHub Personal Access Token!\n\n"
        raise ValueError(message)
    headers = {"Authorization": f"token {token}"}

    project_info = _get_project_info(project_url=project_url, headers=headers)
    project_id, _status_field_id, _status_options, _start_date_field_id, _end_date_field_id, members_field_id = (
        project_info
    )
    if members_field_id is None:
        warnings.warn(
            message=f"Project `{project_url}` has no 'Members' field. No member updates were performed.",
            stacklevel=2,
        )
        return

    owner_type, owner_login, project_number = _parse_project_url(project_url)
    items_with_usernames = _list_project_items_with_member_usernames(
        owner_type=owner_type,
        owner_login=owner_login,
        project_number=project_number,
        headers=headers,
    )
    progress = tqdm.tqdm(
        iterable=items_with_usernames,
        desc="Updating item members",
        unit="items",
        dynamic_ncols=True,
    )
    for item_id, member_usernames in progress:
        members_text = _merge_member_values(current_value=None, usernames=member_usernames)
        if members_text is not None:
            _set_item_text(
                project_id=project_id,
                item_id=item_id,
                field_id=members_field_id,
                text=members_text,
                headers=headers,
            )
def _list_project_item_content_urls(
    *,
    owner_type: str,
    owner_login: str,
    project_number: int,
    headers: dict[str, str],
) -> set[str]:
    """
    Collect the content URLs of every item currently in the project.

    The project's items are fetched page by page (100 per request) until the API reports
    no further page. Items whose content exposes no ``url`` are ignored.

    Parameters
    ----------
    owner_type : str
        Either ``'users'`` or ``'orgs'``; any value other than ``'users'`` is queried as
        an organization.
    owner_login : str
        The GitHub login of the project owner.
    project_number : int
        The number of the GitHub Project v2.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    set[str]
        A set of content URLs (PR or Issue URLs) for all items currently in the project.

    Raises
    ------
    RuntimeError
        If a GraphQL request fails (raised by ``_check_graphql_response``).
    """
    # User- and organization-owned projects differ only in the root field of the query.
    owner_field = "user" if owner_type == "users" else "organization"
    query = """
    query GetItemUrls($login: String!, $number: Int!, $after: String) {
      __OWNER_FIELD__(login: $login) {
        projectV2(number: $number) {
          items(first: 100, after: $after) {
            nodes {
              content {
                ... on PullRequest { url }
                ... on Issue { url }
              }
            }
            pageInfo { hasNextPage endCursor }
          }
        }
      }
    }
    """.replace("__OWNER_FIELD__", owner_field)

    collected_urls: set[str] = set()
    cursor = None
    more_pages = True
    while more_pages:
        response = requests.post(
            url="https://api.github.com/graphql",
            json={
                "query": query,
                "variables": {"login": owner_login, "number": project_number, "after": cursor},
            },
            headers=headers,
            timeout=30,
        )
        result = _check_graphql_response(
            response=response,
            context=f"Failed to list project item URLs for project {project_number}.",
        )
        page = result["data"][owner_field]["projectV2"]["items"]

        for node in page["nodes"]:
            content = node.get("content")
            if content and "url" in content:
                collected_urls.add(content["url"])

        page_info = page["pageInfo"]
        more_pages = bool(page_info["hasNextPage"])
        if more_pages:
            cursor = page_info["endCursor"]
    return collected_urls
def _list_project_items_with_dates(
    *,
    owner_type: str,
    owner_login: str,
    project_number: int,
    headers: dict[str, str],
) -> list[dict]:
    """
    Collect every project item whose content exposes a creation date.

    The project's items are fetched 100 per request from the GitHub GraphQL API
    until the API reports that no further page exists. Items whose content is
    empty or carries no ``createdAt`` value are left out of the result.

    Parameters
    ----------
    owner_type : str
        Either ``'users'`` or ``'orgs'``.
        Any value other than ``'users'`` is queried as an organization.
    owner_login : str
        The GitHub login of the project owner.
    project_number : int
        The number of the GitHub Project v2.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    list[dict]
        One dict per retained item with keys ``id``, ``createdAt``, and ``closedAt``.
        ``closedAt`` is ``None`` when the content reports no close date.
    """
    # Only the root field differs between user- and organization-owned projects.
    owner_field = "user" if owner_type == "users" else "organization"
    query = """
query GetItems($login: String!, $number: Int!, $after: String) {
__OWNER__(login: $login) {
projectV2(number: $number) {
items(first: 100, after: $after) {
nodes {
id
content {
... on PullRequest { createdAt closedAt }
... on Issue { createdAt closedAt }
}
}
pageInfo { hasNextPage endCursor }
}
}
}
}
""".replace("__OWNER__", owner_field)

    dated_items = []
    cursor = None
    while True:
        response = requests.post(
            url="https://api.github.com/graphql",
            json={
                "query": query,
                "variables": {"login": owner_login, "number": project_number, "after": cursor},
            },
            headers=headers,
            timeout=30,
        )
        payload = _check_graphql_response(
            response=response,
            context=f"Failed to list project items for project {project_number}.",
        )
        page = payload["data"][owner_field]["projectV2"]["items"]

        for node in page["nodes"]:
            content = node.get("content")
            if not content or "createdAt" not in content:
                continue
            dated_items.append(
                {
                    "id": node["id"],
                    "createdAt": content["createdAt"],
                    "closedAt": content.get("closedAt"),
                },
            )

        page_info = page["pageInfo"]
        if not page_info["hasNextPage"]:
            return dated_items
        cursor = page_info["endCursor"]
def _list_project_items_with_member_values(
    *,
    owner_type: str,
    owner_login: str,
    project_number: int,
    members_field_id: str,
    headers: dict[str, str],
) -> dict[str, dict[str, str | None]]:
    """
    Return project item IDs and current Members text values indexed by content URL.

    The project's items are fetched 100 per request from the GitHub GraphQL API
    until no further page is reported. Items whose content matches neither the
    ``PullRequest`` nor the ``Issue`` fragment (and therefore has no ``url``) are
    skipped. Only the first 20 field values of each item are requested, so a
    Members value beyond that window is reported as ``None``.

    Parameters
    ----------
    owner_type : str
        Either ``'users'`` or ``'orgs'``.
        Any value other than ``'users'`` is queried as an organization.
    owner_login : str
        The GitHub login of the project owner.
    project_number : int
        The number of the GitHub Project v2.
    members_field_id : str
        The ID of the text field whose value is reported under ``members``.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    dict[str, dict[str, str | None]]
        Maps each content URL to a dict with keys ``item_id`` (the project item ID)
        and ``members`` (the field's text, or ``None`` when no matching text value exists).
    """
    # Only the root field differs between user- and organization-owned projects.
    owner_field = "user" if owner_type == "users" else "organization"
    query = """
query GetItemsWithMembers($login: String!, $number: Int!, $after: String) {
__OWNER__(login: $login) {
projectV2(number: $number) {
items(first: 100, after: $after) {
nodes {
id
content {
... on PullRequest { url }
... on Issue { url }
}
fieldValues(first: 20) {
nodes {
... on ProjectV2ItemFieldTextValue {
text
field {
... on ProjectV2Field {
id
}
}
}
}
}
}
pageInfo { hasNextPage endCursor }
}
}
}
}
""".replace("__OWNER__", owner_field)

    items_by_url: dict[str, dict[str, str | None]] = {}
    after_cursor = None
    while True:
        variables = {"login": owner_login, "number": project_number, "after": after_cursor}
        response = requests.post(
            url="https://api.github.com/graphql",
            json={"query": query, "variables": variables},
            headers=headers,
            timeout=30,
        )
        result = _check_graphql_response(
            response=response,
            context=f"Failed to list project member values for project {project_number}.",
        )
        items_data = result["data"][owner_field]["projectV2"]["items"]

        for node in items_data["nodes"]:
            content = node.get("content")
            if not content or "url" not in content:
                continue

            members_value = None
            # `or` fallbacks cover explicit nulls, which a `.get(key, default)` would pass through.
            field_value_nodes = (node.get("fieldValues") or {}).get("nodes") or []
            for field_value in field_value_nodes:
                # Connection nodes may be null or empty (non-text values match no fragment);
                # skip them the same way `_list_project_items_with_status` does.
                if not field_value:
                    continue
                field = field_value.get("field")
                if field and field.get("id") == members_field_id:
                    members_value = field_value.get("text")
                    break

            items_by_url[content["url"]] = {"item_id": node["id"], "members": members_value}

        page_info = items_data["pageInfo"]
        if not page_info["hasNextPage"]:
            break
        after_cursor = page_info["endCursor"]

    return items_by_url
def _list_project_items_with_member_usernames(
    *,
    owner_type: str,
    owner_login: str,
    project_number: int,
    headers: dict[str, str],
) -> list[tuple[str, set[str]]]:
    """
    Return project item IDs and inferred member usernames from assignees/reviewers.

    The project's items are fetched 100 per request from the GitHub GraphQL API
    until no further page is reported. For each item, up to 20 assignees are
    read, plus up to 20 requested reviewers when the content is a pull request.
    Requested reviewers that are not users expose no login and are ignored.
    Items for which no username is found are omitted from the result.

    Parameters
    ----------
    owner_type : str
        Either ``'users'`` or ``'orgs'``.
        Any value other than ``'users'`` is queried as an organization.
    owner_login : str
        The GitHub login of the project owner.
    project_number : int
        The number of the GitHub Project v2.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    list[tuple[str, set[str]]]
        One ``(item_id, usernames)`` pair per item that has at least one username.
    """
    # Only the root field differs between user- and organization-owned projects.
    owner_field = "user" if owner_type == "users" else "organization"
    query = """
query GetItemsWithMembers($login: String!, $number: Int!, $after: String) {
__OWNER__(login: $login) {
projectV2(number: $number) {
items(first: 100, after: $after) {
nodes {
id
content {
... on PullRequest {
assignees(first: 20) {
nodes {
login
}
}
reviewRequests(first: 20) {
nodes {
requestedReviewer {
... on User {
login
}
}
}
}
}
... on Issue {
assignees(first: 20) {
nodes {
login
}
}
}
}
}
pageInfo { hasNextPage endCursor }
}
}
}
}
""".replace("__OWNER__", owner_field)

    items_with_members: list[tuple[str, set[str]]] = []
    after_cursor = None
    while True:
        variables = {"login": owner_login, "number": project_number, "after": after_cursor}
        response = requests.post(
            url="https://api.github.com/graphql",
            json={"query": query, "variables": variables},
            headers=headers,
            timeout=30,
        )
        result = _check_graphql_response(
            response=response,
            context=f"Failed to list project members for project {project_number}.",
        )
        items_data = result["data"][owner_field]["projectV2"]["items"]

        for node in items_data["nodes"]:
            content = node.get("content")
            if not content:
                # No content, or content that matched neither fragment: nothing to infer.
                continue

            member_usernames: set[str] = set()

            # `dict.get(key, default)` only falls back when the key is missing, so an
            # explicit null connection or null node list is handled with `or` instead.
            assignee_nodes = (content.get("assignees") or {}).get("nodes") or []
            for assignee in assignee_nodes:
                assignee_login = (assignee or {}).get("login")
                if assignee_login is not None:
                    member_usernames.add(assignee_login)

            review_request_nodes = (content.get("reviewRequests") or {}).get("nodes") or []
            for review_request in review_request_nodes:
                requested_reviewer = (review_request or {}).get("requestedReviewer")
                # Reviewers that are not users match no fragment and carry no login.
                reviewer_login = (requested_reviewer or {}).get("login")
                if reviewer_login is not None:
                    member_usernames.add(reviewer_login)

            if member_usernames:
                items_with_members.append((node["id"], member_usernames))

        page_info = items_data["pageInfo"]
        if not page_info["hasNextPage"]:
            break
        after_cursor = page_info["endCursor"]

    return items_with_members
def _list_project_items_with_status(
    *,
    owner_type: str,
    owner_login: str,
    project_number: int,
    status_field_id: str,
    headers: dict[str, str],
) -> list[dict]:
    """
    List every project item alongside the option ID currently set on its Status field.

    The project's items are fetched 100 per request from the GitHub GraphQL API
    until no further page is reported. For each item, the first 20 field values
    are scanned for a single-select value belonging to ``status_field_id``.

    Parameters
    ----------
    owner_type : str
        Either ``'users'`` or ``'orgs'``.
        Any value other than ``'users'`` is queried as an organization.
    owner_login : str
        The GitHub login of the project owner.
    project_number : int
        The number of the GitHub Project v2.
    status_field_id : str
        The global node ID of the Status field.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    list[dict]
        One dict per item with keys ``id`` and ``status_option_id``.
        ``status_option_id`` is ``None`` when no value for the Status field is found.
    """
    # Only the root field differs between user- and organization-owned projects.
    owner_field = "user" if owner_type == "users" else "organization"
    query = """
query GetItemsWithStatus($login: String!, $number: Int!, $after: String) {
__OWNER__(login: $login) {
projectV2(number: $number) {
items(first: 100, after: $after) {
nodes {
id
fieldValues(first: 20) {
nodes {
... on ProjectV2ItemFieldSingleSelectValue {
optionId
field {
... on ProjectV2SingleSelectField {
id
}
}
}
}
}
}
pageInfo { hasNextPage endCursor }
}
}
}
}
""".replace("__OWNER__", owner_field)

    items_with_status = []
    cursor = None
    while True:
        response = requests.post(
            url="https://api.github.com/graphql",
            json={
                "query": query,
                "variables": {"login": owner_login, "number": project_number, "after": cursor},
            },
            headers=headers,
            timeout=30,
        )
        payload = _check_graphql_response(
            response=response,
            context=f"Failed to list project items with status for project {project_number}.",
        )
        page = payload["data"][owner_field]["projectV2"]["items"]

        for node in page["nodes"]:
            item_id = node["id"]
            # First field value attached to the Status field wins; empty nodes are ignored.
            status_option_id = next(
                (
                    field_value.get("optionId")
                    for field_value in node.get("fieldValues", {}).get("nodes", [])
                    if field_value
                    and (field := field_value.get("field", {}))
                    and field.get("id") == status_field_id
                ),
                None,
            )
            items_with_status.append({"id": item_id, "status_option_id": status_option_id})

        page_info = page["pageInfo"]
        if not page_info["hasNextPage"]:
            return items_with_status
        cursor = page_info["endCursor"]
def _get_project_closing_workflows(
    *,
    project_url: str,
    headers: dict[str, str],
) -> list[str]:
    """
    List the enabled project workflows whose name marks them as auto-closing.

    A single GraphQL request fetches the first 20 workflows of the project.
    A workflow is reported when its ``enabled`` flag is ``True`` and its name
    contains ``auto-close`` (compared case-insensitively).

    Parameters
    ----------
    project_url : str
        The URL of the GitHub Project v2.
    headers : dict[str, str]
        HTTP headers including the Authorization token.

    Returns
    -------
    list[str]
        Names of the matching workflows, in the order returned by the API.
    """
    owner_type, owner_login, project_number = _parse_project_url(project_url)

    # Only the root field differs between user- and organization-owned projects.
    owner_field = "user" if owner_type == "users" else "organization"
    query = """
query GetProjectWorkflows($login: String!, $number: Int!) {
__OWNER__(login: $login) {
projectV2(number: $number) {
workflows(first: 20) {
nodes {
name
enabled
}
}
}
}
}
""".replace("__OWNER__", owner_field)

    response = requests.post(
        url="https://api.github.com/graphql",
        json={
            "query": query,
            "variables": {"login": owner_login, "number": project_number},
        },
        headers=headers,
        timeout=30,
    )
    payload = _check_graphql_response(
        response=response,
        context=f"Failed to retrieve workflows for `{project_url}`.",
    )
    project = payload["data"][owner_field]["projectV2"]

    closing_workflow_names = []
    for workflow in project.get("workflows", {}).get("nodes", []):
        if not workflow:
            continue
        if workflow.get("enabled") is not True:
            continue
        if "auto-close" not in workflow.get("name", "").lower():
            continue
        closing_workflow_names.append(workflow["name"])
    return closing_workflow_names
[docs]
@beartype.beartype
def get_project_closing_workflows(project_url: str, /) -> list[str]:
    """
    List the enabled workflows of a GitHub Project (v2) that close underlying items.

    Reads the access token from the ``GITHUB_TOKEN`` environment variable and
    delegates the lookup to ``_get_project_closing_workflows``.

    Parameters
    ----------
    project_url : str
        The URL of the GitHub Project v2,
        e.g., ``https://github.com/users/username/projects/1``
        or ``https://github.com/orgs/orgname/projects/1``.

    Returns
    -------
    list[str]
        Names of enabled workflows whose names indicate they close underlying items
        (e.g., ``Auto-close issue``). Empty when no such workflow is enabled.

    Raises
    ------
    ValueError
        If the ``GITHUB_TOKEN`` environment variable is not set.
    """
    token = os.getenv("GITHUB_TOKEN")
    if token is None:
        message = "\nPlease set the `GITHUB_TOKEN` environment variable with a valid GitHub Personal Access Token!\n\n"
        raise ValueError(message)

    return _get_project_closing_workflows(
        project_url=project_url,
        headers={"Authorization": f"token {token}"},
    )
[docs]
@beartype.beartype
def transition_status(*, project_url: str, current_status: str, new_status: str) -> None:
    """
    Reassign every item holding one Status value to another in a GitHub Project (v2).

    Both status names are resolved to option IDs before any item is touched, so
    an unknown name aborts the call without modifying the project.

    Parameters
    ----------
    project_url : str
        The URL of the GitHub Project v2,
        e.g., ``https://github.com/users/username/projects/1``
        or ``https://github.com/orgs/orgname/projects/1``.
    current_status : str
        The current Status value to match.
    new_status : str
        The destination Status value to apply.

    Raises
    ------
    ValueError
        If the ``GITHUB_TOKEN`` environment variable is not set, or if the project
        does not have one of the specified status options.
    """
    token = os.getenv("GITHUB_TOKEN")
    if token is None:
        message = "\nPlease set the `GITHUB_TOKEN` environment variable with a valid GitHub Personal Access Token!\n\n"
        raise ValueError(message)
    headers = {"Authorization": f"token {token}"}

    # The date and members field IDs are returned too but are not needed here.
    project_id, status_field_id, status_options, _, _, _ = _get_project_info(
        project_url=project_url,
        headers=headers,
    )

    # Resolve the source option first, then the destination, failing on the first unknown name.
    resolved_option_ids = []
    for status_name in (current_status, new_status):
        option_id = status_options.get(status_name)
        if option_id is None:
            message = (
                f"Status option '{status_name}' not found in project `{project_url}`. "
                f"Available options: {list(status_options.keys())}."
            )
            raise ValueError(message)
        resolved_option_ids.append(option_id)
    current_option_id, new_option_id = resolved_option_ids

    owner_type, owner_login, project_number = _parse_project_url(project_url)
    project_items = _list_project_items_with_status(
        owner_type=owner_type,
        owner_login=owner_login,
        project_number=project_number,
        status_field_id=status_field_id,
        headers=headers,
    )

    items_to_move = [
        project_item for project_item in project_items if project_item["status_option_id"] == current_option_id
    ]
    progress = tqdm.tqdm(
        iterable=items_to_move,
        desc=f"Moving items from {current_status} to {new_status}",
        unit="items",
        dynamic_ncols=True,
    )
    for project_item in progress:
        _set_item_status(
            project_id=project_id,
            item_id=project_item["id"],
            field_id=status_field_id,
            option_id=new_option_id,
            headers=headers,
        )