Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions PRIVACY_POLICY.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ This will be an optional _opt-in_ feature that is not required to use PythonTA.

## What data will be sent?

When PyTA check a file/directory (by calling `python_ta.check_all`), two types of data may be sent:
When PyTA checks a file/directory (by calling `python_ta.check_all`), two types of data may be sent:

- The errors detected by PyTA during the check.
- The source files that you ran PyTA on.

These forms of data submission are independent and optional.
If you use a custom PyTA configuration, this information will be sent alongside either of the above data.
Each upload also includes the PythonTA version and an anonymous client ID used to group opt-in submissions.

## How can I opt in or opt out of this data collection?

Expand All @@ -21,7 +22,11 @@ The default configuration in the `python_ta` directory is `no` for both options.
## How will the data be anonymised?

PyTA will not collect or send identifying information about you or your computer. (_Note_: if you choose to submit source files checked by PyTA, those files may contain identifying information about you.)
PyTA does record a hash of your device's MAC address in order to identify when two runs come from the same device, but this is not used to deanonymize the collected data.
PyTA does not derive its anonymous client ID from hardware identifiers such as your device's MAC address.
Instead, when data is first submitted, PyTA generates a random ID and stores it locally.
Future opt-in submissions include a hash of this random ID, allowing submissions to be grouped without sending the locally stored ID itself.
On Windows this is stored in `%APPDATA%\PythonTA\anonymous_id`; on other platforms it is stored in `~/.python_ta/anonymous_id`.
Deleting this file resets the anonymous ID.

## Who will the data be sent to?

Expand All @@ -31,4 +36,4 @@ PyTA maintainers and computer science education researchers at the University of
## How will this data be used?

This data will be used to better understand how PyTA is used by students for the purpose of making it a better educational tool.
Potential research analyses of collected data include identifying common errors detected by PyTA and identifying errors that persist across multiple PyTA runs.
Potential research analyses of collected data include identifying common errors detected by PyTA and identifying errors that persist across multiple PyTA runs associated with the same anonymous ID.
1 change: 0 additions & 1 deletion packages/python-ta/.coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ omit =
packages/python-ta/src/python_ta/debug/*
packages/python-ta/src/python_ta/reporters/templates/*
packages/python-ta/src/python_ta/util/*
packages/python-ta/src/python_ta/upload.py
packages/python-ta/src/python_ta/utils.py

patch = subprocess
3 changes: 3 additions & 0 deletions packages/python-ta/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,13 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
- Fixed bug that allowed users to inject code into the browser template through the E9920 unnecessary f-string checker
- Fixed bug that caused user input containing markdown characters to be rendered by the markdown renderer in certain error messages
- Fixed memory leak issue that caused the memory usage to increase with each call to `python_ta.check_all()`.
- Fixed opt-in data uploads to use a random anonymous ID instead of a MAC-address-derived hash, close uploaded files reliably, and time out stalled network requests.
Existing opt-in users will receive a new anonymous client ID after upgrading.

### 🔧 Internal changes

- Removed old documentation files under `python_ta/reporters/`
- Added tests for the opt-in data upload path.
- Added tests for `infinite_loop_checker.py` to improve coverage for the `_name_holds_generator` function and the generator portion of the `_check_constant_loop_cond` function.
- Refactored `test_main.py` calls to use click's testing helpers.
- The `Z3Visitor`, `Z3Parser`, and `Z3ParseException` classes have been extracted into a _new Python package_, `python-ta-z3`.
Expand Down
154 changes: 111 additions & 43 deletions packages/python-ta/src/python_ta/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,96 +2,164 @@

import hashlib
import json
import os
import sys
import uuid
from typing import NamedTuple
from contextlib import ExitStack
from pathlib import Path
from typing import Any, Iterable

import requests

UPLOAD_TIMEOUT_SECONDS = 5
ANONYMOUS_ID_ENV_VAR = "PYTA_ANONYMOUS_ID_FILE"
_cached_local_anonymous_id: tuple[str, str] | None = None

def errors_to_dict(errors: Iterable[Any]) -> dict[str, list[dict[str, Any]]]:
    """Convert PyTA errors into a JSON-serialisable dictionary.

    Every message yielded by ``_iter_error_messages(errors)`` is grouped under
    its ``msg_id``. Messages without a ``msg_id`` attribute (or whose
    ``msg_id`` is ``None``) are skipped.

    Returns a dictionary mapping each message ID to a list of dictionaries,
    one per message, holding the ``msg_id``, ``msg``, ``symbol``, ``module``,
    ``category`` and ``line`` attributes. An attribute that a message does not
    define is recorded as ``None``.
    """
    error_info = ["msg_id", "msg", "symbol", "module", "category", "line"]
    err_as_dict = {}
    for msg in _iter_error_messages(errors):
        msg_id = getattr(msg, "msg_id", None)
        if msg_id is None:
            continue
        # setdefault keeps messages already collected for this ID, so the same
        # ID appearing in several groups accumulates instead of being reset.
        err_as_dict.setdefault(msg_id, []).append(
            {field: getattr(msg, field, None) for field in error_info}
        )
    return err_as_dict


def upload_to_server(
    errors: Iterable[Any], paths: list[str], config: dict[str, Any], url: str, version: str
) -> None:
    """Send a POST request to the server with the formatted upload data.

    Parameters:
        errors: PyTA errors, converted with ``errors_to_dict`` before sending.
        paths: paths of the source files to attach; each is opened in binary
            mode and closed again once the request has finished.
        config: custom configuration, sent under the ``"cfg"`` key only when
            it is non-empty.
        url: address the request is posted to.
        version: version string sent alongside the anonymous ID.

    The outcome is reported by printing ``[INFO]``/``[ERROR]`` lines. Request
    failures and unreadable files are caught and reported, not raised.
    """
    unique_id = get_anonymous_id()
    errors_dict = errors_to_dict(errors)
    to_json = {"errors": errors_dict}
    if config:  # 'config' is an empty dictionary if the default was used
        to_json["cfg"] = config
    payload = json.dumps(to_json, default=str)

    try:
        # ExitStack closes every opened file even if a later open() or the
        # request itself raises.
        with ExitStack() as stack:
            upload = {
                str(i): stack.enter_context(open(path, "rb")) for i, path in enumerate(paths)
            }
            response = requests.post(
                url=url,
                files=upload,
                data={"id": unique_id, "version": version, "payload": payload},
                timeout=UPLOAD_TIMEOUT_SECONDS,
            )
        response.raise_for_status()
        print("[INFO] Upload successful")
    except requests.HTTPError as e:
        print("[ERROR] Upload failed")
        status_code = e.response.status_code if e.response is not None else None
        if status_code == 400:
            print(
                "[ERROR] HTTP Response Status 400: Client-side error, likely due to improper syntax. "
                "Please report this to your instructor (and attach the code that caused the error)."
            )
        elif status_code == 403:
            print(
                "[ERROR] HTTP Response Status 403: Authorization is currently required for submission."
            )
        elif status_code == 500:
            print(
                "[ERROR] HTTP Response Status 500: The server ran into a situation it doesn't know how to handle. "
            )
            print(
                "Please report this to your instructor (and attach the code that caused the error)."
            )
        elif status_code == 503:
            print(
                "[ERROR] HTTP Response Status 503: The server is not ready to handle your request. "
            )
            print("It may be down for maintenance.")
        else:
            print('[ERROR] Error message: "{}"'.format(e))
    # Timeout is handled before ConnectionError because a connect timeout is
    # an instance of both.
    except requests.Timeout:
        print("[ERROR] Upload failed")
        print("[ERROR] Error message: Connection timed out. The server may be temporarily down.")
    except requests.ConnectionError:
        print("[ERROR] Upload failed")
        print(
            "[ERROR] Error message: Could not connect. This may be caused by your firewall, or the server may be "
            "temporarily down."
        )
    except requests.RequestException as e:
        print("[ERROR] Upload failed")
        print('[ERROR] Error message: "{}"'.format(e))
    # requests' exceptions derive from OSError, so this clause must stay last;
    # what reaches it are failures opening or reading the selected files.
    except OSError as e:
        print("[ERROR] Upload failed")
        print(f'[ERROR] Could not read a file selected for upload: "{e}"')


def get_anonymous_id() -> str:
    """Return an anonymous ID for opt-in data uploads.

    This is the SHA-512 hex digest of a random local ID, so multiple opt-in
    uploads can be grouped without deriving an identifier from hardware
    information and without sending the locally stored ID itself.
    """
    local_anonymous_id = _get_or_create_local_anonymous_id()
    return hashlib.sha512(local_anonymous_id.encode("utf-8")).hexdigest()


def get_hashed_id() -> str:
    """Return the anonymous upload ID.

    This function is kept as a backwards-compatible alias for older code that
    imported it directly; it returns exactly what ``get_anonymous_id`` returns.
    """
    return get_anonymous_id()


def _get_or_create_local_anonymous_id() -> str:
    """Return the random local ID that the anonymous upload ID is hashed from.

    The ID is read from the file given by ``_get_anonymous_id_path()``. If the
    file cannot be read or does not contain a valid UUID, a fresh random UUID
    is generated and written there. If that write fails, the new ID is kept in
    the module-level cache (keyed by the file path) so later calls with the
    same path return the same value.
    """
    global _cached_local_anonymous_id

    id_file = _get_anonymous_id_path()
    cache_key = str(id_file)
    cached = _cached_local_anonymous_id
    if cached is not None and cached[0] == cache_key:
        return cached[1]

    try:
        stored_id = id_file.read_text(encoding="utf-8").strip()
        uuid.UUID(stored_id)  # used only to validate; raises ValueError if malformed
    except (OSError, ValueError):
        # Missing, unreadable or malformed file: fall through and create a new ID.
        pass
    else:
        return stored_id

    fresh_id = str(uuid.uuid4())
    try:
        id_file.parent.mkdir(parents=True, exist_ok=True)
        id_file.write_text(fresh_id + "\n", encoding="utf-8")
    except OSError:
        # The ID could not be persisted; remember it in memory instead.
        _cached_local_anonymous_id = (cache_key, fresh_id)
    return fresh_id


def _iter_error_messages(errors: Iterable[Any]) -> Iterable[Any]:
"""Yield individual messages from current and legacy reporter upload data."""
for error_group in errors:
if isinstance(error_group, list):
yield from error_group
elif hasattr(error_group, "code") and hasattr(error_group, "style"):
for error_type in ("code", "style"):
current_type = getattr(error_group, error_type)
for info_set in current_type.values():
yield from info_set.messages
else:
yield error_group


def _get_anonymous_id_path() -> Path:
    """Return the local path used to store the anonymous upload ID.

    Resolution order:

    1. The path in the environment variable named by ``ANONYMOUS_ID_ENV_VAR``,
       with a leading ``~`` expanded.
    2. On Windows, ``%APPDATA%\\PythonTA\\anonymous_id`` when ``APPDATA`` is set.
    3. Otherwise ``~/.python_ta/anonymous_id``.

    An environment variable that is set but empty is treated as unset.
    """
    # An empty override would become Path(""), i.e. the current directory,
    # which can never be read or written as a file; ignore it like the
    # APPDATA check below ignores an empty value.
    override = os.environ.get(ANONYMOUS_ID_ENV_VAR)
    if override:
        return Path(override).expanduser()

    appdata = os.environ.get("APPDATA")
    if sys.platform == "win32" and appdata:
        return Path(appdata) / "PythonTA" / "anonymous_id"
    return Path.home() / ".python_ta" / "anonymous_id"
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import re
import sys
import unittest
from unittest.mock import patch

import astroid
import pylint.testutils
Expand All @@ -12,6 +13,7 @@
import python_ta
from python_ta.checkers.invalid_name_checker import (
InvalidNameChecker,
_parse_name,
_to_pascal_case,
_to_upper_case_with_underscores,
)
Expand Down Expand Up @@ -905,6 +907,16 @@ def test_module_name_no_snippet() -> None:


class TestNamingConventionHelpers(unittest.TestCase):
def test_parse_name_returns_empty_result_when_name_is_not_parsed(self) -> None:
"""Test that parsing handles unexpected match failures defensively."""
with patch("python_ta.checkers.invalid_name_checker.re.match", return_value=None):
self.assertEqual(_parse_name("snake_case"), ("", None, ""))

def test_converters_return_none_for_names_starting_with_digit(self) -> None:
"""Test that name conversion fails when a name starts with a digit."""
self.assertIsNone(_to_pascal_case("1bad_name"))
self.assertIsNone(_to_upper_case_with_underscores("1bad_name"))

def test_to_pascal_case(self) -> None:
"""Test that names are correctly converted to PascalCase."""
self.assertEqual(_to_pascal_case("snake_case"), "SnakeCase")
Expand Down
Loading