-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprotocol.py
More file actions
261 lines (203 loc) · 9.38 KB
/
protocol.py
File metadata and controls
261 lines (203 loc) · 9.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
"""
Wire protocol for the educational client/server pair.
Why this module exists
----------------------
The original implementation used a 13-byte ASCII length header for responses
but sent commands as raw (unframed) bytes. This created two problems a
student is likely to hit early:
1. A raw ``recv(1024)`` on the client can split a long command into two
reads, and the command handler sees only the first slice.
2. The ASCII header did not validate its contents, so ``int(header)`` on a
non-numeric value raises ``ValueError`` and crashes the server.
This module replaces both sides with a single symmetric frame format that is
binary, fixed-width, magic-number validated, and self-describing.
Frame layout
------------
Each frame on the wire looks like this (big-endian, network byte order):
+------------------+---------+---------+--------------------+
| magic (4 bytes) | version | type | length (4 bytes) | ...
| b"EDUB" | uint8 | uint8 | uint32, max 16 MiB |
+------------------+---------+---------+--------------------+
| payload (length bytes) |
+-----------------------------------------------------------+
- ``magic`` is always the 4 ASCII bytes ``EDUB`` (educational backdoor).
If we read anything else we raise ``ProtocolError`` and close the socket.
- ``version`` is currently 1. Bumping it allows future changes without
silently mis-parsing old clients.
- ``type`` is a one-byte discriminator so the receiver knows what the
payload represents (command text, shell output, binary file, auth
handshake message, ...). See ``FrameType`` below.
- ``length`` is the payload length in bytes. Hard-capped at 16 MiB to
prevent a single malicious frame from exhausting memory.
Educational notes are embedded as docstrings and inline comments: this file
is meant to be read end-to-end as a tutorial on binary framing, ``struct``,
socket recv semantics, and why short reads are a thing.
This module is usable as-is for any educational TCP project, not just the
backdoor. The framing functions have no dependency on the rest of the
project.
"""
from __future__ import annotations
import enum
import socket
import ssl
import struct
from dataclasses import dataclass
# Type alias so static checkers (mypy, pyright) accept both plain and
# TLS-wrapped sockets everywhere. ``ssl.SSLSocket`` is a subclass of
# ``socket.socket`` at runtime so isinstance checks still work. We use
# the modern PEP 604 ``|`` union syntax because this project targets
# Python 3.10+.
SocketLike = socket.socket | ssl.SSLSocket
# --------------------------------------------------------------------------
# Constants
# --------------------------------------------------------------------------
#: 4-byte magic that starts every frame. Chosen to be ASCII-readable so a
#: student using Wireshark or ``od -c`` can spot a frame boundary.
MAGIC = b"EDUB"
#: Current wire-format version. Bump this if the header layout changes in a
#: backwards-incompatible way.
PROTOCOL_VERSION = 1
#: Hard upper bound on a single frame's payload size. 16 MiB is large enough
#: for an educational screenshot (~5 MB PNG) but small enough that a
#: malicious sender cannot force the receiver to allocate gigabytes.
MAX_FRAME_SIZE = 16 * 1024 * 1024 # 16 MiB
#: ``struct`` format for the fixed header: 4-byte magic, uint8 version,
#: uint8 type, uint32 length. Prefix ``!`` selects network byte order so
#: frames are portable across architectures.
HEADER_FORMAT = "!4sBBI"
#: Cached header size so we never accidentally hardcode it.
HEADER_SIZE = struct.calcsize(HEADER_FORMAT) # == 10 bytes
@enum.unique
class FrameType(enum.IntEnum):
    """
    Discriminator for the ``type`` byte in every frame.

    Keeping this as an ``IntEnum`` lets us:

    - Serialise with ``int(frame.frame_type)`` in the ``struct.pack`` call.
    - Compare with ``if frame.frame_type is FrameType.COMMAND`` in handlers.
    - Reject unknown types cleanly in ``recv_frame`` when the byte does not
      correspond to any enum member.

    The numeric values are stable across versions because they appear on
    the wire. Never renumber; only append.

    ``@enum.unique`` enforces that every member has its own value. Without
    it, a new member that accidentally reuses a number would silently become
    an alias of the existing one, and the two frame kinds would be
    indistinguishable on the wire. With it, the mistake raises ``ValueError``
    as soon as the module is imported.
    """

    #: Operator sending a command line to the client. Payload is UTF-8 text.
    COMMAND = 1

    #: Client sending a plain-text response back (stdout+stderr, info).
    #: Payload is UTF-8 text.
    RESPONSE_TEXT = 2

    #: Client sending binary content (file bytes, screenshot PNG bytes).
    #: Payload is raw bytes, no encoding assumed.
    RESPONSE_BINARY = 3

    #: Generic error notification. Payload is UTF-8 text describing the
    #: failure (missing file, chdir refused, unknown command, ...).
    RESPONSE_ERROR = 4

    #: First step of the auth handshake: server sends a random challenge
    #: (16 bytes) as payload.
    AUTH_CHALLENGE = 10

    #: Second step: client responds with HMAC(secret, challenge) as payload.
    AUTH_RESPONSE = 11

    #: Third step: server confirms the handshake succeeded. Payload empty.
    AUTH_OK = 12

    #: Third step alternate: server rejects. Payload is a UTF-8 reason.
    AUTH_FAIL = 13
class ProtocolError(Exception):
    """
    Signals that a frame does not conform to the wire format.

    This is a fatal, per-connection condition: the caller is expected to
    close the socket rather than try to resynchronise the stream.

    The class is deliberately a single flat exception. A student who wants
    finer-grained handling can derive subclasses from it (bad magic, bad
    version, too-large frame, unexpected EOF, unknown frame type).
    """
# --------------------------------------------------------------------------
# Frame dataclass + framing helpers
# --------------------------------------------------------------------------
@dataclass(frozen=True)
class Frame:
    """
    One decoded frame: a type discriminator plus its payload bytes.

    Instances are immutable (``frozen=True``). Two frames with the same
    type and payload compare equal with ``==``, which keeps tests simple,
    and handlers cannot modify a frame they were handed.
    """

    # Discriminator written into the header's one-byte ``type`` field.
    frame_type: FrameType
    # Bytes that follow the header on the wire.
    payload: bytes

    def encode(self) -> bytes:
        """
        Build the on-the-wire byte sequence for this frame.

        The result is the packed header (magic, version, type, payload
        length) followed by the payload, ready for ``sock.sendall``.

        Raises ``ProtocolError`` if the payload is longer than
        ``MAX_FRAME_SIZE``.
        """
        size = len(self.payload)
        if size > MAX_FRAME_SIZE:
            raise ProtocolError(
                f"payload too large: {size} > {MAX_FRAME_SIZE}"
            )

        # Field order must match HEADER_FORMAT.
        header_fields = (MAGIC, PROTOCOL_VERSION, int(self.frame_type), size)
        return struct.pack(HEADER_FORMAT, *header_fields) + self.payload
def _recv_exact(sock: SocketLike, nbytes: int) -> bytes:
    """
    Return exactly ``nbytes`` bytes read from ``sock``.

    A single ``socket.recv(n)`` may hand back fewer than ``n`` bytes, even
    when the peer wrote everything with one ``sendall``: TCP is a byte
    stream and does not preserve write boundaries. This helper therefore
    keeps calling ``recv`` for the bytes still missing until the request is
    satisfied.

    Raises ``ProtocolError`` if ``recv`` returns an empty result (the peer
    closed the connection) before ``nbytes`` bytes have arrived.
    """
    pieces = []
    received = 0
    while received < nbytes:
        # Ask only for what is still missing so we never read into the
        # next frame.
        piece = sock.recv(nbytes - received)
        if not piece:
            raise ProtocolError(
                f"unexpected EOF while reading {nbytes} bytes "
                f"({received} received)"
            )
        pieces.append(piece)
        received += len(piece)
    return b"".join(pieces)
def send_frame(sock: SocketLike, frame_type: FrameType, payload: bytes) -> None:
    """
    Wrap ``payload`` in a frame of kind ``frame_type`` and write it to
    ``sock``.

    Header and payload are encoded into one byte string and handed to a
    single ``sendall`` call. Handlers use this whenever they need to send
    something to the peer.
    """
    wire_bytes = Frame(frame_type=frame_type, payload=payload).encode()
    sock.sendall(wire_bytes)
def recv_frame(sock: SocketLike) -> Frame:
    """
    Read the next complete frame from ``sock`` and decode it.

    The fixed-size header is read first and checked in this order: magic,
    protocol version, payload length (against ``MAX_FRAME_SIZE``), frame
    type. The payload is only read once every check has passed.

    Returns a ``Frame`` whose ``frame_type`` is a known ``FrameType``.
    Raises ``ProtocolError`` with an explanatory message when any header
    check fails.
    """
    raw_header = _recv_exact(sock, HEADER_SIZE)
    got_magic, got_version, kind_byte, body_len = struct.unpack(
        HEADER_FORMAT, raw_header
    )

    if got_magic != MAGIC:
        raise ProtocolError(
            f"bad magic: got {got_magic!r}, expected {MAGIC!r}"
        )

    if got_version != PROTOCOL_VERSION:
        raise ProtocolError(
            f"unsupported protocol version: {got_version} "
            f"(this build speaks v{PROTOCOL_VERSION})"
        )

    # Reject oversized frames before allocating anything for the payload.
    if body_len > MAX_FRAME_SIZE:
        raise ProtocolError(
            f"frame payload too large: {body_len} > {MAX_FRAME_SIZE}"
        )

    # ``FrameType(...)`` raises ValueError for a byte with no enum member;
    # translate that into the module's own error and keep the cause chained.
    try:
        kind = FrameType(kind_byte)
    except ValueError as err:
        raise ProtocolError(f"unknown frame type byte: {kind_byte}") from err

    # A zero-length payload needs no further read.
    body = b""
    if body_len:
        body = _recv_exact(sock, body_len)

    return Frame(frame_type=kind, payload=body)
# --------------------------------------------------------------------------
# Convenience: send text / binary / error with the right type
# --------------------------------------------------------------------------
def send_text(sock: SocketLike, frame_type: FrameType, text: str) -> None:
    """Encode ``text`` as UTF-8 and send it as a frame of kind ``frame_type``."""
    encoded = text.encode("utf-8")
    send_frame(sock, frame_type, encoded)
def send_error(sock: SocketLike, reason: str) -> None:
    """Report a failure to the peer: ``reason`` goes out as a RESPONSE_ERROR frame."""
    send_text(sock, frame_type=FrameType.RESPONSE_ERROR, text=reason)