diff --git a/examples/python-magic/README.md b/examples/python-magic/README.md
new file mode 100644
index 0000000..249a38c
--- /dev/null
+++ b/examples/python-magic/README.md
@@ -0,0 +1,18 @@
+# python-magic Examples
+
+Each sub-directory contains a self-contained example. The order in
+which the examples are to appear is specified in `order.json` (an
+array of directory names in the expected order).
+
+In each example directory you'll find:
+
+* `config.toml` - must conform to the specification outlined here:
+ <https://docs.pyscript.net/latest/user-guide/configuration/>. The
+ file is parsed and ultimately turned into a JSON representation as
+ part of the package's API object.
+* `setup.py` - Python code for contextual and environmental setup.
+ It is NOT SEEN BY THE END USER, but runs before the `code.py` code
+ is evaluated. This lets us create useful (IPython) shims, avoid
+ repeating boilerplate, and so on.
+* `code.py` - the actual code added to the editor which forms the
+ practical example of using the package.
diff --git a/examples/python-magic/identify_file_types/code.py b/examples/python-magic/identify_file_types/code.py
new file mode 100644
index 0000000..92fcfcf
--- /dev/null
+++ b/examples/python-magic/identify_file_types/code.py
@@ -0,0 +1,54 @@
+"""
+A first look at python-magic.
+
+python-magic is a thin Python wrapper around libmagic, the same engine
+behind the Unix `file` command. Given some bytes (or a file path), it
+guesses the file type by inspecting the content's signature -- not the
+filename or extension.
+
+Docs: https://github.com/ahupp/python-magic
+"""
+from IPython.core.display import display, HTML
+
+import magic
+
+
+# A small "file cabinet" of byte signatures for common file types.
+# Each entry is a realistic header we'd find at the start of a file.
+file_samples = {
+ "report.pdf": b"%PDF-1.4\n%\xe2\xe3\xcf\xd3\n1 0 obj\n<< /Type /Catalog >>\nendobj\n",
+ "logo.png": (
+ b"\x89PNG\r\n\x1a\n"
+ b"\x00\x00\x00\rIHDR\x00\x00\x00\x10\x00\x00\x00\x10\x08\x06\x00\x00\x00"
+ ),
+ "photo.jpg": b"\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00",
+ "archive.zip": b"PK\x03\x04\x14\x00\x00\x00\x08\x00" + b"\x00" * 20,
+ "notes.txt": b"Dear diary,\nToday I learned about libmagic.\n",
+ "song.mp3": b"ID3\x04\x00\x00\x00\x00\x00\x00" + b"\x00" * 64,
+}
+
+heading("Guessing file types from raw bytes")
+note(
+ "We pass the first chunk of each file's bytes to "
+ "magic.from_buffer and let libmagic identify it. "
+ "Notice that we never look at the filename -- the bytes alone are enough."
+)
+
+# Build an HTML table of filename, libmagic description, and MIME type.
+rows = ["
{name}{mime_type}{text}
"), append=True) diff --git a/examples/python-magic/mime_router/code.py b/examples/python-magic/mime_router/code.py new file mode 100644 index 0000000..7f72ded --- /dev/null +++ b/examples/python-magic/mime_router/code.py @@ -0,0 +1,87 @@ +# --------------------------------------------------------------------- +# Building a tiny content-aware upload router with python-magic. +# --------------------------------------------------------------------- +# +# A common real-world use of python-magic: an "upload handler" that +# decides what to do with a file based on its true type, regardless +# of what the user named it. This protects against mislabeled or +# disguised files (think: a script renamed to look like an image). + +import magic + + +heading("A MIME-based upload router") +note( + "Each incoming upload is a tuple of (claimed filename, bytes). " + "We detect the real MIME type and dispatch to the appropriate " + "handler. A mismatch between the filename's extension and the " + "detected type is flagged as suspicious." +) + +# Pretend these came in over the wire from a web form. +incoming_uploads = [ + ("vacation.jpg", b"\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01" + b"\x00\x01\x00\x00" + b"\x00" * 200), + ("budget.pdf", b"%PDF-1.5\n%\xe2\xe3\xcf\xd3\n1 0 obj\n<<>>\nendobj\n" + + b"\x00" * 100), + ("backup.zip", b"PK\x03\x04\x14\x00\x00\x00\x08\x00" + b"\x00" * 200), + # Sneaky: claims to be a PNG but is actually plain text. + ("avatar.png", b"#!/bin/sh\necho 'definitely not an image'\n" * 20), + ("readme.txt", b"Welcome to the project!\n\nThis is a friendly readme.\n" * 10), +] + + +# Map MIME prefixes to handler descriptions. In a real app these would +# be functions; here we just describe what would happen. 
+def route(mime_type): + """Return a (handler_name, action) pair for a detected MIME type.""" + if mime_type.startswith("image/"): + return ("ImageProcessor", "resize and store in /uploads/images") + if mime_type == "application/pdf": + return ("DocumentIndexer", "extract text and add to search index") + if mime_type.startswith("text/"): + return ("TextStore", "save to /uploads/text") + if mime_type in {"application/zip", "application/x-zip-compressed"}: + return ("ArchiveScanner", "scan contents before unpacking") + return ("QuarantineBin", "unknown type, hold for review") + + +# Map common extensions to expected MIME prefixes for sanity-checking. +expected_prefix = { + ".jpg": "image/", ".jpeg": "image/", ".png": "image/", + ".pdf": "application/pdf", + ".zip": "application/zip", + ".txt": "text/", +} + +rows = [ + "{filename}{mime_type}avatar.png is correctly identified as a "
+ "shell script (text/x-shellscript or similar) and "
+ "flagged. This is exactly the kind of check a libmagic-based router "
+ "buys you for free."
+)
diff --git a/examples/python-magic/mime_router/config.toml b/examples/python-magic/mime_router/config.toml
new file mode 100644
index 0000000..5471e23
--- /dev/null
+++ b/examples/python-magic/mime_router/config.toml
@@ -0,0 +1 @@
+packages = ["python-magic"]
diff --git a/examples/python-magic/mime_router/setup.py b/examples/python-magic/mime_router/setup.py
new file mode 100644
index 0000000..5e3150b
--- /dev/null
+++ b/examples/python-magic/mime_router/setup.py
@@ -0,0 +1,20 @@
+"""Lightweight setup for cell 2 -- no IPython shim, just the names
+the first cell already established."""
+import js
+from pyscript import window, HTML, display as _display
+
+js.alert = window.alert
+
+
+def display(*args, **kwargs):
+ return _display(
+ *args, **kwargs, target=__pyscript_display_target__,
+ )
+
+
+def heading(text, level=2):
+ display(HTML(f"{text}
"), append=True) diff --git a/examples/python-magic/order.json b/examples/python-magic/order.json new file mode 100644 index 0000000..bab83a3 --- /dev/null +++ b/examples/python-magic/order.json @@ -0,0 +1,4 @@ +[ + "identify_file_types", + "mime_router" +]