commit 71667d81e43e75f010aae853d9cbdd1759dcc34e Author: Caileb Date: Sat Jun 14 13:09:17 2025 -0500 Initial commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..3d9d42e --- /dev/null +++ b/README.md @@ -0,0 +1,33 @@ +# MIME Type Reference + +This project provides a quick and easy way to browse and search for official IANA-registered MIME types. It consists of two main parts: + +1. A Python script (`fetcher.py`) that downloads the latest MIME type registration data from the official IANA website. +2. A self-contained HTML file (`index.html`) that displays the data in a clean, searchable, and user-friendly interface. + +## Features + +- **Up-to-date Data**: Fetches data directly from IANA's official sources. +- **Categorized View**: Organizes MIME types by category (text, image, application, etc.). +- **Obsolete/Deprecated Handling**: Separately categorizes and displays obsolete or deprecated types for clarity. +- **Live Search**: Instantly filter all categories with a single search bar. +- **Zero Dependencies**: The `index.html` file is completely self-contained and requires no external libraries or frameworks. + +## How to Use + +### 1. Fetch the Data + +To get the latest MIME types, run the Python script. This will download the data from IANA and create a `mime_types.json` file in the same directory. + +You will need `aiohttp` to run the script: + +```bash +pip install aiohttp +python fetcher.py +``` + +### 2. View the Data + +Simply open the `index.html` file in any modern web browser. It will automatically load the `mime_types.json` file and display the data. + +You can now browse the categories or use the search bar to find specific MIME types. 
# --- patch framing retained verbatim from the commit dump -------------------
# \ No newline at end of file
# diff --git a/fetcher.py b/fetcher.py
# new file mode 100644
# index 0000000..48208fa
# --- /dev/null
# +++ b/fetcher.py
# @@ -0,0 +1,102 @@
# -----------------------------------------------------------------------------
"""Download the IANA media-type registries and save them as one JSON file.

Each category listed in ``URLS`` is fetched as CSV, reduced to
``{'name', 'template'}`` entries, and written to ``OUTPUT_JSON_FILE``.
Entries whose name is marked obsolete/deprecated are collected separately
under the ``"obsolete_deprecated"`` key, grouped by their original category.
"""
import asyncio
import csv
import json
import re
from io import StringIO
from collections import OrderedDict, defaultdict

# Category key -> CSV registry URL.  Insertion order is the output order.
URLS = OrderedDict([
    ('text', 'https://www.iana.org/assignments/media-types/text.csv'),
    ('image', 'https://www.iana.org/assignments/media-types/image.csv'),
    ('audio', 'https://www.iana.org/assignments/media-types/audio.csv'),
    ('video', 'https://www.iana.org/assignments/media-types/video.csv'),
    ('application', 'https://www.iana.org/assignments/media-types/application.csv'),
    ('font', 'https://www.iana.org/assignments/media-types/font.csv'),
    ('model', 'https://www.iana.org/assignments/media-types/model.csv'),
    ('multipart', 'https://www.iana.org/assignments/media-types/multipart.csv'),
    ('message', 'https://www.iana.org/assignments/media-types/message.csv'),
    ('haptics', 'https://www.iana.org/assignments/media-types/haptics.csv')
])

OUTPUT_JSON_FILE = "mime_types.json"
REQUEST_TIMEOUT_SECONDS = 20
USER_AGENT = "MimeTypeFetcher/1.0.0"

# Matches "obsolete"/"obsoleted"/"deprecated" either as a bare word or as the
# start of a parenthesised remark, case-insensitively.
OBSOLETE_DEPRECATED_PATTERN = re.compile(
    r"\(obsolete(d)?([^\)]*)\)|"
    r"\bobsolete(d)?\b|"
    r"\(deprecated([^\)]*)\)|"
    r"\bdeprecated\b",
    re.IGNORECASE
)


def is_obsolete_or_deprecated(name_str):
    """Return True when *name_str* carries an obsolete/deprecated marker.

    ``None`` and the empty string are treated as "not marked".
    """
    if not name_str:
        return False
    return bool(OBSOLETE_DEPRECATED_PATTERN.search(name_str))


def parse_csv_rows(text_content):
    """Parse CSV text (first line = header) into a list of row dicts.

    A leading UTF-8 byte-order mark is dropped first; otherwise it would be
    glued onto the first header name and every ``'Name'`` lookup would miss.
    """
    return list(csv.DictReader(StringIO(text_content.lstrip("\ufeff"))))


def split_entries(raw_rows):
    """Split CSV rows into ``(active, obsolete)`` lists of entry dicts.

    Rows lacking a non-blank ``Name`` or ``Template`` are skipped.
    """
    active, obsolete = [], []
    for row in raw_rows:
        # csv.DictReader fills fields missing from a short row with None, so
        # a plain ``row.get('Name', '')`` can still hand back None.
        name = (row.get('Name') or '').strip()
        template = (row.get('Template') or '').strip()
        if not (name and template):
            continue
        entry = {'name': name, 'template': template}
        if is_obsolete_or_deprecated(name):
            obsolete.append(entry)
        else:
            active.append(entry)
    return active, obsolete


def build_mime_data(results):
    """Assemble the JSON-ready structure from ``(category, rows)`` pairs.

    Returns:
        ``(final_data, total)`` where *final_data* maps every category to its
        active entries and, when any exist, ``"obsolete_deprecated"`` to a
        mapping of category -> obsolete entries; *total* counts all entries.
    """
    final_data = OrderedDict()
    # Pre-seeded with the known categories so the obsolete section keeps the
    # URLS order regardless of the order of *results*.
    obsolete_data = OrderedDict((key, []) for key in URLS)
    total = 0

    for category_key, raw_rows in results:
        active, obsolete = split_entries(raw_rows)
        final_data[category_key] = active
        obsolete_data.setdefault(category_key, []).extend(obsolete)
        total += len(active) + len(obsolete)

    # Only categories that actually have obsolete entries are emitted.
    non_empty = OrderedDict((k, v) for k, v in obsolete_data.items() if v)
    if non_empty:
        final_data["obsolete_deprecated"] = non_empty
    return final_data, total


async def fetch_category_data(session, category_key, url):
    """Fetch one category's CSV and return ``(category_key, rows)``.

    Any failure (HTTP error, timeout, decode or CSV error) is reported on
    stdout and yields an empty row list, so one bad category does not abort
    the other downloads.
    """
    print(f"Fetching: {category_key}...")
    headers = {"User-Agent": USER_AGENT}
    try:
        async with session.get(url, timeout=REQUEST_TIMEOUT_SECONDS, headers=headers) as response:
            response.raise_for_status()
            text_content = await response.text(encoding="utf-8")
        return category_key, parse_csv_rows(text_content)
    except Exception as e:  # deliberate best-effort boundary per category
        print(f"Error for {category_key} ({url}): {type(e).__name__} - {e}")
        return category_key, []


async def process_all_mime_types():
    """Download every category concurrently and write ``OUTPUT_JSON_FILE``.

    If no category could be downloaded at all, the existing output file is
    left untouched instead of being replaced with empty lists.
    """
    # Third-party dependency, imported here so the parsing helpers above stay
    # importable without it.
    import aiohttp

    print("Starting MIME type download...\n")
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_category_data(session, key, url) for key, url in URLS.items()]
        results = await asyncio.gather(*tasks)

    failed = [key for key, rows in results if not rows]
    if len(failed) == len(results):
        print(f"\nNo data could be downloaded; {OUTPUT_JSON_FILE} left unchanged.")
        return
    if failed:
        print(f"\nWarning: no data for: {', '.join(failed)}")

    print("\nProcessing downloaded data...")
    final_data, total_processed_entries = build_mime_data(results)

    for category_key, raw_rows in results:
        if raw_rows:
            print(f"Processed {len(final_data[category_key])} active types for {category_key}.")

    obsolete_section = final_data.get("obsolete_deprecated")
    if obsolete_section:
        obsolete_count = sum(len(v) for v in obsolete_section.values())
        print(f"Categorized {obsolete_count} obsolete/deprecated types by original category.")

    print(f"\nTotal entries processed: {total_processed_entries}")

    try:
        with open(OUTPUT_JSON_FILE, 'w', encoding='utf-8') as f:
            json.dump(final_data, f, indent=2, ensure_ascii=False)
        print(f"Data saved to {OUTPUT_JSON_FILE}")
    except IOError as e:
        print(f"Error writing JSON: {e}")


if __name__ == "__main__":
    asyncio.run(process_all_mime_types())

# --- patch framing retained verbatim from the commit dump -------------------
# \ No newline at end of file
# diff --git a/index.html b/index.html
# new file mode 100644
# index 0000000..a3b4b42
# ---
/dev/null +++ b/index.html @@ -0,0 +1,279 @@ + + + + + + MIME Type Reference + + + + +
+
+ +
+
+

Loading MIME types...

+ +
+
+ + + + \ No newline at end of file