From 8e5c35a83797685c2da6772d32c664b2e1b8e865 Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 27 Feb 2026 17:44:44 -0800 Subject: [PATCH 01/71] Testing for pep8 --- .github/workflows/pep8-enforcement.yaml | 69 ++++++++ .gitignore | 220 +++++++++++++++++++++++- pep8-enforcement.yaml | 69 ++++++++ 3 files changed, 356 insertions(+), 2 deletions(-) create mode 100755 .github/workflows/pep8-enforcement.yaml create mode 100755 pep8-enforcement.yaml diff --git a/.github/workflows/pep8-enforcement.yaml b/.github/workflows/pep8-enforcement.yaml new file mode 100755 index 0000000..7f146e1 --- /dev/null +++ b/.github/workflows/pep8-enforcement.yaml @@ -0,0 +1,69 @@ +name: PEP 8 Compliance Check + +on: + workflow_dispatch: + push: + branches: + - '**' # Run on pushes to any branch + +jobs: + pep8_check: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 # IMPORTANT: Fetch all history for git diff to work correctly + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' # Specify your Python version (e.g., '3.9', '3.10', '3.11') + + - name: Install flake8 + run: pip install flake8 + + - name: Get changed Python files + id: changed-files + run: | + # For push events, compare with the previous commit + # Handle cases where commits might not exist or be accessible + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + # Manual trigger - check all Python files + CHANGED_PYTHON_FILES=$(find . -name "*.py" -type f | tr '\n' ' ') + elif [ "${{ github.event.before }}" = "0000000000000000000000000000000000000000" ]; then + # First commit - check all Python files + CHANGED_PYTHON_FILES=$(find . -name "*.py" -type f | tr '\n' ' ') + else + # Try git diff, fallback to all files if it fails + CHANGED_PYTHON_FILES=$(git diff --name-only --diff-filter=ACM ${{ github.event.before }} ${{ github.sha }} 2>/dev/null | grep '\.py$' | tr '\n' ' ' || find . 
-name "*.py" -type f | tr '\n' ' ') + fi + + # Ensure we have a value even if empty + if [ -z "$CHANGED_PYTHON_FILES" ]; then + CHANGED_PYTHON_FILES="" + fi + + echo "changed_python_files=${CHANGED_PYTHON_FILES}" >> $GITHUB_OUTPUT + echo "Event type: ${{ github.event_name }}" + echo "Before commit: ${{ github.event.before }}" + echo "Current commit: ${{ github.sha }}" + echo "Found Python files: '$CHANGED_PYTHON_FILES'" + shell: bash + + - name: Run flake8 on changed Python files + if: success() && steps.changed-files.outputs.changed_python_files + run: | + echo "Checking the following files for PEP 8 compliance:" + echo "${{ steps.changed-files.outputs.changed_python_files }}" + echo "" + # Use xargs to properly handle multiple files + echo "${{ steps.changed-files.outputs.changed_python_files }}" | xargs flake8 + shell: bash + + - name: Report success if no Python files were changed (Optional, for clarity) + # This step only runs if the 'Run flake8' step was skipped due to no Python files being found. + # It provides a clear message in the GitHub Actions UI. + if: success() && !steps.changed-files.outputs.changed_python_files + run: echo "No .py files changed in this push. PEP 8 compliance check passed (skipped)." \ No newline at end of file diff --git a/.gitignore b/.gitignore index 65f3ed0..0f5d24d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,220 @@ -.idea -ENV +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. 
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the enitre vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. 
Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore + +# avoid mac stuff +.DS_Store + +# Ignore all image files *.jpg *.jpeg +*.png + +# Terraform Stuff to Ignore +# Local .terraform directories +**/.terraform/* +**/.venv/ + +# .tfstate files +*.tfstate +*.tfstate.* + +# Crash log files +crash.log +crash.*.log +# other stuff +.vscode +.venv +__pycache__ +secrets.txt diff --git a/pep8-enforcement.yaml b/pep8-enforcement.yaml new file mode 100755 index 0000000..7f146e1 --- /dev/null +++ b/pep8-enforcement.yaml @@ -0,0 +1,69 @@ +name: PEP 8 Compliance Check + +on: + workflow_dispatch: + push: + branches: + - '**' # Run on pushes to any branch + +jobs: + pep8_check: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 # IMPORTANT: Fetch all history for git diff to work correctly + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' # Specify your Python version (e.g., '3.9', '3.10', '3.11') + + - name: Install flake8 + run: pip install flake8 + + - name: Get changed Python files + id: changed-files + run: | + # For push events, compare with the previous commit + # Handle cases where commits might not exist or be accessible + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + # Manual trigger - check all Python files + CHANGED_PYTHON_FILES=$(find . -name "*.py" -type f | tr '\n' ' ') + elif [ "${{ github.event.before }}" = "0000000000000000000000000000000000000000" ]; then + # First commit - check all Python files + CHANGED_PYTHON_FILES=$(find . -name "*.py" -type f | tr '\n' ' ') + else + # Try git diff, fallback to all files if it fails + CHANGED_PYTHON_FILES=$(git diff --name-only --diff-filter=ACM ${{ github.event.before }} ${{ github.sha }} 2>/dev/null | grep '\.py$' | tr '\n' ' ' || find . 
-name "*.py" -type f | tr '\n' ' ') + fi + + # Ensure we have a value even if empty + if [ -z "$CHANGED_PYTHON_FILES" ]; then + CHANGED_PYTHON_FILES="" + fi + + echo "changed_python_files=${CHANGED_PYTHON_FILES}" >> $GITHUB_OUTPUT + echo "Event type: ${{ github.event_name }}" + echo "Before commit: ${{ github.event.before }}" + echo "Current commit: ${{ github.sha }}" + echo "Found Python files: '$CHANGED_PYTHON_FILES'" + shell: bash + + - name: Run flake8 on changed Python files + if: success() && steps.changed-files.outputs.changed_python_files + run: | + echo "Checking the following files for PEP 8 compliance:" + echo "${{ steps.changed-files.outputs.changed_python_files }}" + echo "" + # Use xargs to properly handle multiple files + echo "${{ steps.changed-files.outputs.changed_python_files }}" | xargs flake8 + shell: bash + + - name: Report success if no Python files were changed (Optional, for clarity) + # This step only runs if the 'Run flake8' step was skipped due to no Python files being found. + # It provides a clear message in the GitHub Actions UI. + if: success() && !steps.changed-files.outputs.changed_python_files + run: echo "No .py files changed in this push. PEP 8 compliance check passed (skipped)." 
\ No newline at end of file From 63d77729a26fa51b22e7addbd858b10d8d8c29e9 Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 27 Feb 2026 17:46:52 -0800 Subject: [PATCH 02/71] not pep8 valid --- mosaic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mosaic.py b/mosaic.py index 6c5bda2..cab3839 100644 --- a/mosaic.py +++ b/mosaic.py @@ -236,3 +236,4 @@ def mosaic(img_path, tiles_path): else: mosaic(source_image, tile_dir) +# this is not pep8 valid \ No newline at end of file From 0ae5e487f1001eae55ae4d25248f439ab67c371f Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 27 Feb 2026 17:49:24 -0800 Subject: [PATCH 03/71] should not be able to commit --- .pre-commit-config.yaml | 5 +++ mosaic.py | 2 +- pep8-enforcement.yaml | 69 ----------------------------------------- 3 files changed, 6 insertions(+), 70 deletions(-) create mode 100644 .pre-commit-config.yaml delete mode 100755 pep8-enforcement.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..706a314 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: +- repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 # Use the latest stable version + hooks: + - id: flake8 \ No newline at end of file diff --git a/mosaic.py b/mosaic.py index cab3839..adfe507 100644 --- a/mosaic.py +++ b/mosaic.py @@ -236,4 +236,4 @@ def mosaic(img_path, tiles_path): else: mosaic(source_image, tile_dir) -# this is not pep8 valid \ No newline at end of file +# this is not pep8 valid very not valid \ No newline at end of file diff --git a/pep8-enforcement.yaml b/pep8-enforcement.yaml deleted file mode 100755 index 7f146e1..0000000 --- a/pep8-enforcement.yaml +++ /dev/null @@ -1,69 +0,0 @@ -name: PEP 8 Compliance Check - -on: - workflow_dispatch: - push: - branches: - - '**' # Run on pushes to any branch - -jobs: - pep8_check: - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 0 # IMPORTANT: Fetch all history for git diff 
to work correctly - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.x' # Specify your Python version (e.g., '3.9', '3.10', '3.11') - - - name: Install flake8 - run: pip install flake8 - - - name: Get changed Python files - id: changed-files - run: | - # For push events, compare with the previous commit - # Handle cases where commits might not exist or be accessible - if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then - # Manual trigger - check all Python files - CHANGED_PYTHON_FILES=$(find . -name "*.py" -type f | tr '\n' ' ') - elif [ "${{ github.event.before }}" = "0000000000000000000000000000000000000000" ]; then - # First commit - check all Python files - CHANGED_PYTHON_FILES=$(find . -name "*.py" -type f | tr '\n' ' ') - else - # Try git diff, fallback to all files if it fails - CHANGED_PYTHON_FILES=$(git diff --name-only --diff-filter=ACM ${{ github.event.before }} ${{ github.sha }} 2>/dev/null | grep '\.py$' | tr '\n' ' ' || find . -name "*.py" -type f | tr '\n' ' ') - fi - - # Ensure we have a value even if empty - if [ -z "$CHANGED_PYTHON_FILES" ]; then - CHANGED_PYTHON_FILES="" - fi - - echo "changed_python_files=${CHANGED_PYTHON_FILES}" >> $GITHUB_OUTPUT - echo "Event type: ${{ github.event_name }}" - echo "Before commit: ${{ github.event.before }}" - echo "Current commit: ${{ github.sha }}" - echo "Found Python files: '$CHANGED_PYTHON_FILES'" - shell: bash - - - name: Run flake8 on changed Python files - if: success() && steps.changed-files.outputs.changed_python_files - run: | - echo "Checking the following files for PEP 8 compliance:" - echo "${{ steps.changed-files.outputs.changed_python_files }}" - echo "" - # Use xargs to properly handle multiple files - echo "${{ steps.changed-files.outputs.changed_python_files }}" | xargs flake8 - shell: bash - - - name: Report success if no Python files were changed (Optional, for clarity) - # This step only runs if the 'Run flake8' step was skipped due to no 
Python files being found. - # It provides a clear message in the GitHub Actions UI. - if: success() && !steps.changed-files.outputs.changed_python_files - run: echo "No .py files changed in this push. PEP 8 compliance check passed (skipped)." \ No newline at end of file From d03b74e8c8c649e1a812806f499d2556ba425cb0 Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 27 Feb 2026 18:13:38 -0800 Subject: [PATCH 04/71] pep8 compliant? --- .pre-commit-config.yaml | 2 +- mosaic.py | 413 ++++++++++++++++++++++------------------ pep8-enforcement.yaml | 69 +++++++ 3 files changed, 293 insertions(+), 191 deletions(-) create mode 100644 pep8-enforcement.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 706a314..00f604e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,5 @@ repos: - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 # Use the latest stable version + rev: 6.0.0 hooks: - id: flake8 \ No newline at end of file diff --git a/mosaic.py b/mosaic.py index adfe507..0df6ce7 100644 --- a/mosaic.py +++ b/mosaic.py @@ -1,239 +1,272 @@ import sys -import os, os.path +import os +import os.path from PIL import Image, ImageOps from multiprocessing import Process, Queue, cpu_count # Change these 3 config parameters to suit your needs... 
-TILE_SIZE = 50 # height/width of mosaic tiles in pixels -TILE_MATCH_RES = 5 # tile matching resolution (higher values give better fit but require more processing) -ENLARGEMENT = 8 # the mosaic image will be this many times wider and taller than the original +TILE_SIZE = 50 # height/width of mosaic tiles in pixels +TILE_MATCH_RES = 5 # tile matching resolution +ENLARGEMENT = 8 # mosaic image will be this many times larger TILE_BLOCK_SIZE = TILE_SIZE / max(min(TILE_MATCH_RES, TILE_SIZE), 1) WORKER_COUNT = max(cpu_count() - 1, 1) OUT_FILE = 'mosaic.jpeg' EOQ_VALUE = None -class TileProcessor: - def __init__(self, tiles_directory): - self.tiles_directory = tiles_directory - - def __process_tile(self, tile_path): - try: - img = Image.open(tile_path) - img = ImageOps.exif_transpose(img) - - # tiles must be square, so get the largest square that fits inside the image - w = img.size[0] - h = img.size[1] - min_dimension = min(w, h) - w_crop = (w - min_dimension) / 2 - h_crop = (h - min_dimension) / 2 - img = img.crop((w_crop, h_crop, w - w_crop, h - h_crop)) - - large_tile_img = img.resize((TILE_SIZE, TILE_SIZE), Image.LANCZOS) - small_tile_img = img.resize((int(TILE_SIZE/TILE_BLOCK_SIZE), int(TILE_SIZE/TILE_BLOCK_SIZE)), Image.LANCZOS) - - return (large_tile_img.convert('RGB'), small_tile_img.convert('RGB')) - except: - return (None, None) - - def get_tiles(self): - large_tiles = [] - small_tiles = [] - print('Reading tiles from {}...'.format(self.tiles_directory)) +class TileProcessor: + def __init__(self, tiles_directory): + self.tiles_directory = tiles_directory + + def __process_tile(self, tile_path): + try: + img = Image.open(tile_path) + img = ImageOps.exif_transpose(img) + + # tiles must be square, so get the largest square that fits inside + w = img.size[0] + h = img.size[1] + min_dimension = min(w, h) + w_crop = (w - min_dimension) / 2 + h_crop = (h - min_dimension) / 2 + img = img.crop((w_crop, h_crop, w - w_crop, h - h_crop)) + + large_tile_img = 
img.resize((TILE_SIZE, TILE_SIZE), Image.LANCZOS) + small_tile_img = img.resize( + (int(TILE_SIZE / TILE_BLOCK_SIZE), + int(TILE_SIZE / TILE_BLOCK_SIZE)), + Image.LANCZOS + ) + + return (large_tile_img.convert('RGB'), + small_tile_img.convert('RGB')) + except Exception: + return (None, None) + + def get_tiles(self): + large_tiles = [] + small_tiles = [] + + print('Reading tiles from {}...'.format(self.tiles_directory)) + + # search the tiles directory recursively + for root, subFolders, files in os.walk(self.tiles_directory): + for tile_name in files: + print( + 'Reading {:40.40}'.format(tile_name), + flush=True, end='\r') + tile_path = os.path.join(root, tile_name) + large_tile, small_tile = self.__process_tile(tile_path) + if large_tile: + large_tiles.append(large_tile) + small_tiles.append(small_tile) + + print('Processed {} tiles.'.format(len(large_tiles))) + + return (large_tiles, small_tiles) - # search the tiles directory recursively - for root, subFolders, files in os.walk(self.tiles_directory): - for tile_name in files: - print('Reading {:40.40}'.format(tile_name), flush=True, end='\r') - tile_path = os.path.join(root, tile_name) - large_tile, small_tile = self.__process_tile(tile_path) - if large_tile: - large_tiles.append(large_tile) - small_tiles.append(small_tile) +class TargetImage: + def __init__(self, image_path): + self.image_path = image_path - print('Processed {} tiles.'.format(len(large_tiles))) + def get_data(self): + print('Processing main image...') + img = Image.open(self.image_path) + w = img.size[0] * ENLARGEMENT + h = img.size[1] * ENLARGEMENT + large_img = img.resize((w, h), Image.LANCZOS) + w_diff = (w % TILE_SIZE) / 2 + h_diff = (h % TILE_SIZE) / 2 - return (large_tiles, small_tiles) + # crop the image slightly so we use a whole number of tiles + if w_diff or h_diff: + large_img = large_img.crop( + (w_diff, h_diff, w - w_diff, h - h_diff) + ) -class TargetImage: - def __init__(self, image_path): - self.image_path = image_path + 
small_img = large_img.resize( + (int(w / TILE_BLOCK_SIZE), int(h / TILE_BLOCK_SIZE)), + Image.LANCZOS + ) - def get_data(self): - print('Processing main image...') - img = Image.open(self.image_path) - w = img.size[0] * ENLARGEMENT - h = img.size[1] * ENLARGEMENT - large_img = img.resize((w, h), Image.LANCZOS) - w_diff = (w % TILE_SIZE)/2 - h_diff = (h % TILE_SIZE)/2 - - # if necessary, crop the image slightly so we use a whole number of tiles horizontally and vertically - if w_diff or h_diff: - large_img = large_img.crop((w_diff, h_diff, w - w_diff, h - h_diff)) + image_data = (large_img.convert('RGB'), small_img.convert('RGB')) - small_img = large_img.resize((int(w/TILE_BLOCK_SIZE), int(h/TILE_BLOCK_SIZE)), Image.LANCZOS) + print('Main image processed.') - image_data = (large_img.convert('RGB'), small_img.convert('RGB')) + return image_data - print('Main image processed.') - - return image_data class TileFitter: - def __init__(self, tiles_data): - self.tiles_data = tiles_data - - def __get_tile_diff(self, t1, t2, bail_out_value): - diff = 0 - for i in range(len(t1)): - #diff += (abs(t1[i][0] - t2[i][0]) + abs(t1[i][1] - t2[i][1]) + abs(t1[i][2] - t2[i][2])) - diff += ((t1[i][0] - t2[i][0])**2 + (t1[i][1] - t2[i][1])**2 + (t1[i][2] - t2[i][2])**2) - if diff > bail_out_value: - # we know already that this isn't going to be the best fit, so no point continuing with this tile - return diff - return diff - - def get_best_fit_tile(self, img_data): - best_fit_tile_index = None - min_diff = sys.maxsize - tile_index = 0 - - # go through each tile in turn looking for the best match for the part of the image represented by 'img_data' - for tile_data in self.tiles_data: - diff = self.__get_tile_diff(img_data, tile_data, min_diff) - if diff < min_diff: - min_diff = diff - best_fit_tile_index = tile_index - tile_index += 1 - - return best_fit_tile_index + def __init__(self, tiles_data): + self.tiles_data = tiles_data + + def __get_tile_diff(self, t1, t2, bail_out_value): + 
diff = 0 + for i in range(len(t1)): + diff += ((t1[i][0] - t2[i][0])**2 + + (t1[i][1] - t2[i][1])**2 + + (t1[i][2] - t2[i][2])**2) + if diff > bail_out_value: + return diff + return diff + + def get_best_fit_tile(self, img_data): + best_fit_tile_index = None + min_diff = sys.maxsize + tile_index = 0 + + for tile_data in self.tiles_data: + diff = self.__get_tile_diff(img_data, tile_data, min_diff) + if diff < min_diff: + min_diff = diff + best_fit_tile_index = tile_index + tile_index += 1 + + return best_fit_tile_index -def fit_tiles(work_queue, result_queue, tiles_data): - # this function gets run by the worker processes, one on each CPU core - tile_fitter = TileFitter(tiles_data) - - while True: - try: - img_data, img_coords = work_queue.get(True) - if img_data == EOQ_VALUE: - break - tile_index = tile_fitter.get_best_fit_tile(img_data) - result_queue.put((img_coords, tile_index)) - except KeyboardInterrupt: - pass - - # let the result handler know that this worker has finished everything - result_queue.put((EOQ_VALUE, EOQ_VALUE)) -class ProgressCounter: - def __init__(self, total): - self.total = total - self.counter = 0 +def fit_tiles(work_queue, result_queue, tiles_data): + tile_fitter = TileFitter(tiles_data) - def update(self): - self.counter += 1 - print("Progress: {:04.1f}%".format(100 * self.counter / self.total), flush=True, end='\r') + while True: + try: + img_data, img_coords = work_queue.get(True) + if img_data == EOQ_VALUE: + break + tile_index = tile_fitter.get_best_fit_tile(img_data) + result_queue.put((img_coords, tile_index)) + except KeyboardInterrupt: + pass -class MosaicImage: - def __init__(self, original_img): - self.image = Image.new(original_img.mode, original_img.size) - self.x_tile_count = int(original_img.size[0] / TILE_SIZE) - self.y_tile_count = int(original_img.size[1] / TILE_SIZE) - self.total_tiles = self.x_tile_count * self.y_tile_count + result_queue.put((EOQ_VALUE, EOQ_VALUE)) - def add_tile(self, tile_data, coords): - img = 
Image.new('RGB', (TILE_SIZE, TILE_SIZE)) - img.putdata(tile_data) - self.image.paste(img, coords) - def save(self, path): - self.image.save(path) +class ProgressCounter: + def __init__(self, total): + self.total = total + self.counter = 0 -def build_mosaic(result_queue, all_tile_data_large, original_img_large): - mosaic = MosaicImage(original_img_large) + def update(self): + self.counter += 1 + print("Progress: {:04.1f}%".format(100 * self.counter / self.total), + flush=True, end='\r') - active_workers = WORKER_COUNT - while True: - try: - img_coords, best_fit_tile_index = result_queue.get() - if img_coords == EOQ_VALUE: - active_workers -= 1 - if not active_workers: - break - else: - tile_data = all_tile_data_large[best_fit_tile_index] - mosaic.add_tile(tile_data, img_coords) +class MosaicImage: + def __init__(self, original_img): + self.image = Image.new(original_img.mode, original_img.size) + self.x_tile_count = int(original_img.size[0] / TILE_SIZE) + self.y_tile_count = int(original_img.size[1] / TILE_SIZE) + self.total_tiles = self.x_tile_count * self.y_tile_count - except KeyboardInterrupt: - pass + def add_tile(self, tile_data, coords): + img = Image.new('RGB', (TILE_SIZE, TILE_SIZE)) + img.putdata(tile_data) + self.image.paste(img, coords) - mosaic.save(OUT_FILE) - print('\nFinished, output is in', OUT_FILE) + def save(self, path): + self.image.save(path) -def compose(original_img, tiles): - print('Building mosaic, press Ctrl-C to abort...') - original_img_large, original_img_small = original_img - tiles_large, tiles_small = tiles - mosaic = MosaicImage(original_img_large) +def build_mosaic(result_queue, all_tile_data_large, original_img_large): + mosaic = MosaicImage(original_img_large) - all_tile_data_large = [list(tile.getdata()) for tile in tiles_large] - all_tile_data_small = [list(tile.getdata()) for tile in tiles_small] + active_workers = WORKER_COUNT + while True: + try: + img_coords, best_fit_tile_index = result_queue.get() - work_queue = 
Queue(WORKER_COUNT) - result_queue = Queue() + if img_coords == EOQ_VALUE: + active_workers -= 1 + if not active_workers: + break + else: + tile_data = all_tile_data_large[best_fit_tile_index] + mosaic.add_tile(tile_data, img_coords) - try: - # start the worker processes that will build the mosaic image - Process(target=build_mosaic, args=(result_queue, all_tile_data_large, original_img_large)).start() + except KeyboardInterrupt: + pass - # start the worker processes that will perform the tile fitting - for n in range(WORKER_COUNT): - Process(target=fit_tiles, args=(work_queue, result_queue, all_tile_data_small)).start() + mosaic.save(OUT_FILE) + print('\nFinished, output is in', OUT_FILE) - progress = ProgressCounter(mosaic.x_tile_count * mosaic.y_tile_count) - for x in range(mosaic.x_tile_count): - for y in range(mosaic.y_tile_count): - large_box = (x * TILE_SIZE, y * TILE_SIZE, (x + 1) * TILE_SIZE, (y + 1) * TILE_SIZE) - small_box = (x * TILE_SIZE/TILE_BLOCK_SIZE, y * TILE_SIZE/TILE_BLOCK_SIZE, (x + 1) * TILE_SIZE/TILE_BLOCK_SIZE, (y + 1) * TILE_SIZE/TILE_BLOCK_SIZE) - work_queue.put((list(original_img_small.crop(small_box).getdata()), large_box)) - progress.update() - except KeyboardInterrupt: - print('\nHalting, saving partial image please wait...') +def compose(original_img, tiles): + print('Building mosaic, press Ctrl-C to abort...') + original_img_large, original_img_small = original_img + tiles_large, tiles_small = tiles + + mosaic = MosaicImage(original_img_large) + + all_tile_data_large = [list(tile.getdata()) for tile in tiles_large] + all_tile_data_small = [list(tile.getdata()) for tile in tiles_small] + + work_queue = Queue(WORKER_COUNT) + result_queue = Queue() + + try: + Process(target=build_mosaic, args=( + result_queue, all_tile_data_large, original_img_large)).start() + + for n in range(WORKER_COUNT): + Process(target=fit_tiles, args=( + work_queue, result_queue, all_tile_data_small)).start() + + progress = ProgressCounter(mosaic.x_tile_count * 
mosaic.y_tile_count) + for x in range(mosaic.x_tile_count): + for y in range(mosaic.y_tile_count): + large_box = ( + x * TILE_SIZE, + y * TILE_SIZE, + (x + 1) * TILE_SIZE, + (y + 1) * TILE_SIZE + ) + small_box = ( + x * TILE_SIZE / TILE_BLOCK_SIZE, + y * TILE_SIZE / TILE_BLOCK_SIZE, + (x + 1) * TILE_SIZE / TILE_BLOCK_SIZE, + (y + 1) * TILE_SIZE / TILE_BLOCK_SIZE + ) + work_queue.put( + (list(original_img_small.crop(small_box).getdata()), + large_box) + ) + progress.update() + + except KeyboardInterrupt: + print('\nHalting, saving partial image please wait...') + + finally: + for n in range(WORKER_COUNT): + work_queue.put((EOQ_VALUE, EOQ_VALUE)) - finally: - # put these special values onto the queue to let the workers know they can terminate - for n in range(WORKER_COUNT): - work_queue.put((EOQ_VALUE, EOQ_VALUE)) def show_error(msg): - print('ERROR: {}'.format(msg)) + print('ERROR: {}'.format(msg)) + def mosaic(img_path, tiles_path): - image_data = TargetImage(img_path).get_data() - tiles_data = TileProcessor(tiles_path).get_tiles() - if tiles_data[0]: - compose(image_data, tiles_data) - else: - show_error("No images found in tiles directory '{}'".format(tiles_path)) + image_data = TargetImage(img_path).get_data() + tiles_data = TileProcessor(tiles_path).get_tiles() + if tiles_data[0]: + compose(image_data, tiles_data) + else: + show_error( + "No images found in tiles directory '{}'".format(tiles_path) + ) + if __name__ == '__main__': - if len(sys.argv) < 3: - show_error('Usage: {} \r'.format(sys.argv[0])) - else: - source_image = sys.argv[1] - tile_dir = sys.argv[2] - if not os.path.isfile(source_image): - show_error("Unable to find image file '{}'".format(source_image)) - elif not os.path.isdir(tile_dir): - show_error("Unable to find tile directory '{}'".format(tile_dir)) - else: - mosaic(source_image, tile_dir) - -# this is not pep8 valid very not valid \ No newline at end of file + if len(sys.argv) < 3: + show_error('Usage: {} \r'.format(sys.argv[0])) + else: + 
source_image = sys.argv[1] + tile_dir = sys.argv[2] + if not os.path.isfile(source_image): + show_error("Unable to find image file '{}'".format(source_image)) + elif not os.path.isdir(tile_dir): + show_error("Unable to find tile directory '{}'".format(tile_dir)) + else: + mosaic(source_image, tile_dir) diff --git a/pep8-enforcement.yaml b/pep8-enforcement.yaml new file mode 100644 index 0000000..7f146e1 --- /dev/null +++ b/pep8-enforcement.yaml @@ -0,0 +1,69 @@ +name: PEP 8 Compliance Check + +on: + workflow_dispatch: + push: + branches: + - '**' # Run on pushes to any branch + +jobs: + pep8_check: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 # IMPORTANT: Fetch all history for git diff to work correctly + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' # Specify your Python version (e.g., '3.9', '3.10', '3.11') + + - name: Install flake8 + run: pip install flake8 + + - name: Get changed Python files + id: changed-files + run: | + # For push events, compare with the previous commit + # Handle cases where commits might not exist or be accessible + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + # Manual trigger - check all Python files + CHANGED_PYTHON_FILES=$(find . -name "*.py" -type f | tr '\n' ' ') + elif [ "${{ github.event.before }}" = "0000000000000000000000000000000000000000" ]; then + # First commit - check all Python files + CHANGED_PYTHON_FILES=$(find . -name "*.py" -type f | tr '\n' ' ') + else + # Try git diff, fallback to all files if it fails + CHANGED_PYTHON_FILES=$(git diff --name-only --diff-filter=ACM ${{ github.event.before }} ${{ github.sha }} 2>/dev/null | grep '\.py$' | tr '\n' ' ' || find . 
-name "*.py" -type f | tr '\n' ' ') + fi + + # Ensure we have a value even if empty + if [ -z "$CHANGED_PYTHON_FILES" ]; then + CHANGED_PYTHON_FILES="" + fi + + echo "changed_python_files=${CHANGED_PYTHON_FILES}" >> $GITHUB_OUTPUT + echo "Event type: ${{ github.event_name }}" + echo "Before commit: ${{ github.event.before }}" + echo "Current commit: ${{ github.sha }}" + echo "Found Python files: '$CHANGED_PYTHON_FILES'" + shell: bash + + - name: Run flake8 on changed Python files + if: success() && steps.changed-files.outputs.changed_python_files + run: | + echo "Checking the following files for PEP 8 compliance:" + echo "${{ steps.changed-files.outputs.changed_python_files }}" + echo "" + # Use xargs to properly handle multiple files + echo "${{ steps.changed-files.outputs.changed_python_files }}" | xargs flake8 + shell: bash + + - name: Report success if no Python files were changed (Optional, for clarity) + # This step only runs if the 'Run flake8' step was skipped due to no Python files being found. + # It provides a clear message in the GitHub Actions UI. + if: success() && !steps.changed-files.outputs.changed_python_files + run: echo "No .py files changed in this push. PEP 8 compliance check passed (skipped)." \ No newline at end of file From ba2758ad747662cc4b53f906e808115a8f21d794 Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 27 Feb 2026 18:25:01 -0800 Subject: [PATCH 05/71] checked for pycodestyle again. 
--- splicer.py | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 splicer.py diff --git a/splicer.py b/splicer.py new file mode 100644 index 0000000..723d4c6 --- /dev/null +++ b/splicer.py @@ -0,0 +1,115 @@ +import cv2 +import os +import hashlib +import argparse +import sys # noqa F401 to be used later + + +def get_file_md5(file_path): + """Generates an MD5 hash of the video file in chunks.""" + hash_md5 = hashlib.md5() + try: + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + return hash_md5.hexdigest() + except FileNotFoundError: + return None + + +def process_video(absolute_video_path, output_folder): + """Processes a single video file.""" + cap = cv2.VideoCapture(absolute_video_path) + if not cap.isOpened(): + print(f" [!] Error: Could not open \ + '{os.path.basename(absolute_video_path)}'. Skipping.") + return + + fps = cap.get(cv2.CAP_PROP_FPS) + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + if fps <= 0: + print(f" [!] Error: Could not determine FPS for \ + '{os.path.basename(absolute_video_path)}'. Skipping.") + cap.release() + return + + duration_seconds = total_frames / fps + interval_seconds = 1 if duration_seconds < 60 else 5 + capture_step = int(fps * interval_seconds) + + file_hash = get_file_md5(absolute_video_path) + saved_count = 0 + current_frame_idx = 0 + + print(f" Processing: \ + {os.path.basename(absolute_video_path)} ({duration_seconds:.1f}s)") # noqa + + while True: + ret, frame = cap.read() + if not ret: + break + + if current_frame_idx % capture_step == 0: + saved_count += 1 + filename = f"{file_hash}-{saved_count:04d}.png" # noqa + save_path = os.path.join(output_folder, filename) + cv2.imwrite(save_path, frame) + + current_frame_idx += 1 + + cap.release() + print(f" Done. 
Saved {saved_count} frames.") + + +def main(): + parser = argparse.ArgumentParser( + description="Extract frames from videos based on length.") + parser.add_argument("-path", help="Path to a single video file.") + parser.add_argument("-folder", help="Path to \ + a folder to process recursively.") + + args = parser.parse_args() + script_dir = os.path.dirname(os.path.abspath(__file__)) + output_folder = os.path.join(script_dir, "frames") + + if not os.path.exists(output_folder): + os.makedirs(output_folder) + + # List to hold all files to be processed + video_files = [] + + # Handle single file mode + if args.path: + abs_path = os.path.abspath(args.path) + if os.path.isfile(abs_path): + video_files.append(abs_path) + else: + print(f"Error: Single file '{abs_path}' not found.") + + # Handle recursive folder mode + if args.folder: + abs_folder = os.path.abspath(args.folder) + if os.path.isdir(abs_folder): + print(f"Scanning folder: {abs_folder}...") + for root, dirs, files in os.walk(abs_folder): + for file in files: + if file.lower().endswith(".mp4"): + video_files.append(os.path.join(root, file)) + else: + print(f"Error: Folder '{abs_folder}' not found.") + + if not video_files: + print("No valid .mp4 files found to process. Use -path or -folder.") + return + + print(f"Found {len(video_files)} video(s) to process.\n" + "-"*30) + + for vid in video_files: + process_video(vid, output_folder) + + print("-"*30 + f"\nAll tasks complete. 
Frames are in: {output_folder}") + + +if __name__ == "__main__": + main() From 62a4e20ffec4257b31e38815eb6a47154b2a829a Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 27 Feb 2026 18:48:42 -0800 Subject: [PATCH 06/71] mostly known working code for s3_acces --- s3_access.py | 216 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 s3_access.py diff --git a/s3_access.py b/s3_access.py new file mode 100644 index 0000000..821f449 --- /dev/null +++ b/s3_access.py @@ -0,0 +1,216 @@ +import boto3 +import os +from botocore.exceptions import ClientError + + +class S3Access: + """S3 access class for managing S3 bucket operations.""" + + def __init__(self, bucket_name): + """ + Initialize S3Access with a bucket name. + + @Args: + bucket_name (str): Name of the S3 bucket to connect to + """ + self.bucket_name = bucket_name + self.s3_client = boto3.client('s3') + + def list_sources(self): + """ + List all objects in the sources folder of the S3 bucket. + + Returns: + list: List of object keys in the sources folder + """ + try: + # List objects with prefix 'sources/' + response = self.s3_client.list_objects_v2( + Bucket=self.bucket_name, + Prefix='sources/' + ) + + # Extract object keys from the response + if 'Contents' in response: + object_keys = [obj['Key'] for obj in response['Contents']] + return object_keys + else: + return [] + + except ClientError as e: + print(f"Error listing sources: {e}") + return [] + + def rename_key(self, current_key, new_key): + """ + Rename an S3 object by copying it to a new key and deleting the old. 
+ + Args: + current_key (str): Current key name of the S3 object + new_key (str): New key name for the S3 object + + Returns: + bool: True if successful, False otherwise + """ + try: + # Copy the object to the new key + copy_source = { + 'Bucket': self.bucket_name, + 'Key': current_key + } + + self.s3_client.copy_object( + Bucket=self.bucket_name, + CopySource=copy_source, + Key=new_key + ) + + # Delete the original object + self.s3_client.delete_object( + Bucket=self.bucket_name, + Key=current_key + ) + + print(f"Successfully renamed {current_key} to {new_key}") + return True + + except ClientError as e: + print(f"Error renaming key {current_key} to {new_key}: {e}") + return False + + def put_object(self, key, file_object): + """ + Upload a file object to S3 with the specified key. + + Args: + key (str): Key name for the S3 object + file_object: File-like object to upload (must support read()) + + Returns: + bool: True if successful, False otherwise + """ + try: + self.s3_client.put_object( + Bucket=self.bucket_name, + Key=key, + Body=file_object + ) + + print(f"Successfully uploaded object to {key}") + return True + + except ClientError as e: + print(f"Error uploading object to {key}: {e}") + return False + + def get_object(self, key): + """ + Get an object from S3 with the specified key. + + Args: + key (str): Key name of the S3 object to retrieve + + Returns: + bytes: File content as bytes, or None if error + """ + try: + response = self.s3_client.get_object( + Bucket=self.bucket_name, + Key=key + ) + + file_content = response['Body'].read() + print(f"Successfully retrieved object {key}") + return file_content + + except ClientError as e: + print(f"Error retrieving object {key}: {e}") + return None + + def object_exists(self, key): + """ + Check if an object exists in S3 with the specified key. 
+ + Args: + key (str): Key name of the S3 object to check + + Returns: + bool: True if object exists, False otherwise + """ + try: + self.s3_client.head_object( + Bucket=self.bucket_name, + Key=key + ) + return True + + except ClientError as e: + if e.response['Error']['Code'] == '404': + return False + else: + print(f"Error checking if object {key} exists: {e}") + return False + + def delete_object(self, key): + """ + Delete an object from S3 with the specified key. + + Args: + key (str): Key name of the S3 object to delete + + Returns: + bool: True if successful, False otherwise + """ + try: + self.s3_client.delete_object( + Bucket=self.bucket_name, + Key=key + ) + + print(f"Successfully deleted object {key}") + return True + + except ClientError as e: + print(f"Error deleting object {key}: {e}") + return False + + def download_to_disk(self, s3_key, local_path): + """ + Downloads a file from S3 directly to the local filesystem. + + Args: + s3_key (str): The key of the object in S3. + local_path (str): Where to save the file locally. + """ + try: + # Create the local directory if it doesn't exist + local_dir = os.path.dirname(local_path) + if local_dir and not os.path.exists(local_dir): + os.makedirs(local_dir) + + self.s3_client.download_file(self.bucket_name, s3_key, local_path) + print(f"Successfully downloaded {s3_key} to {local_path}") + return True + except ClientError as e: + print(f"Error downloading {s3_key}: {e}") + return False + + def upload_from_disk(self, local_path, s3_key): + """ + Uploads a file from the local filesystem to S3. + + Args: + local_path (str): Path to the local file. + s3_key (str): The destination key in S3. 
+ """ + if not os.path.isfile(local_path): + print(f"Error: Local file {local_path} does not exist.") + return False + + try: + self.s3_client.upload_file(local_path, self.bucket_name, s3_key) + print(f"Successfully uploaded {local_path} to {s3_key}") + return True + except ClientError as e: + print(f"Error uploading {local_path}: {e}") + return False From 27b68e0366c2a02e7203c99eaa388a668ec5c732 Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 27 Feb 2026 18:54:39 -0800 Subject: [PATCH 07/71] new version of flake8? --- .pre-commit-config.yaml | 2 +- s3_access.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 00f604e..10ae814 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,5 @@ repos: - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 7.3.0 hooks: - id: flake8 \ No newline at end of file diff --git a/s3_access.py b/s3_access.py index 821f449..5c91c5a 100644 --- a/s3_access.py +++ b/s3_access.py @@ -6,7 +6,7 @@ class S3Access: """S3 access class for managing S3 bucket operations.""" - def __init__(self, bucket_name): + def __init__(self, bucket_name: str): """ Initialize S3Access with a bucket name. @@ -16,7 +16,7 @@ def __init__(self, bucket_name): self.bucket_name = bucket_name self.s3_client = boto3.client('s3') - def list_sources(self): + def list_sources(self, s3folder: str): """ List all objects in the sources folder of the S3 bucket. @@ -27,7 +27,7 @@ def list_sources(self): # List objects with prefix 'sources/' response = self.s3_client.list_objects_v2( Bucket=self.bucket_name, - Prefix='sources/' + Prefix=s3folder ) # Extract object keys from the response From e672eb232ba130bddb5394b6b4e1c56e27ae347c Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 27 Feb 2026 19:16:15 -0800 Subject: [PATCH 08/71] added helper tool to load data from s3 to a local ec2 instance ebs drive. 
--- load_data_from_s3.py | 72 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 load_data_from_s3.py diff --git a/load_data_from_s3.py b/load_data_from_s3.py new file mode 100644 index 0000000..3bd3741 --- /dev/null +++ b/load_data_from_s3.py @@ -0,0 +1,72 @@ +import os +import argparse +import sys +from s3_access import S3Access # Assuming your class file is s3_access.py + + +def is_mounted(path): + """Check if the specific path is a mounted volume.""" + return os.path.ismount(path) + + +def main(): + parser = argparse.ArgumentParser( + description="Download video assets from S3 to EBS storage." + ) + parser.add_argument( + "--all-videos", + action="store_true", + help="Download all objects from the 'video/' prefix" + ) + + args = parser.parse_args() + + bucket_name = os.environ.get("S3_STORAGE") + mount_point = "/mnt/ebs" + local_target_dir = os.path.join(mount_point, "raw_vids") + s3_prefix = "video" + + if not bucket_name: + print("ERROR: Environment variable 'S3_STORAGE' is not set.") + sys.exit(1) + + # Safety Check: Is the EBS volume actually there? + if not is_mounted(mount_point): + print(f"ERROR: EBS volume is not mounted at {mount_point}") + print("Aborting to prevent filling up the root partition.") + sys.exit(1) + + s3 = S3Access(bucket_name) + + if args.all_videos: + print(f"Scanning s3://{bucket_name}/{s3_prefix}...") + video_keys = s3.list_sources(s3_prefix) + + if not video_keys: + print("No videos found.") + return + + os.makedirs(local_target_dir, exist_ok=True) + + print(f"Found {len(video_keys)} objects. 
\ + Downloading to {local_target_dir}...") + + for key in video_keys: + filename = os.path.basename(key) + if not filename: # Skips the prefix 'folder' key + continue + + local_path = os.path.join(local_target_dir, filename) + + success = s3.download_to_disk(key, local_path) + + if not success: + print(f"Failed to download: {key}") + + print("\nProcess Complete.") + else: + parser.print_help() + + +if __name__ == "__main__": + main() From 2f4c04c66a9208f10d96ca63533b47e303ffd6d5 Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 27 Feb 2026 19:36:44 -0800 Subject: [PATCH 09/71] installed the initializer --- load_data_from_s3.py | 73 ++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/load_data_from_s3.py b/load_data_from_s3.py index 3bd3741..1855860 100644 --- a/load_data_from_s3.py +++ b/load_data_from_s3.py @@ -1,7 +1,7 @@ import os import argparse import sys -from s3_access import S3Access # Assuming your class file is s3_access.py +from s3_access import S3Access def is_mounted(path): @@ -11,60 +11,67 @@ def is_mounted(path): def main(): parser = argparse.ArgumentParser( - description="Download video assets from S3 to EBS storage." + description="Transfer assets from S3 to EBS storage." ) + parser.add_argument( "--all-videos", action="store_true", - help="Download all objects from the 'video/' prefix" + help="Download all objects from \ + the 'video/' prefix to /mnt/ebs/raw_vids" + ) + parser.add_argument( + "--all-photos", + action="store_true", + help="Download .png and .jpeg \ + objects from 'photos/' to /mnt/ebs/samples" ) args = parser.parse_args() bucket_name = os.environ.get("S3_STORAGE") mount_point = "/mnt/ebs" - local_target_dir = os.path.join(mount_point, "raw_vids") - s3_prefix = "video" if not bucket_name: print("ERROR: Environment variable 'S3_STORAGE' is not set.") sys.exit(1) - # Safety Check: Is the EBS volume actually there? 
if not is_mounted(mount_point): print(f"ERROR: EBS volume is not mounted at {mount_point}") - print("Aborting to prevent filling up the root partition.") sys.exit(1) s3 = S3Access(bucket_name) + # Logic for Videos if args.all_videos: - print(f"Scanning s3://{bucket_name}/{s3_prefix}...") - video_keys = s3.list_sources(s3_prefix) - - if not video_keys: - print("No videos found.") - return - - os.makedirs(local_target_dir, exist_ok=True) - - print(f"Found {len(video_keys)} objects. \ - Downloading to {local_target_dir}...") - - for key in video_keys: - filename = os.path.basename(key) - if not filename: # Skips the prefix 'folder' key - continue - - local_path = os.path.join(local_target_dir, filename) - - success = s3.download_to_disk(key, local_path) - - if not success: - print(f"Failed to download: {key}") - - print("\nProcess Complete.") - else: + local_vids_dir = os.path.join(mount_point, "raw_vids") + os.makedirs(local_vids_dir, exist_ok=True) + + print(f"Downloading videos to {local_vids_dir}...") + keys = s3.list_sources("lowresvideo") + for key in keys: + if os.path.basename(key): + s3.download_to_disk( + key, + os.path.join(local_vids_dir, os.path.basename(key))) + + # Logic for Photos. Get the phots we'll make mosaics from + if args.all_photos: + local_photos_dir = os.path.join(mount_point, "samples") + os.makedirs(local_photos_dir, exist_ok=True) + + print(f"Downloading photos to {local_photos_dir}...") + keys = s3.list_sources("moasic-art-photos") + + valid_extensions = ('.png', '.jpeg', '.jpg') + + for key in keys: + if key.lower().endswith(valid_extensions): + filename = os.path.basename(key) + s3.download_to_disk( + key, os.path.join(local_photos_dir, filename)) + + if not (args.all_videos or args.all_photos): parser.print_help() From 924b3cba3455eef21e987e3ddb180c2bd12ded07 Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 27 Feb 2026 19:43:36 -0800 Subject: [PATCH 10/71] splicer now should resize down to smaller tiles. 
--- splicer.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/splicer.py b/splicer.py index 723d4c6..301bd96 100644 --- a/splicer.py +++ b/splicer.py @@ -17,6 +17,28 @@ def get_file_md5(file_path): return None +def resize_maintain_aspect(frame, short_side_target=200): + """ + Resizes a frame so the shortest side matches short_side_target, + maintaining the original aspect ratio. + """ + h, w = frame.shape[:2] + + # Determine which side is the shortest + if h < w: + # Landscape or Square: height is the shortest + ratio = short_side_target / float(h) + new_dim = (int(w * ratio), short_side_target) + else: + # Portrait: width is the shortest + ratio = short_side_target / float(w) + new_dim = (short_side_target, int(h * ratio)) + + # Perform the actual resize + resized_frame = cv2.resize(frame, new_dim, interpolation=cv2.INTER_AREA) + return resized_frame + + def process_video(absolute_video_path, output_folder): """Processes a single video file.""" cap = cv2.VideoCapture(absolute_video_path) @@ -53,8 +75,10 @@ def process_video(absolute_video_path, output_folder): if current_frame_idx % capture_step == 0: saved_count += 1 filename = f"{file_hash}-{saved_count:04d}.png" # noqa + processed_frame = resize_maintain_aspect( + frame, short_side_target=200) save_path = os.path.join(output_folder, filename) - cv2.imwrite(save_path, frame) + cv2.imwrite(save_path, processed_frame) current_frame_idx += 1 From feae3a0db2215afb3e89b95e83286ef8fda37e0c Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 27 Feb 2026 19:47:32 -0800 Subject: [PATCH 11/71] hardcoded the out folder --- splicer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/splicer.py b/splicer.py index 301bd96..cf70ca7 100644 --- a/splicer.py +++ b/splicer.py @@ -94,8 +94,7 @@ def main(): a folder to process recursively.") args = parser.parse_args() - script_dir = os.path.dirname(os.path.abspath(__file__)) - output_folder = os.path.join(script_dir, 
"frames") + output_folder = os.path.join('/mnt/ebs/', "frames") if not os.path.exists(output_folder): os.makedirs(output_folder) From 0a45fc2e6913dd24a0eee998e68e04d09b47b64c Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 27 Feb 2026 20:03:47 -0800 Subject: [PATCH 12/71] slightly redundant write functionality to s3 now provided. --- load_data_from_s3.py | 47 +++++++++++++++++++++++++++++++++++--------- requirements.txt | 3 +++ 2 files changed, 41 insertions(+), 9 deletions(-) create mode 100644 requirements.txt diff --git a/load_data_from_s3.py b/load_data_from_s3.py index 1855860..088ba5f 100644 --- a/load_data_from_s3.py +++ b/load_data_from_s3.py @@ -11,24 +11,32 @@ def is_mounted(path): def main(): parser = argparse.ArgumentParser( - description="Transfer assets from S3 to EBS storage." + description="Transfer assets between S3 and EBS storage." ) parser.add_argument( "--all-videos", action="store_true", - help="Download all objects from \ - the 'video/' prefix to /mnt/ebs/raw_vids" + help="Download all objects from the 'lowresvideo' \ + prefix to /mnt/ebs/raw_vids" ) parser.add_argument( "--all-photos", action="store_true", - help="Download .png and .jpeg \ - objects from 'photos/' to /mnt/ebs/samples" + help="Download .png and .jpeg objects from \ + 'moasic-art-photos' to /mnt/ebs/samples" + ) + # New flag for uploading results + parser.add_argument( + "--upload-results", + action="store_true", + help="Upload all images from /mnt/ebs/mosaics \ + to the S3 'mosaics' prefix" ) args = parser.parse_args() + # Configuration from environment and paths bucket_name = os.environ.get("S3_STORAGE") mount_point = "/mnt/ebs" @@ -42,7 +50,7 @@ def main(): s3 = S3Access(bucket_name) - # Logic for Videos + # --- Logic for Videos (Download) --- if args.all_videos: local_vids_dir = os.path.join(mount_point, "raw_vids") os.makedirs(local_vids_dir, exist_ok=True) @@ -55,14 +63,13 @@ def main(): key, os.path.join(local_vids_dir, os.path.basename(key))) - # Logic for Photos. 
Get the phots we'll make mosaics from + # --- Logic for Photos to be turned to mosaics (Download) if args.all_photos: local_photos_dir = os.path.join(mount_point, "samples") os.makedirs(local_photos_dir, exist_ok=True) print(f"Downloading photos to {local_photos_dir}...") keys = s3.list_sources("moasic-art-photos") - valid_extensions = ('.png', '.jpeg', '.jpg') for key in keys: @@ -71,7 +78,29 @@ def main(): s3.download_to_disk( key, os.path.join(local_photos_dir, filename)) - if not (args.all_videos or args.all_photos): + # --- Logic for Results (Upload) --- + if args.upload_results: + local_mosaics_dir = os.path.join(mount_point, "mosaics") + + if not os.path.exists(local_mosaics_dir): + print(f"Skipping upload: {local_mosaics_dir} does not exist.") + else: + print(f"Uploading mosaic results \ + to S3 bucket '{bucket_name}/mosaics'...") + valid_images = ('.png', '.jpg', '.jpeg', '.tiff') + + # Loop through the local mosaic directory + for filename in os.listdir(local_mosaics_dir): + if filename.lower().endswith(valid_images): + local_path = os.path.join(local_mosaics_dir, filename) + # Prepend 'mosaics/' prefix for S3 + s3_key = f"mosaics/{filename}" + + print(f"Uploading {filename}...") + s3.upload_from_disk(local_path, s3_key) + + # Show help if no flags are provided + if not (args.all_videos or args.all_photos or args.upload_results): parser.print_help() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..df80ae3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +boto3>=1.26.0 +Pillow>=10.0.0 +opencv-python>=4.8.0 \ No newline at end of file From 0b03b9d1a83da9a18723cd32596a1538e7d3db40 Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 27 Feb 2026 20:17:20 -0800 Subject: [PATCH 13/71] save of a mosaic now goes to a static location. 
--- mosaic.py | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/mosaic.py b/mosaic.py index 0df6ce7..c5312e0 100644 --- a/mosaic.py +++ b/mosaic.py @@ -1,5 +1,7 @@ import sys import os +import io +import hashlib import os.path from PIL import Image, ImageOps from multiprocessing import Process, Queue, cpu_count @@ -166,8 +168,32 @@ def add_tile(self, tile_data, coords): img.putdata(tile_data) self.image.paste(img, coords) - def save(self, path): - self.image.save(path) + def save(self): + """ + Saves the image_obj as a .jpeg to /mnt/ebs/mosaics + using its MD5 hash as the filename. + """ + output_dir = "/mnt/ebs/mosaics" + + # Ensure the output directory exists + os.makedirs(output_dir, exist_ok=True) + + # 1. Convert image to bytes to calculate hash + # We save to a temporary buffer or use the raw data + img_byte_arr = io.BytesIO() + self.image.save(img_byte_arr, format='JPEG') + img_bytes = img_byte_arr.getvalue() + + md5_hash = hashlib.md5(img_bytes).hexdigest() + + filename = f"{md5_hash}.jpeg" + final_path = os.path.join(output_dir, filename) + + with open(final_path, "wb") as f: + f.write(img_bytes) + + print(f"Mosaic saved to: {final_path}") + return final_path def build_mosaic(result_queue, all_tile_data_large, original_img_large): @@ -189,8 +215,8 @@ def build_mosaic(result_queue, all_tile_data_large, original_img_large): except KeyboardInterrupt: pass - mosaic.save(OUT_FILE) - print('\nFinished, output is in', OUT_FILE) + OUT_FILEPATH = mosaic.save() + print('\nFinished, output is in', OUT_FILEPATH) def compose(original_img, tiles): From 41e49812c014e28e08bf87440103ee00bf840fe2 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 07:56:28 -0800 Subject: [PATCH 14/71] removing mount check for now. 
--- load_data_from_s3.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/load_data_from_s3.py b/load_data_from_s3.py index 088ba5f..d781af5 100644 --- a/load_data_from_s3.py +++ b/load_data_from_s3.py @@ -44,9 +44,9 @@ def main(): print("ERROR: Environment variable 'S3_STORAGE' is not set.") sys.exit(1) - if not is_mounted(mount_point): - print(f"ERROR: EBS volume is not mounted at {mount_point}") - sys.exit(1) + # noqa if not is_mounted("/"): + # noqa print(f"ERROR: EBS volume is not mounted at {mount_point}") + # noqa sys.exit(1) s3 = S3Access(bucket_name) From 394ba37df1ff02ef87f7eb7b18ee68aa0ca78f3c Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 08:12:56 -0800 Subject: [PATCH 15/71] corrected a breaking typo. --- load_data_from_s3.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/load_data_from_s3.py b/load_data_from_s3.py index d781af5..f03b6ec 100644 --- a/load_data_from_s3.py +++ b/load_data_from_s3.py @@ -44,10 +44,6 @@ def main(): print("ERROR: Environment variable 'S3_STORAGE' is not set.") sys.exit(1) - # noqa if not is_mounted("/"): - # noqa print(f"ERROR: EBS volume is not mounted at {mount_point}") - # noqa sys.exit(1) - s3 = S3Access(bucket_name) # --- Logic for Videos (Download) --- @@ -69,7 +65,7 @@ def main(): os.makedirs(local_photos_dir, exist_ok=True) print(f"Downloading photos to {local_photos_dir}...") - keys = s3.list_sources("moasic-art-photos") + keys = s3.list_sources("mosaic-art-photos") valid_extensions = ('.png', '.jpeg', '.jpg') for key in keys: From ec791129318b405c2ebaff91ede1a870ccc9b3b8 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 11:47:09 -0800 Subject: [PATCH 16/71] updated .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 0f5d24d..d7d111a 100644 --- a/.gitignore +++ b/.gitignore @@ -218,3 +218,6 @@ crash.*.log .venv __pycache__ secrets.txt +test-media/ +test-results/ +logs/ \ No newline at end 
of file From ca8b09fdbc744ff2541a18926c0c150182265156 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 12:13:29 -0800 Subject: [PATCH 17/71] made a dangerous idea for Image. Disabled some protections. --- mosaic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mosaic.py b/mosaic.py index c5312e0..28f6700 100644 --- a/mosaic.py +++ b/mosaic.py @@ -10,6 +10,7 @@ TILE_SIZE = 50 # height/width of mosaic tiles in pixels TILE_MATCH_RES = 5 # tile matching resolution ENLARGEMENT = 8 # mosaic image will be this many times larger +Image.MAX_IMAGE_PIXELS = None # Dangerous, but allow it for now TILE_BLOCK_SIZE = TILE_SIZE / max(min(TILE_MATCH_RES, TILE_SIZE), 1) WORKER_COUNT = max(cpu_count() - 1, 1) From a15dc7147288797928b19e26393658ec2f67db01 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 12:49:10 -0800 Subject: [PATCH 18/71] minimized logging for my own sanity. --- mosaic.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mosaic.py b/mosaic.py index 28f6700..f6bc3f8 100644 --- a/mosaic.py +++ b/mosaic.py @@ -56,9 +56,6 @@ def get_tiles(self): # search the tiles directory recursively for root, subFolders, files in os.walk(self.tiles_directory): for tile_name in files: - print( - 'Reading {:40.40}'.format(tile_name), - flush=True, end='\r') tile_path = os.path.join(root, tile_name) large_tile, small_tile = self.__process_tile(tile_path) if large_tile: @@ -260,7 +257,7 @@ def compose(original_img, tiles): (list(original_img_small.crop(small_box).getdata()), large_box) ) - progress.update() + progress.update() # process updates on every x completion. except KeyboardInterrupt: print('\nHalting, saving partial image please wait...') From 73619b12860ff8e2616a07d9cb46de9a2beeb1d4 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 14:23:58 -0800 Subject: [PATCH 19/71] ensures smaller sources files. Logging is no longer excessive or silent. 
--- load_data_from_s3.py | 23 ++++++++++++++++++++--- mosaic.py | 7 ++++++- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/load_data_from_s3.py b/load_data_from_s3.py index f03b6ec..f6b8d11 100644 --- a/load_data_from_s3.py +++ b/load_data_from_s3.py @@ -1,12 +1,26 @@ import os import argparse import sys +from PIL import Image from s3_access import S3Access -def is_mounted(path): - """Check if the specific path is a mounted volume.""" - return os.path.ismount(path) +def resize_in_place(file_path, max_dimension=600): + """ + Opens an image, resizes it to fit within max_dimension + (maintaining aspect ratio), and overwrites the original file. + """ + try: + with Image.open(file_path) as img: + # .thumbnail handles aspect ratio automatically + # It only shrinks if the image is larger than 600px + img.thumbnail((max_dimension, max_dimension), + Image.Resampling.LANCZOS) + img.save(file_path) + return True + except Exception as e: + print(f"Error resizing {file_path}: {e}") + return False def main(): @@ -70,9 +84,12 @@ def main(): for key in keys: if key.lower().endswith(valid_extensions): + # downloads source files from s3 + # ensures they are never larger 600 on longer side filename = os.path.basename(key) s3.download_to_disk( key, os.path.join(local_photos_dir, filename)) + resize_in_place(os.path.join(local_photos_dir, filename)) # --- Logic for Results (Upload) --- if args.upload_results: diff --git a/mosaic.py b/mosaic.py index f6bc3f8..031fc14 100644 --- a/mosaic.py +++ b/mosaic.py @@ -50,7 +50,8 @@ def __process_tile(self, tile_path): def get_tiles(self): large_tiles = [] small_tiles = [] - + count = 0 + exp_threshold = 1 # for logging print('Reading tiles from {}...'.format(self.tiles_directory)) # search the tiles directory recursively @@ -61,6 +62,10 @@ def get_tiles(self): if large_tile: large_tiles.append(large_tile) small_tiles.append(small_tile) + count += 1 + if count == exp_threshold: + print(f'Processed {count} file(s) so far...') + 
exp_threshold = exp_threshold * 2 print('Processed {} tiles.'.format(len(large_tiles))) From 014a18540f8f293d6aa777bb60444ebac8165417 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 15:48:47 -0800 Subject: [PATCH 20/71] m brain transplated a processer. Now uses sci-kit learn tools. --- mosaic-v2.py | 398 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 398 insertions(+) create mode 100644 mosaic-v2.py diff --git a/mosaic-v2.py b/mosaic-v2.py new file mode 100644 index 0000000..4e4f6d3 --- /dev/null +++ b/mosaic-v2.py @@ -0,0 +1,398 @@ +import sys +import os +import io +import hashlib +import os.path +from PIL import Image, ImageOps +from scipy.spatial import KDTree +import numpy as np +from skimage.metrics import structural_similarity as ssim +from multiprocessing import Process, Queue, cpu_count + +# Change these 3 config parameters to suit your needs... +TILE_SIZE = 50 # height/width of mosaic tiles in pixels +TILE_MATCH_RES = 5 # tile matching resolution +ENLARGEMENT = 8 # mosaic image will be this many times larger +Image.MAX_IMAGE_PIXELS = None # Dangerous, but allow it for now + +TILE_BLOCK_SIZE = TILE_SIZE / max(min(TILE_MATCH_RES, TILE_SIZE), 1) +WORKER_COUNT = max(cpu_count() - 1, 1) +OUT_FILE = 'mosaic.jpeg' +EOQ_VALUE = None + + +class TileProcessor: + def __init__(self, tiles_directory): + self.tiles_directory = tiles_directory + + def get_average_color(self, img_path_or_obj): + """ + The 'Resize Trick': Shrinks an image to 1x1 to find the mean RGB. + Accepts either a file path or an existing PIL Image object. 
+ """ + try: + # If it's a path, open it; otherwise assume it's an Image object + if isinstance(img_path_or_obj, str): + img = Image.open(img_path_or_obj) + else: + img = img_path_or_obj + + img = img.convert('RGB') + # BOX resampling is fast and mathematically accurate for averaging + img_tiny = img.resize((1, 1), resample=Image.Resampling.BOX) + return img_tiny.getpixel((0, 0)) + except Exception as e: + print(f"Error processing image: {e}") + return None + + def __process_tile(self, tile_path): + try: + img = Image.open(tile_path) + img = ImageOps.exif_transpose(img) + + # tiles must be square, so get the largest square that fits inside + w = img.size[0] + h = img.size[1] + min_dimension = min(w, h) + w_crop = (w - min_dimension) / 2 + h_crop = (h - min_dimension) / 2 + img = img.crop((w_crop, h_crop, w - w_crop, h - h_crop)) + + large_tile_img = img.resize((TILE_SIZE, TILE_SIZE), Image.LANCZOS) + small_tile_img = img.resize( + (int(TILE_SIZE / TILE_BLOCK_SIZE), + int(TILE_SIZE / TILE_BLOCK_SIZE)), + Image.LANCZOS + ) + + return (large_tile_img.convert('RGB'), + small_tile_img.convert('RGB')) + except Exception: + return (None, None) + + def get_tiles(self): + large_tiles = [] + small_tiles = [] + count = 0 + exp_threshold = 1 # for logging + print('Reading tiles from {}...'.format(self.tiles_directory)) + + # search the tiles directory recursively + for root, subFolders, files in os.walk(self.tiles_directory): + for tile_name in files: + tile_path = os.path.join(root, tile_name) + large_tile, small_tile = self.__process_tile(tile_path) + if large_tile: + large_tiles.append(large_tile) + small_tiles.append(small_tile) + count += 1 + if count == exp_threshold: + print(f'Processed {count} file(s) so far...') + exp_threshold = exp_threshold * 2 + + print('Processed {} tiles.'.format(len(large_tiles))) + + return (large_tiles, small_tiles) + + +class TargetImage: + def __init__(self, image_path): + self.image_path = image_path + + def get_data(self): + 
print('Processing main image...') + img = Image.open(self.image_path) + w = img.size[0] * ENLARGEMENT + h = img.size[1] * ENLARGEMENT + large_img = img.resize((w, h), Image.LANCZOS) + w_diff = (w % TILE_SIZE) / 2 + h_diff = (h % TILE_SIZE) / 2 + + # crop the image slightly so we use a whole number of tiles + if w_diff or h_diff: + large_img = large_img.crop( + (w_diff, h_diff, w - w_diff, h - h_diff) + ) + + small_img = large_img.resize( + (int(w / TILE_BLOCK_SIZE), int(h / TILE_BLOCK_SIZE)), + Image.LANCZOS + ) + + image_data = (large_img.convert('RGB'), small_img.convert('RGB')) + + print('Main image processed.') + + return image_data + + +class TileFitter: + def __init__(self, tiles_data): + self.tiles_data = tiles_data + + def get_average_color(self, img_path_or_obj): + try: + # If it's a path, open it; otherwise assume it's an Image object + if isinstance(img_path_or_obj, str): + img = Image.open(img_path_or_obj) + else: + img = img_path_or_obj + + img = img.convert('RGB') + # BOX resampling is fast and mathematically accurate for averaging + img_tiny = img.resize((1, 1), resample=Image.Resampling.BOX) + return img_tiny.getpixel((0, 0)) + except Exception as e: + print(f"Error processing image: {e}") + return None + + def __get_tile_diff(self, t1, t2, bail_out_value): + diff = 0 + for i in range(len(t1)): + diff += ((t1[i][0] - t2[i][0])**2 + + (t1[i][1] - t2[i][1])**2 + + (t1[i][2] - t2[i][2])**2) + if diff > bail_out_value: + return diff + return diff + + def get_best_fit_tile(self, img_data): + """ Image Data is the from the Sample """ + best_fit_tile_index = None + min_diff = sys.maxsize + tile_index = 0 + + for tile_data in self.tiles_data: + diff = self.__get_tile_diff(img_data, tile_data, min_diff) + if diff < min_diff: + min_diff = diff + best_fit_tile_index = tile_index + tile_index += 1 + + return best_fit_tile_index + + +class TileFitterSciKit: + def __init__(self, tiles_data, match_res=TILE_MATCH_RES): + # tiles_data here is the 'small_tiles' list 
from TileProcessor + self.tiles_data = tiles_data + self.match_res = match_res + + print("Initializing KDTree for hybrid search...") + # 1. Convert tiles to NumPy arrays once + # We reshape them from flat lists back into (5x5x3) blocks for SSIM + self.tiles_np = [ + np.array(t).reshape((self.match_res, self.match_res, 3)) + for t in tiles_data + ] + + # 2. Pre-calculate average colors for the Tree + avg_colors = [t.mean(axis=(0, 1)) for t in self.tiles_np] + self.tree = KDTree(np.array(avg_colors)) + print("KDTree + SSIM Hybrid Fitter Ready.") + + def get_best_fit_tile(self, img_data): + """ + img_data: A flat list of pixels (from original code's getdata()) + We convert it to NumPy to use the Tree and SSIM. + """ + # Convert the incoming list to a 5x5x3 array + target_np = np.array(img_data).reshape( + (self.match_res, self.match_res, 3)) + + # Step 1: KDTree Pruning (The "Bucket" step) + # Find the top 40 color matches + target_avg = target_np.mean(axis=(0, 1)) + _, indices = self.tree.query(target_avg, k=40) + + best_score = -1 + best_fit_tile_index = indices[0] + + # Step 2: SSIM Refinement + for idx in indices: + candidate_np = self.tiles_np[idx] + + # SSIM needs to know the range of pixel values (0-255) + score = ssim(target_np, + candidate_np, + channel_axis=2, + data_range=255) + + if score > best_score: + best_score = score + best_fit_tile_index = idx + + # Early exit if we find an amazing match + if score > 0.98: + break + + return best_fit_tile_index + + +def fit_tiles(work_queue, result_queue, tiles_data): + tile_fitter = TileFitterSciKit(tiles_data) + + while True: + try: + img_data, img_coords = work_queue.get(True) + if img_data == EOQ_VALUE: + break + tile_index = tile_fitter.get_best_fit_tile(img_data) + result_queue.put((img_coords, tile_index)) + except KeyboardInterrupt: + pass + + result_queue.put((EOQ_VALUE, EOQ_VALUE)) + + +class ProgressCounter: + def __init__(self, total): + self.total = total + self.counter = 0 + + def update(self): + 
self.counter += 1 + print("Progress: {:04.1f}%".format(100 * self.counter / self.total), + flush=True, end='\r') + + +class MosaicImage: + def __init__(self, original_img): + self.image = Image.new(original_img.mode, original_img.size) + self.x_tile_count = int(original_img.size[0] / TILE_SIZE) + self.y_tile_count = int(original_img.size[1] / TILE_SIZE) + self.total_tiles = self.x_tile_count * self.y_tile_count + + def add_tile(self, tile_data, coords): + img = Image.new('RGB', (TILE_SIZE, TILE_SIZE)) + img.putdata(tile_data) + self.image.paste(img, coords) + + def save(self): + """ + Saves the image_obj as a .jpeg to /mnt/ebs/mosaics + using its MD5 hash as the filename. + """ + output_dir = "/mnt/ebs/mosaics" + + # Ensure the output directory exists + os.makedirs(output_dir, exist_ok=True) + + # 1. Convert image to bytes to calculate hash + # We save to a temporary buffer or use the raw data + img_byte_arr = io.BytesIO() + self.image.save(img_byte_arr, format='JPEG') + img_bytes = img_byte_arr.getvalue() + + md5_hash = hashlib.md5(img_bytes).hexdigest() + + filename = f"{md5_hash}.jpeg" + final_path = os.path.join(output_dir, filename) + + with open(final_path, "wb") as f: + f.write(img_bytes) + + print(f"Mosaic saved to: {final_path}") + return final_path + + +def build_mosaic(result_queue, all_tile_data_large, original_img_large): + mosaic = MosaicImage(original_img_large) + + active_workers = WORKER_COUNT + while True: + try: + img_coords, best_fit_tile_index = result_queue.get() + + if img_coords == EOQ_VALUE: + active_workers -= 1 + if not active_workers: + break + else: + tile_data = all_tile_data_large[best_fit_tile_index] + mosaic.add_tile(tile_data, img_coords) + + except KeyboardInterrupt: + pass + + OUT_FILEPATH = mosaic.save() + print('\nFinished, output is in', OUT_FILEPATH) + + +def compose(original_img, tiles): + print('Building mosaic, press Ctrl-C to abort...') + original_img_large, original_img_small = original_img + tiles_large, tiles_small = 
tiles + + mosaic = MosaicImage(original_img_large) + + all_tile_data_large = [list(tile.getdata()) for tile in tiles_large] + all_tile_data_small = [list(tile.getdata()) for tile in tiles_small] + + work_queue = Queue(WORKER_COUNT) + result_queue = Queue() + + try: + Process(target=build_mosaic, args=( + result_queue, all_tile_data_large, original_img_large)).start() + + for n in range(WORKER_COUNT): + Process(target=fit_tiles, args=( + work_queue, result_queue, all_tile_data_small)).start() + + progress = ProgressCounter(mosaic.x_tile_count * mosaic.y_tile_count) + for x in range(mosaic.x_tile_count): + for y in range(mosaic.y_tile_count): + large_box = ( + x * TILE_SIZE, + y * TILE_SIZE, + (x + 1) * TILE_SIZE, + (y + 1) * TILE_SIZE + ) + small_box = ( + x * TILE_SIZE / TILE_BLOCK_SIZE, + y * TILE_SIZE / TILE_BLOCK_SIZE, + (x + 1) * TILE_SIZE / TILE_BLOCK_SIZE, + (y + 1) * TILE_SIZE / TILE_BLOCK_SIZE + ) + work_queue.put( + (list(original_img_small.crop(small_box).getdata()), + large_box) + ) + progress.update() # process updates on every x completion. 
+ + except KeyboardInterrupt: + print('\nHalting, saving partial image please wait...') + + finally: + for n in range(WORKER_COUNT): + work_queue.put((EOQ_VALUE, EOQ_VALUE)) + + +def show_error(msg): + print('ERROR: {}'.format(msg)) + + +def mosaic(img_path, tiles_path): + image_data = TargetImage(img_path).get_data() + tiles_data = TileProcessor(tiles_path).get_tiles() + if tiles_data[0]: + compose(image_data, tiles_data) + else: + show_error( + "No images found in tiles directory '{}'".format(tiles_path) + ) + + +if __name__ == '__main__': + if len(sys.argv) < 3: + show_error('Usage: {} \r'.format(sys.argv[0])) + else: + source_image = sys.argv[1] + tile_dir = sys.argv[2] + if not os.path.isfile(source_image): + show_error("Unable to find image file '{}'".format(source_image)) + elif not os.path.isdir(tile_dir): + show_error("Unable to find tile directory '{}'".format(tile_dir)) + else: + mosaic(source_image, tile_dir) From bbd09d599e25ce48e603820841dd53625ef7de0e Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 16:15:29 -0800 Subject: [PATCH 21/71] updating requirements for scikit --- requirements.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index df80ae3..62e66bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,6 @@ boto3>=1.26.0 Pillow>=10.0.0 -opencv-python>=4.8.0 \ No newline at end of file +opencv-python>=4.8.0 +numpy>=1.24.0 +scipy>=1.10.0 +scikit-image>=0.21.0 \ No newline at end of file From e28a052e091d09db38e6d46bb15831ad1dc2507f Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 18:06:55 -0800 Subject: [PATCH 22/71] adding anti clumping. 
--- mosaic-v2.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mosaic-v2.py b/mosaic-v2.py index 4e4f6d3..98f2b26 100644 --- a/mosaic-v2.py +++ b/mosaic-v2.py @@ -173,8 +173,10 @@ def get_best_fit_tile(self, img_data): class TileFitterSciKit: - def __init__(self, tiles_data, match_res=TILE_MATCH_RES): + def __init__(self, tiles_data, match_res=TILE_MATCH_RES, penalty=0.1): # tiles_data here is the 'small_tiles' list from TileProcessor + self.penalty = penalty + self.usages = [0.0 for x in range(len(tiles_data))] self.tiles_data = tiles_data self.match_res = match_res @@ -218,6 +220,8 @@ def get_best_fit_tile(self, img_data): channel_axis=2, data_range=255) + score = score - self.usages[idx] + if score > best_score: best_score = score best_fit_tile_index = idx @@ -226,6 +230,8 @@ def get_best_fit_tile(self, img_data): if score > 0.98: break + self.usages[best_fit_tile_index] = \ + self.usages[best_fit_tile_index] + self.penalty return best_fit_tile_index From 1fb90fb3a21a168d121ff5db39bffaaaddf4997a Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 18:24:21 -0800 Subject: [PATCH 23/71] set the splicer for multithreading. 
--- splicer.py | 97 +++++++++++++++++++++--------------------------------- 1 file changed, 38 insertions(+), 59 deletions(-) diff --git a/splicer.py b/splicer.py index cf70ca7..109bc28 100644 --- a/splicer.py +++ b/splicer.py @@ -2,59 +2,46 @@ import os import hashlib import argparse -import sys # noqa F401 to be used later +from multiprocessing import Pool, cpu_count +from functools import partial + +WORKER_COUNT = cpu_count() def get_file_md5(file_path): - """Generates an MD5 hash of the video file in chunks.""" hash_md5 = hashlib.md5() try: with open(file_path, "rb") as f: for chunk in iter(lambda: f.read(4096), b""): hash_md5.update(chunk) return hash_md5.hexdigest() - except FileNotFoundError: + except Exception: return None def resize_maintain_aspect(frame, short_side_target=200): - """ - Resizes a frame so the shortest side matches short_side_target, - maintaining the original aspect ratio. - """ h, w = frame.shape[:2] - - # Determine which side is the shortest if h < w: - # Landscape or Square: height is the shortest ratio = short_side_target / float(h) new_dim = (int(w * ratio), short_side_target) else: - # Portrait: width is the shortest ratio = short_side_target / float(w) new_dim = (short_side_target, int(h * ratio)) - - # Perform the actual resize - resized_frame = cv2.resize(frame, new_dim, interpolation=cv2.INTER_AREA) - return resized_frame + return cv2.resize(frame, new_dim, interpolation=cv2.INTER_AREA) -def process_video(absolute_video_path, output_folder): - """Processes a single video file.""" +def process_video_worker(absolute_video_path, output_folder): + """The function each CPU core will run.""" cap = cv2.VideoCapture(absolute_video_path) if not cap.isOpened(): - print(f" [!] Error: Could not open \ - '{os.path.basename(absolute_video_path)}'. Skipping.") - return + return f"Error: {os.path.basename(absolute_video_path)}" fps = cap.get(cv2.CAP_PROP_FPS) total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) if fps <= 0: - print(f" [!] 
Error: Could not determine FPS for \ - '{os.path.basename(absolute_video_path)}'. Skipping.") cap.release() - return + return f"Bad FPS: {os.path.basename(absolute_video_path)}" duration_seconds = total_frames / fps interval_seconds = 1 if duration_seconds < 60 else 5 @@ -64,9 +51,6 @@ def process_video(absolute_video_path, output_folder): saved_count = 0 current_frame_idx = 0 - print(f" Processing: \ - {os.path.basename(absolute_video_path)} ({duration_seconds:.1f}s)") # noqa - while True: ret, frame = cap.read() if not ret: @@ -74,64 +58,59 @@ def process_video(absolute_video_path, output_folder): if current_frame_idx % capture_step == 0: saved_count += 1 - filename = f"{file_hash}-{saved_count:04d}.png" # noqa - processed_frame = resize_maintain_aspect( - frame, short_side_target=200) + filename = f"{file_hash}-{saved_count:04d}.png" + processed_frame = resize_maintain_aspect(frame, + short_side_target=200) save_path = os.path.join(output_folder, filename) cv2.imwrite(save_path, processed_frame) current_frame_idx += 1 cap.release() - print(f" Done. 
Saved {saved_count} frames.") + return f"Done: {os.path.basename(absolute_video_path)} \ + ({saved_count} frames)" def main(): parser = argparse.ArgumentParser( - description="Extract frames from videos based on length.") - parser.add_argument("-path", help="Path to a single video file.") - parser.add_argument("-folder", help="Path to \ - a folder to process recursively.") - + description="Multi-threaded Frame Extraction") + parser.add_argument("-path", + help="Path to a single video file.") + parser.add_argument("-folder", + help="Path to folder for recursive processing.") args = parser.parse_args() - output_folder = os.path.join('/mnt/ebs/', "frames") + output_folder = os.path.join('/mnt/ebs/', "frames") if not os.path.exists(output_folder): os.makedirs(output_folder) - # List to hold all files to be processed video_files = [] - - # Handle single file mode if args.path: - abs_path = os.path.abspath(args.path) - if os.path.isfile(abs_path): - video_files.append(abs_path) - else: - print(f"Error: Single file '{abs_path}' not found.") - - # Handle recursive folder mode + video_files.append(os.path.abspath(args.path)) if args.folder: abs_folder = os.path.abspath(args.folder) - if os.path.isdir(abs_folder): - print(f"Scanning folder: {abs_folder}...") - for root, dirs, files in os.walk(abs_folder): - for file in files: - if file.lower().endswith(".mp4"): - video_files.append(os.path.join(root, file)) - else: - print(f"Error: Folder '{abs_folder}' not found.") + for root, _, files in os.walk(abs_folder): + for file in files: + if file.lower().endswith(".mp4"): + video_files.append(os.path.join(root, file)) if not video_files: - print("No valid .mp4 files found to process. 
Use -path or -folder.") + print("No videos found.") return - print(f"Found {len(video_files)} video(s) to process.\n" + "-"*30) + print(f"Distributing {len(video_files)} \ + videos across {WORKER_COUNT} CPUs...") + + # --- THE MULTIPROCESSING MAGIC --- + worker_func = partial(process_video_worker, + output_folder=output_folder) - for vid in video_files: - process_video(vid, output_folder) + with Pool(processes=WORKER_COUNT) as pool: + # 'imap_unordered' for easy balanacing + for result in pool.imap_unordered(worker_func, video_files): + print(f" [+] {result}") - print("-"*30 + f"\nAll tasks complete. Frames are in: {output_folder}") + print(f"\nProcessing complete. All frames in: {output_folder}") if __name__ == "__main__": From 02aef15a1000bdca50e31d528627283e4c3dae07 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 18:34:13 -0800 Subject: [PATCH 24/71] added some Quality of Life to the v2 console too. --- mosaic-v2.py | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/mosaic-v2.py b/mosaic-v2.py index 98f2b26..1fd7146 100644 --- a/mosaic-v2.py +++ b/mosaic-v2.py @@ -3,6 +3,7 @@ import io import hashlib import os.path +import argparse from PIL import Image, ImageOps from scipy.spatial import KDTree import numpy as np @@ -391,14 +392,36 @@ def mosaic(img_path, tiles_path): if __name__ == '__main__': - if len(sys.argv) < 3: - show_error('Usage: {} \r'.format(sys.argv[0])) - else: - source_image = sys.argv[1] - tile_dir = sys.argv[2] + + parser = argparse.ArgumentParser( + description="Generate a high-quality mosaic.") + + # Create the mutually exclusive group for input + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("-file", "-f", help="Path to the source image file.") + group.add_argument("-folder", help="Path to a \ + folder of images (not yet implemented).") + + # The tiles directory with a default value + parser.add_argument("-tiles", "-t", + 
default="/mnt/ebs/frames", + help="Path to the directory \ + containing tiles (default: /mnt/ebs/frames)") + + args = parser.parse_args() + + # Current logic: Only handle the single file mode + if args.file: + source_image = os.path.abspath(args.file) + tile_dir = os.path.abspath(args.tiles) + if not os.path.isfile(source_image): - show_error("Unable to find image file '{}'".format(source_image)) + show_error(f"Unable to find image file '{source_image}'") elif not os.path.isdir(tile_dir): - show_error("Unable to find tile directory '{}'".format(tile_dir)) + show_error(f"Unable to find tile directory '{tile_dir}'") else: + # Trigger the mosaic process mosaic(source_image, tile_dir) + + elif args.folder: + print("Folder mode requested, but not yet implemented. Stay tuned!") From 24d22bec3075bf65ef65197e4bec5205bd19b259 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 18:50:39 -0800 Subject: [PATCH 25/71] hopefully no more redundant tile processing. --- mosaic-v2.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/mosaic-v2.py b/mosaic-v2.py index 1fd7146..d592e2f 100644 --- a/mosaic-v2.py +++ b/mosaic-v2.py @@ -380,15 +380,14 @@ def show_error(msg): print('ERROR: {}'.format(msg)) -def mosaic(img_path, tiles_path): +def mosaic(img_path, tiles_data): + """ Takes in Tiles Data as an Agrument now """ image_data = TargetImage(img_path).get_data() - tiles_data = TileProcessor(tiles_path).get_tiles() + # tiles_data = TileProcessor(tiles_path).get_tiles() if tiles_data[0]: compose(image_data, tiles_data) else: - show_error( - "No images found in tiles directory '{}'".format(tiles_path) - ) + show_error("Tiles Data not propery formatted!") if __name__ == '__main__': @@ -421,7 +420,28 @@ def mosaic(img_path, tiles_path): show_error(f"Unable to find tile directory '{tile_dir}'") else: # Trigger the mosaic process - mosaic(source_image, tile_dir) + tiles_data = TileProcessor(tile_dir).get_tiles() + 
mosaic(source_image, tiles_data) elif args.folder: - print("Folder mode requested, but not yet implemented. Stay tuned!") + abs_folder = os.path.abspath(args.folder) + tile_dir = os.path.abspath(args.tiles) + try: + samples = [e.path for e in os.scandir(abs_folder) + if e.is_file()] + except FileNotFoundError: + print(f"Error: Folder '{abs_folder}' not found.") + exit(1) + tiles_data = TileProcessor(tile_dir).get_tiles() + for file_path in samples: + if not os.path.isfile(file_path): + show_error(f"Unable to find image file \ + '{file_path}'") + continue + elif not os.path.isdir(tile_dir): + show_error(f"Unable to find tile directory \ + '{tile_dir}'") + continue + else: + # Trigger the mosaic process + mosaic(file_path, tiles_data) From 9762cc4a5e06f2cc67de3cffc9c3aafe3a255ce1 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 19:32:44 -0800 Subject: [PATCH 26/71] more finely tuned default penalty. --- mosaic-v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mosaic-v2.py b/mosaic-v2.py index d592e2f..8b48ebf 100644 --- a/mosaic-v2.py +++ b/mosaic-v2.py @@ -174,7 +174,7 @@ def get_best_fit_tile(self, img_data): class TileFitterSciKit: - def __init__(self, tiles_data, match_res=TILE_MATCH_RES, penalty=0.1): + def __init__(self, tiles_data, match_res=TILE_MATCH_RES, penalty=0.02): # tiles_data here is the 'small_tiles' list from TileProcessor self.penalty = penalty self.usages = [0.0 for x in range(len(tiles_data))] From f015004697948dd45385687f55a9b6943da84d72 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 19:55:37 -0800 Subject: [PATCH 27/71] enforcing color channel uniformity at download from s3 --- load_data_from_s3.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/load_data_from_s3.py b/load_data_from_s3.py index f6b8d11..fdb759b 100644 --- a/load_data_from_s3.py +++ b/load_data_from_s3.py @@ -12,6 +12,9 @@ def resize_in_place(file_path, max_dimension=600): """ try: with Image.open(file_path) as img: + if 
img.mode != 'RGB': + # Enforces RGB for uniformity + img = img.convert('RGB') # .thumbnail handles aspect ratio automatically # It only shrinks if the image is larger than 600px img.thumbnail((max_dimension, max_dimension), From d070c3760e6d5d0adc06757e30890e5353db12a6 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 20:48:27 -0800 Subject: [PATCH 28/71] creating a test script to verify that dependancie run okay on hardware. --- test-image.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 test-image.py diff --git a/test-image.py b/test-image.py new file mode 100644 index 0000000..c48559c --- /dev/null +++ b/test-image.py @@ -0,0 +1,16 @@ +from PIL import Image +import numpy as np +from skimage.metrics import structural_similarity as ssim + +# 1. Create a fake 100x100 image +data = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) +img = Image.fromarray(data) +img.save("test.jpg") + +# 2. Try to reload and SSIM it +reloaded = np.array(Image.open("test.jpg")) +try: + score = ssim(reloaded, reloaded, channel_axis=-1) + print(f"SSIM Success! 
Score: {score}") +except Exception as e: + print(f"SSIM Failed on this hardware: {e}") From 8ab03224396c2639e089097a487e6e0908a1779f Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 22:27:51 -0800 Subject: [PATCH 29/71] updated dependanices for python3.14 --- requirements.txt | 12 ++++++------ splicer.py | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 62e66bb..b103ab9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -boto3>=1.26.0 -Pillow>=10.0.0 -opencv-python>=4.8.0 -numpy>=1.24.0 -scipy>=1.10.0 -scikit-image>=0.21.0 \ No newline at end of file +boto3>=1.42.59 +Pillow>=12.1.1 +opencv-python>=4.13.0.92 +numpy>=2.42 +scipy>=1.17.0 +scikit-image>=0.26.0 \ No newline at end of file diff --git a/splicer.py b/splicer.py index 109bc28..788394d 100644 --- a/splicer.py +++ b/splicer.py @@ -8,6 +8,19 @@ WORKER_COUNT = cpu_count() +def is_video_readable(v_path): + """ Verifies if a file is not corrupted """ + cap = cv2.VideoCapture(v_path) + if not cap.isOpened(): + return False + + # Try to grab just the first frame to see if the decoder barfs + ret, frame = cap.read() + cap.release() + + return ret and frame is not None + + def get_file_md5(file_path): hash_md5 = hashlib.md5() try: @@ -32,6 +45,9 @@ def resize_maintain_aspect(frame, short_side_target=200): def process_video_worker(absolute_video_path, output_folder): """The function each CPU core will run.""" + if not is_video_readable(absolute_video_path): + return f"Error: {os.path.basename(absolute_video_path)}\ + is not readable!" 
cap = cv2.VideoCapture(absolute_video_path) if not cap.isOpened(): return f"Error: {os.path.basename(absolute_video_path)}" From e733402ba4da90463ab74d35701df9cb4a589737 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 28 Feb 2026 23:46:59 -0800 Subject: [PATCH 30/71] downgraded requirements to python 3.9 --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index b103ab9..8c5eb50 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ boto3>=1.42.59 -Pillow>=12.1.1 +Pillow>=11.3.0 opencv-python>=4.13.0.92 -numpy>=2.42 -scipy>=1.17.0 +numpy>=2.0.2 +scipy>=1.13.1 scikit-image>=0.26.0 \ No newline at end of file From a67addc25382e7ba0321b98c1bbbc1f856d7928d Mon Sep 17 00:00:00 2001 From: Joel G Date: Sun, 1 Mar 2026 08:37:08 -0800 Subject: [PATCH 31/71] making code resilient to what I think must be bad input. --- mosaic-v2.py | 19 +++++++++++++------ requirements.txt | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/mosaic-v2.py b/mosaic-v2.py index 8b48ebf..4f658fc 100644 --- a/mosaic-v2.py +++ b/mosaic-v2.py @@ -216,12 +216,19 @@ def get_best_fit_tile(self, img_data): candidate_np = self.tiles_np[idx] # SSIM needs to know the range of pixel values (0-255) - score = ssim(target_np, - candidate_np, - channel_axis=2, - data_range=255) - - score = score - self.usages[idx] + try: + score = ssim(target_np, + candidate_np, + channel_axis=2, + data_range=255) + score = score - self.usages[idx] + + except ValueError as e: + # codes sometimes breaks and hangs. 
+ print(f'Got exception {e} \ + skipping index{idx} \ + data was {type(candidate_np)}') + continue if score > best_score: best_score = score diff --git a/requirements.txt b/requirements.txt index 8c5eb50..1bc02ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ Pillow>=11.3.0 opencv-python>=4.13.0.92 numpy>=2.0.2 scipy>=1.13.1 -scikit-image>=0.26.0 \ No newline at end of file +scikit-image>=0.24.0 \ No newline at end of file From 39c32d451606f3c8a44ec535fab0eea0539f168d Mon Sep 17 00:00:00 2001 From: Joel G Date: Sun, 1 Mar 2026 08:56:07 -0800 Subject: [PATCH 32/71] fixing win size --- mosaic-v2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mosaic-v2.py b/mosaic-v2.py index 4f658fc..eb8f496 100644 --- a/mosaic-v2.py +++ b/mosaic-v2.py @@ -220,7 +220,8 @@ def get_best_fit_tile(self, img_data): score = ssim(target_np, candidate_np, channel_axis=2, - data_range=255) + data_range=255, + win_size=3) score = score - self.usages[idx] except ValueError as e: From 3a3985d5bd68669dc68e8c579ce6b7e355b5dc0e Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 6 Mar 2026 17:56:24 -0800 Subject: [PATCH 33/71] a slightly more organized version of this tool --- mosaic-v2.py => mosaicizers/mosaic-v2.py | 0 mosaic.py => mosaicizers/mosaic.py | 0 load_data_from_s3.py => tools/load_data_from_s3.py | 0 s3_access.py => tools/s3_access.py | 0 splicer.py => tools/splicer.py | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename mosaic-v2.py => mosaicizers/mosaic-v2.py (100%) rename mosaic.py => mosaicizers/mosaic.py (100%) rename load_data_from_s3.py => tools/load_data_from_s3.py (100%) rename s3_access.py => tools/s3_access.py (100%) rename splicer.py => tools/splicer.py (100%) diff --git a/mosaic-v2.py b/mosaicizers/mosaic-v2.py similarity index 100% rename from mosaic-v2.py rename to mosaicizers/mosaic-v2.py diff --git a/mosaic.py b/mosaicizers/mosaic.py similarity index 100% rename from mosaic.py rename to mosaicizers/mosaic.py diff --git 
a/load_data_from_s3.py b/tools/load_data_from_s3.py similarity index 100% rename from load_data_from_s3.py rename to tools/load_data_from_s3.py diff --git a/s3_access.py b/tools/s3_access.py similarity index 100% rename from s3_access.py rename to tools/s3_access.py diff --git a/splicer.py b/tools/splicer.py similarity index 100% rename from splicer.py rename to tools/splicer.py From 6cfea61a98debe5a4e8bbd3b4d7b34e356ef28a3 Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 6 Mar 2026 19:49:25 -0800 Subject: [PATCH 34/71] sorted mosaic-v2 into its own folder. --- mosaicizers/MosaicImage.py | 44 +++++ mosaicizers/TargetImage.py | 37 ++++ mosaicizers/TileFitterSciKit.py | 74 ++++++++ mosaicizers/TileProcessor.py | 80 ++++++++ mosaicizers/mosaic-v2.py | 322 +++++--------------------------- tools/load_data_from_s3.py | 2 +- 6 files changed, 283 insertions(+), 276 deletions(-) create mode 100644 mosaicizers/MosaicImage.py create mode 100644 mosaicizers/TargetImage.py create mode 100644 mosaicizers/TileFitterSciKit.py create mode 100644 mosaicizers/TileProcessor.py diff --git a/mosaicizers/MosaicImage.py b/mosaicizers/MosaicImage.py new file mode 100644 index 0000000..e157497 --- /dev/null +++ b/mosaicizers/MosaicImage.py @@ -0,0 +1,44 @@ +import io +import os +import hashlib +from PIL import Image + + +class MosaicImage: + def __init__(self, original_img, tile_size=50): + self.image = Image.new(original_img.mode, original_img.size) + self.x_tile_count = int(original_img.size[0] / tile_size) + self.y_tile_count = int(original_img.size[1] / tile_size) + self.total_tiles = self.x_tile_count * self.y_tile_count + self.tile_size = tile_size + + def add_tile(self, tile_data, coords): + img = Image.new('RGB', (self.tile_size, self.tile_size)) + img.putdata(tile_data) + self.image.paste(img, coords) + + def save(self, output_dir='/mnt/ebs/mosaics', suffix=''): + """ + Saves the image_obj as a .jpeg to /mnt/ebs/mosaics + using its MD5 hash as the filename. 
+ """ + + # Ensure the output directory exists + os.makedirs(output_dir, exist_ok=True) + + # 1. Convert image to bytes to calculate hash + # We save to a temporary buffer or use the raw data + img_byte_arr = io.BytesIO() + self.image.save(img_byte_arr, format='JPEG') + img_bytes = img_byte_arr.getvalue() + + md5_hash = hashlib.md5(img_bytes).hexdigest() + + filename = f"{md5_hash}{suffix}.jpeg" + final_path = os.path.join(output_dir, filename) + + with open(final_path, "wb") as f: + f.write(img_bytes) + + print(f"Mosaic saved to: {final_path}") + return final_path diff --git a/mosaicizers/TargetImage.py b/mosaicizers/TargetImage.py new file mode 100644 index 0000000..3b732c1 --- /dev/null +++ b/mosaicizers/TargetImage.py @@ -0,0 +1,37 @@ +from PIL import Image + + +class TargetImage: + def __init__(self, image_path, + tile_size=50, tile_res=5, + enlargement_factor=8): + self.image_path = image_path + self.tile_size = tile_size + self.enlargement_factor = enlargement_factor + self.tile_block_size = tile_size / max(min(tile_res, tile_size), 1) + + def get_data(self): + print('Processing main image...') + img = Image.open(self.image_path) + w = img.size[0] * self.enlargement_factor + h = img.size[1] * self.enlargement_factor + large_img = img.resize((w, h), Image.LANCZOS) + w_diff = (w % self.tile_size) / 2 + h_diff = (h % self.tile_size) / 2 + + # crop the image slightly so we use a whole number of tiles + if w_diff or h_diff: + large_img = large_img.crop( + (w_diff, h_diff, w - w_diff, h - h_diff) + ) + + small_img = large_img.resize( + (int(w / self.tile_block_size), int(h / self.tile_block_size)), + Image.LANCZOS + ) + + image_data = (large_img.convert('RGB'), small_img.convert('RGB')) + + print('Main image processed.') + + return image_data diff --git a/mosaicizers/TileFitterSciKit.py b/mosaicizers/TileFitterSciKit.py new file mode 100644 index 0000000..a87c2db --- /dev/null +++ b/mosaicizers/TileFitterSciKit.py @@ -0,0 +1,74 @@ +import numpy as np +from 
skimage.metrics import structural_similarity as ssim +from scipy.spatial import KDTree + + +class TileFitterSciKit: + def __init__(self, tiles_data, match_res=5, penalty=0.02): + # tiles_data here is the 'small_tiles' list from TileProcessor + self.penalty = penalty + self.usages = [0.0 for x in range(len(tiles_data))] + self.tiles_data = tiles_data + self.match_res = match_res + + print("Initializing KDTree for hybrid search...") + # 1. Convert tiles to NumPy arrays once + # We reshape them from flat lists back into (5x5x3) blocks for SSIM + self.tiles_np = [ + np.array(t).reshape((self.match_res, self.match_res, 3)) + for t in tiles_data + ] + + # 2. Pre-calculate average colors for the Tree + avg_colors = [t.mean(axis=(0, 1)) for t in self.tiles_np] + self.tree = KDTree(np.array(avg_colors)) + print("KDTree + SSIM Hybrid Fitter Ready.") + + def get_best_fit_tile(self, img_data): + """ + img_data: A flat list of pixels (from original code's getdata()) + We convert it to NumPy to use the Tree and SSIM. + """ + # Convert the incoming list to a 5x5x3 array + target_np = np.array(img_data).reshape( + (self.match_res, self.match_res, 3)) + + # Step 1: KDTree Pruning (The "Bucket" step) + # Find the top 40 color matches + target_avg = target_np.mean(axis=(0, 1)) + _, indices = self.tree.query(target_avg, k=40) + + best_score = -1 + best_fit_tile_index = indices[0] + + # Step 2: SSIM Refinement + for idx in indices: + candidate_np = self.tiles_np[idx] + + # SSIM needs to know the range of pixel values (0-255) + try: + score = ssim(target_np, + candidate_np, + channel_axis=2, + data_range=255, + win_size=self.match_res-2) + score = score - self.usages[idx] + + except ValueError as e: + # codes sometimes breaks and hangs. 
+ print(f'Got exception {e} \ + skipping index{idx} \ + data was {type(candidate_np)}') + continue + + if score > best_score: + best_score = score + best_fit_tile_index = idx + + # Early exit if we find an amazing match + if score > 0.98: + break + + self.usages[best_fit_tile_index] = \ + self.usages[best_fit_tile_index] + self.penalty + return best_fit_tile_index diff --git a/mosaicizers/TileProcessor.py b/mosaicizers/TileProcessor.py new file mode 100644 index 0000000..95b33fc --- /dev/null +++ b/mosaicizers/TileProcessor.py @@ -0,0 +1,80 @@ +import os +from PIL import Image, ImageOps + + +class TileProcessor: + def __init__(self, tiles_directory, tile_size=50, tile_res=5): + self.tiles_directory = tiles_directory + self.tile_size = tile_size, + self.tile_block_size = tile_size / max(min(tile_res, tile_size), 1) + self.tile_res = tile_res + + def get_average_color(self, img_path_or_obj): + """ + The 'Resize Trick': Shrinks an image to 1x1 to find the mean RGB. + Accepts either a file path or an existing PIL Image object. 
+ """ + try: + # If it's a path, open it; otherwise assume it's an Image object + if isinstance(img_path_or_obj, str): + img = Image.open(img_path_or_obj) + else: + img = img_path_or_obj + + img = img.convert('RGB') + # BOX resampling is fast and mathematically accurate for averaging + img_tiny = img.resize((1, 1), resample=Image.Resampling.BOX) + return img_tiny.getpixel((0, 0)) + except Exception as e: + print(f"Error processing image: {e}") + return None + + def __process_tile(self, tile_path): + try: + img = Image.open(tile_path) + img = ImageOps.exif_transpose(img) + + # tiles must be square, so get the largest square that fits inside + w = img.size[0] + h = img.size[1] + min_dimension = min(w, h) + w_crop = (w - min_dimension) / 2 + h_crop = (h - min_dimension) / 2 + img = img.crop((w_crop, h_crop, w - w_crop, h - h_crop)) + + large_tile_img = img.resize((self.tile_size, self.tile_size), + Image.LANCZOS) + small_tile_img = img.resize( + (int(self.tile_size / self.tile_block_size), + int(self.tile_size / self.tile_block_size)), + Image.LANCZOS + ) + + return (large_tile_img.convert('RGB'), + small_tile_img.convert('RGB')) + except Exception: + return (None, None) + + def get_tiles(self): + large_tiles = [] + small_tiles = [] + count = 0 + exp_threshold = 1 # for logging + print('Reading tiles from {}...'.format(self.tiles_directory)) + + # search the tiles directory recursively + for root, subFolders, files in os.walk(self.tiles_directory): + for tile_name in files: + tile_path = os.path.join(root, tile_name) + large_tile, small_tile = self.__process_tile(tile_path) + if large_tile: + large_tiles.append(large_tile) + small_tiles.append(small_tile) + count += 1 + if count == exp_threshold: + print(f'Processed {count} file(s) so far...') + exp_threshold = exp_threshold * 2 + + print('Processed {} tiles.'.format(len(large_tiles))) + + return (large_tiles, small_tiles) diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index eb8f496..2f584f5 100644 
--- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -1,249 +1,32 @@ -import sys +############################################### +# This file will create a mosaic from CLI. +# TODO need to sort out functions here +# In a new extension of Mosaic. +############################################### + import os -import io -import hashlib import os.path import argparse -from PIL import Image, ImageOps -from scipy.spatial import KDTree -import numpy as np -from skimage.metrics import structural_similarity as ssim from multiprocessing import Process, Queue, cpu_count -# Change these 3 config parameters to suit your needs... +# These are the custom imports +import MosaicImage +import TargetImage +import TileProcessor +import TileFitterSciKit + +# These are now configed by CLI or class defaults TILE_SIZE = 50 # height/width of mosaic tiles in pixels TILE_MATCH_RES = 5 # tile matching resolution -ENLARGEMENT = 8 # mosaic image will be this many times larger -Image.MAX_IMAGE_PIXELS = None # Dangerous, but allow it for now +# ENLARGEMENT = 8 # mosaic image will be this many times larger +# Image.MAX_IMAGE_PIXELS = None # Dangerous, but allow it for now TILE_BLOCK_SIZE = TILE_SIZE / max(min(TILE_MATCH_RES, TILE_SIZE), 1) WORKER_COUNT = max(cpu_count() - 1, 1) -OUT_FILE = 'mosaic.jpeg' +# OUT_FILE = 'mosaic.jpeg' EOQ_VALUE = None -class TileProcessor: - def __init__(self, tiles_directory): - self.tiles_directory = tiles_directory - - def get_average_color(self, img_path_or_obj): - """ - The 'Resize Trick': Shrinks an image to 1x1 to find the mean RGB. - Accepts either a file path or an existing PIL Image object. 
- """ - try: - # If it's a path, open it; otherwise assume it's an Image object - if isinstance(img_path_or_obj, str): - img = Image.open(img_path_or_obj) - else: - img = img_path_or_obj - - img = img.convert('RGB') - # BOX resampling is fast and mathematically accurate for averaging - img_tiny = img.resize((1, 1), resample=Image.Resampling.BOX) - return img_tiny.getpixel((0, 0)) - except Exception as e: - print(f"Error processing image: {e}") - return None - - def __process_tile(self, tile_path): - try: - img = Image.open(tile_path) - img = ImageOps.exif_transpose(img) - - # tiles must be square, so get the largest square that fits inside - w = img.size[0] - h = img.size[1] - min_dimension = min(w, h) - w_crop = (w - min_dimension) / 2 - h_crop = (h - min_dimension) / 2 - img = img.crop((w_crop, h_crop, w - w_crop, h - h_crop)) - - large_tile_img = img.resize((TILE_SIZE, TILE_SIZE), Image.LANCZOS) - small_tile_img = img.resize( - (int(TILE_SIZE / TILE_BLOCK_SIZE), - int(TILE_SIZE / TILE_BLOCK_SIZE)), - Image.LANCZOS - ) - - return (large_tile_img.convert('RGB'), - small_tile_img.convert('RGB')) - except Exception: - return (None, None) - - def get_tiles(self): - large_tiles = [] - small_tiles = [] - count = 0 - exp_threshold = 1 # for logging - print('Reading tiles from {}...'.format(self.tiles_directory)) - - # search the tiles directory recursively - for root, subFolders, files in os.walk(self.tiles_directory): - for tile_name in files: - tile_path = os.path.join(root, tile_name) - large_tile, small_tile = self.__process_tile(tile_path) - if large_tile: - large_tiles.append(large_tile) - small_tiles.append(small_tile) - count += 1 - if count == exp_threshold: - print(f'Processed {count} file(s) so far...') - exp_threshold = exp_threshold * 2 - - print('Processed {} tiles.'.format(len(large_tiles))) - - return (large_tiles, small_tiles) - - -class TargetImage: - def __init__(self, image_path): - self.image_path = image_path - - def get_data(self): - 
print('Processing main image...') - img = Image.open(self.image_path) - w = img.size[0] * ENLARGEMENT - h = img.size[1] * ENLARGEMENT - large_img = img.resize((w, h), Image.LANCZOS) - w_diff = (w % TILE_SIZE) / 2 - h_diff = (h % TILE_SIZE) / 2 - - # crop the image slightly so we use a whole number of tiles - if w_diff or h_diff: - large_img = large_img.crop( - (w_diff, h_diff, w - w_diff, h - h_diff) - ) - - small_img = large_img.resize( - (int(w / TILE_BLOCK_SIZE), int(h / TILE_BLOCK_SIZE)), - Image.LANCZOS - ) - - image_data = (large_img.convert('RGB'), small_img.convert('RGB')) - - print('Main image processed.') - - return image_data - - -class TileFitter: - def __init__(self, tiles_data): - self.tiles_data = tiles_data - - def get_average_color(self, img_path_or_obj): - try: - # If it's a path, open it; otherwise assume it's an Image object - if isinstance(img_path_or_obj, str): - img = Image.open(img_path_or_obj) - else: - img = img_path_or_obj - - img = img.convert('RGB') - # BOX resampling is fast and mathematically accurate for averaging - img_tiny = img.resize((1, 1), resample=Image.Resampling.BOX) - return img_tiny.getpixel((0, 0)) - except Exception as e: - print(f"Error processing image: {e}") - return None - - def __get_tile_diff(self, t1, t2, bail_out_value): - diff = 0 - for i in range(len(t1)): - diff += ((t1[i][0] - t2[i][0])**2 + - (t1[i][1] - t2[i][1])**2 + - (t1[i][2] - t2[i][2])**2) - if diff > bail_out_value: - return diff - return diff - - def get_best_fit_tile(self, img_data): - """ Image Data is the from the Sample """ - best_fit_tile_index = None - min_diff = sys.maxsize - tile_index = 0 - - for tile_data in self.tiles_data: - diff = self.__get_tile_diff(img_data, tile_data, min_diff) - if diff < min_diff: - min_diff = diff - best_fit_tile_index = tile_index - tile_index += 1 - - return best_fit_tile_index - - -class TileFitterSciKit: - def __init__(self, tiles_data, match_res=TILE_MATCH_RES, penalty=0.02): - # tiles_data here is the 
'small_tiles' list from TileProcessor - self.penalty = penalty - self.usages = [0.0 for x in range(len(tiles_data))] - self.tiles_data = tiles_data - self.match_res = match_res - - print("Initializing KDTree for hybrid search...") - # 1. Convert tiles to NumPy arrays once - # We reshape them from flat lists back into (5x5x3) blocks for SSIM - self.tiles_np = [ - np.array(t).reshape((self.match_res, self.match_res, 3)) - for t in tiles_data - ] - - # 2. Pre-calculate average colors for the Tree - avg_colors = [t.mean(axis=(0, 1)) for t in self.tiles_np] - self.tree = KDTree(np.array(avg_colors)) - print("KDTree + SSIM Hybrid Fitter Ready.") - - def get_best_fit_tile(self, img_data): - """ - img_data: A flat list of pixels (from original code's getdata()) - We convert it to NumPy to use the Tree and SSIM. - """ - # Convert the incoming list to a 5x5x3 array - target_np = np.array(img_data).reshape( - (self.match_res, self.match_res, 3)) - - # Step 1: KDTree Pruning (The "Bucket" step) - # Find the top 40 color matches - target_avg = target_np.mean(axis=(0, 1)) - _, indices = self.tree.query(target_avg, k=40) - - best_score = -1 - best_fit_tile_index = indices[0] - - # Step 2: SSIM Refinement - for idx in indices: - candidate_np = self.tiles_np[idx] - - # SSIM needs to know the range of pixel values (0-255) - try: - score = ssim(target_np, - candidate_np, - channel_axis=2, - data_range=255, - win_size=3) - score = score - self.usages[idx] - - except ValueError as e: - # codes sometimes breaks and hangs. 
- print(f'Got exception {e} \ - skipping index{idx} \ - data was {type(candidate_np)}') - continue - - if score > best_score: - best_score = score - best_fit_tile_index = idx - - # Early exit if we find an amazing match - if score > 0.98: - break - - self.usages[best_fit_tile_index] = \ - self.usages[best_fit_tile_index] + self.penalty - return best_fit_tile_index - - def fit_tiles(work_queue, result_queue, tiles_data): tile_fitter = TileFitterSciKit(tiles_data) @@ -271,47 +54,10 @@ def update(self): flush=True, end='\r') -class MosaicImage: - def __init__(self, original_img): - self.image = Image.new(original_img.mode, original_img.size) - self.x_tile_count = int(original_img.size[0] / TILE_SIZE) - self.y_tile_count = int(original_img.size[1] / TILE_SIZE) - self.total_tiles = self.x_tile_count * self.y_tile_count - - def add_tile(self, tile_data, coords): - img = Image.new('RGB', (TILE_SIZE, TILE_SIZE)) - img.putdata(tile_data) - self.image.paste(img, coords) - - def save(self): - """ - Saves the image_obj as a .jpeg to /mnt/ebs/mosaics - using its MD5 hash as the filename. - """ - output_dir = "/mnt/ebs/mosaics" - - # Ensure the output directory exists - os.makedirs(output_dir, exist_ok=True) - - # 1. 
Convert image to bytes to calculate hash - # We save to a temporary buffer or use the raw data - img_byte_arr = io.BytesIO() - self.image.save(img_byte_arr, format='JPEG') - img_bytes = img_byte_arr.getvalue() - - md5_hash = hashlib.md5(img_bytes).hexdigest() - - filename = f"{md5_hash}.jpeg" - final_path = os.path.join(output_dir, filename) - - with open(final_path, "wb") as f: - f.write(img_bytes) - - print(f"Mosaic saved to: {final_path}") - return final_path - - -def build_mosaic(result_queue, all_tile_data_large, original_img_large): +def build_mosaic(result_queue, + all_tile_data_large, + original_img_large, + suffix): mosaic = MosaicImage(original_img_large) active_workers = WORKER_COUNT @@ -330,7 +76,7 @@ def build_mosaic(result_queue, all_tile_data_large, original_img_large): except KeyboardInterrupt: pass - OUT_FILEPATH = mosaic.save() + OUT_FILEPATH = mosaic.save(suffix=suffix) print('\nFinished, output is in', OUT_FILEPATH) @@ -399,6 +145,17 @@ def mosaic(img_path, tiles_data): if __name__ == '__main__': + def restricted_float(x): + try: + x = float(x) + except ValueError: + raise argparse.ArgumentTypeError(f"{x} is not a \ + floating-point number") + + if x < 0.01 or x > 0.5: + raise argparse.ArgumentTypeError(f"{x} is not in range \ + [0.01, 0.5]") + return x parser = argparse.ArgumentParser( description="Generate a high-quality mosaic.") @@ -415,6 +172,21 @@ def mosaic(img_path, tiles_data): help="Path to the directory \ containing tiles (default: /mnt/ebs/frames)") + parser.add_argument("-out_dir", "-o", + default="/mnt/ebs/mosaics", + help="This is the directory the \ + Mosaics will be save to.") + + parser.add_argument('-suffix', '-s', + help="Type something here if \ + if you want it appended \ + to the file name.") + + parser.add_argument('-ratio', + type=restricted_float, + default=0.2, + help="Set the ratio (range: 0.01 to 0.5, \ + default: 0.2)") args = parser.parse_args() # Current logic: Only handle the single file mode diff --git 
a/tools/load_data_from_s3.py b/tools/load_data_from_s3.py index fdb759b..8b05a8d 100644 --- a/tools/load_data_from_s3.py +++ b/tools/load_data_from_s3.py @@ -2,7 +2,7 @@ import argparse import sys from PIL import Image -from s3_access import S3Access +from tools.s3_access import S3Access def resize_in_place(file_path, max_dimension=600): From 4cbfef76cbe3163c272e14e4db8cba3bb6bd8884 Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 6 Mar 2026 20:42:25 -0800 Subject: [PATCH 35/71] updated splicer --- tools/load_data_from_s3.py | 2 +- tools/splicer.py | 91 ++++++++++++++++++++++++++++++++------ 2 files changed, 79 insertions(+), 14 deletions(-) diff --git a/tools/load_data_from_s3.py b/tools/load_data_from_s3.py index 8b05a8d..fdb759b 100644 --- a/tools/load_data_from_s3.py +++ b/tools/load_data_from_s3.py @@ -2,7 +2,7 @@ import argparse import sys from PIL import Image -from tools.s3_access import S3Access +from s3_access import S3Access def resize_in_place(file_path, max_dimension=600): diff --git a/tools/splicer.py b/tools/splicer.py index 788394d..84244d0 100644 --- a/tools/splicer.py +++ b/tools/splicer.py @@ -4,6 +4,7 @@ import argparse from multiprocessing import Pool, cpu_count from functools import partial +from PIL import Image WORKER_COUNT = cpu_count() @@ -87,24 +88,90 @@ def process_video_worker(absolute_video_path, output_folder): ({saved_count} frames)" +def process_image(image_path, double=True, + output_dir="/mnt/ebs/frames"): + + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + try: + with Image.open(image_path) as img: + # Ensure RGB mode + if img.mode != 'RGB': + img = img.convert('RGB') + + width, height = img.size + short_side = min(width, height) + is_square = (width == height) + + # --- Crop 1: Upper Left --- + upper_left_box = (0, 0, short_side, short_side) + _save_processed_crop(img.crop(upper_left_box), output_dir) + + # --- Crop 2: Lower Right (If source not square) --- + if double and not is_square: + lower_right_box = 
(width - short_side, + height - short_side, + width, height) + _save_processed_crop(img.crop(lower_right_box), + output_dir) + + except Exception as e: + print(f"Error processing {image_path}: {e}") + + +def _save_processed_crop(crop_img, output_dir): + """Internal helper to resize, hash, and save the image.""" + resized = crop_img.resize((200, 200), Image.Resampling.LANCZOS) + hash_name = hashlib.md5(resized.tobytes()).hexdigest() + save_path = os.path.join(output_dir, f"{hash_name}.png") + resized.save(save_path, "PNG") + print(f" [#] Saved: {hash_name}.png") + + def main(): parser = argparse.ArgumentParser( - description="Multi-threaded Frame Extraction") - parser.add_argument("-path", - help="Path to a single video file.") - parser.add_argument("-folder", - help="Path to folder for recursive processing.") + description="Multi-threaded Media Processor") + + mode_group = parser.add_mutually_exclusive_group(required=True) + + mode_group.add_argument("-video-file", + help="Path to a single video file.") + mode_group.add_argument("-video-folder", + help="Path to folder for recursive \ + video processing.") + mode_group.add_argument("-image-folder", + help="Path to folder containing \ + images for processing.") + args = parser.parse_args() + # --- BRANCH 1: IMAGE PROCESSING --- + if args.image_folder: + print(f"Starting Image Processing mode on: {args.image_folder}") + abs_folder = os.path.abspath(args.image_folder) + photos = [] + for root, _, files in os.walk(abs_folder): + for file in files: + extension = file.split(".") + if extension.lower() in ["png", "jpg", "jpeg"]: + photos.append(os.path.join(root, file)) + if len(photos) == 0: + print(f'No photos found {abs_folder}') + return + + # --- BRANCH 2: VIDEO PROCESSING --- output_folder = os.path.join('/mnt/ebs/', "frames") if not os.path.exists(output_folder): os.makedirs(output_folder) video_files = [] - if args.path: - video_files.append(os.path.abspath(args.path)) - if args.folder: - abs_folder = 
os.path.abspath(args.folder) + + if args.video_file: + video_files.append(os.path.abspath(args.video_file)) + + elif args.video_folder: + abs_folder = os.path.abspath(args.video_folder) for root, _, files in os.walk(abs_folder): for file in files: if file.lower().endswith(".mp4"): @@ -115,14 +182,12 @@ def main(): return print(f"Distributing {len(video_files)} \ - videos across {WORKER_COUNT} CPUs...") + videos across {WORKER_COUNT} CPUs...") # --- THE MULTIPROCESSING MAGIC --- - worker_func = partial(process_video_worker, - output_folder=output_folder) + worker_func = partial(process_video_worker, output_folder=output_folder) with Pool(processes=WORKER_COUNT) as pool: - # 'imap_unordered' for easy balanacing for result in pool.imap_unordered(worker_func, video_files): print(f" [+] {result}") From 046d1959989471d0b6227cc5be1f21e5c0455553 Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 6 Mar 2026 20:56:04 -0800 Subject: [PATCH 36/71] configurered to pull from tools. --- tools/load_data_from_s3.py | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/tools/load_data_from_s3.py b/tools/load_data_from_s3.py index fdb759b..9d2376e 100644 --- a/tools/load_data_from_s3.py +++ b/tools/load_data_from_s3.py @@ -32,17 +32,26 @@ def main(): ) parser.add_argument( + # videos to made to tiles "--all-videos", action="store_true", help="Download all objects from the 'lowresvideo' \ prefix to /mnt/ebs/raw_vids" ) parser.add_argument( - "--all-photos", + # Images to made to mosaics + "--all-samples", action="store_true", help="Download .png and .jpeg objects from \ 'moasic-art-photos' to /mnt/ebs/samples" ) + parser.add_argument( + # Images to made to mosaics + "--all-photos", + action="store_true", + help="Download .png and .jpeg objects from \ + 'picsources' to /mnt/ebs/raw_photos" + ) # New flag for uploading results parser.add_argument( "--upload-results", @@ -77,21 +86,39 @@ def main(): os.path.join(local_vids_dir, 
os.path.basename(key))) # --- Logic for Photos to be turned to mosaics (Download) - if args.all_photos: + if args.all_samples: local_photos_dir = os.path.join(mount_point, "samples") os.makedirs(local_photos_dir, exist_ok=True) - print(f"Downloading photos to {local_photos_dir}...") + print(f"Downloading samples to {local_photos_dir}...") keys = s3.list_sources("mosaic-art-photos") valid_extensions = ('.png', '.jpeg', '.jpg') for key in keys: if key.lower().endswith(valid_extensions): # downloads source files from s3 + filename = os.path.basename(key) + s3.download_to_disk( + key, os.path.join(local_photos_dir, filename)) # ensures they are never larger 600 on longer side + resize_in_place(os.path.join(local_photos_dir, filename)) + + # --- Logic for Photos to be turned to mosaics (Download) + if args.all_photos: + local_photos_dir = os.path.join(mount_point, "raw_photos") + os.makedirs(local_photos_dir, exist_ok=True) + + print(f"Downloading tile photos to {local_photos_dir}...") + keys = s3.list_sources("picsources") + valid_extensions = ('.png', '.jpeg', '.jpg') + + for key in keys: + if key.lower().endswith(valid_extensions): + # downloads source files from s3 filename = os.path.basename(key) s3.download_to_disk( key, os.path.join(local_photos_dir, filename)) + # ensures no files are larger 600 on any side. resize_in_place(os.path.join(local_photos_dir, filename)) # --- Logic for Results (Upload) --- From 043b8d627f04635fb936c3b2f0b3cc5f8d36a10f Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 7 Mar 2026 11:32:09 -0800 Subject: [PATCH 37/71] set splicer to actually process videos this time. 
--- tools/splicer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/splicer.py b/tools/splicer.py index 84244d0..a99c87c 100644 --- a/tools/splicer.py +++ b/tools/splicer.py @@ -153,11 +153,14 @@ def main(): photos = [] for root, _, files in os.walk(abs_folder): for file in files: - extension = file.split(".") + extension = file.split(".")[-1] if extension.lower() in ["png", "jpg", "jpeg"]: photos.append(os.path.join(root, file)) if len(photos) == 0: print(f'No photos found {abs_folder}') + for photo in photos: + process_image(photo, double=True, + output_dir="/mnt/ebs/frames") return # --- BRANCH 2: VIDEO PROCESSING --- From 1ef2e02a109209e3d8eddea60ca91fd2b9b003bd Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 7 Mar 2026 12:12:44 -0800 Subject: [PATCH 38/71] Made sure Mosaic v2 can use suffixes for files. --- mosaicizers/mosaic-v2.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index 2f584f5..51d4dad 100644 --- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -27,8 +27,9 @@ EOQ_VALUE = None -def fit_tiles(work_queue, result_queue, tiles_data): - tile_fitter = TileFitterSciKit(tiles_data) +def fit_tiles(work_queue, result_queue, + tiles_data, penalty=0.2): + tile_fitter = TileFitterSciKit(tiles_data, penalty=penalty) while True: try: @@ -57,7 +58,7 @@ def update(self): def build_mosaic(result_queue, all_tile_data_large, original_img_large, - suffix): + suffix=''): mosaic = MosaicImage(original_img_large) active_workers = WORKER_COUNT @@ -80,7 +81,7 @@ def build_mosaic(result_queue, print('\nFinished, output is in', OUT_FILEPATH) -def compose(original_img, tiles): +def compose(original_img, tiles, penalty=0.2, suffix=''): print('Building mosaic, press Ctrl-C to abort...') original_img_large, original_img_small = original_img tiles_large, tiles_small = tiles @@ -95,11 +96,13 @@ def compose(original_img, tiles): try: 
Process(target=build_mosaic, args=( - result_queue, all_tile_data_large, original_img_large)).start() + result_queue, all_tile_data_large, + original_img_large, suffix)).start() for n in range(WORKER_COUNT): Process(target=fit_tiles, args=( - work_queue, result_queue, all_tile_data_small)).start() + work_queue, result_queue, + all_tile_data_small, penalty)).start() progress = ProgressCounter(mosaic.x_tile_count * mosaic.y_tile_count) for x in range(mosaic.x_tile_count): @@ -134,12 +137,12 @@ def show_error(msg): print('ERROR: {}'.format(msg)) -def mosaic(img_path, tiles_data): +def mosaic(img_path, tiles_data, penalty=0.2, suffix=''): """ Takes in Tiles Data as an Agrument now """ image_data = TargetImage(img_path).get_data() # tiles_data = TileProcessor(tiles_path).get_tiles() if tiles_data[0]: - compose(image_data, tiles_data) + compose(image_data, tiles_data, penalty=penalty, suffix=suffix) else: show_error("Tiles Data not propery formatted!") @@ -152,7 +155,7 @@ def restricted_float(x): raise argparse.ArgumentTypeError(f"{x} is not a \ floating-point number") - if x < 0.01 or x > 0.5: + if x < 0.0 or x > 0.5: raise argparse.ArgumentTypeError(f"{x} is not in range \ [0.01, 0.5]") return x @@ -182,11 +185,12 @@ def restricted_float(x): if you want it appended \ to the file name.") - parser.add_argument('-ratio', + parser.add_argument('-penalty', type=restricted_float, default=0.2, - help="Set the ratio (range: 0.01 to 0.5, \ - default: 0.2)") + help="Set the penalty (range: 0.0 to 0.5, \ + default: 0.2) \ + High Penalty means less repetition of tiles") args = parser.parse_args() # Current logic: Only handle the single file mode @@ -201,7 +205,8 @@ def restricted_float(x): else: # Trigger the mosaic process tiles_data = TileProcessor(tile_dir).get_tiles() - mosaic(source_image, tiles_data) + mosaic(source_image, tiles_data, + penalty=args.penalty, suffix=args.suffix) elif args.folder: abs_folder = os.path.abspath(args.folder) @@ -224,4 +229,4 @@ def 
restricted_float(x): continue else: # Trigger the mosaic process - mosaic(file_path, tiles_data) + mosaic(file_path, tiles_data, penalty=args.penalty) From b09ae71e63ca13d84357a7fb5d76438789d53ab0 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 7 Mar 2026 12:20:02 -0800 Subject: [PATCH 39/71] import class not module. --- mosaicizers/mosaic-v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index 51d4dad..dad28b5 100644 --- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -12,7 +12,7 @@ # These are the custom imports import MosaicImage import TargetImage -import TileProcessor +from TileProcessor import TileProcessor import TileFitterSciKit # These are now configed by CLI or class defaults From 0ccc0291e00c9cfd005955adf0487420fcb118fc Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 7 Mar 2026 12:24:35 -0800 Subject: [PATCH 40/71] fixed all imports --- mosaicizers/mosaic-v2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index dad28b5..bf04486 100644 --- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -10,10 +10,10 @@ from multiprocessing import Process, Queue, cpu_count # These are the custom imports -import MosaicImage -import TargetImage +from MosaicImage import MosaicImage +from TargetImage import TargetImage from TileProcessor import TileProcessor -import TileFitterSciKit +from TileFitterSciKit import TileFitterSciKit # These are now configed by CLI or class defaults TILE_SIZE = 50 # height/width of mosaic tiles in pixels From eb9135cc204da7feb06a0baa9854b7e84e0f627b Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 7 Mar 2026 12:34:29 -0800 Subject: [PATCH 41/71] fixed mosaic invocation --- mosaicizers/mosaic-v2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index bf04486..6e5a175 100644 --- 
a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -229,4 +229,5 @@ def restricted_float(x): continue else: # Trigger the mosaic process - mosaic(file_path, tiles_data, penalty=args.penalty) + mosaic(file_path, tiles_data, + penalty=args.penalty, suffix=args.suffix) From 48f213443c583452360567f3d0b74d6a9811c4b2 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 7 Mar 2026 12:55:41 -0800 Subject: [PATCH 42/71] cannot run Tile Processor. --- mosaicizers/mosaic-v2.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index 6e5a175..eaa22e7 100644 --- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -217,7 +217,14 @@ def restricted_float(x): except FileNotFoundError: print(f"Error: Folder '{abs_folder}' not found.") exit(1) - tiles_data = TileProcessor(tile_dir).get_tiles() + try: + tiles_data = TileProcessor(tile_dir).get_tiles() + except FileNotFoundError: + print(f"Error: Tile directory '{tile_dir}' not found.") + exit(1) + except Exception as e: + print(f"Error running TileProcessor class '{tile_dir}': {e}") + exit(1) for file_path in samples: if not os.path.isfile(file_path): show_error(f"Unable to find image file \ From cad587db86f6214538290abcb250e32a2f827a8d Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 7 Mar 2026 12:58:44 -0800 Subject: [PATCH 43/71] no errors, but not processing correctly. 
--- mosaicizers/mosaic-v2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index eaa22e7..4bfb3e4 100644 --- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -218,7 +218,8 @@ def restricted_float(x): print(f"Error: Folder '{abs_folder}' not found.") exit(1) try: - tiles_data = TileProcessor(tile_dir).get_tiles() + tp = TileProcessor(tile_dir) + tiles_data = tp.get_tiles() except FileNotFoundError: print(f"Error: Tile directory '{tile_dir}' not found.") exit(1) From dc5e3f12a535748cf29bb6a26733080db6339db3 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 7 Mar 2026 13:17:59 -0800 Subject: [PATCH 44/71] I think this will work now. --- mosaicizers/TileProcessor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mosaicizers/TileProcessor.py b/mosaicizers/TileProcessor.py index 95b33fc..eae3dd4 100644 --- a/mosaicizers/TileProcessor.py +++ b/mosaicizers/TileProcessor.py @@ -4,9 +4,10 @@ class TileProcessor: def __init__(self, tiles_directory, tile_size=50, tile_res=5): + bottom = max(min(tile_res, tile_size), 1) self.tiles_directory = tiles_directory self.tile_size = tile_size, - self.tile_block_size = tile_size / max(min(tile_res, tile_size), 1) + self.tile_block_size = tile_size / bottom self.tile_res = tile_res def get_average_color(self, img_path_or_obj): From 9a3f9cd6dbaf068b530862a1ef5836c64b0b34ec Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 7 Mar 2026 13:28:55 -0800 Subject: [PATCH 45/71] Tile Processer still not working. 
--- mosaicizers/TileProcessor.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mosaicizers/TileProcessor.py b/mosaicizers/TileProcessor.py index eae3dd4..90c81b8 100644 --- a/mosaicizers/TileProcessor.py +++ b/mosaicizers/TileProcessor.py @@ -4,10 +4,9 @@ class TileProcessor: def __init__(self, tiles_directory, tile_size=50, tile_res=5): - bottom = max(min(tile_res, tile_size), 1) self.tiles_directory = tiles_directory - self.tile_size = tile_size, - self.tile_block_size = tile_size / bottom + self.tile_size = tile_size + self.tile_block_size = tile_size / max(min(tile_res, tile_size), 1) self.tile_res = tile_res def get_average_color(self, img_path_or_obj): @@ -53,7 +52,9 @@ def __process_tile(self, tile_path): return (large_tile_img.convert('RGB'), small_tile_img.convert('RGB')) - except Exception: + except Exception as e: + print(f"Error processing tile '{tile_path}': {e}") + exit(1) return (None, None) def get_tiles(self): From 0a87230ba647ee0166ab47ff0aa50465e4903ce2 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 7 Mar 2026 14:08:23 -0800 Subject: [PATCH 46/71] adding an easy downloader tool. --- downloads.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 downloads.py diff --git a/downloads.py b/downloads.py new file mode 100644 index 0000000..2646051 --- /dev/null +++ b/downloads.py @@ -0,0 +1,56 @@ +################################################## +# This script is intend to be run locally to get +# resulting images from s3. 
+################################################### + +import boto3 +import os +import argparse + + +def download_s3_folder(bucket_name, prefix, local_dir, region): + s3 = boto3.client('s3', region_name=region) + + paginator = s3.get_paginator('list_objects_v2') + params = {'Bucket': bucket_name, 'Prefix': prefix} + + os.makedirs(local_dir, exist_ok=True) + + for page in paginator.paginate(**params): + if 'Contents' not in page: + print("No files found with the given prefix.") + return + for obj in page['Contents']: + key = obj['Key'] + if key.endswith('/'): + # Skip folders + continue + + # Create the full local path + relative_path = os.path.relpath(key, prefix) + local_file_path = os.path.join(local_dir, relative_path) + local_file_dir = os.path.dirname(local_file_path) + os.makedirs(local_file_dir, exist_ok=True) + + print(f"Downloading {key} to {local_file_path}") + s3.download_file(bucket_name, key, local_file_path) + + +def main(): + parser = argparse.ArgumentParser(description='Download S3 \ + files to local directory.') + parser.add_argument('--bucket', + required=True, help='Name of the S3 bucket.') + parser.add_argument('--region', + default='us-west-2', + help='AWS region of the S3 bucket. Default: us-west-2') + parser.add_argument('--prefix', required=True, + help='Prefix (folder) in S3 to download.') + + args = parser.parse_args() + + download_s3_folder(args.bucket, args.prefix, 'test-results', args.region) + + +if __name__ == "__main__": + main() From e8621b1a922bd796599f627de25b8454f2d10874 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 7 Mar 2026 19:56:39 -0800 Subject: [PATCH 47/71] expanded on Mosaic Class so that assemble is method. Legacy funcationalty still here. 
--- mosaicizers/MosaicImage.py | 23 ++++++++++++++ mosaicizers/mosaic-v2.py | 65 +++++++++++++++++++++++--------------- 2 files changed, 62 insertions(+), 26 deletions(-) diff --git a/mosaicizers/MosaicImage.py b/mosaicizers/MosaicImage.py index e157497..1628143 100644 --- a/mosaicizers/MosaicImage.py +++ b/mosaicizers/MosaicImage.py @@ -42,3 +42,26 @@ def save(self, output_dir='/mnt/ebs/mosaics', suffix=''): print(f"Mosaic saved to: {final_path}") return final_path + + def assemble(self, result_queue, all_tile_data_large, + worker_count): + """ + Monitors the result_queue and assembles the image in real-time. + Replaces the standalone build_mosaic function. + """ + print('\nAssembling mosaic blocks...') + active_workers = worker_count + EOQ_VALUE = None # Sentinel value to indicate end of queue + + while active_workers > 0: + try: + img_coords, best_fit_tile_index = result_queue.get() + + if img_coords == EOQ_VALUE: + active_workers -= 1 + else: + tile_data = all_tile_data_large[best_fit_tile_index] + self.add_tile(tile_data, img_coords) + except KeyboardInterrupt: + print('\nInterrupt detected, saving progress...') + break diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index 4bfb3e4..977478b 100644 --- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -86,6 +86,7 @@ def compose(original_img, tiles, penalty=0.2, suffix=''): original_img_large, original_img_small = original_img tiles_large, tiles_small = tiles + # 1. 
Initialize our mosaic object mosaic = MosaicImage(original_img_large) all_tile_data_large = [list(tile.getdata()) for tile in tiles_large] @@ -94,43 +95,55 @@ def compose(original_img, tiles, penalty=0.2, suffix=''): work_queue = Queue(WORKER_COUNT) result_queue = Queue() - try: - Process(target=build_mosaic, args=( - result_queue, all_tile_data_large, - original_img_large, suffix)).start() - - for n in range(WORKER_COUNT): - Process(target=fit_tiles, args=( - work_queue, result_queue, - all_tile_data_small, penalty)).start() + # 2. Start the computational WORKERS only + worker_pool = [] + for n in range(WORKER_COUNT): + p = Process(target=fit_tiles, args=( + work_queue, result_queue, + all_tile_data_small, penalty)) + p.start() + worker_pool.append(p) + try: + # 3. Phase 1: Dispatch work (The Producer) progress = ProgressCounter(mosaic.x_tile_count * mosaic.y_tile_count) for x in range(mosaic.x_tile_count): for y in range(mosaic.y_tile_count): - large_box = ( - x * TILE_SIZE, - y * TILE_SIZE, - (x + 1) * TILE_SIZE, - (y + 1) * TILE_SIZE - ) - small_box = ( - x * TILE_SIZE / TILE_BLOCK_SIZE, - y * TILE_SIZE / TILE_BLOCK_SIZE, - (x + 1) * TILE_SIZE / TILE_BLOCK_SIZE, - (y + 1) * TILE_SIZE / TILE_BLOCK_SIZE - ) + # ... [Your existing cropping logic here] ... + large_box = (x * TILE_SIZE, y * TILE_SIZE, + (x + 1) * TILE_SIZE, (y + 1) * TILE_SIZE) + small_box = (x * TILE_SIZE / TILE_BLOCK_SIZE, + y * TILE_SIZE / TILE_BLOCK_SIZE, + (x + 1) * TILE_SIZE / TILE_BLOCK_SIZE, + (y + 1) * TILE_SIZE / TILE_BLOCK_SIZE) + work_queue.put( (list(original_img_small.crop(small_box).getdata()), - large_box) - ) - progress.update() # process updates on every x completion. + large_box)) + progress.update() + + # 4. Phase 2: Collect and Paste (The Consumer) + # We call this in the MAIN process. It will block here until + # the workers finish sending results through the result_queue. 
+ mosaic.assemble(result_queue, all_tile_data_large, + WORKER_COUNT, suffix=suffix) + mosaic.save(suffix=suffix) except KeyboardInterrupt: print('\nHalting, saving partial image please wait...') - - finally: + # We tell the workers to stop for n in range(WORKER_COUNT): work_queue.put((EOQ_VALUE, EOQ_VALUE)) + # Optional: assemble what you have. + mosaic.assemble(result_queue, all_tile_data_large, + WORKER_COUNT, suffix=suffix) + mosaic.save(suffix=suffix) + + finally: + # Ensure workers are cleaned up + for p in worker_pool: + if p.is_alive(): + work_queue.put((EOQ_VALUE, EOQ_VALUE)) def show_error(msg): From 5fe09916f9125f2e78a86670bf4dd390570ce5a2 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sun, 8 Mar 2026 08:44:17 -0700 Subject: [PATCH 48/71] added option for s3 data tool to get a smaller sample of videos, pics, and sources. --- tools/load_data_from_s3.py | 51 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tools/load_data_from_s3.py b/tools/load_data_from_s3.py index 9d2376e..a3f5675 100644 --- a/tools/load_data_from_s3.py +++ b/tools/load_data_from_s3.py @@ -3,6 +3,7 @@ import sys from PIL import Image from s3_access import S3Access +from random import shuffle def resize_in_place(file_path, max_dimension=600): @@ -38,6 +39,13 @@ def main(): help="Download all objects from the 'lowresvideo' \ prefix to /mnt/ebs/raw_vids" ) + parser.add_argument( + # videos to made to tiles + "--some-videos", + action="store_true", + help="Download random 15 videos from the 'lowresvideo' \ + prefix to /mnt/ebs/raw_vids" + ) parser.add_argument( # Images to made to mosaics "--all-samples", @@ -52,6 +60,13 @@ def main(): help="Download .png and .jpeg objects from \ 'picsources' to /mnt/ebs/raw_photos" ) + parser.add_argument( + # Images to made to mosaics + "--some-photos", + action="store_true", + help="Download a random 100 .png and .jpeg objects from \ + 'picsources' to /mnt/ebs/raw_photos" + ) # New flag for uploading results 
parser.add_argument( "--upload-results", @@ -85,6 +100,21 @@ def main(): key, os.path.join(local_vids_dir, os.path.basename(key))) + if args.some_videos: + """ Grabs a random 15 videos, rather than all" """ + local_vids_dir = os.path.join(mount_point, "raw_vids") + os.makedirs(local_vids_dir, exist_ok=True) + + print(f"Downloading videos to {local_vids_dir}...") + keys = s3.list_sources("lowresvideo") + shuffle(keys) + keys = keys[:15] + for key in keys: + if os.path.basename(key): + s3.download_to_disk( + key, + os.path.join(local_vids_dir, os.path.basename(key))) + # --- Logic for Photos to be turned to mosaics (Download) if args.all_samples: local_photos_dir = os.path.join(mount_point, "samples") @@ -121,6 +151,27 @@ def main(): # ensures no files are larger 600 on any side. resize_in_place(os.path.join(local_photos_dir, filename)) + # --- Logic for Photos to be turned to mosaics (Download) + if args.some_photos: + """ Grabs a random 100 photos, rather than all" """ + local_photos_dir = os.path.join(mount_point, "raw_photos") + os.makedirs(local_photos_dir, exist_ok=True) + + print(f"Downloading tile photos to {local_photos_dir}...") + keys = s3.list_sources("picsources") + shuffle(keys) + keys = keys[:100] + valid_extensions = ('.png', '.jpeg', '.jpg') + + for key in keys: + if key.lower().endswith(valid_extensions): + # downloads source files from s3 + filename = os.path.basename(key) + s3.download_to_disk( + key, os.path.join(local_photos_dir, filename)) + # ensures no files are larger 600 on any side. + resize_in_place(os.path.join(local_photos_dir, filename)) + # --- Logic for Results (Upload) --- if args.upload_results: local_mosaics_dir = os.path.join(mount_point, "mosaics") From 95b29ea533efe07b7eec2ad1abde53b73428ea01 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sun, 8 Mar 2026 08:47:32 -0700 Subject: [PATCH 49/71] also added option to grab only three sources for mosaics. 
--- tools/load_data_from_s3.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/load_data_from_s3.py b/tools/load_data_from_s3.py index a3f5675..3da8d13 100644 --- a/tools/load_data_from_s3.py +++ b/tools/load_data_from_s3.py @@ -53,6 +53,13 @@ def main(): help="Download .png and .jpeg objects from \ 'moasic-art-photos' to /mnt/ebs/samples" ) + parser.add_argument( + # Images to made to mosaics + "--some-samples", + action="store_true", + help="Download .png and .jpeg objects from \ + 'moasic-art-photos' to /mnt/ebs/samples" + ) parser.add_argument( # Images to made to mosaics "--all-photos", @@ -134,6 +141,27 @@ def main(): resize_in_place(os.path.join(local_photos_dir, filename)) # --- Logic for Photos to be turned to mosaics (Download) + if args.some_samples: + """ Limits to just three from the 'mosaic-art-photos' """ + local_photos_dir = os.path.join(mount_point, "samples") + os.makedirs(local_photos_dir, exist_ok=True) + + print(f"Downloading samples to {local_photos_dir}...") + keys = s3.list_sources("mosaic-art-photos") + valid_extensions = ('.png', '.jpeg', '.jpg') + shuffle(keys) + keys = keys[:3] + + for key in keys: + if key.lower().endswith(valid_extensions): + # downloads source files from s3 + filename = os.path.basename(key) + s3.download_to_disk( + key, os.path.join(local_photos_dir, filename)) + # ensures they are never larger 600 on longer side + resize_in_place(os.path.join(local_photos_dir, filename)) + + # --- Logic for Photos to be used as tiles (Download) if args.all_photos: local_photos_dir = os.path.join(mount_point, "raw_photos") os.makedirs(local_photos_dir, exist_ok=True) From 7f565d2c5b87db68223b8db7118563f911e5d386 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sun, 8 Mar 2026 13:50:29 -0700 Subject: [PATCH 50/71] have set up TileFitterSciKiet and Mosaic Classes to use methods now. 
--- mosaicizers/TileFitterSciKit.py | 17 +++++++++++++++-- mosaicizers/mosaic-v2.py | 32 +++----------------------------- 2 files changed, 18 insertions(+), 31 deletions(-) diff --git a/mosaicizers/TileFitterSciKit.py b/mosaicizers/TileFitterSciKit.py index a87c2db..86a437f 100644 --- a/mosaicizers/TileFitterSciKit.py +++ b/mosaicizers/TileFitterSciKit.py @@ -24,6 +24,20 @@ def __init__(self, tiles_data, match_res=5, penalty=0.02): self.tree = KDTree(np.array(avg_colors)) print("KDTree + SSIM Hybrid Fitter Ready.") + def fit_tiles(self, work_queue, result_queue): + EOQ_VALUE = None # Sentinel + while True: + try: + img_data, img_coords = work_queue.get(True) + if img_data == EOQ_VALUE: + break + tile_index = self.get_best_fit_tile(img_data) + result_queue.put((img_coords, tile_index)) + except KeyboardInterrupt: + pass + + result_queue.put((EOQ_VALUE, EOQ_VALUE)) + def get_best_fit_tile(self, img_data): """ img_data: A flat list of pixels (from original code's getdata()) @@ -34,9 +48,8 @@ def get_best_fit_tile(self, img_data): (self.match_res, self.match_res, 3)) # Step 1: KDTree Pruning (The "Bucket" step) - # Find the top 40 color matches target_avg = target_np.mean(axis=(0, 1)) - _, indices = self.tree.query(target_avg, k=40) + _, indices = self.tree.query(target_avg, k=100) best_score = -1 best_fit_tile_index = indices[0] diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index 977478b..5260143 100644 --- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -55,32 +55,6 @@ def update(self): flush=True, end='\r') -def build_mosaic(result_queue, - all_tile_data_large, - original_img_large, - suffix=''): - mosaic = MosaicImage(original_img_large) - - active_workers = WORKER_COUNT - while True: - try: - img_coords, best_fit_tile_index = result_queue.get() - - if img_coords == EOQ_VALUE: - active_workers -= 1 - if not active_workers: - break - else: - tile_data = all_tile_data_large[best_fit_tile_index] - mosaic.add_tile(tile_data, img_coords) 
- - except KeyboardInterrupt: - pass - - OUT_FILEPATH = mosaic.save(suffix=suffix) - print('\nFinished, output is in', OUT_FILEPATH) - - def compose(original_img, tiles, penalty=0.2, suffix=''): print('Building mosaic, press Ctrl-C to abort...') original_img_large, original_img_small = original_img @@ -98,9 +72,9 @@ def compose(original_img, tiles, penalty=0.2, suffix=''): # 2. Start the computational WORKERS only worker_pool = [] for n in range(WORKER_COUNT): - p = Process(target=fit_tiles, args=( - work_queue, result_queue, - all_tile_data_small, penalty)) + fitter = TileFitterSciKit(all_tile_data_small, penalty=penalty) + p = Process(target=fitter.fit_tiles, args=( + work_queue, result_queue)) p.start() worker_pool.append(p) From 5d132014f09beaad54fc4fa1fa1199163211ab5a Mon Sep 17 00:00:00 2001 From: Joel G Date: Sun, 8 Mar 2026 14:23:52 -0700 Subject: [PATCH 51/71] removed an extra suffix argument ; --- mosaicizers/mosaic-v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index 5260143..ecdc496 100644 --- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -100,7 +100,7 @@ def compose(original_img, tiles, penalty=0.2, suffix=''): # We call this in the MAIN process. It will block here until # the workers finish sending results through the result_queue. mosaic.assemble(result_queue, all_tile_data_large, - WORKER_COUNT, suffix=suffix) + WORKER_COUNT) mosaic.save(suffix=suffix) except KeyboardInterrupt: From c4fb4e50d3f221b730905b55bb3014d8056d5476 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sun, 8 Mar 2026 14:42:12 -0700 Subject: [PATCH 52/71] removed a deadlock. 
--- mosaicizers/mosaic-v2.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index ecdc496..627beae 100644 --- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -66,7 +66,7 @@ def compose(original_img, tiles, penalty=0.2, suffix=''): all_tile_data_large = [list(tile.getdata()) for tile in tiles_large] all_tile_data_small = [list(tile.getdata()) for tile in tiles_small] - work_queue = Queue(WORKER_COUNT) + work_queue = Queue() result_queue = Queue() # 2. Start the computational WORKERS only @@ -99,19 +99,12 @@ def compose(original_img, tiles, penalty=0.2, suffix=''): # 4. Phase 2: Collect and Paste (The Consumer) # We call this in the MAIN process. It will block here until # the workers finish sending results through the result_queue. - mosaic.assemble(result_queue, all_tile_data_large, - WORKER_COUNT) - mosaic.save(suffix=suffix) except KeyboardInterrupt: print('\nHalting, saving partial image please wait...') # We tell the workers to stop for n in range(WORKER_COUNT): work_queue.put((EOQ_VALUE, EOQ_VALUE)) - # Optional: assemble what you have. - mosaic.assemble(result_queue, all_tile_data_large, - WORKER_COUNT, suffix=suffix) - mosaic.save(suffix=suffix) finally: # Ensure workers are cleaned up @@ -119,6 +112,10 @@ def compose(original_img, tiles, penalty=0.2, suffix=''): if p.is_alive(): work_queue.put((EOQ_VALUE, EOQ_VALUE)) + mosaic.assemble(result_queue, all_tile_data_large, + WORKER_COUNT) + mosaic.save(suffix=suffix) + def show_error(msg): print('ERROR: {}'.format(msg)) From df4b265f99aca7dd12da70876b0dad64fcc068f5 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sun, 8 Mar 2026 15:08:43 -0700 Subject: [PATCH 53/71] updated functionality of progress counter. 
--- mosaicizers/MosaicImage.py | 3 +++ mosaicizers/ProgressCounter.py | 9 +++++++++ mosaicizers/mosaic-v2.py | 31 ++----------------------------- 3 files changed, 14 insertions(+), 29 deletions(-) create mode 100644 mosaicizers/ProgressCounter.py diff --git a/mosaicizers/MosaicImage.py b/mosaicizers/MosaicImage.py index 1628143..0d98630 100644 --- a/mosaicizers/MosaicImage.py +++ b/mosaicizers/MosaicImage.py @@ -2,6 +2,7 @@ import os import hashlib from PIL import Image +from ProgressCounter import ProgressCounter class MosaicImage: @@ -50,6 +51,7 @@ def assemble(self, result_queue, all_tile_data_large, Replaces the standalone build_mosaic function. """ print('\nAssembling mosaic blocks...') + progress = ProgressCounter(self.total_tiles) active_workers = worker_count EOQ_VALUE = None # Sentinel value to indicate end of queue @@ -62,6 +64,7 @@ def assemble(self, result_queue, all_tile_data_large, else: tile_data = all_tile_data_large[best_fit_tile_index] self.add_tile(tile_data, img_coords) + progress.update() except KeyboardInterrupt: print('\nInterrupt detected, saving progress...') break diff --git a/mosaicizers/ProgressCounter.py b/mosaicizers/ProgressCounter.py new file mode 100644 index 0000000..7c980a6 --- /dev/null +++ b/mosaicizers/ProgressCounter.py @@ -0,0 +1,9 @@ +class ProgressCounter: + def __init__(self, total): + self.total = total + self.counter = 0 + + def update(self): + self.counter += 1 + print("Progress: {:04.1f}%".format(100 * self.counter / self.total), + flush=True, end='\r') diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index 627beae..d933f96 100644 --- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -14,6 +14,7 @@ from TargetImage import TargetImage from TileProcessor import TileProcessor from TileFitterSciKit import TileFitterSciKit +from ProgressCounter import ProgressCounter # These are now configed by CLI or class defaults TILE_SIZE = 50 # height/width of mosaic tiles in pixels @@ -27,34 +28,6 @@ 
EOQ_VALUE = None -def fit_tiles(work_queue, result_queue, - tiles_data, penalty=0.2): - tile_fitter = TileFitterSciKit(tiles_data, penalty=penalty) - - while True: - try: - img_data, img_coords = work_queue.get(True) - if img_data == EOQ_VALUE: - break - tile_index = tile_fitter.get_best_fit_tile(img_data) - result_queue.put((img_coords, tile_index)) - except KeyboardInterrupt: - pass - - result_queue.put((EOQ_VALUE, EOQ_VALUE)) - - -class ProgressCounter: - def __init__(self, total): - self.total = total - self.counter = 0 - - def update(self): - self.counter += 1 - print("Progress: {:04.1f}%".format(100 * self.counter / self.total), - flush=True, end='\r') - - def compose(original_img, tiles, penalty=0.2, suffix=''): print('Building mosaic, press Ctrl-C to abort...') original_img_large, original_img_small = original_img @@ -94,7 +67,7 @@ def compose(original_img, tiles, penalty=0.2, suffix=''): work_queue.put( (list(original_img_small.crop(small_box).getdata()), large_box)) - progress.update() + progress.update() # 4. Phase 2: Collect and Paste (The Consumer) # We call this in the MAIN process. It will block here until From a7c9069cfb8393e1ea2167a283cf47bd08db91cf Mon Sep 17 00:00:00 2001 From: Joel G Date: Sun, 8 Mar 2026 15:25:16 -0700 Subject: [PATCH 54/71] added a stat tracker to get vibes on matching. --- mosaicizers/TileFitterSciKit.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/mosaicizers/TileFitterSciKit.py b/mosaicizers/TileFitterSciKit.py index 86a437f..d8f75bd 100644 --- a/mosaicizers/TileFitterSciKit.py +++ b/mosaicizers/TileFitterSciKit.py @@ -10,6 +10,7 @@ def __init__(self, tiles_data, match_res=5, penalty=0.02): self.usages = [0.0 for x in range(len(tiles_data))] self.tiles_data = tiles_data self.match_res = match_res + self.winners = np.array([], dtype=float) print("Initializing KDTree for hybrid search...") # 1. 
Convert tiles to NumPy arrays once @@ -36,6 +37,8 @@ def fit_tiles(self, work_queue, result_queue): except KeyboardInterrupt: pass + self.get_winning_stats() # print stats + # Signal that this worker is done result_queue.put((EOQ_VALUE, EOQ_VALUE)) def get_best_fit_tile(self, img_data): @@ -51,6 +54,7 @@ def get_best_fit_tile(self, img_data): target_avg = target_np.mean(axis=(0, 1)) _, indices = self.tree.query(target_avg, k=100) + best_raw_score = float('-inf') best_score = -1 best_fit_tile_index = indices[0] @@ -77,11 +81,28 @@ def get_best_fit_tile(self, img_data): if score > best_score: best_score = score best_fit_tile_index = idx + best_raw_score = score # Early exit if we find an amazing match if score > 0.98: + best_raw_score = score break + self.winners = np.append(self.winners, best_raw_score) self.usages[best_fit_tile_index] = \ self.usages[best_fit_tile_index] + self.penalty return best_fit_tile_index + + def get_winning_stats(self): + """Prints statistical analysis of the tile match quality.""" + if self.winners.size == 0: + print("No winners recorded yet.") + return + + print("\n--- Tile Match Quality Stats ---") + print(f"Total Blocks Matched: {len(self.winners)}") + print(f"Mean SSIM: {np.mean(self.winners):.4f}") + print(f"Median SSIM: {np.median(self.winners):.4f}") + print(f"Highest SSIM: {np.max(self.winners):.4f} (Best Match)") + print(f"Lowest SSIM: {np.min(self.winners):.4f} (Worst Match)") + print("--------------------------------\n") From 7b267bc3e6651c8a04f823b24a4d8cd7599daa8a Mon Sep 17 00:00:00 2001 From: Joel G Date: Sun, 8 Mar 2026 15:40:20 -0700 Subject: [PATCH 55/71] optimizing for caching --- mosaicizers/TileProcessor.py | 37 ++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/mosaicizers/TileProcessor.py b/mosaicizers/TileProcessor.py index 90c81b8..4ee0498 100644 --- a/mosaicizers/TileProcessor.py +++ b/mosaicizers/TileProcessor.py @@ -1,13 +1,38 @@ import os +import pickle +import 
gzip from PIL import Image, ImageOps class TileProcessor: - def __init__(self, tiles_directory, tile_size=50, tile_res=5): + def __init__(self, tiles_directory, + tile_size=50, tile_res=5, + cache_file='/mnt/ebs/TILES_DATA.pkl.gz'): self.tiles_directory = tiles_directory self.tile_size = tile_size self.tile_block_size = tile_size / max(min(tile_res, tile_size), 1) self.tile_res = tile_res + self.cache_file = cache_file + + def tiles_save(self, data): + """Saves processed tile data to a compressed pickle file.""" + print(f"Caching processed tiles to {self.cache_file}...") + try: + with gzip.open(self.cache_file, 'wb') as f: + pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL) + print("Cache saved successfully.") + except Exception as e: + print(f"Failed to save cache: {e}") + + def tiles_load(self): + """Loads processed tile data from the compressed pickle file.""" + print(f"Loading cached tiles from {self.cache_file}...") + try: + with gzip.open(self.cache_file, 'rb') as f: + return pickle.load(f) + except Exception as e: + print(f"Failed to load cache: {e}") + return None def get_average_color(self, img_path_or_obj): """ @@ -64,6 +89,12 @@ def get_tiles(self): exp_threshold = 1 # for logging print('Reading tiles from {}...'.format(self.tiles_directory)) + if os.path.exists(self.cache_file): + data = self.tiles_load() + if data: + print(f"Loaded {len(data[0])} tiles from cache.") + return data + # search the tiles directory recursively for root, subFolders, files in os.walk(self.tiles_directory): for tile_name in files: @@ -79,4 +110,6 @@ def get_tiles(self): print('Processed {} tiles.'.format(len(large_tiles))) - return (large_tiles, small_tiles) + data = (large_tiles, small_tiles) + self.tiles_save(data) + return data From c58e0654c8644db97c133993effe4a1cc9878c91 Mon Sep 17 00:00:00 2001 From: Joel Gonzaga Date: Sat, 21 Mar 2026 18:33:20 -0700 Subject: [PATCH 56/71] Refactor (#3) * refactored upload tool * I demand a fiddle folder * added a new advanced 
parsing tool. * refactored splicer for more flexibility --- .gitignore | 5 +- tools/advanced_parse.py | 141 ++++++++++++++++++++ tools/load_data_from_s3.py | 264 +++++++++++++------------------------ tools/splicer.py | 28 +++- 4 files changed, 262 insertions(+), 176 deletions(-) create mode 100644 tools/advanced_parse.py diff --git a/.gitignore b/.gitignore index d7d111a..dd3ea2d 100644 --- a/.gitignore +++ b/.gitignore @@ -220,4 +220,7 @@ __pycache__ secrets.txt test-media/ test-results/ -logs/ \ No newline at end of file +logs/ + +# fiddle and experimentation files +fiddle/ \ No newline at end of file diff --git a/tools/advanced_parse.py b/tools/advanced_parse.py new file mode 100644 index 0000000..a0d97c2 --- /dev/null +++ b/tools/advanced_parse.py @@ -0,0 +1,141 @@ +######################################################################## +# The image parse makes crops based on random areas of an image +# It takes 40% area crops, and then finds the most complex +# and the most smooth. Using SSIM to identify structural complexity. +# The output is two 50x50 tiles per image, one complex and one smooth. 
+######################################################################### + +import cv2 +import numpy as np +import argparse +import os +import hashlib +import random +from skimage.metrics import structural_similarity as ssim +from concurrent.futures import ProcessPoolExecutor, as_completed +import multiprocessing + + +def get_image_md5(img): + """Generates an MD5 hash of the image pixel data.""" + return hashlib.md5(img.tobytes()).hexdigest() + + +def get_random_crops(img, num_crops=10): + """Extracts N square crops of a 40% shorter dimension.""" + h, w = img.shape[:2] + crop_size = int(min(h, w) * 0.4) + + crops = [] + for _ in range(num_crops): + y = np.random.randint(0, h - crop_size + 1) + x = np.random.randint(0, w - crop_size + 1) + crops.append(img[y:y+crop_size, x:x+crop_size]) + return crops + + +def batch_process(source_folder, out_dir, limit=None): + valid_exts = ('.jpg', '.jpeg', '.png') + files = [ + os.path.join(source_folder, f) for f in os.listdir(source_folder) + if f.lower().endswith(valid_exts) + ] + + random.shuffle(files) + if limit: + files = files[:limit] + + num_cpus = multiprocessing.cpu_count() + print(f"Launching parallel processing on {num_cpus} CPUs...") + + with ProcessPoolExecutor(max_workers=num_cpus) as executor: + futures = {executor.submit(process_image, f, out_dir): f for f in files} # noqa: E501 + + for future in as_completed(futures): + result = future.result() + print(result) + + +def analyze_structural_complexity(crop): + """ + Returns an SSIM score comparing the crop to a blurred version of itself. 
+ High Score (~1.0): Smooth / Low Detail + Low Score (<0.5): Complex / High Detail + """ + gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY) + # 25x25 kernel provides a significant enough blur to measure structure loss + blurred = cv2.bilateralFilter(gray, 9, 75, 75) + return ssim(gray, blurred) + + +def save_tile(crop, output_dir, suffix): + """Resizes to 50x50, hashes, and saves to disk.""" + tile_50x50 = cv2.resize(crop, (50, 50), interpolation=cv2.INTER_AREA) + file_hash = get_image_md5(tile_50x50) + + filename = f"{file_hash}_{suffix}.png" + save_path = os.path.join(output_dir, filename) + + cv2.imwrite(save_path, tile_50x50) + return save_path + + +def process_image(image_path, out_dir): + img = cv2.imread(image_path) + if img is None: + print(f"Error: Could not load {image_path}") + return + + # 1. Generate candidate crops + candidates = get_random_crops(img, num_crops=15) + + # 2. Score candidates + # Store as list of (score, crop) tuples + scored_crops = [(analyze_structural_complexity(c), c) for c in candidates] + + # 3. Identify best matches + scored_crops.sort(key=lambda x: x[0]) + + most_complex_crop = scored_crops[0][1] + most_smooth_crop = scored_crops[-1][1] + + # 4. Save results + output_dir = out_dir + os.makedirs(output_dir, exist_ok=True) + + path_c = save_tile(most_complex_crop, output_dir, "complex") + path_s = save_tile(most_smooth_crop, output_dir, "smooth") + + print(f"Success! 
\nComplex: {path_c} \nSmooth: {path_s}") + + +def main(): + parser = argparse.ArgumentParser(description="Tile extractor \ + with batch support.") + + parser.add_argument("--input", type=str, + help="Path to a single input image") + parser.add_argument("--source-folder", type=str, + help="Folder containing images to process") + parser.add_argument("--out-dir", type=str, + default="test_tiles", help="Where to save tiles") + parser.add_argument("--limit", type=int, + help="Max number of images to process from folder") + + args = parser.parse_args() + + if args.source_folder: + if not os.path.isdir(args.source_folder): + print(f"Error: {args.source_folder} is not a directory.") + return + batch_process(args.source_folder, args.out_dir, args.limit) + + elif args.input: + process_image(args.input, args.out_dir) + + else: + parser.print_help() + + +if __name__ == "__main__": + main() diff --git a/tools/load_data_from_s3.py b/tools/load_data_from_s3.py index 3da8d13..2bdf92d 100644 --- a/tools/load_data_from_s3.py +++ b/tools/load_data_from_s3.py @@ -32,59 +32,44 @@ def main(): description="Transfer assets between S3 and EBS storage." 
) - parser.add_argument( - # videos to made to tiles - "--all-videos", - action="store_true", - help="Download all objects from the 'lowresvideo' \ - prefix to /mnt/ebs/raw_vids" + # Create a mutually exclusive group so only one task happens at a time + task_group = parser.add_mutually_exclusive_group(required=True) + + task_group.add_argument( + "--videos", action="store_true", + help="Download video assets" ) - parser.add_argument( - # videos to made to tiles - "--some-videos", - action="store_true", - help="Download random 15 videos from the 'lowresvideo' \ - prefix to /mnt/ebs/raw_vids" + task_group.add_argument( + "--samples", action="store_true", + help="Download sample images (to be turned into mosaics)" ) - parser.add_argument( - # Images to made to mosaics - "--all-samples", - action="store_true", - help="Download .png and .jpeg objects from \ - 'moasic-art-photos' to /mnt/ebs/samples" + task_group.add_argument( + "--photos", action="store_true", + help="Download tile photos (sources for mosaic tiles)" ) - parser.add_argument( - # Images to made to mosaics - "--some-samples", - action="store_true", - help="Download .png and .jpeg objects from \ - 'moasic-art-photos' to /mnt/ebs/samples" + task_group.add_argument( + "--upload-results", action="store_true", + help="Upload local mosaics to S3" ) + + # Shared modifiers parser.add_argument( - # Images to made to mosaics - "--all-photos", - action="store_true", - help="Download .png and .jpeg objects from \ - 'picsources' to /mnt/ebs/raw_photos" + "--limit", type=int, default=None, + help="Limit the number of files downloaded \ + (e.g., 15 for videos, 3 for samples, 100 for photos)" ) parser.add_argument( - # Images to made to mosaics - "--some-photos", - action="store_true", - help="Download a random 100 .png and .jpeg objects from \ - 'picsources' to /mnt/ebs/raw_photos" + "--out-dir", type=str, default=None, + help="Override the default local directory" ) - # New flag for uploading results 
parser.add_argument( - "--upload-results", - action="store_true", - help="Upload all images from /mnt/ebs/mosaics \ - to the S3 'mosaics' prefix" + "--prefix", type=str, default=None, + help="Override the default S3 prefix (folder) to pull from" ) args = parser.parse_args() - # Configuration from environment and paths + # Configuration bucket_name = os.environ.get("S3_STORAGE") mount_point = "/mnt/ebs" @@ -93,137 +78,76 @@ def main(): sys.exit(1) s3 = S3Access(bucket_name) - - # --- Logic for Videos (Download) --- - if args.all_videos: - local_vids_dir = os.path.join(mount_point, "raw_vids") - os.makedirs(local_vids_dir, exist_ok=True) - - print(f"Downloading videos to {local_vids_dir}...") - keys = s3.list_sources("lowresvideo") - for key in keys: - if os.path.basename(key): - s3.download_to_disk( - key, - os.path.join(local_vids_dir, os.path.basename(key))) - - if args.some_videos: - """ Grabs a random 15 videos, rather than all" """ - local_vids_dir = os.path.join(mount_point, "raw_vids") - os.makedirs(local_vids_dir, exist_ok=True) - - print(f"Downloading videos to {local_vids_dir}...") - keys = s3.list_sources("lowresvideo") - shuffle(keys) - keys = keys[:15] - for key in keys: - if os.path.basename(key): - s3.download_to_disk( - key, - os.path.join(local_vids_dir, os.path.basename(key))) - - # --- Logic for Photos to be turned to mosaics (Download) - if args.all_samples: - local_photos_dir = os.path.join(mount_point, "samples") - os.makedirs(local_photos_dir, exist_ok=True) - - print(f"Downloading samples to {local_photos_dir}...") - keys = s3.list_sources("mosaic-art-photos") - valid_extensions = ('.png', '.jpeg', '.jpg') - - for key in keys: - if key.lower().endswith(valid_extensions): - # downloads source files from s3 - filename = os.path.basename(key) - s3.download_to_disk( - key, os.path.join(local_photos_dir, filename)) - # ensures they are never larger 600 on longer side - resize_in_place(os.path.join(local_photos_dir, filename)) - - # --- Logic for 
Photos to be turned to mosaics (Download) - if args.some_samples: - """ Limits to just three from the 'mosaic-art-photos' """ - local_photos_dir = os.path.join(mount_point, "samples") - os.makedirs(local_photos_dir, exist_ok=True) - - print(f"Downloading samples to {local_photos_dir}...") - keys = s3.list_sources("mosaic-art-photos") - valid_extensions = ('.png', '.jpeg', '.jpg') - shuffle(keys) - keys = keys[:3] - - for key in keys: - if key.lower().endswith(valid_extensions): - # downloads source files from s3 - filename = os.path.basename(key) - s3.download_to_disk( - key, os.path.join(local_photos_dir, filename)) - # ensures they are never larger 600 on longer side - resize_in_place(os.path.join(local_photos_dir, filename)) - - # --- Logic for Photos to be used as tiles (Download) - if args.all_photos: - local_photos_dir = os.path.join(mount_point, "raw_photos") - os.makedirs(local_photos_dir, exist_ok=True) - - print(f"Downloading tile photos to {local_photos_dir}...") - keys = s3.list_sources("picsources") - valid_extensions = ('.png', '.jpeg', '.jpg') - - for key in keys: - if key.lower().endswith(valid_extensions): - # downloads source files from s3 - filename = os.path.basename(key) - s3.download_to_disk( - key, os.path.join(local_photos_dir, filename)) - # ensures no files are larger 600 on any side. 
- resize_in_place(os.path.join(local_photos_dir, filename)) - - # --- Logic for Photos to be turned to mosaics (Download) - if args.some_photos: - """ Grabs a random 100 photos, rather than all" """ - local_photos_dir = os.path.join(mount_point, "raw_photos") - os.makedirs(local_photos_dir, exist_ok=True) - - print(f"Downloading tile photos to {local_photos_dir}...") - keys = s3.list_sources("picsources") + valid_extensions = ('.png', '.jpeg', '.jpg') + + # --- Logic Based Assignment for Defaults --- + if args.videos: + local_dir = args.out_dir or os.path.join(mount_point, "raw_vids") + prefix = args.prefix or "lowresvideo" + limit = args.limit or (15 if "limit" in sys.argv else None) + is_image = False + + elif args.samples: + local_dir = args.out_dir or os.path.join(mount_point, "samples") + prefix = args.prefix or "mosaic-art-photos" + limit = args.limit or (3 if "limit" in sys.argv else None) + is_image = True + + elif args.photos: + local_dir = args.out_dir or os.path.join(mount_point, "raw_photos") + prefix = args.prefix or "picsources" + limit = args.limit or (100 if "limit" in sys.argv else None) + is_image = True + + elif args.upload_results: + # Upload logic is slightly different + local_dir = args.out_dir or os.path.join(mount_point, "mosaics") + handle_upload(s3, local_dir, bucket_name) + return + + # --- Unified Download Execution --- + os.makedirs(local_dir, exist_ok=True) + print(f"Syncing S3 '{prefix}' to {local_dir}...") + + keys = s3.list_sources(prefix) + + if limit: shuffle(keys) - keys = keys[:100] - valid_extensions = ('.png', '.jpeg', '.jpg') - - for key in keys: - if key.lower().endswith(valid_extensions): - # downloads source files from s3 - filename = os.path.basename(key) - s3.download_to_disk( - key, os.path.join(local_photos_dir, filename)) - # ensures no files are larger 600 on any side. 
- resize_in_place(os.path.join(local_photos_dir, filename)) - - # --- Logic for Results (Upload) --- - if args.upload_results: - local_mosaics_dir = os.path.join(mount_point, "mosaics") - - if not os.path.exists(local_mosaics_dir): - print(f"Skipping upload: {local_mosaics_dir} does not exist.") - else: - print(f"Uploading mosaic results \ - to S3 bucket '{bucket_name}/mosaics'...") - valid_images = ('.png', '.jpg', '.jpeg', '.tiff') - - # Loop through the local mosaic directory - for filename in os.listdir(local_mosaics_dir): - if filename.lower().endswith(valid_images): - local_path = os.path.join(local_mosaics_dir, filename) - # Prepend 'mosaics/' prefix for S3 - s3_key = f"mosaics/{filename}" - - print(f"Uploading {filename}...") - s3.upload_from_disk(local_path, s3_key) - - # Show help if no flags are provided - if not (args.all_videos or args.all_photos or args.upload_results): - parser.print_help() + keys = keys[:limit] + + for key in keys: + filename = os.path.basename(key) + if not filename: + continue + + # Filter for images if applicable + if is_image and not key.lower().endswith(valid_extensions): + continue + + dest_path = os.path.join(local_dir, filename) + s3.download_to_disk(key, dest_path) + + if is_image: + resize_in_place(dest_path) + + +def handle_upload(s3, local_dir, bucket_name): + if not os.path.exists(local_dir): + print(f"Skipping upload: {local_dir} does not exist.") + return + + valid_images = ('.png', '.jpg', '.jpeg', '.tiff') + print(f"Uploading results from {local_dir} to S3 \ + {bucket_name}/mosaics'...") + + for filename in os.listdir(local_dir): + if filename.lower().endswith(valid_images): + s3.upload_from_disk(os.path.join(local_dir, filename), + f"mosaics/{filename}") + + +if __name__ == "__main__": + main() if __name__ == "__main__": diff --git a/tools/splicer.py b/tools/splicer.py index a99c87c..de3352f 100644 --- a/tools/splicer.py +++ b/tools/splicer.py @@ -2,6 +2,7 @@ import os import hashlib import argparse +import random 
from multiprocessing import Pool, cpu_count from functools import partial from PIL import Image @@ -144,8 +145,20 @@ def main(): help="Path to folder containing \ images for processing.") + parser.add_argument("-out-dir", + help="Override default output directory \ + (/mnt/ebs/frames)") + parser.add_argument("-limit", type=int, + help="Limit number of files processed \ + (for testing)") + args = parser.parse_args() + # Decalare output folder + output_folder = args.out_dir or "/mnt/ebs/frames" + if not os.path.exists(output_folder): + os.makedirs(output_folder) + # --- BRANCH 1: IMAGE PROCESSING --- if args.image_folder: print(f"Starting Image Processing mode on: {args.image_folder}") @@ -156,18 +169,19 @@ def main(): extension = file.split(".")[-1] if extension.lower() in ["png", "jpg", "jpeg"]: photos.append(os.path.join(root, file)) + + if args.limit: + random.shuffle(photos) + photos = photos[:args.limit] + if len(photos) == 0: print(f'No photos found {abs_folder}') for photo in photos: process_image(photo, double=True, - output_dir="/mnt/ebs/frames") + output_dir=output_folder) return # --- BRANCH 2: VIDEO PROCESSING --- - output_folder = os.path.join('/mnt/ebs/', "frames") - if not os.path.exists(output_folder): - os.makedirs(output_folder) - video_files = [] if args.video_file: @@ -180,6 +194,10 @@ def main(): if file.lower().endswith(".mp4"): video_files.append(os.path.join(root, file)) + if args.limit: + random.shuffle(video_files) + video_files = video_files[:args.limit] + if not video_files: print("No videos found.") return From 327ea0f7639e5127252b747275395508d6e18c6b Mon Sep 17 00:00:00 2001 From: Joel Gonzaga Date: Fri, 3 Apr 2026 07:51:27 -0700 Subject: [PATCH 57/71] Optimize memory (#4) * using a shared space in memory across all workers. * updated comments. 
--- mosaicizers/mosaic-v2.py | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index d933f96..a796ba5 100644 --- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -1,13 +1,15 @@ ############################################### # This file will create a mosaic from CLI. -# TODO need to sort out functions here -# In a new extension of Mosaic. +# This version use scikit-learn's KDTree for +# tile matching and multiprocessing for speed. +# Also optimized memory usage by sharing tile data +# across workers. ############################################### import os import os.path import argparse -from multiprocessing import Process, Queue, cpu_count +from multiprocessing import Queue, cpu_count, get_context # These are the custom imports from MosaicImage import MosaicImage @@ -28,6 +30,21 @@ EOQ_VALUE = None +_global_fitter = None # Shared across all workers + + +def worker_init(tile_data, penalty): + """This runs ONCE when each worker process starts.""" + global _global_fitter + _global_fitter = TileFitterSciKit(tile_data, penalty=penalty) + + +def worker_task(work_queue, result_queue): + """The actual loop the worker runs.""" + # Use the global fitter already sitting in this process's memory + _global_fitter.fit_tiles(work_queue, result_queue) + + def compose(original_img, tiles, penalty=0.2, suffix=''): print('Building mosaic, press Ctrl-C to abort...') original_img_large, original_img_small = original_img @@ -42,12 +59,16 @@ def compose(original_img, tiles, penalty=0.2, suffix=''): work_queue = Queue() result_queue = Queue() - # 2. Start the computational WORKERS only + # 2a Init the Global Fitter. + worker_init(all_tile_data_small, penalty) + ctx = get_context('fork') # from an import at the top worker_pool = [] + # 2b Rally workers. 
for n in range(WORKER_COUNT): - fitter = TileFitterSciKit(all_tile_data_small, penalty=penalty) - p = Process(target=fitter.fit_tiles, args=( - work_queue, result_queue)) + p = ctx.Process( + target=worker_task, + args=(work_queue, result_queue) + ) p.start() worker_pool.append(p) From c973de447a738fc26c29dc0df6cb375a9bc12c77 Mon Sep 17 00:00:00 2001 From: Joel Gonzaga Date: Wed, 6 May 2026 15:34:43 -0700 Subject: [PATCH 58/71] Optimize memory (#5) * using a shared space in memory across all workers. * updated comments. From 9ed7cbb3685e50886557111296bd3c43489c7d6a Mon Sep 17 00:00:00 2001 From: Joel G Date: Wed, 6 May 2026 16:01:49 -0700 Subject: [PATCH 59/71] added a process timer class --- .gitignore | 0 .pre-commit-config.yaml | 0 LICENSE | 0 README.md | 0 downloads.py | 0 mosaicizers/MosaicImage.py | 0 mosaicizers/ProcessTimer.py | 30 ++++++++++++++++++++++++++++++ mosaicizers/ProgressCounter.py | 0 mosaicizers/TargetImage.py | 0 mosaicizers/TileFitterSciKit.py | 0 mosaicizers/TileProcessor.py | 0 mosaicizers/mosaic-v2.py | 0 mosaicizers/mosaic.py | 0 pep8-enforcement.yaml | 0 requirements.txt | 0 test-image.py | 0 tools/advanced_parse.py | 0 tools/load_data_from_s3.py | 0 tools/s3_access.py | 0 tools/splicer.py | 0 20 files changed, 30 insertions(+) mode change 100644 => 100755 .gitignore mode change 100644 => 100755 .pre-commit-config.yaml mode change 100644 => 100755 LICENSE mode change 100644 => 100755 README.md mode change 100644 => 100755 downloads.py mode change 100644 => 100755 mosaicizers/MosaicImage.py create mode 100755 mosaicizers/ProcessTimer.py mode change 100644 => 100755 mosaicizers/ProgressCounter.py mode change 100644 => 100755 mosaicizers/TargetImage.py mode change 100644 => 100755 mosaicizers/TileFitterSciKit.py mode change 100644 => 100755 mosaicizers/TileProcessor.py mode change 100644 => 100755 mosaicizers/mosaic-v2.py mode change 100644 => 100755 mosaicizers/mosaic.py mode change 100644 => 100755 pep8-enforcement.yaml mode change 100644 
=> 100755 requirements.txt mode change 100644 => 100755 test-image.py mode change 100644 => 100755 tools/advanced_parse.py mode change 100644 => 100755 tools/load_data_from_s3.py mode change 100644 => 100755 tools/s3_access.py mode change 100644 => 100755 tools/splicer.py diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml old mode 100644 new mode 100755 diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/downloads.py b/downloads.py old mode 100644 new mode 100755 diff --git a/mosaicizers/MosaicImage.py b/mosaicizers/MosaicImage.py old mode 100644 new mode 100755 diff --git a/mosaicizers/ProcessTimer.py b/mosaicizers/ProcessTimer.py new file mode 100755 index 0000000..071aca1 --- /dev/null +++ b/mosaicizers/ProcessTimer.py @@ -0,0 +1,30 @@ +from datetime import datetime + + +class ProcessTimer: + def __init__(self, process_name, start_time=None): + self.process_name = process_name + # slight non-standard to make sure "now" is when + # init is invoked. 
+ self.start_time = start_time if start_time else datetime.now() + + print(f"[{self.process_name}] Process started at: \ + {self.start_time.strftime('%Y %b %d %H:%M:%S')}") + + def finish(self): + end_time = datetime.now() + print(f"[{self.process_name}] Process ended at: \ + {end_time.strftime('%Y %b %d %H:%M:%S')}") + + # Calculate duration + duration = end_time - self.start_time + total_seconds = int(duration.total_seconds()) + + hours = total_seconds // 3600 + minutes = (total_seconds % 3600) // 60 + seconds = total_seconds % 60 + total_minutes = total_seconds // 60 + + print(f"Total time was: {hours} hours {minutes} \ + minutes {seconds} seconds \ + ({total_minutes} total minutes)") diff --git a/mosaicizers/ProgressCounter.py b/mosaicizers/ProgressCounter.py old mode 100644 new mode 100755 diff --git a/mosaicizers/TargetImage.py b/mosaicizers/TargetImage.py old mode 100644 new mode 100755 diff --git a/mosaicizers/TileFitterSciKit.py b/mosaicizers/TileFitterSciKit.py old mode 100644 new mode 100755 diff --git a/mosaicizers/TileProcessor.py b/mosaicizers/TileProcessor.py old mode 100644 new mode 100755 diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py old mode 100644 new mode 100755 diff --git a/mosaicizers/mosaic.py b/mosaicizers/mosaic.py old mode 100644 new mode 100755 diff --git a/pep8-enforcement.yaml b/pep8-enforcement.yaml old mode 100644 new mode 100755 diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 diff --git a/test-image.py b/test-image.py old mode 100644 new mode 100755 diff --git a/tools/advanced_parse.py b/tools/advanced_parse.py old mode 100644 new mode 100755 diff --git a/tools/load_data_from_s3.py b/tools/load_data_from_s3.py old mode 100644 new mode 100755 diff --git a/tools/s3_access.py b/tools/s3_access.py old mode 100644 new mode 100755 diff --git a/tools/splicer.py b/tools/splicer.py old mode 100644 new mode 100755 From 7ba7713fd3c52cff77f4ab4bc6b1f87f476b9ad8 Mon Sep 17 00:00:00 2001 From: 
Joel G Date: Wed, 6 May 2026 16:38:44 -0700 Subject: [PATCH 60/71] Implemented process timer --- mosaicizers/TileProcessor.py | 5 ++++- mosaicizers/mosaic-v2.py | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/mosaicizers/TileProcessor.py b/mosaicizers/TileProcessor.py index 4ee0498..43257fc 100755 --- a/mosaicizers/TileProcessor.py +++ b/mosaicizers/TileProcessor.py @@ -3,6 +3,8 @@ import gzip from PIL import Image, ImageOps +from ProcessTimer import ProcessTimer + class TileProcessor: def __init__(self, tiles_directory, @@ -96,6 +98,7 @@ def get_tiles(self): return data # search the tiles directory recursively + tileTimer = ProcessTimer("Tile Making") for root, subFolders, files in os.walk(self.tiles_directory): for tile_name in files: tile_path = os.path.join(root, tile_name) @@ -109,7 +112,7 @@ def get_tiles(self): exp_threshold = exp_threshold * 2 print('Processed {} tiles.'.format(len(large_tiles))) - + tileTimer.finish() data = (large_tiles, small_tiles) self.tiles_save(data) return data diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index a796ba5..afb078b 100755 --- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -17,6 +17,7 @@ from TileProcessor import TileProcessor from TileFitterSciKit import TileFitterSciKit from ProgressCounter import ProgressCounter +from ProcessTimer import ProcessTimer # These are now configed by CLI or class defaults TILE_SIZE = 50 # height/width of mosaic tiles in pixels @@ -118,9 +119,10 @@ def show_error(msg): def mosaic(img_path, tiles_data, penalty=0.2, suffix=''): """ Takes in Tiles Data as an Agrument now """ image_data = TargetImage(img_path).get_data() - # tiles_data = TileProcessor(tiles_path).get_tiles() if tiles_data[0]: + imgTimer = ProcessTimer('Single Image ') compose(image_data, tiles_data, penalty=penalty, suffix=suffix) + imgTimer.finish() else: show_error("Tiles Data not propery formatted!") @@ -215,5 +217,7 @@ def restricted_float(x): continue else: # 
Trigger the mosaic process + folderTimer = ProcessTimer('imgs by in folder') mosaic(file_path, tiles_data, penalty=args.penalty, suffix=args.suffix) + folderTimer.finish() From 69956560052527635be9f45e17435ec5d7d62d96 Mon Sep 17 00:00:00 2001 From: Joel G Date: Thu, 7 May 2026 11:09:59 -0700 Subject: [PATCH 61/71] Created funcationality to process tiles outside of compose. --- mosaicizers/TileProcessor.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mosaicizers/TileProcessor.py b/mosaicizers/TileProcessor.py index 43257fc..11a6aa4 100755 --- a/mosaicizers/TileProcessor.py +++ b/mosaicizers/TileProcessor.py @@ -116,3 +116,10 @@ def get_tiles(self): data = (large_tiles, small_tiles) self.tiles_save(data) return data + + +if __name__ == '__main__': + # If run, makes cache if none exist + tp = TileProcessor('/mnt/ebs/frames') + data = tp.get_tiles() + print("Tile KDTree is processed and saved as pickle file") From 16c07fecdfdff38e412d6e4debd6b6f9a6327890 Mon Sep 17 00:00:00 2001 From: Joel G Date: Wed, 13 May 2026 17:27:50 -0700 Subject: [PATCH 62/71] updated files for legacy processing. 
--- mosaicizers/mosaic-v2.py | 4 +- mosaicizers/mosaic.py | 96 +++++++++++++++++++++++++++++++++++++- tools/load_data_from_s3.py | 34 ++++++-------- 3 files changed, 111 insertions(+), 23 deletions(-) diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py index afb078b..888ee9c 100755 --- a/mosaicizers/mosaic-v2.py +++ b/mosaicizers/mosaic-v2.py @@ -206,6 +206,7 @@ def restricted_float(x): except Exception as e: print(f"Error running TileProcessor class '{tile_dir}': {e}") exit(1) + folderTimer = ProcessTimer('imgs by in folder') for file_path in samples: if not os.path.isfile(file_path): show_error(f"Unable to find image file \ @@ -217,7 +218,6 @@ def restricted_float(x): continue else: # Trigger the mosaic process - folderTimer = ProcessTimer('imgs by in folder') mosaic(file_path, tiles_data, penalty=args.penalty, suffix=args.suffix) - folderTimer.finish() + folderTimer.finish() diff --git a/mosaicizers/mosaic.py b/mosaicizers/mosaic.py index 031fc14..d2e9f1c 100755 --- a/mosaicizers/mosaic.py +++ b/mosaicizers/mosaic.py @@ -3,8 +3,10 @@ import io import hashlib import os.path +import argparse from PIL import Image, ImageOps from multiprocessing import Process, Queue, cpu_count +from ProcessTimer import ProcessTimer # Change these 3 config parameters to suit your needs... TILE_SIZE = 50 # height/width of mosaic tiles in pixels @@ -173,10 +175,10 @@ def add_tile(self, tile_data, coords): def save(self): """ - Saves the image_obj as a .jpeg to /mnt/ebs/mosaics + Saves the image_obj as a .jpeg to /mnt/ebs/legacy-mosaics using its MD5 hash as the filename. 
""" - output_dir = "/mnt/ebs/mosaics" + output_dir = "/mnt/ebs/legacy-mosaics" # Ensure the output directory exists os.makedirs(output_dir, exist_ok=True) @@ -287,6 +289,7 @@ def mosaic(img_path, tiles_path): ) +""" if __name__ == '__main__': if len(sys.argv) < 3: show_error('Usage: {} \r'.format(sys.argv[0])) @@ -299,3 +302,92 @@ def mosaic(img_path, tiles_path): show_error("Unable to find tile directory '{}'".format(tile_dir)) else: mosaic(source_image, tile_dir) +""" + +if __name__ == '__main__': + def restricted_float(x): + try: + x = float(x) + except ValueError: + raise argparse.ArgumentTypeError(f"{x} is not a \ + floating-point number") + + if x < 0.0 or x > 0.5: + raise argparse.ArgumentTypeError(f"{x} is not in range \ + [0.01, 0.5]") + return x + + parser = argparse.ArgumentParser( + description="Generate a high-quality mosaic.") + + # Create the mutually exclusive group for input + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("-file", "-f", help="Path to the source image file.") + group.add_argument("-folder", help="Path to a \ + folder of images (not yet implemented).") + + # The tiles directory with a default value + parser.add_argument("-tiles", "-t", + default="/mnt/ebs/frames", + help="Path to the directory \ + containing tiles (default: /mnt/ebs/frames)") + + parser.add_argument("-out_dir", "-o", + default="/mnt/ebs/mosaics", + help="This is the directory the \ + Mosaics will be save to.") + + parser.add_argument('-suffix', '-s', + help="Type something here if \ + if you want it appended \ + to the file name.") + + parser.add_argument('-penalty', + type=restricted_float, + default=0.2, + help="Set the penalty (range: 0.0 to 0.5, \ + default: 0.2) \ + High Penalty means less repetition of tiles") + args = parser.parse_args() + + # Current logic: Only handle the single file mode + if args.file: + source_image = os.path.abspath(args.file) + tile_dir = os.path.abspath(args.tiles) + + if not 
os.path.isfile(source_image): + show_error(f"Unable to find image file '{source_image}'") + elif not os.path.isdir(tile_dir): + show_error(f"Unable to find tile directory '{tile_dir}'") + else: + # Trigger the mosaic process + mosaic(source_image, tile_dir) + + elif args.folder: + abs_folder = os.path.abspath(args.folder) + tile_dir = os.path.abspath(args.tiles) + try: + samples = [e.path for e in os.scandir(abs_folder) + if e.is_file()] + except FileNotFoundError: + print(f"Error: Folder '{abs_folder}' not found.") + exit(1) + try: + os.scandir(tile_dir) + except Exception: + print(f"Error: Tile directory '{tile_dir}' not found.") + exit(1) + folderTimer = ProcessTimer('imgs by in folder') + for file_path in samples: + if not os.path.isfile(file_path): + show_error(f"Unable to find image file \ + '{file_path}'") + continue + elif not os.path.isdir(tile_dir): + show_error(f"Unable to find tile directory \ + '{tile_dir}'") + continue + else: + # Trigger the mosaic process + mosaic(file_path, tile_dir) + folderTimer.finish() diff --git a/tools/load_data_from_s3.py b/tools/load_data_from_s3.py index 2bdf92d..11ca85e 100755 --- a/tools/load_data_from_s3.py +++ b/tools/load_data_from_s3.py @@ -27,6 +27,21 @@ def resize_in_place(file_path, max_dimension=600): return False +def handle_upload(s3, local_dir, bucket_name): + if not os.path.exists(local_dir): + print(f"Skipping upload: {local_dir} does not exist.") + return + + valid_images = ('.png', '.jpg', '.jpeg', '.tiff') + print(f"Uploading results from {local_dir} to S3 \ + {bucket_name}/mosaics'...") + + for filename in os.listdir(local_dir): + if filename.lower().endswith(valid_images): + s3.upload_from_disk(os.path.join(local_dir, filename), + f"mosaics/{filename}") + + def main(): parser = argparse.ArgumentParser( description="Transfer assets between S3 and EBS storage." 
@@ -131,24 +146,5 @@ def main(): resize_in_place(dest_path) -def handle_upload(s3, local_dir, bucket_name): - if not os.path.exists(local_dir): - print(f"Skipping upload: {local_dir} does not exist.") - return - - valid_images = ('.png', '.jpg', '.jpeg', '.tiff') - print(f"Uploading results from {local_dir} to S3 \ - {bucket_name}/mosaics'...") - - for filename in os.listdir(local_dir): - if filename.lower().endswith(valid_images): - s3.upload_from_disk(os.path.join(local_dir, filename), - f"mosaics/{filename}") - - -if __name__ == "__main__": - main() - - if __name__ == "__main__": main() From d1ff668b5a6fd3ed12e9d9a396bb3b2978b4d07a Mon Sep 17 00:00:00 2001 From: Joel G Date: Wed, 13 May 2026 17:36:49 -0700 Subject: [PATCH 63/71] added a new arguement to get more frames from a video --- tools/splicer.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/splicer.py b/tools/splicer.py index de3352f..94bde99 100755 --- a/tools/splicer.py +++ b/tools/splicer.py @@ -45,7 +45,7 @@ def resize_maintain_aspect(frame, short_side_target=200): return cv2.resize(frame, new_dim, interpolation=cv2.INTER_AREA) -def process_video_worker(absolute_video_path, output_folder): +def process_video_worker(absolute_video_path, output_folder, density=5): """The function each CPU core will run.""" if not is_video_readable(absolute_video_path): return f"Error: {os.path.basename(absolute_video_path)}\ @@ -62,7 +62,7 @@ def process_video_worker(absolute_video_path, output_folder): return f"Bad FPS: {os.path.basename(absolute_video_path)}" duration_seconds = total_frames / fps - interval_seconds = 1 if duration_seconds < 60 else 5 + interval_seconds = 1 if duration_seconds < 60 else density capture_step = int(fps * interval_seconds) file_hash = get_file_md5(absolute_video_path) @@ -151,11 +151,16 @@ def main(): parser.add_argument("-limit", type=int, help="Limit number of files processed \ (for testing)") + parser.add_argument("-denisty", type=int, 
default=5, + help="In seconds, frequeence \ + of frame capture \ + (> 60s always 1 second)") args = parser.parse_args() # Decalare output folder output_folder = args.out_dir or "/mnt/ebs/frames" + density = args.density if not os.path.exists(output_folder): os.makedirs(output_folder) @@ -206,7 +211,9 @@ def main(): videos across {WORKER_COUNT} CPUs...") # --- THE MULTIPROCESSING MAGIC --- - worker_func = partial(process_video_worker, output_folder=output_folder) + worker_func = partial(process_video_worker, + output_folder=output_folder, + density=density) with Pool(processes=WORKER_COUNT) as pool: for result in pool.imap_unordered(worker_func, video_files): From 1d6b1d4fc4a355314afc61c19a78dd1e3c1dc77d Mon Sep 17 00:00:00 2001 From: Joel G Date: Fri, 15 May 2026 22:12:43 -0700 Subject: [PATCH 64/71] reduced splicer log output so it is not completely loco --- tools/splicer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/splicer.py b/tools/splicer.py index 94bde99..0e58428 100755 --- a/tools/splicer.py +++ b/tools/splicer.py @@ -127,7 +127,8 @@ def _save_processed_crop(crop_img, output_dir): hash_name = hashlib.md5(resized.tobytes()).hexdigest() save_path = os.path.join(output_dir, f"{hash_name}.png") resized.save(save_path, "PNG") - print(f" [#] Saved: {hash_name}.png") + # way to much logging. Images get into thousands. + # print(f" [#] Saved: {hash_name}.png") def main(): From f11e1768474cdd636526c3d3c12bf816daa810ed Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 16 May 2026 07:50:42 -0700 Subject: [PATCH 65/71] created a script so that running this code is less of a headache. 
--- load-and-run.sh | 43 +++++++++++++++++++++++++++++++++++++++++++ mosaicizers/mosaic.py | 2 +- 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 load-and-run.sh diff --git a/load-and-run.sh b/load-and-run.sh new file mode 100644 index 0000000..ea0cb33 --- /dev/null +++ b/load-and-run.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +export S3_STORAGE="replace-with-bucket" + +# Explicitly use the python binary inside your virtual environment to ensure stability +VENV_PYTHON=".venv/bin/python3" + +echo "=== Starting Pipeline at $(date) ===" > pipeline_status.log + +# 1. Make the directory that everything saves to and reads from. +echo "Creating Source directory." >> pipeline_status.log +mkdir -p /mnt/ebs || { echo "ERROR: Failed to create directory /mnt/ebs" >> pipeline_status.log; exit 1; } +chmod 777 /mnt/ebs || { echo "ERROR: Failed to 777 /mnt/ebs" >> pipeline_status.log; exit 1; } + +# 2. Download Step (Changed second command to >> to append) +echo "Downloading from videos s3..." >> pipeline_status.log +$VENV_PYTHON -u tools/load_data_from_s3.py --videos > log_0_download.log 2>&1 || { echo "ERROR: could not download videos" >> log_0_download.log; exit 1; } + +echo "Downloading from samples s3..." >> pipeline_status.log +$VENV_PYTHON -u tools/load_data_from_s3.py --samples >> log_0_download.log 2>&1 || { echo "ERROR: could not download source photos" >> log_0_download.log; exit 1; } + +# 3. Preprocessing Steps (Sequential - Changed second to >> and fixed error strings) +echo "Running preprocess step 1 (splicer vids)..." >> pipeline_status.log +$VENV_PYTHON -u tools/splicer.py -video-folder /mnt/ebs/raw_vids -out-dir /mnt/ebs/raw_photos > log_1_preprocess.log 2>&1 || { echo "ERROR: failed to splice videos" >> log_1_preprocess.log; exit 1; } + +echo "Running preprocess step 2 (splicer photos)..." 
>> pipeline_status.log +$VENV_PYTHON -u tools/splicer.py -image-folder /mnt/ebs/raw_photos >> log_1_preprocess.log 2>&1 || { echo "ERROR: failed to process photos" >> log_1_preprocess.log; exit 1; } + +# 4. Main Processing Iterations (Sequential - Using unique log files per variation for easier debugging) +echo "Running process with legacy method..." >> pipeline_status.log +$VENV_PYTHON -u mosaicizers/mosaic.py -folder /mnt/ebs/samples > log_2_legacy.log 2>&1 || { echo "ERROR: could not run legacy mosaics.py" >> log_2_legacy.log; exit 1; } + +echo "Running main script v2 with no penalty..." >> pipeline_status.log +$VENV_PYTHON -u mosaicizers/mosaic-v2.py -folder /mnt/ebs/samples -penalty 0.0 -suffix _p00_v2 > log_3_v2_p00.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p00" >> log_3_v2_p00.log; exit 1; } + +echo "Running main script v2 with low penalty..." >> pipeline_status.log +$VENV_PYTHON -u mosaicizers/mosaic-v2.py -folder /mnt/ebs/samples -penalty 0.05 -suffix _p05_v2 > log_3_v2_p05.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p05" >> log_3_v2_p05.log; exit 1; } + +echo "Running main script v2 with high penalty..." 
>> pipeline_status.log +$VENV_PYTHON -u mosaicizers/mosaic-v2.py -folder /mnt/ebs/samples -penalty 0.15 -suffix _p15_v2 > log_3_v2_p15.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p15" >> log_3_v2_p15.log; exit 1; } + + +echo "=== Pipeline Finished Successfully at $(date) ===" >> pipeline_status.log \ No newline at end of file diff --git a/mosaicizers/mosaic.py b/mosaicizers/mosaic.py index d2e9f1c..c859908 100755 --- a/mosaicizers/mosaic.py +++ b/mosaicizers/mosaic.py @@ -324,7 +324,7 @@ def restricted_float(x): group = parser.add_mutually_exclusive_group(required=True) group.add_argument("-file", "-f", help="Path to the source image file.") group.add_argument("-folder", help="Path to a \ - folder of images (not yet implemented).") + folder of images.") # The tiles directory with a default value parser.add_argument("-tiles", "-t", From 21a49a9a81ac728b0d470b95810d0b78ca619ea6 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 16 May 2026 07:55:14 -0700 Subject: [PATCH 66/71] Upload results now too --- load-and-run.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/load-and-run.sh b/load-and-run.sh index ea0cb33..dfb40a5 100644 --- a/load-and-run.sh +++ b/load-and-run.sh @@ -39,5 +39,6 @@ $VENV_PYTHON -u mosaicizers/mosaic-v2.py -folder /mnt/ebs/samples -penalty 0.05 echo "Running main script v2 with high penalty..." >> pipeline_status.log $VENV_PYTHON -u mosaicizers/mosaic-v2.py -folder /mnt/ebs/samples -penalty 0.15 -suffix _p15_v2 > log_3_v2_p15.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p15" >> log_3_v2_p15.log; exit 1; } - +echo "Uploading results!" 
>> pipeline_status.log +$VENV_PYTHON - u tools/load_data_from_s3 --upload-reulst > log_4_upload.log 2>&1 || { echo "ERROR: could not upload results " >> log_3_v2_p15.log; exit 1; } echo "=== Pipeline Finished Successfully at $(date) ===" >> pipeline_status.log \ No newline at end of file From 76e670622302e8c01ac76e0f2a95f742b2169798 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 16 May 2026 07:58:10 -0700 Subject: [PATCH 67/71] will create root folder manually --- load-and-run.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/load-and-run.sh b/load-and-run.sh index dfb40a5..991cb17 100644 --- a/load-and-run.sh +++ b/load-and-run.sh @@ -8,9 +8,9 @@ VENV_PYTHON=".venv/bin/python3" echo "=== Starting Pipeline at $(date) ===" > pipeline_status.log # 1. Make the directory that everything saves to and reads from. -echo "Creating Source directory." >> pipeline_status.log -mkdir -p /mnt/ebs || { echo "ERROR: Failed to create directory /mnt/ebs" >> pipeline_status.log; exit 1; } -chmod 777 /mnt/ebs || { echo "ERROR: Failed to 777 /mnt/ebs" >> pipeline_status.log; exit 1; } +#echo "Creating Source directory." >> pipeline_status.log +#mkdir -p /mnt/ebs || { echo "ERROR: Failed to create directory /mnt/ebs" >> pipeline_status.log; exit 1; } +#chmod 777 /mnt/ebs || { echo "ERROR: Failed to 777 /mnt/ebs" >> pipeline_status.log; exit 1; } # 2. Download Step (Changed second command to >> to append) echo "Downloading from videos s3..." >> pipeline_status.log From 1a80116c0bc82c5ab3b392b012071a525c033d04 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 16 May 2026 09:16:43 -0700 Subject: [PATCH 68/71] fixed splicer problem. --- load-and-run.sh | 2 +- tools/splicer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/load-and-run.sh b/load-and-run.sh index 991cb17..8cb80a6 100644 --- a/load-and-run.sh +++ b/load-and-run.sh @@ -21,7 +21,7 @@ $VENV_PYTHON -u tools/load_data_from_s3.py --samples >> log_0_download.log 2>&1 # 3. 
Preprocessing Steps (Sequential - Changed second to >> and fixed error strings) echo "Running preprocess step 1 (splicer vids)..." >> pipeline_status.log -$VENV_PYTHON -u tools/splicer.py -video-folder /mnt/ebs/raw_vids -out-dir /mnt/ebs/raw_photos > log_1_preprocess.log 2>&1 || { echo "ERROR: failed to splice videos" >> log_1_preprocess.log; exit 1; } +$VENV_PYTHON -u tools/splicer.py -video-folder /mnt/ebs/raw_vids -density 1 -out-dir /mnt/ebs/raw_photos > log_1_preprocess.log 2>&1 || { echo "ERROR: failed to splice videos" >> log_1_preprocess.log; exit 1; } echo "Running preprocess step 2 (splicer photos)..." >> pipeline_status.log $VENV_PYTHON -u tools/splicer.py -image-folder /mnt/ebs/raw_photos >> log_1_preprocess.log 2>&1 || { echo "ERROR: failed to process photos" >> log_1_preprocess.log; exit 1; } diff --git a/tools/splicer.py b/tools/splicer.py index 0e58428..5c605c7 100755 --- a/tools/splicer.py +++ b/tools/splicer.py @@ -152,7 +152,7 @@ def main(): parser.add_argument("-limit", type=int, help="Limit number of files processed \ (for testing)") - parser.add_argument("-denisty", type=int, default=5, + parser.add_argument("-density", type=int, default=5, help="In seconds, frequeence \ of frame capture \ (> 60s always 1 second)") From fc41d6a888a1c2d0ed8f2dd2b07406d37ba8fb51 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sat, 16 May 2026 12:18:12 -0700 Subject: [PATCH 69/71] Modified the original Mosiac.py. Still not sure if it will ... work. --- mosaicizers/mosaic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mosaicizers/mosaic.py b/mosaicizers/mosaic.py index c859908..baa32ba 100755 --- a/mosaicizers/mosaic.py +++ b/mosaicizers/mosaic.py @@ -178,7 +178,7 @@ def save(self): Saves the image_obj as a .jpeg to /mnt/ebs/legacy-mosaics using its MD5 hash as the filename. 
""" - output_dir = "/mnt/ebs/legacy-mosaics" + output_dir = "/mnt/ebs/mosaics" # Ensure the output directory exists os.makedirs(output_dir, exist_ok=True) @@ -191,7 +191,7 @@ def save(self): md5_hash = hashlib.md5(img_bytes).hexdigest() - filename = f"{md5_hash}.jpeg" + filename = f"{md5_hash}-legacy.jpeg" final_path = os.path.join(output_dir, filename) with open(final_path, "wb") as f: @@ -279,6 +279,7 @@ def show_error(msg): def mosaic(img_path, tiles_path): + mosaicTimer = ProcessTimer(f'Running for: {img_path}') image_data = TargetImage(img_path).get_data() tiles_data = TileProcessor(tiles_path).get_tiles() if tiles_data[0]: @@ -287,6 +288,7 @@ def mosaic(img_path, tiles_path): show_error( "No images found in tiles directory '{}'".format(tiles_path) ) + mosaicTimer.finish() """ From 2964448236372605982b4db1edccbf565adbe951 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sun, 17 May 2026 14:38:58 -0700 Subject: [PATCH 70/71] running another experiment --- load-and-run.sh | 7 +- mosaicizers/TileFitterSciKitNoKD.py | 108 ++++++++++++++ mosaicizers/mosaic-nokd.py | 223 ++++++++++++++++++++++++++++ tools/load_data_from_s3.py | 2 +- 4 files changed, 337 insertions(+), 3 deletions(-) create mode 100755 mosaicizers/TileFitterSciKitNoKD.py create mode 100755 mosaicizers/mosaic-nokd.py diff --git a/load-and-run.sh b/load-and-run.sh index 8cb80a6..a0af5b1 100644 --- a/load-and-run.sh +++ b/load-and-run.sh @@ -34,10 +34,13 @@ echo "Running main script v2 with no penalty..." >> pipeline_status.log $VENV_PYTHON -u mosaicizers/mosaic-v2.py -folder /mnt/ebs/samples -penalty 0.0 -suffix _p00_v2 > log_3_v2_p00.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p00" >> log_3_v2_p00.log; exit 1; } echo "Running main script v2 with low penalty..." 
>> pipeline_status.log -$VENV_PYTHON -u mosaicizers/mosaic-v2.py -folder /mnt/ebs/samples -penalty 0.05 -suffix _p05_v2 > log_3_v2_p05.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p05" >> log_3_v2_p05.log; exit 1; } +$VENV_PYTHON -u mosaicizers/mosaic-v2.py -folder /mnt/ebs/samples -penalty 0.02 -suffix _p02_v2 > log_3_v2_p02.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p02" >> log_3_v2_p05.log; exit 1; } echo "Running main script v2 with high penalty..." >> pipeline_status.log -$VENV_PYTHON -u mosaicizers/mosaic-v2.py -folder /mnt/ebs/samples -penalty 0.15 -suffix _p15_v2 > log_3_v2_p15.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p15" >> log_3_v2_p15.log; exit 1; } +$VENV_PYTHON -u mosaicizers/mosaic-v2.py -folder /mnt/ebs/samples -penalty 0.08 -suffix _p08_v2 > log_3_v2_p08.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p08" >> log_3_v2_p15.log; exit 1; } + +echo "Running No KD version of Sci kit with 00 penalty." >> pipeline_status.log +$VENV_PYTHON -u mosaicizers/mosaic-nokd.py -folder /mnt/ebs/samples -penalty 0.0 -suffix _p00_v2 > log_3_v2_p08.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p08" >> log_3_v2_p15.log;} echo "Uploading results!" 
>> pipeline_status.log $VENV_PYTHON - u tools/load_data_from_s3 --upload-reulst > log_4_upload.log 2>&1 || { echo "ERROR: could not upload results " >> log_3_v2_p15.log; exit 1; } diff --git a/mosaicizers/TileFitterSciKitNoKD.py b/mosaicizers/TileFitterSciKitNoKD.py new file mode 100755 index 0000000..369eaad --- /dev/null +++ b/mosaicizers/TileFitterSciKitNoKD.py @@ -0,0 +1,108 @@ +import numpy as np +from skimage.metrics import structural_similarity as ssim +from scipy.spatial import KDTree + + +class TileFitterSciKit: + def __init__(self, tiles_data, match_res=5, penalty=0.00): + # tiles_data here is the 'small_tiles' list from TileProcessor + self.penalty = penalty + self.usages = [0.0 for x in range(len(tiles_data))] + self.tiles_data = tiles_data + self.match_res = match_res + self.winners = np.array([], dtype=float) + + print("Initializing KDTree for hybrid search...") + # 1. Convert tiles to NumPy arrays once + # We reshape them from flat lists back into (5x5x3) blocks for SSIM + self.tiles_np = [ + np.array(t).reshape((self.match_res, self.match_res, 3)) + for t in tiles_data + ] + + # 2. Pre-calculate average colors for the Tree + avg_colors = [t.mean(axis=(0, 1)) for t in self.tiles_np] + self.tree = KDTree(np.array(avg_colors)) + print("KDTree + SSIM Hybrid Fitter Ready.") + + def fit_tiles(self, work_queue, result_queue): + EOQ_VALUE = None # Sentinel + while True: + try: + img_data, img_coords = work_queue.get(True) + if img_data == EOQ_VALUE: + break + tile_index = self.get_best_fit_tile(img_data) + result_queue.put((img_coords, tile_index)) + except KeyboardInterrupt: + pass + + self.get_winning_stats() # print stats + # Signal that this worker is done + result_queue.put((EOQ_VALUE, EOQ_VALUE)) + + def get_best_fit_tile(self, img_data): + """ + img_data: A flat list of pixels (from original code's getdata()) + We convert it to NumPy to use the Tree and SSIM. 
+ """ + # Convert the incoming list to a 5x5x3 array + target_np = np.array(img_data).reshape( + (self.match_res, self.match_res, 3)) + + # Step 1: KDTree Pruning (The "Bucket" step) + # target_avg = target_np.mean(axis=(0, 1)) + # _, indices = self.tree.query(target_avg, k=100) + + best_raw_score = float('-inf') + best_score = -1 + best_fit_tile_index = 0 + + # Step 2: SSIM Refinement + for idx in range(0, len(self.tiles_np)): + candidate_np = self.tiles_np[idx] + + # SSIM needs to know the range of pixel values (0-255) + try: + score = ssim(target_np, + candidate_np, + channel_axis=2, + data_range=255, + win_size=self.match_res-2) + score = score - self.usages[idx] + + except ValueError as e: + # codes sometimes breaks and hangs. + print(f'Got exception {e} \ + skipping index{idx} \ + data was {type(candidate_np)}') + continue + + if score > best_score: + best_score = score + best_fit_tile_index = idx + best_raw_score = score + + # Early exit if we find an amazing match + if score > 0.98: + best_raw_score = score + break + + self.winners = np.append(self.winners, best_raw_score) + self.usages[best_fit_tile_index] = \ + self.usages[best_fit_tile_index] + self.penalty + return best_fit_tile_index + + def get_winning_stats(self): + """Prints statistical analysis of the tile match quality.""" + if self.winners.size == 0: + print("No winners recorded yet.") + return + + print("\n--- Tile Match Quality Stats ---") + print(f"Total Blocks Matched: {len(self.winners)}") + print(f"Mean SSIM: {np.mean(self.winners):.4f}") + print(f"Median SSIM: {np.median(self.winners):.4f}") + print(f"Highest SSIM: {np.max(self.winners):.4f} (Best Match)") + print(f"Lowest SSIM: {np.min(self.winners):.4f} (Worst Match)") + print("--------------------------------\n") diff --git a/mosaicizers/mosaic-nokd.py b/mosaicizers/mosaic-nokd.py new file mode 100755 index 0000000..469c036 --- /dev/null +++ b/mosaicizers/mosaic-nokd.py @@ -0,0 +1,223 @@ +############################################### 
+# This file will create a mosaic from CLI. +# This version uses scikit-image SSIM with a full +# tile scan (no KDTree) and multiprocessing for speed. +# Also optimized memory usage by sharing tile data +# across workers. +############################################### + +import os +import os.path +import argparse +from multiprocessing import Queue, cpu_count, get_context + +# These are the custom imports +from MosaicImage import MosaicImage +from TargetImage import TargetImage +from TileProcessor import TileProcessor +from TileFitterSciKitNoKD import TileFitterSciKit +from ProgressCounter import ProgressCounter +from ProcessTimer import ProcessTimer + +# These are now configed by CLI or class defaults +TILE_SIZE = 50 # height/width of mosaic tiles in pixels +TILE_MATCH_RES = 5 # tile matching resolution +# ENLARGEMENT = 8 # mosaic image will be this many times larger +# Image.MAX_IMAGE_PIXELS = None # Dangerous, but allow it for now + +TILE_BLOCK_SIZE = TILE_SIZE / max(min(TILE_MATCH_RES, TILE_SIZE), 1) +WORKER_COUNT = max(cpu_count() - 1, 1) +# OUT_FILE = 'mosaic.jpeg' +EOQ_VALUE = None + + +_global_fitter = None # Shared across all workers + + +def worker_init(tile_data, penalty): + """Build the shared fitter; compose() calls it once pre-fork.""" + global _global_fitter + _global_fitter = TileFitterSciKit(tile_data, penalty=penalty) + + +def worker_task(work_queue, result_queue): + """The actual loop the worker runs.""" + # Use the global fitter already sitting in this process's memory + _global_fitter.fit_tiles(work_queue, result_queue) + + +def compose(original_img, tiles, penalty=0.2, suffix=''): + print('Building mosaic, press Ctrl-C to abort...') + original_img_large, original_img_small = original_img + tiles_large, tiles_small = tiles + + # 1. 
Initialize our mosaic object + mosaic = MosaicImage(original_img_large) + + all_tile_data_large = [list(tile.getdata()) for tile in tiles_large] + all_tile_data_small = [list(tile.getdata()) for tile in tiles_small] + + work_queue = Queue() + result_queue = Queue() + + # 2a Init the Global Fitter. + worker_init(all_tile_data_small, penalty) + ctx = get_context('fork') # from an import at the top + worker_pool = [] + # 2b Rally workers. + for n in range(WORKER_COUNT): + p = ctx.Process( + target=worker_task, + args=(work_queue, result_queue) + ) + p.start() + worker_pool.append(p) + + try: + # 3. Phase 1: Dispatch work (The Producer) + progress = ProgressCounter(mosaic.x_tile_count * mosaic.y_tile_count) + for x in range(mosaic.x_tile_count): + for y in range(mosaic.y_tile_count): + # ... [Your existing cropping logic here] ... + large_box = (x * TILE_SIZE, y * TILE_SIZE, + (x + 1) * TILE_SIZE, (y + 1) * TILE_SIZE) + small_box = (x * TILE_SIZE / TILE_BLOCK_SIZE, + y * TILE_SIZE / TILE_BLOCK_SIZE, + (x + 1) * TILE_SIZE / TILE_BLOCK_SIZE, + (y + 1) * TILE_SIZE / TILE_BLOCK_SIZE) + + work_queue.put( + (list(original_img_small.crop(small_box).getdata()), + large_box)) + progress.update() + + # 4. Phase 2: Collect and Paste (The Consumer) + # We call this in the MAIN process. It will block here until + # the workers finish sending results through the result_queue. 
+ + except KeyboardInterrupt: + print('\nHalting, saving partial image please wait...') + # We tell the workers to stop + for n in range(WORKER_COUNT): + work_queue.put((EOQ_VALUE, EOQ_VALUE)) + + finally: + # Ensure workers are cleaned up + for p in worker_pool: + if p.is_alive(): + work_queue.put((EOQ_VALUE, EOQ_VALUE)) + + mosaic.assemble(result_queue, all_tile_data_large, + WORKER_COUNT) + mosaic.save(suffix=suffix) + + +def show_error(msg): + print('ERROR: {}'.format(msg)) + + +def mosaic(img_path, tiles_data, penalty=0.2, suffix=''): + """ Takes in Tiles Data as an Argument now """ + image_data = TargetImage(img_path).get_data() + if tiles_data[0]: + imgTimer = ProcessTimer('Single Image ') + compose(image_data, tiles_data, penalty=penalty, suffix=suffix) + imgTimer.finish() + else: + show_error("Tiles Data not propery formatted!") + + +if __name__ == '__main__': + def restricted_float(x): + try: + x = float(x) + except ValueError: + raise argparse.ArgumentTypeError(f"{x} is not a \ + floating-point number") + + if x < 0.0 or x > 0.5: + raise argparse.ArgumentTypeError(f"{x} is not in range \ + [0.0, 0.5]") + return x + + parser = argparse.ArgumentParser( + description="Generate a high-quality mosaic.") + + # Create the mutually exclusive group for input + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("-file", "-f", help="Path to the source image file.") + group.add_argument("-folder", help="Path to a \ + folder of images; every file in it is processed.") + + # The tiles directory with a default value + parser.add_argument("-tiles", "-t", + default="/mnt/ebs/frames", + help="Path to the directory \ + containing tiles (default: /mnt/ebs/frames)") + + parser.add_argument("-out_dir", "-o", + default="/mnt/ebs/mosaics", + help="This is the directory the \ + Mosaics will be save to.") + + parser.add_argument('-suffix', '-s', + help="Type something here if \ + if you want it appended \ + to the file name.") + + 
parser.add_argument('-penalty', + type=restricted_float, + default=0.2, + help="Set the penalty (range: 0.0 to 0.5, \ + default: 0.2) \ + High Penalty means less repetition of tiles") + args = parser.parse_args() + + # Current logic: Only handle the single file mode + if args.file: + source_image = os.path.abspath(args.file) + tile_dir = os.path.abspath(args.tiles) + + if not os.path.isfile(source_image): + show_error(f"Unable to find image file '{source_image}'") + elif not os.path.isdir(tile_dir): + show_error(f"Unable to find tile directory '{tile_dir}'") + else: + # Trigger the mosaic process + tiles_data = TileProcessor(tile_dir).get_tiles() + mosaic(source_image, tiles_data, + penalty=args.penalty, suffix=args.suffix) + + elif args.folder: + abs_folder = os.path.abspath(args.folder) + tile_dir = os.path.abspath(args.tiles) + try: + samples = [e.path for e in os.scandir(abs_folder) + if e.is_file()] + except FileNotFoundError: + print(f"Error: Folder '{abs_folder}' not found.") + exit(1) + try: + tp = TileProcessor(tile_dir) + tiles_data = tp.get_tiles() + except FileNotFoundError: + print(f"Error: Tile directory '{tile_dir}' not found.") + exit(1) + except Exception as e: + print(f"Error running TileProcessor class '{tile_dir}': {e}") + exit(1) + folderTimer = ProcessTimer('imgs by in folder') + for file_path in samples: + if not os.path.isfile(file_path): + show_error(f"Unable to find image file \ + '{file_path}'") + continue + elif not os.path.isdir(tile_dir): + show_error(f"Unable to find tile directory \ + '{tile_dir}'") + continue + else: + # Trigger the mosaic process + mosaic(file_path, tiles_data, + penalty=args.penalty, suffix=args.suffix) + folderTimer.finish() diff --git a/tools/load_data_from_s3.py b/tools/load_data_from_s3.py index 11ca85e..ff9c663 100755 --- a/tools/load_data_from_s3.py +++ b/tools/load_data_from_s3.py @@ -93,7 +93,7 @@ def main(): sys.exit(1) s3 = S3Access(bucket_name) - valid_extensions = ('.png', '.jpeg', '.jpg') + 
valid_extensions = ('.png', '.jpeg', '.jpg', '.webp') # --- Logic Based Assignment for Defaults --- if args.videos: From 0d346c935a740e779b7c0502a398994431b975a9 Mon Sep 17 00:00:00 2001 From: Joel G Date: Sun, 7 Jun 2026 16:04:41 -0700 Subject: [PATCH 71/71] finished a readme.md --- README.md | 118 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 99 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index fc4aec8..a25eebc 100755 --- a/README.md +++ b/README.md @@ -1,32 +1,112 @@ -# mosaic +# Mosaic JTG Fork -This utility can be used to generate [photo-mosaic](http://en.wikipedia.org/wiki/Photographic_mosaic) images, to use it you must have Python installed, along with the [Pillow](http://pillow.readthedocs.org/en/latest/) imaging library. +This is a fork of the [Codebox/mosaic](https://github.com/codebox/mosaic) project. It introduces the following changes and refactors: -As well as an image to use for the photo-mosaic ([most common image formats are supported](http://pillow.readthedocs.org/en/latest/handbook/image-file-formats.html)), you will need a large collection of different images to be used as tiles. The tile images can be any shape or size (the utility will automatically crop and resize them) but for good results you will need a lot of them - a few hundred at least. One convenient way of generating large numbers of tile images is to [extract screenshots from video files](https://trac.ffmpeg.org/wiki/Create%20a%20thumbnail%20image%20every%20X%20seconds%20of%20the%20video) using [ffmpeg](https://www.ffmpeg.org/). +* **File Restructuring:** Refactored the classes of the original project from a single file into multiple, modular files. +* **Media Tools:** Provided a `tools` folder for quick pre-processing of images and slicing videos into image frames. +* **Cloud Data Sync:** Added tools for transferring photos from an Amazon S3 bucket to an EC2 instance and vice versa. 
+* **OpenCV Implementation:** Introduced a second version of the main application (`mosaic-v2.py`) that leverages a CV2 model rather than raw pixel processing loops. +* **KD-Tree Optimization:** Uses a KD-Tree to optimize tile matching speed, balancing computation time against target quality. -Run the utility from the command line, as follows: +This tool generates [photo-mosaic](http://en.wikipedia.org/wiki/Photographic_mosaic) images. To use it, you must have Python installed along with the [Pillow](http://pillow.readthedocs.org/en/latest/) imaging library. -
python mosaic.py <image> <tiles directory>
-
+While this version is designed assuming the code runs on an AWS EC2 instance with multiple processors, it can still be executed locally. See this blog post for [additional notes on the architecture](https://www.joelgonzaga.com/2026/05/18/photo-mosaics-with-scikit-aws-and-kdtree/). -* The `image` argument should contain the path to the image for which you want to build the mosaic -* The `tiles directory` argument should contain the path to the directory containing the tile images (the directory will be searched recursively, so it doesn't matter if some of the images are contained in sub-directories) +In either case, you will need a source image for the mosaic ([most common image formats are supported](http://pillow.readthedocs.org/en/latest/handbook/image-file-formats.html)). Additionally, you will need a large collection of separate images to be used as tiles. The tile images can be any shape or size (the utility will automatically crop and resize them), but for good results, you will need at least a few hundred. One convenient way to generate a massive tile library is to [extract screenshots from video files](https://trac.ffmpeg.org/wiki/Create%20a%20thumbnail%20image%20every%20X%20seconds%20of%20the%20video) using [FFmpeg](https://www.ffmpeg.org/), which is often the quickest method for local testing. -For example: +--- -
python mosaic.py game_of_thrones_poster.jpg /home/admin/images/screenshots
-
+## Architecture Setup (EC2, S3, and EBS) -The images below show an example of how the mosaic tiles are matched to the details of the original image: +> **Note:** This project does not provision infrastructure automatically, nor is it an exhaustive guide to AWS. Users are assumed to have a working knowledge of cloud resources or the willingness to reference official documentation. Mind your resource usage to manage costs. -![Mosaic Image](https://codebox.net/assets/images/mosaic/mosaic_small.jpg) -Original +Set up the following environment in AWS: +* An **S3 Bucket** +* An **EC2 Instance** +* An **EBS Volume** of a reasonable size relative to your asset library +* An **IAM Role/Instance Profile** allowing the EC2 instance to read/write to the S3 bucket -[![Mosaic Image Detail](https://codebox.net/assets/images/mosaic/mosaic_detail.jpg)](https://codebox.net/assets/images/mosaic/mosaic_large.jpg) -Mosaic Detail (click through for [full mosaic](https://codebox.net/assets/images/mosaic/mosaic_large.jpg) ~15MB) +### S3 Bucket Configuration +The S3 bucket serves as your centralized storage, allowing you to easily stage raw inputs and retrieve completed mosaics. Create the following four folders: +* `tile-videos/` – Source videos to be chopped into mosaic tiles. +* `tile-images/` – Base images to be processed into mosaic tiles. +* `source-images/` – Target images you want transformed into mosaics. +* `result-mosaics/` – The destination folder where the EC2 instance drops completed outputs. -Producing large, highly detailed mosaics can take some time - you should experiment with the various [configuration parameters](https://github.com/codebox/mosaic/blob/master/mosaic.py#L6) explained in the source code to find the right balance between image quality and render time. +### EC2 and EBS Configuration +Using an EC2 instance optimized for compute and memory delivers the best performance, depending on your tile dataset size. 
+* **4 vCPUs / 32 GB RAM** safely handles up to ~100k tiles without bottlenecking memory. +* **8 vCPUs / 64 GB RAM** is recommended for datasets scaling past 200k tiles. -In particular the [TILE_MATCH_RES](https://github.com/codebox/mosaic/blob/master/mosaic.py#L8) parameter can have a big impact on both these factors - its value determines how closely the program examines each tile when trying to find the best fit for a particular segment of the image. Setting TILE_MATCH_RES to '1' simply finds the average colour of each tile, and picks the one that most closely matches the average colour of the image segment. As the value is increased, the tile is examined in more detail. Setting TILE_MATCH_RES to equal TILE_SIZE will cause the utility to examine each pixel in the tile individually, producing the best possible match (during my testing I didn't find a very noticeable improvement beyond a value of 5, but YMMV). +The EC2 instance needs Python 3, `pip`, and public internet access to clone the project. Your attached EBS volume requires sufficient I/O performance depending on project scope. -By default the utility will configure itself to use all available CPUs/CPU-cores on the host system, if you want to leave some processing power spare for other tasks then adjust the [WORKER_COUNT](https://github.com/codebox/mosaic/blob/master/mosaic.py#L12) parameter accordingly. +#### Environment Setup +Clone this repository, ensure your storage volume is mounted, and install dependencies: + +```bash +pip3 install -r requirements.txt +``` + +*(Note: Some OpenCV dependencies may take a moment to compile and install.)* + +The automation scripts assume an active directory path exists at `/mnt/ebs/`. Ensure this directory is created on your mounted volume and has read/write permissions enabled. + +## Using The Tools +The tools directory contains several utility scripts. 
Remote into your EC2 instance via SSH and use the `--help` flag to inspect argument configurations: +```bash +python3 tools/splicer.py --help +``` + +## Tools Summary +- **advanced_parse.py:** An alternate method for turning videos into tile components. It captures several cropped square screenshots per frame. Use with caution due to the high volume of files generated. + +- **s3_access.py:** A support file containing classes and helper methods to authenticate and connect to Amazon S3. + +- **load_data_from_s3.py:** Pulls processing assets down from your S3 bucket to the local EBS drive and pushes completed renders back to S3. + +- **splicer.py:** Slices raw video files into individual frames, then processes rectangular image frames into cropped square tiles. If your inputs are raw videos, you will need to execute this script for both steps. + +## Making Mosaics +To use the refactored implementation of the original pixel-matching Codebox codebase, run: +```bash +python mosaic.py -file /path/to/source.jpeg -tiles /path/to/tiles_folder +``` +To run the OpenCV implementation with KD-Tree optimizations, execute: +```bash +python mosaic-v2.py -file /path/to/source.jpeg -tiles /path/to/tiles_folder +``` + +A third alternative, `mosaic-nokd.py`, is available for comparative testing but features the longest execution time. + +# Version Differences +`mosaic.py` + +Functionally identical to the original Codebox engine. The CLI interface has been modified slightly for consistency, but it matches tiles to the target source image using the exact same absolute pixel-difference calculation loops. + +- *Matching Behavior:* To find the optimal match for any given block, it scans every single tile in the target directory. + +`mosaic-v2.py` + +Matches tiles to the source image using an OpenCV computer vision approach alongside a KD-Tree to index and cluster your tile assets by their average color. 
+ +- *Matching Behavior:* When analyzing a block, it queries the KD-Tree to scan only a localized subset of optimal candidate tiles instead of the whole directory, drastically increasing rendering speeds. + +- *Tile Cool-down/Randomization:* Includes a reuse penalty feature to prevent identical tiles from stacking right next to each other. The -penalty option defaults to 0.2. Higher values discourage immediate tile reuse; setting -penalty 0.0 disables randomization completely. + +- *Naming Options:* Features customizable output naming flags. Run with --help to view suffix and export formatting options. + +`mosaic-nokd.py` + +Maintains the OpenCV processing logic found in v2, but strips away the KD-Tree color-grouping index. + +- *Matching Behavior:* Because it skips the KD-Tree index and uses OpenCV, every single tile is scanned sequentially for every block. This is universally the slowest processing option of the three. + +# Final Thoughts + + - Running this workflow in the cloud is highly recommended due to high CPU/RAM utilization, though high-spec local workstations work perfectly as well. + + - Experiment with the -penalty float parameters and alternate script versions to find the structural and visual balance you prefer. + + - When using splicer.py configured to sample one frame per second, an average feature-length film yields roughly 26k–32k tiles. + + - Scale Warning: The larger your asset library grows, the more memory the program requires to map out the lookup array or KD-Tree structures. \ No newline at end of file