Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ _build
deps
.elixir_ls
priv
native/philomena/target
1 change: 1 addition & 0 deletions config/runtime.exs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ config :philomena,
image_url_root: System.fetch_env!("IMAGE_URL_ROOT"),
badge_url_root: System.fetch_env!("BADGE_URL_ROOT"),
mailer_address: System.fetch_env!("MAILER_ADDRESS"),
mediaproc_addr: System.fetch_env!("MEDIAPROC_ADDR"),
tag_file_root: System.fetch_env!("TAG_FILE_ROOT"),
site_domains: System.fetch_env!("SITE_DOMAINS"),
tag_url_root: System.fetch_env!("TAG_URL_ROOT"),
Expand Down
14 changes: 14 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ services:
- IMAGE_URL_ROOT=/img
- BADGE_URL_ROOT=/badge-img
- TAG_URL_ROOT=/tag-img
- MEDIAPROC_ADDR=mediaproc:1500
- OPENSEARCH_URL=http://opensearch:9200
- REDIS_HOST=valkey
- DATABASE_URL=ecto://postgres:postgres@postgres/philomena_dev
Expand All @@ -52,6 +53,7 @@ services:
- app_deps_data:/srv/philomena/deps
- app_native_data:/srv/philomena/priv/native
depends_on:
- mediaproc
- postgres
- opensearch
- valkey
Expand Down Expand Up @@ -89,6 +91,18 @@ services:
- .:/srv/philomena
attach: false

# Media processing service, built from docker/mediaproc/Dockerfile with the
# repository root as the build context.
mediaproc:
build:
context: .
dockerfile: ./docker/mediaproc/Dockerfile
# Do not attach this service's log output to `docker compose up`.
attach: false
# Resource caps for the container: 4 CPUs, 8gb of memory, 8192 processes.
deploy:
resources:
limits:
cpus: '4'
memory: 8gb
pids: 8192

web:
build:
context: .
Expand Down
20 changes: 2 additions & 18 deletions docker/app/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,28 +1,12 @@
FROM elixir:1.18.1-alpine

ADD https://api.github.com/repos/philomena-dev/FFmpeg/git/refs/heads/release/6.1 /tmp/ffmpeg_version.json
RUN (echo "https://github.com/philomena-dev/prebuilt-ffmpeg/raw/master"; cat /etc/apk/repositories) > /tmp/repositories \
&& cp /tmp/repositories /etc/apk/repositories \
&& apk update --allow-untrusted \
&& apk add inotify-tools build-base git ffmpeg ffmpeg-dev npm nodejs file-dev libjpeg-turbo-dev libpng-dev gifsicle optipng libjpeg-turbo-utils librsvg rsvg-convert imagemagick postgresql16-client wget rust cargo --allow-untrusted \
RUN apk add inotify-tools build-base git npm nodejs postgresql16-client wget rust cargo \
&& mix local.hex --force \
&& mix local.rebar --force

ADD https://api.github.com/repos/philomena-dev/cli_intensities/git/refs/heads/master /tmp/cli_intensities_version.json
RUN git clone --depth 1 https://github.com/philomena-dev/cli_intensities /tmp/cli_intensities \
&& cd /tmp/cli_intensities \
&& make -j$(nproc) install

ADD https://api.github.com/repos/philomena-dev/mediatools/git/refs/heads/master /tmp/mediatools_version.json
RUN git clone --depth 1 https://github.com/philomena-dev/mediatools /tmp/mediatools \
&& ln -s /usr/lib/librsvg-2.so.2 /usr/lib/librsvg-2.so \
&& cd /tmp/mediatools \
&& make -j$(nproc) install

COPY docker/app/run-development /usr/local/bin/run-development
COPY docker/app/run-test /usr/local/bin/run-test
COPY docker/app/safe-rsvg-convert /usr/local/bin/safe-rsvg-convert
COPY docker/app/purge-cache /usr/local/bin/purge-cache
ENV PATH=$PATH:/root/.cargo/bin
EXPOSE 5173
CMD run-development
CMD ["/usr/local/bin/run-development"]
77 changes: 77 additions & 0 deletions docker/mediaproc/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Image for the mediaproc server: builds FFmpeg, cli_intensities and mediatools
# from source, then builds and runs the mediaproc_server Rust binary.
FROM rust:1.83-slim

# Build toolchain, image utilities and the codec development libraries that the
# FFmpeg configure step below enables.
RUN apt update \
&& apt install -y build-essential git libmagic-dev libturbojpeg0-dev libpng-dev \
gifsicle optipng libjpeg-turbo-progs librsvg2-bin librsvg2-dev file imagemagick \
libx264-dev libx265-dev libvpx-dev libdav1d-dev libaom-dev libopus-dev \
libmp3lame-dev libvorbis-dev libwebp-dev libjxl-dev yasm wget

# Fetch the current branch refs of the three upstream repositories.
# NOTE(review): presumably used to invalidate the layer cache when an upstream
# branch moves, since the JSON files are not read later in this file - confirm.
ADD https://api.github.com/repos/philomena-dev/FFmpeg/git/refs/heads/release/7.1 /tmp/ffmpeg_version.json
ADD https://api.github.com/repos/philomena-dev/cli_intensities/git/refs/heads/master /tmp/cli_intensities_version.json
ADD https://api.github.com/repos/philomena-dev/mediatools/git/refs/heads/master /tmp/mediatools_version.json

# Download source tarballs of the branch heads.
RUN wget -qO /tmp/FFmpeg.tar.gz https://github.com/philomena-dev/FFmpeg/archive/refs/heads/release/7.1.tar.gz \
&& wget -qO /tmp/cli_intensities.tar.gz https://github.com/philomena-dev/cli_intensities/archive/refs/heads/master.tar.gz \
&& wget -qO /tmp/mediatools.tar.gz https://github.com/philomena-dev/mediatools/archive/refs/heads/master.tar.gz

# Unpack all three tarballs, then build and install them in order: FFmpeg first
# (everything disabled, then only the listed codecs, containers, filters and
# protocols enabled, shared libraries only), followed by cli_intensities and
# mediatools.
RUN cd /tmp \
&& tar -xf FFmpeg.tar.gz \
&& tar -xf cli_intensities.tar.gz \
&& tar -xf mediatools.tar.gz \
&& cd /tmp/FFmpeg-release-7.1 \
&& ./configure \
--prefix=/usr \
--disable-everything \
--disable-stripping \
--disable-static \
--disable-ffplay \
--disable-doc \
--disable-htmlpages \
--disable-manpages \
--disable-podpages \
--disable-txtpages \
--disable-protocols \
--enable-shared \
--enable-pic \
--enable-pthreads \
--enable-gpl \
--enable-avfilter \
--enable-bsf=extract_extradata \
--enable-decoder=aac,apng,av1,gif,h264,hevc,jpeg2000,jpegxl,libaom-av1,libdav1d,libvorbis,libvpx_vp8,libvpx_vp9,mp3,mjpeg,opus,png,vorbis,vp8,vp9,webvtt \
--enable-demuxer=apng,gif,image2,image_gif_pipe,image_jpeg_pipe,image_png_pipe,image_webp_pipe,matroska,mjpeg,mjpeg_2000,mov,webm \
--enable-encoder=aac,apng,gif,jpegxl,libmp3lame,libaom-av1,libvorbis,libopus,libvpx_vp8,libvpx_vp9,libx265,libx264,opus,mjpeg,png,vorbis,webvtt \
--enable-filter=concat,palettegen,paletteuse,scale,setpts,setsar,settb,split,trim \
--enable-libaom \
--enable-libjxl \
--enable-libdav1d \
--enable-libopus \
--enable-libmp3lame \
--enable-libvpx \
--enable-libvorbis \
--enable-libx264 \
--enable-libx265 \
--enable-libwebp \
--enable-muxer=apng,image2,gif,matroska,mp4,webp,webm \
--enable-parser=aac,gif,h264,hevc,jpeg2000,jpegxl,mjpeg,opus,png,vorbis,vp8,vp9,webp \
--enable-protocol=concat,data,file,subfile \
&& make -j$(nproc) install \
&& cd /tmp/cli_intensities-master \
&& make -j$(nproc) install \
&& cd /tmp/mediatools-master \
&& make -j$(nproc) install

# Copy in the Rust sources and the rsvg-convert wrapper script, and download the
# model file that is passed to the server on startup (see CMD).
COPY native/philomena /tmp/philomena
COPY docker/mediaproc/safe-rsvg-convert /usr/bin/safe-rsvg-convert
ADD https://github.com/liamwhite/philomena-ris-inference-toolkit/releases/download/v1.0/dinov2-with-registers-base.pt /usr/share/dinov2-with-registers-base.pt

# Build the mediaproc_server package in release mode, install the binary, and
# copy any shared objects (*.so, *.so.N, ...) found under target/release/build
# into /usr/lib.
RUN cd /tmp/philomena \
&& cargo build --release -p mediaproc_server \
&& cp target/release/mediaproc_server /usr/bin/mediaproc_server \
&& find target/release/build -regextype posix-extended -regex '^.*\.so(\.[0-9]+)*$' -exec cp '{}' /usr/lib/ ';'

# Set up unprivileged user account
RUN useradd -ms /bin/bash mediaproc
USER mediaproc
WORKDIR /home/mediaproc
# NOTE(review): trace is the most verbose log level - confirm this is intended
# outside of development.
ENV RUST_LOG=trace
# Arguments: listen address (all interfaces, port 1500) and the model file path.
CMD ["/usr/bin/mediaproc_server", "0.0.0.0:1500", "/usr/share/dinov2-with-registers-base.pt"]
File renamed without changes.
87 changes: 83 additions & 4 deletions lib/philomena/duplicate_reports.ex
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ defmodule Philomena.DuplicateReports do
alias Ecto.Multi
alias Philomena.Repo

alias PhilomenaMedia.Features
alias PhilomenaQuery.Search
alias Philomena.DuplicateReports.DuplicateReport
alias Philomena.DuplicateReports.SearchQuery
alias Philomena.DuplicateReports.Uploader
Expand All @@ -20,7 +22,7 @@ defmodule Philomena.DuplicateReports do
source = Repo.preload(source, :intensity)

{source.intensity, source.image_aspect_ratio}
|> find_duplicates(dist: 0.2)
|> find_duplicates_by_intensities(dist: 0.2)
|> where([i, _it], i.id != ^source.id)
|> Repo.all()
|> Enum.map(fn target ->
Expand All @@ -30,7 +32,77 @@ defmodule Philomena.DuplicateReports do
end)
end

def find_duplicates({intensities, aspect_ratio}, opts \\ []) do
@doc """
Searches the image index for images whose stored vectors are nearest to the
given feature vector.

The query is a nested k-NN search on the `vectors.f` field requesting the 100
nearest neighbours. `filter` is applied as a `post_filter`.

## Options

  * `:min_score` - minimum score passed to the search (default `0`)
  * `:limit` - page size of the search (default `25`)

The returned page has `total_entries` capped at `limit` and `total_pages`
capped at 1.
"""
def find_duplicates_by_features(%Features{} = features, filter, opts \\ []) do
  min_score = Keyword.get(opts, :min_score, 0)
  limit = Keyword.get(opts, :limit, 25)

  # TODO: many issues with efficient filtering using k-NN plugin,
  # use post_filter to work around for the time being
  #
  # https://github.com/opensearch-project/k-NN/issues/2222
  # https://github.com/opensearch-project/k-NN/issues/2339
  # https://github.com/opensearch-project/k-NN/issues/2347

  knn_query = %{knn: %{"vectors.f": %{vector: features.features, k: 100}}}

  body = %{
    query: %{nested: %{path: "vectors", query: knn_query}},
    post_filter: filter,
    min_score: min_score
  }

  definition = Search.search_definition(Image, body, %{page_size: limit})
  page = Search.search_records(definition, preload(Image, [:user, :sources, tags: :aliases]))

  # Report at most one page of at most `limit` entries.
  %{
    page
    | total_entries: min(page.total_entries, limit),
      total_pages: min(page.total_pages, 1)
  }
end

@doc """
Executes the feature-based reverse image search query from parameters.

Builds a `SearchQuery` changeset from `attrs`, runs the upload analysis on it
and applies it. When that succeeds, features are generated for the uploaded
image and passed to `find_duplicates_by_features/3` together with `filter`,
using the search query's `limit`.

Returns `{:ok, images}` on success. Any other result of
`Ecto.Changeset.apply_action/2` is returned unchanged.

## Examples

    iex> execute_search_query_by_features(filter, %{"image" => ...})
    {:ok, images}

    iex> execute_search_query_by_features(filter, %{"image" => ...})
    {:error, %Ecto.Changeset{}}

"""
def execute_search_query_by_features(filter, attrs \\ %{}) do
  changeset =
    %SearchQuery{}
    |> SearchQuery.changeset(attrs)
    |> Uploader.analyze_upload(attrs)

  with {:ok, search_query} <- Ecto.Changeset.apply_action(changeset, :create) do
    features = generate_features(search_query)

    {:ok, find_duplicates_by_features(features, filter, limit: search_query.limit)}
  end
end

def find_duplicates_by_intensities({intensities, aspect_ratio}, opts \\ []) do
aspect_dist = Keyword.get(opts, :aspect_dist, 0.05)
limit = Keyword.get(opts, :limit, 10)
dist = Keyword.get(opts, :dist, 0.25)
Expand Down Expand Up @@ -71,7 +143,7 @@ defmodule Philomena.DuplicateReports do
{:error, %Ecto.Changeset{}}

"""
def execute_search_query(attrs \\ %{}) do
def execute_search_query_by_intensities(attrs \\ %{}) do
%SearchQuery{}
|> SearchQuery.changeset(attrs)
|> Uploader.analyze_upload(attrs)
Expand All @@ -85,7 +157,7 @@ defmodule Philomena.DuplicateReports do

images =
{intensities, aspect}
|> find_duplicates(dist: dist, aspect_dist: dist, limit: limit)
|> find_duplicates_by_intensities(dist: dist, aspect_dist: dist, limit: limit)
|> preload([:user, :intensity, [:sources, tags: :aliases]])
|> Repo.paginate(page_size: 50)

Expand All @@ -103,6 +175,13 @@ defmodule Philomena.DuplicateReports do
PhilomenaMedia.Processors.intensities(analysis, file)
end

# Calls PhilomenaMedia.Processors.features/2 with the analysis derived from the
# search query and the query's uploaded image.
defp generate_features(search_query) do
  search_query
  |> SearchQuery.to_analysis()
  |> PhilomenaMedia.Processors.features(search_query.uploaded_image)
end

@doc """
Returns an `%Ecto.Changeset{}` for tracking search query changes.

Expand Down
91 changes: 91 additions & 0 deletions lib/philomena/image_vectors.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
defmodule Philomena.ImageVectors do
  @moduledoc """
  Context for reading and writing `Philomena.ImageVectors.ImageVector` records.
  """

  import Ecto.Query, warn: false
  alias Philomena.Repo

  alias Philomena.ImageVectors.ImageVector

  @doc """
  Fetches the image vector with the given `id`.

  Raises `Ecto.NoResultsError` if no such image vector exists.

  ## Examples

      iex> get_image_vector!(123)
      %ImageVector{}

      iex> get_image_vector!(456)
      ** (Ecto.NoResultsError)

  """
  def get_image_vector!(id) do
    Repo.get!(ImageVector, id)
  end

  @doc """
  Inserts an image vector belonging to `image`.

  `attrs` is a struct (a `PhilomenaMedia.Features` by default); its fields are
  converted to a plain map and passed to `ImageVector.changeset/2`.

  ## Examples

      iex> create_image_vector(image, features)
      {:ok, %ImageVector{}}

      iex> create_image_vector(image, bad_features)
      {:error, %Ecto.Changeset{}}

  """
  def create_image_vector(image, attrs \\ %PhilomenaMedia.Features{}) do
    vector = %ImageVector{image_id: image.id}
    changeset = ImageVector.changeset(vector, Map.from_struct(attrs))

    Repo.insert(changeset)
  end

  @doc """
  Applies `attrs` to an existing image vector and persists the result.

  ## Examples

      iex> update_image_vector(image_vector, %{field: new_value})
      {:ok, %ImageVector{}}

      iex> update_image_vector(image_vector, %{field: bad_value})
      {:error, %Ecto.Changeset{}}

  """
  def update_image_vector(%ImageVector{} = image_vector, attrs) do
    changeset = ImageVector.changeset(image_vector, attrs)

    Repo.update(changeset)
  end

  @doc """
  Deletes an image vector.

  ## Examples

      iex> delete_image_vector(image_vector)
      {:ok, %ImageVector{}}

      iex> delete_image_vector(image_vector)
      {:error, %Ecto.Changeset{}}

  """
  def delete_image_vector(%ImageVector{} = image_vector), do: Repo.delete(image_vector)

  @doc """
  Builds an `%Ecto.Changeset{}` for tracking image vector changes, without
  persisting anything.

  ## Examples

      iex> change_image_vector(image_vector)
      %Ecto.Changeset{data: %ImageVector{}}

  """
  def change_image_vector(%ImageVector{} = image_vector, attrs \\ %{}),
    do: ImageVector.changeset(image_vector, attrs)
end
Loading