Skip to content

Image Search with Localisation

Getting Started

  1. Clone the examples repository.
  2. Run Marqo:

    docker rm -f marqo
    docker pull marqoai/marqo:latest
    docker run --name marqo -it --privileged -p 8882:8882 --add-host host.docker.internal:host-gateway marqoai/marqo:latest
    
    For more detailed instructions, check the getting started guide.

  3. Follow the instructions below or see the original code and article here and here.

Code

index_all_data.py
#####################################################
### STEP 0. Import and define any helper functions
#####################################################

from marqo import Client
import os
import pandas as pd 
from utils import download_data

#####################################################
### STEP 1. start Marqo
#####################################################

# Follow the instructions here https://github.com/marqo-ai/marqo

#####################################################
### STEP 2. Get the data for indexing
#####################################################


# Source selection: when True the images are pulled directly from the s3 bucket,
# otherwise they are pulled for local indexing.
use_remote = False
# NOTE(review): presumably tells download_data that Marqo runs inside docker, so
# locators should be built on docker_path -- confirm in utils.download_data.
in_docker = True

# Base URL forwarded to download_data (see note above) and the directory the
# images are downloaded into (an "images" folder under the current working dir).
docker_path = 'http://host.docker.internal:8222/'
local_dir = f"{os.getcwd()}/images/"

# The first CSV column is used as the index of the frame.
data = pd.read_csv('files.csv', index_col=0)

locators = download_data(
    data=data,
    download_dir=local_dir,
    use_remote=use_remote,
    in_docker=in_docker,
    docker_path=docker_path,
)

# One document per locator; the file name part of the locator doubles as the _id.
documents = [
    {"image_location": locator, '_id': os.path.basename(locator)}
    for locator in locators
]

# if you have the images locally, see the instructions 
# here https://marqo.pages.dev/Advanced-Usage/images/ for the best ways to index 


#####################################################
### STEP 3. Create the index(es)
######################################################

# Client is created with no arguments, so the library's own defaults apply.
client = Client()

# Experiment configuration: one index is created per patch method so that the
# different methods can be compared against each other.
index_name_prefix = "visual-search"
patch_methods = ["dino/v1", None, "yolox"]  # other options: ["dino/v1", "dino/v2", "frcnn", None, "yolox"]
model_name = "ViT-B/32"
n_processes = 3
batch_size = 50

# Set this to False to keep an existing index of the same name instead of
# deleting it before re-creating it.
delete_index = True

# Settings template shared by every index. "model" and "patch_method" are
# placeholders here; they are filled in before each index is created.
settings = dict(
    index_defaults=dict(
        treat_urls_and_pointers_as_images=True,
        image_preprocessing=dict(patch_method=None),
        model=None,
        normalize_embeddings=True,
    ),
)

# The model is identical for every index, so it is set once before the loop.
settings['index_defaults']['model'] = model_name

# Create one index per patch method and add the same documents to each of them.
for patch_method in patch_methods:

    # Index name is the prefix plus the patch method, with '/' swapped for '-';
    # the None entry gets no suffix at all.
    suffix = '' if patch_method is None else f"-{patch_method.replace('/', '-')}"
    index_name = index_name_prefix + suffix

    # update the only setting that differs between indexes
    settings['index_defaults']['image_preprocessing']['patch_method'] = patch_method

    # optionally delete the index if it exists
    if delete_index:
        try:
            client.delete_index(index_name)
        except Exception as err:
            # Deletion is best effort. Unlike a bare `except:`, this does not
            # swallow KeyboardInterrupt/SystemExit, and the real error is shown
            # because a failure here is not necessarily a missing index (the
            # server may be unreachable, for example).
            print(f"could not delete index '{index_name}' (it may not exist): {err}")

    # create the index with our settings
    response = client.create_index(index_name, settings_dict=settings)

    # NOTE(review): device='cuda' presumably requires a GPU to be visible to
    # Marqo -- confirm, and switch to 'cpu' when running without one.
    response = client.index(index_name).add_documents(
        documents,
        device='cuda',
        server_batch_size=batch_size,
        processes=n_processes,
    )