Skip to content

Quickstart

Deploy a scoring server, stream a HuggingFace dataset, and submit data on-chain — all from a single cloneable project.

git clone https://github.com/soma-org/quickstart.git
cd quickstart
uv sync
uv run modal setup

This installs all dependencies and authenticates with Modal. You’ll get $5 in free credits, and $30 more once you add billing details. The project structure:

src/quickstart/
├── scoring_server.py # Modal app — GPU scoring server
├── huggingface.py # Modal app — HF dataset pipeline
├── scoring_client.py # CLI client — fetches targets, scores data
├── train_torch.py # Modal app — PyTorch training
└── train_flax.py # Modal app — Flax/JAX training
uv run modal serve src/quickstart/scoring_server.py

Modal prints a URL for the scoring endpoint — copy it for the next step.

The scoring server runs the SOMA binary on an L4 GPU and exposes it as an HTTP endpoint. Here’s how it works:

The Modal image installs CUDA, the SOMA binary, and the Python SDK:

# Container image for the scoring server: CUDA base, SOMA binary, Python SDK.
image = (
    modal.Image.from_registry(
        # CUDA devel base image; Modal layers Python 3.13 on top.
        "nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.13"
    )
    .apt_install("curl")  # needed to fetch the sup installer below
    # Expose the sup-installed binaries on PATH; keep SOMA logging quiet.
    .env({"PATH": "/root/.local/bin:/usr/local/bin:/usr/bin:/bin", "RUST_LOG": "warn"})
    .run_commands(
        "curl -sSfL https://sup.soma.org | sh",  # install the `sup` tool
        "sup install soma",                      # install the SOMA binary
    )
    .pip_install("soma-sdk>=0.1.7", "fastapi[standard]")
)

When the container starts, @modal.enter() launches the scoring service and waits for it to be healthy:

@app.cls(image=image, gpu="L4", scaledown_window=300, timeout=900, volumes={"/data": volume})
class Scorer:
    """Modal class that runs a local SOMA scoring service on an L4 GPU."""

    @modal.enter()
    def start_soma(self):
        # Launch the SOMA scoring service as a child process when the
        # container starts; its output is forwarded to the container logs.
        self.proc = subprocess.Popen(
            ["soma", "start", "scoring", "--device", "cuda", "--data-dir", "/data"],
            stdout=sys.stdout,
            stderr=sys.stderr,
        )
        # ... waits up to 30s for scoring_health() to return True

Incoming requests hit a FastAPI endpoint that forwards to the local scoring service:

@modal.fastapi_endpoint(method="POST")
async def score(self, request: dict):
    """Forward a scoring request to the SOMA service running in this container.

    The request body carries the model manifests, target embedding, and data
    metadata; the response is a plain dict so it serializes cleanly over HTTP.
    """
    soma = await SomaClient(
        chain="testnet",
        scoring_url=f"http://localhost:{SCORING_PORT}",
    )
    # Re-hydrate the plain-dict manifests into attribute-style objects.
    model_objs = [SimpleNamespace(**entry) for entry in request["models"]]
    outcome = await soma.score(
        data_url=request["data_url"],
        models=model_objs,
        target_embedding=request["target_embedding"],
        data_checksum=request["data_checksum"],
        data_size=request["data_size"],
    )
    response = {
        "winner": outcome.winner,
        "loss_score": outcome.loss_score,
        "embedding": outcome.embedding,
        "distance": outcome.distance,
    }
    return response

Before submitting data, your SOMA keypair needs to be available to the pipeline.

  1. Export your secret key:

    soma keytool export --key-identity my-wallet
  2. Add it to the project’s .env file:

    cp .env.example .env

    Edit .env and set SOMA_SECRET_KEY to your exported key.

  3. Store it as a Modal Secret (required for cloud functions):

    modal secret create soma-keypair SOMA_SECRET_KEY=<your-secret-key>
uv run modal run src/quickstart/huggingface.py

This streams HuggingFace datasets, filters for candidates likely to score well, scores them against your scoring server, and submits the winner on-chain.

The pipeline is a funnel: stream thousands of text chunks, filter cheaply with a small embedding model, then score only the top candidates with the full SOMA scoring server.

Stream a dataset, embed each chunk with a small model (MiniLM, 384-dim), and return the top-k closest to the target embedding.

@app.function(image=image, gpu="T4", volumes={"/cache": volume}, timeout=1800)
def find_candidates(
    dataset_name: str,
    dataset_config: str,
    target_embedding: list[float],
    num_samples: int = 5000,
    top_k: int = 20,
) -> list[dict]:
    """Stream a HuggingFace dataset and return the top-k chunks nearest the target.

    Embeds each text chunk with MiniLM (384-dim) and ranks by cosine distance
    to the truncated target embedding. This is a cheap pre-filter; the real
    SOMA scoring happens later.

    Args:
        dataset_name: HF dataset repo id, e.g. "wikimedia/wikipedia".
        dataset_config: Dataset config/subset name.
        target_embedding: Target embedding from the open SOMA target.
        num_samples: Number of streamed examples to examine (not to keep).
        top_k: Number of best candidates to return.

    Returns:
        Up to ``top_k`` dicts of ``{"text": str, "distance": float}``,
        ascending by distance. Empty if no chunk passed the length filter.
    """
    from datasets import load_dataset
    from sentence_transformers import SentenceTransformer
    import numpy as np

    model = SentenceTransformer(
        "sentence-transformers/all-MiniLM-L6-v2",
        cache_folder="/cache/models",
    )
    ds = load_dataset(
        dataset_name, dataset_config,
        split="train", streaming=True,
    )
    texts = []
    for i, example in enumerate(ds):
        if i >= num_samples:
            break
        text = example.get("text", "")
        # Skip trivially short chunks; they rarely carry enough signal.
        if len(text) > 200:
            texts.append(text)
    # Fix: without this guard, model.encode([]) yields an empty array and the
    # matrix product below fails when nothing survived the length filter.
    if not texts:
        return []
    embeddings = model.encode(
        [t[:8192] for t in texts],  # cap chunk length to bound encode cost
        normalize_embeddings=True,
        batch_size=64,
    )
    # Heuristic pre-filter: MiniLM is 384-dim, SOMA targets are 2048-dim,
    # so compare against the first 384 dims of the target, re-normalized.
    target = np.array(target_embedding[:384], dtype=np.float64)
    norm = np.linalg.norm(target)
    if norm == 0.0:
        # Fix: degenerate all-zero target previously produced NaN distances
        # via division by zero; treat every chunk as equally distant instead.
        return [{"text": t, "distance": 1.0} for t in texts[:top_k]]
    target = target / norm
    distances = 1 - embeddings @ target
    indices = np.argsort(distances)[:top_k]
    return [
        {"text": texts[i], "distance": float(distances[i])}
        for i in indices
    ]

Score the filtered candidates against the real SOMA models and submit the best one.

@app.function(
    image=image,
    secrets=[modal.Secret.from_name("soma-keypair")],
    timeout=300,
)
async def score_and_submit(
    candidates: list[dict],
    target_id: str,
    scoring_url: str,
):
    """Score filtered candidates with the real SOMA models and submit the best.

    Args:
        candidates: Pre-filtered ``{"text": ..., "distance": ...}`` dicts.
        target_id: Id of the open target to submit against.
        scoring_url: URL of the deployed scoring endpoint.

    Raises:
        ValueError: If ``target_id`` is not among the currently open targets.
    """
    import os
    from soma_sdk import SomaClient, Keypair

    # The secret key is injected via the `soma-keypair` Modal secret.
    kp = Keypair.from_secret_key(os.environ["SOMA_SECRET_KEY"])
    client = await SomaClient(chain="testnet", scoring_url=scoring_url)
    targets = await client.get_targets(status="open")
    # Fix: a bare next() raises StopIteration, which PEP 479 converts to an
    # opaque RuntimeError inside this coroutine — fail with a clear message.
    target = next((t for t in targets if t.id == target_id), None)
    if target is None:
        raise ValueError(f"Target {target_id} is not open (it may have closed)")
    manifests = await client.get_model_manifests(target)

    best_score = None
    best_result = None
    for candidate in candidates:
        data = candidate["text"].encode("utf-8")
        result = await client.score(
            data_url="",
            models=manifests,
            target_embedding=target.embedding,
            data=data,
            seed=0,
        )
        # The winner is the model whose embedding landed closest to the target.
        distance = result.distance[result.winner]
        if best_score is None or distance < best_score:
            best_score = distance
            best_result = (result, data)

    if best_result and best_score <= target.distance_threshold:
        result, data = best_result
        await client.submit_data(
            kp,
            target_id,
            data,
            "",
            target.model_ids[result.winner],
            result.embedding,
            result.distance[result.winner],
        )
        print(f"Submitted! Distance: {best_score:.6f}")
    else:
        print(f"No candidate within threshold (best: {best_score})")

The entry point picks datasets, fans out find_candidates in parallel using .starmap(), and submits the best result.

@app.local_entrypoint()
async def main():
    """Fan out candidate search over several datasets, then score and submit.

    Picks the first open target, runs ``find_candidates`` in parallel with
    ``.starmap()``, and hands the 10 globally best candidates to
    ``score_and_submit``.
    """
    from soma_sdk import SomaClient

    client = await SomaClient(chain="testnet")
    targets = await client.get_targets(status="open", limit=1)
    # Fix: indexing an empty list raised a bare IndexError when no target was
    # open — report the situation and exit cleanly instead.
    if not targets:
        print("No open targets right now; try again later.")
        return
    target = targets[0]
    print(f"Target: {target.id}, threshold: {target.distance_threshold}")

    datasets_to_try = [
        ("HuggingFaceFW/fineweb", "sample-10BT"),
        ("wikimedia/wikipedia", "20231101.en"),
        ("bigcode/starcoderdata", "default"),
    ]
    all_candidates = []
    # .starmap() runs one Modal container per dataset in parallel.
    for result in find_candidates.starmap(
        [(name, config, target.embedding) for name, config in datasets_to_try]
    ):
        all_candidates.extend(result)

    all_candidates.sort(key=lambda c: c["distance"])
    best = all_candidates[:10]
    print(f"Top 10 candidates from {len(all_candidates)} total")
    await score_and_submit.remote(
        best,
        target.id,
        # Replace with the URL printed by `modal serve` / `modal deploy`.
        scoring_url="https://your--soma-scoring-scorer-score.modal.run",
    )

To keep the scoring server running without a local modal serve process:

uv run modal deploy src/quickstart/scoring_server.py

The server scales to zero when idle — no requests means no cost (~$0.80/hr when active on an L4 GPU).