MIDV-195 4K

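This example trains a contrastive image-embedding model (ResNet-50 backbone with an MLP projection head, NT-Xent loss) and can then export an embedding for every image in a folder.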

Save as train_embeddings.py and run.

import os, random, math
from glob import glob
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torchvision.models as models
import torch.nn.functional as F
from tqdm import tqdm
# Simple dataset: expects folders per ID (if available) or flat folder.
class ImageFolderDataset(Dataset):
    """Image dataset over a class-per-subfolder tree or a flat folder.

    If ``root`` contains subdirectories, each one (in sorted name order) is
    treated as a class and the ``*.jpg`` / ``*.png`` files directly inside it
    receive that class index as their label. If ``root`` has no
    subdirectories, the ``*.jpg`` / ``*.png`` files directly inside ``root``
    are used and every label is 0.

    Paths are always stored in sorted order (per class), so the mapping from
    index to sample does not depend on filesystem enumeration order.

    With ``augment=True`` an item is ``(view1, view2, label)``: two
    independently augmented views of the same image. With ``augment=False``
    an item is ``(tensor, label)`` produced by a plain resize + normalize.

    Args:
        root: Directory to scan.
        size: Side length, in pixels, of the square output tensors.
        augment: Whether to return two random augmented views per item.
    """

    # Glob patterns that count as images (same set for flat and class layouts).
    _PATTERNS = ("*.jpg", "*.png")
    # Channel statistics used for normalization (the usual ImageNet values).
    _MEAN = [0.485, 0.456, 0.406]
    _STD = [0.229, 0.224, 0.225]

    @classmethod
    def _list_images(cls, folder):
        """Return the sorted image paths found directly inside ``folder``."""
        found = []
        for pattern in cls._PATTERNS:
            found.extend(glob(os.path.join(folder, pattern)))
        return sorted(found)

    def __init__(self, root, size=256, augment=False):
        self.paths = []
        self.labels = []
        classes = sorted(
            d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))
        )
        if not classes:
            # Flat folder: a single implicit class with label 0.
            self.paths = self._list_images(root)
            self.labels = [0] * len(self.paths)
        else:
            for idx, name in enumerate(classes):
                files = self._list_images(os.path.join(root, name))
                self.paths.extend(files)
                self.labels.extend([idx] * len(files))
        self.size = size
        self.augment = augment
        self.base_tr = T.Compose([
            T.Resize((size, size)),
            T.ToTensor(),
            T.Normalize(mean=self._MEAN, std=self._STD),
        ])
        self.aug_tr = T.Compose([
            T.RandomResizedCrop(size, scale=(0.7, 1.0)),
            T.RandomHorizontalFlip(),
            T.ColorJitter(0.2, 0.2, 0.2, 0.05),
            T.RandomApply([T.GaussianBlur(3)], p=0.2),
            T.ToTensor(),
            T.Normalize(mean=self._MEAN, std=self._STD),
        ])

    def __len__(self):
        """Return the number of images found under ``root``."""
        return len(self.paths)

    def __getitem__(self, i):
        """Load image ``i`` and return its tensor(s) together with its label."""
        # convert() returns a new, fully loaded image, so the file handle can
        # be released as soon as the with-block exits.
        with Image.open(self.paths[i]) as fh:
            img = fh.convert('RGB')
        if self.augment:
            # Two independent draws from the same augmentation pipeline.
            return self.aug_tr(img), self.aug_tr(img), self.labels[i]
        return self.base_tr(img), self.labels[i]
# Model: ResNet-50 backbone + MLP projection to 512
class EmbedNet(nn.Module):
    """ResNet-50 trunk followed by a two-layer MLP projection head.

    ``forward`` returns one L2-normalized vector of width ``out_dim`` per
    input sample.

    Args:
        out_dim: Width of the returned embedding.
        backbone: Backbone name; only ``'resnet50'`` is accepted.
        pretrained: Forwarded to ``torchvision.models.resnet50``.

    Raises:
        ValueError: If ``backbone`` is anything other than ``'resnet50'``.
    """

    def __init__(self, out_dim=512, backbone='resnet50', pretrained=True):
        super().__init__()
        if backbone != 'resnet50':
            raise ValueError("only resnet50 in this snippet")

        resnet = models.resnet50(pretrained=pretrained)
        feat_dim = resnet.fc.in_features
        # Keep every child module except the final fully-connected classifier.
        trunk_layers = list(resnet.children())[:-1]
        self.backbone = nn.Sequential(*trunk_layers)

        hidden_dim = 2048
        self.head = nn.Sequential(
            nn.Linear(feat_dim, hidden_dim),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(hidden_dim),
            nn.Linear(hidden_dim, out_dim),
        )

    def forward(self, x):
        """Embed a batch and scale each row to unit L2 norm."""
        feats = self.backbone(x)
        # Collapse everything after the batch axis into one feature axis.
        flat = feats.view(feats.shape[0], -1)
        projected = self.head(flat)
        return F.normalize(projected, p=2, dim=1)
# NT-Xent loss (contrastive with temperature)
def nt_xent_loss(z1, z2, temperature=0.1):
    """Return the NT-Xent contrastive loss for two batches of paired views.

    Row ``i`` of ``z1`` and row ``i`` of ``z2`` form a positive pair; every
    other row of the concatenated ``2N`` batch acts as a negative. Similarity
    is the plain dot product, so the inputs are expected to be L2-normalized
    already if cosine similarity is intended.

    The loss is evaluated as a cross-entropy over the similarity logits with
    the diagonal (self-similarity) masked out. This equals
    ``-log(exp(s_pos) / sum_{j != i} exp(s_ij))`` but uses a log-sum-exp
    formulation, so it stays finite for small temperatures where an explicit
    ``exp`` would overflow.

    Args:
        z1: Tensor of shape ``(N, D)`` — first view embeddings.
        z2: Tensor of shape ``(N, D)`` — second view embeddings.
        temperature: Positive scale the similarities are divided by.

    Returns:
        Scalar tensor: the mean loss over all ``2N`` anchors.
    """
    n = z1.size(0)
    z = torch.cat([z1, z2], dim=0)  # 2N x D
    # Compute logits in float32 so half-precision inputs do not lose range.
    logits = torch.matmul(z, z.T).float() / temperature  # 2N x 2N
    # An anchor must never be its own candidate: drop the diagonal.
    self_mask = torch.eye(2 * n, dtype=torch.bool, device=z.device)
    logits = logits.masked_fill(self_mask, float('-inf'))
    # The positive for anchor i is i + N (first half) or i - N (second half).
    idx = torch.arange(n, device=z.device)
    targets = torch.cat([idx + n, idx], dim=0)
    return F.cross_entropy(logits, targets)
def train(root, epochs=20, bs=64, lr=1e-4, size=256, device='cuda',
          num_workers=8, temperature=0.1):
    """Train an ``EmbedNet`` with the NT-Xent loss on augmented image pairs.

    Args:
        root: Image folder passed to ``ImageFolderDataset``.
        epochs: Number of passes over the dataset.
        bs: Batch size; incomplete final batches are dropped.
        lr: AdamW learning rate.
        size: Side length of the square training crops.
        device: Device to train on (string or ``torch.device``).
        num_workers: Number of ``DataLoader`` worker processes.
        temperature: Temperature passed to ``nt_xent_loss``.

    Returns:
        The trained model (left in training mode).

    Raises:
        ValueError: If the dataset holds fewer than ``bs`` images, because
            ``drop_last=True`` would then yield no batches at all.
    """
    ds = ImageFolderDataset(root, size=size, augment=True)
    if len(ds) < bs:
        # Without this check the loop below would run zero steps and return
        # an untrained model without any indication of a problem.
        raise ValueError(
            f"found {len(ds)} images under {root!r}, need at least bs={bs}"
        )
    dl = DataLoader(ds, batch_size=bs, shuffle=True,
                    num_workers=num_workers, drop_last=True)
    model = EmbedNet(out_dim=512).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
    # Mixed precision only applies on CUDA; elsewhere the scaler and autocast
    # are created disabled and act as pass-throughs.
    use_amp = torch.device(device).type == 'cuda'
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
    for ep in range(epochs):
        model.train()
        pbar = tqdm(dl, desc=f"Epoch {ep + 1}/{epochs}")
        for x1, x2, _lbl in pbar:
            x1 = x1.to(device)
            x2 = x2.to(device)
            with torch.cuda.amp.autocast(enabled=use_amp):
                z1 = model(x1)
                z2 = model(x2)
                loss = nt_xent_loss(z1, z2, temperature=temperature)
            opt.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()
            pbar.set_postfix(loss=loss.item())
    return model
# Embedding extraction utility
def extract_embeddings(model, folder, size=256, device='cuda'):
    """Embed every ``*.jpg`` / ``*.png`` found recursively under ``folder``.

    Each image is resized to ``size`` x ``size``, normalized, and passed
    through ``model`` on its own (batch of one) with gradients disabled. The
    model is switched to eval mode first.

    Args:
        model: Callable module mapping an image batch to embeddings.
        folder: Directory searched recursively for images.
        size: Side length the images are resized to.
        device: Device the input tensors are moved to.

    Returns:
        A list of ``(path, embedding)`` tuples in sorted path order, where
        ``embedding`` is the first row of the model output as a NumPy array.
    """
    preprocess = T.Compose([
        T.Resize((size, size)),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    image_paths = []
    for pattern in ("*.jpg", "*.png"):
        image_paths += glob(os.path.join(folder, "**", pattern), recursive=True)
    image_paths.sort()

    results = []
    model.eval()
    with torch.no_grad():
        for path in tqdm(image_paths):
            rgb = Image.open(path).convert('RGB')
            batch = preprocess(rgb).unsqueeze(0).to(device)
            vector = model(batch).cpu().numpy()[0]
            results.append((path, vector))
    return results
if __name__ == '__main__':
    # Command-line entry point.
    #   --mode train : train on --data and save the weights to --out
    #   --mode embed : load weights from --out, embed --data, pickle to --emb-out
    import argparse
    import pickle

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', required=True, help='root image folder')
    parser.add_argument('--mode', choices=['train', 'embed'], default='train')
    parser.add_argument('--out', default='model.pth',
                        help='model weights file (written by train, read by embed)')
    parser.add_argument('--epochs', type=int, default=20,
                        help='number of training epochs')
    parser.add_argument('--bs', type=int, default=64,
                        help='training batch size')
    parser.add_argument('--emb-out', default='embeddings.pkl',
                        help='where embed mode writes the pickled embeddings')
    args = parser.parse_args()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if args.mode == 'train':
        m = train(args.data, epochs=args.epochs, bs=args.bs, device=device)
        torch.save(m.state_dict(), args.out)
    else:
        # Every weight is replaced by load_state_dict below, so skip fetching
        # the pretrained ImageNet weights.
        m = EmbedNet(pretrained=False).to(device)
        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
        m.load_state_dict(torch.load(args.out, map_location=device))
        embs = extract_embeddings(m, args.data, device=device)
        # Simple save: a pickled list of (path, embedding) tuples.
        with open(args.emb_out, 'wb') as f:
            pickle.dump(embs, f)
        print(f"Saved {args.emb_out}")

| Feature | MIDV‑195 4K |
|---------|-------------|
| Sensor | 35 mm full‑frame back‑illuminated CMOS, 24.5 MP (effective) |
| Dynamic Range | 16+ stops (measured with DSC Labs) |
| Resolution & Frame Rates | 4K (4096×2160) @ 24/25/30/48/50/60/120 fps; 2K @ up to 240 fps |
| Bit Depth | 10‑bit 4:2:2 internally; 12‑bit 4:2:2 in ProRes RAW; 16‑bit RAW via HDMI |
| Codec Options | ProRes 422 HQ/LT, DNxHR HQX, Apple ProRes RAW, Blackmagic RAW (via external recorder) |
| ISO Range | Native 100‑25,600 (extendable to 50‑102,400) |
| Shutter | Electronic rolling shutter with global‑shutter mode (up to 1/8000 s) |
| Autofocus | Dual‑pixel PDAF with 5,000‑point coverage, face/eye detection, continuous tracking |
| Stabilization | 5‑axis in‑body (up to 6 EV) + optional electronic stabilization |
| Viewfinder/Display | 0.5‑inch OLED EVF (3.69 M dots) + 3.2‑inch touchscreen LCD (4.5 M dots) |
| Connectivity | 12G‑SDI, HDMI 2.1 (12‑bit RAW), USB‑C 3.2, 2× 3.5 mm audio, Wi‑Fi 6, Bluetooth 5.2 |
| Storage | Dual CFast 2.0 slots + one SD UHS‑III slot (flexible recording) |
| Battery | 1× BP‑X300 (12 Wh) – up to 130 min continuous 4K/60p recording; optional V‑Mount plate |
| Dimensions / Weight | 115 mm × 80 mm × 80 mm; 0.95 kg (body only) |
| Ruggedness | IP68 dust & water‑proof, MIL‑STD‑810G shock‑tested |


The 12G‑SDI output makes the MIDV‑195 well suited to live‑streaming setups. Paired with an ATEM Mini Pro ISO, producers can switch between two cameras while simultaneously recording each source as a separate ProRes file for later editing.


The built‑in recording engine supports ProRes RAW (Apple) and Blackmagic RAW via an optional firmware patch, giving you a single‑card workflow for most projects. Dual CFast 2.0 slots enable relay recording, ensuring uninterrupted capture even during long takes.

Example workflow:

Three native log curves are available:

| Curve | Bit Depth | Intended Use |
|-------|-----------|--------------|
| MIDV‑Log | 12‑bit | General cinematic work (Rec. 2020) |
| V‑Log2 | 16‑bit (RAW) | High‑end post‑production, HDR |
| Flat‑HD | 10‑bit | Quick‑turn productions where speed trumps grading |

All three can be output simultaneously via HDMI, making external recorders a perfect companion for high‑end pipelines.


| Performer | Role | Highlights |
|-----------|------|------------|
| Lead Male | Executive (Yamato) | Delivers a compelling mix of authority and vulnerability; his subtle facial work conveys the character’s inner turmoil. |
| Lead Female | Executive’s partner (Aki) | Strong screen presence; her performance balances sensuality with an undercurrent of strategic calculation. |
| Supporting Cast | Colleagues & antagonists | Provide solid grounding for the corporate environment; the antagonist’s cold demeanor heightens the stakes. |

Overall, the chemistry between the leads feels genuine, which is crucial for sustaining audience investment throughout the more intimate sequences.


| Element | Assessment |
|---------|------------|
| Premise | The story centers on a high‑stakes corporate power play that spirals into an intense personal entanglement. The protagonist (a charismatic executive) becomes drawn into a secretive liaison that threatens both career and personal safety. |
| Story Arc | The screenplay follows a classic three‑act structure:<br>1️⃣ Setup – Introduces the corporate world and the central characters;<br>2️⃣ Complication – The hidden affair surfaces, creating tension and moral ambiguity;<br>3️⃣ Resolution – A climactic confrontation that ties the professional and personal threads together. |
| Character Development | The lead’s internal conflict is reasonably fleshed out, with subtle hints at past trauma that inform his choices. Supporting characters (the rival, the confidante, and the love interest) are given just enough depth to keep the stakes credible without detracting from the film’s primary focus. |
| Dialogue | Crisp, business‑jargon‑laden in the office scenes, shifting to more intimate, emotionally charged exchanges in the private moments. The script avoids gratuitous exposition, allowing the tension to build through subtext. |