Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions comfy/image_encoders/dino3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import torch.nn as nn
import torch.nn.functional as F

import comfy.ops
from comfy.ldm.modules.attention import optimized_attention_for_device
from comfy.image_encoders.dino2 import LayerScale as DINOv3ViTLayerScale

Expand Down Expand Up @@ -171,11 +172,11 @@ def forward(self, pixel_values, bool_masked_pos=None):
patch_embeddings = patch_embeddings.flatten(2).transpose(1, 2)

if bool_masked_pos is not None:
mask_token = self.mask_token.to(patch_embeddings.dtype)
mask_token = comfy.ops.cast_to_input(self.mask_token, patch_embeddings)
patch_embeddings = torch.where(bool_masked_pos.unsqueeze(-1), mask_token, patch_embeddings)

cls_token = self.cls_token.expand(batch_size, -1, -1).to(patch_embeddings.device)
register_tokens = self.register_tokens.expand(batch_size, -1, -1).to(patch_embeddings.device)
cls_token = comfy.ops.cast_to_input(self.cls_token.expand(batch_size, -1, -1), patch_embeddings)
register_tokens = comfy.ops.cast_to_input(self.register_tokens.expand(batch_size, -1, -1), patch_embeddings)
embeddings = torch.cat([cls_token, register_tokens, patch_embeddings], dim=1)
return embeddings

Expand Down
8 changes: 8 additions & 0 deletions comfy/ldm/chroma_radiance/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class ChromaRadianceParams(ChromaParams):
# None means use the same dtype as the model.
nerf_embedder_dtype: Optional[torch.dtype]
use_x0: bool
# Use sequential txt_ids instead of zeros
use_sequential_txt_ids: bool

class ChromaRadiance(Chroma):
"""
Expand Down Expand Up @@ -162,6 +164,9 @@ def __init__(self, image_model=None, final_layer=True, dtype=None, device=None,
if params.use_x0:
self.register_buffer("__x0__", torch.tensor([]))

if params.use_sequential_txt_ids:
self.register_buffer("__sequential__", torch.tensor([]))

@property
def _nerf_final_layer(self) -> nn.Module:
if self.params.nerf_final_head_type == "linear":
Expand Down Expand Up @@ -313,6 +318,9 @@ def _forward(
img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)
txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
# Radiance after 2026-05-22 uses sequential txt_ids instead of zeros
if params.use_sequential_txt_ids:
txt_ids[:, :, 0] = torch.arange(context.shape[1], device=x.device, dtype=x.dtype).unsqueeze(0).expand(bs, -1)

img_out = self.forward_orig(
img,
Expand Down
4 changes: 4 additions & 0 deletions comfy/model_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,10 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
dit_config["use_x0"] = True
else:
dit_config["use_x0"] = False
if "{}__sequential__".format(key_prefix) in state_dict_keys: # sequential txt_ids
dit_config["use_sequential_txt_ids"] = True
else:
dit_config["use_sequential_txt_ids"] = False
else:
dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys
dit_config["yak_mlp"] = '{}double_blocks.0.img_mlp.gate_proj.weight'.format(key_prefix) in state_dict_keys
Expand Down
2 changes: 1 addition & 1 deletion comfy_api_nodes/nodes_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def define_schema(cls):
return IO.Schema(
node_id="ClaudeNode",
display_name="Anthropic Claude",
category="text/partner/Anthropic",
category="partner/text/Anthropic",
essentials_category="Text Generation",
description="Generate text responses with Anthropic's Claude models. "
"Provide a text prompt and optionally one or more images for multimodal context.",
Expand Down
4 changes: 2 additions & 2 deletions comfy_api_nodes/nodes_beeble.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="BeebleSwitchXVideoEdit",
display_name="Beeble SwitchX Video Edit",
category="video/partner/Beeble",
category="partner/video/Beeble",
description=(
"Edit a video with Beeble SwitchX. Switches anything in the scene (background, "
"lighting, costume) while preserving the original subject's pixels and motion. "
Expand Down Expand Up @@ -302,7 +302,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="BeebleSwitchXImageEdit",
display_name="Beeble SwitchX Image Edit",
category="image/partner/Beeble",
category="partner/image/Beeble",
description=(
"Edit a single image with Beeble SwitchX. Switches anything in the scene "
"(background, lighting, costume) while preserving the original subject's pixels. "
Expand Down
16 changes: 8 additions & 8 deletions comfy_api_nodes/nodes_bfl.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="FluxProUltraImageNode",
display_name="Flux 1.1 [pro] Ultra Image",
category="image/partner/BFL",
category="partner/image/BFL",
description="Generates images using Flux Pro 1.1 Ultra via api based on prompt and resolution.",
inputs=[
IO.String.Input(
Expand Down Expand Up @@ -155,7 +155,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id=cls.NODE_ID,
display_name=cls.DISPLAY_NAME,
category="image/partner/BFL",
category="partner/image/BFL",
description="Edits images using Flux.1 Kontext [pro] via api based on prompt and aspect ratio.",
inputs=[
IO.String.Input(
Expand Down Expand Up @@ -277,7 +277,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="FluxProExpandNode",
display_name="Flux.1 Expand Image",
category="image/partner/BFL",
category="partner/image/BFL",
description="Outpaints image based on prompt.",
inputs=[
IO.Image.Input("image"),
Expand Down Expand Up @@ -414,7 +414,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="FluxProFillNode",
display_name="Flux.1 Fill Image",
category="image/partner/BFL",
category="partner/image/BFL",
description="Inpaints image based on mask and prompt.",
inputs=[
IO.Image.Input("image"),
Expand Down Expand Up @@ -521,7 +521,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="FluxEraseNode",
display_name="Flux Erase Image",
category="image/partner/BFL",
category="partner/image/BFL",
description="Removes the masked object from an image and reconstructs the background. "
"Paint the mask over what you want to erase.",
inputs=[
Expand Down Expand Up @@ -597,7 +597,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="FluxVTONode",
display_name="Flux Virtual Try-On",
category="image/partner/BFL",
category="partner/image/BFL",
description="Virtual try-on: dresses the person in the provided garment.",
inputs=[
IO.Image.Input("person", tooltip="Image of the person to dress."),
Expand Down Expand Up @@ -697,7 +697,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id=cls.NODE_ID,
display_name=cls.DISPLAY_NAME,
category="image/partner/BFL",
category="partner/image/BFL",
description="Generates images synchronously based on prompt and resolution.",
inputs=[
IO.String.Input(
Expand Down Expand Up @@ -868,7 +868,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="Flux2ImageNode",
display_name="Flux.2 Image",
category="image/partner/BFL",
category="partner/image/BFL",
description="Generate images via Flux.2 [pro] or Flux.2 [max] from a prompt and optional reference images.",
inputs=[
IO.String.Input(
Expand Down
6 changes: 3 additions & 3 deletions comfy_api_nodes/nodes_bria.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def define_schema(cls):
return IO.Schema(
node_id="BriaImageEditNode",
display_name="Bria FIBO Image Edit",
category="image/partner/Bria",
category="partner/image/Bria",
description="Edit images using Bria latest model",
inputs=[
IO.Combo.Input("model", options=["FIBO"]),
Expand Down Expand Up @@ -169,7 +169,7 @@ def define_schema(cls):
return IO.Schema(
node_id="BriaRemoveImageBackground",
display_name="Bria Remove Image Background",
category="image/partner/Bria",
category="partner/image/Bria",
description="Remove the background from an image using Bria RMBG 2.0.",
inputs=[
IO.Image.Input("image"),
Expand Down Expand Up @@ -245,7 +245,7 @@ def define_schema(cls):
return IO.Schema(
node_id="BriaRemoveVideoBackground",
display_name="Bria Remove Video Background",
category="video/partner/Bria",
category="partner/video/Bria",
description="Remove the background from a video using Bria. ",
inputs=[
IO.Video.Input("video"),
Expand Down
24 changes: 12 additions & 12 deletions comfy_api_nodes/nodes_bytedance.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ def define_schema(cls):
return IO.Schema(
node_id="ByteDanceImageNode",
display_name="ByteDance Image",
category="image/partner/ByteDance",
category="partner/image/ByteDance",
description="Generate images using ByteDance models via api based on prompt",
inputs=[
IO.Combo.Input("model", options=["seedream-3-0-t2i-250415"]),
Expand Down Expand Up @@ -492,7 +492,7 @@ def define_schema(cls):
return IO.Schema(
node_id="ByteDanceSeedreamNode",
display_name="ByteDance Seedream 4.5 & 5.0",
category="image/partner/ByteDance",
category="partner/image/ByteDance",
description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.",
inputs=[
IO.Combo.Input(
Expand Down Expand Up @@ -754,7 +754,7 @@ def define_schema(cls):
return IO.Schema(
node_id="ByteDanceSeedreamNodeV2",
display_name="ByteDance Seedream 4.5 & 5.0",
category="image/partner/ByteDance",
category="partner/image/ByteDance",
description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.",
inputs=[
IO.String.Input(
Expand Down Expand Up @@ -920,7 +920,7 @@ def define_schema(cls):
return IO.Schema(
node_id="ByteDanceTextToVideoNode",
display_name="ByteDance Text to Video",
category="video/partner/ByteDance",
category="partner/video/ByteDance",
description="Generate video using ByteDance models via api based on prompt",
inputs=[
IO.Combo.Input(
Expand Down Expand Up @@ -1048,7 +1048,7 @@ def define_schema(cls):
return IO.Schema(
node_id="ByteDanceImageToVideoNode",
display_name="ByteDance Image to Video",
category="video/partner/ByteDance",
category="partner/video/ByteDance",
description="Generate video using ByteDance models via api based on image and prompt",
inputs=[
IO.Combo.Input(
Expand Down Expand Up @@ -1185,7 +1185,7 @@ def define_schema(cls):
return IO.Schema(
node_id="ByteDanceFirstLastFrameNode",
display_name="ByteDance First-Last-Frame to Video",
category="video/partner/ByteDance",
category="partner/video/ByteDance",
description="Generate video using prompt and first and last frames.",
inputs=[
IO.Combo.Input(
Expand Down Expand Up @@ -1333,7 +1333,7 @@ def define_schema(cls):
return IO.Schema(
node_id="ByteDanceImageReferenceNode",
display_name="ByteDance Reference Images to Video",
category="video/partner/ByteDance",
category="partner/video/ByteDance",
description="Generate video using prompt and reference images.",
inputs=[
IO.Combo.Input(
Expand Down Expand Up @@ -1576,7 +1576,7 @@ def define_schema(cls):
return IO.Schema(
node_id="ByteDance2TextToVideoNode",
display_name="ByteDance Seedance 2.0 Text to Video",
category="video/partner/ByteDance",
category="partner/video/ByteDance",
description="Generate video using Seedance 2.0 models based on a text prompt.",
inputs=[
IO.DynamicCombo.Input(
Expand Down Expand Up @@ -1677,7 +1677,7 @@ def define_schema(cls):
return IO.Schema(
node_id="ByteDance2FirstLastFrameNode",
display_name="ByteDance Seedance 2.0 First-Last-Frame to Video",
category="video/partner/ByteDance",
category="partner/video/ByteDance",
description="Generate video using Seedance 2.0 from a first frame image and optional last frame image.",
inputs=[
IO.DynamicCombo.Input(
Expand Down Expand Up @@ -1944,7 +1944,7 @@ def define_schema(cls):
return IO.Schema(
node_id="ByteDance2ReferenceNode",
display_name="ByteDance Seedance 2.0 Reference to Video",
category="video/partner/ByteDance",
category="partner/video/ByteDance",
description="Generate, edit, or extend video using Seedance 2.0 with reference images, "
"videos, and audio. Supports multimodal reference, video editing, and video extension.",
inputs=[
Expand Down Expand Up @@ -2241,7 +2241,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="ByteDanceCreateImageAsset",
display_name="ByteDance Create Image Asset",
category="image/partner/ByteDance",
category="partner/image/ByteDance",
description=(
"Create a Seedance 2.0 personal image asset. Uploads the input image and "
"registers it in the given asset group. If group_id is empty, runs a real-person "
Expand Down Expand Up @@ -2308,7 +2308,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="ByteDanceCreateVideoAsset",
display_name="ByteDance Create Video Asset",
category="video/partner/ByteDance",
category="partner/video/ByteDance",
description=(
"Create a Seedance 2.0 personal video asset. Uploads the input video and "
"registers it in the given asset group. If group_id is empty, runs a real-person "
Expand Down
2 changes: 1 addition & 1 deletion comfy_api_nodes/nodes_bytedance_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def define_schema(cls):
return IO.Schema(
node_id="ByteDanceSeedNode",
display_name="ByteDance Seed",
category="text/partner/ByteDance",
category="partner/text/ByteDance",
essentials_category="Text Generation",
description="Generate text responses with ByteDance's Seed 2.0 models. "
"Provide a text prompt and optionally one or more images or videos for multimodal context.",
Expand Down
16 changes: 8 additions & 8 deletions comfy_api_nodes/nodes_elevenlabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="ElevenLabsSpeechToText",
display_name="ElevenLabs Speech to Text",
category="audio/partner/ElevenLabs",
category="partner/audio/ElevenLabs",
description="Transcribe audio to text. "
"Supports automatic language detection, speaker diarization, and audio event tagging.",
inputs=[
Expand Down Expand Up @@ -210,7 +210,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="ElevenLabsVoiceSelector",
display_name="ElevenLabs Voice Selector",
category="audio/partner/ElevenLabs",
category="partner/audio/ElevenLabs",
description="Select a predefined ElevenLabs voice for text-to-speech generation.",
inputs=[
IO.Combo.Input(
Expand Down Expand Up @@ -239,7 +239,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="ElevenLabsTextToSpeech",
display_name="ElevenLabs Text to Speech",
category="audio/partner/ElevenLabs",
category="partner/audio/ElevenLabs",
description="Convert text to speech.",
inputs=[
IO.Custom(ELEVENLABS_VOICE).Input(
Expand Down Expand Up @@ -414,7 +414,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="ElevenLabsAudioIsolation",
display_name="ElevenLabs Voice Isolation",
category="audio/partner/ElevenLabs",
category="partner/audio/ElevenLabs",
description="Remove background noise from audio, isolating vocals or speech.",
inputs=[
IO.Audio.Input(
Expand Down Expand Up @@ -459,7 +459,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="ElevenLabsTextToSoundEffects",
display_name="ElevenLabs Text to Sound Effects",
category="audio/partner/ElevenLabs",
category="partner/audio/ElevenLabs",
description="Generate sound effects from text descriptions.",
inputs=[
IO.String.Input(
Expand Down Expand Up @@ -555,7 +555,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="ElevenLabsInstantVoiceClone",
display_name="ElevenLabs Instant Voice Clone",
category="audio/partner/ElevenLabs",
category="partner/audio/ElevenLabs",
description="Create a cloned voice from audio samples. "
"Provide 1-8 audio recordings of the voice to clone.",
inputs=[
Expand Down Expand Up @@ -658,7 +658,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="ElevenLabsSpeechToSpeech",
display_name="ElevenLabs Speech to Speech",
category="audio/partner/ElevenLabs",
category="partner/audio/ElevenLabs",
description="Transform speech from one voice to another while preserving the original content and emotion.",
inputs=[
IO.Custom(ELEVENLABS_VOICE).Input(
Expand Down Expand Up @@ -793,7 +793,7 @@ def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="ElevenLabsTextToDialogue",
display_name="ElevenLabs Text to Dialogue",
category="audio/partner/ElevenLabs",
category="partner/audio/ElevenLabs",
description="Generate multi-speaker dialogue from text. Each dialogue entry has its own text and voice.",
inputs=[
IO.Float.Input(
Expand Down
Loading
Loading