Skip to content

tensor

sorix.tensor

Device

Device(device)

Represents a computing device in Sorix, matching PyTorch's torch.device.

Source code in sorix/tensor.py
def __init__(self, device: Union[str, Device]):
    """Parse a device spec: another Device, 'cpu', 'cuda', or 'cuda:<index>'."""
    if isinstance(device, Device):
        # Copy constructor: mirror the other device's fields.
        self.type = device.type
        self.index = device.index
        return
    if not isinstance(device, str):
        raise ValueError(f"Invalid device: {device}")
    if ':' in device:
        # e.g. 'cuda:1' -> type 'cuda', index 1.
        dev_type, _, idx_text = device.partition(':')
        self.type = dev_type
        self.index = int(idx_text)
    else:
        self.type = device
        # Bare 'cuda' defaults to device 0; CPU has no index.
        self.index = None if device == 'cpu' else 0

Size

Bases: tuple

A tuple subclass that represents the shape of a Tensor, matching PyTorch's torch.Size.

DType

DType(name)

Represents a data type in Sorix, matching PyTorch's torch.dtype.

Source code in sorix/tensor.py
def __init__(self, name: str):
    """Store the dtype's canonical name (e.g. 'float32'), mirroring torch.dtype."""
    self.name = name

no_grad

no_grad()

Context manager that disables autograd engine.

Examples:

with sorix.no_grad():
    x = sorix.tensor([1.0], requires_grad=True)
    y = x + 2
print(y.requires_grad)  # False
Source code in sorix/tensor.py
def __init__(self):
    # Remember the previous autograd-enabled state (assumed True here);
    # presumably restored when the context exits — __exit__ not visible in this view.
    self.prev = True

Tensor

Tensor(
    data,
    _children=[],
    _op="",
    device="cpu",
    requires_grad=False,
    dtype=None,
)

Initializes a new Tensor.

Parameters:

  • data (TensorData) –

    Numerical data (numpy array, list, scalar, etc.).

  • device (str, default: 'cpu' ) –

    Computing device ('cpu' or 'cuda').

  • requires_grad (bool, default: False ) –

    Whether to track gradients for this tensor.

  • dtype (Any, default: None ) –

    Data type for the tensor elements.

Source code in sorix/tensor.py
def __init__(
    self, 
    data: TensorData, 
    _children: Union[List[Tensor], Tuple[Tensor, ...]] = (), 
    _op: str = '',
    device: str = 'cpu',
    requires_grad: bool = False,
    dtype: Any = None
) -> None:
    """
    Initializes a new Tensor.

    Args:
        data: Numerical data (numpy array, list, scalar, etc.).
        _children: Parent tensors in the autograd graph (internal).
        _op: Name of the operation that produced this tensor (internal).
        device: Computing device ('cpu' or 'cuda').
        requires_grad: Whether to track gradients for this tensor.
        dtype: Data type for the tensor elements.
    """
    # FIX: default for `_children` was a mutable `[]` (shared across calls);
    # an empty tuple is safe and equally iterable.
    self.device = Device(device)

    if self.device.type == 'cuda' and not _cupy_available:
        raise Exception('Cupy is not available')

    on_gpu = self.device.type == 'cuda' and _cupy_available
    xp = cp if on_gpu else np

    # Resolve a DType wrapper to the name numpy/cupy understand.
    target_dtype = dtype.name if isinstance(dtype, DType) else dtype

    def _coerce(raw: Any) -> Any:
        # Normalize arbitrary input into an xp array. Scalars, sequences and
        # unknown types default float64 -> float32 unless an explicit dtype was
        # given; existing arrays keep their dtype and memory (views preserved
        # for optimizer buffers) unless an explicit dtype forces a conversion.
        array_types = (np.ndarray, xp.ndarray if _cupy_available else np.ndarray,
                       pd.DataFrame, pd.Series)
        if isinstance(raw, (list, tuple, int, float)):
            arr = xp.array(raw, dtype=target_dtype)
            if dtype is None and arr.dtype == xp.float64:
                arr = arr.astype(xp.float32)
        elif isinstance(raw, array_types):
            if on_gpu:
                # asarray moves host data to the device without copying data
                # that already lives there.
                arr = xp.asarray(raw) if target_dtype is None else xp.asarray(raw, dtype=target_dtype)
            else:
                # On CPU keep the object as-is when no dtype is requested,
                # so memory views are preserved.
                arr = raw if target_dtype is None else xp.asarray(raw, dtype=target_dtype)
        else:
            arr = xp.array(raw, dtype=target_dtype)
            if dtype is None and arr.dtype == xp.float64:
                arr = arr.astype(xp.float32)
        return arr

    if on_gpu:
        # Allocate on the requested GPU.
        with cp.cuda.Device(self.device.index):
            data = _coerce(data)
    else:
        data = _coerce(data)

    self.data: Any = data
    self.requires_grad: bool = requires_grad
    # grad is a plain np/cp array by default; becomes a Tensor only during create_graph
    self.grad: Optional[Any] = None

    self._backward = _noop
    enabled = is_grad_enabled()
    # Only retain graph edges when grad mode is on and some participant needs them.
    has_grad_child = any(getattr(c, 'requires_grad', False) for c in _children)
    self._prev: Set[Tensor] = set(_children) if (enabled and (requires_grad or has_grad_child)) else set()
    self._op: str = _op if enabled else ''

T property

T

Transpose of the tensor.

to

to(device)

Moves the tensor to the specified device.

Parameters:

  • device (Union[str, Device]) –

    'cpu', 'cuda', 'cuda:0', etc.

Source code in sorix/tensor.py
def to(self, device: Union[str, Device]) -> Tensor:
    """
    Moves the tensor (and its gradient, if any) to the specified device in place.

    Args:
        device: 'cpu', 'cuda', 'cuda:0', etc.

    Returns:
        self, for chaining.

    Raises:
        RuntimeError: If CUDA is requested but CuPy is unavailable.
        ValueError: If the device type is unknown.
    """
    new_device = Device(device)
    if new_device == self.device:
        return self

    if new_device.type == 'cuda':
        if not _cupy_available:
            raise RuntimeError("CuPy is not installed, you cannot use CUDA")
        with cp.cuda.Device(new_device.index):
            self.data = cp.asarray(self.data)
            if self.grad is not None:
                # FIX: grad may be a Tensor (create_graph) or a raw np/cp
                # array (the default) — only Tensors have .to().
                if isinstance(self.grad, Tensor):
                    self.grad.to(new_device)
                else:
                    self.grad = cp.asarray(self.grad)
    elif new_device.type == "cpu":
        # FIX: compare device *types* — self.device is a Device object, so
        # `self.device == 'cuda'` compared an object against a string and the
        # CUDA->CPU copy never ran.
        if self.device.type == 'cuda':
            self.data = cp.asnumpy(self.data)
        if self.grad is not None:
            if isinstance(self.grad, Tensor):
                self.grad.to(new_device)
            elif self.device.type == 'cuda':
                self.grad = cp.asnumpy(self.grad)
    else:
        raise ValueError(f"Invalid device type: {new_device.type}")

    self.device = new_device
    return self

cpu

cpu()

Moves tensor to CPU.

Source code in sorix/tensor.py
def cpu(self) -> Tensor:
    """Moves this tensor to host (CPU) memory via `to`."""
    return self.to("cpu")

gpu

gpu()

Moves tensor to GPU.

Source code in sorix/tensor.py
def gpu(self) -> Tensor:
    """Moves this tensor to GPU memory via `to`."""
    return self.to('cuda')

add_

add_(other)

In-place addition.

Source code in sorix/tensor.py
def add_(self, other: Union[Tensor, float, int]) -> Tensor:
    """In-place addition: mutates self.data and returns self (no autograd tracking)."""
    if isinstance(other, Tensor):
        self.data += other.data
    else:
        self.data += other
    return self

sub_

sub_(other)

In-place subtraction.

Source code in sorix/tensor.py
def sub_(self, other: Union[Tensor, float, int]) -> Tensor:
    """In-place subtraction: mutates self.data and returns self (no autograd tracking)."""
    if isinstance(other, Tensor):
        self.data -= other.data
    else:
        self.data -= other
    return self

mul_

mul_(other)

In-place multiplication.

Source code in sorix/tensor.py
def mul_(self, other: Union[Tensor, float, int]) -> Tensor:
    """In-place multiplication: mutates self.data and returns self (no autograd tracking)."""
    if isinstance(other, Tensor):
        self.data *= other.data
    else:
        self.data *= other
    return self

div_

div_(other)

In-place division.

Source code in sorix/tensor.py
def div_(self, other: Union[Tensor, float, int]) -> Tensor:
    """In-place division: mutates self.data and returns self (no autograd tracking)."""
    if isinstance(other, Tensor):
        self.data /= other.data
    else:
        self.data /= other
    return self

fill_

fill_(value)

Fills the tensor with the specified value in-place.

Source code in sorix/tensor.py
def fill_(self, value: float) -> Tensor:
    """Overwrites every element with `value` in-place and returns self."""
    self.data[...] = value
    return self

zero_

zero_()

Fills the tensor with zero in-place.

Source code in sorix/tensor.py
def zero_(self) -> Tensor:
    """Fills the tensor with zero in-place (shorthand for fill_(0.0))."""
    return self.fill_(0.0)

add

add(other)

Element-wise addition.

Parameters:

Returns:

  • Tensor

    A new tensor with the sum.

Examples:

x = Tensor([1, 2])
y = Tensor([3, 4])
z = x.add(y)  # Tensor([4, 6])
Source code in sorix/tensor.py
def add(self, other: Union[Tensor, float, int]) -> Tensor:
    """
    Element-wise addition with broadcasting.

    Args:
        other: The tensor or scalar to add.

    Returns:
        A new tensor holding ``self + other``.

    Examples:
        ```python
        x = Tensor([1, 2])
        y = Tensor([3, 4])
        z = x.add(y)  # Tensor([4, 6])
        ```
    """
    if not isinstance(other, Tensor):
        other = Tensor(other, device=self.device)

    result_data = self.data + other.data
    if not is_grad_enabled():
        return Tensor(result_data, device=self.device)

    out = Tensor(
        result_data, [self, other], '+',
        device=self.device,
        requires_grad=self.requires_grad or other.requires_grad,
    )

    def _backward() -> None:
        if out.grad is None:
            return
        # Addition routes the upstream gradient to both operands, reduced
        # back to each operand's shape to undo broadcasting.
        for operand in (self, other):
            if operand.requires_grad:
                operand._accumulate_grad(
                    Tensor._match_shape(out.grad, operand.data.shape)
                )

    out._backward = _backward
    return out

sub

sub(other)

Element-wise subtraction.

Parameters:

  • other (Union[Tensor, float, int]) –

    The tensor or scalar to subtract.

Returns:

  • Tensor

    A new tensor with the result.

Examples:

x = Tensor([5, 5])
y = Tensor([1, 2])
z = x.sub(y)  # Tensor([4, 3])
Source code in sorix/tensor.py
def sub(self, other: Union[Tensor, float, int]) -> Tensor:
    """
    Element-wise subtraction with broadcasting.

    Args:
        other: The tensor or scalar to subtract.

    Returns:
        A new tensor holding ``self - other``.

    Examples:
        ```python
        x = Tensor([5, 5])
        y = Tensor([1, 2])
        z = x.sub(y)  # Tensor([4, 3])
        ```
    """
    if not isinstance(other, Tensor):
        other = Tensor(other, device=self.device)

    diff = self.data - other.data
    if not is_grad_enabled():
        return Tensor(diff, device=self.device)

    tracks = self.requires_grad or other.requires_grad
    out = Tensor(diff, [self, other], '-', device=self.device, requires_grad=tracks)

    def _backward() -> None:
        if out.grad is None:
            return
        # d(a - b)/da = 1 ; d(a - b)/db = -1.
        if self.requires_grad:
            self._accumulate_grad(Tensor._match_shape(out.grad, self.data.shape))
        if other.requires_grad:
            other._accumulate_grad(-Tensor._match_shape(out.grad, other.data.shape))

    out._backward = _backward
    return out

mul

mul(other)

Element-wise multiplication.

Parameters:

  • other (Union[Tensor, float, int]) –

    The tensor or scalar to multiply by.

Returns:

  • Tensor

    A new tensor with the product.

Examples:

x = Tensor([2, 3])
y = Tensor([4, 5])
z = x.mul(y)  # Tensor([8, 15])
Source code in sorix/tensor.py
def mul(self, other: Union[Tensor, float, int]) -> Tensor:
    """
    Element-wise multiplication with broadcasting.

    Args:
        other: The tensor or scalar to multiply by.

    Returns:
        A new tensor holding the product.

    Examples:
        ```python
        x = Tensor([2, 3])
        y = Tensor([4, 5])
        z = x.mul(y)  # Tensor([8, 15])
        ```
    """
    if not isinstance(other, Tensor):
        other = Tensor(other, device=self.device)

    product = self.data * other.data
    if not is_grad_enabled():
        return Tensor(product, device=self.device)

    out = Tensor(product, [self, other], '*', device=self.device,
                 requires_grad=self.requires_grad or other.requires_grad)

    def _backward() -> None:
        if out.grad is None:
            return
        # Product rule: each operand's gradient is the upstream gradient
        # scaled by the other operand. Kept as Tensor ops so higher-order
        # graphs can be built while grad mode is on.
        if self.requires_grad:
            self._accumulate_grad(Tensor._match_shape(other * out.grad, self.shape))
        if other.requires_grad:
            other._accumulate_grad(Tensor._match_shape(self * out.grad, other.shape))

    out._backward = _backward
    return out

matmul

matmul(other)

Matrix multiplication.

Parameters:

  • other (Union[Tensor, ndarray]) –

    The tensor or array to multiply by.

Returns:

  • Tensor

    A new tensor with the matrix product.

Examples:

x = Tensor([[1, 2], [3, 4]])
y = Tensor([[5], [6]])
z = x.matmul(y) # [[17], [39]]
Source code in sorix/tensor.py
def matmul(self, other: Union[Tensor, np.ndarray]) -> Tensor:
    """
    Matrix multiplication (``self @ other``).

    Args:
        other: The tensor or array to multiply by.

    Returns:
        A new tensor with the matrix product.

    Examples:
        ```python
        x = Tensor([[1, 2], [3, 4]])
        y = Tensor([[5], [6]])
        z = x.matmul(y) # [[17], [39]]
        ```
    """
    if not isinstance(other, Tensor):
        other = Tensor(other, device=self.device)

    product = self.data @ other.data
    if not is_grad_enabled():
        return Tensor(product, device=self.device)

    out = Tensor(product, [self, other], '@', device=self.device,
                 requires_grad=self.requires_grad or other.requires_grad)

    def _backward() -> None:
        if out.grad is None:
            return
        # For C = A @ B:  dL/dA = dL/dC @ B^T  and  dL/dB = A^T @ dL/dC.
        if self.requires_grad:
            self._accumulate_grad(Tensor._match_shape(out.grad @ other.T, self.shape))
        if other.requires_grad:
            other._accumulate_grad(Tensor._match_shape(self.T @ out.grad, other.shape))

    out._backward = _backward
    return out

tanh

tanh()

Hyperbolic tangent activation.

Source code in sorix/tensor.py
def tanh(self) -> Tensor:
    """Hyperbolic tangent activation.

    Returns:
        A new tensor with tanh applied element-wise.
    """
    # CONSISTENCY FIX: use self.xp (as sigmoid/softmax/mean/sum do) instead of
    # `cp if self.device == 'cuda' else np` — self.device is a Device object,
    # so comparing it against the string 'cuda' is fragile.
    xp = self.xp

    out_data = xp.tanh(self.data)
    if not is_grad_enabled():
        return Tensor(out_data, device=self.device)

    out = Tensor(out_data, [self], 'tanh', device=self.device, requires_grad=self.requires_grad)

    def _backward() -> None:
        if out.grad is None:
            return
        if self.requires_grad:
            # d tanh(x)/dx = 1 - tanh(x)^2
            if is_grad_enabled():
                # Tensor ops: support higher-order graphs.
                self._accumulate_grad(out.grad * (1 - out**2))
            else:
                # FAST PATH: raw data ops avoid Tensor creation overhead
                g = out.grad.data if isinstance(out.grad, Tensor) else out.grad
                self._accumulate_grad(g * (1 - out.data**2))

    out._backward = _backward
    return out

pow

pow(n)

Raises tensor to the power of n.

Source code in sorix/tensor.py
def pow(self, n: Union[int, float]) -> Tensor:
    """Raises tensor to the power of n (element-wise).

    Args:
        n: Numeric (int/float) exponent; tensor exponents are unsupported.

    Returns:
        A new tensor holding ``self ** n``.

    Raises:
        TypeError: If n is not an int or float.
    """
    # FIX: raise instead of assert — asserts are stripped under `python -O`,
    # which would silently let unsupported exponent types through.
    if not isinstance(n, (int, float)):
        raise TypeError("only supporting int/float powers for now")

    if not is_grad_enabled():
        return Tensor(self.data**n, device=self.device)

    out = Tensor(self.data**n, [self], f'**{n}', device=self.device, requires_grad=self.requires_grad)

    def _backward() -> None:
        if out.grad is None:
            return

        if self.requires_grad:
            # d(x^n)/dx = n * x^(n-1); Tensor ops keep higher-order graphs.
            grad = out.grad * (n * (self**(n-1)))
            self._accumulate_grad(grad)

    out._backward = _backward
    return out

sigmoid

sigmoid()

Sigmoid activation.

Source code in sorix/tensor.py
def sigmoid(self) -> Tensor:
    """Sigmoid activation: 1 / (1 + exp(-x))."""
    xp = self.xp
    result = 1 / (1 + xp.exp(-self.data))

    if not is_grad_enabled():
        return Tensor(result, device=self.device, requires_grad=False)

    out = Tensor(result, [self], 'sigmoid', device=self.device, requires_grad=self.requires_grad)

    def _backward() -> None:
        if out.grad is None or not self.requires_grad:
            return
        # d sigma/dx = sigma * (1 - sigma)
        if is_grad_enabled():
            # Tensor ops: support higher-order graphs.
            self._accumulate_grad(out.grad * out * (1 - out))
        else:
            # FAST PATH: use underlying arrays directly
            g = out.grad.data if isinstance(out.grad, Tensor) else out.grad
            self._accumulate_grad(g * out.data * (1 - out.data))

    out._backward = _backward
    return out

softmax

softmax(axis=-1, dim=None)

Softmax activation along an axis/dim.

Source code in sorix/tensor.py
def softmax(self, axis: int = -1, dim: Optional[int] = None) -> Tensor:
    """Softmax activation along an axis/dim.

    Args:
        axis: Axis along which to normalize (default: last).
        dim: PyTorch-style alias; overrides axis when given.

    Returns:
        A new tensor whose values sum to 1 along the chosen axis.
    """
    if dim is not None:
        axis = dim
    xp = self.xp

    # Stability trick: subtract the max before exponentiating.
    shifted_data = self.data - xp.max(self.data, axis=axis, keepdims=True)
    exp_data = xp.exp(shifted_data)
    out_data = exp_data / xp.sum(exp_data, axis=axis, keepdims=True)

    if not is_grad_enabled():
        return Tensor(out_data, device=self.device, requires_grad=False)

    out = Tensor(out_data, [self], 'softmax', device=self.device, requires_grad=self.requires_grad)

    def _backward() -> None:
        if out.grad is None:
            return
        if self.requires_grad:
            if is_grad_enabled():
                # Softmax gradient with higher-order tracking.
                sum_grad_s = (out.grad * out).sum(axis=axis, keepdims=True)
                self._accumulate_grad(out * (out.grad - sum_grad_s))
            else:
                # FAST PATH. FIX: out.grad may be a raw np/cp array (grad is a
                # plain array by default), in which case `.data` would return
                # its memory buffer, not the values — guard with isinstance
                # exactly as the tanh/sigmoid fast paths do.
                g = out.grad.data if isinstance(out.grad, Tensor) else out.grad
                s = out.data
                sum_grad_s_data = (g * s).sum(axis=axis, keepdims=True)
                self._accumulate_grad(s * (g - sum_grad_s_data))

    out._backward = _backward
    return out

div

div(other)

Element-wise division.

Source code in sorix/tensor.py
def div(self, other: Union[Tensor, float, int]) -> Tensor:
    """Element-wise division with broadcasting.

    Args:
        other: The tensor or scalar divisor.

    Returns:
        A new tensor holding ``self / other``.
    """
    if not isinstance(other, Tensor):
        other = Tensor(other, device=self.device)

    quotient = self.data / other.data
    if not is_grad_enabled():
        return Tensor(quotient, device=self.device)

    out = Tensor(quotient, [self, other], '/', device=self.device,
                 requires_grad=self.requires_grad or other.requires_grad)

    def _backward() -> None:
        if out.grad is None:
            return
        # d(a/b)/da = 1/b ; d(a/b)/db = -a/b^2. Tensor ops keep the
        # higher-order graph alive while grad mode is enabled.
        if self.requires_grad:
            self._accumulate_grad(Tensor._match_shape(out.grad / other, self.shape))
        if other.requires_grad:
            denom_grad = -self * out.grad / (other**2)
            other._accumulate_grad(Tensor._match_shape(denom_grad, other.shape))

    out._backward = _backward
    return out

mean

mean(axis=None, dim=None, keepdims=False)

Computes mean along axis/dim.

Source code in sorix/tensor.py
def mean(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, 
         dim: Optional[Union[int, Tuple[int, ...]]] = None, 
         keepdims: bool = False) -> Tensor:
    """Computes the mean along axis/dim.

    Args:
        axis: Axis or axes to reduce over (None reduces all elements).
        dim: PyTorch-style alias for axis; overrides axis when given.
        keepdims: If True, reduced axes are kept with size 1.

    Returns:
        A new tensor holding the mean.
    """
    if dim is not None:
        axis = dim
    xp = self.xp

    if not is_grad_enabled():
        return Tensor(xp.mean(self.data, axis=axis, keepdims=keepdims), device=self.device)

    out = Tensor(xp.mean(self.data, axis=axis, keepdims=keepdims), [self], 'mean', device=self.device, requires_grad=self.requires_grad)

    def _backward() -> None:
        if out.grad is None:
            return

        if self.requires_grad:
            grad = out.grad
            if not keepdims and axis is not None:
                # Re-insert the reduced axis so the gradient broadcasts back
                # to the input shape.
                # NOTE(review): assumes out.grad has an .expand_dims method
                # (i.e. is a Tensor, not a raw ndarray) — confirm for the
                # non-create_graph path.
                grad = grad.expand_dims(axis=axis)

            # n = reduction factor: how many input elements contributed to
            # each output element; each contributed with weight 1/n.
            n = self.numel / (out.numel if out.numel > 0 else 1)
            self._accumulate_grad(grad / n)
    out._backward = _backward
    return out

sum

sum(axis=None, dim=None, keepdims=False)

Computes sum along axis/dim.

Source code in sorix/tensor.py
def sum(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, 
        dim: Optional[Union[int, Tuple[int, ...]]] = None, 
        keepdims: bool = False) -> Tensor:
    """Computes the sum along axis/dim.

    Args:
        axis: Axis or axes to reduce over (None reduces all elements).
        dim: PyTorch-style alias for axis; overrides axis when given.
        keepdims: If True, reduced axes are kept with size 1.

    Returns:
        A new tensor holding the sum.
    """
    if dim is not None:
        axis = dim
    # (removed unused local `xp = self.xp` — the reduction goes through
    # self.data.sum, which already dispatches to the right backend)

    summed = self.data.sum(axis=axis, keepdims=keepdims)
    if not is_grad_enabled():
        return Tensor(summed, device=self.device)

    out = Tensor(summed, [self], 'sum', device=self.device, requires_grad=self.requires_grad)

    def _backward() -> None:
        if out.grad is None:
            return

        if self.requires_grad:
            grad = out.grad
            if not keepdims and axis is not None:
                # Re-insert the reduced axis so the gradient broadcasts back.
                grad = grad.expand_dims(axis=axis)
            # Multiplication by ones_like is implicit during accumulation match_shape
            self._accumulate_grad(grad)

    out._backward = _backward
    return out

abs

abs()

Absolute value.

Source code in sorix/tensor.py
def abs(self) -> Tensor:
    """Absolute value.

    NOTE(review): unlike most ops here, abs registers no backward function,
    so gradients do not flow through it (unchanged from the original).
    """
    # CONSISTENCY FIX: use self.xp (as sibling ops do) instead of comparing
    # the Device object against the string 'cuda', which is fragile.
    return Tensor(self.xp.abs(self.data), device=self.device)

reshape

reshape(*shape)

Reshapes the tensor to a new shape.

Source code in sorix/tensor.py
def reshape(self, *shape: Any) -> Tensor:
    """Reshapes the tensor to a new shape.

    Accepts either separate ints or a single list/tuple of ints.
    """
    if len(shape) == 1 and isinstance(shape[0], (list, tuple)):
        shape = shape[0]

    reshaped = self.data.reshape(*shape)
    if not is_grad_enabled():
        return Tensor(reshaped, device=self.device, requires_grad=False)

    out = Tensor(reshaped, [self], 'reshape', device=self.device, requires_grad=self.requires_grad)

    def _backward() -> None:
        if out.grad is not None and self.requires_grad:
            # Undo the reshape on the upstream gradient.
            self._accumulate_grad(out.grad.reshape(self.shape))

    out._backward = _backward
    return out

view

view(*shape)

Alias for reshape, implemented to mimic PyTorch's view method.

Source code in sorix/tensor.py
def view(self, *shape: Any) -> Tensor:
    """Alias for reshape, provided to mimic PyTorch's view method."""
    return self.reshape(*shape)

transpose

transpose(*axes)

Transposes the tensor axes.

Source code in sorix/tensor.py
def transpose(self, *axes: Any) -> Tensor:
    """Transposes the tensor axes (all axes reversed when none are given)."""
    transposed = self.data.transpose(*axes)
    if not is_grad_enabled():
        return Tensor(transposed, device=self.device, requires_grad=False)

    out = Tensor(transposed, [self], 'transpose', device=self.device, requires_grad=self.requires_grad)

    def _backward() -> None:
        if out.grad is None or not self.requires_grad:
            return
        if axes:
            # Invert the permutation to route the gradient back.
            inv_axes = np.argsort(axes)
            self._accumulate_grad(out.grad.transpose(*inv_axes))
        else:
            # A full reversal is its own inverse.
            self._accumulate_grad(out.grad.transpose())

    out._backward = _backward
    return out

flatten

flatten()

Flattens the tensor into 1D.

Source code in sorix/tensor.py
def flatten(self) -> Tensor:
    """Collapses the tensor into a single dimension (reshape to -1)."""
    return self.reshape(-1)

expand_dims

expand_dims(axis)

Adds a new dimension at the specified axis. Matches np.expand_dims.

Source code in sorix/tensor.py
def expand_dims(self, axis: int) -> Tensor:
    """Adds a size-1 dimension at `axis`. Matches np.expand_dims."""
    target = list(self.shape)
    if axis < 0:
        # Negative axes index into the *output* shape (one dim longer).
        axis += len(target) + 1
    target.insert(axis, 1)
    return self.reshape(*target)

unsqueeze

unsqueeze(axis)

Alias for expand_dims, matching PyTorch.

Source code in sorix/tensor.py
def unsqueeze(self, axis: int) -> Tensor:
    """PyTorch-style alias for expand_dims."""
    return self.expand_dims(axis)

squeeze

squeeze(axis=None)

Removes dimensions of size 1.

Source code in sorix/tensor.py
def squeeze(self, axis: Optional[int] = None) -> Tensor:
    """Removes size-1 dimensions (all of them, or only `axis` when given)."""
    squeezed = self.xp.squeeze(self.data, axis=axis)

    if not is_grad_enabled():
        return Tensor(squeezed, device=self.device, requires_grad=False)

    out = Tensor(squeezed, [self], 'squeeze', device=self.device, requires_grad=self.requires_grad)

    def _backward() -> None:
        if out.grad is not None and self.requires_grad:
            # Restore the original shape to undo the squeeze.
            self._accumulate_grad(out.grad.reshape(self.shape))

    out._backward = _backward
    return out

permute

permute(*dims)

Permutes the dimensions of the tensor.

Source code in sorix/tensor.py
def permute(self, *dims: int) -> 'Tensor':
    """Permutes the dimensions (PyTorch-style alias for transpose)."""
    return self.transpose(*dims)

repeat

repeat(*sizes)

Repeats the tensor along the specified dimensions.

Source code in sorix/tensor.py
def repeat(self, *sizes: int) -> 'Tensor':
    """Repeats the tensor along the specified dimensions (backed by xp.tile).

    Args:
        *sizes: Repeat counts per dimension; a single list/tuple is also accepted.

    Returns:
        A new tensor with the data tiled `sizes` times per dimension.
    """
    if len(sizes) == 1 and isinstance(sizes[0], (list, tuple)):
        sizes = sizes[0]

    xp = self.xp
    out_data = xp.tile(self.data, sizes)

    if not is_grad_enabled() or not self.requires_grad:
        return Tensor(out_data, device=self.device, requires_grad=False)

    out = Tensor(out_data, [self], 'repeat', device=self.device, requires_grad=True)

    def _backward():
        # CLEANUP: removed a dead loop that iterated over `sizes` and only
        # `pass`ed — the generic reshape-and-sum below is the real logic.
        if out.grad is None:
            return
        if self.requires_grad:
            g = out.grad.data if isinstance(out.grad, Tensor) else out.grad

            # xp.tile(A, sizes) lays the output out as interleaved
            # (repeat, input) dims: reshape to (s0, d0, s1, d1, ...) and sum
            # over every repeat dimension to fold the gradient back.
            input_shape = self.shape
            # tile() left-pads the input shape with 1s when sizes is longer.
            expanded_input_shape = [1] * (len(sizes) - len(input_shape)) + list(input_shape)

            reshape_dims = []
            sum_axes = []
            for i, s in enumerate(sizes):
                reshape_dims.append(s)
                reshape_dims.append(expanded_input_shape[i])
                sum_axes.append(len(reshape_dims) - 2)  # the 's' dimension

            grad_summed = g.reshape(reshape_dims).sum(axis=tuple(sum_axes))
            self._accumulate_grad(grad_summed.reshape(input_shape))

    out._backward = _backward
    return out

unbind

unbind(dim=0)

Removes a tensor dimension. Returns a tuple of all slices along that dimension.

Source code in sorix/tensor.py
def unbind(self, dim: int = 0) -> Tuple['Tensor', ...]:
    """Removes a tensor dimension, returning a tuple of all slices along it."""
    if dim < 0:
        dim = self.ndim + dim

    def _slice_at(i: int):
        # Full-slice index with position `dim` pinned to i; going through
        # __getitem__ keeps autograd tracking.
        idx = [slice(None)] * self.ndim
        idx[dim] = i
        return self[tuple(idx)]

    return tuple(_slice_at(i) for i in range(self.shape[dim]))

split

split(split_size_or_sections, dim=0)

Splits the tensor into chunks.

Source code in sorix/tensor.py
def split(self, split_size_or_sections: Union[int, List[int]], dim: int = 0) -> List['Tensor']:
    """Splits the tensor into chunks along `dim`.

    Args:
        split_size_or_sections: Either a chunk size (the final chunk may be
            smaller) or an explicit list of section lengths.
        dim: Dimension to split along (may be negative).
    """
    if dim < 0:
        dim = self.ndim + dim

    if isinstance(split_size_or_sections, int):
        # Derive section lengths from a fixed chunk size.
        sections = []
        remaining = self.shape[dim]
        while remaining > 0:
            take = min(remaining, split_size_or_sections)
            sections.append(take)
            remaining -= take
    else:
        sections = split_size_or_sections

    chunks = []
    start = 0
    for length in sections:
        idx = [slice(None)] * self.ndim
        idx[dim] = slice(start, start + length)
        chunks.append(self[tuple(idx)])
        start += length
    return chunks

chunk

chunk(chunks, dim=0)

Splits a tensor into a specific number of chunks.

Source code in sorix/tensor.py
def chunk(self, chunks: int, dim: int = 0) -> List['Tensor']:
    """Splits a tensor into `chunks` roughly-equal pieces along `dim`."""
    # Ceiling division: each chunk holds ceil(dim_size / chunks) items, so
    # the final chunk may be smaller.
    per_chunk = -(-self.shape[dim] // chunks)
    return self.split(per_chunk, dim=dim)

backward

backward(
    gradient=None, retain_graph=True, create_graph=False
)

Computes the gradient of current tensor w.r.t. graph leaves.

The graph is traversed in reverse topological order to propagate gradients. If the tensor is non-scalar, a gradient must be provided.

Parameters:

  • gradient (Optional[Union[Tensor, ndarray, Any]], default: None ) –

    The gradient of this tensor, usually the dL/d(this_tensor). Must match the shape of this tensor.

  • retain_graph (bool, default: True ) –

    If False, the graph used to compute the grads will be freed.

  • create_graph (bool, default: False ) –

    If True, graph of the gradient will be constructed, allowing to compute higher-order derivative products.

Source code in sorix/tensor.py
def backward(self, gradient: Optional[Union[Tensor, np.ndarray, Any]] = None, 
             retain_graph: bool = True, create_graph: bool = False) -> None:
    """
    Computes the gradient of current tensor w.r.t. graph leaves.

    The graph is traversed in reverse topological order to propagate gradients.
    If the tensor is non-scalar, a gradient must be provided.

    Args:
        gradient: The gradient of this tensor, usually the dL/d(this_tensor).
                 Must match the shape of this tensor.
        retain_graph: If False, the graph used to compute the grads will be freed.
        create_graph: If True, graph of the gradient will be constructed, 
                     allowing to compute higher-order derivative products.

    Raises:
        RuntimeError: If this tensor is non-scalar and no gradient was given.
        ValueError: If the provided gradient's shape does not match this tensor.
    """
    topo: List[Tensor] = []
    visited: Set[int] = set()

    # Depth-first post-order walk over _prev edges: children are appended
    # before their parents, so `topo` ends in topological order (self last).
    # NOTE(review): recursion depth equals graph depth — very deep graphs
    # could hit Python's recursion limit.
    def build_topo(t: Tensor) -> None:
        if id(t) not in visited:
            visited.add(id(t))
            for child in t._prev:
                build_topo(child)
            topo.append(t)

    build_topo(self)

    xp = self.xp
    # Seed dtype name, resolved from a DType wrapper if needed.
    d_name = self.dtype.name if isinstance(self.dtype, DType) else str(self.dtype)

    # Check for scalarity if no seed gradient is provided.
    if gradient is None:
        if self.data.size != 1:
            raise RuntimeError("grad can be implicitly created only for scalar outputs.")
        # Seed with dL/dL = 1, matching this tensor's dtype.
        seed_data = xp.ones_like(self.data, dtype=d_name)
        # During create_graph, wrap in Tensor so the higher-order graph is built.
        # Otherwise keep as a plain array for efficiency.
        if create_graph:
            seed_grad = Tensor(seed_data, device=self.device, requires_grad=True)
        else:
            # create_graph=False: use plain Tensor (no graph required)
            seed_grad = Tensor(seed_data, device=self.device)
    else:
        if isinstance(gradient, Tensor):
            seed_grad = gradient
        else:
            # Wrap raw arrays/scalars so accumulation always sees a Tensor.
            seed_grad = Tensor(gradient, device=self.device)

        # Validate shape
        if seed_grad.shape != self.data.shape:
            raise ValueError(f"Gradient shape {seed_grad.shape} does not match tensor shape {self.data.shape}")

    # Grad mode during the sweep controls whether each node's _backward
    # builds a higher-order graph (only when create_graph=True).
    prev_grad_enabled = is_grad_enabled()
    set_grad_enabled(create_graph)
    try:
        # Always use _accumulate_grad for consistency
        self._accumulate_grad(seed_grad)

        # Propagate from self down toward the leaves.
        for node in reversed(topo):
            node._backward()
    finally:
        if not retain_graph:
            for node in topo:
                node._prev = set() # Break references to free graph
        set_grad_enabled(prev_grad_enabled)

astype

astype(dtype)

Casts tensor to a new data type.

Source code in sorix/tensor.py
def astype(self, dtype: Any) -> Tensor:
    """Casts the tensor to a new data type.

    The result carries over requires_grad but records no graph edge back
    to self (the constructor receives no children).
    """
    if isinstance(dtype, DType):
        dtype = dtype.name
    converted = self.data.astype(dtype)
    return Tensor(converted, device=self.device, requires_grad=self.requires_grad)

float

float()

Casts tensor to float32.

Source code in sorix/tensor.py
def float(self) -> Tensor:
    """Return this tensor converted to 32-bit floating point.

    Shorthand for ``astype(float32)``; intentionally shadows the builtin
    ``float`` to mirror PyTorch's ``Tensor.float``.
    """
    target = float32
    return self.astype(target)

double

double()

Casts tensor to float64.

Source code in sorix/tensor.py
def double(self) -> Tensor:
    """Return this tensor converted to 64-bit floating point.

    Shorthand for ``astype(float64)``, matching PyTorch's ``Tensor.double``.
    """
    target = float64
    return self.astype(target)

half

half()

Casts tensor to float16.

Source code in sorix/tensor.py
def half(self) -> Tensor:
    """Return this tensor converted to 16-bit floating point.

    Shorthand for ``astype(float16)``, matching PyTorch's ``Tensor.half``.
    """
    target = float16
    return self.astype(target)

int

int()

Casts tensor to int32.

Source code in sorix/tensor.py
def int(self) -> Tensor:
    """Return this tensor converted to 32-bit integers.

    Shorthand for ``astype(int32)``; intentionally shadows the builtin
    ``int`` to mirror PyTorch's ``Tensor.int``.
    """
    target = int32
    return self.astype(target)

long

long()

Casts tensor to int64.

Source code in sorix/tensor.py
def long(self) -> Tensor:
    """Return this tensor converted to 64-bit integers.

    Shorthand for ``astype(int64)``, matching PyTorch's ``Tensor.long``.
    """
    target = int64
    return self.astype(target)

bool

bool()

Casts tensor to bool.

Source code in sorix/tensor.py
def bool(self) -> Tensor:
    """Return this tensor converted to booleans.

    Shorthand for ``astype(bool_)``; intentionally shadows the builtin
    ``bool`` to mirror PyTorch's ``Tensor.bool``.
    """
    target = bool_
    return self.astype(target)

detach

detach()

Returns a new Tensor, detached from the current graph.

Source code in sorix/tensor.py
def detach(self) -> Tensor:
    """
    Return a gradient-free copy of this tensor.

    The result has ``requires_grad=False`` and carries no autograd
    history. NOTE(review): unlike PyTorch's ``detach``, the underlying
    data buffer is copied rather than shared.
    """
    snapshot = self.data.copy()
    return Tensor(snapshot, device=self.device, requires_grad=False)

size

size(dim=None)

Returns the size of the tensor, matching PyTorch's .size() method.

Source code in sorix/tensor.py
def size(self, dim: Optional[int] = None) -> Union[Size, int]:
    """
    Return the tensor's shape, or the extent of one dimension.

    Args:
        dim: Optional dimension index; negative values index from the
            end, as with ordinary tuple indexing.

    Returns:
        The full shape when *dim* is None, otherwise the size of that
        single dimension.
    """
    if dim is None:
        return self.shape
    return self.shape[dim]

dim

dim()

Returns the number of dimensions in the tensor.

Source code in sorix/tensor.py
def dim(self) -> int:
    """Return the rank (number of dimensions) of this tensor,
    matching PyTorch's ``Tensor.dim``."""
    return self.ndim

t

t()

Expects self to be <= 2-D tensor and transposes dimensions 0 and 1.

Source code in sorix/tensor.py
def t(self) -> Tensor:
    """
    Swap dimensions 0 and 1 of a tensor with at most two dimensions.

    Returns:
        The transposed tensor (0-D and 1-D tensors pass through
        ``transpose`` unchanged in shape).

    Raises:
        RuntimeError: If the tensor has more than two dimensions.
    """
    if self.ndim <= 2:
        return self.transpose()
    raise RuntimeError("t() expects at most 2 dimensions")

numpy

numpy()

Returns the data as a NumPy array.

If the tensor is on the GPU, it will be copied to the host.

Returns:

  • ndarray

    The numerical data as a NumPy ndarray.

Source code in sorix/tensor.py
def numpy(self) -> np.ndarray:
    """
    Return the underlying data as a NumPy ndarray.

    GPU-resident data is transferred to the host via ``.get()``; data
    already on the CPU is returned as-is (no copy is made).

    Returns:
        The numerical data as a NumPy ndarray.
    """
    # NOTE(review): relies on self.device comparing equal to the plain
    # string 'cpu' — verify Device.__eq__ supports string comparison,
    # since get_xp() treats self.device as a Device object.
    if self.device == 'cpu':
        return self.data
    return self.data.get()

item

item()

Returns the scalar value of a 1-element tensor.

Examples:

x = Tensor([42])
val = x.item()  # 42
Source code in sorix/tensor.py
def item(self) -> Union[float, int]:
    """
    Return the value of a one-element tensor as a plain Python scalar.

    Delegates to the underlying array's ``item()``, which raises if the
    tensor holds more than one element.

    Examples:
        ```python
        x = Tensor([42])
        val = x.item()  # 42
        ```
    """
    return self.data.item()

cuda

cuda()

Moves tensor to GPU. Alias for gpu().

Source code in sorix/tensor.py
def cuda(self) -> Tensor:
    """Move this tensor to the GPU — shorthand for ``to('cuda')``."""
    return self.to('cuda')

set_grad_enabled

set_grad_enabled(mode)

Sets if autograd engine is enabled.

Source code in sorix/tensor.py
def set_grad_enabled(mode: bool) -> None:
    """
    Globally enable or disable the autograd engine.

    Args:
        mode: True to record operations for gradient computation,
            False to skip graph construction.
    """
    Tensor._autograd_enabled = mode

is_grad_enabled

is_grad_enabled()

Returns True if autograd engine is enabled.

Source code in sorix/tensor.py
def is_grad_enabled() -> bool:
    """Report whether the autograd engine is currently recording operations."""
    return Tensor._autograd_enabled

get_xp

get_xp(*args)

Returns the appropriate array module (numpy or cupy) for the given arguments.

Source code in sorix/tensor.py
def get_xp(*args: Any) -> Any:
    """
    Select the array backend (cupy or numpy) for the given arguments.

    Returns cupy when any argument is a CUDA-resident Tensor and cupy is
    installed; otherwise falls back to numpy. The availability check is
    hoisted out of the loop since it cannot change between iterations.
    """
    if _cupy_available:
        for candidate in args:
            if isinstance(candidate, Tensor) and candidate.device.type == 'cuda':
                return cp
    return np

tensor

tensor(data, device='cpu', requires_grad=False, dtype=None)

Factory function to create a Sorix Tensor.

Examples:

x = sorix.tensor([1.0, 2.0], requires_grad=True, dtype=sorix.float32)
Source code in sorix/tensor.py
def tensor(
    data: TensorData,
    device: str = 'cpu',
    requires_grad: bool = False,
    dtype: Any = None
) -> Tensor:
    """
    Create a Sorix Tensor — a convenience wrapper around the constructor.

    Args:
        data: Array-like numerical data (numpy array, list, scalar, ...).
        device: Target device, 'cpu' or 'cuda'.
        requires_grad: Whether to track gradients for the new tensor.
        dtype: Optional element data type.

    Examples:
        ```python
        x = sorix.tensor([1.0, 2.0], requires_grad=True, dtype=sorix.float32)
        ```
    """
    return Tensor(data, device=device, requires_grad=requires_grad, dtype=dtype)