nlqk.embeddings.vectors

vectors.py

Vector functionalities:

View Source

  1# coding: utf-8
  2
  3"""
  4vectors.py
  5
  6Module: nlqk.embeddings.vectors
  7
  8(C) 2025 by [Damir Cavar](http://damir.cavar.me/), James Bryan Graves, and [NLP Lab](https://nlp-lab.org/)
  9
 10Vector functionalities:
 11
 12"""
 13
 14
 15# from typing import Union, Sequence
 16import os
 17if os.getenv("GITHUB_ACTIONS") == "true":
 18    import numpy as np
 19else:
 20    try: # prefer RAPIDS libraries and GPU over numpy and CPU
 21        import cupy as np  # Try to import cupy and alias it as np
 22        _USE_GPU = True
 23    except ModuleNotFoundError:
 24        import numpy as np  # If cupy not found, import numpy and alias it as np
 25        _USE_GPU = False
 26# import GPUtil  # If you're using GPUtil
 27
 28
 29# Example Usage:
 30
 31
 32def cosine_similarity(vec1: np.ndarray, vec2: np.ndarray, tol: float = 1e-12) -> complex:
 33    """
 34    Computes the cosine similarity between two vectors.
 35
 36    Args:
 37        vec1 np.ndarray: First input vector.
 38        vec2 np.ndarray: Second input vector.
 39        tol (float): Threshold below which vector norm is treated as zero. Defaults to 1e-12.
 40
 41    Returns:
 42        float: Cosine similarity value. For real inputs: value in [-1, 1].
 43
 44    Raises:
 45        ZeroDivisionError: If either vector has zero length (norm below tolerance).
 46    """
 47    #if not np.iscomplexobj(vec1):
 48    #    vec1 = vec1.astype(complex)
 49    #if not np.iscomplexobj(vec2):
 50    #    vec2 = vec2.astype(complex) # np.ravel(np.asarray(vec2))
 51    # if vectors are normalized cosine similarity is equivalent to the dot product
 52    if is_normalized(vec1) and is_normalized(vec2):
 53        return np.dot(vec1, vec2)
 54
 55    n1, n2 = np.linalg.norm(vec1), np.linalg.norm(vec2)
 56    if n1 < tol or n2 < tol:
 57        raise ZeroDivisionError("Zero-length vector")
 58
 59    return complex(np.vdot(vec1, vec2) / (n1 * n2))
 60
 61
 62def is_normalized(vector: np.ndarray, tolerance: float=1e-9) -> bool:
 63    """
 64    Checks if a NumPy vector is normalized (its L2 norm is approximately 1).
 65
 66    Args:
 67        vector (Union[Sequence[Union[int, float, complex]], np.ndarray]): The input vector.
 68        tolerance (float): The allowed tolerance for comparison with 1.
 69
 70    Returns:
 71        bool: True if the vector is normalized, False otherwise.
 72    """
 73    if np.isclose(np.linalg.norm(vector), 1.0, atol=tolerance):
 74        return True
 75    return False
 76
 77
 78def normalize(vector: np.ndarray) -> np.ndarray:
 79    """
 80    Normalizes a vector to unit length.
 81
 82    Args:
 83        vector np.ndarray: Input vector to normalize.
 84
 85    Returns:
 86        np.ndarray: The normalized vector with unit norm.
 87
 88    Raises:
 89        ValueError: If the input vector has zero norm and cannot be normalized.
 90    """
 91    norm = np.linalg.norm(vector)
 92    if norm != 0:
 93        return vector / norm
 94    raise ValueError("Zero vector cannot be normalized.")
 95
 96
 97
 98def pad_vector(vector: np.ndarray, target_size: int) -> np.ndarray:
 99    """
100    Pads a vector with zeros to reach the specified target size.
101
102    Args:
103        vector np.ndarray: Input vector to pad.
104        target_size (int): The desired size of the output vector.
105
106    Returns:
107        np.ndarray: Padded vector of length target_size with complex dtype.
108
109    Raises:
110        ValueError: If the input vector is larger than the target size.
111    """
112    return np.pad(vector, (0, target_size - len(vector)), mode='constant')
113
114
def pair_real_to_complex(vector: np.ndarray) -> np.ndarray:
    """
    Converts a real-valued vector into a complex-valued vector by pairing adjacent elements.

    Elements at even positions become real parts and the elements that follow
    them become imaginary parts: ``[a, b, c, d] -> [a + bj, c + dj]``.

    Args:
        vector (np.ndarray): Real-valued input vector; an odd-length vector is
            padded with one trailing zero before pairing.

    Returns:
        np.ndarray: Complex-valued vector where each complex number is formed from consecutive pairs.

    Raises:
        ValueError: If the vector is empty.
    """
    vector = np.asarray(vector)
    if len(vector) == 0:
        raise ValueError("Cannot pair an empty vector into complex numbers.")
    if len(vector) % 2 != 0:
        # automatically pad with a 0 so the last element gets a partner
        vector = np.pad(vector, (0, 1), mode='constant')
    # Strided slices pair the elements in one array operation instead of a
    # per-element Python loop.
    return vector[0::2] + 1j * vector[1::2]
135
136
137
def pad_vectors(vectors: np.ndarray, size: int) -> np.ndarray:
    """Pad rows with trailing zeros to size.

    Args:
        vectors (np.ndarray): matrix of vectors to be padded, all the same
            length (indexed as rows x columns via ``vectors.shape[1]``).
        size (int): target length of vectors.

    Returns:
        np.ndarray: Padded vectors; the number of rows is unchanged.

    Raises:
        ValueError: If the rows are already longer than size.
    """
    missing = size - vectors.shape[1]
    # Fail with an explicit message instead of relying on np.pad rejecting a
    # negative pad width.
    if missing < 0:
        raise ValueError(
            f"Vectors of length {vectors.shape[1]} are larger than size {size}."
        )
    # (0, 0) leaves the row axis alone; only the column axis grows.
    return np.pad(vectors, [(0, 0), (0, missing)], mode='constant')

def cosine_similarity(vec1: cupy.ndarray, vec2: cupy.ndarray, tol: float = 1e-12) -> complex: View Source

def cosine_similarity(vec1: np.ndarray, vec2: np.ndarray, tol: float = 1e-12) -> complex:
    """
    Computes the cosine similarity between two vectors.

    The value is ``vdot(vec1, vec2) / (norm(vec1) * norm(vec2))``. ``vdot``
    conjugates its first argument, so complex inputs are handled the same way
    whether or not the vectors already have unit length.

    Args:
        vec1 (np.ndarray): First input vector.
        vec2 (np.ndarray): Second input vector.
        tol (float): Threshold below which vector norm is treated as zero. Defaults to 1e-12.

    Returns:
        complex: Cosine similarity value. For real inputs the imaginary part
            is zero and the real part lies in [-1, 1] (up to rounding).

    Raises:
        ZeroDivisionError: If either vector has zero length (norm below tolerance).
    """
    # Each norm is computed exactly once and reused for both the zero-length
    # check and the division.
    n1, n2 = np.linalg.norm(vec1), np.linalg.norm(vec2)
    if n1 < tol or n2 < tol:
        raise ZeroDivisionError("Zero-length vector")

    # No separate shortcut for unit vectors: dividing by norms that are
    # already ~1 is harmless and keeps the result type and the conjugation
    # convention identical for every input.
    return complex(np.vdot(vec1, vec2) / (n1 * n2))

Computes the cosine similarity between two vectors.

Args: vec1 np.ndarray: First input vector. vec2 np.ndarray: Second input vector. tol (float): Threshold below which vector norm is treated as zero. Defaults to 1e-12.

Returns: complex: Cosine similarity value (the signature is annotated `-> complex`). For real inputs the value lies in [-1, 1].

Raises: ZeroDivisionError: If either vector has zero length (norm below tolerance).

def is_normalized(vector: cupy.ndarray, tolerance: float = 1e-09) -> bool: View Source

def is_normalized(vector: np.ndarray, tolerance: float = 1e-9) -> bool:
    """
    Checks if a vector is normalized (its L2 norm is approximately 1).

    Args:
        vector (np.ndarray): The input vector.
        tolerance (float): The allowed absolute deviation of the norm from 1.

    Returns:
        bool: True if the vector is normalized, False otherwise.
    """
    # rtol is forced to 0 so that `tolerance` is the only slack applied;
    # isclose's default rtol (1e-5) would otherwise dominate small tolerances.
    return bool(np.isclose(np.linalg.norm(vector), 1.0, rtol=0.0, atol=tolerance))

Checks if a NumPy vector is normalized (its L2 norm is approximately 1).

Args: vector (Union[Sequence[Union[int, float, complex]], np.ndarray]): The input vector. tolerance (float): The allowed tolerance for comparison with 1.

Returns: bool: True if the vector is normalized, False otherwise.

def normalize(vector: cupy.ndarray) -> cupy.ndarray: View Source

def normalize(vector: np.ndarray) -> np.ndarray:
    """
    Scales a vector so that its norm becomes 1.

    Args:
        vector (np.ndarray): Input vector to normalize.

    Returns:
        np.ndarray: The input divided by its norm.

    Raises:
        ValueError: If the norm of the input is exactly zero.
    """
    length = np.linalg.norm(vector)
    # Guard first: a zero norm cannot be divided out.
    if length == 0:
        raise ValueError("Zero vector cannot be normalized.")
    return vector / length

Normalizes a vector to unit length.

Args: vector np.ndarray: Input vector to normalize.

Returns: np.ndarray: The normalized vector with unit norm.

Raises: ValueError: If the input vector has zero norm and cannot be normalized.

def pad_vector(vector: cupy.ndarray, target_size: int) -> cupy.ndarray: View Source

 99def pad_vector(vector: np.ndarray, target_size: int) -> np.ndarray:
100    """
101    Pads a vector with zeros to reach the specified target size.
102
103    Args:
104        vector np.ndarray: Input vector to pad.
105        target_size (int): The desired size of the output vector.
106
107    Returns:
108        np.ndarray: Padded vector of length target_size with complex dtype.
109
110    Raises:
111        ValueError: If the input vector is larger than the target size.
112    """
113    return np.pad(vector, (0, target_size - len(vector)), mode='constant')

Pads a vector with zeros to reach the specified target size.

Args: vector np.ndarray: Input vector to pad. target_size (int): The desired size of the output vector.

Returns: np.ndarray: Zero-padded vector of length target_size; the dtype of the input is preserved.

Raises: ValueError: If the input vector is larger than the target size.

def pair_real_to_complex(vector: cupy.ndarray) -> cupy.ndarray: View Source

def pair_real_to_complex(vector: np.ndarray) -> np.ndarray:
    """
    Converts a real-valued vector into a complex-valued vector by pairing adjacent elements.

    Elements at even positions become real parts and the elements that follow
    them become imaginary parts: ``[a, b, c, d] -> [a + bj, c + dj]``.

    Args:
        vector (np.ndarray): Real-valued input vector; an odd-length vector is
            padded with one trailing zero before pairing.

    Returns:
        np.ndarray: Complex-valued vector where each complex number is formed from consecutive pairs.

    Raises:
        ValueError: If the vector is empty.
    """
    vector = np.asarray(vector)
    if len(vector) == 0:
        raise ValueError("Cannot pair an empty vector into complex numbers.")
    if len(vector) % 2 != 0:
        # automatically pad with a 0 so the last element gets a partner
        vector = np.pad(vector, (0, 1), mode='constant')
    # Strided slices pair the elements in one array operation instead of a
    # per-element Python loop.
    return vector[0::2] + 1j * vector[1::2]

Converts a real-valued vector into a complex-valued vector by pairing adjacent elements.

Args: vector np.ndarray: Real-valued input vector, if not even length it will be padded to even length.

Returns: np.ndarray: Complex-valued vector where each complex number is formed from consecutive pairs.

Raises: ValueError: If the vector is empty. (An odd-length vector is not rejected; it is padded with one trailing zero before pairing.)

def pad_vectors(vectors: cupy.ndarray, size: int) -> cupy.ndarray: View Source

def pad_vectors(vectors: np.ndarray, size: int) -> np.ndarray:
    """Pad rows with trailing zeros to size.

    Args:
        vectors (np.ndarray): matrix of vectors to be padded, all the same
            length (indexed as rows x columns via ``vectors.shape[1]``).
        size (int): target length of vectors.

    Returns:
        np.ndarray: Padded vectors; the number of rows is unchanged.

    Raises:
        ValueError: If the rows are already longer than size.
    """
    missing = size - vectors.shape[1]
    # Fail with an explicit message instead of relying on np.pad rejecting a
    # negative pad width.
    if missing < 0:
        raise ValueError(
            f"Vectors of length {vectors.shape[1]} are larger than size {size}."
        )
    # (0, 0) leaves the row axis alone; only the column axis grows.
    return np.pad(vectors, [(0, 0), (0, missing)], mode='constant')

Pad rows with zeros to size.

Args: vectors np.ndarray: matrix of vectors to be padded, all the same length. size int: target length of vectors.

Returns: np.ndarray: Padded vectors.