nlqk.embeddings.vectors
vectors.py
Module: nlqk.embeddings.vectors
(C) 2025 by Damir Cavar, James Bryan Graves, and NLP Lab
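Vector functionalities: cosine similarity, unit-norm checks and normalization, zero-padding of vectors and matrices, and pairing of real values into complex vectors.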
# coding: utf-8

"""
vectors.py

Module: nlqk.embeddings.vectors

(C) 2025 by [Damir Cavar](http://damir.cavar.me/), James Bryan Graves, and [NLP Lab](https://nlp-lab.org/)

Vector functionalities: cosine similarity, unit-norm checks and normalization,
zero-padding of vectors and matrices, and pairing of real values into complex
vectors.

"""


import os

if os.getenv("GITHUB_ACTIONS") == "true":
    # CI runs always use the CPU implementation.
    import numpy as np
    _USE_GPU = False
else:
    try:  # prefer RAPIDS libraries and GPU over numpy and CPU
        import cupy as np  # Try to import cupy and alias it as np
        _USE_GPU = True
    except ImportError:
        # ImportError also covers ModuleNotFoundError (its subclass), so an
        # installed-but-unloadable cupy falls back to numpy as well.
        import numpy as np  # If cupy is unusable, import numpy and alias it as np
        _USE_GPU = False


def cosine_similarity(vec1: np.ndarray, vec2: np.ndarray, tol: float = 1e-12) -> complex:
    """
    Computes the cosine similarity between two vectors.

    The similarity is the inner product of the two vectors divided by the
    product of their L2 norms. The inner product is taken with ``np.vdot``,
    which conjugates ``vec1``, so a complex vector compared with itself
    yields 1.

    Args:
        vec1 (np.ndarray): First input vector.
        vec2 (np.ndarray): Second input vector.
        tol (float): Threshold below which vector norm is treated as zero. Defaults to 1e-12.

    Returns:
        complex: Cosine similarity value. For real inputs: value in [-1, 1].
        When both inputs are already unit-length the inner product is
        returned directly, without conversion to a Python ``complex``.

    Raises:
        ZeroDivisionError: If either vector has zero length (norm below tolerance).
    """
    # If both vectors are normalized, cosine similarity equals the inner
    # product. np.vdot (not np.dot) keeps this path consistent with the
    # general path below for complex inputs; for real inputs both agree.
    if is_normalized(vec1) and is_normalized(vec2):
        return np.vdot(vec1, vec2)

    n1, n2 = np.linalg.norm(vec1), np.linalg.norm(vec2)
    if n1 < tol or n2 < tol:
        raise ZeroDivisionError("Zero-length vector")

    return complex(np.vdot(vec1, vec2) / (n1 * n2))


def is_normalized(vector: np.ndarray, tolerance: float = 1e-9) -> bool:
    """
    Checks if a vector is normalized (its L2 norm is approximately 1).

    Args:
        vector (np.ndarray): The input vector.
        tolerance (float): Absolute tolerance (``atol``) handed to
            ``np.isclose``. The default relative tolerance of ``np.isclose``
            (1e-05 in NumPy) is applied in addition to it.

    Returns:
        bool: True if the vector is normalized, False otherwise.
    """
    return bool(np.isclose(np.linalg.norm(vector), 1.0, atol=tolerance))


def normalize(vector: np.ndarray) -> np.ndarray:
    """
    Normalizes a vector to unit length.

    Args:
        vector (np.ndarray): Input vector to normalize.

    Returns:
        np.ndarray: The normalized vector with unit norm.

    Raises:
        ValueError: If the input vector has zero norm and cannot be normalized.
    """
    norm = np.linalg.norm(vector)
    if norm == 0:
        raise ValueError("Zero vector cannot be normalized.")
    return vector / norm


def pad_vector(vector: np.ndarray, target_size: int) -> np.ndarray:
    """
    Pads a vector with trailing zeros to reach the specified target size.

    Args:
        vector (np.ndarray): Input vector to pad.
        target_size (int): The desired size of the output vector.

    Returns:
        np.ndarray: Padded vector of length target_size. The dtype of the
        input is preserved.

    Raises:
        ValueError: If the input vector is larger than the target size.
    """
    missing = target_size - len(vector)
    if missing < 0:
        # Fail with a descriptive message instead of np.pad's generic
        # complaint about negative pad widths.
        raise ValueError(
            f"Vector of length {len(vector)} is larger than target size {target_size}."
        )
    return np.pad(vector, (0, missing), mode='constant')


def pair_real_to_complex(vector: np.ndarray) -> np.ndarray:
    """
    Converts a real-valued vector into a complex-valued vector by pairing adjacent elements.

    Elements at even positions become real parts and the elements that follow
    them become imaginary parts: ``[a, b, c, d] -> [a + bj, c + dj]``.

    Args:
        vector (np.ndarray): Real-valued input vector. An odd-length vector
            is padded with one trailing zero to make its length even.

    Returns:
        np.ndarray: Complex-valued vector where each complex number is formed from consecutive pairs.

    Raises:
        ValueError: If the vector is empty.
    """
    values = np.asarray(vector)
    if len(values) == 0:
        raise ValueError("Cannot pair an empty vector into complex numbers.")
    if len(values) % 2 != 0:
        # automatically pad with a 0
        values = pad_vector(values, len(values) + 1)
    # Strided slices pair the elements in one vectorized operation.
    return values[0::2] + 1j * values[1::2]


def pad_vectors(vectors: np.ndarray, size: int) -> np.ndarray:
    """Pad rows with zeros to size.

    Args:
        vectors (np.ndarray): matrix of vectors to be padded, all the same length.
        size (int): target length of vectors.

    Returns:
        np.ndarray: Padded vectors.

    Raises:
        ValueError: If the rows are longer than size.
    """
    missing = size - vectors.shape[1]
    if missing < 0:
        raise ValueError(
            f"Vectors of length {vectors.shape[1]} are larger than target size {size}."
        )
    # Rows are left untouched; only the second axis grows, on the right.
    return np.pad(vectors, [(0, 0), (0, missing)], mode='constant')
def cosine_similarity(vec1: np.ndarray, vec2: np.ndarray, tol: float = 1e-12) -> complex:
    """
    Computes the cosine similarity between two vectors.

    The similarity is the inner product of the two vectors divided by the
    product of their L2 norms. The inner product is taken with ``np.vdot``,
    which conjugates ``vec1``, so a complex vector compared with itself
    yields 1.

    Args:
        vec1 (np.ndarray): First input vector.
        vec2 (np.ndarray): Second input vector.
        tol (float): Threshold below which vector norm is treated as zero. Defaults to 1e-12.

    Returns:
        complex: Cosine similarity value. For real inputs: value in [-1, 1].
        When both inputs are already unit-length the inner product is
        returned directly, without conversion to a Python ``complex``.

    Raises:
        ZeroDivisionError: If either vector has zero length (norm below tolerance).
    """
    # If both vectors are normalized, cosine similarity equals the inner
    # product. np.vdot (not np.dot) keeps this path consistent with the
    # general path below for complex inputs; for real inputs both agree.
    if is_normalized(vec1) and is_normalized(vec2):
        return np.vdot(vec1, vec2)

    n1, n2 = np.linalg.norm(vec1), np.linalg.norm(vec2)
    if n1 < tol or n2 < tol:
        raise ZeroDivisionError("Zero-length vector")

    return complex(np.vdot(vec1, vec2) / (n1 * n2))
Computes the cosine similarity between two vectors.
Args: vec1 np.ndarray: First input vector. vec2 np.ndarray: Second input vector. tol (float): Threshold below which vector norm is treated as zero. Defaults to 1e-12.
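Returns: complex: Cosine similarity value. For real inputs the imaginary part is zero and the real part lies in [-1, 1]. When both inputs are already unit-length, their dot product is returned directly rather than converted to a Python complex.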
Raises: ZeroDivisionError: If either vector has zero length (norm below tolerance).
def is_normalized(vector: np.ndarray, tolerance: float=1e-9) -> bool:
    """
    Reports whether a vector has an L2 norm of approximately 1.

    Args:
        vector (np.ndarray): The vector to inspect.
        tolerance (float): Absolute tolerance (``atol``) handed to
            ``np.isclose``. The default relative tolerance of ``np.isclose``
            (1e-05 in NumPy) is applied in addition to it.

    Returns:
        bool: True when the norm is close to 1, False otherwise.
    """
    length = np.linalg.norm(vector)
    return bool(np.isclose(length, 1.0, atol=tolerance))
Checks if a NumPy vector is normalized (its L2 norm is approximately 1).
Args: vector (Union[Sequence[Union[int, float, complex]], np.ndarray]): The input vector. tolerance (float): The allowed tolerance for comparison with 1.
Returns: bool: True if the vector is normalized, False otherwise.
def normalize(vector: np.ndarray) -> np.ndarray:
    """
    Scales a vector so that its L2 norm becomes 1.

    Args:
        vector (np.ndarray): The vector to rescale.

    Returns:
        np.ndarray: The input divided by its L2 norm.

    Raises:
        ValueError: If the L2 norm of the input is exactly zero.
    """
    length = np.linalg.norm(vector)
    # Guard first: a zero norm has no direction to preserve.
    if length == 0:
        raise ValueError("Zero vector cannot be normalized.")
    return vector / length
Normalizes a vector to unit length.
Args: vector np.ndarray: Input vector to normalize.
Returns: np.ndarray: The normalized vector with unit norm.
Raises: ValueError: If the input vector has zero norm and cannot be normalized.
def pad_vector(vector: np.ndarray, target_size: int) -> np.ndarray:
    """
    Pads a vector with trailing zeros to reach the specified target size.

    Args:
        vector (np.ndarray): Input vector to pad.
        target_size (int): The desired size of the output vector.

    Returns:
        np.ndarray: Padded vector of length target_size. The dtype of the
        input is preserved.

    Raises:
        ValueError: If the input vector is larger than the target size.
    """
    missing = target_size - len(vector)
    if missing < 0:
        # Fail with a descriptive message instead of np.pad's generic
        # complaint about negative pad widths.
        raise ValueError(
            f"Vector of length {len(vector)} is larger than target size {target_size}."
        )
    return np.pad(vector, (0, missing), mode='constant')
Pads a vector with zeros to reach the specified target size.
Args: vector np.ndarray: Input vector to pad. target_size (int): The desired size of the output vector.
Returns: np.ndarray: Padded vector of length target_size; the dtype of the input is preserved (it is not converted to complex).
Raises: ValueError: If the input vector is larger than the target size.
def pair_real_to_complex(vector: np.ndarray) -> np.ndarray:
    """
    Converts a real-valued vector into a complex-valued vector by pairing adjacent elements.

    Elements at even positions become real parts and the elements that follow
    them become imaginary parts: ``[a, b, c, d] -> [a + bj, c + dj]``.

    Args:
        vector (np.ndarray): Real-valued input vector. An odd-length vector
            is padded with one trailing zero to make its length even.

    Returns:
        np.ndarray: Complex-valued vector where each complex number is formed from consecutive pairs.

    Raises:
        ValueError: If the vector is empty.
    """
    values = np.asarray(vector)
    if len(values) == 0:
        raise ValueError("Cannot pair an empty vector into complex numbers.")
    if len(values) % 2 != 0:
        # automatically pad with a 0 (one trailing zero, as pad_vector would add)
        values = np.pad(values, (0, 1), mode='constant')
    # Strided slices pair the elements in one vectorized operation.
    return values[0::2] + 1j * values[1::2]
Converts a real-valued vector into a complex-valued vector by pairing adjacent elements.
Args: vector np.ndarray: Real-valued input vector; an odd-length vector is padded with one trailing zero so that its length becomes even.
Returns: np.ndarray: Complex-valued vector where each complex number is formed from consecutive pairs.
Raises: ValueError: If the vector is empty. Odd-length vectors are not rejected; they are zero-padded to even length before pairing.
def pad_vectors(vectors: np.ndarray, size: int) -> np.ndarray:
    """Pad rows with zeros to size.

    Args:
        vectors (np.ndarray): matrix of vectors to be padded, all the same length.
        size (int): target length of vectors.

    Returns:
        np.ndarray: Padded vectors.

    Raises:
        ValueError: If the rows are longer than size.
    """
    missing = size - vectors.shape[1]
    if missing < 0:
        # Fail with a descriptive message instead of np.pad's generic
        # complaint about negative pad widths.
        raise ValueError(
            f"Vectors of length {vectors.shape[1]} are larger than target size {size}."
        )
    # Rows are left untouched; only the second axis grows, on the right.
    return np.pad(vectors, [(0, 0), (0, missing)], mode='constant')
Pad rows with zeros to size.
Args: vectors np.ndarray: matrix of vectors to be padded, all the same length. size int: target length of vectors.
Returns: np.ndarray: Padded vectors.