nlqk.embeddings
#!/usr/bin/env python
# -*- coding: utf-8 -*-


"""
embedding.py

(C) 2025 by [Damir Cavar](http://damir.cavar.me/) and the [NLP Lab](https://nlp-lab.org/)

Module: nlqk.embeddings.states

Helpers that return word embeddings as arrays. The array backend aliased as
``np`` is cupy when it can be imported and numpy otherwise; numpy is always
used when the environment variable GITHUB_ACTIONS is set to "true".
"""

import os
from typing import List
if os.getenv("GITHUB_ACTIONS") == "true":
    import numpy as np
    # Bind the flag on this path as well, so that reading _USE_GPU never
    # raises NameError regardless of which import branch was taken.
    _USE_GPU = False
else:
    try:  # prefer RAPIDS libraries and GPU over numpy and CPU
        import cupy as np  # Try to import cupy and alias it as np
        _USE_GPU = True
    except ModuleNotFoundError:
        import numpy as np  # If cupy not found, import numpy and alias it as np
        _USE_GPU = False
from nlqk.defaults import OPEN_AI_EMBEDDING_MODELS


#from .vectors import (
#    is_normalized,
#    normalize,
#    pad_vector,
#    pair_real_to_complex,
#    cosine_similarity,
#)
#from .states import (
#    hamiltonian_to_state,
#    check_states_equal,
#)


def get_openai_embeddings(wordlist: List[str], api_key: str = '', model_name: str = 'large') -> np.ndarray:
    """Get the GPT embeddings for a wordlist.

    The OpenAI request itself is not implemented yet (see the TODO below);
    after validating its arguments the function returns an empty array.

    Args:
        wordlist (List[str]): List of words.
        api_key (str): The OpenAI API key. When empty, the environment
            variable OPENAI_API_KEY is used instead.
        model_name (str): One of the keys of OPEN_AI_EMBEDDING_MODELS.

    Returns:
        np.ndarray: the OpenAI embeddings for the words in the wordlist
            (currently always an empty array).

    Raises:
        ValueError: If 'model_name' is not in OPEN_AI_EMBEDDING_MODELS.
        ValueError: If the OpenAI key is missing, i.e., neither the argument
            'api_key' nor the environment variable OPENAI_API_KEY is set.
    """
    if model_name not in OPEN_AI_EMBEDDING_MODELS:
        raise ValueError(f'model_name not a valid OpenAI embedding model name. Use one of: {", ".join(OPEN_AI_EMBEDDING_MODELS.keys())}')
    if not api_key:
        # check environment variable OPENAI_API_KEY
        api_key = os.environ.get('OPENAI_API_KEY')
    if not api_key:
        # Plain string: the message has no placeholders, so no f-prefix.
        raise ValueError("Attempted OpenAI API call without API-key. Provide a valid value for 'api_key' or set the API-key in the environment variable OPENAI_API_KEY.")
    # TODO call openai api
    return np.array([])


def get_embeddings(wordlist: List[str]) -> np.ndarray:
    """Return embeddings for a wordlist (placeholder).

    The function body is a stub: 'wordlist' is not used yet and an empty
    array is returned.

    Args:
        wordlist (List[str]): List of words.

    Returns:
        np.ndarray: an empty array.
    """
    return np.array([])
def
get_openai_embeddings( wordlist: List[str], api_key='', model_name: str = 'large') -> cupy.ndarray:
def get_openai_embeddings(wordlist: List[str], api_key='', model_name: str = 'large') -> np.ndarray:
    """Return the OpenAI (GPT) embeddings for a list of words.

    The arguments are validated first; the OpenAI call itself is still a
    TODO, so an empty array is returned once validation passes.

    Args:
        wordlist (List[str]): The words to embed.
        api_key (str): The OpenAI API key. If empty, the environment
            variable OPENAI_API_KEY is consulted.
        model_name (str): A key of OPEN_AI_EMBEDDING_MODELS.

    Returns:
        np.ndarray: The embeddings for the words (currently an empty array).

    Raises:
        ValueError: If 'model_name' is not in OPEN_AI_EMBEDDING_MODELS.
        ValueError: If no API key is available from either 'api_key' or the
            environment variable OPENAI_API_KEY.
    """
    if model_name not in OPEN_AI_EMBEDDING_MODELS:
        valid_names = ", ".join(OPEN_AI_EMBEDDING_MODELS.keys())
        raise ValueError(f'model_name not a valid OpenAI embedding model name. Use one of: {valid_names}')

    # An explicitly passed key wins; otherwise fall back to the environment.
    resolved_key = api_key or os.environ.get('OPENAI_API_KEY')
    if not resolved_key:
        raise ValueError("Attempted OpenAI API call without API-key. Provide a valid value for 'api_key' or set the API-key in the environment variable OPENAI_API_KEY.")

    # TODO call openai api
    return np.array([])
Get the GPT embeddings for a wordlist.
Args: wordlist (List of str): List of words. api_key (str): The OpenAI API key; if empty, the environment variable OPENAI_API_KEY is used instead. model_name (str): One of the valid OpenAI embedding model names.
Returns: np.ndarray: the OpenAI embeddings for the words in the wordlist.
Raises: ValueError: If model_name is not a valid OpenAI embedding model name, or if the OpenAI key is missing, i.e., neither the argument 'api_key' nor the environment variable OPENAI_API_KEY is set.
def
get_embeddings(wordlist: List[str]) -> cupy.ndarray: