nlqk.embeddings

embedding.py

(C) 2025 by Damir Cavar and the NLP Lab

Module: nlqk.embeddings.states

 1#!/usr/bin/env python
 2# -*- coding: utf-8 -*-
 3
 4
 5"""
 6embedding.py
 7
 8(C) 2025 by [Damir Cavar](http://damir.cavar.me/) and the [NLP Lab](https://nlp-lab.org/)
 9
10Module: nlqk.embeddings.states
11
12
13"""
14
15import os
16from typing import List
# Select the array backend once at import time and expose it as ``np``.
# ``_USE_GPU`` is assigned on every path so that code reading it never hits
# a NameError, including when running under GitHub Actions.
if os.getenv("GITHUB_ACTIONS") == "true":
    import numpy as np
    _USE_GPU = False
else:
    try:  # prefer RAPIDS libraries and GPU over numpy and CPU
        import cupy as np  # Try to import cupy and alias it as np
        _USE_GPU = True
    except ImportError:
        # ImportError (the parent of ModuleNotFoundError) also covers a cupy
        # installation that is present but cannot be loaded.
        import numpy as np  # If cupy is unusable, import numpy and alias it as np
        _USE_GPU = False
26from nlqk.defaults import OPEN_AI_EMBEDDING_MODELS
27
28
29#from .vectors import (
30#    is_normalized,
31#    normalize,
32#    pad_vector,
33#    pair_real_to_complex,
34#    cosine_similarity,
35#)
36#from .states import (
37#    hamiltonian_to_state,
38#    check_states_equal,
39#)
40
41
def get_openai_embeddings(wordlist: List[str], api_key: str = '', model_name: str = 'large') -> np.ndarray:
    """Get the OpenAI embeddings for a wordlist.

    NOTE: the OpenAI API call is not implemented yet. After validating its
    arguments the function currently returns an empty array.

    Args:
        wordlist (List[str]): List of words.
        api_key (str): The OpenAI API key. If empty, the environment variable
            OPENAI_API_KEY is used instead.
        model_name (str): One of the keys of OPEN_AI_EMBEDDING_MODELS.

    Returns:
        np.ndarray: The embeddings for the words in the wordlist (currently
            always an empty array).

    Raises:
        ValueError: If model_name is not a key of OPEN_AI_EMBEDDING_MODELS, or
            if the API key is missing, i.e., neither the 'api_key' argument
            nor the environment variable OPENAI_API_KEY is set.
    """
    if model_name not in OPEN_AI_EMBEDDING_MODELS:
        raise ValueError(f'model_name not a valid OpenAI embedding model name. Use one of: {", ".join(OPEN_AI_EMBEDDING_MODELS.keys())}')
    # Fall back to the environment when no key was passed explicitly.
    api_key = api_key or os.environ.get('OPENAI_API_KEY')
    if not api_key:
        raise ValueError("Attempted OpenAI API call without API-key. Provide a valid value for 'api_key' or set the API-key in the environment variable OPENAI_API_KEY.")
    # TODO call openai api
    return np.array([])
66
67
def get_embeddings(wordlist: List[str]) -> np.ndarray:
    """Return the embeddings for a wordlist.

    The body is currently a stub: ``wordlist`` is not read and an empty
    array is returned for every input.

    Args:
        wordlist (List[str]): List of words.

    Returns:
        np.ndarray: An empty array created with ``np.array([])``.
    """
    no_embeddings = np.array([])
    return no_embeddings
def get_openai_embeddings( wordlist: List[str], api_key='', model_name: str = 'large') -> cupy.ndarray:
def get_openai_embeddings(wordlist: List[str], api_key: str = '', model_name: str = 'large') -> np.ndarray:
    """Get the OpenAI embeddings for a wordlist.

    NOTE: the OpenAI API call is not implemented yet. After validating its
    arguments the function currently returns an empty array.

    Args:
        wordlist (List[str]): List of words.
        api_key (str): The OpenAI API key. If empty, the environment variable
            OPENAI_API_KEY is used instead.
        model_name (str): One of the keys of OPEN_AI_EMBEDDING_MODELS.

    Returns:
        np.ndarray: The embeddings for the words in the wordlist (currently
            always an empty array).

    Raises:
        ValueError: If model_name is not a key of OPEN_AI_EMBEDDING_MODELS, or
            if the API key is missing, i.e., neither the 'api_key' argument
            nor the environment variable OPENAI_API_KEY is set.
    """
    if model_name not in OPEN_AI_EMBEDDING_MODELS:
        raise ValueError(f'model_name not a valid OpenAI embedding model name. Use one of: {", ".join(OPEN_AI_EMBEDDING_MODELS.keys())}')
    # Fall back to the environment when no key was passed explicitly.
    api_key = api_key or os.environ.get('OPENAI_API_KEY')
    if not api_key:
        raise ValueError("Attempted OpenAI API call without API-key. Provide a valid value for 'api_key' or set the API-key in the environment variable OPENAI_API_KEY.")
    # TODO call openai api
    return np.array([])

Get the GPT embeddings for a wordlist.

Args: wordlist List of str: List of words. api_key str: The OpenAI API key. model_name str: One of the valid OpenAI embedding model names.

Returns: np.ndarray: the OpenAI embeddings for the words in the wordlist.

Raises: ValueError: If model_name is not a valid OpenAI embedding model name, or if the OpenAI API key is missing, i.e., neither the 'api_key' argument nor the environment variable OPENAI_API_KEY is set.

def get_embeddings(wordlist: List[str]) -> cupy.ndarray:
def get_embeddings(wordlist: List[str]) -> np.ndarray:
    """Return the embeddings for a wordlist.

    The body is currently a stub: ``wordlist`` is not read and an empty
    array is returned for every input.

    Args:
        wordlist (List[str]): List of words.

    Returns:
        np.ndarray: An empty array created with ``np.array([])``.
    """
    no_embeddings = np.array([])
    return no_embeddings