Last active
April 11, 2023 19:05
-
-
Save ThomasMiconi/1065370cf25262d4d2e30c2b9519ebe0 to your computer and use it in GitHub Desktop.
GPT3.5 doing multiple linear regression in-context
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import openai | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
# The OpenAI API key is read from the OPENAI_API_KEY environment variable
# (it will be None when the variable is not set).
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Experiment configuration.
NBTRAIN = 100    # number of training examples listed in every prompt
NBTEST = 30      # number of test points (one completion request each)
NOISEMULT = 0.2  # half-width of the noise added to the training targets
TESTMULT = 4.0   # test inputs are drawn from [0, TESTMULT)
TRAINMULT = 2.0  # training inputs are drawn from [0, TRAINMULT)
# The function to be regressed:
def fct(in1, in2):
    """Return the noiseless target .3 * in1 + .7 * in2.

    Works element-wise on anything supporting ``*`` and ``+`` with floats
    (plain numbers or NumPy arrays).
    """
    first_term = .3 * in1
    second_term = .7 * in2
    return first_term + second_term
# Training data
# Inputs are uniform in [0, TRAINMULT), truncated to two decimals.
train_scale = 100 * TRAINMULT
x1 = (train_scale * np.random.rand(NBTRAIN)).astype(int) / 100.0
x2 = (train_scale * np.random.rand(NBTRAIN)).astype(int) / 100.0

# Clean targets, then a noisy copy perturbed by uniform noise in
# [-NOISEMULT, NOISEMULT); the noisy copy is what the prompt shows.
y = fct(x1, x2)
jitter = 2.0 * np.random.rand(y.size) - 1.0
ynoise = y + NOISEMULT * jitter
# Generating test data
# The training pairs are materialised as a set. In Python 3 a bare zip() is a
# one-shot iterator: the first `in` test would exhaust it, after which every
# candidate would be reported as "not seen" and the de-duplication against the
# training set would silently stop working.
x1x2 = set(zip(x1.tolist(), x2.tolist()))
x1t = []
x2t = []
for _ in range(NBTEST):
    # Rejection-sample until the (a, b) pair is not one of the training pairs.
    while True:
        # Uniform in [0, TESTMULT), truncated to two decimals.
        a = int(100 * TESTMULT * np.random.rand()) / 100.0
        b = int(100 * TESTMULT * np.random.rand()) / 100.0
        if (a, b) not in x1x2:
            break
    x1t.append(a)
    x2t.append(b)
x1t = np.array(x1t)
x2t = np.array(x2t)
# Noiseless ground truth for the test points.
yt = fct(x1t, x2t)
# One sentence per training example, with the (noisy) answer filled in.
linestrain = [
    f"If x1 = {in1:.2f} and x2 = {in2:.2f}, then y = {target:.2f}."
    for in1, in2, target in zip(x1, x2, ynoise)
]
# One sentence per test point, left open after "y =" for the model to complete.
linestest = [
    f"If x1 = {in1:.2f} and x2 = {in2:.2f}, then y ="
    for in1, in2 in zip(x1t, x2t)
]
# Shared prefix of every prompt: all training sentences, newline-terminated.
prompt_base = "\n".join(linestrain) + "\n"
def _extract_number(completion_text):
    """Return the leading numeric token of a completion as a string.

    The token is the first whitespace-delimited chunk of the completion with
    trailing punctuation removed. This avoids assuming a fixed-width answer:
    a fixed slice such as ``text[1:5]`` misreads "1.23" (no leading space)
    as ".23" and " 10.53" as "10.5".

    Raises:
        ValueError: if the completion contains no non-whitespace text.
    """
    tokens = completion_text.split()
    if not tokens:
        raise ValueError(f"Empty completion: {completion_text!r}")
    return tokens[0].rstrip(".,;:")


responses = []
# Prompts for each test case include the whole training data, plus one line from the test data (without the y!)
for nt in range(NBTEST):
    prompt = prompt_base + linestest[nt]
    print("Prompt", nt, ":")
    print(prompt + "(End.)")
    # temperature=0 requests greedy decoding; max_tokens=7 caps the completion length.
    response = openai.Completion.create(model="text-davinci-003", prompt=prompt, temperature=0, max_tokens=7)
    responses.append(_extract_number(response['choices'][0]['text']))
# float() raises ValueError if the model answered with something non-numeric.
ytpred = np.array([float(x) for x in responses])
# Scatter plot of the results.
plt.figure()
# Blue: training points, noisy value shown in the prompt (x) vs. noiseless value (y).
plt.plot(ynoise, y, '.b', label='Train data (prompt)')
# Red: test points, noiseless target (x) vs. value completed by the model (y).
plt.plot(yt, ytpred, '.r', label='Test data (completion)')
plt.ylabel("Predicted y")
plt.xlabel("True y")
# The training noise is uniform in [-0.2, 0.2] (NOISEMULT * (2 * rand - 1)),
# not Gaussian, so the title states U(-0.2,0.2) rather than N(0,0.2).
plt.title('y = .3*x1 + .7*x2 + U(-0.2,0.2)')
plt.legend()
plt.show()
print("End.")
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example of a full prompt (predicts one data point):