This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # coding=utf-8 | |
| # Copyright 2025 The HuggingFace Inc. team | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from datasets import load_dataset | |
| from trl import SFTTrainer | |
| dataset = load_dataset("trl-lib/Capybara", split="train") | |
| trainer = SFTTrainer( | |
| model="Qwen/Qwen2.5-0.5B", | |
| train_dataset=dataset, | |
| ) | |
| trainer.train() | |
| train.push_to_hub("Qwen2.5-0.5B-SFT") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| from datetime import datetime, timedelta | |
| from datasets import load_dataset | |
| # Helper function to filter data based on time range | |
| def filter_date_range(df, date_col, start_date, end_date): | |
| return df[(df[date_col] >= start_date) & (df[date_col] < end_date)] | |
| # Get the current time |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from transformers import pipeline | |
| from string import Template | |
| template = Template( | |
| """Please act as an impartial judge and evaluate the quality of the responses provided by | |
| two AI assistants to the user question displayed below. Your evaluation should consider | |
| factors such as the helpfulness and relevance. Ensure that the order in which the responses | |
| were presented does not influence your decision. Answer just by [[A]] if assistant A is better, | |
| [[B]] if assistant B is better, and [[C]] for a tie. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import urllib.request | |
| from datetime import datetime | |
| import wandb | |
| import yaml | |
| from yaml.loader import SafeLoader | |
| atari_ids = [ | |
| # "AdventureNoFrameskip-v4", | |
| # "AirRaidNoFrameskip-v4", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from manim import * # I'll skip this line for the following | |
| class MyBeautifulGraph(Scene): | |
| def construct(self): | |
| axes = Axes() | |
| self.add(axes) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| for i_episode in range(nb_episodes): | |
| # HERE : THE CODE TO | |
| # initialize episode and loop until the game is over | |
| # ... | |
| # improve the policy after each transition | |
| # ... | |
| # if episode ends with a reward | |
| if reward: | |
| # decrease epsilon |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| for t in range(len(states)): | |
| # Compute the discounted reward Gt from time t | |
| # Gt = rewards[t] + gamma*Gt | |
| Gt = compute_gain(rewards, t, gamma) | |
| # \delta_t = G_t - Q(S_t, A_t) | |
| delta_t = Gt - Q[states[t]][actions[t]] | |
| # Add pair state-action to the counter | |
| N[states[t]][actions[t]] += 1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # \delta_t = R_{t+1} + \gamma * Q(S_{t+1}, A_{t+1}) - Q(S_{t}, A_{t}) | |
| delta_t = reward + gamma* Q[next_state][next_action] - Q[state][action] | |
| # Add delta_t to the current value function | |
| # Q(S_t, A_t) += alpha * \delta_t | |
| Q[state][action] += alpha * delta_t |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # initialize Q and N | |
| Q_pi = np.ones((nb_states, nb_actions)) | |
| N = np.zeros_like(Q_pi) | |
| for i_generation in range(nb_generation): | |
| # Define π' = ε-greedy policy w.r.t Q_π | |
| pi = epsilon_greedy_policy(Q_pi, eps) | |
| # Generate one episode by following π' | |
| states, actions, rewards = generate_episode(env, pi) |
NewerOlder