ML & Econ reading group¶

LLMs¶

W6: Practical implementation¶

Jeremy Large, 14 Nov 2023¶

HuggingFace NLP course

A snapshot of our group:

  • 85% use python and/or R
  • 50% use python
  • 10-20% use pytorch (via python)
  • 10-20% use Google Colab
  • 0% use HuggingFace

Plan for today¶

  • Overviews
    • HuggingFace hub
    • python
    • jupyter notebooks
    • transformers
  • Demonstration / try-out
    • Zephyr 7B
    • written in pytorch
    • accessed via the transformers API
    • we interact using a jupyter notebook
    • we code in python
    • the notebook is hosted on Google's Colab service
    • Google gives it access to a GPU, using CUDA

HuggingFace overview¶

This platform is attracting state-of-the-art AI models

  • Open source:
    • Largest hub of pre-trained AI models
  • Simplicity versus precision:
    • Its design trade-offs tend to favour simplicity and community-recommended defaults
  • Mostly in python

Pre-trained models for most/all AI tasks

  • Extensive Collection: Choose from thousands of models
  • Task-Specific Models: Tailored for classification, translation, summarization, etc.
  • Community Models: Contributions from researchers and industry professionals

Things people have done with HuggingFace tech

  • Content Creation: Automating writing and creative content generation
  • Semantic Search: Enhancing search engines with NLP for better results
  • Education: Personalized learning assistance and automated grading systems

Recent release on HuggingFace:

  • Zephyr 7B beta
  • Zephyr 7B
  • Mistral 7B

Zephyr is a fine-tune of Mistral 7B

We will run Zephyr later today

Fine-Tuning with HuggingFace Transformers:

  • Select a pre-trained model relevant to your task
  • Prepare your dataset in a compatible format
  • Use
    • HuggingFace's Trainer API (see the sketch after this list), or
    • your own training loop,
    • or adapt existing code that has done this before
  • Adjust hyperparameters to suit your data and task
  • Train the model on your data, monitoring for convergence
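
A minimal sketch of the Trainer route, assuming a text-classification task; the imdb dataset and bert-base-uncased are illustrative choices, not part of today's demo:

from datasets import load_dataset
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          Trainer, TrainingArguments)

# Illustrative choices: a small sentiment dataset and a classic base model
dataset = load_dataset("imdb")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

def tokenize(batch):
    return tokenizer(batch["text"], truncation=True, padding="max_length")

tokenized = dataset.map(tokenize, batched=True)

args = TrainingArguments(output_dir="out", num_train_epochs=1,
                         per_device_train_batch_size=8)
Trainer(model=model, args=args,
        train_dataset=tokenized["train"],
        eval_dataset=tokenized["test"]).train()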

Python¶

Python - Language of Choice for Machine Learning and AI

  • Why Python?: Popular for its simplicity and readability
  • Python in AI: Dominant language with a rich ecosystem of libraries
  • Interpreted Language: Easy to write and test - good for rapid prototyping

Python Syntax Essentials

  • Indentation: Python uses indentation, not braces, to define scope
  • Variables: No need to declare a type - x = 5, name = "Alice"
  • Functions: Defined with def keyword - def my_function():
  • Attributes: Accessed with dot notation - car.door.window.switch
  • Methods: Actions an object can take - car.door.window.switch.open()
  • Lists: Ordered and changeable collections - my_list = [1, 2, 3]
  • Dictionaries: Key-value pairs - my_dict = {'name': 'Alice', 'age': 25}
  • Tuples: Ordered and unchangeable collections - my_tuple = (1, 2, 3)
  • Sets: Unordered collection of unique elements - my_set = {1, 2, 3}

Control flow

  • If statements: if x > 10:
  • Loops: for item in my_list: or while x < 10:
  • Comprehensions: [x for x in range(10)] for concise loops
  • Exception Handling: try: and except: blocks to handle errors
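
A toy cell pulling these together (the variable names are ours):

my_list = [1, 2, 3]
squares = [x ** 2 for x in my_list if x > 1]   # comprehension with a condition -> [4, 9]

try:
    ratio = 1 / 0
except ZeroDivisionError:
    ratio = float('inf')   # handle the error rather than crash
print(squares, ratio)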

Marshalling open-source libraries:

  • Installing Libraries: Use pip or conda to install packages
  • Importing: import numpy as np, from math import sqrt
  • Common Libraries: NumPy for numerical computing, Pandas for data analysis
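
For instance, two lines of NumPy and Pandas side by side (the data here is made up):

import numpy as np
import pandas as pd

returns = np.array([0.01, -0.02, 0.03])       # toy return series
df = pd.DataFrame({'returns': returns})
print(df['returns'].mean())                   # average return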

Jupyter Notebooks¶

  • One way of writing and running python code
  • On the face of it, they can seem a bit amateur
  • Interactive documents that combine live, executable code with narrative text, equations, and visualizations
  • Widespread use among AI researchers

As this is a jupyter notebook, we can do things like:¶

In [1]:
def fibonacci(x):
    # iterate the recurrence x times; returns the (x+1)-th Fibonacci number
    out = [0, 1]
    for _ in range(x):
        out.append(out[-2] + out[-1])
    return out[-1]
In [2]:
fibonacci(23)
Out[2]:
46368
  • Why Jupyter Notebooks?
    • Can be turned into slides
    • Interactive Development: Test out Python code in real-time, perfect for experimenting with Transformers.
    • Visualization: Inline graphs and charts to analyze model performance or data distributions.
    • Documentation: Combine code with rich text descriptions, making it ideal for educational purposes and reproducible research.
    • Support for Markdown: Document your process and findings with Markdown and LaTeX within the same interface.

Markdown within notebooks supports $\LaTeX$

This is an inline equation: $V_{sphere} = \frac{4}{3}\pi r^3,$ followed by a display style equation:

$$V_{sphere} = \frac{4}{3}\pi r^3$$
  • Jupyter notebooks and HuggingFace:
    • Experimentation: Quickly test different models and parameters.
    • Tutorials and Examples: HuggingFace provides many Jupyter notebooks as tutorials for their models.
    • Community Sharing: Share your models and findings easily with the HuggingFace community.

Hosted Notebook Services:

  • Binder: Turn a Git repo into a collection of interactive notebooks.
  • Deepnote: A new generation of Jupyter-compatible data science notebook.
  • HuggingFace + Notebooks: Accessible tutorials, easy model experimentation, and community-driven knowledge sharing
  • Google Colab: Free (err...) access to GPUs and TPUs for machine learning.
  • ...

Transformers¶

Getting Started with Transformers

  • Environment: Python environment or Docker container
  • Installation: pip install transformers
  • Dependencies: Automatically managed

The Building Blocks of NLP Applications

  • Tokenizer: Converts text to a format understandable by the model
    • Handles various text pre-processing steps
    • Ensures compatibility with different model architectures
  • Model: Pre-trained models with a wide range of NLP capabilities
    • Fine-tuning options for specific tasks
    • Various architectures (BERT, GPT-2, T5, etc.)
  • Pipeline: Pre-built routines for end-to-end tasks
    • Simplifies common tasks (e.g., question answering, text generation)
    • Custom pipeline creation for specialized tasks
    • Supplies a suitable model 'head' and decoding strategy, e.g. beam search (one-line example below)
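
For example, a one-line pipeline bundles tokenization, the model forward pass, and decoding into a single call (if you don't name a model, the pipeline downloads a default one for the task):

from transformers import pipeline

classifier = pipeline("sentiment-analysis")   # default model for the task
print(classifier("HuggingFace makes this remarkably easy"))
# -> a list like [{'label': 'POSITIVE', 'score': ...}]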

Code Demonstration

  1. Loading a model: model = AutoModel.from_pretrained("bert-base-uncased")
  2. Tokenizing text: tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased"); tokens = tokenizer("Hello, world!", return_tensors="pt")
  3. Running the model: predictions = model(**tokens)

Let's set this up on Google Colab¶

  • upload the notebook to Google Drive
  • open the notebook
  • top-right, pick a processor for the code ('runtime')
  • this model is demanding, so we need at least:
    • a V100 GPU with 'High RAM'
    • which costs some money

Now we are ready to run some code¶

In [3]:
import torch  # this better work, or the environment is wrong
import jax # ditto this
In [4]:
torch.cuda.is_available()   # check if there is a GPU which is ready
Out[4]:
True
In [5]:
# import transformers  # this won't work yet
In [6]:
# Classic transformers
!pip install huggingface-hub
!pip install transformers
!pip install accelerate
Collecting huggingface-hub
  Downloading huggingface_hub-0.19.0-py3-none-any.whl (311 kB)
...
Successfully installed huggingface-hub-0.19.0
Collecting transformers
  Downloading transformers-4.35.0-py3-none-any.whl (7.9 MB)
...
Successfully installed huggingface-hub-0.17.3 safetensors-0.4.0 tokenizers-0.14.1 transformers-4.35.0
Collecting accelerate
  Downloading accelerate-0.24.1-py3-none-any.whl (261 kB)
...
Successfully installed accelerate-0.24.1
In [7]:
import transformers

Authentication¶

In [8]:
# Tokens
# https://huggingface.co/settings/tokens

access_key = 'hf_...'  # paste your own token here - never publish a real one
import huggingface_hub
huggingface_hub.login(access_key)
Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful

Zephyr 7B¶

In [9]:
from transformers import pipeline

pipe = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta", torch_dtype=torch.bfloat16, device_map="auto")
In [10]:
# We use the tokenizer's chat template to format each message 
# - see https://huggingface.co/docs/transformers/main/en/chat_templating
messages = [
    {
        "role": "system",
        "content": "You are a professor of economics at the University of Oxford",
    },
    {
        "role": "user",
        "content": "Explain double-debiased machine learning for causal inference"},
]
In [11]:
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# Sample up to 512 new tokens; temperature, top_k and top_p control how random the decoding is
outputs = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
In [11]:
print(outputs[0]["generated_text"])
/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1473: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )
  warnings.warn(
<|system|>
You are a professor of economics at the University of Oxford</s>
<|user|>
Explain double-debiased machine learning for causal inference</s>
<|assistant|>
Double-debiased machine learning is a statistical technique that combines two debiasing strategies to estimate causal effects more accurately. In causal inference, the goal is to estimate the effect of a treatment on an outcome, while controlling for all other factors that might influence the outcome. Double-debiased machine learning applies two debiasing steps to address sources of estimation error that can arise in complex data settings.

The first debiasing step is called outcome regression debiasing. In this step, the effect of the treatment on the outcome is estimated by regressing the outcome on the treatment and other relevant covariates. However, this estimated effect can be biased due to unobserved confounding variables that are correlated with both the treatment and the outcome. To address this, outcome regression debiasing uses a second-stage regression to adjust for the unobserved confounding variables.

The second debiasing step is called treatment selection debiasing. In this step, the effect of the treatment on the outcome is estimated by selecting a subset of the population based on their characteristics, and then estimating the effect of the treatment on the outcome for this selected subset. However, this estimated effect can be biased due to selection bias, as the selected subset may not be representative of the entire population. To address this, treatment selection debiasing uses a weighting function to adjust for the selection bias.

Double-debiased machine learning combines these two debiasing steps to obtain more accurate causal estimates. First, outcome regression debiasing is applied to estimate the effect of the treatment on the outcome, while controlling for the observed covariates and adjusting for the unobserved confounding variables. Then, treatment selection debiasing is applied to adjust for the selection bias and obtain a more representative estimate for the selected subset. By combining these two debiasing steps, double-debiased machine learning can provide more accurate causal estimates, especially in complex data settings with many confounding variables and selection bias.

Zephyr 7B in more detail¶

In [13]:
from transformers import AutoModelForCausalLM, AutoTokenizer
In [13]:
# Load the tokenizer and model
# tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
# model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta", torch_dtype=torch.bfloat16)

Here's a shortcut because we already created pipe¶

In [13]:
tokenizer = pipe.tokenizer
model = pipe.model
In [14]:
model.device
Out[14]:
device(type='cuda', index=0)

Tokenize the prompt¶

In [15]:
# Tokenize the prompt
inputs = tokenizer(prompt, return_tensors="pt")
print(prompt)
print(inputs)
<|system|>
You are a professor of economics at the University of Oxford</s>
<|user|>
Explain double-debiased machine learning for causal inference</s>
<|assistant|>

{'input_ids': tensor([[    1,   523, 28766,  6574, 28766, 28767,    13,  1976,   460,   264,
         12192,   302, 25426,   438,   272,  2900,   302, 13434,     2, 28705,
            13, 28789, 28766,  1838, 28766, 28767,    13,   966, 19457,  3579,
         28733,   450,  6309,  1293,  5599,  5168,   354,  3599,   282,   297,
          2103,     2, 28705,    13, 28789, 28766,   489, 11143, 28766, 28767,
            13]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1]])}
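
Here input_ids are the integer IDs of the prompt's tokens, and attention_mask flags which positions hold real tokens (all 1s here, since a single unpadded prompt needs no masking).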

Move the input tensors to the same device as the model¶

In [16]:
inputs = {k: v.to(model.device) for k, v in inputs.items()}
inputs
Out[16]:
{'input_ids': tensor([[    1,   523, 28766,  6574, 28766, 28767,    13,  1976,   460,   264,
          12192,   302, 25426,   438,   272,  2900,   302, 13434,     2, 28705,
             13, 28789, 28766,  1838, 28766, 28767,    13,   966, 19457,  3579,
          28733,   450,  6309,  1293,  5599,  5168,   354,  3599,   282,   297,
           2103,     2, 28705,    13, 28789, 28766,   489, 11143, 28766, 28767,
             13]], device='cuda:0'),
 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1]], device='cuda:0')}

Run the model on the inputs¶

In [17]:
# Forward pass, specifying that we want hidden states and attentions
model_output = model(**inputs, output_hidden_states=True, output_attentions=True)

OK, let's look inside the model¶

In [18]:
# Access hidden states; note that the first element is the input embeddings,
# so you might want to start from index 1 to get the hidden states after the first layer
initial_embeddings = model_output.hidden_states[0]
hidden_states = model_output.hidden_states[1:]
In [19]:
# Access attentions
attentions = model_output.attentions
In [20]:
len(hidden_states)
Out[20]:
32
In [21]:
initial_embeddings.shape, hidden_states[0].shape, hidden_states[31].shape
Out[21]:
(torch.Size([1, 51, 4096]),
 torch.Size([1, 51, 4096]),
 torch.Size([1, 51, 4096]))
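
Each shape reads (batch, sequence length, hidden size): one prompt of 51 tokens, each carried through the network as a 4096-dimensional vector, with 32 hidden states - one per transformer layer.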

The raw initial embeddings:¶

In [25]:
initial_embeddings[0]
Out[25]:
tensor([[-4.1199e-03,  4.1580e-04, -4.4861e-03,  ..., -4.1771e-04,
         -1.0223e-03, -2.0117e-06],
        [-5.4321e-03, -2.9297e-03, -6.9809e-04,  ...,  2.5940e-03,
         -6.8283e-04, -5.4550e-04],
        [ 6.5327e-05, -2.2278e-03,  7.0190e-04,  ..., -1.0300e-04,
          1.0071e-03,  1.4191e-03],
        ...,
        [ 6.5327e-05, -2.2278e-03,  7.0190e-04,  ..., -1.0300e-04,
          1.0071e-03,  1.4191e-03],
        [ 3.5286e-04,  2.6855e-03,  3.0365e-03,  ..., -1.5721e-06,
          3.6621e-04, -4.6921e-04],
        [-8.8882e-04, -1.0910e-03,  8.0109e-04,  ...,  1.1978e-03,
         -7.5340e-05, -6.7902e-04]], device='cuda:0', dtype=torch.bfloat16,
       grad_fn=<SelectBackward0>)

The 'transformed' embeddings after all the attention processing:¶

In [26]:
hidden_states[31][0]
Out[26]:
tensor([[ -1.0156,  -0.1875,  -2.3594,  ...,   0.0698,  -1.2891,   1.9219],
        [ -1.6094,  -5.6250,  -1.1953,  ...,   0.6250,  -0.6016,  -1.2266],
        [ -3.0938,  -1.8281,  -0.7539,  ...,  -0.7266,  -5.5312,   0.9688],
        ...,
        [  2.5781, -10.5625,   2.6875,  ...,   1.2344,  -2.4688,  -3.5938],
        [  9.6875,   3.6094,   5.5000,  ...,  -6.0312,  -3.6250,  -6.4375],
        [  7.1250,  -1.4062,   5.3125,  ...,  -1.1250,  -6.4688,  -1.6094]],
       device='cuda:0', dtype=torch.bfloat16, grad_fn=<SelectBackward0>)

The final logits from this run of the model:¶

In [22]:
# Get logits from the model output
logits = model_output.logits
logits.shape
Out[22]:
torch.Size([1, 51, 32000])
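
This reads (batch, sequence length, vocabulary size): at each of the 51 positions, the model scores all 32,000 tokens in its vocabulary as the possible next token.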

Simplest way to extract a first word from the logits:¶

In [23]:
# The logits at the last position are the model's scores for the next token;
# their argmax is the single most likely continuation
next_token_id = torch.argmax(logits[:, -1, :], dim=-1).item()  # an integer token ID
In [24]:
next_token_text = tokenizer.decode([next_token_id], skip_special_tokens=True)
In [25]:
next_token_text
Out[25]:
'Double'
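
To turn this one-token trick into text, we can loop the argmax step. Here is a bare-bones sketch of greedy decoding (model.generate does this, plus sampling and other strategies, for you):

generated = inputs["input_ids"]
with torch.no_grad():
    for _ in range(20):  # generate at most 20 new tokens
        logits = model(input_ids=generated).logits
        next_id = torch.argmax(logits[:, -1, :], dim=-1, keepdim=True)
        generated = torch.cat([generated, next_id], dim=-1)
        if next_id.item() == tokenizer.eos_token_id:  # stop at end-of-sequence
            break
print(tokenizer.decode(generated[0], skip_special_tokens=True))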

A delve into Zephyr's parameters¶

In [31]:
n_params = 0
import numpy
In [31]:
for i, (name, param) in enumerate(model.named_parameters()):
    n_params += numpy.prod(list(param.size()))
    if i > 10: continue  # keep counting, but only print the first 11 tensors
    print(f"Layer: {name} | Size: {param.size()} | Some of its values : {param[0]} \n")  # print the first slice of each tensor
Layer: model.embed_tokens.weight | Size: torch.Size([32000, 4096]) | Some of its values : tensor([ 3.4124e-06, -1.3888e-05, -1.3411e-05,  ..., -7.0632e-06,
         2.3842e-06,  9.8944e-06], device='cuda:0', dtype=torch.bfloat16,
       grad_fn=<SelectBackward0>) 

Layer: model.layers.0.self_attn.q_proj.weight | Size: torch.Size([4096, 4096]) | Some of its values : tensor([ 7.7248e-05,  9.6893e-04, -3.3379e-05,  ...,  4.1504e-03,
         2.4438e-05, -4.0054e-04], device='cuda:0', dtype=torch.bfloat16,
       grad_fn=<SelectBackward0>) 

Layer: model.layers.0.self_attn.k_proj.weight | Size: torch.Size([1024, 4096]) | Some of its values : tensor([ 3.1292e-07, -3.1891e-03,  1.2875e-04,  ..., -1.6724e-02,
         1.6022e-04, -6.5613e-04], device='cuda:0', dtype=torch.bfloat16,
       grad_fn=<SelectBackward0>) 

Layer: model.layers.0.self_attn.v_proj.weight | Size: torch.Size([1024, 4096]) | Some of its values : tensor([-3.4714e-04, -1.6861e-03, -6.9046e-04,  ...,  4.9133e-03,
         9.5367e-05,  2.4109e-03], device='cuda:0', dtype=torch.bfloat16,
       grad_fn=<SelectBackward0>) 

Layer: model.layers.0.self_attn.o_proj.weight | Size: torch.Size([4096, 4096]) | Some of its values : tensor([ 0.0007,  0.0045,  0.0041,  ..., -0.0011, -0.0005,  0.0006],
       device='cuda:0', dtype=torch.bfloat16, grad_fn=<SelectBackward0>) 

Layer: model.layers.0.mlp.gate_proj.weight | Size: torch.Size([14336, 4096]) | Some of its values : tensor([-0.0042, -0.0009, -0.0014,  ...,  0.0025,  0.0036,  0.0033],
       device='cuda:0', dtype=torch.bfloat16, grad_fn=<SelectBackward0>) 

Layer: model.layers.0.mlp.up_proj.weight | Size: torch.Size([14336, 4096]) | Some of its values : tensor([-0.0002, -0.0003, -0.0009,  ...,  0.0057,  0.0036,  0.0006],
       device='cuda:0', dtype=torch.bfloat16, grad_fn=<SelectBackward0>) 

Layer: model.layers.0.mlp.down_proj.weight | Size: torch.Size([4096, 14336]) | Some of its values : tensor([-2.6703e-03,  6.0558e-05, -3.9291e-04,  ..., -1.7395e-03,
         3.4790e-03, -1.0376e-03], device='cuda:0', dtype=torch.bfloat16,
       grad_fn=<SelectBackward0>) 

Layer: model.layers.0.input_layernorm.weight | Size: torch.Size([4096]) | Some of its values : 0.000514984130859375 

Layer: model.layers.0.post_attention_layernorm.weight | Size: torch.Size([4096]) | Some of its values : 0.41796875 

Layer: model.layers.1.self_attn.q_proj.weight | Size: torch.Size([4096, 4096]) | Some of its values : tensor([-3.0708e-04, -1.9531e-03,  7.8678e-05,  ...,  2.7924e-03,
        -4.4632e-04,  2.7771e-03], device='cuda:0', dtype=torch.bfloat16,
       grad_fn=<SelectBackward0>) 

In [32]:
print(f"These are the first 10 of {i} tensors of weights in the model\n")
print(f"The total number of entries in all the tensors is {n_params}")
These are the first 10 of 290 tensors of weights in the model

The total number of entries in all the tensors is 7241732096
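
That is roughly 7.24 billion parameters - the '7B' in the model's name.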

Many thanks¶
