Skip to content

Commit 7bd2631

Browse files
authored
Switch from urllib to requests to improve reliability (#867)
* Switch from urllib to requests to improve reliability
* Keep ruff linter-specific
* update
* update
* update
1 parent 8552565 commit 7bd2631

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

47 files changed

+602
-446
lines changed

.github/workflows/basic-tests-latest-python.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,14 @@ jobs:
3838
- name: Test Selected Python Scripts
3939
run: |
4040
source .venv/bin/activate
41-
pytest --ruff setup/02_installing-python-libraries/tests.py
42-
pytest --ruff ch04/01_main-chapter-code/tests.py
43-
pytest --ruff ch05/01_main-chapter-code/tests.py
44-
pytest --ruff ch06/01_main-chapter-code/tests.py
41+
pytest setup/02_installing-python-libraries/tests.py
42+
pytest ch04/01_main-chapter-code/tests.py
43+
pytest ch05/01_main-chapter-code/tests.py
44+
pytest ch06/01_main-chapter-code/tests.py
4545
4646
- name: Validate Selected Jupyter Notebooks
4747
run: |
4848
source .venv/bin/activate
49-
pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
50-
pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
51-
pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
49+
pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
50+
pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
51+
pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

.github/workflows/basic-tests-linux-uv.yml

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -47,24 +47,24 @@ jobs:
4747
shell: bash
4848
run: |
4949
source .venv/bin/activate
50-
pytest --ruff setup/02_installing-python-libraries/tests.py
51-
pytest --ruff ch04/01_main-chapter-code/tests.py
52-
pytest --ruff ch04/03_kv-cache/tests.py
53-
pytest --ruff ch05/01_main-chapter-code/tests.py
54-
pytest --ruff ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
55-
pytest --ruff ch05/07_gpt_to_llama/tests/test_llama32_nb.py
56-
pytest --ruff ch05/11_qwen3/tests/test_qwen3_nb.py
57-
pytest --ruff ch05/12_gemma3/tests/test_gemma3_nb.py
58-
pytest --ruff ch05/12_gemma3/tests/test_gemma3_kv_nb.py
59-
pytest --ruff ch06/01_main-chapter-code/tests.py
50+
pytest setup/02_installing-python-libraries/tests.py
51+
pytest ch04/01_main-chapter-code/tests.py
52+
pytest ch04/03_kv-cache/tests.py
53+
pytest ch05/01_main-chapter-code/tests.py
54+
pytest ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
55+
pytest ch05/07_gpt_to_llama/tests/test_llama32_nb.py
56+
pytest ch05/11_qwen3/tests/test_qwen3_nb.py
57+
pytest ch05/12_gemma3/tests/test_gemma3_nb.py
58+
pytest ch05/12_gemma3/tests/test_gemma3_kv_nb.py
59+
pytest ch06/01_main-chapter-code/tests.py
6060
6161
- name: Validate Selected Jupyter Notebooks (uv)
6262
shell: bash
6363
run: |
6464
source .venv/bin/activate
65-
pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
66-
pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
67-
pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
65+
pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
66+
pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
67+
pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
6868
6969
- name: Test Selected Bonus Materials
7070
shell: bash

.github/workflows/basic-tests-macos-uv.yml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,20 +47,20 @@ jobs:
4747
shell: bash
4848
run: |
4949
source .venv/bin/activate
50-
pytest --ruff setup/02_installing-python-libraries/tests.py
51-
pytest --ruff ch04/01_main-chapter-code/tests.py
52-
pytest --ruff ch05/01_main-chapter-code/tests.py
53-
pytest --ruff ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
54-
pytest --ruff ch05/07_gpt_to_llama/tests/test_llama32_nb.py
55-
pytest --ruff ch05/11_qwen3/tests/test_qwen3_nb.py
56-
pytest --ruff ch05/12_gemma3/tests/test_gemma3_nb.py
57-
pytest --ruff ch05/12_gemma3/tests/test_gemma3_kv_nb.py
58-
pytest --ruff ch06/01_main-chapter-code/tests.py
50+
pytest setup/02_installing-python-libraries/tests.py
51+
pytest ch04/01_main-chapter-code/tests.py
52+
pytest ch05/01_main-chapter-code/tests.py
53+
pytest ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
54+
pytest ch05/07_gpt_to_llama/tests/test_llama32_nb.py
55+
pytest ch05/11_qwen3/tests/test_qwen3_nb.py
56+
pytest ch05/12_gemma3/tests/test_gemma3_nb.py
57+
pytest ch05/12_gemma3/tests/test_gemma3_kv_nb.py
58+
pytest ch06/01_main-chapter-code/tests.py
5959
6060
- name: Validate Selected Jupyter Notebooks (uv)
6161
shell: bash
6262
run: |
6363
source .venv/bin/activate
64-
pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
65-
pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
66-
pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
64+
pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
65+
pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
66+
pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

.github/workflows/basic-tests-old-pytorch.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,14 @@ jobs:
4343
- name: Test Selected Python Scripts
4444
run: |
4545
source .venv/bin/activate
46-
pytest --ruff setup/02_installing-python-libraries/tests.py
47-
pytest --ruff ch04/01_main-chapter-code/tests.py
48-
pytest --ruff ch05/01_main-chapter-code/tests.py
49-
pytest --ruff ch06/01_main-chapter-code/tests.py
46+
pytest setup/02_installing-python-libraries/tests.py
47+
pytest ch04/01_main-chapter-code/tests.py
48+
pytest ch05/01_main-chapter-code/tests.py
49+
pytest ch06/01_main-chapter-code/tests.py
5050
5151
- name: Validate Selected Jupyter Notebooks
5252
run: |
5353
source .venv/bin/activate
54-
pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
55-
pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
56-
pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
54+
pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
55+
pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
56+
pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

.github/workflows/basic-tests-pip.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,14 @@ jobs:
4646
- name: Test Selected Python Scripts
4747
run: |
4848
source .venv/bin/activate
49-
pytest --ruff setup/02_installing-python-libraries/tests.py
50-
pytest --ruff ch04/01_main-chapter-code/tests.py
51-
pytest --ruff ch05/01_main-chapter-code/tests.py
52-
pytest --ruff ch06/01_main-chapter-code/tests.py
49+
pytest setup/02_installing-python-libraries/tests.py
50+
pytest ch04/01_main-chapter-code/tests.py
51+
pytest ch05/01_main-chapter-code/tests.py
52+
pytest ch06/01_main-chapter-code/tests.py
5353
5454
- name: Validate Selected Jupyter Notebooks
5555
run: |
5656
source .venv/bin/activate
57-
pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
58-
pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
59-
pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
57+
pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
58+
pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
59+
pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

.github/workflows/basic-tests-pixi.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,14 @@ jobs:
4747
- name: Test Selected Python Scripts
4848
shell: pixi run --environment tests bash -e {0}
4949
run: |
50-
pytest --ruff setup/02_installing-python-libraries/tests.py
51-
pytest --ruff ch04/01_main-chapter-code/tests.py
52-
pytest --ruff ch05/01_main-chapter-code/tests.py
53-
pytest --ruff ch06/01_main-chapter-code/tests.py
50+
pytest setup/02_installing-python-libraries/tests.py
51+
pytest ch04/01_main-chapter-code/tests.py
52+
pytest ch05/01_main-chapter-code/tests.py
53+
pytest ch06/01_main-chapter-code/tests.py
5454
5555
- name: Validate Selected Jupyter Notebooks
5656
shell: pixi run --environment tests bash -e {0}
5757
run: |
58-
pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
59-
pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
60-
pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
58+
pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
59+
pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
60+
pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

.github/workflows/basic-tests-pytorch-rc.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,14 @@ jobs:
3939
- name: Test Selected Python Scripts
4040
run: |
4141
source .venv/bin/activate
42-
pytest --ruff setup/02_installing-python-libraries/tests.py
43-
pytest --ruff ch04/01_main-chapter-code/tests.py
44-
pytest --ruff ch05/01_main-chapter-code/tests.py
45-
pytest --ruff ch06/01_main-chapter-code/tests.py
42+
pytest setup/02_installing-python-libraries/tests.py
43+
pytest ch04/01_main-chapter-code/tests.py
44+
pytest ch05/01_main-chapter-code/tests.py
45+
pytest ch06/01_main-chapter-code/tests.py
4646
4747
- name: Validate Selected Jupyter Notebooks
4848
run: |
4949
source .venv/bin/activate
50-
pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
51-
pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
52-
pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
50+
pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
51+
pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
52+
pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

.github/workflows/basic-tests-windows-uv-pip.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,18 +49,18 @@ jobs:
4949
shell: bash
5050
run: |
5151
source .venv/Scripts/activate
52-
pytest --ruff setup/02_installing-python-libraries/tests.py
53-
pytest --ruff ch04/01_main-chapter-code/tests.py
54-
pytest --ruff ch05/01_main-chapter-code/tests.py
55-
pytest --ruff ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
56-
pytest --ruff ch05/07_gpt_to_llama/tests/test_llama32_nb.py
57-
pytest --ruff ch05/11_qwen3/tests/test_qwen3_nb.py
58-
pytest --ruff ch06/01_main-chapter-code/tests.py
52+
pytest setup/02_installing-python-libraries/tests.py
53+
pytest ch04/01_main-chapter-code/tests.py
54+
pytest ch05/01_main-chapter-code/tests.py
55+
pytest ch05/07_gpt_to_llama/tests/tests_rope_and_parts.py
56+
pytest ch05/07_gpt_to_llama/tests/test_llama32_nb.py
57+
pytest ch05/11_qwen3/tests/test_qwen3_nb.py
58+
pytest ch06/01_main-chapter-code/tests.py
5959
6060
- name: Run Jupyter Notebook Tests
6161
shell: bash
6262
run: |
6363
source .venv/Scripts/activate
64-
pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
65-
pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
66-
pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
64+
pytest --nbval ch02/01_main-chapter-code/dataloader.ipynb
65+
pytest --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
66+
pytest --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

appendix-D/01_main-chapter-code/appendix-D.ipynb

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,19 +121,40 @@
121121
"outputs": [],
122122
"source": [
123123
"import os\n",
124-
"import urllib.request\n",
124+
"import requests\n",
125125
"\n",
126126
"file_path = \"the-verdict.txt\"\n",
127127
"url = \"https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt\"\n",
128128
"\n",
129129
"if not os.path.exists(file_path):\n",
130+
" response = requests.get(url, timeout=30)\n",
131+
" response.raise_for_status()\n",
132+
" text_data = response.text\n",
133+
" with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
134+
" file.write(text_data)\n",
135+
"else:\n",
136+
" with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
137+
" text_data = file.read()\n",
138+
"\n",
139+
"# The book originally used the code shown below.\n",
140+
"# However, urllib uses older protocol settings that\n",
141+
"# can cause problems for some readers using a VPN.\n",
142+
"# The `requests` version above is more robust\n",
143+
"# in that regard.\n",
144+
"\n",
145+
"\"\"\"\n",
146+
"import os\n",
147+
"import urllib.request\n",
148+
"\n",
149+
"if not os.path.exists(file_path):\n",
130150
" with urllib.request.urlopen(url) as response:\n",
131151
" text_data = response.read().decode('utf-8')\n",
132152
" with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
133153
" file.write(text_data)\n",
134154
"else:\n",
135155
" with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
136-
" text_data = file.read()"
156+
" text_data = file.read()\n",
157+
"\"\"\""
137158
]
138159
},
139160
{

appendix-E/01_main-chapter-code/appendix-E.ipynb

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,8 @@
190190
}
191191
],
192192
"source": [
193-
"import urllib\n",
193+
"# import urllib\n",
194+
"import requests\n",
194195
"from pathlib import Path\n",
195196
"import pandas as pd\n",
196197
"from previous_chapters import (\n",
@@ -215,13 +216,20 @@
215216
"extracted_path = \"sms_spam_collection\"\n",
216217
"data_file_path = Path(extracted_path) / \"SMSSpamCollection.tsv\"\n",
217218
"\n",
219+
"\n",
218220
"try:\n",
219221
" download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
220-
"except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:\n",
222+
"except (requests.exceptions.RequestException, TimeoutError) as e:\n",
221223
" print(f\"Primary URL failed: {e}. Trying backup URL...\")\n",
222224
" url = \"https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip\"\n",
223225
" download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
224226
"\n",
227+
"# The book originally used\n",
228+
"# except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:\n",
229+
"# in the code above.\n",
230+
"# However, some VPN users reported issues with `urllib`, so the code was updated\n",
231+
"# to use `requests` instead\n",
232+
"\n",
225233
"df = pd.read_csv(data_file_path, sep=\"\\t\", header=None, names=[\"Label\", \"Text\"])\n",
226234
"balanced_df = create_balanced_dataset(df)\n",
227235
"balanced_df[\"Label\"] = balanced_df[\"Label\"].map({\"ham\": 0, \"spam\": 1})\n",

0 commit comments

Comments
 (0)