<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sitemap for https://puyang.me/ (sitemaps.org protocol 0.9).
  Lists the home page plus the blog, news, projects and publications pages.

  Layout: one <url> entry per page, one child element per line. Keep every
  <loc> value on a single line with no surrounding whitespace: whitespace
  inside <loc> is part of the text node and would corrupt the URL.

  The news, xhtml, image and video prefixes are declared on the root but no
  entry in this file uses them.

  NOTE(review): this file looks machine-generated; if so, formatting fixes
  belong in the generator rather than here. Confirm before hand-editing.
-->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"
        xmlns:xhtml="http://www.w3.org/1999/xhtml"
        xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
        xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">

  <!-- Blog: engineering_system_view -->
  <url>
    <loc>https://puyang.me/blog/engineering_system_view/training-models-larger-than-one-gpu-en.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/engineering_system_view/training-models-larger-than-one-gpu.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/engineering_system_view/what-are-we-optimizing-in-agent-context-en.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/engineering_system_view/what-are-we-optimizing-in-agent-context.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>

  <!-- Blog: geometry-of-transformers -->
  <url>
    <loc>https://puyang.me/blog/geometry-of-transformers/what-attention-does-en.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/geometry-of-transformers/what-attention-does.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/geometry-of-transformers/why-multi-head-matters-en.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/geometry-of-transformers/why-multi-head-matters.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>

  <!-- Blog: high-dimensional-space-and-machine-learning -->
  <url>
    <loc>https://puyang.me/blog/high-dimensional-space-and-machine-learning/distance-breakdown-en.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/high-dimensional-space-and-machine-learning/distance-breakdown.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/high-dimensional-space-and-machine-learning/hypersphere-en.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/high-dimensional-space-and-machine-learning/hypersphere.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/high-dimensional-space-and-machine-learning/orthogonality-en.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/high-dimensional-space-and-machine-learning/orthogonality.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>

  <!-- Blog index. No <lastmod> is recorded for this entry. -->
  <url>
    <loc>https://puyang.me/blog/</loc>
  </url>

  <!-- Blog: representation-space-of-large-models -->
  <url>
    <loc>https://puyang.me/blog/representation-space-of-large-models/semantic-linearity-en.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/representation-space-of-large-models/semantic-linearity.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/representation-space-of-large-models/spherical-coding-en.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/representation-space-of-large-models/spherical-coding.html</loc>
    <lastmod>2026-03-11T21:43:36.000Z</lastmod>
  </url>

  <!-- Blog: theory-of-tokenizers -->
  <url>
    <loc>https://puyang.me/blog/theory-of-tokenizers/what-tokenization-does-en.html</loc>
    <lastmod>2026-03-12T21:32:20.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/theory-of-tokenizers/what-tokenization-does.html</loc>
    <lastmod>2026-03-12T21:32:20.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/theory-of-tokenizers/why-character-level-rarely-wins-en.html</loc>
    <lastmod>2026-03-12T21:23:04.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/theory-of-tokenizers/why-character-level-rarely-wins.html</loc>
    <lastmod>2026-03-12T21:23:04.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/theory-of-tokenizers/why-vocab-size-stays-near-50k-en.html</loc>
    <lastmod>2026-03-12T21:23:04.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/blog/theory-of-tokenizers/why-vocab-size-stays-near-50k.html</loc>
    <lastmod>2026-03-12T21:23:04.000Z</lastmod>
  </url>

  <!-- Site root -->
  <url>
    <loc>https://puyang.me/</loc>
    <lastmod>2026-03-11T00:20:29.000Z</lastmod>
  </url>

  <!-- News -->
  <url>
    <loc>https://puyang.me/news/2025-12-05-paper-accepted.html</loc>
    <lastmod>2026-03-09T21:21:27.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/news/2026-01-20-launched-new-site.html</loc>
    <lastmod>2026-03-09T21:21:27.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/news/</loc>
    <lastmod>2026-03-09T21:21:27.000Z</lastmod>
  </url>

  <!-- Projects -->
  <url>
    <loc>https://puyang.me/projects/</loc>
    <lastmod>2026-03-20T19:21:10.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/projects/network-architecture-for-6g-network/</loc>
    <lastmod>2026-03-20T19:21:10.000Z</lastmod>
  </url>
  <!-- NOTE(review): "sample-project" looks like a placeholder page; confirm it should be indexed. -->
  <url>
    <loc>https://puyang.me/projects/sample-project/</loc>
    <lastmod>2026-03-20T19:21:10.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/projects/semantic-routing-and-load-balance/</loc>
    <lastmod>2026-03-20T19:21:10.000Z</lastmod>
  </url>
  <url>
    <loc>https://puyang.me/projects/wireless-avionics-intra-communications-waic-system/</loc>
    <lastmod>2026-03-20T19:21:10.000Z</lastmod>
  </url>

  <!-- Publications index. No <lastmod> is recorded for this entry. -->
  <url>
    <loc>https://puyang.me/publications/</loc>
  </url>
</urlset>