<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sitemap for the blog under https://tahabouhsine.com/blog/.
  Each <url> entry is kept on a single line: whitespace inside <loc> is part
  of the element's text, so a <loc> value must never be wrapped mid-URL.
-->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"
        xmlns:xhtml="http://www.w3.org/1999/xhtml"
        xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
        xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
  <!-- Top-level pages and posts -->
  <url><loc>https://tahabouhsine.com/blog/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/about/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/activations-are-bad-for-geometry/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/ai-illiteracy-pt1/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/attention-is-a-kernel/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/cheap-attention-is-linear-attention/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/convex-readout-jax-flax-nnx/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/latent-on-the-spectrum/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/linear-attention-jax-flax-nnx/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/not-all-infinities-are-equal/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/opposite-is-not-different/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/organizing-randomness-jax/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/poem-0-1/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/qk-projections-jax-flax-nnx/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/readout-as-convex-combination/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/search/</loc></url>
  <!-- Tag pages -->
  <url><loc>https://tahabouhsine.com/blog/tag/activation-functions/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/africa-tech/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/ai-education/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/ai-literacy/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/ai/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/attention/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/bilinear/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/clip/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/contrastive-learning/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/contrastive/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/cosine-similarity/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/cross-entropy/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/deep-learning/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/efficient-transformers/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/embeddings/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/flax/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/frame-theory/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/geometry/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/hallucination/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/hierarchy/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/implementation/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/infonce/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/information-theory/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/information/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/interpretability/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/jacobian/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/jax/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/kernel-methods/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/kernels/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/label-structure/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/latent-space/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/linear-attention/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/loss-functions/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/loss-landscape/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/mechanistic-interpretability/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/ml/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/mlp/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/modality-gap/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/morocco/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/nadaraya-watson/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/neural-collapse/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/neural-networks/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/nnx/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/optax/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/orthogonality/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/packing/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/performer/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/phase-transitions/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/poem/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/policy/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/prototypes/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/query-key/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/random-features/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/relu/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/representation-learning/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/residual-stream/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/rkhs/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/rope/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/self-attention/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/self-supervised-learning/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/siglip/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/simclr/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/simplex-etf/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/simplex-packing/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/simplex/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/spectral-embedding/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/supcon/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/taxonomy/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/tight-frames/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/training-dynamics/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/transformers/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/triplet-loss/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/welch-bound/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/tag/yat-unit/</loc></url>
  <!-- Tag index and remaining posts -->
  <url><loc>https://tahabouhsine.com/blog/tags/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/three-states-of-information/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/untangling-the-moons/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/welch-bound-good-latent-space/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/welch-bound-jax-analysis/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/what-an-mlp-knows/</loc></url>
  <url><loc>https://tahabouhsine.com/blog/why-attention-needs-qk-projections/</loc></url>
</urlset>