(108.00, 754.57) (126.82, 754.57) (126.82, 763.12) (108.00, 763.12)       /F81 IBM	<|special_separator|>
(399.97, 754.57) (429.85, 754.57) (429.85, 763.12) (399.97, 763.12)       /F81 Granite	<|special_separator|>
(432.34, 754.57) (471.62, 754.57) (471.62, 763.12) (432.34, 763.12)       /F81 Language	<|special_separator|>
(474.12, 754.57) (504.00, 754.57) (504.00, 763.12) (474.12, 763.12)       /F81 Models	<|special_separator|>
(108.25, 698.11) (121.70, 698.11) (121.70, 706.66) (108.25, 706.66)       /F81 2.1	<|special_separator|>
(132.16, 698.11) (161.25, 698.52) (161.25, 705.37) (132.16, 706.66)       /F81 DENSE	<|special_separator|>
(164.23, 698.11) (201.23, 698.52) (201.23, 705.37) (164.23, 706.66)       /F81 MODELS	<|special_separator|>
(108.00, 676.91) (138.47, 676.91) (138.47, 685.46) (108.00, 685.46)       /F81 Granite	<|special_separator|>
(141.08, 676.91) (153.78, 676.91) (153.78, 685.46) (141.08, 685.46)       /F81 3.0	<|special_separator|>
(156.39, 676.91) (168.25, 676.91) (168.25, 685.46) (156.39, 685.46)       /F81 2B	<|special_separator|>
(170.85, 676.91) (185.53, 676.91) (185.53, 685.46) (170.85, 685.46)       /F81 and	<|special_separator|>
(188.14, 676.91) (200.00, 676.91) (200.00, 685.46) (188.14, 685.46)       /F81 8B	<|special_separator|>
(202.60, 676.91) (225.74, 676.91) (225.74, 685.46) (202.60, 685.46)       /F81 dense	<|special_separator|>
(228.35, 676.91) (257.70, 676.91) (257.70, 685.46) (228.35, 685.46)       /F81 models	<|special_separator|>
(260.31, 676.91) (281.75, 676.91) (281.75, 685.46) (260.31, 685.46)       /F81 share	<|special_separator|>
(284.36, 676.91) (288.87, 676.91) (288.87, 685.46) (284.36, 685.46)       /F81 a	<|special_separator|>
(291.47, 676.91) (319.70, 676.91) (319.70, 685.46) (291.47, 685.46)       /F81 similar	<|special_separator|>
(322.31, 676.91) (370.28, 676.91) (370.28, 685.46) (322.31, 685.46)       /F81 architecture	<|special_separator|>
(372.88, 676.91) (381.34, 676.91) (381.34, 685.46) (372.88, 685.46)       /F81 as	<|special_separator|>
(383.95, 676.91) (415.00, 676.91) (415.00, 685.46) (383.95, 685.46)       /F81 popular	<|special_separator|>
(417.60, 676.91) (454.29, 676.91) (454.29, 685.46) (417.60, 685.46)       /F81 language	<|special_separator|>
(456.90, 676.91) (486.25, 676.91) (486.25, 685.46) (456.90, 685.46)       /F81 models	<|special_separator|>
(488.86, 676.91) (504.00, 676.91) (504.00, 685.46) (488.86, 685.46)       /F81 like	<|special_separator|>
(108.00, 665.95) (133.96, 665.95) (133.96, 674.50) (108.00, 674.50)       /F81 Llama	<|special_separator|>
(136.72, 665.95) (151.39, 665.95) (151.39, 674.50) (136.72, 674.50)       /F81 and	<|special_separator|>
(154.15, 665.95) (167.69, 665.95) (167.69, 674.50) (154.15, 674.50)       /F81 our	<|special_separator|>
(170.46, 665.95) (205.20, 665.95) (205.20, 674.50) (170.46, 674.50)       /F81 previous	<|special_separator|>
(207.95, 665.95) (238.43, 665.95) (238.43, 674.50) (207.95, 674.50)       /F81 Granite	<|special_separator|>
(241.18, 665.95) (262.63, 665.95) (262.63, 674.50) (241.18, 674.50)       /F81 Code	<|special_separator|>
(265.40, 665.95) (294.75, 665.95) (294.75, 674.50) (265.40, 674.50)       /F81 models	<|special_separator|>
(297.51, 665.95) (326.30, 665.95) (326.30, 674.50) (297.51, 674.50)       /F81 (Mishra	<|special_separator|>
(329.05, 665.95) (336.39, 665.95) (336.39, 674.50) (329.05, 674.50)       /F81 et	<|special_separator|>
(339.14, 665.95) (349.02, 665.95) (349.02, 674.50) (339.14, 674.50)       /F81 al.	<|special_separator|>
(351.77, 665.95) (381.40, 665.95) (381.40, 674.50) (351.77, 674.50)       /F81 2024),	<|special_separator|>
(384.23, 665.95) (419.23, 665.95) (419.23, 674.50) (384.23, 674.50)       /F81 ensuring	<|special_separator|>
(421.98, 665.95) (447.39, 665.95) (447.39, 674.50) (421.98, 674.50)       /F81 strong	<|special_separator|>
(450.15, 665.95) (504.35, 665.95) (504.35, 674.50) (450.15, 674.50)       /F81 compatibility	<|special_separator|>
(107.64, 654.99) (125.71, 654.99) (125.71, 663.54) (107.64, 663.54)       /F81 with	<|special_separator|>
(129.02, 654.99) (178.68, 654.99) (178.68, 663.54) (129.02, 663.54)       /F81 open-source	<|special_separator|>
(182.00, 654.99) (219.81, 654.99) (219.81, 663.54) (182.00, 663.54)       /F81 inference	<|special_separator|>
(223.12, 654.99) (237.79, 654.99) (237.79, 663.54) (223.12, 663.54)       /F81 and	<|special_separator|>
(241.11, 654.99) (285.71, 654.99) (285.71, 663.54) (241.11, 663.54)       /F81 fine-tuning	<|special_separator|>
(289.03, 654.99) (328.26, 654.99) (328.26, 663.54) (289.03, 663.54)       /F81 pipelines.	<|special_separator|>
(333.83, 654.99) (347.14, 654.99) (347.14, 663.54) (333.83, 663.54)       /F81 We	<|special_separator|>
(350.45, 654.99) (363.99, 654.99) (363.99, 663.54) (350.45, 663.54)       /F81 use	<|special_separator|>
(367.32, 654.99) (402.87, 654.99) (402.87, 663.54) (367.32, 663.54)       /F81 Grouped	<|special_separator|>
(406.19, 654.99) (431.58, 654.99) (431.58, 663.54) (406.19, 663.54)       /F81 Query	<|special_separator|>
(434.90, 654.99) (473.30, 654.99) (473.30, 663.54) (434.90, 663.54)       /F81 Attention	<|special_separator|>
(476.61, 654.99) (504.83, 654.99) (504.83, 663.54) (476.61, 663.54)       /F81 (GQA;	<|special_separator|>
(107.64, 644.03) (136.80, 644.03) (136.80, 652.58) (107.64, 652.58)       /F81 Ainslie	<|special_separator|>
(139.29, 644.03) (146.58, 644.03) (146.58, 652.58) (139.29, 652.58)       /F81 et	<|special_separator|>
(149.06, 644.03) (158.87, 644.03) (158.87, 652.58) (149.06, 652.58)       /F81 al.	<|special_separator|>
(161.36, 644.03) (184.91, 644.03) (184.91, 652.58) (161.36, 652.58)       /F81 2023)	<|special_separator|>
(187.39, 644.03) (205.33, 644.03) (205.33, 652.58) (187.39, 652.58)       /F81 with	<|special_separator|>
(207.82, 644.03) (212.87, 644.03) (212.87, 652.58) (207.82, 652.58)       /F81 8	<|special_separator|>
(215.35, 644.03) (254.65, 644.03) (254.65, 652.58) (215.35, 652.58)       /F81 key-value	<|special_separator|>
(257.15, 644.03) (280.12, 644.03) (280.12, 652.58) (257.15, 652.58)       /F81 heads	<|special_separator|>
(282.61, 644.03) (290.46, 644.03) (290.46, 652.58) (282.61, 652.58)       /F81 to	<|special_separator|>
(292.95, 644.03) (305.29, 644.03) (305.29, 652.58) (292.95, 652.58)       /F81 get	<|special_separator|>
(307.77, 644.03) (312.25, 644.03) (312.25, 652.58) (307.77, 652.58)       /F81 a	<|special_separator|>
(314.74, 644.03) (334.93, 644.03) (334.93, 652.58) (314.74, 652.58)       /F81 good	<|special_separator|>
(337.41, 644.03) (368.23, 644.03) (368.23, 652.58) (337.41, 652.58)       /F81 balance	<|special_separator|>
(370.72, 644.03) (404.35, 644.03) (404.35, 652.58) (370.72, 652.58)       /F81 between	<|special_separator|>
(406.83, 644.03) (440.47, 644.03) (440.47, 652.58) (406.83, 652.58)       /F81 memory	<|special_separator|>
(442.96, 644.03) (459.22, 644.03) (459.22, 652.58) (442.96, 652.58)       /F81 cost	<|special_separator|>
(461.71, 644.03) (476.29, 644.03) (476.29, 652.58) (461.71, 652.58)       /F81 and	<|special_separator|>
(478.77, 644.03) (504.00, 644.03) (504.00, 652.58) (478.77, 652.58)       /F81 model	<|special_separator|>
(108.00, 633.07) (161.10, 633.07) (161.10, 641.62) (108.00, 641.62)       /F81 performance,	<|special_separator|>
(163.59, 633.07) (178.05, 633.07) (178.05, 641.62) (163.59, 641.62)       /F81 and	<|special_separator|>
(180.53, 633.07) (207.78, 633.07) (207.78, 641.62) (180.53, 641.62)       /F81 Rotary	<|special_separator|>
(210.28, 633.07) (243.11, 633.07) (243.11, 641.62) (210.28, 641.62)       /F81 Position	<|special_separator|>
(245.60, 633.07) (291.77, 633.07) (291.77, 641.62) (245.60, 641.62)       /F81 Embedding	<|special_separator|>
(294.25, 633.07) (323.74, 633.07) (323.74, 641.62) (294.25, 641.62)       /F81 (RoPE;	<|special_separator|>
(326.23, 633.07) (336.80, 633.07) (336.80, 641.62) (326.23, 641.62)       /F81 Su	<|special_separator|>
(339.30, 633.07) (346.53, 633.07) (346.53, 641.62) (339.30, 641.62)       /F81 et	<|special_separator|>
(349.02, 633.07) (358.75, 633.07) (358.75, 641.62) (349.02, 641.62)       /F81 al.	<|special_separator|>
(361.23, 633.07) (384.59, 633.07) (384.59, 641.62) (361.23, 641.62)       /F81 2024)	<|special_separator|>
(387.09, 633.07) (394.88, 633.07) (394.88, 641.62) (387.09, 641.62)       /F81 to	<|special_separator|>
(397.37, 633.07) (422.40, 633.07) (422.40, 641.62) (397.37, 641.62)       /F81 model	<|special_separator|>
(424.88, 633.07) (437.12, 633.07) (437.12, 641.62) (424.88, 641.62)       /F81 the	<|special_separator|>
(439.61, 633.07) (469.24, 633.07) (469.24, 641.62) (439.61, 641.62)       /F81 relative	<|special_separator|>
(471.73, 633.07) (504.00, 633.07) (504.00, 641.62) (471.73, 641.62)       /F81 position	<|special_separator|>
(108.00, 622.11) (141.86, 622.11) (141.86, 630.66) (108.00, 630.66)       /F81 between	<|special_separator|>
(145.27, 622.11) (174.25, 622.11) (174.25, 630.66) (145.27, 630.66)       /F81 tokens.	<|special_separator|>
(180.10, 622.11) (194.07, 622.11) (194.07, 630.66) (180.10, 630.66)       /F81 For	<|special_separator|>
(197.47, 622.11) (209.89, 622.11) (209.89, 630.66) (197.47, 630.66)       /F81 the	<|special_separator|>
(213.31, 622.11) (234.20, 622.11) (234.20, 630.66) (213.31, 630.66)       /F81 MLP	<|special_separator|>
(237.61, 622.11) (264.42, 622.11) (264.42, 630.66) (237.61, 630.66)       /F81 layers,	<|special_separator|>
(268.07, 622.11) (298.55, 622.11) (298.55, 630.66) (268.07, 630.66)       /F81 Granite	<|special_separator|>
(301.95, 622.11) (314.65, 622.11) (314.65, 630.66) (301.95, 630.66)       /F81 3.0	<|special_separator|>
(318.07, 622.11) (343.46, 622.11) (343.46, 630.66) (318.07, 630.66)       /F81 Dense	<|special_separator|>
(346.88, 622.11) (376.23, 622.11) (376.23, 630.66) (346.88, 630.66)       /F81 models	<|special_separator|>
(379.65, 622.11) (393.19, 622.11) (393.19, 630.66) (379.65, 630.66)       /F81 use	<|special_separator|>
(396.61, 622.11) (433.30, 622.11) (433.30, 630.66) (396.61, 630.66)       /F81 SwiGLU	<|special_separator|>
(436.72, 622.11) (445.18, 622.11) (445.18, 630.66) (436.72, 630.66)       /F81 as	<|special_separator|>
(448.58, 622.11) (461.00, 622.11) (461.00, 630.66) (448.58, 630.66)       /F81 the	<|special_separator|>
(464.42, 622.11) (504.00, 622.11) (504.00, 630.66) (464.42, 630.66)       /F81 activation	<|special_separator|>
(108.00, 611.15) (143.02, 611.15) (143.02, 619.71) (108.00, 619.71)       /F81 function.	<|special_separator|>
(146.12, 611.15) (172.71, 611.15) (172.71, 619.71) (146.12, 619.71)       /F81 Before	<|special_separator|>
(175.21, 611.15) (193.12, 611.15) (193.12, 619.71) (175.21, 619.71)       /F81 each	<|special_separator|>
(195.61, 611.15) (215.70, 611.15) (215.70, 619.71) (195.61, 619.71)       /F81 MLP	<|special_separator|>
(218.19, 611.15) (232.31, 611.15) (232.31, 619.71) (218.19, 619.71)       /F81 and	<|special_separator|>
(234.81, 611.15) (269.01, 611.15) (269.01, 619.71) (234.81, 619.71)       /F81 attention	<|special_separator|>
(271.51, 611.15) (293.09, 611.15) (293.09, 619.71) (271.51, 619.71)       /F81 layer,	<|special_separator|>
(295.59, 611.15) (306.99, 611.15) (306.99, 619.71) (295.59, 619.71)       /F81 we	<|special_separator|>
(309.48, 611.15) (322.50, 611.15) (322.50, 619.71) (309.48, 619.71)       /F81 use	<|special_separator|>
(325.00, 611.15) (368.44, 611.15) (368.44, 619.71) (325.00, 619.71)       /F81 RMSNorm	<|special_separator|>
(370.93, 611.15) (378.54, 611.15) (378.54, 619.71) (370.93, 619.71)       /F81 to	<|special_separator|>
(381.04, 611.15) (420.12, 611.15) (420.12, 619.71) (381.04, 619.71)       /F81 normalize	<|special_separator|>
(422.61, 611.15) (434.56, 611.15) (434.56, 619.71) (422.61, 619.71)       /F81 the	<|special_separator|>
(437.05, 611.15) (463.09, 611.15) (463.09, 619.71) (437.05, 619.71)       /F81 layer's	<|special_separator|>
(465.60, 611.15) (488.13, 611.15) (488.13, 619.71) (465.60, 619.71)       /F81 input.	<|special_separator|>
(491.23, 611.15) (504.00, 611.15) (504.00, 619.71) (491.23, 619.71)       /F81 We	<|special_separator|>
(108.00, 600.20) (124.37, 600.20) (124.37, 608.75) (108.00, 608.75)       /F81 also	<|special_separator|>
(126.92, 600.20) (148.36, 600.20) (148.36, 608.75) (126.92, 608.75)       /F81 share	<|special_separator|>
(150.92, 600.20) (195.50, 600.20) (195.50, 608.75) (150.92, 608.75)       /F81 parameters	<|special_separator|>
(198.05, 600.20) (231.91, 600.20) (231.91, 608.75) (198.05, 608.75)       /F81 between	<|special_separator|>
(234.46, 600.20) (246.88, 600.20) (246.88, 608.75) (234.46, 608.75)       /F81 the	<|special_separator|>
(249.43, 600.20) (270.32, 600.20) (270.32, 608.75) (249.43, 608.75)       /F81 input	<|special_separator|>
(272.89, 600.20) (318.05, 600.20) (318.05, 608.75) (272.89, 608.75)       /F81 embedding	<|special_separator|>
(320.60, 600.20) (335.27, 600.20) (335.27, 608.75) (320.60, 608.75)       /F81 and	<|special_separator|>
(337.82, 600.20) (350.24, 600.20) (350.24, 608.75) (337.82, 608.75)       /F81 the	<|special_separator|>
(352.80, 600.20) (378.77, 600.20) (378.77, 608.75) (352.80, 608.75)       /F81 output	<|special_separator|>
(381.32, 600.20) (404.46, 600.20) (404.46, 608.75) (381.32, 608.75)       /F81 linear	<|special_separator|>
(407.01, 600.20) (449.06, 600.20) (449.06, 608.75) (407.01, 608.75)       /F81 transform.	<|special_separator|>
(452.35, 600.20) (470.41, 600.20) (470.41, 608.75) (452.35, 608.75)       /F81 This	<|special_separator|>
(472.96, 600.20) (504.00, 600.20) (504.00, 608.75) (472.96, 608.75)       /F81 reduces	<|special_separator|>
(108.00, 589.24) (120.14, 589.24) (120.14, 597.79) (108.00, 597.79)       /F81 the	<|special_separator|>
(122.64, 589.24) (138.09, 589.24) (138.09, 597.79) (122.64, 597.79)       /F81 size	<|special_separator|>
(140.58, 589.24) (148.85, 589.24) (148.85, 597.79) (140.58, 597.79)       /F81 of	<|special_separator|>
(151.36, 589.24) (163.49, 589.24) (163.49, 597.79) (151.36, 597.79)       /F81 the	<|special_separator|>
(165.99, 589.24) (193.30, 589.24) (193.30, 597.79) (165.99, 597.79)       /F81 model,	<|special_separator|>
(195.81, 589.24) (210.15, 589.24) (210.15, 597.79) (195.81, 597.79)       /F81 and	<|special_separator|>
(212.65, 589.24) (224.23, 589.24) (224.23, 597.79) (212.65, 597.79)       /F81 we	<|special_separator|>
(226.73, 589.24) (245.13, 589.24) (245.13, 597.79) (226.73, 597.79)       /F81 have	<|special_separator|>
(247.63, 589.24) (283.34, 589.24) (283.34, 597.79) (247.63, 597.79)       /F81 observed	<|special_separator|>
(285.83, 589.24) (300.73, 589.24) (300.73, 597.79) (285.83, 597.79)       /F81 that	<|special_separator|>
(303.24, 589.24) (315.38, 589.24) (315.38, 597.79) (303.24, 597.79)       /F81 the	<|special_separator|>
(317.88, 589.24) (338.30, 589.24) (338.30, 597.79) (317.88, 597.79)       /F81 tying	<|special_separator|>
(340.79, 589.24) (349.07, 589.24) (349.07, 597.79) (340.79, 597.79)       /F81 of	<|special_separator|>
(351.57, 589.24) (371.98, 589.24) (371.98, 597.79) (351.57, 597.79)       /F81 these	<|special_separator|>
(374.48, 589.24) (422.48, 589.24) (422.48, 597.79) (374.48, 597.79)       /F81 embeddings	<|special_separator|>
(424.98, 589.24) (443.39, 589.24) (443.39, 597.79) (424.98, 597.79)       /F81 has	<|special_separator|>
(445.88, 589.24) (465.46, 589.24) (465.46, 597.79) (445.88, 597.79)       /F81 zero,	<|special_separator|>
(467.96, 589.24) (476.24, 589.24) (476.24, 597.79) (467.96, 597.79)       /F81 or	<|special_separator|>
(478.74, 589.24) (497.09, 589.24) (497.09, 597.79) (478.74, 597.79)       /F81 even	<|special_separator|>
(499.59, 589.24) (504.00, 589.24) (504.00, 597.79) (499.59, 597.79)       /F81 a	<|special_separator|>
(108.00, 578.28) (139.15, 578.28) (139.15, 586.83) (108.00, 586.83)       /F81 positive	<|special_separator|>
(141.64, 578.28) (168.76, 578.28) (168.76, 586.83) (141.64, 586.83)       /F81 impact	<|special_separator|>
(171.25, 578.28) (181.22, 578.28) (181.22, 586.83) (171.25, 586.83)       /F81 on	<|special_separator|>
(183.71, 578.28) (208.61, 578.28) (208.61, 586.83) (183.71, 586.83)       /F81 model	<|special_separator|>
(211.10, 578.28) (263.94, 578.28) (263.94, 586.83) (211.10, 586.83)       /F81 performance.	<|special_separator|>
(108.25, 551.57) (121.70, 551.57) (121.70, 560.13) (108.25, 560.13)       /F81 2.2	<|special_separator|>
(132.16, 551.57) (225.80, 551.99) (225.80, 558.83) (132.16, 560.13)       /F81 MIXTURE-OF-EXPERT	<|special_separator|>
(228.78, 551.99) (264.00, 551.99) (264.00, 558.83) (228.78, 558.83)       /F81 MODELS	<|special_separator|>
(108.00, 530.37) (138.47, 530.37) (138.47, 538.92) (108.00, 538.92)       /F81 Granite	<|special_separator|>
(141.69, 530.37) (154.39, 530.37) (154.39, 538.92) (141.69, 538.92)       /F81 3.0	<|special_separator|>
(157.59, 530.37) (169.45, 530.37) (169.45, 538.92) (157.59, 538.92)       /F81 1B	<|special_separator|>
(172.66, 530.37) (187.33, 530.37) (187.33, 538.92) (172.66, 538.92)       /F81 and	<|special_separator|>
(190.54, 530.37) (202.40, 530.37) (202.40, 538.92) (190.54, 538.92)       /F81 3B	<|special_separator|>
(205.60, 530.37) (225.93, 530.37) (225.93, 538.92) (205.60, 538.92)       /F81 MoE	<|special_separator|>
(229.14, 530.37) (258.50, 530.37) (258.50, 538.92) (229.14, 538.92)       /F81 models	<|special_separator|>
(261.70, 530.37) (275.24, 530.37) (275.24, 538.92) (261.70, 538.92)       /F81 use	<|special_separator|>
(278.45, 530.37) (306.69, 530.37) (306.69, 538.92) (278.45, 538.92)       /F81 similar	<|special_separator|>
(309.90, 530.37) (357.86, 530.37) (357.86, 538.92) (309.90, 538.92)       /F81 architecture	<|special_separator|>
(361.06, 530.37) (369.52, 530.37) (369.52, 538.92) (361.06, 538.92)       /F81 as	<|special_separator|>
(372.74, 530.37) (403.21, 530.37) (403.21, 538.92) (372.74, 538.92)       /F81 Granite	<|special_separator|>
(406.42, 530.37) (431.82, 530.37) (431.82, 538.92) (406.42, 538.92)       /F81 Dense	<|special_separator|>
(435.02, 530.37) (466.92, 530.37) (466.92, 538.92) (435.02, 538.92)       /F81 models,	<|special_separator|>
(470.31, 530.37) (488.38, 530.37) (488.38, 538.92) (470.31, 538.92)       /F81 with	<|special_separator|>
(491.58, 530.37) (504.00, 530.37) (504.00, 538.92) (491.58, 538.92)       /F81 the	<|special_separator|>
(108.00, 519.41) (128.56, 519.41) (128.56, 527.97) (108.00, 527.97)       /F81 MLP	<|special_separator|>
(131.06, 519.41) (154.94, 519.41) (154.94, 527.97) (131.06, 527.97)       /F81 layers	<|special_separator|>
(157.43, 519.41) (200.78, 519.41) (200.78, 527.97) (157.43, 527.97)       /F81 substituted	<|special_separator|>
(203.27, 519.41) (221.06, 519.41) (221.06, 527.97) (203.27, 527.97)       /F81 with	<|special_separator|>
(223.55, 519.41) (243.55, 519.41) (243.55, 527.97) (223.55, 527.97)       /F81 MoE	<|special_separator|>
(246.04, 519.41) (272.43, 519.41) (272.43, 527.97) (246.04, 527.97)       /F81 layers.	<|special_separator|>
(275.52, 519.41) (282.74, 519.41) (282.74, 527.97) (275.52, 527.97)       /F81 A	<|special_separator|>
(285.23, 519.41) (317.46, 519.41) (317.46, 527.97) (285.23, 527.97)       /F81 Mixture	<|special_separator|>
(319.95, 519.41) (328.28, 519.41) (328.28, 527.97) (319.95, 527.97)       /F81 of	<|special_separator|>
(330.77, 519.41) (361.33, 519.41) (361.33, 527.97) (330.77, 527.97)       /F81 Experts	<|special_separator|>
(363.82, 519.41) (390.49, 519.41) (390.49, 527.97) (363.82, 527.97)       /F81 (MoE)	<|special_separator|>
(392.98, 519.41) (412.98, 519.41) (412.98, 527.97) (392.98, 527.97)       /F81 layer	<|special_separator|>
(415.46, 519.41) (456.02, 519.41) (456.02, 527.97) (415.46, 527.97)       /F81 comprises	<|special_separator|>
(458.52, 519.58) (466.52, 519.58) (466.52, 528.28) (458.52, 528.28)       /F31 N	<|special_separator|>
(470.10, 519.41) (504.00, 519.41) (504.00, 527.97) (470.10, 527.97)       /F81 modules	<|special_separator|>
(108.00, 507.69) (150.68, 507.69) (150.68, 517.33) (108.00, 517.33)       /F31 $f_1, \ldots, f_N$	<|special_separator|>
(154.42, 508.45) (168.54, 508.45) (168.54, 517.01) (154.42, 517.01)       /F81 and	<|special_separator|>
(171.04, 508.45) (175.38, 508.45) (175.38, 517.01) (171.04, 517.01)       /F81 a	<|special_separator|>
(177.88, 508.45) (201.24, 508.45) (201.24, 517.01) (177.88, 517.01)       /F81 router	<|special_separator|>
(203.74, 508.61) (235.61, 508.61) (235.61, 517.36) (203.74, 517.36)       /F31 $g(e \mid \mathbf{x})$	<|special_separator|>
(235.61, 508.45) (238.06, 508.45) (238.06, 517.01) (235.61, 517.01)       /F81 .	<|special_separator|>
(241.17, 508.45) (264.68, 508.45) (264.68, 517.01) (241.17, 517.01)       /F81 Given	<|special_separator|>
(267.17, 508.45) (276.41, 508.45) (276.41, 517.01) (267.17, 517.01)       /F81 an	<|special_separator|>
(278.90, 508.45) (299.02, 508.45) (299.02, 517.01) (278.90, 517.01)       /F81 input	<|special_separator|>
(301.52, 508.61) (307.56, 508.61) (307.56, 517.36) (301.52, 517.36)       /F55 x	<|special_separator|>
(310.06, 508.45) (317.67, 508.45) (317.67, 517.01) (310.06, 517.01)       /F81 to	<|special_separator|>
(320.17, 508.45) (332.12, 508.45) (332.12, 517.01) (320.17, 517.01)       /F81 the	<|special_separator|>
(334.62, 508.45) (354.18, 508.45) (354.18, 517.01) (334.62, 517.01)       /F81 MoE	<|special_separator|>
(356.69, 508.45) (378.29, 508.45) (378.29, 517.01) (356.69, 517.01)       /F81 layer,	<|special_separator|>
(380.78, 508.45) (392.74, 508.45) (392.74, 517.01) (380.78, 517.01)       /F81 the	<|special_separator|>
(395.24, 508.45) (418.61, 508.45) (418.61, 517.01) (395.24, 517.01)       /F81 router	<|special_separator|>
(421.10, 508.45) (452.07, 508.45) (452.07, 517.01) (421.10, 517.01)       /F81 predicts	<|special_separator|>
(454.57, 508.45) (458.91, 508.45) (458.91, 517.01) (454.57, 517.01)       /F81 a	<|special_separator|>
(461.41, 508.45) (504.35, 508.45) (504.35, 517.01) (461.41, 517.01)       /F81 probability	<|special_separator|>
(108.00, 497.50) (153.15, 497.50) (153.15, 506.05) (108.00, 506.05)       /F81 distribution	<|special_separator|>
(155.64, 497.50) (172.82, 497.50) (172.82, 506.05) (155.64, 506.05)       /F81 over	<|special_separator|>
(175.32, 497.50) (187.33, 497.50) (187.33, 506.05) (175.32, 506.05)       /F81 the	<|special_separator|>
(189.83, 497.66) (197.83, 497.66) (197.83, 506.37) (189.83, 506.37)       /F31 N	<|special_separator|>
(201.41, 497.50) (237.19, 497.50) (237.19, 506.05) (201.41, 506.05)       /F81 modules.	<|special_separator|>
(240.29, 497.50) (250.67, 497.50) (250.67, 506.05) (240.29, 506.05)       /F81 Of	<|special_separator|>
(253.16, 497.50) (275.83, 497.50) (275.83, 506.05) (253.16, 506.05)       /F81 these,	<|special_separator|>
(278.33, 497.50) (289.79, 497.50) (289.79, 506.05) (278.33, 506.05)       /F81 we	<|special_separator|>
(292.29, 497.50) (314.68, 497.50) (314.68, 506.05) (292.29, 506.05)       /F81 select	<|special_separator|>
(317.17, 497.50) (329.18, 497.50) (329.18, 506.05) (317.17, 506.05)       /F81 the	<|special_separator|>
(331.68, 497.50) (344.25, 497.50) (344.25, 506.05) (331.68, 506.05)       /F81 top	<|special_separator|>
(346.74, 497.66) (351.93, 497.66) (351.93, 506.37) (346.74, 506.37)       /F31 k	<|special_separator|>
(354.74, 497.50) (385.45, 497.50) (385.45, 506.05) (354.74, 506.05)       /F81 experts.	<|special_separator|>
(388.55, 497.50) (412.03, 497.50) (412.03, 506.05) (388.55, 506.05)       /F81 When	<|special_separator|>
(414.52, 497.66) (419.71, 497.66) (419.71, 506.37) (414.52, 506.37)       /F31 k	<|special_separator|>
(422.80, 497.66) (430.55, 497.66) (430.55, 506.37) (422.80, 506.37)       /F31 <	<|special_separator|>
(433.33, 497.66) (441.33, 497.66) (441.33, 506.37) (433.33, 506.37)       /F31 N	<|special_separator|>
(442.42, 497.50) (444.87, 497.50) (444.87, 506.05) (442.42, 506.05)       /F81 ,	<|special_separator|>
(447.37, 497.50) (458.84, 497.50) (458.84, 506.05) (447.37, 506.05)       /F81 we	<|special_separator|>
(461.33, 497.50) (473.34, 497.50) (473.34, 506.05) (461.33, 506.05)       /F81 are	<|special_separator|>
(475.83, 497.50) (497.14, 497.50) (497.14, 506.05) (475.83, 506.05)       /F81 using	<|special_separator|>
(499.63, 497.50) (504.00, 497.50) (504.00, 506.05) (499.63, 506.05)       /F81 a	<|special_separator|>
(108.00, 486.54) (134.61, 486.54) (134.61, 495.09) (108.00, 495.09)       /F81 Sparse	<|special_separator|>
(137.10, 486.54) (169.26, 486.54) (169.26, 495.09) (137.10, 495.09)       /F81 Mixture	<|special_separator|>
(171.76, 486.54) (180.07, 486.54) (180.07, 495.09) (171.76, 495.09)       /F81 of	<|special_separator|>
(182.56, 486.54) (213.06, 486.54) (213.06, 495.09) (182.56, 495.09)       /F81 Experts	<|special_separator|>
(215.55, 486.54) (247.17, 486.54) (247.17, 495.09) (215.55, 495.09)       /F81 (SMoE;	<|special_separator|>
(249.65, 486.54) (281.25, 486.54) (281.25, 495.09) (249.65, 495.09)       /F81 Shazeer	<|special_separator|>
(283.74, 486.54) (290.95, 486.54) (290.95, 495.09) (283.74, 495.09)       /F81 et	<|special_separator|>
(293.44, 486.54) (303.14, 486.54) (303.14, 495.09) (293.44, 495.09)       /F81 al.	<|special_separator|>
(305.63, 486.54) (331.42, 486.54) (331.42, 495.09) (305.63, 495.09)       /F81 2017).	<|special_separator|>
(334.50, 486.54) (348.22, 486.54) (348.22, 495.09) (334.50, 495.09)       /F81 For	<|special_separator|>
(350.71, 486.54) (365.14, 486.54) (365.14, 495.09) (350.71, 495.09)       /F81 this	<|special_separator|>
(367.63, 486.54) (390.36, 486.54) (390.36, 495.09) (367.63, 495.09)       /F81 series	<|special_separator|>
(392.84, 486.54) (401.16, 486.54) (401.16, 495.09) (392.84, 495.09)       /F81 of	<|special_separator|>
(403.65, 486.54) (433.59, 486.54) (433.59, 495.09) (403.65, 495.09)       /F81 Granite	<|special_separator|>
(436.08, 486.54) (456.04, 486.54) (456.04, 495.09) (436.08, 495.09)       /F81 MoE	<|special_separator|>
(458.54, 486.54) (489.87, 486.54) (489.87, 495.09) (458.54, 495.09)       /F81 models,	<|special_separator|>
(492.36, 486.54) (504.00, 486.54) (504.00, 495.09) (492.36, 495.09)       /F81 we	<|special_separator|>
(108.00, 475.58) (121.28, 475.58) (121.28, 484.13) (108.00, 484.13)       /F81 use	<|special_separator|>
(123.77, 475.58) (128.19, 475.58) (128.19, 484.13) (123.77, 484.13)       /F81 a	<|special_separator|>
(130.69, 475.58) (153.37, 475.58) (153.37, 484.13) (130.69, 484.13)       /F81 linear	<|special_separator|>
(155.86, 475.58) (175.78, 475.58) (175.78, 484.13) (155.86, 484.13)       /F81 layer	<|special_separator|>
(178.27, 475.58) (186.02, 475.58) (186.02, 484.13) (178.27, 484.13)       /F81 to	<|special_separator|>
(188.51, 475.58) (213.41, 475.58) (213.41, 484.13) (188.51, 484.13)       /F81 model	<|special_separator|>
(215.91, 475.58) (228.08, 475.58) (228.08, 484.13) (215.91, 484.13)       /F81 the	<|special_separator|>
(230.57, 475.58) (257.13, 475.58) (257.13, 484.13) (230.57, 484.13)       /F81 router:	<|special_separator|>
(231.00, 455.38) (504.67, 455.38) (504.67, 465.04) (231.00, 465.04)       /F55 \begin{equation} \mathbf{s} = \mathbf{W}_{\text{router}}\,\mathbf{x}, \tag{1} \end{equation}	<|special_separator|>
(203.67, 428.48) (504.67, 428.48) (504.67, 450.93) (203.67, 450.93)       /F31 \begin{equation} g(e \mid \mathbf{x}) = \begin{cases} \operatorname{softmax}(\operatorname{Top}k(\mathbf{s}))_i, & \mathbf{s}_i \in \operatorname{Top}k(\mathbf{s}) \\ 0, & \mathbf{s}_i \notin \operatorname{Top}k(\mathbf{s}) \end{cases} \tag{2} \end{equation}	<|special_separator|>
(107.64, 408.00) (132.47, 408.00) (132.47, 416.55) (107.64, 416.55)       /F81 where	<|special_separator|>
(135.29, 407.24) (168.39, 407.24) (168.39, 416.90) (135.29, 416.90)       /F55 $\mathbf{W}_{\mathrm{router}}$	<|special_separator|>
(171.71, 408.00) (178.49, 408.00) (178.49, 416.55) (171.71, 416.55)       /F81 is	<|special_separator|>
(181.32, 408.00) (193.74, 408.00) (193.74, 416.55) (181.32, 416.55)       /F81 the	<|special_separator|>
(196.56, 408.00) (221.80, 408.00) (221.80, 416.55) (196.56, 416.55)       /F81 expert	<|special_separator|>
(224.63, 408.00) (269.79, 408.00) (269.79, 416.55) (224.63, 416.55)       /F81 embedding	<|special_separator|>
(272.61, 408.00) (299.14, 408.00) (299.14, 416.55) (272.61, 416.55)       /F81 matrix	<|special_separator|>
(301.98, 408.00) (310.44, 408.00) (310.44, 416.55) (301.98, 416.55)       /F81 of	<|special_separator|>
(313.27, 408.00) (336.41, 408.00) (336.41, 416.55) (313.27, 416.55)       /F81 shape	<|special_separator|>
(339.23, 408.17) (343.10, 408.17) (343.10, 416.87) (339.23, 416.87)       /F28 (	<|special_separator|>
(343.10, 407.13) (376.33, 407.13) (376.33, 416.87) (343.10, 416.87)       /F31 $N,D_{\mathrm{emb}}$	<|special_separator|>
(376.82, 408.17) (380.70, 408.17) (380.70, 416.87) (376.82, 416.87)       /F28 )	<|special_separator|>
(380.70, 408.00) (383.24, 408.00) (383.24, 416.55) (380.70, 416.55)       /F81 ,	<|special_separator|>
(386.14, 408.00) (400.82, 408.00) (400.82, 416.55) (386.14, 416.55)       /F81 and	<|special_separator|>
(403.65, 408.00) (418.90, 408.00) (418.90, 416.55) (403.65, 416.55)       /F81 Top	<|special_separator|>
(418.90, 408.17) (424.09, 408.17) (424.09, 416.87) (418.90, 416.87)       /F31 k	<|special_separator|>
(427.23, 408.00) (434.00, 408.00) (434.00, 416.55) (427.23, 416.55)       /F81 is	<|special_separator|>
(436.83, 408.00) (449.25, 408.00) (449.25, 416.55) (436.83, 416.55)       /F81 the	<|special_separator|>
(452.07, 408.00) (485.93, 408.00) (485.93, 416.55) (452.07, 416.55)       /F81 operator	<|special_separator|>
(488.76, 408.00) (504.00, 408.00) (504.00, 416.55) (488.76, 416.55)       /F81 that	<|special_separator|>
(108.00, 397.04) (134.56, 397.04) (134.56, 405.60) (108.00, 405.60)       /F81 selects	<|special_separator|>
(137.05, 397.04) (149.22, 397.04) (149.22, 405.60) (137.05, 405.60)       /F81 the	<|special_separator|>
(151.72, 397.04) (164.45, 397.04) (164.45, 405.60) (151.72, 405.60)       /F81 top	<|special_separator|>
(166.94, 397.21) (172.12, 397.21) (172.12, 405.91) (166.94, 405.91)       /F31 k	<|special_separator|>
(174.93, 397.04) (197.08, 397.04) (197.08, 405.60) (174.93, 405.60)       /F81 logits	<|special_separator|>
(199.57, 397.04) (218.93, 397.04) (218.93, 405.60) (199.57, 405.60)       /F81 from	<|special_separator|>
(221.42, 397.20) (225.94, 397.20) (225.94, 405.94) (221.42, 405.94)       /F55 s	<|special_separator|>
(225.94, 397.04) (228.43, 397.04) (228.43, 405.60) (225.94, 405.60)       /F81 .	<|special_separator|>
(231.52, 397.04) (247.01, 397.04) (247.01, 405.60) (231.52, 405.60)       /F81 The	<|special_separator|>
(249.50, 397.04) (267.22, 397.04) (267.22, 405.60) (249.50, 405.60)       /F81 final	<|special_separator|>
(269.71, 397.04) (295.17, 397.04) (295.17, 405.60) (269.71, 405.60)       /F81 output	<|special_separator|>
(297.66, 397.04) (305.96, 397.04) (305.96, 405.60) (297.66, 405.60)       /F81 of	<|special_separator|>
(308.45, 397.04) (320.63, 397.04) (320.63, 405.60) (308.45, 405.60)       /F81 the	<|special_separator|>
(323.12, 397.04) (348.58, 397.04) (348.58, 405.60) (323.12, 405.60)       /F81 SMoE	<|special_separator|>
(351.07, 397.04) (357.72, 397.04) (357.72, 405.60) (351.07, 405.60)       /F81 is	<|special_separator|>
(360.21, 397.04) (377.37, 397.04) (377.37, 405.60) (360.21, 405.60)       /F81 then	<|special_separator|>
(379.86, 397.04) (401.59, 397.04) (401.59, 405.60) (379.86, 405.60)       /F81 given	<|special_separator|>
(404.08, 397.04) (414.05, 397.04) (414.05, 405.60) (404.08, 405.60)       /F81 by	<|special_separator|>
(257.72, 356.29) (354.28, 356.29) (354.28, 386.48) (257.72, 386.48)       /F31 $$y=\sum_{e=1}^{N}g(e\mid\mathbf{x})\cdot f_e(\mathbf{x})$$	<|special_separator|>
(493.05, 367.19) (504.67, 367.19) (504.67, 375.75) (493.05, 375.75)       /F81 (3)	<|special_separator|>
(107.53, 337.61) (131.80, 337.61) (131.80, 346.16) (107.53, 346.16)       /F81 When	<|special_separator|>
(134.84, 337.78) (139.59, 337.78) (139.59, 346.48) (134.84, 346.48)       /F31 g	<|special_separator|>
(139.95, 337.78) (143.82, 337.78) (143.82, 346.48) (139.95, 346.48)       /F28 (	<|special_separator|>
(143.82, 337.78) (148.46, 337.78) (148.46, 346.48) (143.82, 346.48)       /F31 e	<|special_separator|>
(152.25, 337.92) (155.02, 337.92) (155.02, 346.48) (152.25, 346.48)       /F34 |	<|special_separator|>
(158.81, 337.77) (164.86, 337.77) (164.86, 346.51) (158.81, 346.51)       /F55 x	<|special_separator|>
(164.86, 337.78) (168.73, 337.78) (168.73, 346.48) (164.86, 346.48)       /F28 )	<|special_separator|>
(172.52, 337.78) (180.27, 337.78) (180.27, 346.48) (172.52, 346.48)       /F28 =	<|special_separator|>
(184.06, 337.78) (189.04, 337.78) (189.04, 346.48) (184.06, 346.48)       /F28 0	<|special_separator|>
(189.04, 337.61) (191.58, 337.61) (191.58, 346.16) (189.04, 346.16)       /F81 ,	<|special_separator|>
(194.76, 337.78) (199.64, 337.78) (199.64, 346.48) (194.76, 346.48)       /F31 f	<|special_separator|>
(199.64, 336.85) (203.42, 336.85) (203.42, 342.95) (199.64, 342.95)       /F30 e	<|special_separator|>
(203.92, 337.78) (207.79, 337.78) (207.79, 346.48) (203.92, 346.48)       /F28 (	<|special_separator|>
(207.79, 337.77) (213.84, 337.77) (213.84, 346.51) (207.79, 346.51)       /F55 x	<|special_separator|>
(213.84, 337.78) (217.72, 337.78) (217.72, 346.48) (213.84, 346.48)       /F28 )	<|special_separator|>
(220.76, 337.61) (236.57, 337.61) (236.57, 346.16) (220.76, 346.16)       /F81 will	<|special_separator|>
(239.61, 337.61) (252.60, 337.61) (252.60, 346.16) (239.61, 346.16)       /F81 not	<|special_separator|>
(255.65, 337.61) (274.83, 337.61) (274.83, 346.16) (255.65, 346.16)       /F81 need	<|special_separator|>
(277.87, 337.61) (285.77, 337.61) (285.77, 346.16) (277.87, 346.16)       /F81 to	<|special_separator|>
(288.82, 337.61) (298.42, 337.61) (298.42, 346.16) (288.82, 346.16)       /F81 be	<|special_separator|>
(301.45, 337.61) (342.44, 337.61) (342.44, 346.16) (301.45, 346.16)       /F81 evaluated,	<|special_separator|>
(345.62, 337.61) (362.56, 337.61) (362.56, 346.16) (345.62, 346.16)       /F81 thus	<|special_separator|>
(365.61, 337.61) (401.16, 337.61) (401.16, 346.16) (365.61, 346.16)       /F81 reducing	<|special_separator|>
(404.20, 337.61) (455.01, 337.61) (455.01, 346.16) (404.20, 346.16)       /F81 computation	<|special_separator|>
(458.05, 337.61) (474.42, 337.61) (474.42, 346.16) (458.05, 346.16)       /F81 cost	<|special_separator|>
(477.47, 337.61) (504.00, 337.61) (504.00, 346.16) (477.47, 346.16)       /F81 during	<|special_separator|>
(108.00, 326.65) (138.99, 326.65) (138.99, 335.20) (108.00, 335.20)       /F81 training	<|special_separator|>
(141.48, 326.65) (155.87, 326.65) (155.87, 335.20) (141.48, 335.20)       /F81 and	<|special_separator|>
(158.36, 326.65) (197.91, 326.65) (197.91, 335.20) (158.36, 335.20)       /F81 inference.	<|special_separator|>
(201.00, 326.65) (216.49, 326.65) (216.49, 335.20) (201.00, 335.20)       /F81 The	<|special_separator|>
(218.98, 326.65) (233.12, 326.65) (233.12, 335.20) (218.98, 335.20)       /F81 key	<|special_separator|>
(235.61, 326.65) (265.50, 326.65) (265.50, 335.20) (235.61, 335.20)       /F81 designs	<|special_separator|>
(267.99, 326.65) (276.29, 326.65) (276.29, 335.20) (267.99, 335.20)       /F81 of	<|special_separator|>
(278.78, 326.65) (290.95, 326.65) (290.95, 335.20) (278.78, 335.20)       /F81 the	<|special_separator|>
(293.44, 326.65) (323.32, 326.65) (323.32, 335.20) (293.44, 335.20)       /F81 Granite	<|special_separator|>
(325.81, 326.65) (345.74, 326.65) (345.74, 335.20) (325.81, 335.20)       /F81 MoE	<|special_separator|>
(348.23, 326.65) (377.01, 326.65) (377.01, 335.20) (348.23, 335.20)       /F81 models	<|special_separator|>
(379.50, 326.65) (391.67, 326.65) (391.67, 335.20) (379.50, 335.20)       /F81 are	<|special_separator|>
(394.16, 326.65) (442.85, 326.65) (442.85, 335.20) (394.16, 335.20)       /F81 summarized	<|special_separator|>
(445.34, 326.65) (472.21, 326.65) (472.21, 335.20) (445.34, 335.20)       /F81 below:	<|special_separator|>
(108.00, 301.28) (145.64, 301.28) (145.64, 310.24) (108.00, 310.24)       /F90 Dropless	<|special_separator|>
(148.79, 301.28) (175.44, 301.28) (175.44, 310.24) (148.79, 310.24)       /F90 Token	<|special_separator|>
(178.60, 301.28) (216.00, 301.28) (216.00, 310.24) (178.60, 310.24)       /F90 Routing.	<|special_separator|>
(225.96, 301.30) (248.54, 301.30) (248.54, 309.85) (225.96, 309.85)       /F81 Since	<|special_separator|>
(251.69, 301.30) (270.31, 301.30) (270.31, 309.85) (251.69, 309.85)       /F81 each	<|special_separator|>
(273.46, 301.30) (295.94, 301.30) (295.94, 309.85) (273.46, 309.85)       /F81 token	<|special_separator|>
(299.09, 301.30) (326.18, 301.30) (326.18, 309.85) (299.09, 309.85)       /F81 selects	<|special_separator|>
(329.34, 301.30) (358.53, 301.30) (358.53, 309.85) (329.34, 309.85)       /F81 experts	<|special_separator|>
(361.68, 301.30) (421.15, 301.30) (421.15, 309.85) (361.68, 309.85)       /F81 independently,	<|special_separator|>
(424.47, 301.30) (445.92, 301.30) (445.92, 309.85) (424.47, 309.85)       /F81 some	<|special_separator|>
(449.07, 301.30) (478.27, 301.30) (478.27, 309.85) (449.07, 309.85)       /F81 experts	<|special_separator|>
(481.42, 301.30) (504.00, 301.30) (504.00, 309.85) (481.42, 309.85)       /F81 could	<|special_separator|>
(108.00, 290.34) (136.94, 290.34) (136.94, 298.89) (108.00, 298.89)       /F81 receive	<|special_separator|>
(139.67, 290.34) (160.56, 290.34) (160.56, 298.89) (139.67, 298.89)       /F81 more	<|special_separator|>
(163.29, 290.34) (189.72, 290.34) (189.72, 298.89) (163.29, 298.89)       /F81 tokens	<|special_separator|>
(192.47, 290.34) (209.96, 290.34) (209.96, 298.89) (192.47, 298.89)       /F81 than	<|special_separator|>
(212.70, 290.34) (240.07, 290.34) (240.07, 298.89) (212.70, 298.89)       /F81 others.	<|special_separator|>
(243.89, 290.34) (252.36, 290.34) (252.36, 298.89) (243.89, 298.89)       /F81 In	<|special_separator|>
(255.10, 290.34) (289.85, 290.34) (289.85, 298.89) (255.10, 298.89)       /F81 previous	<|special_separator|>
(292.58, 290.34) (312.90, 290.34) (312.90, 298.89) (292.58, 298.89)       /F81 MoE	<|special_separator|>
(315.64, 290.34) (347.54, 290.34) (347.54, 298.89) (315.64, 298.89)       /F81 models,	<|special_separator|>
(350.34, 290.34) (365.48, 290.34) (365.48, 298.89) (350.34, 298.89)       /F81 like	<|special_separator|>
(368.21, 290.34) (396.44, 290.34) (396.44, 298.89) (368.21, 298.89)       /F81 Switch	<|special_separator|>
(399.18, 290.34) (449.62, 290.34) (449.62, 298.89) (399.18, 298.89)       /F81 Transformer	<|special_separator|>
(452.36, 290.34) (480.01, 290.34) (480.01, 298.89) (452.36, 298.89)       /F81 (Fedus	<|special_separator|>
(482.76, 290.34) (490.10, 290.34) (490.10, 298.89) (482.76, 298.89)       /F81 et	<|special_separator|>
(492.83, 290.34) (505.25, 290.34) (505.25, 298.89) (492.83, 298.89)       /F81 al.,	<|special_separator|>
(108.00, 279.38) (131.64, 279.38) (131.64, 287.93) (108.00, 287.93)       /F81 2022)	<|special_separator|>
(134.12, 279.38) (148.75, 279.38) (148.75, 287.93) (134.12, 287.93)       /F81 and	<|special_separator|>
(151.23, 279.38) (206.37, 279.38) (206.37, 287.93) (151.23, 287.93)       /F81 DeepSeek-V2	<|special_separator|>
(208.86, 279.38) (226.31, 279.38) (226.31, 287.93) (208.86, 287.93)       /F81 (Liu	<|special_separator|>
(228.79, 279.38) (236.11, 279.38) (236.11, 287.93) (228.79, 287.93)       /F81 et	<|special_separator|>
(238.59, 279.38) (250.97, 279.38) (250.97, 287.93) (238.59, 287.93)       /F81 al.,	<|special_separator|>
(253.45, 279.38) (284.12, 279.38) (284.12, 287.93) (253.45, 287.93)       /F81 2024a),	<|special_separator|>
(286.61, 279.38) (291.11, 279.38) (291.11, 287.93) (286.61, 287.93)       /F81 a	<|special_separator|>
(293.60, 279.38) (327.36, 279.38) (327.36, 287.93) (293.60, 287.93)       /F81 capacity	<|special_separator|>
(329.84, 279.38) (343.90, 279.38) (343.90, 287.93) (329.84, 287.93)       /F81 cap	<|special_separator|>
(346.38, 279.38) (353.14, 279.38) (353.14, 287.93) (346.38, 287.93)       /F81 is	<|special_separator|>
(355.62, 279.38) (366.88, 279.38) (366.88, 287.93) (355.62, 287.93)       /F81 set	<|special_separator|>
(369.36, 279.38) (381.18, 279.38) (381.18, 287.93) (369.36, 287.93)       /F81 for	<|special_separator|>
(383.66, 279.38) (402.22, 279.38) (402.22, 287.93) (383.66, 287.93)       /F81 each	<|special_separator|>
(404.71, 279.38) (429.88, 279.38) (429.88, 287.93) (404.71, 287.93)       /F81 expert	<|special_separator|>
(432.37, 279.38) (440.81, 279.38) (440.81, 287.93) (432.37, 287.93)       /F81 or	<|special_separator|>
(443.29, 279.38) (472.02, 279.38) (472.02, 287.93) (443.29, 287.93)       /F81 device,	<|special_separator|>
(474.50, 279.38) (489.13, 279.38) (489.13, 287.93) (474.50, 287.93)       /F81 and	<|special_separator|>
(491.62, 279.38) (504.00, 279.38) (504.00, 287.93) (491.62, 287.93)       /F81 the	<|special_separator|>
(108.00, 268.42) (127.96, 268.42) (127.96, 276.97) (108.00, 276.97)       /F81 extra	<|special_separator|>
(130.45, 268.42) (156.63, 268.42) (156.63, 276.97) (130.45, 276.97)       /F81 tokens	<|special_separator|>
(159.12, 268.42) (174.21, 268.42) (174.21, 276.97) (159.12, 276.97)       /F81 that	<|special_separator|>
(176.69, 268.42) (204.48, 268.42) (204.48, 276.97) (176.69, 276.97)       /F81 exceed	<|special_separator|>
(206.97, 268.42) (219.27, 268.42) (219.27, 276.97) (206.97, 276.97)       /F81 the	<|special_separator|>
(221.75, 268.42) (235.72, 268.42) (235.72, 276.97) (221.75, 276.97)       /F81 cap	<|special_separator|>
(238.21, 268.42) (250.49, 268.42) (250.49, 276.97) (238.21, 276.97)       /F81 are	<|special_separator|>
(252.98, 268.42) (288.47, 268.42) (288.47, 276.97) (252.98, 276.97)       /F81 dropped.	<|special_separator|>
(291.56, 268.42) (302.73, 268.42) (302.73, 276.97) (291.56, 276.97)       /F81 As	<|special_separator|>
(305.23, 268.42) (341.40, 268.42) (341.40, 276.97) (305.23, 276.97)       /F81 observed	<|special_separator|>
(343.89, 268.42) (351.72, 268.42) (351.72, 276.97) (343.89, 276.97)       /F81 in	<|special_separator|>
(354.20, 268.42) (373.20, 268.42) (373.20, 276.97) (354.20, 276.97)       /F81 Gale	<|special_separator|>
(375.69, 268.42) (382.96, 268.42) (382.96, 276.97) (375.69, 276.97)       /F81 et	<|special_separator|>
(385.45, 268.42) (395.23, 268.42) (395.23, 276.97) (385.45, 276.97)       /F81 al.	<|special_separator|>
(397.71, 268.42) (427.05, 268.42) (427.05, 276.97) (397.71, 276.97)       /F81 (2023),	<|special_separator|>
(429.55, 268.42) (444.09, 268.42) (444.09, 276.97) (429.55, 276.97)       /F81 this	<|special_separator|>
(446.57, 268.42) (460.54, 268.42) (460.54, 276.97) (446.57, 276.97)       /F81 cap	<|special_separator|>
(463.03, 268.42) (504.35, 268.42) (504.35, 276.97) (463.03, 276.97)       /F81 negatively	<|special_separator|>
(108.00, 257.46) (134.22, 257.46) (134.22, 266.01) (108.00, 266.01)       /F81 affects	<|special_separator|>
(136.72, 257.46) (148.86, 257.46) (148.86, 266.01) (136.72, 266.01)       /F81 the	<|special_separator|>
(151.36, 257.46) (176.19, 257.46) (176.19, 266.01) (151.36, 266.01)       /F81 model	<|special_separator|>
(178.69, 257.46) (209.59, 257.46) (209.59, 266.01) (178.69, 266.01)       /F81 training	<|special_separator|>
(212.09, 257.46) (244.10, 257.46) (244.10, 266.01) (212.09, 266.01)       /F81 stability	<|special_separator|>
(246.60, 257.46) (260.94, 257.46) (260.94, 266.01) (246.60, 266.01)       /F81 and	<|special_separator|>
(263.45, 257.46) (281.38, 257.46) (281.38, 266.01) (263.45, 266.01)       /F81 loss.	<|special_separator|>
(284.49, 257.46) (292.77, 257.46) (292.77, 266.01) (284.49, 266.01)       /F81 In	<|special_separator|>
(295.27, 257.46) (308.51, 257.46) (308.51, 266.01) (295.27, 266.01)       /F81 our	<|special_separator|>
(311.01, 257.46) (344.40, 257.46) (344.40, 266.01) (311.01, 266.01)       /F81 training,	<|special_separator|>
(346.90, 257.46) (358.48, 257.46) (358.48, 266.01) (346.90, 266.01)       /F81 we	<|special_separator|>
(360.98, 257.46) (374.22, 257.46) (374.22, 266.01) (360.98, 266.01)       /F81 use	<|special_separator|>
(376.72, 257.46) (424.17, 257.46) (424.17, 266.01) (376.72, 266.01)       /F81 ScatterMoE	<|special_separator|>
(426.67, 257.46) (444.62, 257.46) (444.62, 266.01) (426.67, 266.01)       /F81 (Tan	<|special_separator|>
(447.12, 257.46) (454.29, 257.46) (454.29, 266.01) (447.12, 266.01)       /F81 et	<|special_separator|>
(456.80, 257.46) (468.94, 257.46) (468.94, 266.01) (456.80, 266.01)       /F81 al.,	<|special_separator|>
(471.43, 257.46) (497.08, 257.46) (497.08, 266.01) (471.43, 266.01)       /F81 2024),	<|special_separator|>
(499.59, 257.46) (504.00, 257.46) (504.00, 266.01) (499.59, 266.01)       /F81 a	<|special_separator|>
(108.00, 246.50) (141.21, 246.50) (141.21, 255.05) (108.00, 255.05)       /F81 dropless	<|special_separator|>
(143.70, 246.50) (163.62, 246.50) (163.62, 255.05) (143.70, 255.05)       /F81 MoE	<|special_separator|>
(166.11, 246.50) (231.15, 246.50) (231.15, 255.05) (166.11, 255.05)       /F81 implementation,	<|special_separator|>
(233.64, 246.50) (241.39, 246.50) (241.39, 255.05) (233.64, 255.05)       /F81 to	<|special_separator|>
(243.88, 246.50) (265.62, 246.50) (265.62, 255.05) (243.88, 255.05)       /F81 avoid	<|special_separator|>
(268.11, 246.50) (290.15, 246.50) (290.15, 255.05) (268.11, 255.05)       /F81 token	<|special_separator|>
(292.64, 246.50) (328.61, 246.50) (328.61, 255.05) (292.64, 255.05)       /F81 dropping	<|special_separator|>
(331.10, 246.50) (345.49, 246.50) (345.49, 255.05) (331.10, 255.05)       /F81 and	<|special_separator|>
(347.98, 246.50) (380.89, 246.50) (380.89, 255.05) (347.98, 255.05)       /F81 improve	<|special_separator|>
(383.38, 246.50) (414.37, 246.50) (414.37, 255.05) (383.38, 255.05)       /F81 training	<|special_separator|>
(416.86, 246.50) (457.59, 246.50) (457.59, 255.05) (416.86, 255.05)       /F81 efficiency.	<|special_separator|>
(108.00, 221.13) (161.81, 221.13) (161.81, 230.08) (108.00, 230.08)       /F90 Fine-grained	<|special_separator|>
(164.28, 221.13) (199.34, 221.13) (199.34, 230.08) (164.28, 230.08)       /F90 Experts.	<|special_separator|>
(209.30, 221.14) (236.47, 221.14) (236.47, 229.69) (209.30, 229.69)       /F81 Recent	<|special_separator|>
(238.94, 221.14) (266.12, 221.14) (266.12, 229.69) (238.94, 229.69)       /F81 studies	<|special_separator|>
(268.61, 221.14) (311.82, 221.14) (311.82, 229.69) (268.61, 229.69)       /F81 (Krajewski	<|special_separator|>
(314.31, 221.14) (321.37, 221.14) (321.37, 229.69) (314.31, 229.69)       /F81 et	<|special_separator|>
(323.85, 221.14) (335.80, 221.14) (335.80, 229.69) (323.85, 229.69)       /F81 al.,	<|special_separator|>
(338.27, 221.14) (360.56, 221.14) (360.56, 229.69) (338.27, 229.69)       /F81 2024;	<|special_separator|>
(363.05, 221.14) (377.17, 221.14) (377.17, 229.69) (363.05, 229.69)       /F81 Dai	<|special_separator|>
(379.65, 221.14) (386.71, 221.14) (386.71, 229.69) (379.65, 229.69)       /F81 et	<|special_separator|>
(389.19, 221.14) (401.14, 221.14) (401.14, 229.69) (389.19, 229.69)       /F81 al.,	<|special_separator|>
(403.63, 221.14) (426.45, 221.14) (426.45, 229.69) (403.63, 229.69)       /F81 2024)	<|special_separator|>
(428.93, 221.14) (458.28, 221.14) (458.28, 229.69) (428.93, 229.69)       /F81 suggest	<|special_separator|>
(460.75, 221.14) (475.43, 221.14) (475.43, 229.69) (460.75, 229.69)       /F81 that	<|special_separator|>
(477.91, 221.14) (504.00, 221.14) (504.00, 229.69) (477.91, 229.69)       /F81 setting	<|special_separator|>
(108.00, 210.18) (120.42, 210.18) (120.42, 218.74) (108.00, 218.74)       /F81 the	<|special_separator|>
(123.04, 210.18) (138.84, 210.18) (138.84, 218.74) (123.04, 218.74)       /F81 size	<|special_separator|>
(141.47, 210.18) (149.94, 210.18) (149.94, 218.74) (141.47, 218.74)       /F81 of	<|special_separator|>
(152.56, 210.18) (181.76, 210.18) (181.76, 218.74) (152.56, 218.74)       /F81 experts	<|special_separator|>
(184.39, 210.18) (192.29, 210.18) (192.29, 218.74) (184.39, 218.74)       /F81 in	<|special_separator|>
(194.91, 210.18) (215.24, 210.18) (215.24, 218.74) (194.91, 218.74)       /F81 MoE	<|special_separator|>
(217.87, 210.18) (225.78, 210.18) (225.78, 218.74) (217.87, 218.74)       /F81 to	<|special_separator|>
(228.40, 210.18) (254.36, 210.18) (254.36, 218.74) (228.40, 218.74)       /F81 mirror	<|special_separator|>
(256.99, 210.18) (269.41, 210.18) (269.41, 218.74) (256.99, 218.74)       /F81 the	<|special_separator|>
(272.03, 210.18) (324.97, 210.18) (324.97, 218.74) (272.03, 218.74)       /F81 feed-forward	<|special_separator|>
(327.60, 210.18) (347.91, 210.18) (347.91, 218.74) (327.60, 218.74)       /F81 layer	<|special_separator|>
(350.53, 210.18) (357.31, 210.18) (357.31, 218.74) (350.53, 218.74)       /F81 is	<|special_separator|>
(359.94, 210.18) (372.93, 210.18) (372.93, 218.74) (359.94, 218.74)       /F81 not	<|special_separator|>
(375.55, 210.18) (409.15, 210.18) (409.15, 218.74) (375.55, 218.74)       /F81 optimal.	<|special_separator|>
(412.64, 210.18) (444.53, 210.18) (444.53, 218.74) (412.64, 218.74)       /F81 Instead,	<|special_separator|>
(447.19, 210.18) (488.96, 210.18) (488.96, 218.74) (447.19, 218.74)       /F81 increasing	<|special_separator|>
(491.58, 210.18) (504.00, 210.18) (504.00, 218.74) (491.58, 218.74)       /F81 the	<|special_separator|>
(108.00, 199.22) (133.22, 199.22) (133.22, 207.78) (108.00, 207.78)       /F81 expert	<|special_separator|>
(135.69, 199.22) (182.14, 199.22) (182.14, 207.78) (135.69, 207.78)       /F81 granularity,	<|special_separator|>
(184.62, 199.22) (215.63, 199.22) (215.63, 207.78) (184.62, 207.78)       /F81 number	<|special_separator|>
(218.11, 199.22) (226.56, 199.22) (226.56, 207.78) (218.11, 207.78)       /F81 of	<|special_separator|>
(229.04, 199.22) (260.75, 199.22) (260.75, 207.78) (229.04, 207.78)       /F81 experts,	<|special_separator|>
(263.23, 199.22) (277.89, 199.22) (277.89, 207.78) (263.23, 207.78)       /F81 and	<|special_separator|>
(280.37, 199.22) (311.38, 199.22) (311.38, 207.78) (280.37, 207.78)       /F81 number	<|special_separator|>
(313.86, 199.22) (322.32, 199.22) (322.32, 207.78) (313.86, 207.78)       /F81 of	<|special_separator|>
(324.79, 199.22) (360.94, 199.22) (360.94, 207.78) (324.79, 207.78)       /F81 activated	<|special_separator|>
(363.42, 199.22) (392.59, 199.22) (392.59, 207.78) (363.42, 207.78)       /F81 experts	<|special_separator|>
(395.06, 199.22) (417.62, 199.22) (417.62, 207.78) (395.06, 207.78)       /F81 could	<|special_separator|>
(420.10, 199.22) (453.36, 199.22) (453.36, 207.78) (420.10, 207.78)       /F81 increase	<|special_separator|>
(455.83, 199.22) (468.24, 199.22) (468.24, 207.78) (455.83, 207.78)       /F81 the	<|special_separator|>
(470.72, 199.22) (504.00, 199.22) (504.00, 207.78) (470.72, 207.78)       /F81 possible	<|special_separator|>
(108.00, 188.26) (160.62, 188.26) (160.62, 196.82) (108.00, 196.82)       /F81 combinations	<|special_separator|>
(162.83, 188.26) (170.96, 188.26) (170.96, 196.82) (162.83, 196.82)       /F81 of	<|special_separator|>
(173.18, 188.26) (201.23, 188.26) (201.23, 196.82) (173.18, 196.82)       /F81 experts	<|special_separator|>
(203.45, 188.26) (217.54, 188.26) (217.54, 196.82) (203.45, 196.82)       /F81 and	<|special_separator|>
(219.76, 188.26) (241.46, 188.26) (241.46, 196.82) (219.76, 196.82)       /F81 result	<|special_separator|>
(243.67, 188.26) (251.27, 188.26) (251.27, 196.82) (243.67, 196.82)       /F81 in	<|special_separator|>
(253.48, 188.26) (275.71, 188.26) (275.71, 196.82) (253.48, 196.82)       /F81 better	<|special_separator|>
(277.93, 188.26) (302.34, 188.26) (302.34, 196.82) (277.93, 196.82)       /F81 model	<|special_separator|>
(304.56, 188.26) (356.33, 188.26) (356.33, 196.82) (304.56, 196.82)       /F81 performance.	<|special_separator|>
(359.33, 188.26) (399.07, 188.26) (399.07, 196.82) (359.33, 196.82)       /F81 Following	<|special_separator|>
(401.29, 188.26) (421.36, 188.26) (421.36, 196.82) (401.29, 196.82)       /F81 these	<|special_separator|>
(423.57, 188.26) (475.11, 188.26) (475.11, 196.82) (423.57, 196.82)       /F81 observations,	<|special_separator|>
(477.39, 188.26) (488.77, 188.26) (488.77, 196.82) (477.39, 196.82)       /F81 we	<|special_separator|>
(490.99, 188.26) (504.00, 188.26) (504.00, 196.82) (490.99, 196.82)       /F81 use	<|special_separator|>
(108.00, 177.31) (155.18, 177.31) (155.18, 185.86) (108.00, 185.86)       /F81 fine-grained	<|special_separator|>
(157.49, 177.31) (185.54, 177.31) (185.54, 185.86) (157.49, 185.86)       /F81 experts	<|special_separator|>
(187.84, 177.31) (201.94, 177.31) (201.94, 185.86) (187.84, 185.86)       /F81 and	<|special_separator|>
(204.26, 177.31) (208.59, 177.31) (208.59, 185.86) (204.26, 185.86)       /F81 a	<|special_separator|>
(210.91, 177.31) (233.50, 177.31) (233.50, 185.86) (210.91, 185.86)       /F81 larger	<|special_separator|>
(235.81, 177.31) (265.64, 177.31) (265.64, 185.86) (235.81, 185.86)       /F81 number	<|special_separator|>
(267.95, 177.31) (276.09, 177.31) (276.09, 185.86) (267.95, 185.86)       /F81 of	<|special_separator|>
(278.40, 177.31) (313.14, 177.31) (313.14, 185.86) (278.40, 185.86)       /F81 activated	<|special_separator|>
(315.45, 177.31) (343.50, 177.31) (343.50, 185.86) (315.45, 185.86)       /F81 experts	<|special_separator|>
(345.82, 177.31) (353.41, 177.31) (353.41, 185.86) (345.82, 185.86)       /F81 in	<|special_separator|>
(355.73, 177.31) (385.01, 177.31) (385.01, 185.86) (355.73, 185.86)       /F81 Granite	<|special_separator|>
(387.32, 177.31) (399.52, 177.31) (399.52, 185.86) (387.32, 185.86)       /F81 3.0	<|special_separator|>
(401.84, 177.31) (421.36, 177.31) (421.36, 185.86) (401.84, 185.86)       /F81 MoE	<|special_separator|>
(423.68, 177.31) (454.32, 177.31) (454.32, 185.86) (423.68, 185.86)       /F81 models.	<|special_separator|>
(457.35, 177.31) (505.24, 177.31) (505.24, 185.86) (457.35, 185.86)       /F81 Specifically,	<|special_separator|>
(107.64, 166.35) (119.26, 166.35) (119.26, 174.90) (107.64, 174.90)       /F81 we	<|special_separator|>
(121.75, 166.35) (135.03, 166.35) (135.03, 174.90) (121.75, 174.90)       /F81 use	<|special_separator|>
(137.52, 166.35) (141.94, 166.35) (141.94, 174.90) (137.52, 174.90)       /F81 a	<|special_separator|>
(144.43, 166.35) (160.48, 166.35) (160.48, 174.90) (144.43, 174.90)       /F81 top-	<|special_separator|>
(160.48, 166.51) (165.67, 166.51) (165.67, 175.22) (160.48, 175.22)       /F31 k	<|special_separator|>
(168.47, 166.35) (176.77, 166.35) (176.77, 174.90) (168.47, 174.90)       /F81 of	<|special_separator|>
(179.26, 166.35) (184.25, 166.35) (184.25, 174.90) (179.26, 174.90)       /F81 8	<|special_separator|>
(186.74, 166.35) (199.47, 166.35) (199.47, 174.90) (186.74, 174.90)       /F81 out	<|special_separator|>
(201.96, 166.35) (210.26, 166.35) (210.26, 174.90) (201.96, 174.90)       /F81 of	<|special_separator|>
(212.75, 166.35) (222.71, 166.35) (222.71, 174.90) (212.75, 174.90)       /F81 32	<|special_separator|>
(225.20, 166.35) (239.59, 166.35) (239.59, 174.90) (225.20, 174.90)       /F81 and	<|special_separator|>
(242.08, 166.35) (252.04, 166.35) (252.04, 174.90) (242.08, 174.90)       /F81 40	<|special_separator|>
(254.53, 166.35) (283.15, 166.35) (283.15, 174.90) (254.53, 174.90)       /F81 experts	<|special_separator|>
(285.64, 166.35) (333.38, 166.35) (333.38, 174.90) (285.64, 174.90)       /F81 respectively	<|special_separator|>
(335.88, 166.35) (347.49, 166.35) (347.49, 174.90) (335.88, 174.90)       /F81 for	<|special_separator|>
(349.98, 166.35) (362.16, 166.35) (362.16, 174.90) (349.98, 174.90)       /F81 the	<|special_separator|>
(364.65, 166.35) (376.27, 166.35) (376.27, 174.90) (364.65, 174.90)       /F81 1B	<|special_separator|>
(378.76, 166.35) (393.15, 166.35) (393.15, 174.90) (378.76, 174.90)       /F81 and	<|special_separator|>
(395.64, 166.35) (407.27, 166.35) (407.27, 174.90) (395.64, 174.90)       /F81 3B	<|special_separator|>
(409.76, 166.35) (429.68, 166.35) (429.68, 174.90) (409.76, 174.90)       /F81 MoE	<|special_separator|>
(432.17, 166.35) (463.45, 166.35) (463.45, 174.90) (432.17, 174.90)       /F81 models.	<|special_separator|>
(108.00, 140.97) (129.77, 140.97) (129.77, 149.93) (108.00, 149.93)       /F90 Load	<|special_separator|>
(132.27, 140.97) (174.17, 140.97) (174.17, 149.93) (132.27, 149.93)       /F90 Balancing	<|special_separator|>
(176.67, 140.97) (198.16, 140.97) (198.16, 149.93) (176.67, 149.93)       /F90 Loss.	<|special_separator|>
(208.12, 140.99) (218.21, 140.99) (218.21, 149.54) (208.12, 149.54)       /F81 To	<|special_separator|>
(220.70, 140.99) (242.07, 140.99) (242.07, 149.54) (220.70, 149.54)       /F81 avoid	<|special_separator|>
(244.56, 140.99) (272.85, 140.99) (272.85, 149.54) (244.56, 149.54)       /F81 routing	<|special_separator|>
(275.35, 140.99) (300.82, 140.99) (300.82, 149.54) (275.35, 149.54)       /F81 tokens	<|special_separator|>
(303.31, 140.99) (344.10, 140.99) (344.10, 149.54) (303.31, 149.54)       /F81 repeatedly	<|special_separator|>
(346.60, 140.99) (354.22, 140.99) (354.22, 149.54) (346.60, 149.54)       /F81 to	<|special_separator|>
(356.71, 140.99) (368.67, 140.99) (368.67, 149.54) (356.71, 149.54)       /F81 the	<|special_separator|>
(371.17, 140.99) (391.30, 140.99) (391.30, 149.54) (371.17, 149.54)       /F81 same	<|special_separator|>
(393.79, 140.99) (418.12, 140.99) (418.12, 149.54) (393.79, 149.54)       /F81 expert	<|special_separator|>
(420.61, 140.99) (434.75, 140.99) (434.75, 149.54) (420.61, 149.54)       /F81 and	<|special_separator|>
(437.25, 140.99) (467.62, 140.99) (467.62, 149.54) (437.25, 149.54)       /F81 wasting	<|special_separator|>
(470.11, 140.99) (482.08, 140.99) (482.08, 149.54) (470.11, 149.54)       /F81 the	<|special_separator|>
(484.58, 140.99) (504.00, 140.99) (504.00, 149.54) (484.58, 149.54)       /F81 extra	<|special_separator|>
(108.00, 130.03) (140.80, 130.03) (140.80, 138.58) (108.00, 138.58)       /F81 capacity	<|special_separator|>
(143.28, 130.03) (150.94, 130.03) (150.94, 138.58) (143.28, 138.58)       /F81 in	<|special_separator|>
(153.42, 130.03) (173.64, 130.03) (173.64, 138.58) (153.42, 138.58)       /F81 other	<|special_separator|>
(176.12, 130.03) (206.86, 130.03) (206.86, 138.58) (176.12, 138.58)       /F81 experts,	<|special_separator|>
(209.35, 130.03) (220.83, 130.03) (220.83, 138.58) (209.35, 138.58)       /F81 we	<|special_separator|>
(223.31, 130.03) (236.43, 130.03) (236.43, 138.58) (223.31, 138.58)       /F81 use	<|special_separator|>
(238.91, 130.03) (250.94, 130.03) (250.94, 138.58) (238.91, 138.58)       /F81 the	<|special_separator|>
(253.42, 130.03) (318.32, 130.03) (318.32, 138.58) (253.42, 138.58)       /F81 frequency-based	<|special_separator|>
(320.80, 130.03) (355.79, 130.03) (355.79, 138.58) (320.80, 138.58)       /F81 auxiliary	<|special_separator|>
(358.27, 130.03) (373.58, 130.03) (373.58, 138.58) (358.27, 138.58)       /F81 loss	<|special_separator|>
(376.07, 130.03) (418.16, 130.03) (418.16, 138.58) (376.07, 138.58)       /F81 introduced	<|special_separator|>
(420.65, 130.03) (428.31, 130.03) (428.31, 138.58) (420.65, 138.58)       /F81 in	<|special_separator|>
(430.79, 130.03) (454.31, 130.03) (454.31, 138.58) (430.79, 138.58)       /F81 Fedus	<|special_separator|>
(456.79, 130.03) (463.89, 130.03) (463.89, 138.58) (456.79, 138.58)       /F81 et	<|special_separator|>
(466.38, 130.03) (475.94, 130.03) (475.94, 138.58) (466.38, 138.58)       /F81 al.	<|special_separator|>
(478.42, 130.03) (504.67, 130.03) (504.67, 138.58) (478.42, 138.58)       /F81 (2022)	<|special_separator|>
(271.57, 100.48) (278.44, 100.48) (278.44, 109.05) (271.57, 109.05)       /F34 L	<|special_separator|>
(278.44, 099.42) (281.94, 099.42) (281.94, 105.51) (278.44, 105.51)       /F30 b	<|special_separator|>
(285.21, 100.34) (292.96, 100.34) (292.96, 109.05) (285.21, 109.05)       /F28 =	<|special_separator|>
(295.72, 100.34) (303.73, 100.34) (303.73, 109.05) (295.72, 109.05)       /F31 N	<|special_separator|>
(310.14, 113.37) (316.45, 113.37) (316.45, 119.46) (310.14, 119.46)       /F30 N	<|special_separator|>
(306.48, 105.73) (320.87, 105.73) (320.87, 112.11) (306.48, 112.11)       /F21 ∑	<|special_separator|>
(307.22, 089.16) (310.04, 089.16) (310.04, 095.25) (307.22, 095.25)       /F30 i	<|special_separator|>
(310.04, 089.16) (320.12, 089.16) (320.12, 095.25) (310.04, 095.25)       /F27 =1	<|special_separator|>
(322.53, 100.34) (327.40, 100.34) (327.40, 109.05) (322.53, 109.05)       /F31 f	<|special_separator|>
(327.40, 099.42) (330.22, 099.42) (330.22, 105.51) (327.40, 105.51)       /F30 i	<|special_separator|>
(330.72, 100.34) (337.12, 100.34) (337.12, 109.05) (330.72, 109.05)       /F31 P	<|special_separator|>
(337.12, 099.42) (339.94, 099.42) (339.94, 105.51) (337.12, 105.51)       /F30 i	<|special_separator|>
(493.05, 100.18) (504.67, 100.18) (504.67, 108.73) (493.05, 108.73)       /F81 (4)	<|special_separator|>
(107.64, 071.03) (132.47, 071.03) (132.47, 079.58) (107.64, 079.58)       /F81 where	<|special_separator|>
(135.09, 071.19) (143.10, 071.19) (143.10, 079.90) (135.09, 079.90)       /F31 N	<|special_separator|>
(146.81, 071.03) (153.58, 071.03) (153.58, 079.58) (146.81, 079.58)       /F81 is	<|special_separator|>
(156.21, 071.03) (168.62, 071.03) (168.62, 079.58) (156.21, 079.58)       /F81 the	<|special_separator|>
(171.25, 071.03) (202.29, 071.03) (202.29, 079.58) (171.25, 079.58)       /F81 number	<|special_separator|>
(204.92, 071.03) (213.39, 071.03) (213.39, 079.58) (204.92, 079.58)       /F81 of	<|special_separator|>
(216.01, 071.03) (247.74, 071.03) (247.74, 079.58) (216.01, 079.58)       /F81 experts,	<|special_separator|>
(250.40, 071.19) (255.28, 071.19) (255.28, 079.90) (250.40, 079.90)       /F31 f	<|special_separator|>
(255.28, 070.27) (258.10, 070.27) (258.10, 076.37) (255.28, 076.37)       /F30 i	<|special_separator|>
(261.22, 071.03) (268.00, 071.03) (268.00, 079.58) (261.22, 079.58)       /F81 is	<|special_separator|>
(270.62, 071.03) (283.04, 071.03) (283.04, 079.58) (270.62, 079.58)       /F81 the	<|special_separator|>
(285.66, 071.03) (317.26, 071.03) (317.26, 079.58) (285.66, 079.58)       /F81 fraction	<|special_separator|>
(319.90, 071.03) (328.36, 071.03) (328.36, 079.58) (319.90, 079.58)       /F81 of	<|special_separator|>
(330.98, 071.03) (357.41, 071.03) (357.41, 079.58) (330.98, 079.58)       /F81 tokens	<|special_separator|>
(360.04, 071.03) (403.50, 071.03) (403.50, 079.58) (360.04, 079.58)       /F81 dispatched	<|special_separator|>
(406.12, 071.03) (414.02, 071.03) (414.02, 079.58) (406.12, 079.58)       /F81 to	<|special_separator|>
(416.66, 071.03) (441.90, 071.03) (441.90, 079.58) (416.66, 079.58)       /F81 expert	<|special_separator|>
(444.52, 071.03) (449.89, 071.03) (449.89, 079.58) (444.52, 079.58)       /F81 i,	<|special_separator|>
(452.55, 071.03) (467.22, 071.03) (467.22, 079.58) (452.55, 079.58)       /F81 and	<|special_separator|>
(469.84, 071.19) (476.24, 071.19) (476.24, 079.90) (469.84, 079.90)       /F31 P	<|special_separator|>
(476.24, 070.27) (479.06, 070.27) (479.06, 076.37) (476.24, 076.37)       /F30 i	<|special_separator|>
(482.18, 071.03) (488.96, 071.03) (488.96, 079.58) (482.18, 079.58)       /F81 is	<|special_separator|>
(491.58, 071.03) (504.00, 071.03) (504.00, 079.58) (491.58, 079.58)       /F81 the	<|special_separator|>
(108.00, 060.07) (139.20, 060.07) (139.20, 068.62) (108.00, 068.62)       /F81 fraction	<|special_separator|>
(141.69, 060.07) (150.05, 060.07) (150.05, 068.62) (141.69, 068.62)       /F81 of	<|special_separator|>
(152.52, 060.07) (164.78, 060.07) (164.78, 068.62) (152.52, 068.62)       /F81 the	<|special_separator|>
(167.27, 060.07) (191.23, 060.07) (191.23, 068.62) (167.27, 068.62)       /F81 router	<|special_separator|>
(193.72, 060.07) (237.75, 060.07) (237.75, 068.62) (193.72, 068.62)       /F81 probability	<|special_separator|>
(240.24, 060.07) (276.45, 060.07) (276.45, 068.62) (240.24, 068.62)       /F81 allocated	<|special_separator|>
(278.93, 060.07) (290.63, 060.07) (290.63, 068.62) (278.93, 068.62)       /F81 for	<|special_separator|>
(293.12, 060.07) (318.04, 060.07) (318.04, 068.62) (293.12, 068.62)       /F81 expert	<|special_separator|>
(320.52, 060.24) (323.95, 060.24) (323.95, 068.94) (320.52, 068.94)       /F31 i	<|special_separator|>
(323.96, 060.07) (326.46, 060.07) (326.46, 068.62) (323.96, 068.62)       /F81 .	<|special_separator|>
(329.55, 060.07) (372.81, 060.07) (372.81, 068.62) (329.55, 068.62)       /F81 Intuitively,	<|special_separator|>
(375.30, 060.07) (389.80, 060.07) (389.80, 068.62) (375.30, 068.62)       /F81 this	<|special_separator|>
(392.29, 060.07) (407.90, 060.07) (407.90, 068.62) (392.29, 068.62)       /F81 loss	<|special_separator|>
(410.38, 060.07) (447.16, 060.07) (447.16, 068.62) (410.38, 068.62)       /F81 penalises	<|special_separator|>
(449.64, 060.07) (493.16, 060.07) (493.16, 068.62) (449.64, 068.62)       /F81 over-usage	<|special_separator|>
(495.64, 060.07) (504.00, 060.07) (504.00, 068.62) (495.64, 068.62)       /F81 of	<|special_separator|>
(303.33, 030.18) (308.32, 030.18) (308.32, 038.74) (303.33, 038.74)       /F81 4