From 8904b7659a3fff94637af328dae415110b6f46d7 Mon Sep 17 00:00:00 2001
From: Waldemar Brodkorb
Date: Sat, 1 Aug 2015 10:41:19 +0200
Subject: update to 4.1.3, which will be the base for stable branch

---
 .../solidrun-imx6/patches/4.1.3/0001-xbian.patch | 171697 ++++++++++++++++++
 1 file changed, 171697 insertions(+)
 create mode 100644 target/arm/solidrun-imx6/patches/4.1.3/0001-xbian.patch

diff --git a/target/arm/solidrun-imx6/patches/4.1.3/0001-xbian.patch b/target/arm/solidrun-imx6/patches/4.1.3/0001-xbian.patch
new file mode 100644
index 000000000..6286c0868
--- /dev/null
+++ b/target/arm/solidrun-imx6/patches/4.1.3/0001-xbian.patch
@@ -0,0 +1,171697 @@
+diff -Nur linux-4.1.3/arch/arm/boot/dts/imx6dl.dtsi linux-xbian-imx6/arch/arm/boot/dts/imx6dl.dtsi
+--- linux-4.1.3/arch/arm/boot/dts/imx6dl.dtsi 2015-07-21 19:10:33.000000000 +0200
++++ linux-xbian-imx6/arch/arm/boot/dts/imx6dl.dtsi 2015-07-27 23:13:00.299912248 +0200
+@@ -60,17 +60,103 @@
+ };
+
+ soc {
+- ocram: sram@00900000 {
++ busfreq { /* BUSFREQ */
++ compatible = "fsl,imx6_busfreq";
++ clocks = <&clks 171>, <&clks 6>, <&clks 11>, <&clks 104>, <&clks 172>, <&clks 58>,
++ <&clks 18>, <&clks 60>, <&clks 20>, <&clks 3>, <&clks 22>, <&clks 8>;
++ clock-names = "pll2_bus", "pll2_pfd2_396m", "pll2_198m", "arm", "pll3_usb_otg", "periph",
++ "periph_pre", "periph_clk2", "periph_clk2_sel", "osc", "axi_sel", "pll3_pfd1_540m";
++ interrupts = <0 107 0x04>, <0 112 0x4>;
++ interrupt-names = "irq_busfreq_0", "irq_busfreq_1";
++ fsl,max_ddr_freq = <400000000>;
++ };
++
++ gpu@00130000 {
++ compatible = "fsl,imx6dl-gpu", "fsl,imx6q-gpu";
++ reg = <0x00130000 0x4000>, <0x00134000 0x4000>,
++ <0x0 0x0>;
++ reg-names = "iobase_3d", "iobase_2d",
++ "phys_baseaddr";
++ interrupts = <0 9 0x04>, <0 10 0x04>;
++ interrupt-names = "irq_3d", "irq_2d";
++ clocks = <&clks 26>, <&clks 27>,
++ <&clks 121>, <&clks 122>,
++ <&clks 74>;
++ clock-names = "gpu2d_axi_clk", "gpu3d_axi_clk",
++ "gpu2d_clk", "gpu3d_clk",
++ "gpu3d_shader_clk";
++ resets = <&src 0>, <&src 3>;
++ reset-names = "gpu3d", "gpu2d";
++ power-domains = <&gpc 1>;
++ };
++
++ hdmi_core: hdmi_core@00120000 {
++ compatible = "fsl,imx6q-hdmi-core";
++ reg = <0x00120000 0x9000>;
++ clocks = <&clks 124>, <&clks 123>;
++ clock-names = "hdmi_isfr", "hdmi_iahb";
++ status = "disabled";
++ };
++
++ hdmi_video: hdmi_video@020e0000 {
++ compatible = "fsl,imx6q-hdmi-video";
++ reg = <0x020e0000 0x1000>;
++ reg-names = "hdmi_gpr";
++ interrupts = <0 115 0x04>;
++ clocks = <&clks 124>, <&clks 123>;
++ clock-names = "hdmi_isfr", "hdmi_iahb";
++ status = "disabled";
++ };
++
++ hdmi_audio: hdmi_audio@00120000 {
++ compatible = "fsl,imx6q-hdmi-audio";
++ clocks = <&clks 124>, <&clks 123>;
++ clock-names = "hdmi_isfr", "hdmi_iahb";
++ dmas = <&sdma 2 23 0>;
++ dma-names = "tx";
++ status = "disabled";
++ };
++
++ hdmi_cec: hdmi_cec@00120000 {
++ compatible = "fsl,imx6q-hdmi-cec";
++ interrupts = <0 115 0x04>;
++ status = "disabled";
++ };
++
++ ocrams: sram@00900000 {
++ compatible = "fsl,lpm-sram";
++ reg = <0x00900000 0x4000>;
++ clocks = <&clks IMX6QDL_CLK_OCRAM>;
++ };
++
++ ocrams_ddr: sram@00904000 {
++ compatible = "fsl,ddr-lpm-sram";
++ reg = <0x00904000 0x1000>;
++ clocks = <&clks IMX6QDL_CLK_OCRAM>;
++ };
++
++ ocram: sram@00905000 {
+ compatible = "mmio-sram";
+- reg = <0x00900000 0x20000>;
++ reg = <0x00905000 0x1B000>;
+ clocks = <&clks IMX6QDL_CLK_OCRAM>;
+ };
+
+ aips1: aips-bus@02000000 {
++ vpu@02040000 {
++ iramsize = <0>;
++ status = "okay";
++ };
++
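The three sram nodes above carve up the 128 KiB of on-chip RAM that the stock 4.1 dtsi exposed as one node: 0x4000 for the low-power-mode code (fsl,lpm-sram), 0x1000 for the DDR self-refresh code (fsl,ddr-lpm-sram), and the remaining 0x1B000 as the general mmio-sram pool (0x4000 + 0x1000 + 0x1B000 = 0x20000, so the window still ends at 0x00920000). The VPU, SDMA, MLB and VDOA nodes elsewhere in this patch point at that pool via iram = <&ocram>, and the busfreq code reaches it through genalloc. A minimal sketch of that lookup, modeled on init_mmdc_ddr3_settings() further down — the helper name is illustrative, and dev_get_gen_pool() is the 4.1-era API (renamed gen_pool_get() in later kernels):

#include <linux/genalloc.h>
#include <linux/of.h>
#include <linux/of_platform.h>

/* Carve 'size' bytes out of the "mmio-sram" OCRAM pool (sketch). */
static void *ocram_carve(size_t size)
{
	struct device_node *np;
	struct platform_device *pdev;
	struct gen_pool *pool;

	np = of_find_compatible_node(NULL, NULL, "mmio-sram");
	if (!np)
		return NULL;

	pdev = of_find_device_by_node(np);
	if (!pdev)
		return NULL;

	pool = dev_get_gen_pool(&pdev->dev);	/* gen_pool_get() on >= 4.2 */
	if (!pool)
		return NULL;

	/*
	 * Returns a CPU virtual address inside OCRAM;
	 * gen_pool_virt_to_phys() yields the physical address
	 * if the block must be handed to hardware.
	 */
	return (void *)gen_pool_alloc(pool, size);
}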
+ iomuxc: iomuxc@020e0000 { + compatible = "fsl,imx6dl-iomuxc"; + }; + ++ dcic2: dcic@020e8000 { ++ clocks = <&clks IMX6QDL_CLK_DCIC1 >, ++ <&clks IMX6QDL_CLK_DCIC2>; /* DCIC2 depend on DCIC1 clock in imx6dl*/ ++ clock-names = "dcic", "disp-axi"; ++ }; ++ + pxp: pxp@020f0000 { + reg = <0x020f0000 0x4000>; + interrupts = <0 98 IRQ_TYPE_LEVEL_HIGH>; +@@ -99,26 +185,13 @@ + }; + }; + }; +- +- display-subsystem { +- compatible = "fsl,imx-display-subsystem"; +- ports = <&ipu1_di0>, <&ipu1_di1>; +- }; +-}; +- +-&hdmi { +- compatible = "fsl,imx6dl-hdmi"; + }; + + &ldb { +- clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, <&clks IMX6QDL_CLK_LDB_DI1_SEL>, +- <&clks IMX6QDL_CLK_IPU1_DI0_SEL>, <&clks IMX6QDL_CLK_IPU1_DI1_SEL>, +- <&clks IMX6QDL_CLK_LDB_DI0>, <&clks IMX6QDL_CLK_LDB_DI1>; ++ clocks = <&clks 33>, <&clks 34>, ++ <&clks 39>, <&clks 40>, ++ <&clks 135>, <&clks 136>; + clock-names = "di0_pll", "di1_pll", + "di0_sel", "di1_sel", + "di0", "di1"; + }; +- +-&vpu { +- compatible = "fsl,imx6dl-vpu", "cnm,coda960"; +-}; +diff -Nur linux-4.1.3/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi linux-xbian-imx6/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi +--- linux-4.1.3/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi 2015-07-27 23:13:00.303898027 +0200 +@@ -45,11 +45,22 @@ + #include + + / { ++ chosen { ++ bootargs = "quiet console=ttymxc0,115200 root=/dev/mmcblk0p2 rw"; ++ }; ++ ++ aliases { ++ mmc0 = &usdhc2; ++ mmc1 = &usdhc1; ++ mxcfb0 = &mxcfb1; ++ }; ++ + ir_recv: ir-receiver { + compatible = "gpio-ir-receiver"; + gpios = <&gpio3 9 1>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_cubox_i_ir>; ++ linux,rc-map-name = "rc-rc6-mce"; + }; + + pwmleds { +@@ -78,6 +89,8 @@ + + reg_usbh1_vbus: usb-h1-vbus { + compatible = "regulator-fixed"; ++ regulator-boot-on; ++ regulator-always-on; + enable-active-high; + gpio = <&gpio1 0 0>; + pinctrl-names = "default"; +@@ -89,6 +102,8 @@ + + reg_usbotg_vbus: usb-otg-vbus { + compatible = "regulator-fixed"; ++ regulator-boot-on; ++ regulator-always-on; + enable-active-high; + gpio = <&gpio3 22 0>; + pinctrl-names = "default"; +@@ -101,8 +116,7 @@ + + sound-spdif { + compatible = "fsl,imx-audio-spdif"; +- model = "Integrated SPDIF"; +- /* IMX6 doesn't implement this yet */ ++ model = "imx-spdif"; + spdif-controller = <&spdif>; + spdif-out; + }; +@@ -118,12 +132,45 @@ + linux,code = ; + }; + }; ++ ++ sound-hdmi { ++ compatible = "fsl,imx6q-audio-hdmi", ++ "fsl,imx-audio-hdmi"; ++ model = "imx-audio-hdmi"; ++ hdmi-controller = <&hdmi_audio>; ++ }; ++ ++ mxcfb1: fb@0 { ++ compatible = "fsl,mxc_sdc_fb"; ++ disp_dev = "hdmi"; ++ interface_pix_fmt = "RGB24"; ++ mode_str ="1920x1080M@60"; ++ default_bpp = <32>; ++ int_clk = <0>; ++ late_init = <0>; ++ status = "okay"; ++ }; ++}; ++ ++&hdmi_core { ++ ipu_id = <0>; ++ disp_id = <0>; ++ status = "okay"; ++}; ++ ++&hdmi_video { ++ fsl,phy_reg_vlev = <0x0294>; ++ fsl,phy_reg_cksymtx = <0x800d>; ++ status = "okay"; ++}; ++ ++&hdmi_audio { ++ status = "okay"; + }; + +-&hdmi { ++&hdmi_cec { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_cubox_i_hdmi>; +- ddc-i2c-bus = <&i2c2>; + status = "okay"; + }; + +@@ -131,7 +178,13 @@ + clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_cubox_i_i2c2>; ++ + status = "okay"; ++ ++ ddc: imx6_hdmi_i2c@50 { ++ compatible = "fsl,imx6-hdmi-i2c"; ++ reg = <0x50>; ++ }; + }; + + &i2c3 { +@@ -228,6 +281,28 @@ + MX6QDL_PAD_EIM_DA8__GPIO3_IO08 0x17059 + >; + }; ++ ++ 
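The two pin groups that follow keep the SD2 muxing of the default group but use stronger pad-control values (0x...b9 and 0x...f9 instead of the default-speed 0x...59), which per the usual i.MX6 pad-control encoding raise the SPEED and drive-strength (DSE) fields for 100 MHz and 200 MHz card clocks. The &usdhc2 update further down binds all three as the "default", "state_100mhz" and "state_200mhz" pinctrl states that the esdhc driver switches between when retiming the bus (note the same node also sets no-1-8-v, which should keep the card at 3.3 V signaling, so on this board the faster states are largely headroom). The switching itself is the generic pinctrl consumer API; a minimal sketch, with error handling trimmed, the state name taken from this file, and the function name illustrative:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/pinctrl/consumer.h>

static int select_fast_pads(struct device *dev)
{
	struct pinctrl *p = devm_pinctrl_get(dev);
	struct pinctrl_state *s;

	if (IS_ERR(p))
		return PTR_ERR(p);

	/* "state_100mhz" must match a pinctrl-names entry in the DT node. */
	s = pinctrl_lookup_state(p, "state_100mhz");
	if (IS_ERR(s))
		return PTR_ERR(s);

	return pinctrl_select_state(p, s);
}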
pinctrl_cubox_i_usdhc2_100mhz: cubox-i-usdhc2-100mhz { ++ fsl,pins = < ++ MX6QDL_PAD_SD2_CMD__SD2_CMD 0x170b9 ++ MX6QDL_PAD_SD2_CLK__SD2_CLK 0x100b9 ++ MX6QDL_PAD_SD2_DAT0__SD2_DATA0 0x170b9 ++ MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x170b9 ++ MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x170b9 ++ MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x130b9 ++ >; ++ }; ++ ++ pinctrl_cubox_i_usdhc2_200mhz: cubox-i-usdhc2-200mhz { ++ fsl,pins = < ++ MX6QDL_PAD_SD2_CMD__SD2_CMD 0x170f9 ++ MX6QDL_PAD_SD2_CLK__SD2_CLK 0x100f9 ++ MX6QDL_PAD_SD2_DAT0__SD2_DATA0 0x170f9 ++ MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x170f9 ++ MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x170f9 ++ MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x130f9 ++ >; ++ }; + }; + }; + +@@ -256,9 +331,24 @@ + }; + + &usdhc2 { +- pinctrl-names = "default"; ++ pinctrl-names = "default", "state_100mhz", "state_200mhz"; + pinctrl-0 = <&pinctrl_cubox_i_usdhc2_aux &pinctrl_cubox_i_usdhc2>; ++ pinctrl-1 = <&pinctrl_cubox_i_usdhc2_aux &pinctrl_cubox_i_usdhc2_100mhz>; ++ pinctrl-2 = <&pinctrl_cubox_i_usdhc2_aux &pinctrl_cubox_i_usdhc2_200mhz>; + vmmc-supply = <®_3p3v>; + cd-gpios = <&gpio1 4 0>; + status = "okay"; ++ no-1-8-v; ++}; ++ ++&dcic1 { ++ dcic_id = <0>; ++ dcic_mux = "dcic-hdmi"; ++ status = "okay"; ++}; ++ ++&dcic2 { ++ dcic_id = <1>; ++ dcic_mux = "dcic-lvds1"; ++ status = "okay"; + }; +diff -Nur linux-4.1.3/arch/arm/boot/dts/imx6qdl.dtsi linux-xbian-imx6/arch/arm/boot/dts/imx6qdl.dtsi +--- linux-4.1.3/arch/arm/boot/dts/imx6qdl.dtsi 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/boot/dts/imx6qdl.dtsi 2015-07-27 23:13:00.303898027 +0200 +@@ -14,6 +14,7 @@ + #include + + #include "skeleton.dtsi" ++#include + + / { + aliases { +@@ -30,6 +31,7 @@ + i2c0 = &i2c1; + i2c1 = &i2c2; + i2c2 = &i2c3; ++ ipu0 = &ipu1; + mmc0 = &usdhc1; + mmc1 = &usdhc2; + mmc2 = &usdhc3; +@@ -79,6 +81,10 @@ + }; + }; + ++ pu_dummy: pudummy_reg { ++ compatible = "fsl,imx6-dummy-pureg"; /* only used in ldo-bypass */ ++ }; ++ + soc { + #address-cells = <1>; + #size-cells = <1>; +@@ -86,6 +92,11 @@ + interrupt-parent = <&gpc>; + ranges; + ++ caam_sm: caam-sm@00100000 { ++ compatible = "fsl,imx6q-caam-sm"; ++ reg = <0x00100000 0x3fff>; ++ }; ++ + dma_apbh: dma-apbh@00110000 { + compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh"; + reg = <0x00110000 0x2000>; +@@ -99,6 +110,12 @@ + clocks = <&clks IMX6QDL_CLK_APBH_DMA>; + }; + ++ irq_sec_vio: caam_secvio { ++ compatible = "fsl,imx6q-caam-secvio"; ++ interrupts = <0 20 0x04>; ++ secvio_src = <0x8000001d>; ++ }; ++ + gpmi: gpmi-nand@00112000 { + compatible = "fsl,imx6q-gpmi-nand"; + #address-cells = <1>; +@@ -190,16 +207,16 @@ + dmas = <&sdma 14 18 0>, + <&sdma 15 18 0>; + dma-names = "rx", "tx"; +- clocks = <&clks IMX6QDL_CLK_SPDIF>, <&clks IMX6QDL_CLK_OSC>, +- <&clks IMX6QDL_CLK_SPDIF>, <&clks IMX6QDL_CLK_DUMMY>, +- <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_DUMMY>, +- <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_DUMMY>, +- <&clks IMX6QDL_CLK_DUMMY>; ++ clocks = <&clks IMX6QDL_CLK_SPDIF_GCLK>, <&clks IMX6QDL_CLK_OSC>, ++ <&clks IMX6QDL_CLK_SPDIF>, <&clks IMX6QDL_CLK_ASRC>, ++ <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_ESAI_EXTAL>, ++ <&clks IMX6QDL_CLK_IPG>, <&clks IMX6QDL_CLK_MLB>, ++ <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_SPBA>; + clock-names = "core", "rxtx0", + "rxtx1", "rxtx2", + "rxtx3", "rxtx4", + "rxtx5", "rxtx6", +- "rxtx7"; ++ "rxtx7", "dma"; + status = "disabled"; + }; + +@@ -274,7 +291,12 @@ + esai: esai@02024000 { + reg = <0x02024000 0x4000>; + interrupts = <0 51 IRQ_TYPE_LEVEL_HIGH>; +- }; ++ compatible = "fsl,imx6q-esai"; ++ clocks = 
<&clks 118>; ++ fsl,esai-dma-events = <24 23>; ++ fsl,flags = <1>; ++ status = "disabled"; ++ }; + + ssi1: ssi@02028000 { + #sound-dai-cells = <0>; +@@ -325,8 +347,30 @@ + }; + + asrc: asrc@02034000 { ++ compatible = "fsl,imx53-asrc"; + reg = <0x02034000 0x4000>; + interrupts = <0 50 IRQ_TYPE_LEVEL_HIGH>; ++ clocks = <&clks IMX6QDL_CLK_ASRC_MEM>, ++ <&clks IMX6QDL_CLK_ASRC_IPG>, ++ <&clks IMX6QDL_CLK_SPDIF>, ++ <&clks IMX6QDL_CLK_SPBA>; ++ clock-names = "mem", "ipg", "asrck_0", "dma"; ++ dmas = <&sdma 17 20 1>, <&sdma 18 20 1>, <&sdma 19 20 1>, ++ <&sdma 20 20 1>, <&sdma 21 20 1>, <&sdma 22 20 1>; ++ dma-names = "rxa", "rxb", "rxc", ++ "txa", "txb", "txc"; ++ fsl,asrc-rate = <48000>; ++ fsl,asrc-width = <16>; ++ status = "okay"; ++ }; ++ ++ asrc_p2p: asrc_p2p { ++ compatible = "fsl,imx6q-asrc-p2p"; ++ fsl,output-rate = <48000>; ++ fsl,output-width = <16>; ++ fsl,asrc-dma-rx-events = <17 18 19>; ++ fsl,asrc-dma-tx-events = <20 21 22>; ++ status = "okay"; + }; + + spba@0203c000 { +@@ -335,16 +379,20 @@ + }; + + vpu: vpu@02040000 { +- compatible = "cnm,coda960"; ++ compatible = "cnm,coda960", "fsl,imx6-vpu"; + reg = <0x02040000 0x3c000>; ++ reg-names = "vpu_regs"; + interrupts = <0 12 IRQ_TYPE_LEVEL_HIGH>, + <0 3 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "bit", "jpeg"; + clocks = <&clks IMX6QDL_CLK_VPU_AXI>, +- <&clks IMX6QDL_CLK_MMDC_CH0_AXI>; +- clock-names = "per", "ahb"; +- resets = <&src 1>; ++ <&clks IMX6QDL_CLK_MMDC_CH0_AXI>, ++ <&clks IMX6QDL_CLK_OCRAM>; ++ clock-names = "per", "ahb", "ocram"; ++ iramsize = <0x21000>; + iram = <&ocram>; ++ resets = <&src 1>; ++ power-domains = <&gpc 1>; + }; + + aipstz@0207c000 { /* AIPSTZ1 */ +@@ -552,20 +600,21 @@ + anatop-min-bit-val = <4>; + anatop-min-voltage = <800000>; + anatop-max-voltage = <1375000>; ++ anatop-enable-bit = <0>; + }; + +- regulator-3p0@120 { ++ reg_3p0: regulator-3p0@120 { + compatible = "fsl,anatop-regulator"; + regulator-name = "vdd3p0"; +- regulator-min-microvolt = <2800000>; +- regulator-max-microvolt = <3150000>; +- regulator-always-on; ++ regulator-min-microvolt = <2625000>; ++ regulator-max-microvolt = <3400000>; + anatop-reg-offset = <0x120>; + anatop-vol-bit-shift = <8>; + anatop-vol-bit-width = <5>; + anatop-min-bit-val = <0>; + anatop-min-voltage = <2625000>; + anatop-max-voltage = <3400000>; ++ anatop-enable-bit = <0>; + }; + + regulator-2p5@130 { +@@ -580,6 +629,7 @@ + anatop-min-bit-val = <0>; + anatop-min-voltage = <2000000>; + anatop-max-voltage = <2750000>; ++ anatop-enable-bit = <0>; + }; + + reg_arm: regulator-vddcore@140 { +@@ -647,6 +697,7 @@ + reg = <0x020c9000 0x1000>; + interrupts = <0 44 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clks IMX6QDL_CLK_USBPHY1>; ++ phy-3p0-supply = <®_3p0>; + fsl,anatop = <&anatop>; + }; + +@@ -655,9 +706,15 @@ + reg = <0x020ca000 0x1000>; + interrupts = <0 45 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clks IMX6QDL_CLK_USBPHY2>; ++ phy-3p0-supply = <®_3p0>; + fsl,anatop = <&anatop>; + }; + ++ caam_snvs: caam-snvs@020cc000 { ++ compatible = "fsl,imx6q-caam-snvs"; ++ reg = <0x020cc000 0x4000>; ++ }; ++ + snvs@020cc000 { + compatible = "fsl,sec-v4.0-mon", "simple-bus"; + #address-cells = <1>; +@@ -704,14 +761,12 @@ + interrupts = <0 89 IRQ_TYPE_LEVEL_HIGH>, + <0 90 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&intc>; +- pu-supply = <®_pu>; +- clocks = <&clks IMX6QDL_CLK_GPU3D_CORE>, +- <&clks IMX6QDL_CLK_GPU3D_SHADER>, +- <&clks IMX6QDL_CLK_GPU2D_CORE>, +- <&clks IMX6QDL_CLK_GPU2D_AXI>, +- <&clks IMX6QDL_CLK_OPENVG_AXI>, +- <&clks IMX6QDL_CLK_VPU_AXI>; + #power-domain-cells = <1>; ++ clocks = 
<&clks 122>, <&clks 74>, <&clks 121>, ++ <&clks 26>, <&clks 143>, <&clks 168>; ++ clock-names = "gpu3d_core", "gpu3d_shader", "gpu2d_core", ++ "gpu2d_axi", "openvg_axi", "vpu_axi"; ++ pu-supply = <®_pu>; + }; + + gpr: iomuxc-gpr@020e0000 { +@@ -736,22 +791,6 @@ + #size-cells = <0>; + reg = <0>; + status = "disabled"; +- +- port@0 { +- reg = <0>; +- +- lvds0_mux_0: endpoint { +- remote-endpoint = <&ipu1_di0_lvds0>; +- }; +- }; +- +- port@1 { +- reg = <1>; +- +- lvds0_mux_1: endpoint { +- remote-endpoint = <&ipu1_di1_lvds0>; +- }; +- }; + }; + + lvds-channel@1 { +@@ -759,22 +798,6 @@ + #size-cells = <0>; + reg = <1>; + status = "disabled"; +- +- port@0 { +- reg = <0>; +- +- lvds1_mux_0: endpoint { +- remote-endpoint = <&ipu1_di0_lvds1>; +- }; +- }; +- +- port@1 { +- reg = <1>; +- +- lvds1_mux_1: endpoint { +- remote-endpoint = <&ipu1_di1_lvds1>; +- }; +- }; + }; + }; + +@@ -788,32 +811,26 @@ + <&clks IMX6QDL_CLK_HDMI_ISFR>; + clock-names = "iahb", "isfr"; + status = "disabled"; +- +- port@0 { +- reg = <0>; +- +- hdmi_mux_0: endpoint { +- remote-endpoint = <&ipu1_di0_hdmi>; +- }; +- }; +- +- port@1 { +- reg = <1>; +- +- hdmi_mux_1: endpoint { +- remote-endpoint = <&ipu1_di1_hdmi>; +- }; +- }; + }; + + dcic1: dcic@020e4000 { ++ compatible = "fsl,imx6q-dcic"; + reg = <0x020e4000 0x4000>; + interrupts = <0 124 IRQ_TYPE_LEVEL_HIGH>; ++ clocks = <&clks IMX6QDL_CLK_DCIC1>, <&clks IMX6QDL_CLK_DCIC1>; ++ clock-names = "dcic", "disp-axi"; ++ gpr = <&gpr>; ++ status = "disabled"; + }; + + dcic2: dcic@020e8000 { ++ compatible = "fsl,imx6q-dcic"; + reg = <0x020e8000 0x4000>; + interrupts = <0 125 IRQ_TYPE_LEVEL_HIGH>; ++ clocks = <&clks IMX6QDL_CLK_DCIC2>, <&clks IMX6QDL_CLK_DCIC2>; ++ clock-names = "dcic", "disp-axi"; ++ gpr = <&gpr>; ++ status = "disabled"; + }; + + sdma: sdma@020ec000 { +@@ -824,6 +841,7 @@ + <&clks IMX6QDL_CLK_SDMA>; + clock-names = "ipg", "ahb"; + #dma-cells = <3>; ++ iram = <&ocram>; + fsl,sdma-ram-script-name = "imx/sdma/sdma-imx6q.bin"; + }; + }; +@@ -835,10 +853,30 @@ + reg = <0x02100000 0x100000>; + ranges; + +- caam@02100000 { +- reg = <0x02100000 0x40000>; +- interrupts = <0 105 IRQ_TYPE_LEVEL_HIGH>, +- <0 106 IRQ_TYPE_LEVEL_HIGH>; ++ crypto: caam@2100000 { ++ compatible = "fsl,sec-v4.0"; ++ #address-cells = <1>; ++ #size-cells = <1>; ++ reg = <0x2100000 0x40000>; ++ ranges = <0 0x2100000 0x40000>; ++ interrupt-parent = <&intc>; /* interrupts = <0 92 0x4>; */ ++ interrupts = <0 92 0x4>; ++ clocks = <&clks 213>, <&clks 214>, <&clks 215> ,<&clks 196>; ++ clock-names = "caam_mem", "caam_aclk", "caam_ipg", "caam_emi_slow"; ++ ++ sec_jr0: jr0@1000 { ++ compatible = "fsl,sec-v4.0-job-ring"; ++ reg = <0x1000 0x1000>; ++ interrupt-parent = <&intc>; ++ interrupts = <0 105 IRQ_TYPE_LEVEL_HIGH>; ++ }; ++ ++ sec_jr1: jr1@2000 { ++ compatible = "fsl,sec-v4.0-job-ring"; ++ reg = <0x2000 0x1000>; ++ interrupt-parent = <&intc>; ++ interrupts = <0 106 IRQ_TYPE_LEVEL_HIGH>; ++ }; + }; + + aipstz@0217c000 { /* AIPSTZ2 */ +@@ -852,6 +890,7 @@ + clocks = <&clks IMX6QDL_CLK_USBOH3>; + fsl,usbphy = <&usbphy1>; + fsl,usbmisc = <&usbmisc 0>; ++ fsl,anatop = <&anatop>; + status = "disabled"; + }; + +@@ -903,14 +942,21 @@ + <&clks IMX6QDL_CLK_ENET>, + <&clks IMX6QDL_CLK_ENET_REF>; + clock-names = "ipg", "ahb", "ptp"; +- status = "disabled"; ++ phy-mode = "rgmii"; ++ fsl,magic-packet; ++ status = "okay"; + }; + +- mlb@0218c000 { ++ mlb: mlb@0218c000 { + reg = <0x0218c000 0x4000>; + interrupts = <0 53 IRQ_TYPE_LEVEL_HIGH>, + <0 117 IRQ_TYPE_LEVEL_HIGH>, + <0 126 IRQ_TYPE_LEVEL_HIGH>; ++ compatible = 
"fsl,imx6q-mlb150"; ++ clocks = <&clks 139>, <&clks 175>; ++ clock-names = "mlb", "pll8_mlb"; ++ iram = <&ocram>; ++ status = "disabled"; + }; + + usdhc1: usdhc@02190000 { +@@ -995,6 +1041,11 @@ + reg = <0x021ac000 0x4000>; + }; + ++ mmdc0-1@021b0000 { ++ compatible = "fsl,imx6q-mmdc-combine"; ++ reg = <0x021b0000 0x8000>; ++ }; ++ + mmdc0: mmdc@021b0000 { /* MMDC0 */ + compatible = "fsl,imx6q-mmdc"; + reg = <0x021b0000 0x4000>; +@@ -1011,11 +1062,17 @@ + clocks = <&clks IMX6QDL_CLK_EIM_SLOW>; + }; + +- ocotp: ocotp@021bc000 { +- compatible = "fsl,imx6q-ocotp", "syscon"; ++ ocotp: ocotp-ctrl@021bc000 { ++ compatible = "syscon"; + reg = <0x021bc000 0x4000>; + }; + ++ ocotp-fuse@021bc000 { ++ compatible = "fsl,imx6q-ocotp"; ++ reg = <0x021bc000 0x4000>; ++ clocks = <&clks 128>; ++ }; ++ + tzasc@021d0000 { /* TZASC1 */ + reg = <0x021d0000 0x4000>; + interrupts = <0 108 IRQ_TYPE_LEVEL_HIGH>; +@@ -1034,39 +1091,38 @@ + + mipi_csi: mipi@021dc000 { + reg = <0x021dc000 0x4000>; ++ compatible = "fsl,imx6q-mipi-csi2"; ++ interrupts = <0 100 0x04>, <0 101 0x04>; ++ clocks = <&clks IMX6QDL_CLK_HSI_TX>, ++ <&clks IMX6QDL_CLK_EIM_SEL>, ++ <&clks IMX6QDL_CLK_LVDS2_IN>; ++ /* Note: clks 138 is hsi_tx, however, the dphy_c ++ * hsi_tx and pll_refclk use the same clk gate. ++ * In current clk driver, open/close clk gate do ++ * use hsi_tx for a temporary debug purpose. ++ */ ++ clock-names = "dphy_clk", "pixel_clk", "cfg_clk"; ++ status = "disabled"; + }; + + mipi_dsi: mipi@021e0000 { ++ compatible = "fsl,imx6q-mipi-dsi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x021e0000 0x4000>; + status = "disabled"; +- +- ports { +- #address-cells = <1>; +- #size-cells = <0>; +- +- port@0 { +- reg = <0>; +- +- mipi_mux_0: endpoint { +- remote-endpoint = <&ipu1_di0_mipi>; +- }; +- }; +- +- port@1 { +- reg = <1>; +- +- mipi_mux_1: endpoint { +- remote-endpoint = <&ipu1_di1_mipi>; +- }; +- }; +- }; ++ interrupts = <0 102 0x04>; ++ gpr = <&gpr>; ++ clocks = <&clks IMX6QDL_CLK_HSI_TX>, <&clks IMX6QDL_CLK_VIDEO_27M>; ++ clock-names = "mipi_pllref_clk", "mipi_cfg_clk"; + }; + + vdoa@021e4000 { ++ compatible = "fsl,imx6q-vdoa"; + reg = <0x021e4000 0x4000>; + interrupts = <0 18 IRQ_TYPE_LEVEL_HIGH>; ++ clocks = <&clks 202>; ++ iram = <&ocram>; + }; + + uart2: serial@021e8000 { +@@ -1127,67 +1183,14 @@ + <0 5 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clks IMX6QDL_CLK_IPU1>, + <&clks IMX6QDL_CLK_IPU1_DI0>, +- <&clks IMX6QDL_CLK_IPU1_DI1>; +- clock-names = "bus", "di0", "di1"; ++ <&clks IMX6QDL_CLK_IPU1_DI1>, ++ <&clks 39>, <&clks 40>, ++ <&clks 135>, <&clks 136>; ++ clock-names = "bus", "di0", "di1", ++ "di0_sel", "di1_sel", ++ "ldb_di0", "ldb_di1"; + resets = <&src 2>; +- +- ipu1_csi0: port@0 { +- reg = <0>; +- }; +- +- ipu1_csi1: port@1 { +- reg = <1>; +- }; +- +- ipu1_di0: port@2 { +- #address-cells = <1>; +- #size-cells = <0>; +- reg = <2>; +- +- ipu1_di0_disp0: endpoint@0 { +- }; +- +- ipu1_di0_hdmi: endpoint@1 { +- remote-endpoint = <&hdmi_mux_0>; +- }; +- +- ipu1_di0_mipi: endpoint@2 { +- remote-endpoint = <&mipi_mux_0>; +- }; +- +- ipu1_di0_lvds0: endpoint@3 { +- remote-endpoint = <&lvds0_mux_0>; +- }; +- +- ipu1_di0_lvds1: endpoint@4 { +- remote-endpoint = <&lvds1_mux_0>; +- }; +- }; +- +- ipu1_di1: port@3 { +- #address-cells = <1>; +- #size-cells = <0>; +- reg = <3>; +- +- ipu1_di0_disp1: endpoint@0 { +- }; +- +- ipu1_di1_hdmi: endpoint@1 { +- remote-endpoint = <&hdmi_mux_1>; +- }; +- +- ipu1_di1_mipi: endpoint@2 { +- remote-endpoint = <&mipi_mux_1>; +- }; +- +- ipu1_di1_lvds0: endpoint@3 { +- remote-endpoint = 
<&lvds0_mux_1>; +- }; +- +- ipu1_di1_lvds1: endpoint@4 { +- remote-endpoint = <&lvds1_mux_1>; +- }; +- }; ++ bypass_reset = <0>; + }; + }; + }; +diff -Nur linux-4.1.3/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi linux-xbian-imx6/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi +--- linux-4.1.3/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi 2015-07-27 23:13:00.303898027 +0200 +@@ -43,8 +43,10 @@ + #include "imx6qdl-microsom-ar8035.dtsi" + + / { +- chosen { +- stdout-path = &uart1; ++ aliases { ++ mmc0 = &usdhc2; ++ mmc1 = &usdhc1; ++ mxcfb0 = &mxcfb1; + }; + + ir_recv: ir-receiver { +@@ -52,6 +54,7 @@ + gpios = <&gpio3 5 1>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hummingboard_gpio3_5>; ++ linux,rc-map-name = "rc-rc6-mce"; + }; + + regulators { +@@ -98,32 +101,70 @@ + model = "On-board Codec"; + mux-ext-port = <5>; + mux-int-port = <1>; ++ cpu-dai = <&ssi1>; + ssi-controller = <&ssi1>; + }; + + sound-spdif { + compatible = "fsl,imx-audio-spdif"; +- model = "On-board SPDIF"; ++ model = "imx-spdif"; + /* IMX6 doesn't implement this yet */ + spdif-controller = <&spdif>; + spdif-out; + }; ++ ++ sound-hdmi { ++ compatible = "fsl,imx6q-audio-hdmi", ++ "fsl,imx-audio-hdmi"; ++ model = "imx-audio-hdmi"; ++ hdmi-controller = <&hdmi_audio>; ++ }; ++ ++ mxcfb1: fb@0 { ++ compatible = "fsl,mxc_sdc_fb"; ++ disp_dev = "hdmi"; ++ interface_pix_fmt = "RGB24"; ++ mode_str ="1920x1080M@60"; ++ default_bpp = <32>; ++ int_clk = <0>; ++ late_init = <0>; ++ status = "okay"; ++ }; + }; + + &audmux { + status = "okay"; + }; + +-&can1 { ++/*&can1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hummingboard_flexcan1>; + status = "okay"; + }; ++*/ ++&hdmi_core { ++ ipu_id = <0>; ++ disp_id = <0>; ++ status = "okay"; ++}; ++ ++&hdmi_video { ++ fsl,phy_reg_vlev = <0x0294>; ++ fsl,phy_reg_cksymtx = <0x800d>; ++ status = "okay"; ++}; ++ ++&hdmi_audio { ++ status = "okay"; ++}; ++ ++&ocram { ++ status = "okay"; ++}; + +-&hdmi { ++&hdmi_cec { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hummingboard_hdmi>; +- ddc-i2c-bus = <&i2c2>; + status = "okay"; + }; + +@@ -136,6 +177,7 @@ + rtc: pcf8523@68 { + compatible = "nxp,pcf8523"; + reg = <0x68>; ++ nxp,12p5_pf; + }; + + /* Pro baseboard model */ +@@ -155,20 +197,57 @@ + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hummingboard_i2c2>; + status = "okay"; ++ ++ ddc: imx6_hdmi_i2c@50 { ++ compatible = "fsl,imx6-hdmi-i2c"; ++ reg = <0x50>; ++ }; + }; + + &iomuxc { ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_hog>; + hummingboard { +- pinctrl_hummingboard_flexcan1: hummingboard-flexcan1 { ++ pinctrl_hog: hoggrp { ++ fsl,pins = < ++ /* ++ * 26 pin header GPIO description. The pins. 
++ * numbering as following - ++ * GPIO number | GPIO (bank,num) | PIN number ++ * ------------+-----------------+------------ ++ * gpio1 | (1,1) | IO7 ++ * gpio73 | (3,9) | IO11 ++ * gpio72 | (3,8) | IO12 ++ * gpio71 | (3,7) | IO13 ++ * gpio70 | (3,6) | IO15 ++ * gpio194 | (7,2) | IO16 ++ * gpio195 | (7,3) | IO18 ++ * gpio67 | (3,3) | IO22 ++ * ++ * Notice the gpioX and GPIO (Y,Z) mapping forumla : ++ * X = (Y-1) * 32 + Z ++ */ ++ MX6QDL_PAD_GPIO_1__GPIO1_IO01 0x400130b1 ++ MX6QDL_PAD_EIM_DA9__GPIO3_IO09 0x400130b1 ++ MX6QDL_PAD_EIM_DA8__GPIO3_IO08 0x400130b1 ++ MX6QDL_PAD_EIM_DA7__GPIO3_IO07 0x400130b1 ++ MX6QDL_PAD_EIM_DA6__GPIO3_IO06 0x400130b1 ++ MX6QDL_PAD_SD3_CMD__GPIO7_IO02 0x400130b1 ++ MX6QDL_PAD_SD3_CLK__GPIO7_IO03 0x400130b1 ++ MX6QDL_PAD_EIM_DA3__GPIO3_IO03 0x400130b1 ++ >; ++ }; ++ ++/* pinctrl_hummingboard_flexcan1: hummingboard-flexcan1 { + fsl,pins = < + MX6QDL_PAD_SD3_CLK__FLEXCAN1_RX 0x80000000 + MX6QDL_PAD_SD3_CMD__FLEXCAN1_TX 0x80000000 + >; + }; +- ++*/ + pinctrl_hummingboard_gpio3_5: hummingboard-gpio3_5 { + fsl,pins = < +- MX6QDL_PAD_EIM_DA5__GPIO3_IO05 0x1b0b1 ++ MX6QDL_PAD_EIM_DA5__GPIO3_IO05 0x80000000 + >; + }; + +@@ -198,10 +277,10 @@ + + pinctrl_hummingboard_sgtl5000: hummingboard-sgtl5000 { + fsl,pins = < +- MX6QDL_PAD_DISP0_DAT19__AUD5_RXD 0x130b0 +- MX6QDL_PAD_KEY_COL0__AUD5_TXC 0x130b0 +- MX6QDL_PAD_KEY_ROW0__AUD5_TXD 0x110b0 +- MX6QDL_PAD_KEY_COL1__AUD5_TXFS 0x130b0 ++ MX6QDL_PAD_DISP0_DAT19__AUD5_RXD 0x130b0 /*brk*/ ++ MX6QDL_PAD_KEY_COL0__AUD5_TXC 0x130b0 /*ok*/ ++ MX6QDL_PAD_KEY_ROW0__AUD5_TXD 0x110b0 /*brk*/ ++ MX6QDL_PAD_KEY_COL1__AUD5_TXFS 0x130b0 /*ok*/ + MX6QDL_PAD_GPIO_5__CCM_CLKO1 0x130b0 + >; + }; +@@ -219,7 +298,7 @@ + * Similar to pinctrl_usbotg_2, but we want it + * pulled down for a fixed host connection. 
+ */ +- fsl,pins = ; ++ fsl,pins = ; + }; + + pinctrl_hummingboard_usbotg_vbus: hummingboard-usbotg-vbus { +@@ -242,6 +321,13 @@ + MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x13059 + >; + }; ++ ++ pinctrl_hummingboard_pcie_reset: hummingboard-pcie-reset { ++ fsl,pins = < ++ MX6QDL_PAD_EIM_DA4__GPIO3_IO04 0x80000000 ++ >; ++ }; ++ + }; + }; + +@@ -256,6 +342,14 @@ + status = "okay"; + }; + ++&pwm3 { ++ status = "disabled"; ++}; ++ ++&pwm4 { ++ status = "disabled"; ++}; ++ + &spdif { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hummingboard_spdif>; +@@ -291,3 +385,48 @@ + cd-gpios = <&gpio1 4 0>; + status = "okay"; + }; ++ ++&gpc { ++ fsl,cpu_pupscr_sw2iso = <0xf>; ++ fsl,cpu_pupscr_sw = <0xf>; ++ fsl,cpu_pdnscr_iso2sw = <0x1>; ++ fsl,cpu_pdnscr_iso = <0x1>; ++ status = "okay"; ++}; ++ ++&pcie { ++ pinctrl-names = "default"; ++ pinctrl-0 = < ++ &pinctrl_hummingboard_pcie_reset ++ >; ++ reset-gpio = <&gpio3 4 0>; ++ status = "okay"; ++ no-msi; ++}; ++ ++&ecspi1 { ++ status = "okay"; ++ fsl,spi-num-chipselects = <1>; ++}; ++ ++&ecspi2 { ++ status = "okay"; ++ fsl,spi-num-chipselects = <2>; ++}; ++ ++&ecspi3 { ++ status = "okay"; ++ fsl,spi-num-chipselects = <3>; ++}; ++ ++&dcic1 { ++ dcic_id = <0>; ++ dcic_mux = "dcic-hdmi"; ++ status = "okay"; ++}; ++ ++&dcic2 { ++ dcic_id = <1>; ++ dcic_mux = "dcic-lvds1"; ++ status = "okay"; ++}; +diff -Nur linux-4.1.3/arch/arm/boot/dts/imx6qdl-microsom.dtsi linux-xbian-imx6/arch/arm/boot/dts/imx6qdl-microsom.dtsi +--- linux-4.1.3/arch/arm/boot/dts/imx6qdl-microsom.dtsi 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/boot/dts/imx6qdl-microsom.dtsi 2015-07-27 23:13:00.303898027 +0200 +@@ -39,15 +39,98 @@ + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ ++#include ++/ { ++ clk_sdio: sdio-clock { ++ compatible = "gpio-gate-clock"; ++ #clock-cells = <0>; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_microsom_brcm_osc>; ++ enable-gpios = <&gpio5 5 GPIO_ACTIVE_HIGH>; ++ }; ++ ++ regulators { ++ compatible = "simple-bus"; ++ ++ reg_brcm: brcm-reg { ++ compatible = "regulator-fixed"; ++ enable-active-high; ++ gpio = <&gpio3 19 0>; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_microsom_brcm_reg>; ++ regulator-name = "brcm_reg"; ++ regulator-min-microvolt = <3300000>; ++ regulator-max-microvolt = <3300000>; ++ startup-delay-us = <200000>; ++ }; ++ }; ++ ++ usdhc1_pwrseq: usdhc1_pwrseq { ++ compatible = "mmc-pwrseq-simple"; ++ reset-gpios = <&gpio5 26 GPIO_ACTIVE_LOW>, ++ <&gpio6 0 GPIO_ACTIVE_LOW>; ++ clocks = <&clk_sdio>; ++ clock-names = "ext_clock"; ++ }; ++}; + + &iomuxc { + microsom { ++ pinctrl_microsom_brcm_bt: microsom-brcm-bt { ++ fsl,pins = < ++ MX6QDL_PAD_CSI0_DAT14__GPIO6_IO00 0x40013070 ++ MX6QDL_PAD_CSI0_DAT15__GPIO6_IO01 0x40013070 ++ MX6QDL_PAD_CSI0_DAT18__GPIO6_IO04 0x40013070 ++ >; ++ }; ++ ++ pinctrl_microsom_brcm_osc: microsom-brcm-osc { ++ fsl,pins = < ++ MX6QDL_PAD_DISP0_DAT11__GPIO5_IO05 0x40013070 ++ >; ++ }; ++ ++ pinctrl_microsom_brcm_reg: microsom-brcm-reg { ++ fsl,pins = < ++ MX6QDL_PAD_EIM_D19__GPIO3_IO19 0x40013070 ++ >; ++ }; ++ ++ pinctrl_microsom_brcm_wifi: microsom-brcm-wifi { ++ fsl,pins = < ++ MX6QDL_PAD_GPIO_8__XTALOSC_REF_CLK_32K 0x1b0b0 ++ MX6QDL_PAD_CSI0_DATA_EN__GPIO5_IO20 0x40013070 ++ MX6QDL_PAD_CSI0_DAT8__GPIO5_IO26 0x40013070 ++ MX6QDL_PAD_CSI0_DAT9__GPIO5_IO27 0x40013070 ++ >; ++ }; ++ + pinctrl_microsom_uart1: microsom-uart1 { + fsl,pins = < + MX6QDL_PAD_CSI0_DAT10__UART1_TX_DATA 0x1b0b1 + MX6QDL_PAD_CSI0_DAT11__UART1_RX_DATA 0x1b0b1 + >; + }; ++ ++ pinctrl_microsom_uart4: microsom-uart4 { ++ fsl,pins = < ++ MX6QDL_PAD_CSI0_DAT12__UART4_TX_DATA 0x1b0b1 ++ MX6QDL_PAD_CSI0_DAT13__UART4_RX_DATA 0x1b0b1 ++ MX6QDL_PAD_CSI0_DAT16__UART4_RTS_B 0x1b0b1 ++ MX6QDL_PAD_CSI0_DAT17__UART4_CTS_B 0x1b0b1 ++ >; ++ }; ++ ++ pinctrl_microsom_usdhc1: microsom-usdhc1 { ++ fsl,pins = < ++ MX6QDL_PAD_SD1_CMD__SD1_CMD 0x17059 ++ MX6QDL_PAD_SD1_CLK__SD1_CLK 0x10059 ++ MX6QDL_PAD_SD1_DAT0__SD1_DATA0 0x17059 ++ MX6QDL_PAD_SD1_DAT1__SD1_DATA1 0x17059 ++ MX6QDL_PAD_SD1_DAT2__SD1_DATA2 0x17059 ++ MX6QDL_PAD_SD1_DAT3__SD1_DATA3 0x17059 ++ >; ++ }; + }; + }; + +@@ -56,3 +139,23 @@ + pinctrl-0 = <&pinctrl_microsom_uart1>; + status = "okay"; + }; ++ ++/* UART4 - Connected to optional BRCM Wifi/BT/FM */ ++&uart4 { ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_microsom_brcm_bt &pinctrl_microsom_uart4>; ++ fsl,uart-has-rtscts; ++ status = "okay"; ++}; ++ ++/* USDHC1 - Connected to optional BRCM Wifi/BT/FM */ ++&usdhc1 { ++ pinctrl-names = "default"; ++ pinctrl-0 = <&pinctrl_microsom_brcm_wifi &pinctrl_microsom_usdhc1>; ++ bus-width = <4>; ++ mmc-pwrseq = <&usdhc1_pwrseq>; ++ keep-power-in-suspend; ++ non-removable; ++ vmmc-supply = <®_brcm>; ++ status = "okay"; ++}; +diff -Nur linux-4.1.3/arch/arm/boot/dts/imx6q.dtsi linux-xbian-imx6/arch/arm/boot/dts/imx6q.dtsi +--- linux-4.1.3/arch/arm/boot/dts/imx6q.dtsi 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/boot/dts/imx6q.dtsi 2015-07-27 23:13:00.303898027 +0200 +@@ -14,6 +14,7 @@ + + / { + aliases { ++ ipu1 = &ipu2; + spi4 = &ecspi5; + }; + +@@ -47,9 +48,12 @@ + <&clks IMX6QDL_CLK_PLL2_PFD2_396M>, + <&clks IMX6QDL_CLK_STEP>, + <&clks IMX6QDL_CLK_PLL1_SW>, +- <&clks IMX6QDL_CLK_PLL1_SYS>; ++ <&clks IMX6QDL_CLK_PLL1_SYS>, ++ <&clks 
IMX6QDL_PLL1_BYPASS>, ++ <&clks IMX6QDL_CLK_PLL1>, ++ <&clks IMX6QDL_PLL1_BYPASS_SRC> ; + clock-names = "arm", "pll2_pfd2_396m", "step", +- "pll1_sw", "pll1_sys"; ++ "pll1_sw", "pll1_sys", "pll1_bypass", "pll1", "pll1_bypass_src"; + arm-supply = <®_arm>; + pu-supply = <®_pu>; + soc-supply = <®_soc>; +@@ -78,9 +82,85 @@ + }; + + soc { +- ocram: sram@00900000 { ++ ++ busfreq { /* BUSFREQ */ ++ compatible = "fsl,imx6_busfreq"; ++ clocks = <&clks 171>, <&clks 6>, <&clks 11>, <&clks 104>, <&clks 172>, <&clks 58>, ++ <&clks 18>, <&clks 60>, <&clks 20>, <&clks 3>; ++ clock-names = "pll2_bus", "pll2_pfd2_396m", "pll2_198m", "arm", "pll3_usb_otg", "periph", ++ "periph_pre", "periph_clk2", "periph_clk2_sel", "osc"; ++ interrupts = <0 107 0x04>, <0 112 0x4>, <0 113 0x4>, <0 114 0x4>; ++ interrupt-names = "irq_busfreq_0", "irq_busfreq_1", "irq_busfreq_2", "irq_busfreq_3"; ++ fsl,max_ddr_freq = <528000000>; ++ }; ++ ++ gpu@00130000 { ++ compatible = "fsl,imx6q-gpu"; ++ reg = <0x00130000 0x4000>, <0x00134000 0x4000>, ++ <0x02204000 0x4000>, <0x0 0x0>; ++ reg-names = "iobase_3d", "iobase_2d", ++ "iobase_vg", "phys_baseaddr"; ++ interrupts = <0 9 0x04>, <0 10 0x04>,<0 11 0x04>; ++ interrupt-names = "irq_3d", "irq_2d", "irq_vg"; ++ clocks = <&clks 26>, <&clks 143>, ++ <&clks 27>, <&clks 121>, ++ <&clks 122>, <&clks 74>; ++ clock-names = "gpu2d_axi_clk", "openvg_axi_clk", ++ "gpu3d_axi_clk", "gpu2d_clk", ++ "gpu3d_clk", "gpu3d_shader_clk"; ++ resets = <&src 0>, <&src 3>, <&src 3>; ++ reset-names = "gpu3d", "gpu2d", "gpuvg"; ++ power-domains = <&gpc 1>; ++ }; ++ ++ hdmi_core: hdmi_core@00120000 { ++ compatible = "fsl,imx6q-hdmi-core"; ++ reg = <0x00120000 0x9000>; ++ clocks = <&clks 124>, <&clks 123>; ++ clock-names = "hdmi_isfr", "hdmi_iahb"; ++ status = "disabled"; ++ }; ++ ++ hdmi_video: hdmi_video@020e0000 { ++ compatible = "fsl,imx6q-hdmi-video"; ++ reg = <0x020e0000 0x1000>; ++ reg-names = "hdmi_gpr"; ++ interrupts = <0 115 0x04>; ++ clocks = <&clks 124>, <&clks 123>; ++ clock-names = "hdmi_isfr", "hdmi_iahb"; ++ status = "disabled"; ++ }; ++ ++ hdmi_audio: hdmi_audio@00120000 { ++ compatible = "fsl,imx6q-hdmi-audio"; ++ clocks = <&clks 124>, <&clks 123>; ++ clock-names = "hdmi_isfr", "hdmi_iahb"; ++ dmas = <&sdma 2 23 0>; ++ dma-names = "tx"; ++ status = "disabled"; ++ }; ++ ++ hdmi_cec: hdmi_cec@00120000 { ++ compatible = "fsl,imx6q-hdmi-cec"; ++ interrupts = <0 115 0x04>; ++ status = "disabled"; ++ }; ++ ++ ocrams: sram@00900000 { ++ compatible = "fsl,lpm-sram"; ++ reg = <0x00900000 0x4000>; ++ clocks = <&clks IMX6QDL_CLK_OCRAM>; ++ }; ++ ++ ocrams_ddr: sram@00904000 { ++ compatible = "fsl,ddr-lpm-sram"; ++ reg = <0x00904000 0x1000>; ++ clocks = <&clks IMX6QDL_CLK_OCRAM>; ++ }; ++ ++ ocram: sram@00905000 { + compatible = "mmio-sram"; +- reg = <0x00900000 0x40000>; ++ reg = <0x00905000 0x3B000>; + clocks = <&clks IMX6QDL_CLK_OCRAM>; + }; + +@@ -101,6 +181,10 @@ + }; + }; + ++ vpu@02040000 { ++ status = "okay"; ++ }; ++ + iomuxc: iomuxc@020e0000 { + compatible = "fsl,imx6q-iomuxc"; + +@@ -154,165 +238,33 @@ + }; + + ipu2: ipu@02800000 { +- #address-cells = <1>; +- #size-cells = <0>; + compatible = "fsl,imx6q-ipu"; + reg = <0x02800000 0x400000>; + interrupts = <0 8 IRQ_TYPE_LEVEL_HIGH>, + <0 7 IRQ_TYPE_LEVEL_HIGH>; +- clocks = <&clks IMX6QDL_CLK_IPU2>, +- <&clks IMX6QDL_CLK_IPU2_DI0>, +- <&clks IMX6QDL_CLK_IPU2_DI1>; +- clock-names = "bus", "di0", "di1"; ++ clocks = <&clks 133>, <&clks 134>, <&clks 137>, ++ <&clks 41>, <&clks 42>, ++ <&clks 135>, <&clks 136>; ++ clock-names = "bus", "di0", "di1", ++ 
"di0_sel", "di1_sel", ++ "ldb_di0", "ldb_di1"; + resets = <&src 4>; +- +- ipu2_csi0: port@0 { +- reg = <0>; +- }; +- +- ipu2_csi1: port@1 { +- reg = <1>; +- }; +- +- ipu2_di0: port@2 { +- #address-cells = <1>; +- #size-cells = <0>; +- reg = <2>; +- +- ipu2_di0_disp0: endpoint@0 { +- }; +- +- ipu2_di0_hdmi: endpoint@1 { +- remote-endpoint = <&hdmi_mux_2>; +- }; +- +- ipu2_di0_mipi: endpoint@2 { +- }; +- +- ipu2_di0_lvds0: endpoint@3 { +- remote-endpoint = <&lvds0_mux_2>; +- }; +- +- ipu2_di0_lvds1: endpoint@4 { +- remote-endpoint = <&lvds1_mux_2>; +- }; +- }; +- +- ipu2_di1: port@3 { +- #address-cells = <1>; +- #size-cells = <0>; +- reg = <3>; +- +- ipu2_di1_hdmi: endpoint@1 { +- remote-endpoint = <&hdmi_mux_3>; +- }; +- +- ipu2_di1_mipi: endpoint@2 { +- }; +- +- ipu2_di1_lvds0: endpoint@3 { +- remote-endpoint = <&lvds0_mux_3>; +- }; +- +- ipu2_di1_lvds1: endpoint@4 { +- remote-endpoint = <&lvds1_mux_3>; +- }; +- }; +- }; +- }; +- +- display-subsystem { +- compatible = "fsl,imx-display-subsystem"; +- ports = <&ipu1_di0>, <&ipu1_di1>, <&ipu2_di0>, <&ipu2_di1>; +- }; +-}; +- +-&hdmi { +- compatible = "fsl,imx6q-hdmi"; +- +- port@2 { +- reg = <2>; +- +- hdmi_mux_2: endpoint { +- remote-endpoint = <&ipu2_di0_hdmi>; +- }; +- }; +- +- port@3 { +- reg = <3>; +- +- hdmi_mux_3: endpoint { +- remote-endpoint = <&ipu2_di1_hdmi>; ++ bypass_reset = <0>; + }; + }; + }; + + &ldb { +- clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, <&clks IMX6QDL_CLK_LDB_DI1_SEL>, ++ clocks = <&clks IMX6QDL_CLK_LDB_DI0>, <&clks IMX6QDL_CLK_LDB_DI1>, + <&clks IMX6QDL_CLK_IPU1_DI0_SEL>, <&clks IMX6QDL_CLK_IPU1_DI1_SEL>, + <&clks IMX6QDL_CLK_IPU2_DI0_SEL>, <&clks IMX6QDL_CLK_IPU2_DI1_SEL>, +- <&clks IMX6QDL_CLK_LDB_DI0>, <&clks IMX6QDL_CLK_LDB_DI1>; +- clock-names = "di0_pll", "di1_pll", +- "di0_sel", "di1_sel", "di2_sel", "di3_sel", +- "di0", "di1"; +- +- lvds-channel@0 { +- port@2 { +- reg = <2>; +- +- lvds0_mux_2: endpoint { +- remote-endpoint = <&ipu2_di0_lvds0>; +- }; +- }; +- +- port@3 { +- reg = <3>; +- +- lvds0_mux_3: endpoint { +- remote-endpoint = <&ipu2_di1_lvds0>; +- }; +- }; +- }; +- +- lvds-channel@1 { +- port@2 { +- reg = <2>; +- +- lvds1_mux_2: endpoint { +- remote-endpoint = <&ipu2_di0_lvds1>; +- }; +- }; +- +- port@3 { +- reg = <3>; +- +- lvds1_mux_3: endpoint { +- remote-endpoint = <&ipu2_di1_lvds1>; +- }; +- }; +- }; +-}; +- +-&mipi_dsi { +- ports { +- port@2 { +- reg = <2>; +- +- mipi_mux_2: endpoint { +- remote-endpoint = <&ipu2_di0_mipi>; +- }; +- }; +- +- port@3 { +- reg = <3>; +- +- mipi_mux_3: endpoint { +- remote-endpoint = <&ipu2_di1_mipi>; +- }; +- }; +- }; +-}; +- +-&vpu { +- compatible = "fsl,imx6q-vpu", "cnm,coda960"; ++ <&clks IMX6QDL_CLK_LDB_DI0_DIV_3_5>, <&clks IMX6QDL_CLK_LDB_DI1_DIV_3_5>, ++ <&clks IMX6QDL_CLK_LDB_DI0_DIV_7>, <&clks IMX6QDL_CLK_LDB_DI1_DIV_7>, ++ <&clks IMX6QDL_CLK_LDB_DI0_DIV_SEL>, <&clks IMX6QDL_CLK_LDB_DI1_DIV_SEL>; ++ clock-names = "ldb_di0", "ldb_di1", ++ "di0_sel", "di1_sel", ++ "di2_sel", "di3_sel", ++ "ldb_di0_div_3_5", "ldb_di1_div_3_5", ++ "ldb_di0_div_7", "ldb_di1_div_7", ++ "ldb_di0_div_sel", "ldb_di1_div_sel"; + }; +diff -Nur linux-4.1.3/arch/arm/boot/dts/imx6q-hummingboard.dts linux-xbian-imx6/arch/arm/boot/dts/imx6q-hummingboard.dts +--- linux-4.1.3/arch/arm/boot/dts/imx6q-hummingboard.dts 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/boot/dts/imx6q-hummingboard.dts 2015-07-27 23:13:00.303898027 +0200 +@@ -57,3 +57,7 @@ + fsl,transmit-atten-16ths = <9>; + fsl,receive-eq-mdB = <3000>; + }; ++ ++&sgtl5000 { ++ status = "okay"; ++}; +diff -Nur 
linux-4.1.3/arch/arm/boot/dts/imx6sl.dtsi linux-xbian-imx6/arch/arm/boot/dts/imx6sl.dtsi +--- linux-4.1.3/arch/arm/boot/dts/imx6sl.dtsi 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/boot/dts/imx6sl.dtsi 2015-07-27 23:13:00.307883804 +0200 +@@ -457,20 +457,21 @@ + anatop-min-bit-val = <4>; + anatop-min-voltage = <800000>; + anatop-max-voltage = <1375000>; ++ anatop-enable-bit = <0>; + }; + +- regulator-3p0@120 { ++ reg_3p0: regulator-3p0@120 { + compatible = "fsl,anatop-regulator"; + regulator-name = "vdd3p0"; +- regulator-min-microvolt = <2800000>; +- regulator-max-microvolt = <3150000>; +- regulator-always-on; ++ regulator-min-microvolt = <2625000>; ++ regulator-max-microvolt = <3400000>; + anatop-reg-offset = <0x120>; + anatop-vol-bit-shift = <8>; + anatop-vol-bit-width = <5>; + anatop-min-bit-val = <0>; + anatop-min-voltage = <2625000>; + anatop-max-voltage = <3400000>; ++ anatop-enable-bit = <0>; + }; + + regulator-2p5@130 { +@@ -485,6 +486,7 @@ + anatop-min-bit-val = <0>; + anatop-min-voltage = <2100000>; + anatop-max-voltage = <2850000>; ++ anatop-enable-bit = <0>; + }; + + reg_arm: regulator-vddcore@140 { +@@ -552,6 +554,7 @@ + reg = <0x020c9000 0x1000>; + interrupts = <0 44 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clks IMX6SL_CLK_USBPHY1>; ++ phy-3p0-supply = <®_3p0>; + fsl,anatop = <&anatop>; + }; + +@@ -560,6 +563,7 @@ + reg = <0x020ca000 0x1000>; + interrupts = <0 45 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clks IMX6SL_CLK_USBPHY2>; ++ phy-3p0-supply = <®_3p0>; + fsl,anatop = <&anatop>; + }; + +diff -Nur linux-4.1.3/arch/arm/boot/dts/imx6sx.dtsi linux-xbian-imx6/arch/arm/boot/dts/imx6sx.dtsi +--- linux-4.1.3/arch/arm/boot/dts/imx6sx.dtsi 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/boot/dts/imx6sx.dtsi 2015-07-27 23:13:00.307883804 +0200 +@@ -556,20 +556,21 @@ + anatop-min-bit-val = <4>; + anatop-min-voltage = <800000>; + anatop-max-voltage = <1375000>; ++ anatop-enable-bit = <0>; + }; + +- regulator-3p0@120 { ++ reg_3p0: regulator-3p0@120 { + compatible = "fsl,anatop-regulator"; + regulator-name = "vdd3p0"; +- regulator-min-microvolt = <2800000>; +- regulator-max-microvolt = <3150000>; +- regulator-always-on; ++ regulator-min-microvolt = <2625000>; ++ regulator-max-microvolt = <3400000>; + anatop-reg-offset = <0x120>; + anatop-vol-bit-shift = <8>; + anatop-vol-bit-width = <5>; + anatop-min-bit-val = <0>; + anatop-min-voltage = <2625000>; + anatop-max-voltage = <3400000>; ++ anatop-enable-bit = <0>; + }; + + regulator-2p5@130 { +@@ -584,6 +585,7 @@ + anatop-min-bit-val = <0>; + anatop-min-voltage = <2100000>; + anatop-max-voltage = <2875000>; ++ anatop-enable-bit = <0>; + }; + + reg_arm: regulator-vddcore@140 { +@@ -650,6 +652,7 @@ + reg = <0x020c9000 0x1000>; + interrupts = ; + clocks = <&clks IMX6SX_CLK_USBPHY1>; ++ phy-3p0-supply = <®_3p0>; + fsl,anatop = <&anatop>; + }; + +@@ -658,6 +661,7 @@ + reg = <0x020ca000 0x1000>; + interrupts = ; + clocks = <&clks IMX6SX_CLK_USBPHY2>; ++ phy-3p0-supply = <®_3p0>; + fsl,anatop = <&anatop>; + }; + +diff -Nur linux-4.1.3/arch/arm/include/asm/glue-cache.h linux-xbian-imx6/arch/arm/include/asm/glue-cache.h +--- linux-4.1.3/arch/arm/include/asm/glue-cache.h 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/include/asm/glue-cache.h 2015-07-27 23:13:00.746319518 +0200 +@@ -102,19 +102,19 @@ + #endif + + #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) +-# ifdef _CACHE ++//# ifdef _CACHE + # define MULTI_CACHE 1 +-# else +-# define _CACHE v6 +-# endif ++//# else ++//# define _CACHE v6 ++//# 
endif + #endif + + #if defined(CONFIG_CPU_V7) +-# ifdef _CACHE ++//# ifdef _CACHE + # define MULTI_CACHE 1 +-# else +-# define _CACHE v7 +-# endif ++//# else ++//# define _CACHE v7 ++//# endif + #endif + + #if defined(CONFIG_CPU_V7M) +diff -Nur linux-4.1.3/arch/arm/Kconfig linux-xbian-imx6/arch/arm/Kconfig +--- linux-4.1.3/arch/arm/Kconfig 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/Kconfig 2015-07-27 23:13:00.128523741 +0200 +@@ -1688,6 +1688,7 @@ + range 11 64 if ARCH_SHMOBILE_LEGACY + default "12" if SOC_AM33XX + default "9" if SA1111 || ARCH_EFM32 ++ default "14" if ARCH_MXC + default "11" + help + The kernel memory allocator divides physically contiguous memory +diff -Nur linux-4.1.3/arch/arm/mach-imx/busfreq_ddr3.c linux-xbian-imx6/arch/arm/mach-imx/busfreq_ddr3.c +--- linux-4.1.3/arch/arm/mach-imx/busfreq_ddr3.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/arch/arm/mach-imx/busfreq_ddr3.c 2015-07-27 23:13:01.073153409 +0200 +@@ -0,0 +1,514 @@ ++/* ++ * Copyright (C) 2011-2013 Freescale Semiconductor, Inc. All Rights Reserved. ++ */ ++ ++/* ++ * The code contained herein is licensed under the GNU General Public ++ * License. You may obtain a copy of the GNU General Public License ++ * Version 2 or later at the following locations: ++ * ++ * http://www.opensource.org/licenses/gpl-license.html ++ * http://www.gnu.org/copyleft/gpl.html ++ */ ++ ++/*! ++ * @file busfreq_ddr3.c ++ * ++ * @brief iMX6 DDR3 frequency change specific file. ++ * ++ * @ingroup PM ++ */ ++#define DEBUG ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "hardware.h" ++ ++/* DDR settings */ ++static unsigned long (*iram_ddr_settings)[2]; ++static unsigned long (*normal_mmdc_settings)[2]; ++static unsigned long (*iram_iomux_settings)[2]; ++static void __iomem *mmdc_base; ++static void __iomem *iomux_base; ++static void __iomem *ccm_base; ++static void __iomem *l2_base; ++static void __iomem *gic_dist_base; ++static u32 *irqs_used; ++ ++static void *ddr_freq_change_iram_base; ++static int ddr_settings_size; ++static int iomux_settings_size; ++static volatile unsigned int cpus_in_wfe; ++static volatile bool wait_for_ddr_freq_update; ++static int curr_ddr_rate; ++ ++void (*mx6_change_ddr_freq)(u32 freq, void *ddr_settings, ++ bool dll_mode, void *iomux_offsets) = NULL; ++ ++extern unsigned int ddr_med_rate; ++extern unsigned int ddr_normal_rate; ++extern int low_bus_freq_mode; ++extern int audio_bus_freq_mode; ++extern void mx6_ddr3_freq_change(u32 freq, void *ddr_settings, ++ bool dll_mode, void *iomux_offsets); ++extern unsigned long save_ttbr1(void); ++extern void restore_ttbr1(unsigned long ttbr1); ++ ++#ifdef CONFIG_SMP ++extern void __iomem *imx_scu_base; ++static unsigned int online_cpus; ++#endif ++ ++#define MIN_DLL_ON_FREQ 333000000 ++#define MAX_DLL_OFF_FREQ 125000000 ++#define DDR_FREQ_CHANGE_SIZE 0x2000 ++ ++unsigned long ddr3_dll_mx6q[][2] = { ++ {0x0c, 0x0}, ++ {0x10, 0x0}, ++ {0x1C, 0x04088032}, ++ {0x1C, 0x0408803a}, ++ {0x1C, 0x08408030}, ++ {0x1C, 0x08408038}, ++ {0x818, 0x0}, ++}; ++ ++unsigned long ddr3_calibration[][2] = { ++ {0x83c, 0x0}, ++ {0x840, 0x0}, ++ {0x483c, 0x0}, ++ {0x4840, 0x0}, ++ {0x848, 0x0}, ++ {0x4848, 0x0}, ++ {0x850, 0x0}, ++ {0x4850, 0x0}, ++}; ++ ++unsigned long ddr3_dll_mx6dl[][2] = { ++ {0x0c, 0x0}, ++ {0x10, 0x0}, ++ {0x1C, 
0x04008032}, ++ {0x1C, 0x0400803a}, ++ {0x1C, 0x07208030}, ++ {0x1C, 0x07208038}, ++ {0x818, 0x0}, ++}; ++ ++unsigned long iomux_offsets_mx6q[][2] = { ++ {0x5A8, 0x0}, ++ {0x5B0, 0x0}, ++ {0x524, 0x0}, ++ {0x51C, 0x0}, ++ {0x518, 0x0}, ++ {0x50C, 0x0}, ++ {0x5B8, 0x0}, ++ {0x5C0, 0x0}, ++}; ++ ++unsigned long iomux_offsets_mx6dl[][2] = { ++ {0x4BC, 0x0}, ++ {0x4C0, 0x0}, ++ {0x4C4, 0x0}, ++ {0x4C8, 0x0}, ++ {0x4CC, 0x0}, ++ {0x4D0, 0x0}, ++ {0x4D4, 0x0}, ++ {0x4D8, 0x0}, ++}; ++ ++unsigned long ddr3_400[][2] = { ++ {0x83c, 0x42490249}, ++ {0x840, 0x02470247}, ++ {0x483c, 0x42570257}, ++ {0x4840, 0x02400240}, ++ {0x848, 0x4039363C}, ++ {0x4848, 0x3A39333F}, ++ {0x850, 0x38414441}, ++ {0x4850, 0x472D4833} ++}; ++ ++int can_change_ddr_freq(void) ++{ ++ return 0; ++} ++ ++/* ++ * each active core apart from the one changing ++ * the DDR frequency will execute this function. ++ * the rest of the cores have to remain in WFE ++ * state until the frequency is changed. ++ */ ++irqreturn_t wait_in_wfe_irq(int irq, void *dev_id) ++{ ++ u32 me = smp_processor_id(); ++ ++ *((char *)(&cpus_in_wfe) + (u8)me) = 0xff; ++ ++ while (wait_for_ddr_freq_update) ++ wfe(); ++ ++ *((char *)(&cpus_in_wfe) + (u8)me) = 0; ++ ++ return IRQ_HANDLED; ++} ++ ++/* change the DDR frequency. */ ++int update_ddr_freq(int ddr_rate) ++{ ++ int i, j; ++ bool dll_off = false; ++ int me = 0; ++ unsigned long ttbr1; ++#ifdef CONFIG_SMP ++ unsigned int reg; ++ int cpu = 0; ++#endif ++ ++ if (!can_change_ddr_freq()) ++ return -1; ++ ++ if (ddr_rate == curr_ddr_rate) ++ return 0; ++ ++ printk(KERN_DEBUG "\nBus freq set to %d start...\n", ddr_rate); ++ ++ if (low_bus_freq_mode || audio_bus_freq_mode) ++ dll_off = true; ++ ++ iram_ddr_settings[0][0] = ddr_settings_size; ++ iram_iomux_settings[0][0] = iomux_settings_size; ++ if (ddr_rate == ddr_med_rate && cpu_is_imx6q() && ++ ddr_med_rate != ddr_normal_rate) { ++ for (i = 0; i < ARRAY_SIZE(ddr3_dll_mx6q); i++) { ++ iram_ddr_settings[i + 1][0] = ++ normal_mmdc_settings[i][0]; ++ iram_ddr_settings[i + 1][1] = ++ normal_mmdc_settings[i][1]; ++ } ++ for (j = 0, i = ARRAY_SIZE(ddr3_dll_mx6q); ++ i < iram_ddr_settings[0][0]; j++, i++) { ++ iram_ddr_settings[i + 1][0] = ++ ddr3_400[j][0]; ++ iram_ddr_settings[i + 1][1] = ++ ddr3_400[j][1]; ++ } ++ } else if (ddr_rate == ddr_normal_rate) { ++ for (i = 0; i < iram_ddr_settings[0][0]; i++) { ++ iram_ddr_settings[i + 1][0] = ++ normal_mmdc_settings[i][0]; ++ iram_ddr_settings[i + 1][1] = ++ normal_mmdc_settings[i][1]; ++ } ++ } ++ ++ /* ensure that all Cores are in WFE. */ ++ local_irq_disable(); ++ ++#ifdef CONFIG_SMP ++ me = smp_processor_id(); ++ ++ /* Make sure all the online cores are active */ ++ while (1) { ++ bool not_exited_busfreq = false; ++ for_each_online_cpu(cpu) { ++ u32 reg = __raw_readl(imx_scu_base + 0x08); ++ if (reg & (0x02 << (cpu * 8))) ++ not_exited_busfreq = true; ++ } ++ if (!not_exited_busfreq) ++ break; ++ } ++ ++ wmb(); ++ wait_for_ddr_freq_update = 1; ++ dsb(); ++ ++ online_cpus = readl_relaxed(imx_scu_base + 0x08); ++ for_each_online_cpu(cpu) { ++ *((char *)(&online_cpus) + (u8)cpu) = 0x02; ++ if (cpu != me) { ++ /* set the interrupt to be pending in the GIC. 
*/ ++ reg = 1 << (irqs_used[cpu] % 32); ++ writel_relaxed(reg, gic_dist_base + GIC_DIST_PENDING_SET ++ + (irqs_used[cpu] / 32) * 4); ++ } ++ } ++ /* Wait for the other active CPUs to idle */ ++ while (1) { ++ u32 reg = readl_relaxed(imx_scu_base + 0x08); ++ reg |= (0x02 << (me * 8)); ++ if (reg == online_cpus) ++ break; ++ } ++#endif ++ ++ /* Ensure iram_tlb_phys_addr is flushed to DDR. */ ++ /*__cpuc_flush_dcache_area(&iram_tlb_phys_addr, sizeof(iram_tlb_phys_addr)); ++ outer_clean_range(virt_to_phys(&iram_tlb_phys_addr), virt_to_phys(&iram_tlb_phys_addr + 1));*/ ++ ++ /* ++ * Flush the TLB, to ensure no TLB maintenance occurs ++ * when DDR is in self-refresh. ++ */ ++ local_flush_tlb_all(); ++ ++ ttbr1 = save_ttbr1(); ++ /* Now we can change the DDR frequency. */ ++ mx6_change_ddr_freq(ddr_rate, iram_ddr_settings, ++ dll_off, iram_iomux_settings); ++ restore_ttbr1(ttbr1); ++ curr_ddr_rate = ddr_rate; ++ ++#ifdef CONFIG_SMP ++ wmb(); ++ /* DDR frequency change is done . */ ++ wait_for_ddr_freq_update = 0; ++ dsb(); ++ ++ /* wake up all the cores. */ ++ sev(); ++#endif ++ ++ local_irq_enable(); ++ ++ printk(KERN_DEBUG "Bus freq set to %d done! cpu=%d\n", ddr_rate, me); ++ ++ return 0; ++} ++ ++int init_mmdc_ddr3_settings(struct platform_device *busfreq_pdev) ++{ ++ struct device *dev = &busfreq_pdev->dev; ++ struct platform_device *ocram_dev; ++ unsigned int iram_paddr; ++ int i, err; ++ u32 cpu; ++ struct device_node *node; ++ struct gen_pool *iram_pool; ++ ++ node = of_find_compatible_node(NULL, NULL, "fsl,imx6q-mmdc-combine"); ++ if (!node) { ++ pr_err("failed to find imx6q-mmdc device tree data!\n"); ++ return -EINVAL; ++ } ++ mmdc_base = of_iomap(node, 0); ++ WARN(!mmdc_base, "unable to map mmdc registers\n"); ++ ++ node = NULL; ++ if (cpu_is_imx6q()) ++ node = of_find_compatible_node(NULL, NULL, "fsl,imx6q-iomuxc"); ++ if (cpu_is_imx6dl()) ++ node = of_find_compatible_node(NULL, NULL, ++ "fsl,imx6dl-iomuxc"); ++ if (!node) { ++ pr_err("failed to find imx6q-iomux device tree data!\n"); ++ return -EINVAL; ++ } ++ iomux_base = of_iomap(node, 0); ++ WARN(!iomux_base, "unable to map iomux registers\n"); ++ ++ node = of_find_compatible_node(NULL, NULL, "fsl,imx6q-ccm"); ++ if (!node) { ++ pr_err("failed to find imx6q-ccm device tree data!\n"); ++ return -EINVAL; ++ } ++ ccm_base = of_iomap(node, 0); ++ WARN(!ccm_base, "unable to map mmdc registers\n"); ++ ++ node = of_find_compatible_node(NULL, NULL, "arm,pl310-cache"); ++ if (!node) { ++ pr_err("failed to find imx6q-pl310-cache device tree data!\n"); ++ return -EINVAL; ++ } ++ l2_base = of_iomap(node, 0); ++ WARN(!ccm_base, "unable to map mmdc registers\n"); ++ ++ node = NULL; ++ node = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-gic"); ++ if (!node) { ++ pr_err("failed to find imx6q-a9-gic device tree data!\n"); ++ return -EINVAL; ++ } ++ gic_dist_base = of_iomap(node, 0); ++ WARN(!gic_dist_base, "unable to map gic dist registers\n"); ++ ++ if (cpu_is_imx6q()) ++ ddr_settings_size = ARRAY_SIZE(ddr3_dll_mx6q) + ++ ARRAY_SIZE(ddr3_calibration); ++ if (cpu_is_imx6dl()) ++ ddr_settings_size = ARRAY_SIZE(ddr3_dll_mx6dl) + ++ ARRAY_SIZE(ddr3_calibration); ++ ++ normal_mmdc_settings = kmalloc((ddr_settings_size * 8), GFP_KERNEL); ++ if (cpu_is_imx6q()) { ++ memcpy(normal_mmdc_settings, ddr3_dll_mx6q, ++ sizeof(ddr3_dll_mx6q)); ++ memcpy(((char *)normal_mmdc_settings + sizeof(ddr3_dll_mx6q)), ++ ddr3_calibration, sizeof(ddr3_calibration)); ++ } ++ if (cpu_is_imx6dl()) { ++ memcpy(normal_mmdc_settings, ddr3_dll_mx6dl, ++ 
sizeof(ddr3_dll_mx6dl)); ++ memcpy(((char *)normal_mmdc_settings + sizeof(ddr3_dll_mx6dl)), ++ ddr3_calibration, sizeof(ddr3_calibration)); ++ } ++ /* store the original DDR settings at boot. */ ++ for (i = 0; i < ddr_settings_size; i++) { ++ /* ++ * writes via command mode register cannot be read back. ++ * hence hardcode them in the initial static array. ++ * this may require modification on a per customer basis. ++ */ ++ if (normal_mmdc_settings[i][0] != 0x1C) ++ normal_mmdc_settings[i][1] = ++ readl_relaxed(mmdc_base ++ + normal_mmdc_settings[i][0]); ++ } ++ ++ irqs_used = devm_kzalloc(dev, sizeof(u32) * num_present_cpus(), ++ GFP_KERNEL); ++ ++ for_each_online_cpu(cpu) { ++ int irq; ++ ++ /* ++ * set up a reserved interrupt to get all ++ * the active cores into a WFE state ++ * before changing the DDR frequency. ++ */ ++ irq = platform_get_irq(busfreq_pdev, cpu); ++ err = request_irq(irq, wait_in_wfe_irq, ++ IRQF_PERCPU, "mmdc_1", NULL); ++ if (err) { ++ dev_err(dev, ++ "Busfreq:request_irq failed %d, err = %d\n", ++ irq, err); ++ return err; ++ } ++ err = irq_set_affinity(irq, cpumask_of(cpu)); ++ if (err) { ++ dev_err(dev, ++ "Busfreq: Cannot set irq affinity irq=%d,\n", ++ irq); ++ return err; ++ } ++ irqs_used[cpu] = irq; ++ } ++ ++ node = NULL; ++ node = of_find_compatible_node(NULL, NULL, "mmio-sram"); ++ if (!node) { ++ dev_err(dev, "%s: failed to find ocram node\n", ++ __func__); ++ return -EINVAL; ++ } ++ ++ ocram_dev = of_find_device_by_node(node); ++ if (!ocram_dev) { ++ dev_err(dev, "failed to find ocram device!\n"); ++ return -EINVAL; ++ } ++ ++ iram_pool = dev_get_gen_pool(&ocram_dev->dev); ++ if (!iram_pool) { ++ dev_err(dev, "iram pool unavailable!\n"); ++ return -EINVAL; ++ } ++ ++ iomux_settings_size = ARRAY_SIZE(iomux_offsets_mx6q); ++ iram_iomux_settings = (void*)gen_pool_alloc(iram_pool, ++ (iomux_settings_size * 8) + 8); ++ if (!iram_iomux_settings) { ++ dev_err(dev, "unable to alloc iram for IOMUX settings!\n"); ++ return -ENOMEM; ++ } ++ ++ /* ++ * Allocate extra space to store the number of entries in the ++ * ddr_settings plus 4 extra regsiter information that needs ++ * to be passed to the frequency change code. ++ * sizeof(iram_ddr_settings) = sizeof(ddr_settings) + ++ * entries in ddr_settings + 16. ++ * The last 4 enties store the addresses of the registers: ++ * CCM_BASE_ADDR ++ * MMDC_BASE_ADDR ++ * IOMUX_BASE_ADDR ++ * L2X0_BASE_ADDR ++ */ ++ iram_ddr_settings = (void*)gen_pool_alloc(iram_pool, ++ (ddr_settings_size * 8) + 8 + 32); ++ if (!iram_ddr_settings) { ++ dev_err(dev, "unable to alloc iram for ddr settings!\n"); ++ return -ENOMEM; ++ } ++ i = ddr_settings_size + 1; ++ iram_ddr_settings[i][0] = (unsigned long)mmdc_base; ++ iram_ddr_settings[i+1][0] = (unsigned long)ccm_base; ++ iram_ddr_settings[i+2][0] = (unsigned long)iomux_base; ++ iram_ddr_settings[i+3][0] = (unsigned long)l2_base; ++ ++ if (cpu_is_imx6q()) { ++ /* store the IOMUX settings at boot. 
*/ ++ for (i = 0; i < iomux_settings_size; i++) { ++ iomux_offsets_mx6q[i][1] = ++ readl_relaxed(iomux_base + ++ iomux_offsets_mx6q[i][0]); ++ iram_iomux_settings[i+1][0] = iomux_offsets_mx6q[i][0]; ++ iram_iomux_settings[i+1][1] = iomux_offsets_mx6q[i][1]; ++ } ++ } ++ ++ if (cpu_is_imx6dl()) { ++ for (i = 0; i < iomux_settings_size; i++) { ++ iomux_offsets_mx6dl[i][1] = ++ readl_relaxed(iomux_base + ++ iomux_offsets_mx6dl[i][0]); ++ iram_iomux_settings[i+1][0] = iomux_offsets_mx6dl[i][0]; ++ iram_iomux_settings[i+1][1] = iomux_offsets_mx6dl[i][1]; ++ } ++ } ++ ++ ddr_freq_change_iram_base = (void*)gen_pool_alloc(iram_pool, ++ DDR_FREQ_CHANGE_SIZE); ++ if (!ddr_freq_change_iram_base) { ++ dev_err(dev, "Cannot alloc iram for ddr freq change code!\n"); ++ return -ENOMEM; ++ } ++ ++ iram_paddr = gen_pool_virt_to_phys(iram_pool, ++ (unsigned long)ddr_freq_change_iram_base); ++ /* ++ * Need to remap the area here since we want ++ * the memory region to be executable. ++ */ ++ ddr_freq_change_iram_base = __arm_ioremap(iram_paddr, ++ DDR_FREQ_CHANGE_SIZE, ++ MT_MEMORY_RWX_NONCACHED); ++ mx6_change_ddr_freq = (void *)fncpy(ddr_freq_change_iram_base, ++ &mx6_ddr3_freq_change, DDR_FREQ_CHANGE_SIZE); ++ ++ curr_ddr_rate = ddr_normal_rate; ++ ++ return 0; ++} +diff -Nur linux-4.1.3/arch/arm/mach-imx/busfreq-imx6.c linux-xbian-imx6/arch/arm/mach-imx/busfreq-imx6.c +--- linux-4.1.3/arch/arm/mach-imx/busfreq-imx6.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/arch/arm/mach-imx/busfreq-imx6.c 2015-07-27 23:13:01.073153409 +0200 +@@ -0,0 +1,984 @@ ++/* ++ * Copyright (C) 2011-2013 Freescale Semiconductor, Inc. All Rights Reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ++ */ ++ ++/*! ++ * @file busfreq-imx6.c ++ * ++ * @brief A common API for the Freescale Semiconductor iMX6 Busfreq API ++ * ++ * The APIs are for setting bus frequency to different values based on the ++ * highest freqeuncy requested. 
++ * ++ * @ingroup PM ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "hardware.h" ++ ++#define LPAPM_CLK 24000000 ++#define DDR3_AUDIO_CLK 50000000 ++#define LPDDR2_AUDIO_CLK 100000000 ++ ++int vpu352 = 0; ++ ++int high_bus_freq_mode; ++int med_bus_freq_mode; ++int audio_bus_freq_mode; ++int low_bus_freq_mode; ++int ultra_low_bus_freq_mode; ++unsigned int ddr_med_rate; ++unsigned int ddr_normal_rate; ++ ++#ifdef CONFIG_ARM_IMX6Q_CPUFREQ ++static int bus_freq_scaling_initialized; ++static struct device *busfreq_dev; ++static int busfreq_suspended; ++static u32 org_arm_rate; ++static int bus_freq_scaling_is_active; ++static int high_bus_count, med_bus_count, audio_bus_count, low_bus_count; ++static unsigned int ddr_low_rate; ++ ++extern int init_mmdc_lpddr2_settings(struct platform_device *dev); ++extern int init_mmdc_ddr3_settings(struct platform_device *dev); ++extern int update_ddr_freq(int ddr_rate); ++extern int update_lpddr2_freq(int ddr_rate); ++ ++DEFINE_MUTEX(bus_freq_mutex); ++static DEFINE_SPINLOCK(freq_lock); ++ ++static struct clk *pll2_400; ++static struct clk *periph_clk; ++static struct clk *periph_pre_clk; ++static struct clk *periph_clk2_sel; ++static struct clk *periph_clk2; ++static struct clk *osc_clk; ++static struct clk *cpu_clk; ++static struct clk *pll3; ++static struct clk *pll2; ++static struct clk *pll2_200; ++static struct clk *pll1_sys; ++static struct clk *periph2_clk; ++static struct clk *ocram_clk; ++static struct clk *ahb_clk; ++static struct clk *pll1_sw_clk; ++static struct clk *periph2_pre_clk; ++static struct clk *periph2_clk2_sel; ++static struct clk *periph2_clk2; ++static struct clk *step_clk; ++static struct clk *axi_sel_clk; ++static struct clk *pll3_pfd1_540m; ++ ++static u32 pll2_org_rate; ++static struct delayed_work low_bus_freq_handler; ++static struct delayed_work bus_freq_daemon; ++ ++static void enter_lpm_imx6sl(void) ++{ ++ unsigned long flags; ++ ++ if (high_bus_freq_mode) { ++ pll2_org_rate = clk_get_rate(pll2); ++ /* Set periph_clk to be sourced from OSC_CLK */ ++ clk_set_parent(periph_clk2_sel, osc_clk); ++ clk_set_parent(periph_clk, periph_clk2); ++ /* Ensure AHB/AXI clks are at 24MHz. */ ++ clk_set_rate(ahb_clk, LPAPM_CLK); ++ clk_set_rate(ocram_clk, LPAPM_CLK); ++ } ++ if (audio_bus_count) { ++ /* Set AHB to 8MHz to lower pwer.*/ ++ clk_set_rate(ahb_clk, LPAPM_CLK / 3); ++ ++ /* Set up DDR to 100MHz. */ ++ spin_lock_irqsave(&freq_lock, flags); ++ update_lpddr2_freq(LPDDR2_AUDIO_CLK); ++ spin_unlock_irqrestore(&freq_lock, flags); ++ ++ /* Fix the clock tree in kernel */ ++ clk_set_rate(pll2, pll2_org_rate); ++ clk_set_parent(periph2_pre_clk, pll2_200); ++ clk_set_parent(periph2_clk, periph2_pre_clk); ++ ++ if (low_bus_freq_mode || ultra_low_bus_freq_mode) { ++ /* ++ * Swtich ARM to run off PLL2_PFD2_400MHz ++ * since DDR is anyway at 100MHz. ++ */ ++ clk_set_parent(step_clk, pll2_400); ++ clk_set_parent(pll1_sw_clk, step_clk); ++ /* ++ * Ensure that the clock will be ++ * at original speed. ++ */ ++ clk_set_rate(cpu_clk, org_arm_rate); ++ } ++ low_bus_freq_mode = 0; ++ ultra_low_bus_freq_mode = 0; ++ audio_bus_freq_mode = 1; ++ } else { ++ u32 arm_div, pll1_rate; ++ org_arm_rate = clk_get_rate(cpu_clk); ++ if (low_bus_freq_mode && low_bus_count == 0) { ++ /* ++ * We are already in DDR @ 24MHz state, but ++ * no one but ARM needs the DDR. 
In this case, ++ * we can lower the DDR freq to 1MHz when ARM ++ * enters WFI in this state. Keep track of this state. ++ */ ++ ultra_low_bus_freq_mode = 1; ++ low_bus_freq_mode = 0; ++ audio_bus_freq_mode = 0; ++ } else { ++ if (!ultra_low_bus_freq_mode && !low_bus_freq_mode) { ++ /* ++ * Set DDR to 24MHz. ++ * Since we are going to bypass PLL2, ++ * we need to move ARM clk off PLL2_PFD2 ++ * to PLL1. Make sure the PLL1 is running ++ * at the lowest possible freq. ++ */ ++ clk_set_rate(pll1_sys, ++ clk_round_rate(pll1_sys, org_arm_rate)); ++ pll1_rate = clk_get_rate(pll1_sys); ++ arm_div = pll1_rate / org_arm_rate + 1; ++ /* ++ * Ensure ARM CLK is lower before ++ * changing the parent. ++ */ ++ clk_set_rate(cpu_clk, org_arm_rate / arm_div); ++ /* Now set the ARM clk parent to PLL1_SYS. */ ++ clk_set_parent(pll1_sw_clk, pll1_sys); ++ ++ /* ++ * Set STEP_CLK back to OSC to save power and ++ * also to maintain the parent.The WFI iram code ++ * will switch step_clk to osc, but the clock API ++ * is not aware of the change and when a new request ++ * to change the step_clk parent to pll2_pfd2_400M ++ * is requested sometime later, the change is ignored. ++ */ ++ clk_set_parent(step_clk, osc_clk); ++ /* Now set DDR to 24MHz. */ ++ spin_lock_irqsave(&freq_lock, flags); ++ update_lpddr2_freq(LPAPM_CLK); ++ spin_unlock_irqrestore(&freq_lock, flags); ++ ++ /* ++ * Fix the clock tree in kernel. ++ * Make sure PLL2 rate is updated as it gets ++ * bypassed in the DDR freq change code. ++ */ ++ clk_set_rate(pll2, LPAPM_CLK); ++ clk_set_parent(periph2_clk2_sel, pll2); ++ clk_set_parent(periph2_clk, periph2_clk2_sel); ++ ++ } ++ if (low_bus_count == 0) { ++ ultra_low_bus_freq_mode = 1; ++ low_bus_freq_mode = 0; ++ } else { ++ ultra_low_bus_freq_mode = 0; ++ low_bus_freq_mode = 1; ++ } ++ audio_bus_freq_mode = 0; ++ } ++ } ++} ++ ++static void exit_lpm_imx6sl(void) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&freq_lock, flags); ++ /* Change DDR freq in IRAM. */ ++ update_lpddr2_freq(ddr_normal_rate); ++ spin_unlock_irqrestore(&freq_lock, flags); ++ ++ /* ++ * Fix the clock tree in kernel. ++ * Make sure PLL2 rate is updated as it gets ++ * un-bypassed in the DDR freq change code. ++ */ ++ clk_set_rate(pll2, pll2_org_rate); ++ clk_set_parent(periph2_pre_clk, pll2_400); ++ clk_set_parent(periph2_clk, periph2_pre_clk); ++ ++ /* Ensure that periph_clk is sourced from PLL2_400. */ ++ clk_set_parent(periph_pre_clk, pll2_400); ++ /* ++ * Before switching the perhiph_clk, ensure that the ++ * AHB/AXI will not be too fast. ++ */ ++ clk_set_rate(ahb_clk, LPAPM_CLK / 3); ++ clk_set_rate(ocram_clk, LPAPM_CLK / 2); ++ clk_set_parent(periph_clk, periph_pre_clk); ++ ++ if (low_bus_freq_mode || ultra_low_bus_freq_mode) { ++ /* Move ARM from PLL1_SW_CLK to PLL2_400. */ ++ clk_set_parent(step_clk, pll2_400); ++ clk_set_parent(pll1_sw_clk, step_clk); ++ clk_set_rate(cpu_clk, org_arm_rate); ++ ultra_low_bus_freq_mode = 0; ++ } ++} ++ ++int reduce_bus_freq(void) ++{ ++ int ret = 0; ++ clk_prepare_enable(pll3); ++ if (cpu_is_imx6sl()) ++ enter_lpm_imx6sl(); ++ else { ++ if (cpu_is_imx6dl() && (clk_get_parent(axi_sel_clk) ++ != periph_clk)) ++ /* Set axi to periph_clk */ ++ clk_set_parent(axi_sel_clk, periph_clk); ++ ++ if (audio_bus_count) { ++ /* Need to ensure that PLL2_PFD_400M is kept ON. 
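++			 * The clk_prepare_enable() below keeps its use count
++			 * non-zero for as long as we stay in audio mode; the
++			 * matching clk_disable_unprepare() runs when audio
++			 * mode is left again.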
++			 */
++			clk_prepare_enable(pll2_400);
++			update_ddr_freq(DDR3_AUDIO_CLK);
++			/* Make sure periph clk's parent also got updated */
++			ret = clk_set_parent(periph_clk2_sel, pll3);
++			if (ret)
++				dev_WARN(busfreq_dev,
++					"%s: %d: clk set parent fail!\n",
++					__func__, __LINE__);
++			ret = clk_set_parent(periph_pre_clk, pll2_200);
++			if (ret)
++				dev_WARN(busfreq_dev,
++					"%s: %d: clk set parent fail!\n",
++					__func__, __LINE__);
++			ret = clk_set_parent(periph_clk, periph_pre_clk);
++			if (ret)
++				dev_WARN(busfreq_dev,
++					"%s: %d: clk set parent fail!\n",
++					__func__, __LINE__);
++			audio_bus_freq_mode = 1;
++			low_bus_freq_mode = 0;
++		} else {
++			update_ddr_freq(LPAPM_CLK);
++			/* Make sure periph clk's parent also got updated */
++			ret = clk_set_parent(periph_clk2_sel, osc_clk);
++			if (ret)
++				dev_WARN(busfreq_dev,
++					"%s: %d: clk set parent fail!\n",
++					__func__, __LINE__);
++			/* Set periph_clk parent to OSC via periph_clk2_sel */
++			ret = clk_set_parent(periph_clk, periph_clk2);
++			if (ret)
++				dev_WARN(busfreq_dev,
++					"%s: %d: clk set parent fail!\n",
++					__func__, __LINE__);
++			if (audio_bus_freq_mode)
++				clk_disable_unprepare(pll2_400);
++			low_bus_freq_mode = 1;
++			audio_bus_freq_mode = 0;
++		}
++	}
++	clk_disable_unprepare(pll3);
++
++	med_bus_freq_mode = 0;
++	high_bus_freq_mode = 0;
++
++	if (audio_bus_freq_mode)
++		dev_dbg(busfreq_dev, "Bus freq set to audio mode. Count: "
++			"high %d, med %d, audio %d\n",
++			high_bus_count, med_bus_count, audio_bus_count);
++	if (low_bus_freq_mode)
++		dev_dbg(busfreq_dev, "Bus freq set to low mode. Count: "
++			"high %d, med %d, audio %d\n",
++			high_bus_count, med_bus_count, audio_bus_count);
++
++	return ret;
++}
++
++static void reduce_bus_freq_handler(struct work_struct *work)
++{
++	mutex_lock(&bus_freq_mutex);
++
++	reduce_bus_freq();
++
++	mutex_unlock(&bus_freq_mutex);
++}
++
++/*
++ * Set the DDR, AHB to 24MHz.
++ * This mode will be activated only when none of the modules that
++ * need a higher DDR or AHB frequency are active.
++ */
++int set_low_bus_freq(void)
++{
++	if (busfreq_suspended)
++		return 0;
++
++	if (!bus_freq_scaling_initialized || !bus_freq_scaling_is_active)
++		return 0;
++
++	/*
++	 * Check to see if we need to go from
++	 * low bus freq mode to audio bus freq mode.
++	 * If so, the change needs to be done immediately.
++	 */
++	if (audio_bus_count && (low_bus_freq_mode || ultra_low_bus_freq_mode))
++		reduce_bus_freq();
++	else
++		/*
++		 * Don't lower the frequency immediately. Instead
++		 * schedule a delayed work and drop the freq if
++		 * the conditions still remain the same.
++		 */
++		schedule_delayed_work(&low_bus_freq_handler,
++					usecs_to_jiffies(3000000));
++	return 0;
++}
++
++/*
++ * Set the DDR to either 528MHz or 400MHz for the iMX6q/d,
++ * or to 400MHz for the iMX6dl.
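++ * The high_bus_freq argument picks between them: non-zero selects
++ * ddr_normal_rate, zero selects the medium setpoint (ddr_med_rate),
++ * which only exists on the MX6Q/MX6D.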
++ */ ++int set_high_bus_freq(int high_bus_freq) ++{ ++ int ret = 0; ++ struct clk *periph_clk_parent; ++ ++ if (bus_freq_scaling_initialized && bus_freq_scaling_is_active) ++ cancel_delayed_work_sync(&low_bus_freq_handler); ++ ++ if (busfreq_suspended) ++ return 0; ++ ++ if (cpu_is_imx6q()) ++ periph_clk_parent = pll2; ++ else ++ periph_clk_parent = pll2_400; ++ ++ if (!bus_freq_scaling_initialized || !bus_freq_scaling_is_active) ++ return 0; ++ ++ if (high_bus_freq_mode) ++ return 0; ++ ++ /* medium bus freq is only supported for MX6DQ */ ++ if (med_bus_freq_mode && !high_bus_freq) ++ return 0; ++ ++ clk_prepare_enable(pll3); ++ if (cpu_is_imx6sl()) ++ exit_lpm_imx6sl(); ++ else { ++ if (high_bus_freq) { ++ update_ddr_freq(ddr_normal_rate); ++ /* Make sure periph clk's parent also got updated */ ++ ret = clk_set_parent(periph_clk2_sel, pll3); ++ if (ret) ++ dev_WARN(busfreq_dev, ++ "%s: %d: clk set parent fail!\n", ++ __func__, __LINE__); ++ ret = clk_set_parent(periph_pre_clk, periph_clk_parent); ++ if (ret) ++ dev_WARN(busfreq_dev, ++ "%s: %d: clk set parent fail!\n", ++ __func__, __LINE__); ++ ret = clk_set_parent(periph_clk, periph_pre_clk); ++ if (ret) ++ dev_WARN(busfreq_dev, ++ "%s: %d: clk set parent fail!\n", ++ __func__, __LINE__); ++ if (cpu_is_imx6dl() && (clk_get_parent(axi_sel_clk) ++ != pll3_pfd1_540m)) ++ /* Set axi to pll3_pfd1_540m */ ++ clk_set_parent(axi_sel_clk, pll3_pfd1_540m); ++ } else { ++ update_ddr_freq(ddr_med_rate); ++ /* Make sure periph clk's parent also got updated */ ++ ret = clk_set_parent(periph_clk2_sel, pll3); ++ if (ret) ++ dev_WARN(busfreq_dev, ++ "%s: %d: clk set parent fail!\n", ++ __func__, __LINE__); ++ ret = clk_set_parent(periph_pre_clk, pll2_400); ++ if (ret) ++ dev_WARN(busfreq_dev, ++ "%s: %d: clk set parent fail!\n", ++ __func__, __LINE__); ++ ret = clk_set_parent(periph_clk, periph_pre_clk); ++ if (ret) ++ dev_WARN(busfreq_dev, ++ "%s: %d: clk set parent fail!\n", ++ __func__, __LINE__); ++ } ++ if (audio_bus_freq_mode) ++ clk_disable_unprepare(pll2_400); ++ } ++ ++ high_bus_freq_mode = 1; ++ med_bus_freq_mode = 0; ++ low_bus_freq_mode = 0; ++ audio_bus_freq_mode = 0; ++ ++ clk_disable_unprepare(pll3); ++ ++ if (high_bus_freq_mode) ++ dev_dbg(busfreq_dev, "Bus freq set to high mode. Count:\ ++ high %d, med %d, audio %d\n", ++ high_bus_count, med_bus_count, audio_bus_count); ++ if (med_bus_freq_mode) ++ dev_dbg(busfreq_dev, "Bus freq set to med mode. Count:\ ++ high %d, med %d, audio %d\n", ++ high_bus_count, med_bus_count, audio_bus_count); ++ ++ return 0; ++} ++#endif ++ ++void request_bus_freq(enum bus_freq_mode mode) ++{ ++#ifdef CONFIG_ARM_IMX6Q_CPUFREQ ++ mutex_lock(&bus_freq_mutex); ++ ++ if (mode == BUS_FREQ_HIGH) ++ high_bus_count++; ++ else if (mode == BUS_FREQ_MED) ++ med_bus_count++; ++ else if (mode == BUS_FREQ_AUDIO) ++ audio_bus_count++; ++ else if (mode == BUS_FREQ_LOW) ++ low_bus_count++; ++ ++ if (busfreq_suspended || !bus_freq_scaling_initialized || ++ !bus_freq_scaling_is_active) { ++ mutex_unlock(&bus_freq_mutex); ++ return; ++ } ++ cancel_delayed_work_sync(&low_bus_freq_handler); ++ ++ if (cpu_is_imx6dl()) { ++ /* No support for medium setpoint on MX6DL. 
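++		 * Promote the request to HIGH instead, and take an
++		 * extra high_bus_count reference so request/release
++		 * stay balanced.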
*/ ++ if (mode == BUS_FREQ_MED) { ++ high_bus_count++; ++ mode = BUS_FREQ_HIGH; ++ } ++ } ++ ++ if ((mode == BUS_FREQ_HIGH) && (!high_bus_freq_mode)) { ++ set_high_bus_freq(1); ++ mutex_unlock(&bus_freq_mutex); ++ return; ++ } ++ ++ if ((mode == BUS_FREQ_MED) && (!high_bus_freq_mode) && ++ (!med_bus_freq_mode)) { ++ set_high_bus_freq(0); ++ mutex_unlock(&bus_freq_mutex); ++ return; ++ } ++ if ((mode == BUS_FREQ_AUDIO) && (!high_bus_freq_mode) && ++ (!med_bus_freq_mode) && (!audio_bus_freq_mode)) { ++ set_low_bus_freq(); ++ mutex_unlock(&bus_freq_mutex); ++ return; ++ } ++ mutex_unlock(&bus_freq_mutex); ++#endif ++ return; ++} ++EXPORT_SYMBOL(request_bus_freq); ++ ++void release_bus_freq(enum bus_freq_mode mode) ++{ ++#ifdef CONFIG_ARM_IMX6Q_CPUFREQ ++ mutex_lock(&bus_freq_mutex); ++ ++ if (mode == BUS_FREQ_HIGH) { ++ if (high_bus_count == 0) { ++ dev_err(busfreq_dev, "high bus count mismatch!\n"); ++ dump_stack(); ++ mutex_unlock(&bus_freq_mutex); ++ return; ++ } ++ high_bus_count--; ++ } else if (mode == BUS_FREQ_MED) { ++ if (med_bus_count == 0) { ++ dev_err(busfreq_dev, "med bus count mismatch!\n"); ++ dump_stack(); ++ mutex_unlock(&bus_freq_mutex); ++ return; ++ } ++ med_bus_count--; ++ } else if (mode == BUS_FREQ_AUDIO) { ++ if (audio_bus_count == 0) { ++ dev_err(busfreq_dev, "audio bus count mismatch!\n"); ++ dump_stack(); ++ mutex_unlock(&bus_freq_mutex); ++ return; ++ } ++ audio_bus_count--; ++ } else if (mode == BUS_FREQ_LOW) { ++ if (low_bus_count == 0) { ++ dev_err(busfreq_dev, "low bus count mismatch!\n"); ++ dump_stack(); ++ mutex_unlock(&bus_freq_mutex); ++ return; ++ } ++ low_bus_count--; ++ } ++ ++ if (busfreq_suspended || !bus_freq_scaling_initialized || ++ !bus_freq_scaling_is_active) { ++ mutex_unlock(&bus_freq_mutex); ++ return; ++ } ++ ++ if (cpu_is_imx6dl()) { ++ /* No support for medium setpoint on MX6DL. 
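++		 * Mirror of the promotion in request_bus_freq(): a MED
++		 * release must drop the high_bus_count reference that
++		 * the promoted MED request took.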
*/ ++ if (mode == BUS_FREQ_MED) { ++ high_bus_count--; ++ mode = BUS_FREQ_HIGH; ++ } ++ } ++ ++ if ((!audio_bus_freq_mode) && (high_bus_count == 0) && ++ (med_bus_count == 0) && (audio_bus_count != 0)) { ++ set_low_bus_freq(); ++ mutex_unlock(&bus_freq_mutex); ++ return; ++ } ++ if ((!low_bus_freq_mode) && (high_bus_count == 0) && ++ (med_bus_count == 0) && (audio_bus_count == 0) && ++ (low_bus_count != 0)) { ++ set_low_bus_freq(); ++ mutex_unlock(&bus_freq_mutex); ++ return; ++ } ++ if ((!ultra_low_bus_freq_mode) && (high_bus_count == 0) && ++ (med_bus_count == 0) && (audio_bus_count == 0) && ++ (low_bus_count == 0)) { ++ set_low_bus_freq(); ++ mutex_unlock(&bus_freq_mutex); ++ return; ++ } ++ ++ mutex_unlock(&bus_freq_mutex); ++#endif ++ return; ++} ++EXPORT_SYMBOL(release_bus_freq); ++ ++#ifdef CONFIG_ARM_IMX6Q_CPUFREQ ++static void bus_freq_daemon_handler(struct work_struct *work) ++{ ++ mutex_lock(&bus_freq_mutex); ++ if ((!low_bus_freq_mode) && (!ultra_low_bus_freq_mode) && (high_bus_count == 0) && ++ (med_bus_count == 0) && (audio_bus_count == 0)) ++ set_low_bus_freq(); ++ mutex_unlock(&bus_freq_mutex); ++} ++ ++static ssize_t bus_freq_scaling_enable_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ if (bus_freq_scaling_is_active) ++ return sprintf(buf, "Bus frequency scaling is enabled\n"); ++ else ++ return sprintf(buf, "Bus frequency scaling is disabled\n"); ++} ++ ++static ssize_t vpu352_enable_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ if (vpu352) ++ return sprintf(buf, "VPU352M is enabled\n"); ++ else ++ return sprintf(buf, "VPU352M is disabled\n"); ++} ++ ++static int vpu352_setup(char *options) ++{ ++ return kstrtol(options, 0, (long int *)&vpu352); ++} ++ ++static ssize_t bus_freq_scaling_enable_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t size) ++{ ++ if (strncmp(buf, "1", 1) == 0) { ++ bus_freq_scaling_is_active = 1; ++ set_high_bus_freq(1); ++ /* ++ * We set bus freq to highest at the beginning, ++ * so we use this daemon thread to make sure system ++ * can enter low bus mode if ++ * there is no high bus request pending ++ */ ++ schedule_delayed_work(&bus_freq_daemon, ++ usecs_to_jiffies(5000000)); ++ } else if (strncmp(buf, "0", 1) == 0) { ++ if (bus_freq_scaling_is_active) ++ set_high_bus_freq(1); ++ bus_freq_scaling_is_active = 0; ++ } ++ return size; ++} ++ ++static int bus_freq_pm_notify(struct notifier_block *nb, unsigned long event, ++ void *dummy) ++{ ++ mutex_lock(&bus_freq_mutex); ++ ++ if (event == PM_SUSPEND_PREPARE) { ++ high_bus_count++; ++ set_high_bus_freq(1); ++ busfreq_suspended = 1; ++ } else if (event == PM_POST_SUSPEND) { ++ busfreq_suspended = 0; ++ high_bus_count--; ++ schedule_delayed_work(&bus_freq_daemon, ++ usecs_to_jiffies(5000000)); ++ } ++ ++ mutex_unlock(&bus_freq_mutex); ++ ++ return NOTIFY_OK; ++} ++ ++static int busfreq_reboot_notifier_event(struct notifier_block *this, ++ unsigned long event, void *ptr) ++{ ++ /* System is rebooting. Set the system into high_bus_freq_mode. 
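++	 * The vote is deliberately never released; it pins the bus at
++	 * the high setpoint until the machine actually resets.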
*/ ++ request_bus_freq(BUS_FREQ_HIGH); ++ ++ return 0; ++} ++ ++static struct notifier_block imx_bus_freq_pm_notifier = { ++ .notifier_call = bus_freq_pm_notify, ++}; ++ ++static struct notifier_block imx_busfreq_reboot_notifier = { ++ .notifier_call = busfreq_reboot_notifier_event, ++}; ++ ++ ++static DEVICE_ATTR(enable, 0644, bus_freq_scaling_enable_show, ++ bus_freq_scaling_enable_store); ++static DEVICE_ATTR(vpu352, 0444, vpu352_enable_show, ++ NULL); ++#endif ++ ++/*! ++ * This is the probe routine for the bus frequency driver. ++ * ++ * @param pdev The platform device structure ++ * ++ * @return The function returns 0 on success ++ * ++ */ ++ ++static int busfreq_probe(struct platform_device *pdev) ++{ ++#ifdef CONFIG_ARM_IMX6Q_CPUFREQ ++ u32 err; ++ ++ busfreq_dev = &pdev->dev; ++ ++ pll2_400 = devm_clk_get(&pdev->dev, "pll2_pfd2_396m"); ++ if (IS_ERR(pll2_400)) { ++ dev_err(busfreq_dev, "%s: failed to get pll2_pfd2_396m\n", ++ __func__); ++ return PTR_ERR(pll2_400); ++ } ++ ++ pll2_200 = devm_clk_get(&pdev->dev, "pll2_198m"); ++ if (IS_ERR(pll2_200)) { ++ dev_err(busfreq_dev, "%s: failed to get pll2_198m\n", ++ __func__); ++ return PTR_ERR(pll2_200); ++ } ++ ++ pll2 = devm_clk_get(&pdev->dev, "pll2_bus"); ++ if (IS_ERR(pll2)) { ++ dev_err(busfreq_dev, "%s: failed to get pll2_bus\n", ++ __func__); ++ return PTR_ERR(pll2); ++ } ++ ++ cpu_clk = devm_clk_get(&pdev->dev, "arm"); ++ if (IS_ERR(cpu_clk)) { ++ dev_err(busfreq_dev, "%s: failed to get cpu_clk\n", ++ __func__); ++ return PTR_ERR(cpu_clk); ++ } ++ ++ pll3 = devm_clk_get(&pdev->dev, "pll3_usb_otg"); ++ if (IS_ERR(pll3)) { ++ dev_err(busfreq_dev, "%s: failed to get pll3_usb_otg\n", ++ __func__); ++ return PTR_ERR(pll3); ++ } ++ ++ periph_clk = devm_clk_get(&pdev->dev, "periph"); ++ if (IS_ERR(periph_clk)) { ++ dev_err(busfreq_dev, "%s: failed to get periph\n", ++ __func__); ++ return PTR_ERR(periph_clk); ++ } ++ ++ periph_pre_clk = devm_clk_get(&pdev->dev, "periph_pre"); ++ if (IS_ERR(periph_pre_clk)) { ++ dev_err(busfreq_dev, "%s: failed to get periph_pre\n", ++ __func__); ++ return PTR_ERR(periph_pre_clk); ++ } ++ ++ periph_clk2 = devm_clk_get(&pdev->dev, "periph_clk2"); ++ if (IS_ERR(periph_clk2)) { ++ dev_err(busfreq_dev, "%s: failed to get periph_clk2\n", ++ __func__); ++ return PTR_ERR(periph_clk2); ++ } ++ ++ periph_clk2_sel = devm_clk_get(&pdev->dev, "periph_clk2_sel"); ++ if (IS_ERR(periph_clk2_sel)) { ++ dev_err(busfreq_dev, "%s: failed to get periph_clk2_sel\n", ++ __func__); ++ return PTR_ERR(periph_clk2_sel); ++ } ++ ++ osc_clk = devm_clk_get(&pdev->dev, "osc"); ++ if (IS_ERR(osc_clk)) { ++ dev_err(busfreq_dev, "%s: failed to get osc_clk\n", ++ __func__); ++ return PTR_ERR(osc_clk); ++ } ++ ++ if (cpu_is_imx6dl()) { ++ axi_sel_clk = devm_clk_get(&pdev->dev, "axi_sel"); ++ if (IS_ERR(axi_sel_clk)) { ++ dev_err(busfreq_dev, "%s: failed to get axi_sel_clk\n", ++ __func__); ++ return PTR_ERR(axi_sel_clk); ++ } ++ ++ pll3_pfd1_540m = devm_clk_get(&pdev->dev, "pll3_pfd1_540m"); ++ if (IS_ERR(pll3_pfd1_540m)) { ++ dev_err(busfreq_dev, ++ "%s: failed to get pll3_pfd1_540m\n", __func__); ++ return PTR_ERR(pll3_pfd1_540m); ++ } ++ } ++ ++ if (cpu_is_imx6sl()) { ++ pll1_sys = devm_clk_get(&pdev->dev, "pll1_sys"); ++ if (IS_ERR(pll1_sys)) { ++ dev_err(busfreq_dev, "%s: failed to get pll1_sys\n", ++ __func__); ++ return PTR_ERR(pll1_sys); ++ } ++ ++ ahb_clk = devm_clk_get(&pdev->dev, "ahb"); ++ if (IS_ERR(ahb_clk)) { ++ dev_err(busfreq_dev, "%s: failed to get ahb_clk\n", ++ __func__); ++ return PTR_ERR(ahb_clk); ++ } ++ ++ 
ocram_clk = devm_clk_get(&pdev->dev, "ocram");
++		if (IS_ERR(ocram_clk)) {
++			dev_err(busfreq_dev, "%s: failed to get ocram_clk\n",
++				__func__);
++			return PTR_ERR(ocram_clk);
++		}
++
++		pll1_sw_clk = devm_clk_get(&pdev->dev, "pll1_sw");
++		if (IS_ERR(pll1_sw_clk)) {
++			dev_err(busfreq_dev, "%s: failed to get pll1_sw_clk\n",
++				__func__);
++			return PTR_ERR(pll1_sw_clk);
++		}
++
++		periph2_clk = devm_clk_get(&pdev->dev, "periph2");
++		if (IS_ERR(periph2_clk)) {
++			dev_err(busfreq_dev, "%s: failed to get periph2\n",
++				__func__);
++			return PTR_ERR(periph2_clk);
++		}
++
++		periph2_pre_clk = devm_clk_get(&pdev->dev, "periph2_pre");
++		if (IS_ERR(periph2_pre_clk)) {
++			dev_err(busfreq_dev,
++				"%s: failed to get periph2_pre_clk\n",
++				__func__);
++			return PTR_ERR(periph2_pre_clk);
++		}
++
++		periph2_clk2 = devm_clk_get(&pdev->dev, "periph2_clk2");
++		if (IS_ERR(periph2_clk2)) {
++			dev_err(busfreq_dev,
++				"%s: failed to get periph2_clk2\n",
++				__func__);
++			return PTR_ERR(periph2_clk2);
++		}
++
++		periph2_clk2_sel = devm_clk_get(&pdev->dev, "periph2_clk2_sel");
++		if (IS_ERR(periph2_clk2_sel)) {
++			dev_err(busfreq_dev,
++				"%s: failed to get periph2_clk2_sel\n",
++				__func__);
++			return PTR_ERR(periph2_clk2_sel);
++		}
++
++		step_clk = devm_clk_get(&pdev->dev, "step");
++		if (IS_ERR(step_clk)) {
++			dev_err(busfreq_dev,
++				"%s: failed to get step_clk\n",
++				__func__);
++			return PTR_ERR(step_clk);
++		}
++
++	}
++
++	err = sysfs_create_file(&busfreq_dev->kobj, &dev_attr_enable.attr);
++	if (err) {
++		dev_err(busfreq_dev,
++			"Unable to register sysdev entry for BUSFREQ");
++		return err;
++	}
++	err = sysfs_create_file(&busfreq_dev->kobj, &dev_attr_vpu352.attr);
++	if (err) {
++		dev_err(busfreq_dev,
++			"Unable to register sysdev entry for vpu352");
++		return err;
++	}
++
++	if (of_property_read_u32(pdev->dev.of_node, "fsl,max_ddr_freq",
++			&ddr_normal_rate)) {
++		dev_err(busfreq_dev, "max_ddr_freq entry missing\n");
++		return -EINVAL;
++	}
++#endif
++
++	high_bus_freq_mode = 1;
++	med_bus_freq_mode = 0;
++	low_bus_freq_mode = 0;
++	audio_bus_freq_mode = 0;
++	ultra_low_bus_freq_mode = 0;
++
++#ifdef CONFIG_ARM_IMX6Q_CPUFREQ
++	bus_freq_scaling_is_active = 1;
++	bus_freq_scaling_initialized = 1;
++
++	ddr_low_rate = LPAPM_CLK;
++	if (cpu_is_imx6q()) {
++		if (of_property_read_u32(pdev->dev.of_node, "fsl,med_ddr_freq",
++				&ddr_med_rate)) {
++			dev_info(busfreq_dev,
++					"DDR medium rate not supported.\n");
++			ddr_med_rate = ddr_normal_rate;
++		}
++	}
++
++	INIT_DELAYED_WORK(&low_bus_freq_handler, reduce_bus_freq_handler);
++	INIT_DELAYED_WORK(&bus_freq_daemon, bus_freq_daemon_handler);
++	register_pm_notifier(&imx_bus_freq_pm_notifier);
++	register_reboot_notifier(&imx_busfreq_reboot_notifier);
++
++	if (cpu_is_imx6sl())
++		err = init_mmdc_lpddr2_settings(pdev);
++	else
++		err = init_mmdc_ddr3_settings(pdev);
++	if (err) {
++		dev_err(busfreq_dev, "Busfreq init of MMDC failed\n");
++		return err;
++	}
++#endif
++	return 0;
++}
++
++static const struct of_device_id imx6_busfreq_ids[] = {
++	{ .compatible = "fsl,imx6_busfreq", },
++	{ /* sentinel */ }
++};
++
++static struct platform_driver busfreq_driver = {
++	.driver = {
++		.name = "imx6_busfreq",
++		.owner = THIS_MODULE,
++		.of_match_table = imx6_busfreq_ids,
++	},
++	.probe = busfreq_probe,
++};
++
++/*!
++ * Initialise the busfreq_driver.
++ *
++ * @return 0 on success; -ENODEV if the platform driver
++ * could not be registered.
++ */
++
++static int __init busfreq_init(void)
++{
++	if (vpu352) {
++		printk(KERN_INFO "VPU@352MHz activated. 
Bus freq driver module not loading\n"); ++ return 0; ++ } ++ ++ if (platform_driver_register(&busfreq_driver) != 0) ++ return -ENODEV; ++ ++ printk(KERN_INFO "Bus freq driver module loaded\n"); ++ ++ return 0; ++} ++ ++static void __exit busfreq_cleanup(void) ++{ ++#ifdef CONFIG_ARM_IMX6Q_CPUFREQ ++ sysfs_remove_file(&busfreq_dev->kobj, &dev_attr_enable.attr); ++ ++ bus_freq_scaling_initialized = 0; ++#endif ++ /* Unregister the device structure */ ++ platform_driver_unregister(&busfreq_driver); ++} ++ ++__setup("vpu352=", vpu352_setup); ++module_init(busfreq_init); ++module_exit(busfreq_cleanup); ++ ++MODULE_AUTHOR("Freescale Semiconductor, Inc."); ++MODULE_DESCRIPTION("BusFreq driver"); ++MODULE_LICENSE("GPL"); +diff -Nur linux-4.1.3/arch/arm/mach-imx/busfreq_lpddr2.c linux-xbian-imx6/arch/arm/mach-imx/busfreq_lpddr2.c +--- linux-4.1.3/arch/arm/mach-imx/busfreq_lpddr2.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/arch/arm/mach-imx/busfreq_lpddr2.c 2015-07-27 23:13:01.073153409 +0200 +@@ -0,0 +1,183 @@ ++/* ++ * Copyright (C) 2011-2013 Freescale Semiconductor, Inc. All Rights Reserved. ++ */ ++ ++/* ++ * The code contained herein is licensed under the GNU General Public ++ * License. You may obtain a copy of the GNU General Public License ++ * Version 2 or later at the following locations: ++ * ++ * http://www.opensource.org/licenses/gpl-license.html ++ * http://www.gnu.org/copyleft/gpl.html ++ */ ++ ++/*! ++ * @file busfreq_lpddr2.c ++ * ++ * @brief iMX6 LPDDR2 frequency change specific file. ++ * ++ * @ingroup PM ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "hardware.h" ++ ++/* DDR settings */ ++static void __iomem *mmdc_base; ++static void __iomem *anatop_base; ++static void __iomem *ccm_base; ++static void __iomem *l2_base; ++static struct device *busfreq_dev; ++static void *ddr_freq_change_iram_base; ++static int curr_ddr_rate; ++ ++unsigned long reg_addrs[4]; ++ ++void (*mx6_change_lpddr2_freq)(u32 ddr_freq, int bus_freq_mode, ++ void *iram_addr) = NULL; ++ ++extern unsigned int ddr_normal_rate; ++extern int low_bus_freq_mode; ++extern int ultra_low_bus_freq_mode; ++extern void mx6_lpddr2_freq_change(u32 freq, int bus_freq_mode, ++ void *iram_addr); ++ ++ ++#define LPDDR2_FREQ_CHANGE_SIZE 0x1000 ++ ++ ++/* change the DDR frequency. */ ++int update_lpddr2_freq(int ddr_rate) ++{ ++ if (ddr_rate == curr_ddr_rate) ++ return 0; ++ ++ dev_dbg(busfreq_dev, "\nBus freq set to %d start...\n", ddr_rate); ++ ++ /* ++ * Flush the TLB, to ensure no TLB maintenance occurs ++ * when DDR is in self-refresh. ++ */ ++ local_flush_tlb_all(); ++ /* Now change DDR frequency. 
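++	 * mx6_change_lpddr2_freq points at the copy of
++	 * mx6_lpddr2_freq_change() that init_mmdc_lpddr2_settings()
++	 * placed in on-chip RAM, so code keeps executing while the
++	 * external memory sits in self-refresh.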
*/ ++ mx6_change_lpddr2_freq(ddr_rate, ++ (low_bus_freq_mode | ultra_low_bus_freq_mode), ++ reg_addrs); ++ ++ curr_ddr_rate = ddr_rate; ++ ++ dev_dbg(busfreq_dev, "\nBus freq set to %d done...\n", ddr_rate); ++ ++ return 0; ++} ++ ++int init_mmdc_lpddr2_settings(struct platform_device *busfreq_pdev) ++{ ++ struct platform_device *ocram_dev; ++ unsigned int iram_paddr; ++ struct device_node *node; ++ struct gen_pool *iram_pool; ++ ++ busfreq_dev = &busfreq_pdev->dev; ++ node = of_find_compatible_node(NULL, NULL, "fsl,imx6sl-mmdc"); ++ if (!node) { ++ printk(KERN_ERR "failed to find imx6sl-mmdc device tree data!\n"); ++ return -EINVAL; ++ } ++ mmdc_base = of_iomap(node, 0); ++ WARN(!mmdc_base, "unable to map mmdc registers\n"); ++ ++ node = NULL; ++ node = of_find_compatible_node(NULL, NULL, "fsl,imx6sl-ccm"); ++ if (!node) { ++ printk(KERN_ERR "failed to find imx6sl-ccm device tree data!\n"); ++ return -EINVAL; ++ } ++ ccm_base = of_iomap(node, 0); ++ WARN(!ccm_base, "unable to map ccm registers\n"); ++ ++ node = of_find_compatible_node(NULL, NULL, "arm,pl310-cache"); ++ if (!node) { ++ printk(KERN_ERR "failed to find imx6sl-pl310-cache device tree data!\n"); ++ return -EINVAL; ++ } ++ l2_base = of_iomap(node, 0); ++ WARN(!l2_base, "unable to map PL310 registers\n"); ++ ++ node = of_find_compatible_node(NULL, NULL, "fsl,imx6sl-anatop"); ++ if (!node) { ++ printk(KERN_ERR "failed to find imx6sl-pl310-cache device tree data!\n"); ++ return -EINVAL; ++ } ++ anatop_base = of_iomap(node, 0); ++ WARN(!anatop_base, "unable to map anatop registers\n"); ++ ++ node = NULL; ++ node = of_find_compatible_node(NULL, NULL, "mmio-sram"); ++ if (!node) { ++ dev_err(busfreq_dev, "%s: failed to find ocram node\n", ++ __func__); ++ return -EINVAL; ++ } ++ ++ ocram_dev = of_find_device_by_node(node); ++ if (!ocram_dev) { ++ dev_err(busfreq_dev, "failed to find ocram device!\n"); ++ return -EINVAL; ++ } ++ ++ iram_pool = dev_get_gen_pool(&ocram_dev->dev); ++ if (!iram_pool) { ++ dev_err(busfreq_dev, "iram pool unavailable!\n"); ++ return -EINVAL; ++ } ++ ++ reg_addrs[0] = (unsigned long)anatop_base; ++ reg_addrs[1] = (unsigned long)ccm_base; ++ reg_addrs[2] = (unsigned long)mmdc_base; ++ reg_addrs[3] = (unsigned long)l2_base; ++ ++ ddr_freq_change_iram_base = (void *)gen_pool_alloc(iram_pool, ++ LPDDR2_FREQ_CHANGE_SIZE); ++ if (!ddr_freq_change_iram_base) { ++ dev_err(busfreq_dev, ++ "Cannot alloc iram for ddr freq change code!\n"); ++ return -ENOMEM; ++ } ++ ++ iram_paddr = gen_pool_virt_to_phys(iram_pool, ++ (unsigned long)ddr_freq_change_iram_base); ++ /* ++ * Need to remap the area here since we want ++ * the memory region to be executable. 
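++	 * gen_pool_alloc() only returns the pool's default virtual
++	 * address; __arm_ioremap() with MT_MEMORY_RWX_NONCACHED maps
++	 * the same physical OCRAM executable and uncached, which the
++	 * relocated routine requires.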
++ */ ++ ddr_freq_change_iram_base = __arm_ioremap(iram_paddr, ++ LPDDR2_FREQ_CHANGE_SIZE, ++ MT_MEMORY_RWX_NONCACHED); ++ mx6_change_lpddr2_freq = (void *)fncpy(ddr_freq_change_iram_base, ++ &mx6_lpddr2_freq_change, LPDDR2_FREQ_CHANGE_SIZE); ++ ++ curr_ddr_rate = ddr_normal_rate; ++ ++ return 0; ++} +diff -Nur linux-4.1.3/arch/arm/mach-imx/clk.h linux-xbian-imx6/arch/arm/mach-imx/clk.h +--- linux-4.1.3/arch/arm/mach-imx/clk.h 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/mach-imx/clk.h 2015-07-27 23:13:01.085110746 +0200 +@@ -55,6 +55,34 @@ + shift, 0, &imx_ccm_lock, share_count); + } + ++static inline void imx_clk_prepare_enable(struct clk *clk) ++{ ++ int ret = clk_prepare_enable(clk); ++ ++ if (ret) ++ pr_err("failed to prepare and enable clk %s: %d\n", ++ __clk_get_name(clk), ret); ++} ++ ++static inline int imx_clk_set_parent(struct clk *clk, struct clk *parent) ++{ ++ int ret = clk_set_parent(clk, parent); ++ ++ if (ret) ++ pr_err("failed to set parent of clk %s to %s: %d\n", ++ __clk_get_name(clk), __clk_get_name(parent), ret); ++ return ret; ++} ++ ++static inline void imx_clk_set_rate(struct clk *clk, unsigned long rate) ++{ ++ int ret = clk_set_rate(clk, rate); ++ ++ if (ret) ++ pr_err("failed to set rate of clk %s to %ld: %d\n", ++ __clk_get_name(clk), rate, ret); ++} ++ + struct clk *imx_clk_pfd(const char *name, const char *parent_name, + void __iomem *reg, u8 idx); + +diff -Nur linux-4.1.3/arch/arm/mach-imx/clk-imx6q.c linux-xbian-imx6/arch/arm/mach-imx/clk-imx6q.c +--- linux-4.1.3/arch/arm/mach-imx/clk-imx6q.c 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/mach-imx/clk-imx6q.c 2015-07-27 23:13:01.081124967 +0200 +@@ -24,7 +24,6 @@ + #include "clk.h" + #include "common.h" + #include "hardware.h" +- + static const char *step_sels[] = { "osc", "pll2_pfd2_396m", }; + static const char *pll1_sw_sels[] = { "pll1_sys", "step", }; + static const char *periph_pre_sels[] = { "pll2_bus", "pll2_pfd2_396m", "pll2_pfd0_352m", "pll2_198m", }; +@@ -41,6 +40,8 @@ + static const char *ipu_sels[] = { "mmdc_ch0_axi", "pll2_pfd2_396m", "pll3_120m", "pll3_pfd1_540m", }; + static const char *ldb_di_sels[] = { "pll5_video_div", "pll2_pfd0_352m", "pll2_pfd2_396m", "mmdc_ch1_axi", "pll3_usb_otg", }; + static const char *ipu_di_pre_sels[] = { "mmdc_ch0_axi", "pll3_usb_otg", "pll5_video_div", "pll2_pfd0_352m", "pll2_pfd2_396m", "pll3_pfd1_540m", }; ++static const char *ldb_di0_div_sels[] = { "ldb_di0_div_3_5", "ldb_di0_div_7", }; ++static const char *ldb_di1_div_sels[] = { "ldb_di1_div_3_5", "ldb_di1_div_7", }; + static const char *ipu1_di0_sels[] = { "ipu1_di0_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", }; + static const char *ipu1_di1_sels[] = { "ipu1_di1_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", }; + static const char *ipu2_di0_sels[] = { "ipu2_di0_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", }; +@@ -119,8 +120,118 @@ + static unsigned int share_count_ssi1; + static unsigned int share_count_ssi2; + static unsigned int share_count_ssi3; ++static unsigned int share_count_spdif; + static unsigned int share_count_mipi_core_cfg; + ++static void __iomem *ccm_base; ++ ++static void init_ldb_clks(struct clk *new_parent) ++{ ++ u32 reg; ++ ++ /* ++ * Need to follow a strict procedure when changing the LDB ++ * clock, else we can introduce a glitch. Things to keep in ++ * mind: ++ * 1. The current and new parent clocks must be disabled. ++ * 2. The default clock for ldb_dio_clk is mmdc_ch1 which has ++ * no CG bit. ++ * 3. 
In the RTL implementation of the LDB_DI_CLK_SEL mux ++ * the top four options are in one mux and the PLL3 option along ++ * with another option is in the second mux. There is third mux ++ * used to decide between the first and second mux. ++ * The code below switches the parent to the bottom mux first ++ * and then manipulates the top mux. This ensures that no glitch ++ * will enter the divider. ++ * ++ * Need to disable MMDC_CH1 clock manually as there is no CG bit ++ * for this clock. The only way to disable this clock is to move ++ * it topll3_sw_clk and then to disable pll3_sw_clk ++ * Make sure periph2_clk2_sel is set to pll3_sw_clk ++ */ ++ reg = readl_relaxed(ccm_base + 0x18); ++ reg &= ~(1 << 20); ++ writel_relaxed(reg, ccm_base + 0x18); ++ ++ /* ++ * Set MMDC_CH1 mask bit. ++ */ ++ reg = readl_relaxed(ccm_base + 0x4); ++ reg |= 1 << 16; ++ writel_relaxed(reg, ccm_base + 0x4); ++ ++ /* ++ * Set the periph2_clk_sel to the top mux so that ++ * mmdc_ch1 is from pll3_sw_clk. ++ */ ++ reg = readl_relaxed(ccm_base + 0x14); ++ reg |= 1 << 26; ++ writel_relaxed(reg, ccm_base + 0x14); ++ ++ /* ++ * Wait for the clock switch. ++ */ ++ while (readl_relaxed(ccm_base + 0x48)) ++ ; ++ ++ /* ++ * Disable pll3_sw_clk by selecting the bypass clock source. ++ */ ++ reg = readl_relaxed(ccm_base + 0xc); ++ reg |= 1 << 0; ++ writel_relaxed(reg, ccm_base + 0xc); ++ ++ /* ++ * Set the ldb_di0_clk and ldb_di1_clk to 111b. ++ */ ++ reg = readl_relaxed(ccm_base + 0x2c); ++ reg |= ((7 << 9) | (7 << 12)); ++ writel_relaxed(reg, ccm_base + 0x2c); ++ ++ /* ++ * Set the ldb_di0_clk and ldb_di1_clk to 100b. ++ */ ++ reg = readl_relaxed(ccm_base + 0x2c); ++ reg &= ~((7 << 9) | (7 << 12)); ++ reg |= ((4 << 9) | (4 << 12)); ++ writel_relaxed(reg, ccm_base + 0x2c); ++ ++ /* ++ * Perform the LDB parent clock switch. ++ */ ++ imx_clk_set_parent(clk[IMX6QDL_CLK_LDB_DI0_SEL], new_parent); ++ imx_clk_set_parent(clk[IMX6QDL_CLK_LDB_DI1_SEL], new_parent); ++ ++ /* ++ * Unbypass pll3_sw_clk. ++ */ ++ reg = readl_relaxed(ccm_base + 0xc); ++ reg &= ~(1 << 0); ++ writel_relaxed(reg, ccm_base + 0xc); ++ ++ /* ++ * Set the periph2_clk_sel back to the bottom mux so that ++ * mmdc_ch1 is from its original parent. ++ */ ++ reg = readl_relaxed(ccm_base + 0x14); ++ reg &= ~(1 << 26); ++ writel_relaxed(reg, ccm_base + 0x14); ++ ++ /* ++ * Wait for the clock switch. ++ */ ++ while (readl_relaxed(ccm_base + 0x48)) ++ ; ++ ++ /* ++ * Clear MMDC_CH1 mask bit. 
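++	 * With periph2 back on its original parent (and the handshake
++	 * register at 0x48 seen idle), mmdc_ch1 can take part in CCM
++	 * handshakes again, undoing the mask set at the start of this
++	 * sequence.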
++ */ ++ reg = readl_relaxed(ccm_base + 0x4); ++ reg &= ~(1 << 16); ++ writel_relaxed(reg, ccm_base + 0x4); ++ ++} ++ + static void __init imx6q_clocks_init(struct device_node *ccm_node) + { + struct device_node *np; +@@ -174,13 +285,13 @@ + clk[IMX6QDL_PLL7_BYPASS] = imx_clk_mux_flags("pll7_bypass", base + 0x20, 16, 1, pll7_bypass_sels, ARRAY_SIZE(pll7_bypass_sels), CLK_SET_RATE_PARENT); + + /* Do not bypass PLLs initially */ +- clk_set_parent(clk[IMX6QDL_PLL1_BYPASS], clk[IMX6QDL_CLK_PLL1]); +- clk_set_parent(clk[IMX6QDL_PLL2_BYPASS], clk[IMX6QDL_CLK_PLL2]); +- clk_set_parent(clk[IMX6QDL_PLL3_BYPASS], clk[IMX6QDL_CLK_PLL3]); +- clk_set_parent(clk[IMX6QDL_PLL4_BYPASS], clk[IMX6QDL_CLK_PLL4]); +- clk_set_parent(clk[IMX6QDL_PLL5_BYPASS], clk[IMX6QDL_CLK_PLL5]); +- clk_set_parent(clk[IMX6QDL_PLL6_BYPASS], clk[IMX6QDL_CLK_PLL6]); +- clk_set_parent(clk[IMX6QDL_PLL7_BYPASS], clk[IMX6QDL_CLK_PLL7]); ++ imx_clk_set_parent(clk[IMX6QDL_PLL1_BYPASS], clk[IMX6QDL_CLK_PLL1]); ++ imx_clk_set_parent(clk[IMX6QDL_PLL2_BYPASS], clk[IMX6QDL_CLK_PLL2]); ++ imx_clk_set_parent(clk[IMX6QDL_PLL3_BYPASS], clk[IMX6QDL_CLK_PLL3]); ++ imx_clk_set_parent(clk[IMX6QDL_PLL4_BYPASS], clk[IMX6QDL_CLK_PLL4]); ++ imx_clk_set_parent(clk[IMX6QDL_PLL5_BYPASS], clk[IMX6QDL_CLK_PLL5]); ++ imx_clk_set_parent(clk[IMX6QDL_PLL6_BYPASS], clk[IMX6QDL_CLK_PLL6]); ++ imx_clk_set_parent(clk[IMX6QDL_PLL7_BYPASS], clk[IMX6QDL_CLK_PLL7]); + + clk[IMX6QDL_CLK_PLL1_SYS] = imx_clk_gate("pll1_sys", "pll1_bypass", base + 0x00, 13); + clk[IMX6QDL_CLK_PLL2_BUS] = imx_clk_gate("pll2_bus", "pll2_bypass", base + 0x30, 13); +@@ -259,7 +370,7 @@ + clk[IMX6QDL_CLK_PLL5_VIDEO_DIV] = clk_register_divider_table(NULL, "pll5_video_div", "pll5_post_div", CLK_SET_RATE_PARENT, base + 0x170, 30, 2, 0, video_div_table, &imx_ccm_lock); + + np = ccm_node; +- base = of_iomap(np, 0); ++ ccm_base = base = of_iomap(np, 0); + WARN_ON(!base); + + imx6q_pm_set_ccm_base(base); +@@ -286,6 +397,8 @@ + clk[IMX6QDL_CLK_IPU2_SEL] = imx_clk_mux("ipu2_sel", base + 0x3c, 14, 2, ipu_sels, ARRAY_SIZE(ipu_sels)); + clk[IMX6QDL_CLK_LDB_DI0_SEL] = imx_clk_mux_flags("ldb_di0_sel", base + 0x2c, 9, 3, ldb_di_sels, ARRAY_SIZE(ldb_di_sels), CLK_SET_RATE_PARENT); + clk[IMX6QDL_CLK_LDB_DI1_SEL] = imx_clk_mux_flags("ldb_di1_sel", base + 0x2c, 12, 3, ldb_di_sels, ARRAY_SIZE(ldb_di_sels), CLK_SET_RATE_PARENT); ++ clk[IMX6QDL_CLK_LDB_DI0_DIV_SEL] = imx_clk_mux_flags("ldb_di0_div_sel", base + 0x20, 10, 1, ldb_di0_div_sels, ARRAY_SIZE(ldb_di0_div_sels), CLK_SET_RATE_PARENT); ++ clk[IMX6QDL_CLK_LDB_DI1_DIV_SEL] = imx_clk_mux_flags("ldb_di1_div_sel", base + 0x20, 11, 1, ldb_di1_div_sels, ARRAY_SIZE(ldb_di1_div_sels), CLK_SET_RATE_PARENT); + clk[IMX6QDL_CLK_IPU1_DI0_PRE_SEL] = imx_clk_mux_flags("ipu1_di0_pre_sel", base + 0x34, 6, 3, ipu_di_pre_sels, ARRAY_SIZE(ipu_di_pre_sels), CLK_SET_RATE_PARENT); + clk[IMX6QDL_CLK_IPU1_DI1_PRE_SEL] = imx_clk_mux_flags("ipu1_di1_pre_sel", base + 0x34, 15, 3, ipu_di_pre_sels, ARRAY_SIZE(ipu_di_pre_sels), CLK_SET_RATE_PARENT); + clk[IMX6QDL_CLK_IPU2_DI0_PRE_SEL] = imx_clk_mux_flags("ipu2_di0_pre_sel", base + 0x38, 6, 3, ipu_di_pre_sels, ARRAY_SIZE(ipu_di_pre_sels), CLK_SET_RATE_PARENT); +@@ -335,9 +448,9 @@ + clk[IMX6QDL_CLK_IPU1_PODF] = imx_clk_divider("ipu1_podf", "ipu1_sel", base + 0x3c, 11, 3); + clk[IMX6QDL_CLK_IPU2_PODF] = imx_clk_divider("ipu2_podf", "ipu2_sel", base + 0x3c, 16, 3); + clk[IMX6QDL_CLK_LDB_DI0_DIV_3_5] = imx_clk_fixed_factor("ldb_di0_div_3_5", "ldb_di0_sel", 2, 7); +- clk[IMX6QDL_CLK_LDB_DI0_PODF] = imx_clk_divider_flags("ldb_di0_podf", 
"ldb_di0_div_3_5", base + 0x20, 10, 1, 0); ++ clk[IMX6QDL_CLK_LDB_DI0_DIV_7] = imx_clk_fixed_factor("ldb_di0_div_7", "ldb_di0_sel", 1, 7); + clk[IMX6QDL_CLK_LDB_DI1_DIV_3_5] = imx_clk_fixed_factor("ldb_di1_div_3_5", "ldb_di1_sel", 2, 7); +- clk[IMX6QDL_CLK_LDB_DI1_PODF] = imx_clk_divider_flags("ldb_di1_podf", "ldb_di1_div_3_5", base + 0x20, 11, 1, 0); ++ clk[IMX6QDL_CLK_LDB_DI1_DIV_7] = imx_clk_fixed_factor("ldb_di1_div_7", "ldb_di1_sel", 1, 7); + clk[IMX6QDL_CLK_IPU1_DI0_PRE] = imx_clk_divider("ipu1_di0_pre", "ipu1_di0_pre_sel", base + 0x34, 3, 3); + clk[IMX6QDL_CLK_IPU1_DI1_PRE] = imx_clk_divider("ipu1_di1_pre", "ipu1_di1_pre_sel", base + 0x34, 12, 3); + clk[IMX6QDL_CLK_IPU2_DI0_PRE] = imx_clk_divider("ipu2_di0_pre", "ipu2_di0_pre_sel", base + 0x38, 3, 3); +@@ -378,6 +491,8 @@ + clk[IMX6QDL_CLK_CAN1_SERIAL] = imx_clk_gate2("can1_serial", "can_root", base + 0x68, 16); + clk[IMX6QDL_CLK_CAN2_IPG] = imx_clk_gate2("can2_ipg", "ipg", base + 0x68, 18); + clk[IMX6QDL_CLK_CAN2_SERIAL] = imx_clk_gate2("can2_serial", "can_root", base + 0x68, 20); ++ clk[IMX6QDL_CLK_DCIC1] = imx_clk_gate2("dcic1", "ipu1_podf", base + 0x68, 24); ++ clk[IMX6QDL_CLK_DCIC2] = imx_clk_gate2("dcic2", "ipu2_podf", base + 0x68, 26); + clk[IMX6QDL_CLK_ECSPI1] = imx_clk_gate2("ecspi1", "ecspi_root", base + 0x6c, 0); + clk[IMX6QDL_CLK_ECSPI2] = imx_clk_gate2("ecspi2", "ecspi_root", base + 0x6c, 2); + clk[IMX6QDL_CLK_ECSPI3] = imx_clk_gate2("ecspi3", "ecspi_root", base + 0x6c, 4); +@@ -414,9 +529,9 @@ + clk[IMX6QDL_CLK_IPU1_DI1] = imx_clk_gate2("ipu1_di1", "ipu1_di1_sel", base + 0x74, 4); + clk[IMX6QDL_CLK_IPU2] = imx_clk_gate2("ipu2", "ipu2_podf", base + 0x74, 6); + clk[IMX6QDL_CLK_IPU2_DI0] = imx_clk_gate2("ipu2_di0", "ipu2_di0_sel", base + 0x74, 8); +- clk[IMX6QDL_CLK_LDB_DI0] = imx_clk_gate2("ldb_di0", "ldb_di0_podf", base + 0x74, 12); +- clk[IMX6QDL_CLK_LDB_DI1] = imx_clk_gate2("ldb_di1", "ldb_di1_podf", base + 0x74, 14); + clk[IMX6QDL_CLK_IPU2_DI1] = imx_clk_gate2("ipu2_di1", "ipu2_di1_sel", base + 0x74, 10); ++ clk[IMX6QDL_CLK_LDB_DI0] = imx_clk_gate2("ldb_di0", "ldb_di0_div_sel", base + 0x74, 12); ++ clk[IMX6QDL_CLK_LDB_DI1] = imx_clk_gate2("ldb_di1", "ldb_di1_div_sel", base + 0x74, 14); + clk[IMX6QDL_CLK_HSI_TX] = imx_clk_gate2_shared("hsi_tx", "hsi_tx_podf", base + 0x74, 16, &share_count_mipi_core_cfg); + clk[IMX6QDL_CLK_MIPI_CORE_CFG] = imx_clk_gate2_shared("mipi_core_cfg", "video_27m", base + 0x74, 16, &share_count_mipi_core_cfg); + clk[IMX6QDL_CLK_MIPI_IPG] = imx_clk_gate2_shared("mipi_ipg", "ipg", base + 0x74, 16, &share_count_mipi_core_cfg); +@@ -446,7 +561,8 @@ + clk[IMX6QDL_CLK_SATA] = imx_clk_gate2("sata", "ahb", base + 0x7c, 4); + clk[IMX6QDL_CLK_SDMA] = imx_clk_gate2("sdma", "ahb", base + 0x7c, 6); + clk[IMX6QDL_CLK_SPBA] = imx_clk_gate2("spba", "ipg", base + 0x7c, 12); +- clk[IMX6QDL_CLK_SPDIF] = imx_clk_gate2("spdif", "spdif_podf", base + 0x7c, 14); ++ clk[IMX6QDL_CLK_SPDIF] = imx_clk_gate2_shared("spdif", "spdif_podf", base + 0x7c, 14, &share_count_spdif); ++ clk[IMX6QDL_CLK_SPDIF_GCLK] = imx_clk_gate2_shared("spdif_gclk", "ipg", base + 0x7c, 14, &share_count_spdif); + clk[IMX6QDL_CLK_SSI1_IPG] = imx_clk_gate2_shared("ssi1_ipg", "ipg", base + 0x7c, 18, &share_count_ssi1); + clk[IMX6QDL_CLK_SSI2_IPG] = imx_clk_gate2_shared("ssi2_ipg", "ipg", base + 0x7c, 20, &share_count_ssi2); + clk[IMX6QDL_CLK_SSI3_IPG] = imx_clk_gate2_shared("ssi3_ipg", "ipg", base + 0x7c, 22, &share_count_ssi3); +@@ -479,54 +595,104 @@ + clk_data.clk_num = ARRAY_SIZE(clk); + of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data); 
+ ++ clk_register_clkdev(clk[IMX6QDL_CLK_GPT_3M], "gpt_3m", "imx-gpt.0"); + clk_register_clkdev(clk[IMX6QDL_CLK_ENET_REF], "enet_ref", NULL); + + if ((imx_get_soc_revision() != IMX_CHIP_REVISION_1_0) || + cpu_is_imx6dl()) { +- clk_set_parent(clk[IMX6QDL_CLK_LDB_DI0_SEL], clk[IMX6QDL_CLK_PLL5_VIDEO_DIV]); +- clk_set_parent(clk[IMX6QDL_CLK_LDB_DI1_SEL], clk[IMX6QDL_CLK_PLL5_VIDEO_DIV]); ++ imx_clk_set_parent(clk[IMX6QDL_CLK_LDB_DI0_SEL], clk[IMX6QDL_CLK_PLL5_VIDEO_DIV]); ++ imx_clk_set_parent(clk[IMX6QDL_CLK_LDB_DI1_SEL], clk[IMX6QDL_CLK_PLL5_VIDEO_DIV]); + } + +- clk_set_parent(clk[IMX6QDL_CLK_IPU1_DI0_PRE_SEL], clk[IMX6QDL_CLK_PLL5_VIDEO_DIV]); +- clk_set_parent(clk[IMX6QDL_CLK_IPU1_DI1_PRE_SEL], clk[IMX6QDL_CLK_PLL5_VIDEO_DIV]); +- clk_set_parent(clk[IMX6QDL_CLK_IPU2_DI0_PRE_SEL], clk[IMX6QDL_CLK_PLL5_VIDEO_DIV]); +- clk_set_parent(clk[IMX6QDL_CLK_IPU2_DI1_PRE_SEL], clk[IMX6QDL_CLK_PLL5_VIDEO_DIV]); +- clk_set_parent(clk[IMX6QDL_CLK_IPU1_DI0_SEL], clk[IMX6QDL_CLK_IPU1_DI0_PRE]); +- clk_set_parent(clk[IMX6QDL_CLK_IPU1_DI1_SEL], clk[IMX6QDL_CLK_IPU1_DI1_PRE]); +- clk_set_parent(clk[IMX6QDL_CLK_IPU2_DI0_SEL], clk[IMX6QDL_CLK_IPU2_DI0_PRE]); +- clk_set_parent(clk[IMX6QDL_CLK_IPU2_DI1_SEL], clk[IMX6QDL_CLK_IPU2_DI1_PRE]); ++ init_ldb_clks(clk[IMX6QDL_CLK_PLL2_PFD0_352M]); ++ imx_clk_set_parent(clk[IMX6QDL_CLK_IPU1_DI0_PRE_SEL], clk[IMX6QDL_CLK_PLL5_VIDEO_DIV]); ++ imx_clk_set_parent(clk[IMX6QDL_CLK_IPU1_DI1_PRE_SEL], clk[IMX6QDL_CLK_PLL5_VIDEO_DIV]); ++ imx_clk_set_parent(clk[IMX6QDL_CLK_IPU2_DI0_PRE_SEL], clk[IMX6QDL_CLK_PLL5_VIDEO_DIV]); ++ imx_clk_set_parent(clk[IMX6QDL_CLK_IPU2_DI1_PRE_SEL], clk[IMX6QDL_CLK_PLL5_VIDEO_DIV]); ++ imx_clk_set_parent(clk[IMX6QDL_CLK_IPU1_DI0_SEL], clk[IMX6QDL_CLK_IPU1_DI0_PRE]); ++ imx_clk_set_parent(clk[IMX6QDL_CLK_IPU1_DI1_SEL], clk[IMX6QDL_CLK_IPU1_DI1_PRE]); ++ imx_clk_set_parent(clk[IMX6QDL_CLK_IPU2_DI0_SEL], clk[IMX6QDL_CLK_IPU2_DI0_PRE]); ++ imx_clk_set_parent(clk[IMX6QDL_CLK_IPU2_DI1_SEL], clk[IMX6QDL_CLK_IPU2_DI1_PRE]); ++ ++ if (cpu_is_imx6dl()) ++ imx_clk_set_parent(clk[IMX6QDL_CLK_IPU1_SEL], clk[IMX6QDL_CLK_PLL3_PFD1_540M]); + + /* + * The gpmi needs 100MHz frequency in the EDO/Sync mode, + * We can not get the 100MHz from the pll2_pfd0_352m. + * So choose pll2_pfd2_396m as enfc_sel's parent. + */ +- clk_set_parent(clk[IMX6QDL_CLK_ENFC_SEL], clk[IMX6QDL_CLK_PLL2_PFD2_396M]); +- +- for (i = 0; i < ARRAY_SIZE(clks_init_on); i++) +- clk_prepare_enable(clk[clks_init_on[i]]); ++ imx_clk_set_parent(clk[IMX6QDL_CLK_ENFC_SEL], clk[IMX6QDL_CLK_PLL2_PFD2_396M]); + ++ /* gpu clock initilazation */ ++ /* ++ * On mx6dl, 2d core clock sources(sel, podf) is from 3d ++ * shader core clock, but 3d shader clock multiplexer of ++ * mx6dl is different. For instance the equivalent of ++ * pll2_pfd_594M on mx6q is pll2_pfd_528M on mx6dl. ++ * Make a note here. 
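++	 * The reparenting below is kept for reference but compiled
++	 * out (#if 0); it would run the mx6dl 3D shader/core and 2D
++	 * core at 528MHz and the mx6q equivalents at 594/528/480MHz.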
++	 */
++#if 0
++	imx_clk_set_parent(clk[IMX6QDL_CLK_GPU3D_SHADER_SEL], clk[IMX6QDL_CLK_PLL2_PFD1_594M]);
++	if (cpu_is_imx6dl()) {
++		imx_clk_set_rate(clk[IMX6QDL_CLK_GPU3D_SHADER], 528000000);
++		/* for mx6dl, change gpu3d_core parent to 594_PFD */
++		imx_clk_set_parent(clk[IMX6QDL_CLK_GPU3D_CORE_SEL], clk[IMX6QDL_CLK_PLL2_PFD1_594M]);
++		imx_clk_set_rate(clk[IMX6QDL_CLK_GPU3D_CORE], 528000000);
++		/* for mx6dl, change gpu2d_core parent to 594_PFD */
++		imx_clk_set_parent(clk[IMX6QDL_CLK_GPU2D_CORE_SEL], clk[IMX6QDL_CLK_PLL2_PFD1_594M]);
++		imx_clk_set_rate(clk[IMX6QDL_CLK_GPU2D_CORE], 528000000);
++	} else if (cpu_is_imx6q()) {
++		imx_clk_set_rate(clk[IMX6QDL_CLK_GPU3D_SHADER], 594000000);
++		imx_clk_set_parent(clk[IMX6QDL_CLK_GPU3D_CORE_SEL], clk[IMX6QDL_CLK_MMDC_CH0_AXI]);
++		imx_clk_set_rate(clk[IMX6QDL_CLK_GPU3D_CORE], 528000000);
++		imx_clk_set_parent(clk[IMX6QDL_CLK_GPU2D_CORE_SEL], clk[IMX6QDL_CLK_PLL3_USB_OTG]);
++		imx_clk_set_rate(clk[IMX6QDL_CLK_GPU2D_CORE], 480000000);
++	}
++#endif
+	if (IS_ENABLED(CONFIG_USB_MXS_PHY)) {
+-		clk_prepare_enable(clk[IMX6QDL_CLK_USBPHY1_GATE]);
+-		clk_prepare_enable(clk[IMX6QDL_CLK_USBPHY2_GATE]);
++		imx_clk_prepare_enable(clk[IMX6QDL_CLK_USBPHY1_GATE]);
++		imx_clk_prepare_enable(clk[IMX6QDL_CLK_USBPHY2_GATE]);
+	}
+
+	/*
+	 * Let's initially set up CLKO with OSC24M, since this configuration
+	 * is widely used by imx6q board designs to clock audio codec.
+	 */
+-	ret = clk_set_parent(clk[IMX6QDL_CLK_CKO2_SEL], clk[IMX6QDL_CLK_OSC]);
++	ret = imx_clk_set_parent(clk[IMX6QDL_CLK_CKO2_SEL], clk[IMX6QDL_CLK_OSC]);
+	if (!ret)
+-		ret = clk_set_parent(clk[IMX6QDL_CLK_CKO], clk[IMX6QDL_CLK_CKO2]);
++		ret = imx_clk_set_parent(clk[IMX6QDL_CLK_CKO], clk[IMX6QDL_CLK_CKO2]);
+	if (ret)
+		pr_warn("failed to set up CLKO: %d\n", ret);
+
+	/* Audio-related clocks configuration */
+-	clk_set_parent(clk[IMX6QDL_CLK_SPDIF_SEL], clk[IMX6QDL_CLK_PLL3_PFD3_454M]);
++	imx_clk_set_parent(clk[IMX6QDL_CLK_SPDIF_SEL], clk[IMX6QDL_CLK_PLL3_PFD3_454M]);
+
+	/* All existing boards with PCIe use LVDS1 */
+	if (IS_ENABLED(CONFIG_PCI_IMX6))
+-		clk_set_parent(clk[IMX6QDL_CLK_LVDS1_SEL], clk[IMX6QDL_CLK_SATA_REF_100M]);
++		imx_clk_set_parent(clk[IMX6QDL_CLK_LVDS1_SEL], clk[IMX6QDL_CLK_SATA_REF_100M]);
++
++	/*
++	 * Enable clocks only after both parent and rate are all initialized
++	 * as needed
++	 */
++	for (i = 0; i < ARRAY_SIZE(clks_init_on); i++)
++		imx_clk_prepare_enable(clk[clks_init_on[i]]);
++
++	/*
++	 * If VPU 352M is enabled, PLL2_PFD2 needs to be set to 352M.
++	 * cpufreq is then disabled, as VDDSOC/PU must stay at the
++	 * highest voltage and scaling the cpu freq would not save any
++	 * power, and busfreq is disabled as well, since PLL2_PFD2 is
++	 * no longer at its default freq. In short, all modules
++	 * sourcing a clk from PLL2_PFD2 will
++	 * be impacted.
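++	 * For reference: a PFD output is PLL rate * 18 / PFD_FRAC, so
++	 * with PLL2 at 528MHz, moving PFD2 from 396MHz to 352MHz means
++	 * changing PFD_FRAC from 528 * 18 / 396 = 24 to
++	 * 528 * 18 / 352 = 27.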
++ */ ++ if (vpu352) { ++ clk_set_rate(clk[IMX6QDL_CLK_PLL2_PFD2_396M], 352000000); ++ clk_set_parent(clk[IMX6QDL_CLK_VPU_AXI_SEL], clk[IMX6QDL_CLK_PLL2_PFD2_396M]); ++ pr_info("VPU 352M is enabled!\n"); ++ } + + /* Set initial power mode */ + imx6q_set_lpm(WAIT_CLOCKED); +diff -Nur linux-4.1.3/arch/arm/mach-imx/clk-pllv3.c linux-xbian-imx6/arch/arm/mach-imx/clk-pllv3.c +--- linux-4.1.3/arch/arm/mach-imx/clk-pllv3.c 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/mach-imx/clk-pllv3.c 2015-07-27 23:13:01.085110746 +0200 +@@ -23,6 +23,7 @@ + #define PLL_DENOM_OFFSET 0x20 + + #define BM_PLL_POWER (0x1 << 12) ++#define BM_PLL_BYPASS (0x1 << 16) + #define BM_PLL_LOCK (0x1 << 31) + + /** +@@ -237,9 +238,10 @@ + struct clk_pllv3 *pll = to_clk_pllv3(hw); + unsigned long min_rate = parent_rate * 27; + unsigned long max_rate = parent_rate * 54; +- u32 val, div; ++ u32 val, newval, div; + u32 mfn, mfd = 1000000; + s64 temp64; ++ int ret; + + if (rate < min_rate || rate > max_rate) + return -EINVAL; +@@ -251,13 +253,27 @@ + mfn = temp64; + + val = readl_relaxed(pll->base); +- val &= ~pll->div_mask; +- val |= div; +- writel_relaxed(val, pll->base); ++ ++ /* set the PLL into bypass mode */ ++ newval = val | BM_PLL_BYPASS; ++ writel_relaxed(newval, pll->base); ++ ++ /* configure the new frequency */ ++ newval &= ~pll->div_mask; ++ newval |= div; ++ writel_relaxed(newval, pll->base); + writel_relaxed(mfn, pll->base + PLL_NUM_OFFSET); + writel_relaxed(mfd, pll->base + PLL_DENOM_OFFSET); + +- return clk_pllv3_wait_lock(pll); ++ ret = clk_pllv3_wait_lock(pll); ++ if (ret == 0 && val & BM_PLL_POWER) { ++ /* only if it locked can we switch back to the PLL */ ++ newval &= ~BM_PLL_BYPASS; ++ newval |= val & BM_PLL_BYPASS; ++ writel_relaxed(newval, pll->base); ++ } ++ ++ return ret; + } + + static const struct clk_ops clk_pllv3_av_ops = { +diff -Nur linux-4.1.3/arch/arm/mach-imx/common.h linux-xbian-imx6/arch/arm/mach-imx/common.h +--- linux-4.1.3/arch/arm/mach-imx/common.h 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/mach-imx/common.h 2015-07-27 23:13:01.085110746 +0200 +@@ -56,6 +56,7 @@ + void mxc_set_cpu_type(unsigned int type); + void mxc_restart(enum reboot_mode, const char *); + void mxc_arch_reset_init(void __iomem *); ++void mxc_arch_reset_init_dt(void); + int mx51_revision(void); + int mx53_revision(void); + void imx_set_aips(void __iomem *); +@@ -86,6 +87,8 @@ + MX3_SLEEP, + }; + ++extern int vpu352; ++ + void mx3_cpu_lp_set(enum mx3_cpu_pwr_mode mode); + void imx_print_silicon_rev(const char *cpu, int srev); + +@@ -102,6 +105,7 @@ + static inline void imx_smp_prepare(void) {} + #endif + void imx_src_init(void); ++ + void imx_gpc_pre_suspend(bool arm_power_off); + void imx_gpc_post_resume(void); + void imx_gpc_mask_all(void); +diff -Nur linux-4.1.3/arch/arm/mach-imx/ddr3_freq_imx6.S linux-xbian-imx6/arch/arm/mach-imx/ddr3_freq_imx6.S +--- linux-4.1.3/arch/arm/mach-imx/ddr3_freq_imx6.S 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/arch/arm/mach-imx/ddr3_freq_imx6.S 2015-07-27 23:13:01.089096525 +0200 +@@ -0,0 +1,893 @@ ++/* ++ * Copyright (C) 2011-2013 Freescale Semiconductor, Inc. All Rights Reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ++ */ ++ ++#include ++ ++#define MMDC0_MDPDC 0x4 ++#define MMDC0_MDCF0 0x0c ++#define MMDC0_MDCF1 0x10 ++#define MMDC0_MDMISC 0x18 ++#define MMDC0_MDSCR 0x1c ++#define MMDC0_MAPSR 0x404 ++#define MMDC0_MADPCR0 0x410 ++#define MMDC0_MPZQHWCTRL 0x800 ++#define MMDC1_MPZQHWCTRL 0x4800 ++#define MMDC0_MPODTCTRL 0x818 ++#define MMDC1_MPODTCTRL 0x4818 ++#define MMDC0_MPDGCTRL0 0x83c ++#define MMDC1_MPDGCTRL0 0x483c ++#define MMDC0_MPMUR0 0x8b8 ++#define MMDC1_MPMUR0 0x48b8 ++ ++#define CCM_CBCDR 0x14 ++#define CCM_CBCMR 0x18 ++#define CCM_CSCMR1 0x1c ++#define CCM_CDHIPR 0x48 ++ ++#define L2_CACHE_SYNC 0x730 ++ ++ .align 3 ++ ++ .macro switch_to_528MHz ++ ++ /* check if periph_clk_sel is already set */ ++ ldr r0, [r6, #CCM_CBCDR] ++ and r0, r0, #(1 << 25) ++ cmp r0, #(1 << 25) ++ beq set_ahb_podf_before_switch ++ ++ /* change periph_clk to be sourced from pll3_clk. */ ++ ldr r0, [r6, #CCM_CBCMR] ++ bic r0, r0, #(3 << 12) ++ str r0, [r6, #CCM_CBCMR] ++ ++ ldr r0, [r6, #CCM_CBCDR] ++ bic r0, r0, #(0x38 << 20) ++ str r0, [r6, #CCM_CBCDR] ++ ++ /* ++ * set the AHB dividers before the switch, ++ * don't change AXI clock divider, ++ * set the MMDC_DIV=1, AXI_DIV = 2, AHB_DIV=4, ++ */ ++ ldr r0, [r6, #CCM_CBCDR] ++ ldr r2, =0x3f1f00 ++ bic r0, r0, r2 ++ orr r0, r0, #0xd00 ++ orr r0, r0, #(1 << 16) ++ str r0, [r6, #CCM_CBCDR] ++ ++wait_div_update528: ++ ldr r0, [r6, #CCM_CDHIPR] ++ cmp r0, #0 ++ bne wait_div_update528 ++ ++ /* now switch periph_clk to pll3_main_clk. */ ++ ldr r0, [r6, #CCM_CBCDR] ++ orr r0, r0, #(1 << 25) ++ str r0, [r6, #CCM_CBCDR] ++ ++periph_clk_switch3: ++ ldr r0, [r6, #CCM_CDHIPR] ++ cmp r0, #0 ++ bne periph_clk_switch3 ++ ++ b switch_pre_periph_clk_528 ++ ++set_ahb_podf_before_switch: ++ /* ++ * set the MMDC_DIV=1, AXI_DIV = 2, AHB_DIV=4, ++ */ ++ ldr r0, [r6, #CCM_CBCDR] ++ ldr r2, =0x3f1f00 ++ bic r0, r0, r2 ++ orr r0, r0, #0xd00 ++ orr r0, r0, #(1 << 16) ++ str r0, [r6, #CCM_CBCDR] ++ ++wait_div_update528_1: ++ ldr r0, [r6, #CCM_CDHIPR] ++ cmp r0, #0 ++ bne wait_div_update528_1 ++ ++switch_pre_periph_clk_528: ++ ++ /* now switch pre_periph_clk to PLL2_528MHz. */ ++ ldr r0, [r6, #CCM_CBCMR] ++ bic r0, r0, #(0xc << 16) ++ str r0, [r6, #CCM_CBCMR] ++ ++ /* now switch periph_clk back. */ ++ ldr r0, [r6, #CCM_CBCDR] ++ bic r0, r0, #(1 << 25) ++ str r0, [r6, #CCM_CBCDR] ++ ++periph_clk_switch4: ++ ldr r0, [r6, #CCM_CDHIPR] ++ cmp r0, #0 ++ bne periph_clk_switch4 ++ ++ .endm ++ ++ .macro switch_to_400MHz ++ ++ /* check if periph_clk_sel is already set. */ ++ ldr r0, [r6, #CCM_CBCDR] ++ and r0, r0, #(1 << 25) ++ cmp r0, #(1 << 25) ++ beq set_ahb_podf_before_switch1 ++ ++ /* change periph_clk to be sourced from pll3_clk. */ ++ ldr r0, [r6, #CCM_CBCMR] ++ bic r0, r0, #(3 << 12) ++ str r0, [r6, #CCM_CBCMR] ++ ++ ldr r0, [r6, #CCM_CBCDR] ++ bic r0, r0, #(0x38 << 24) ++ str r0, [r6, #CCM_CBCDR] ++ ++ /* now switch periph_clk to pll3_main_clk. 
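++	 * CBCDR bit 25 is periph_clk_sel; the CDHIPR poll that
++	 * follows waits for the CCM handshake to complete before
++	 * the pre_periph mux is touched.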
*/ ++ ldr r0, [r6, #CCM_CBCDR] ++ orr r0, r0, #(1 << 25) ++ str r0, [r6, #CCM_CBCDR] ++ ++periph_clk_switch5: ++ ldr r0, [r6, #CCM_CDHIPR] ++ cmp r0, #0 ++ bne periph_clk_switch5 ++ ++ b switch_pre_periph_clk_400 ++ ++set_ahb_podf_before_switch1: ++ /* ++ * set the MMDC_DIV=1, AXI_DIV = 2, AHB_DIV=4, ++ */ ++ ldr r0, [r6, #CCM_CBCDR] ++ ldr r2, =0x3f1f00 ++ bic r0, r0, r2 ++ orr r0, r0, #(0x9 << 8) ++ orr r0, r0, #(1 << 16) ++ str r0, [r6, #CCM_CBCDR] ++ ++wait_div_update400_1: ++ ldr r0, [r6, #CCM_CDHIPR] ++ cmp r0, #0 ++ bne wait_div_update400_1 ++ ++switch_pre_periph_clk_400: ++ ++ /* now switch pre_periph_clk to PFD_400MHz. */ ++ ldr r0, [r6, #CCM_CBCMR] ++ bic r0, r0, #(0xc << 16) ++ orr r0, r0, #(0x4 << 16) ++ str r0, [r6, #CCM_CBCMR] ++ ++ /* now switch periph_clk back. */ ++ ldr r0, [r6, #CCM_CBCDR] ++ bic r0, r0, #(1 << 25) ++ str r0, [r6, #CCM_CBCDR] ++ ++periph_clk_switch6: ++ ldr r0, [r6, #CCM_CDHIPR] ++ cmp r0, #0 ++ bne periph_clk_switch6 ++ ++ /* ++ * change AHB divider so that we are at 400/3=133MHz. ++ * don't change AXI clock divider. ++ * set the MMDC_DIV=1, AXI_DIV=2, AHB_DIV=3, ++ */ ++ ldr r0, [r6, #CCM_CBCDR] ++ ldr r2, =0x3f1f00 ++ bic r0, r0, r2 ++ orr r0, r0, #(0x9 << 8) ++ orr r0, r0, #(1 << 16) ++ str r0, [r6, #CCM_CBCDR] ++ ++wait_div_update400_2: ++ ldr r0, [r6, #CCM_CDHIPR] ++ cmp r0, #0 ++ bne wait_div_update400_2 ++ ++ .endm ++ ++ .macro switch_to_50MHz ++ ++ /* check if periph_clk_sel is already set. */ ++ ldr r0, [r6, #CCM_CBCDR] ++ and r0, r0, #(1 << 25) ++ cmp r0, #(1 << 25) ++ beq switch_pre_periph_clk_50 ++ ++ /* ++ * set the periph_clk to be sourced from PLL2_PFD_200M ++ * change periph_clk to be sourced from pll3_clk. ++ * ensure PLL3 is the source and set the divider to 1. ++ */ ++ ldr r0, [r6, #CCM_CBCMR] ++ bic r0, r0, #(0x3 << 12) ++ str r0, [r6, #CCM_CBCMR] ++ ++ ldr r0, [r6, #CCM_CBCDR] ++ bic r0, r0, #(0x38 << 24) ++ str r0, [r6, #CCM_CBCDR] ++ ++ /* now switch periph_clk to pll3_main_clk. */ ++ ldr r0, [r6, #CCM_CBCDR] ++ orr r0, r0, #(1 << 25) ++ str r0, [r6, #CCM_CBCDR] ++ ++periph_clk_switch_50: ++ ldr r0, [r6, #CCM_CDHIPR] ++ cmp r0, #0 ++ bne periph_clk_switch_50 ++ ++switch_pre_periph_clk_50: ++ ++ /* now switch pre_periph_clk to PFD_200MHz. */ ++ ldr r0, [r6, #CCM_CBCMR] ++ orr r0, r0, #(0xc << 16) ++ str r0, [r6, #CCM_CBCMR] ++ ++ /* ++ * set the MMDC_DIV=4, AXI_DIV = 4, AHB_DIV=8, ++ */ ++ ldr r0, [r6, #CCM_CBCDR] ++ ldr r2, =0x3f1f00 ++ bic r0, r0, r2 ++ orr r0, r0, #(0x18 << 16) ++ orr r0, r0, #(0x3 << 16) ++ ++ /* ++ * if changing AHB divider remember to change ++ * the IPGPER divider too below. ++ */ ++ orr r0, r0, #0x1d00 ++ str r0, [r6, #CCM_CBCDR] ++ ++wait_div_update_50: ++ ldr r0, [r6, #CCM_CDHIPR] ++ cmp r0, #0 ++ bne wait_div_update_50 ++ ++ /* now switch periph_clk back. */ ++ ldr r0, [r6, #CCM_CBCDR] ++ bic r0, r0, #(1 << 25) ++ str r0, [r6, #CCM_CBCDR] ++ ++periph_clk_switch2: ++ ldr r0, [r6, #CCM_CDHIPR] ++ cmp r0, #0 ++ bne periph_clk_switch2 ++ ++ .endm ++ ++ .macro switch_to_24MHz ++ /* ++ * change the freq now try setting DDR to 24MHz. ++ * source it from the periph_clk2 ensure the ++ * periph_clk2 is sourced from 24MHz and the ++ * divider is 1. ++ */ ++ ++ ldr r0, [r6, #CCM_CBCMR] ++ bic r0, r0, #(0x3 << 12) ++ orr r0, r0, #(1 << 12) ++ str r0, [r6, #CCM_CBCMR] ++ ++ ldr r0, [r6, #CCM_CBCDR] ++ bic r0, r0, #(0x38 << 24) ++ str r0, [r6, #CCM_CBCDR] ++ ++ /* now switch periph_clk to 24MHz. 
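++	 * CBCDR bit 25 again selects the periph_clk2 path; the CBCMR
++	 * write above already parked periph_clk2 on the 24MHz osc, so
++	 * this moves the whole periph tree to 24MHz.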
*/ ++ ldr r0, [r6, #CCM_CBCDR] ++ orr r0, r0, #(1 << 25) ++ str r0, [r6, #CCM_CBCDR] ++ ++periph_clk_switch1: ++ ldr r0, [r6, #CCM_CDHIPR] ++ cmp r0, #0 ++ bne periph_clk_switch1 ++ ++ /* change all the dividers to 1. */ ++ ldr r0, [r6, #CCM_CBCDR] ++ ldr r2, =0x3f1f00 ++ bic r0, r0, r2 ++ orr r0, r0, #(1 << 8) ++ str r0, [r6, #CCM_CBCDR] ++ ++ /* Wait for the divider to change. */ ++wait_div_update: ++ ldr r0, [r6, #CCM_CDHIPR] ++ cmp r0, #0 ++ bne wait_div_update ++ ++ .endm ++ ++/* ++ * mx6_ddr3_freq_change ++ * ++ * idle the processor (eg, wait for interrupt). ++ * make sure DDR is in self-refresh. ++ * IRQs are already disabled. ++ */ ++ENTRY(mx6_ddr3_freq_change) ++ ++ stmfd sp!, {r4-r12} ++ ++ /* ++ * r5 -> mmdc_base ++ * r6 -> ccm_base ++ * r7 -> iomux_base ++ * r12 -> l2_base ++ */ ++ mov r4, r0 ++ mov r8, r1 ++ mov r9, r2 ++ mov r11, r3 ++ ++ /* ++ * Get the addresses of the registers. ++ * They are last few entries in the ++ * ddr_settings parameter. ++ * The first entry contains the count, ++ * and each entry is 2 words. ++ */ ++ ldr r0, [r1] ++ add r0, r0, #1 ++ lsl r0, r0, #3 ++ add r1, r0, r1 ++ /* mmdc_base. */ ++ ldr r5, [r1] ++ add r1, #8 ++ /* ccm_base */ ++ ldr r6, [r1] ++ add r1, #8 ++ /*iomux_base */ ++ ldr r7, [r1] ++ add r1, #8 ++ /*l2_base */ ++ ldr r12, [r1] ++ ++ddr_freq_change: ++ /* ++ * make sure no TLB miss will occur when ++ * the DDR is in self refresh. invalidate ++ * TLB single entry to ensure that the ++ * address is not already in the TLB. ++ */ ++ ++ adr r10, ddr_freq_change ++ ++ ldr r2, [r6] ++ ldr r2, [r5] ++ ldr r2, [r7] ++ ldr r2, [r8] ++ ldr r2, [r10] ++ ldr r2, [r11] ++ ldr r2, [r12] ++ ++#ifdef CONFIG_CACHE_L2X0 ++ /* ++ * Make sure the L2 buffers are drained. ++ * Sync operation on L2 drains the buffers. ++ */ ++ mov r1, #0x0 ++ str r1, [r12, #L2_CACHE_SYNC] ++#endif ++ ++ /* disable automatic power saving. */ ++ ldr r0, [r5, #MMDC0_MAPSR] ++ orr r0, r0, #0x01 ++ str r0, [r5, #MMDC0_MAPSR] ++ ++ /* disable MMDC power down timer. */ ++ ldr r0, [r5, #MMDC0_MDPDC] ++ bic r0, r0, #(0xff << 8) ++ str r0, [r5, #MMDC0_MDPDC] ++ ++ /* delay for a while */ ++ ldr r1, =4 ++delay1: ++ ldr r2, =0 ++cont1: ++ ldr r0, [r5, r2] ++ add r2, r2, #4 ++ cmp r2, #16 ++ bne cont1 ++ sub r1, r1, #1 ++ cmp r1, #0 ++ bgt delay1 ++ ++ /* set CON_REG */ ++ ldr r0, =0x8000 ++ str r0, [r5, #MMDC0_MDSCR] ++poll_conreq_set_1: ++ ldr r0, [r5, #MMDC0_MDSCR] ++ and r0, r0, #(0x4 << 12) ++ cmp r0, #(0x4 << 12) ++ bne poll_conreq_set_1 ++ ++ ldr r0, =0x00008050 ++ str r0, [r5, #MMDC0_MDSCR] ++ ldr r0, =0x00008058 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ /* ++ * if requested frequency is greater than ++ * 300MHz go to DLL on mode. ++ */ ++ ldr r1, =300000000 ++ cmp r4, r1 ++ bge dll_on_mode ++ ++dll_off_mode: ++ ++ /* if DLL is currently on, turn it off. 
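++ * r9 is the DLL-state flag handed in by the caller; when it reads 1 the
++ * MRS sequence below is skipped. The 0x0001803x writes to MDSCR appear
++ * to be load-mode commands to MR1 for each chip select, i.e. the JEDEC
++ * DLL-off entry that DDR3 requires at clock rates this low.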
*/ ++ cmp r9, #1 ++ beq continue_dll_off_1 ++ ++ ldr r0, =0x00018031 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ ldr r0, =0x00018039 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ ldr r1, =10 ++delay1a: ++ ldr r2, =0 ++cont1a: ++ ldr r0, [r5, r2] ++ add r2, r2, #4 ++ cmp r2, #16 ++ bne cont1a ++ sub r1, r1, #1 ++ cmp r1, #0 ++ bgt delay1a ++ ++continue_dll_off_1: ++ /* set DVFS - enter self refresh mode */ ++ ldr r0, [r5, #MMDC0_MAPSR] ++ orr r0, r0, #(1 << 21) ++ str r0, [r5, #MMDC0_MAPSR] ++ ++ /* de-assert con_req */ ++ mov r0, #0x0 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++poll_dvfs_set_1: ++ ldr r0, [r5, #MMDC0_MAPSR] ++ and r0, r0, #(1 << 25) ++ cmp r0, #(1 << 25) ++ bne poll_dvfs_set_1 ++ ++ ldr r1, =24000000 ++ cmp r4, r1 ++ beq switch_freq_24 ++ ++ switch_to_50MHz ++ b continue_dll_off_2 ++ ++switch_freq_24: ++ switch_to_24MHz ++ ++continue_dll_off_2: ++ ++ /* set SBS - block ddr accesses */ ++ ldr r0, [r5, #MMDC0_MADPCR0] ++ orr r0, r0, #(1 << 8) ++ str r0, [r5, #MMDC0_MADPCR0] ++ ++ /* clear DVFS - exit from self refresh mode */ ++ ldr r0, [r5, #MMDC0_MAPSR] ++ bic r0, r0, #(1 << 21) ++ str r0, [r5, #MMDC0_MAPSR] ++ ++poll_dvfs_clear_1: ++ ldr r0, [r5, #MMDC0_MAPSR] ++ and r0, r0, #(1 << 25) ++ cmp r0, #(1 << 25) ++ beq poll_dvfs_clear_1 ++ ++ /* if DLL was previously on, continue DLL off routine. */ ++ cmp r9, #1 ++ beq continue_dll_off_3 ++ ++ ldr r0, =0x00018031 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ ldr r0, =0x00018039 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ ldr r0, =0x08208030 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ ldr r0, =0x08208038 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ ldr r0, =0x00088032 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ ldr r0, =0x0008803A ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ /* delay for a while. */ ++ ldr r1, =4 ++delay_1: ++ ldr r2, =0 ++cont_1: ++ ldr r0, [r5, r2] ++ add r2, r2, #4 ++ cmp r2, #16 ++ bne cont_1 ++ sub r1, r1, #1 ++ cmp r1, #0 ++ bgt delay_1 ++ ++ ldr r0, [r5, #MMDC0_MDCF0] ++ bic r0, r0, #0xf ++ orr r0, r0, #0x3 ++ str r0, [r5, #MMDC0_MDCF0] ++ ++ ldr r0, [r5, #MMDC0_MDCF1] ++ bic r0, r0, #0x7 ++ orr r0, r0, #0x4 ++ str r0, [r5, #MMDC0_MDCF1] ++ ++ ldr r0, =0x00011680 ++ str r0, [r5, #MMDC0_MDMISC] ++ ++ /* enable dqs pull down in the IOMUX. */ ++ ldr r1, [r11] ++ add r11, r11, #8 ++ ldr r2, =0x3028 ++update_iomux: ++ ldr r0, [r11, #0x0] ++ ldr r3, [r7, r0] ++ bic r3, r3, r2 ++ orr r3, r3, #(0x3 << 12) ++ orr r3, r3, #0x28 ++ str r3, [r7, r0] ++ add r11, r11, #8 ++ sub r1, r1, #1 ++ cmp r1, #0 ++ bgt update_iomux ++ ++ /* ODT disabled. */ ++ ldr r0, =0x0 ++ ldr r2, =MMDC0_MPODTCTRL ++ str r0, [r5, r2] ++ ldr r2, =MMDC1_MPODTCTRL ++ str r0, [r5, r2] ++ ++ /* DQS gating disabled. */ ++ ldr r2, =MMDC0_MPDGCTRL0 ++ ldr r0, [r5, r2] ++ orr r0, r0, #(1 << 29) ++ str r0, [r5, r2] ++ ++ ldr r2, =MMDC1_MPDGCTRL0 ++ ldr r0, [r5, r2] ++ orr r0, r0, #(0x1 << 29) ++ str r0, [r5, r2] ++ ++ /* MMDC0_MAPSR adopt power down enable. */ ++ ldr r0, [r5, #MMDC0_MAPSR] ++ bic r0, r0, #0x01 ++ str r0, [r5, #MMDC0_MAPSR] ++ ++ /* frc_msr + mu bypass */ ++ ldr r0, =0x00000060 ++ str r0, [r5, #MMDC0_MPMUR0] ++ ldr r2, =MMDC1_MPMUR0 ++ str r0, [r5, r2] ++ ldr r0, =0x00000460 ++ str r0, [r5, #MMDC0_MPMUR0] ++ ldr r2, =MMDC1_MPMUR0 ++ str r0, [r5, r2] ++ ldr r0, =0x00000c60 ++ str r0, [r5, #MMDC0_MPMUR0] ++ ldr r2, =MMDC1_MPMUR0 ++ str r0, [r5, r2] ++ ++continue_dll_off_3: ++ /* clear SBS - unblock accesses to DDR. 
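++ * SBS is bit 8 of MADPCR0; setting it earlier blocked AXI traffic to the
++ * controller for the duration of the frequency step. A hedged C sketch
++ * of the clear, under the same register view as this file:
++ *
++ *	u32 v = readl(mmdc + MMDC0_MADPCR0);
++ *	writel(v & ~(1 << 8), mmdc + MMDC0_MADPCR0);
++ *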
*/ ++ ldr r0, [r5, #MMDC0_MADPCR0] ++ bic r0, r0, #(0x1 << 8) ++ str r0, [r5, #MMDC0_MADPCR0] ++ ++ mov r0, #0x0 ++ str r0, [r5, #MMDC0_MDSCR] ++poll_conreq_clear_1: ++ ldr r0, [r5, #MMDC0_MDSCR] ++ and r0, r0, #(0x4 << 12) ++ cmp r0, #(0x4 << 12) ++ beq poll_conreq_clear_1 ++ ++ b done ++ ++dll_on_mode: ++ /* assert DVFS - enter self refresh mode. */ ++ ldr r0, [r5, #MMDC0_MAPSR] ++ orr r0, r0, #(1 << 21) ++ str r0, [r5, #MMDC0_MAPSR] ++ ++ /* de-assert CON_REQ. */ ++ mov r0, #0x0 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ /* poll DVFS ack. */ ++poll_dvfs_set_2: ++ ldr r0, [r5, #MMDC0_MAPSR] ++ and r0, r0, #(1 << 25) ++ cmp r0, #(1 << 25) ++ bne poll_dvfs_set_2 ++ ++ ldr r1, =528000000 ++ cmp r4, r1 ++ beq switch_freq_528 ++ ++ switch_to_400MHz ++ ++ b continue_dll_on ++ ++switch_freq_528: ++ switch_to_528MHz ++ ++continue_dll_on: ++ ++ /* set SBS step-by-step mode. */ ++ ldr r0, [r5, #MMDC0_MADPCR0] ++ orr r0, r0, #( 1 << 8) ++ str r0, [r5, #MMDC0_MADPCR0] ++ ++ /* clear DVFS - exit self refresh mode. */ ++ ldr r0, [r5, #MMDC0_MAPSR] ++ bic r0, r0, #(1 << 21) ++ str r0, [r5, #MMDC0_MAPSR] ++ ++poll_dvfs_clear_2: ++ ldr r0, [r5, #MMDC0_MAPSR] ++ and r0, r0, #(1 << 25) ++ cmp r0, #(1 << 25) ++ beq poll_dvfs_clear_2 ++ ++ /* if DLL is currently off, turn it back on. */ ++ cmp r9, #0 ++ beq update_calibration_only ++ ++ ldr r0, =0xa5390003 ++ str r0, [r5, #MMDC0_MPZQHWCTRL] ++ ldr r2, =MMDC1_MPZQHWCTRL ++ str r0, [r5, r2] ++ ++ /* enable DQS gating. */ ++ ldr r2, =MMDC0_MPDGCTRL0 ++ ldr r0, [r5, r2] ++ bic r0, r0, #(1 << 29) ++ str r0, [r5, r2] ++ ++ ldr r2, =MMDC1_MPDGCTRL0 ++ ldr r0, [r5, r2] ++ bic r0, r0, #(1 << 29) ++ str r0, [r5, r2] ++ ++ /* force measure. */ ++ ldr r0, =0x00000800 ++ str r0, [r5, #MMDC0_MPMUR0] ++ ldr r2, =MMDC1_MPMUR0 ++ str r0, [r5, r2] ++ ++ /* delay for while. */ ++ ldr r1, =4 ++delay5: ++ ldr r2, =0 ++cont5: ++ ldr r0, [r5, r2] ++ add r2, r2, #4 ++ cmp r2, #16 ++ bne cont5 ++ sub r1, r1, #1 ++ cmp r1, #0 ++ bgt delay5 ++ ++ /* disable dqs pull down in the IOMUX. */ ++ ldr r1, [r11] ++ add r11, r11, #8 ++update_iomux1: ++ ldr r0, [r11, #0x0] ++ ldr r3, [r11, #0x4] ++ str r3, [r7, r0] ++ add r11, r11, #8 ++ sub r1, r1, #1 ++ cmp r1, #0 ++ bgt update_iomux1 ++ ++ /* config MMDC timings to 528MHz. */ ++ ldr r9, [r8] ++ add r8, r8, #8 ++ ldr r0, [r8, #0x0] ++ ldr r3, [r8, #0x4] ++ str r3, [r5, r0] ++ add r8, r8, #8 ++ ++ ldr r0, [r8, #0x0] ++ ldr r3, [r8, #0x4] ++ str r3, [r5, r0] ++ add r8, r8, #8 ++ ++ /* update MISC register: WALAT, RALAT */ ++ ldr r0, =0x00001740 ++ str r0, [r5, #MMDC0_MDMISC] ++ ++ /* configure ddr devices to dll on, odt. */ ++ ldr r0, =0x00048031 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ ldr r0, =0x00048039 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ /* delay for while. */ ++ ldr r1, =4 ++delay7: ++ ldr r2, =0 ++cont7: ++ ldr r0, [r5, r2] ++ add r2, r2, #4 ++ cmp r2, #16 ++ bne cont7 ++ sub r1, r1, #1 ++ cmp r1, #0 ++ bgt delay7 ++ ++ /* reset dll. */ ++ ldr r0, =0x09408030 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ ldr r0, =0x09408038 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ /* delay for while. 
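++ * The loop below burns time with 100 rounds of four MMDC register
++ * reads, presumably to give the freshly reset DLL time to settle.
++ * Roughly, in C:
++ *
++ *	for (i = 0; i < 100; i++)
++ *		for (off = 0; off < 16; off += 4)
++ *			(void)readl(mmdc + off);	// dummy reads as delay
++ *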
*/ ++ ldr r1, =100 ++delay8: ++ ldr r2, =0 ++cont8: ++ ldr r0, [r5, r2] ++ add r2, r2, #4 ++ cmp r2, #16 ++ bne cont8 ++ sub r1, r1, #1 ++ cmp r1, #0 ++ bgt delay8 ++ ++ ldr r0, [r8, #0x0] ++ ldr r3, [r8, #0x4] ++ str r3, [r5, r0] ++ add r8, r8, #8 ++ ++ ldr r0, [r8, #0x0] ++ ldr r3, [r8, #0x4] ++ str r3, [r5, r0] ++ add r8, r8, #8 ++ ++ ldr r0, =0x00428031 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ ldr r0, =0x00428039 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ ldr r0, [r8, #0x0] ++ ldr r3, [r8, #0x4] ++ str r3, [r5, r0] ++ add r8, r8, #8 ++ ++ ldr r0, [r8, #0x0] ++ ldr r3, [r8, #0x4] ++ str r3, [r5, r0] ++ add r8, r8, #8 ++ ++ /* issue a zq command. */ ++ ldr r0, =0x04008040 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ ldr r0, =0x04008048 ++ str r0, [r5, #MMDC0_MDSCR] ++ ++ /* MMDC ODT enable. */ ++ ldr r0, [r8, #0x0] ++ ldr r3, [r8, #0x4] ++ str r3, [r5, r0] ++ add r8, r8, #8 ++ ++ ldr r2, =0x4818 ++ str r3, [r5, r2] ++ ++ /* delay for while. */ ++ ldr r1, =40 ++delay15: ++ ldr r2, =0 ++cont15: ++ ldr r0, [r5, r2] ++ add r2, r2, #4 ++ cmp r2, #16 ++ bne cont15 ++ sub r1, r1, #1 ++ cmp r1, #0 ++ bgt delay15 ++ ++ /* MMDC0_MAPSR adopt power down enable. */ ++ ldr r0, [r5, #MMDC0_MAPSR] ++ bic r0, r0, #0x01 ++ str r0, [r5, #MMDC0_MAPSR] ++ ++ /* enable MMDC power down timer. */ ++ ldr r0, [r5, #MMDC0_MDPDC] ++ orr r0, r0, #(0x55 << 8) ++ str r0, [r5, #MMDC0_MDPDC] ++ ++ b update_calibration ++ ++update_calibration_only: ++ ldr r1, [r8] ++ sub r1, r1, #7 ++ add r8, r8, #64 ++ b update_calib ++ ++update_calibration: ++ /* write the new calibration values. */ ++ mov r1, r9 ++ sub r1, r1, #7 ++ ++update_calib: ++ ldr r0, [r8, #0x0] ++ ldr r3, [r8, #0x4] ++ str r3, [r5, r0] ++ add r8, r8, #8 ++ sub r1, r1, #1 ++ cmp r1, #0 ++ bgt update_calib ++ ++ /* perform a force measurement. */ ++ ldr r0, =0x800 ++ str r0, [r5, #MMDC0_MPMUR0] ++ ldr r2, =MMDC1_MPMUR0 ++ str r0, [r5, r2] ++ ++ /* clear SBS - unblock DDR accesses. */ ++ ldr r0, [r5, #MMDC0_MADPCR0] ++ bic r0, r0, #(1 << 8) ++ str r0, [r5, #MMDC0_MADPCR0] ++ ++ mov r0, #0x0 ++ str r0, [r5, #MMDC0_MDSCR] ++poll_conreq_clear_2: ++ ldr r0, [r5, #MMDC0_MDSCR] ++ and r0, r0, #(0x4 << 12) ++ cmp r0, #(0x4 << 12) ++ beq poll_conreq_clear_2 ++ ++done: ++ /* restore registers */ ++ ++ ldmfd sp!, {r4-r12} ++ mov pc, lr ++ ++ .type mx6_do_ddr3_freq_change, #object ++ENTRY(mx6_do_ddr_freq_change) ++ .word mx6_ddr3_freq_change ++ .size mx6_ddr3_freq_change, . 
- mx6_ddr3_freq_change
+diff -Nur linux-4.1.3/arch/arm/mach-imx/Kconfig linux-xbian-imx6/arch/arm/mach-imx/Kconfig
+--- linux-4.1.3/arch/arm/mach-imx/Kconfig 2015-07-21 19:10:33.000000000 +0200
++++ linux-xbian-imx6/arch/arm/mach-imx/Kconfig 2015-07-27 23:13:01.053224513 +0200
+@@ -1,5 +1,6 @@
+ menuconfig ARCH_MXC
+ bool "Freescale i.MX family" if ARCH_MULTI_V4_V5 || ARCH_MULTI_V6_V7
++ select ARCH_HAS_RESET_CONTROLLER
+ select ARCH_REQUIRE_GPIOLIB
+ select ARM_CPU_SUSPEND if PM
+ select CLKSRC_MMIO
+@@ -8,6 +9,7 @@
+ select PM_OPP if PM
+ select SOC_BUS
+ select SRAM
++ select ZONE_DMA
+ help
+ Support for Freescale MXC/iMX-based family of processors
+
+@@ -58,7 +60,6 @@
+
+ config HAVE_IMX_SRC
+ def_bool y if SMP
+- select ARCH_HAS_RESET_CONTROLLER
+
+ config IMX_HAVE_IOMUX_V1
+ bool
+diff -Nur linux-4.1.3/arch/arm/mach-imx/lpddr2_freq_imx6.S linux-xbian-imx6/arch/arm/mach-imx/lpddr2_freq_imx6.S
+--- linux-4.1.3/arch/arm/mach-imx/lpddr2_freq_imx6.S 1970-01-01 01:00:00.000000000 +0100
++++ linux-xbian-imx6/arch/arm/mach-imx/lpddr2_freq_imx6.S 2015-07-27 23:13:01.093082305 +0200
+@@ -0,0 +1,484 @@
++/*
++ * Copyright (C) 2012-2013 Freescale Semiconductor, Inc. All Rights Reserved.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++
++ * You should have received a copy of the GNU General Public License along
++ * with this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
++ */
++
++#include <linux/linkage.h>
++
++ .macro mx6sl_switch_to_24MHz
++
++ /*
++ * Set MMDC clock to be sourced from PLL3.
++ * Ensure first periph2_clk2 is sourced from PLL3.
++ * Set the PERIPH2_CLK2_PODF to divide by 2.
++ */
++ ldr r6, [r2, #0x14]
++ bic r6, r6, #0x7
++ orr r6, r6, #0x1
++ str r6, [r2, #0x14]
++
++ /* Select PLL3 to source MMDC. */
++ ldr r6, [r2, #0x18]
++ bic r6, r6, #0x100000
++ str r6, [r2, #0x18]
++
++ /* Switch periph2_clk_sel to run from PLL3. */
++ ldr r6, [r2, #0x14]
++ orr r6, r6, #0x4000000
++ str r6, [r2, #0x14]
++
++periph2_clk_switch1:
++ ldr r6, [r2, #0x48]
++ cmp r6, #0
++ bne periph2_clk_switch1
++
++ /*
++ * Need to clock gate the 528 PFDs before
++ * powering down PLL2.
++ * Only the PLL2_PFD2_400M should be ON
++ * at this time, so only clock gate that one.
++ */
++ ldr r6, [r3, #0x100]
++ orr r6, r6, #0x800000
++ str r6, [r3, #0x100]
++
++ /*
++ * Set PLL2 to bypass state. We should be here
++ * only if MMDC is not sourced from PLL2.
++ */
++ ldr r6, [r3, #0x30]
++ orr r6, r6, #0x10000
++ str r6, [r3, #0x30]
++
++ ldr r6, [r3, #0x30]
++ orr r6, r6, #0x1000
++ str r6, [r3, #0x30]
++
++ /* Ensure pre_periph2_clk_mux is set to pll2 */
++ ldr r6, [r2, #0x18]
++ bic r6, r6, #0x600000
++ str r6, [r2, #0x18]
++
++ /* Set MMDC clock to be sourced from the bypassed PLL2. */
++ ldr r6, [r2, #0x14]
++ bic r6, r6, #0x4000000
++ str r6, [r2, #0x14]
++
++periph2_clk_switch2:
++ ldr r6, [r2, #0x48]
++ cmp r6, #0
++ bne periph2_clk_switch2
++
++ /*
++ * Now move MMDC back to the periph2_clk2 source,
++ * after selecting PLL2 as the option.
++ * Select PLL2 as the source.
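++ * (Ordering: periph2_clk2_sel, bit 20 of the register at offset 0x18, is
++ * pointed back at PLL2, the periph2_clk2 divider is set to 1, and only
++ * then is the bit-26 periph2_clk mux flipped, so MMDC lands glitchlessly
++ * on the bypassed, 24 MHz PLL2. Each step again spins on the CCM
++ * handshake register at offset 0x48.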
++ */ ++ ldr r6, [r2, #0x18] ++ orr r6, r6, #0x100000 ++ str r6, [r2, #0x18] ++ ++ /* set periph2_clk2_podf to divide by 1. */ ++ ldr r6, [r2, #0x14] ++ bic r6, r6, #0x7 ++ str r6, [r2, #0x14] ++ ++ /* Now move periph2_clk to periph2_clk2 source */ ++ ldr r6, [r2, #0x14] ++ orr r6, r6, #0x4000000 ++ str r6, [r2, #0x14] ++ ++periph2_clk_switch3: ++ ldr r6, [r2, #0x48] ++ cmp r6, #0 ++ bne periph2_clk_switch3 ++ ++ /* Now set the MMDC PODF back to 1.*/ ++ ldr r6, [r2, #0x14] ++ bic r6, r6, #0x38 ++ str r6, [r2, #0x14] ++ ++mmdc_podf0: ++ ldr r6, [r2, #0x48] ++ cmp r6, #0 ++ bne mmdc_podf0 ++ ++ .endm ++ ++ .macro ddr_switch_400MHz ++ ++ /* Set MMDC divider first, in case PLL3 is at 480MHz. */ ++ ldr r6, [r3, #0x10] ++ and r6, r6, #0x10000 ++ cmp r6, #0x10000 ++ beq pll3_in_bypass ++ ++ /* Set MMDC divder to divide by 2. */ ++ ldr r6, [r2, #0x14] ++ bic r6, r6, #0x38 ++ orr r6, r6, #0x8 ++ str r6, [r2, #0x14] ++ ++mmdc_podf: ++ ldr r6, [r2, #0x48] ++ cmp r6, #0 ++ bne mmdc_podf ++ ++pll3_in_bypass: ++ /* ++ * Check if we are switching between ++ * 400Mhz <-> 100MHz.If so, we should ++ * try to source MMDC from PLL2_200M. ++ */ ++ cmp r1, #0 ++ beq not_low_bus_freq ++ ++ /* Ensure that MMDC is sourced from PLL2 mux first. */ ++ ldr r6, [r2, #0x14] ++ bic r6, r6, #0x4000000 ++ str r6, [r2, #0x14] ++ ++periph2_clk_switch4: ++ ldr r6, [r2, #0x48] ++ cmp r6, #0 ++ bne periph2_clk_switch4 ++ ++not_low_bus_freq: ++ /* Now ensure periph2_clk2_sel mux is set to PLL3 */ ++ ldr r6, [r2, #0x18] ++ bic r6, r6, #0x100000 ++ str r6, [r2, #0x18] ++ ++ /* Now switch MMDC to PLL3. */ ++ ldr r6, [r2, #0x14] ++ orr r6, r6, #0x4000000 ++ str r6, [r2, #0x14] ++ ++periph2_clk_switch5: ++ ldr r6, [r2, #0x48] ++ cmp r6, #0 ++ bne periph2_clk_switch5 ++ ++ /* ++ * Check if PLL2 is already unlocked. ++ * If so do nothing with PLL2. ++ */ ++ cmp r1, #0 ++ beq pll2_already_on ++ ++ /* Now power up PLL2 and unbypass it. */ ++ ldr r6, [r3, #0x30] ++ bic r6, r6, #0x1000 ++ str r6, [r3, #0x30] ++ ++ /* Make sure PLL2 has locked.*/ ++wait_for_pll_lock: ++ ldr r6, [r3, #0x30] ++ and r6, r6, #0x80000000 ++ cmp r6, #0x80000000 ++ bne wait_for_pll_lock ++ ++ ldr r6, [r3, #0x30] ++ bic r6, r6, #0x10000 ++ str r6, [r3, #0x30] ++ ++ /* ++ * Need to enable the 528 PFDs after ++ * powering up PLL2. ++ * Only the PLL2_PFD2_400M should be ON ++ * as it feeds the MMDC. Rest should have ++ * been managed by clock code. ++ */ ++ ldr r6, [r3, #0x100] ++ bic r6, r6, #0x800000 ++ str r6, [r3, #0x100] ++ ++pll2_already_on: ++ /* ++ * Now switch MMDC clk back to pll2_mux option. ++ * Ensure pre_periph2_clk2 is set to pll2_pfd_400M. ++ * If switching to audio DDR freq, set the ++ * pre_periph2_clk2 to PLL2_PFD_200M ++ */ ++ ldr r6, =400000000 ++ cmp r6, r0 ++ bne use_pll2_pfd_200M ++ ++ ldr r6, [r2, #0x18] ++ bic r6, r6, #0x600000 ++ orr r6, r6, #0x200000 ++ str r6, [r2, #0x18] ++ ldr r6, =400000000 ++ b cont2 ++ ++use_pll2_pfd_200M: ++ ldr r6, [r2, #0x18] ++ orr r6, r6, #0x600000 ++ str r6, [r2, #0x18] ++ ldr r6, =200000000 ++ ++cont2: ++ ldr r4, [r2, #0x14] ++ bic r4, r4, #0x4000000 ++ str r4, [r2, #0x14] ++ ++periph2_clk_switch6: ++ ldr r4, [r2, #0x48] ++ cmp r4, #0 ++ bne periph2_clk_switch6 ++ ++change_divider_only: ++ /* ++ * Calculate the MMDC divider ++ * based on the requested freq. ++ */ ++ ldr r4, =0 ++Loop2: ++ sub r6, r6, r0 ++ cmp r6, r0 ++ blt Div_Found ++ add r4, r4, #1 ++ bgt Loop2 ++ ++ /* Shift divider into correct offset. */ ++ lsl r4, r4, #3 ++Div_Found: ++ /* Set the MMDC PODF. 
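++ * Loop2 above derived the divider by repeated subtraction of the target
++ * rate from the source rate; conceptually the result is
++ * (parent_rate / ddr_rate - 1) placed in the MMDC divider field at bits
++ * [5:3] of the register at offset 0x14. A rough C equivalent (sketch
++ * only, hypothetical names):
++ *
++ *	podf = parent_rate / ddr_rate - 1;
++ *	v = (readl(ccm + 0x14) & ~0x38) | (podf << 3);
++ *	writel(v, ccm + 0x14);
++ *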
*/ ++ ldr r6, [r2, #0x14] ++ bic r6, r6, #0x38 ++ orr r6, r6, r4 ++ str r6, [r2, #0x14] ++ ++mmdc_podf1: ++ ldr r6, [r2, #0x48] ++ cmp r6, #0 ++ bne mmdc_podf1 ++ ++ .endm ++ ++ .macro mmdc_clk_lower_100MHz ++ ++ /* ++ * Prior to reducing the DDR frequency (at 528/400 MHz), ++ * read the Measure unit count bits (MU_UNIT_DEL_NUM) ++ */ ++ ldr r5, =0x8B8 ++ ldr r6, [r8, r5] ++ /* Original MU unit count */ ++ mov r6, r6, LSR #16 ++ ldr r4, =0x3FF ++ and r6, r6, r4 ++ /* Original MU unit count * 2 */ ++ mov r7, r6, LSL #1 ++ /* ++ * Bypass the automatic measure unit when below 100 MHz ++ * by setting the Measure unit bypass enable bit (MU_BYP_EN) ++ */ ++ ldr r6, [r8, r5] ++ orr r6, r6, #0x400 ++ str r6, [r8, r5] ++ /* ++ * Double the measure count value read in step 1 and program it in the ++ * measurement bypass bits (MU_BYP_VAL) of the MMDC PHY Measure Unit ++ * Register for the reduced frequency operation below 100 MHz ++ */ ++ ldr r6, [r8, r5] ++ ldr r4, =0x3FF ++ bic r6, r6, r4 ++ orr r6, r6, r7 ++ str r6, [r8, r5] ++ /* Now perform a Force Measurement. */ ++ ldr r6, [r8, r5] ++ orr r6, r6, #0x800 ++ str r6, [r8, r5] ++ /* Wait for FRC_MSR to clear. */ ++force_measure: ++ ldr r6, [r8, r5] ++ and r6, r6, #0x800 ++ cmp r6, #0x0 ++ bne force_measure ++ ++ .endm ++ ++ .macro mmdc_clk_above_100MHz ++ ++ /* Make sure that the PHY measurement unit is NOT in bypass mode */ ++ ldr r5, =0x8B8 ++ ldr r6, [r8, r5] ++ bic r6, r6, #0x400 ++ str r6, [r8, r5] ++ /* Now perform a Force Measurement. */ ++ ldr r6, [r8, r5] ++ orr r6, r6, #0x800 ++ str r6, [r8, r5] ++ /* Wait for FRC_MSR to clear. */ ++force_measure1: ++ ldr r6, [r8, r5] ++ and r6, r6, #0x800 ++ cmp r6, #0x0 ++ bne force_measure1 ++ .endm ++ ++/* ++ * mx6_lpddr2_freq_change ++ * ++ * Make sure DDR is in self-refresh. ++ * IRQs are already disabled. ++ * r0 : DDR freq. ++ * r1: low_bus_freq_mode flag ++ * r2: Pointer to array containing addresses of registers. ++ */ ++ .align 3 ++ENTRY(mx6_lpddr2_freq_change) ++ ++ push {r4-r10} ++ ++ mov r4, r2 ++ ldr r3, [r4] @ANATOP_BASE_ADDR ++ ldr r2, [r4, #0x4] @CCM_BASE_ADDR ++ ldr r8, [r4, #0x8] @MMDC_P0_BASE_ADDR ++ ldr r7, [r4, #0xC] @L2_BASE_ADDR ++ ++lpddr2_freq_change: ++ adr r9, lpddr2_freq_change ++ ++ /* Prime all TLB entries. */ ++ ldr r6, [r9] ++ ldr r6, [r8] ++ ldr r6, [r3] ++ ldr r6, [r2] ++ ++ /* Drain all the L1 buffers. */ ++ dsb ++ ++#ifdef CONFIG_CACHE_L2X0 ++ /* ++ * Need to make sure the buffers in L2 are drained. ++ * Performing a sync operation does this. ++ */ ++ mov r6, #0x0 ++ str r6, [r7, #0x730] ++#endif ++ ++ /* ++ * The second dsb might be needed to keep cache sync (device write) ++ * ordering with the memory accesses before it. ++ */ ++ dsb ++ isb ++ ++ /* Disable Automatic power savings. */ ++ ldr r6, [r8, #0x404] ++ orr r6, r6, #0x01 ++ str r6, [r8, #0x404] ++ ++ /* MMDC0_MDPDC disable power down timer */ ++ ldr r6, [r8, #0x4] ++ bic r6, r6, #0xff00 ++ str r6, [r8, #0x4] ++ ++ /* Delay for a while */ ++ ldr r10, =10 ++delay1: ++ ldr r7, =0 ++cont1: ++ ldr r6, [r8, r7] ++ add r7, r7, #4 ++ cmp r7, #16 ++ bne cont1 ++ sub r10, r10, #1 ++ cmp r10, #0 ++ bgt delay1 ++ ++ /* Make the DDR explicitly enter self-refresh. 
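++ * Offset 0x404 is MAPSR: bit 21 requests DVFS/self-refresh and bit 25 is
++ * the acknowledge polled right below. As a C sketch, under the same
++ * assumptions as the sketches above:
++ *
++ *	writel(readl(mmdc + 0x404) | (1 << 21), mmdc + 0x404);
++ *	while (!(readl(mmdc + 0x404) & (1 << 25)))
++ *		;	// wait for self-refresh entry
++ *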
*/ ++ ldr r6, [r8, #0x404] ++ orr r6, r6, #0x200000 ++ str r6, [r8, #0x404] ++ ++poll_dvfs_set_1: ++ ldr r6, [r8, #0x404] ++ and r6, r6, #0x2000000 ++ cmp r6, #0x2000000 ++ bne poll_dvfs_set_1 ++ ++ /* set SBS step-by-step mode */ ++ ldr r6, [r8, #0x410] ++ orr r6, r6, #0x100 ++ str r6, [r8, #0x410] ++ ++ ldr r10, =100000000 ++ cmp r0, r10 ++ bgt set_ddr_mu_above_100 ++ mmdc_clk_lower_100MHz ++ ++set_ddr_mu_above_100: ++ ldr r10, =24000000 ++ cmp r0, r10 ++ beq set_to_24MHz ++ ++ ddr_switch_400MHz ++ ++ ldr r10,=100000000 ++ cmp r0, r10 ++ blt done ++ mmdc_clk_above_100MHz ++ ++ b done ++ ++set_to_24MHz: ++ mx6sl_switch_to_24MHz ++ ++done: ++ /* clear DVFS - exit from self refresh mode */ ++ ldr r6, [r8, #0x404] ++ bic r6, r6, #0x200000 ++ str r6, [r8, #0x404] ++ ++poll_dvfs_clear_1: ++ ldr r6, [r8, #0x404] ++ and r6, r6, #0x2000000 ++ cmp r6, #0x2000000 ++ beq poll_dvfs_clear_1 ++ ++ /* Enable Automatic power savings. */ ++ ldr r6, [r8, #0x404] ++ bic r6, r6, #0x01 ++ str r6, [r8, #0x404] ++ ++ ldr r10, =24000000 ++ cmp r0, r10 ++ beq skip_power_down ++ ++ /* Enable MMDC power down timer. */ ++ ldr r6, [r8, #0x4] ++ orr r6, r6, #0x5500 ++ str r6, [r8, #0x4] ++ ++skip_power_down: ++ /* clear SBS - unblock DDR accesses */ ++ ldr r6, [r8, #0x410] ++ bic r6, r6, #0x100 ++ str r6, [r8, #0x410] ++ ++ pop {r4-r10} ++ ++ /* Restore registers */ ++ mov pc, lr ++ ++ .type mx6_lpddr2_do_iram, #object ++ENTRY(mx6_lpddr2_do_iram) ++ .word mx6_lpddr2_freq_change ++ .size mx6_lpddr2_freq_change, . - mx6_lpddr2_freq_change +diff -Nur linux-4.1.3/arch/arm/mach-imx/mach-imx6q.c linux-xbian-imx6/arch/arm/mach-imx/mach-imx6q.c +--- linux-4.1.3/arch/arm/mach-imx/mach-imx6q.c 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/mach-imx/mach-imx6q.c 2015-07-27 23:13:01.097068084 +0200 +@@ -265,10 +265,12 @@ + static void __init imx6q_init_machine(void) + { + struct device *parent; ++ void __iomem *p; + + imx_print_silicon_rev(cpu_is_imx6dl() ? "i.MX6DL" : "i.MX6Q", + imx_get_soc_revision()); + ++ mxc_arch_reset_init_dt(); + parent = imx_soc_device_init(); + if (parent == NULL) + pr_warn("failed to initialize soc device\n"); +@@ -281,6 +283,12 @@ + cpu_is_imx6q() ? 
imx6q_pm_init() : imx6dl_pm_init(); + imx6q_1588_init(); + imx6q_axi_init(); ++ ++ p = ioremap(0x21b0000, SZ_4K); ++ if (p) { ++ writel(0x7f, p + 0x40); ++ iounmap(p); ++ } + } + + #define OCOTP_CFG3 0x440 +@@ -330,6 +338,12 @@ + if (dev_pm_opp_disable(cpu_dev, 852000000)) + pr_warn("failed to disable 852 MHz OPP\n"); + } ++ if (vpu352) { ++ if (dev_pm_opp_disable(cpu_dev, 396000000)) ++ pr_warn("VPU352: failed to disable 396MHz OPP\n"); ++ pr_info("VPU352: remove 396MHz OPP for VPU running at 352MHz!\n"); ++ } ++ + iounmap(base); + put_node: + of_node_put(np); +@@ -408,4 +422,5 @@ + .init_machine = imx6q_init_machine, + .init_late = imx6q_init_late, + .dt_compat = imx6q_dt_compat, ++ .restart = mxc_restart, + MACHINE_END +diff -Nur linux-4.1.3/arch/arm/mach-imx/Makefile linux-xbian-imx6/arch/arm/mach-imx/Makefile +--- linux-4.1.3/arch/arm/mach-imx/Makefile 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/mach-imx/Makefile 2015-07-27 23:13:01.053224513 +0200 +@@ -28,6 +28,12 @@ + obj-$(CONFIG_MXC_USE_EPIT) += epit.o + obj-$(CONFIG_MXC_DEBUG_BOARD) += 3ds_debugboard.o + ++obj-y += busfreq-imx6.o ++ifdef CONFIG_ARM_IMX6Q_CPUFREQ ++obj-$(CONFIG_SOC_IMX6Q) += ddr3_freq_imx6.o busfreq_ddr3.o ++obj-$(CONFIG_SOC_IMX6SL) += lpddr2_freq_imx6.o busfreq_lpddr2.o ++endif ++ + ifeq ($(CONFIG_CPU_IDLE),y) + obj-$(CONFIG_SOC_IMX5) += cpuidle-imx5.o + obj-$(CONFIG_SOC_IMX6Q) += cpuidle-imx6q.o +diff -Nur linux-4.1.3/arch/arm/mach-imx/src.c linux-xbian-imx6/arch/arm/mach-imx/src.c +--- linux-4.1.3/arch/arm/mach-imx/src.c 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/mach-imx/src.c 2015-07-27 23:13:01.097068084 +0200 +@@ -1,5 +1,5 @@ + /* +- * Copyright 2011 Freescale Semiconductor, Inc. ++ * Copyright 2011-2014 Freescale Semiconductor, Inc. + * Copyright 2011 Linaro Ltd. + * + * The code contained herein is licensed under the GNU General Public +@@ -18,6 +18,7 @@ + #include + #include + #include "common.h" ++#include "hardware.h" + + #define SRC_SCR 0x000 + #define SRC_GPR1 0x020 +@@ -32,6 +33,7 @@ + + static void __iomem *src_base; + static DEFINE_SPINLOCK(scr_lock); ++static bool m4_is_enabled; + + static const int sw_reset_bits[5] = { + BP_SRC_SCR_SW_GPU_RST, +@@ -41,6 +43,11 @@ + BP_SRC_SCR_SW_IPU2_RST + }; + ++bool imx_src_is_m4_enabled(void) ++{ ++ return m4_is_enabled; ++} ++ + static int imx_src_reset_module(struct reset_controller_dev *rcdev, + unsigned long sw_reset_idx) + { +@@ -136,6 +143,14 @@ + */ + spin_lock(&scr_lock); + val = readl_relaxed(src_base + SRC_SCR); ++ ++ /* bit 4 is m4c_non_sclr_rst on i.MX6SX */ ++ if (cpu_is_imx6sx() && ((val & ++ (1 << BP_SRC_SCR_SW_OPEN_VG_RST)) == 0)) ++ m4_is_enabled = true; ++ else ++ m4_is_enabled = false; ++ + val &= ~(1 << BP_SRC_SCR_WARM_RESET_ENABLE); + writel_relaxed(val, src_base + SRC_SCR); + spin_unlock(&scr_lock); +diff -Nur linux-4.1.3/arch/arm/mach-imx/system.c linux-xbian-imx6/arch/arm/mach-imx/system.c +--- linux-4.1.3/arch/arm/mach-imx/system.c 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/mach-imx/system.c 2015-07-27 23:13:01.101053863 +0200 +@@ -34,6 +34,7 @@ + + static void __iomem *wdog_base; + static struct clk *wdog_clk; ++static u32 wdog_source = 1; /* use WDOG1 default */ + + /* + * Reset the system. It is called by machine_restart(). +@@ -50,6 +51,17 @@ + + if (cpu_is_mx1()) + wcr_enable = (1 << 0); ++ /* ++ * Some i.MX6 boards use WDOG2 to reset external pmic in bypass mode, ++ * so do WDOG2 reset here. Do not set SRS, since we will ++ * trigger external POR later. 
Use WDOG1 to reset in ldo-enable ++ * mode. You can set it by "fsl,wdog-reset" in dts. ++ * For i.MX6SX we have to trigger wdog-reset to reset QSPI-NOR flash to ++ * workaround qspi-nor reboot issue whatever ldo-bypass or not. ++ */ ++ else if ((wdog_source == 2 && (cpu_is_imx6q() || cpu_is_imx6dl() || ++ cpu_is_imx6sl())) || cpu_is_imx6sx()) ++ wcr_enable = 0x14; + else + wcr_enable = (1 << 2); + +@@ -89,6 +101,41 @@ + clk_prepare(wdog_clk); + } + ++void __init mxc_arch_reset_init_dt(void) ++{ ++ struct device_node *np = NULL; ++ ++ if (cpu_is_imx6q() || cpu_is_imx6dl()) ++ np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-gpc"); ++ else if (cpu_is_imx6sl()) ++ np = of_find_compatible_node(NULL, NULL, "fsl,imx6sl-gpc"); ++ ++ if (np) ++ of_property_read_u32(np, "fsl,wdog-reset", &wdog_source); ++ pr_info("Use WDOG%d as reset source\n", wdog_source); ++ ++ np = of_find_compatible_node(NULL, NULL, "fsl,imx21-wdt"); ++ wdog_base = of_iomap(np, 0); ++ WARN_ON(!wdog_base); ++ ++ /* Some i.MX6 boards use WDOG2 to reset board in ldo-bypass mode */ ++ if (wdog_source == 2 && (cpu_is_imx6q() || cpu_is_imx6dl() || ++ cpu_is_imx6sl())) { ++ np = of_find_compatible_node(np, NULL, "fsl,imx21-wdt"); ++ wdog_base = of_iomap(np, 0); ++ WARN_ON(!wdog_base); ++ } ++ ++ wdog_clk = of_clk_get(np, 0); ++ if (IS_ERR(wdog_clk)) { ++ pr_warn("%s: failed to get wdog clock\n", __func__); ++ wdog_clk = NULL; ++ return; ++ } ++ ++ clk_prepare(wdog_clk); ++} ++ + #ifdef CONFIG_CACHE_L2X0 + void __init imx_init_l2cache(void) + { +diff -Nur linux-4.1.3/arch/arm/mm/cache-v7.S linux-xbian-imx6/arch/arm/mm/cache-v7.S +--- linux-4.1.3/arch/arm/mm/cache-v7.S 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/arch/arm/mm/cache-v7.S 2015-07-27 23:13:01.503617563 +0200 +@@ -446,3 +446,5 @@ + + @ define struct cpu_cache_fns (see and proc-macros.S) + define_cache_functions v7 ++ ++ .long v7_dma_flush_range +diff -Nur linux-4.1.3/block/bfq-cgroup.c linux-xbian-imx6/block/bfq-cgroup.c +--- linux-4.1.3/block/bfq-cgroup.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/block/bfq-cgroup.c 2015-07-27 23:13:03.600137415 +0200 +@@ -0,0 +1,936 @@ ++/* ++ * BFQ: CGROUPS support. ++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ * ++ * Copyright (C) 2010 Paolo Valente ++ * ++ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ ++ * file. ++ */ ++ ++#ifdef CONFIG_CGROUP_BFQIO ++ ++static DEFINE_MUTEX(bfqio_mutex); ++ ++static bool bfqio_is_removed(struct bfqio_cgroup *bgrp) ++{ ++ return bgrp ? !bgrp->online : false; ++} ++ ++static struct bfqio_cgroup bfqio_root_cgroup = { ++ .weight = BFQ_DEFAULT_GRP_WEIGHT, ++ .ioprio = BFQ_DEFAULT_GRP_IOPRIO, ++ .ioprio_class = BFQ_DEFAULT_GRP_CLASS, ++}; ++ ++static inline void bfq_init_entity(struct bfq_entity *entity, ++ struct bfq_group *bfqg) ++{ ++ entity->weight = entity->new_weight; ++ entity->orig_weight = entity->new_weight; ++ entity->ioprio = entity->new_ioprio; ++ entity->ioprio_class = entity->new_ioprio_class; ++ entity->parent = bfqg->my_entity; ++ entity->sched_data = &bfqg->sched_data; ++} ++ ++static struct bfqio_cgroup *css_to_bfqio(struct cgroup_subsys_state *css) ++{ ++ return css ? container_of(css, struct bfqio_cgroup, css) : NULL; ++} ++ ++/* ++ * Search the bfq_group for bfqd into the hash table (by now only a list) ++ * of bgrp. Must be called under rcu_read_lock(). 
++ */ ++static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp, ++ struct bfq_data *bfqd) ++{ ++ struct bfq_group *bfqg; ++ void *key; ++ ++ hlist_for_each_entry_rcu(bfqg, &bgrp->group_data, group_node) { ++ key = rcu_dereference(bfqg->bfqd); ++ if (key == bfqd) ++ return bfqg; ++ } ++ ++ return NULL; ++} ++ ++static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp, ++ struct bfq_group *bfqg) ++{ ++ struct bfq_entity *entity = &bfqg->entity; ++ ++ /* ++ * If the weight of the entity has never been set via the sysfs ++ * interface, then bgrp->weight == 0. In this case we initialize ++ * the weight from the current ioprio value. Otherwise, the group ++ * weight, if set, has priority over the ioprio value. ++ */ ++ if (bgrp->weight == 0) { ++ entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio); ++ entity->new_ioprio = bgrp->ioprio; ++ } else { ++ if (bgrp->weight < BFQ_MIN_WEIGHT || ++ bgrp->weight > BFQ_MAX_WEIGHT) { ++ printk(KERN_CRIT "bfq_group_init_entity: " ++ "bgrp->weight %d\n", bgrp->weight); ++ BUG(); ++ } ++ entity->new_weight = bgrp->weight; ++ entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight); ++ } ++ entity->orig_weight = entity->weight = entity->new_weight; ++ entity->ioprio = entity->new_ioprio; ++ entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class; ++ entity->my_sched_data = &bfqg->sched_data; ++ bfqg->active_entities = 0; ++} ++ ++static inline void bfq_group_set_parent(struct bfq_group *bfqg, ++ struct bfq_group *parent) ++{ ++ struct bfq_entity *entity; ++ ++ BUG_ON(parent == NULL); ++ BUG_ON(bfqg == NULL); ++ ++ entity = &bfqg->entity; ++ entity->parent = parent->my_entity; ++ entity->sched_data = &parent->sched_data; ++} ++ ++/** ++ * bfq_group_chain_alloc - allocate a chain of groups. ++ * @bfqd: queue descriptor. ++ * @css: the leaf cgroup_subsys_state this chain starts from. ++ * ++ * Allocate a chain of groups starting from the one belonging to ++ * @cgroup up to the root cgroup. Stop if a cgroup on the chain ++ * to the root has already an allocated group on @bfqd. ++ */ ++static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, ++ struct cgroup_subsys_state *css) ++{ ++ struct bfqio_cgroup *bgrp; ++ struct bfq_group *bfqg, *prev = NULL, *leaf = NULL; ++ ++ for (; css != NULL; css = css->parent) { ++ bgrp = css_to_bfqio(css); ++ ++ bfqg = bfqio_lookup_group(bgrp, bfqd); ++ if (bfqg != NULL) { ++ /* ++ * All the cgroups in the path from there to the ++ * root must have a bfq_group for bfqd, so we don't ++ * need any more allocations. ++ */ ++ break; ++ } ++ ++ bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC); ++ if (bfqg == NULL) ++ goto cleanup; ++ ++ bfq_group_init_entity(bgrp, bfqg); ++ bfqg->my_entity = &bfqg->entity; ++ ++ if (leaf == NULL) { ++ leaf = bfqg; ++ prev = leaf; ++ } else { ++ bfq_group_set_parent(prev, bfqg); ++ /* ++ * Build a list of allocated nodes using the bfqd ++ * filed, that is still unused and will be ++ * initialized only after the node will be ++ * connected. ++ */ ++ prev->bfqd = bfqg; ++ prev = bfqg; ++ } ++ } ++ ++ return leaf; ++ ++cleanup: ++ while (leaf != NULL) { ++ prev = leaf; ++ leaf = leaf->bfqd; ++ kfree(prev); ++ } ++ ++ return NULL; ++} ++ ++/** ++ * bfq_group_chain_link - link an allocated group chain to a cgroup ++ * hierarchy. ++ * @bfqd: the queue descriptor. ++ * @css: the leaf cgroup_subsys_state to start from. ++ * @leaf: the leaf group (to be associated to @cgroup). 
++ * ++ * Try to link a chain of groups to a cgroup hierarchy, connecting the ++ * nodes bottom-up, so we can be sure that when we find a cgroup in the ++ * hierarchy that already as a group associated to @bfqd all the nodes ++ * in the path to the root cgroup have one too. ++ * ++ * On locking: the queue lock protects the hierarchy (there is a hierarchy ++ * per device) while the bfqio_cgroup lock protects the list of groups ++ * belonging to the same cgroup. ++ */ ++static void bfq_group_chain_link(struct bfq_data *bfqd, ++ struct cgroup_subsys_state *css, ++ struct bfq_group *leaf) ++{ ++ struct bfqio_cgroup *bgrp; ++ struct bfq_group *bfqg, *next, *prev = NULL; ++ unsigned long flags; ++ ++ assert_spin_locked(bfqd->queue->queue_lock); ++ ++ for (; css != NULL && leaf != NULL; css = css->parent) { ++ bgrp = css_to_bfqio(css); ++ next = leaf->bfqd; ++ ++ bfqg = bfqio_lookup_group(bgrp, bfqd); ++ BUG_ON(bfqg != NULL); ++ ++ spin_lock_irqsave(&bgrp->lock, flags); ++ ++ rcu_assign_pointer(leaf->bfqd, bfqd); ++ hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data); ++ hlist_add_head(&leaf->bfqd_node, &bfqd->group_list); ++ ++ spin_unlock_irqrestore(&bgrp->lock, flags); ++ ++ prev = leaf; ++ leaf = next; ++ } ++ ++ BUG_ON(css == NULL && leaf != NULL); ++ if (css != NULL && prev != NULL) { ++ bgrp = css_to_bfqio(css); ++ bfqg = bfqio_lookup_group(bgrp, bfqd); ++ bfq_group_set_parent(prev, bfqg); ++ } ++} ++ ++/** ++ * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup. ++ * @bfqd: queue descriptor. ++ * @cgroup: cgroup being searched for. ++ * ++ * Return a group associated to @bfqd in @cgroup, allocating one if ++ * necessary. When a group is returned all the cgroups in the path ++ * to the root have a group associated to @bfqd. ++ * ++ * If the allocation fails, return the root group: this breaks guarantees ++ * but is a safe fallback. If this loss becomes a problem it can be ++ * mitigated using the equivalent weight (given by the product of the ++ * weights of the groups in the path from @group to the root) in the ++ * root scheduler. ++ * ++ * We allocate all the missing nodes in the path from the leaf cgroup ++ * to the root and we connect the nodes only after all the allocations ++ * have been successful. ++ */ ++static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, ++ struct cgroup_subsys_state *css) ++{ ++ struct bfqio_cgroup *bgrp = css_to_bfqio(css); ++ struct bfq_group *bfqg; ++ ++ bfqg = bfqio_lookup_group(bgrp, bfqd); ++ if (bfqg != NULL) ++ return bfqg; ++ ++ bfqg = bfq_group_chain_alloc(bfqd, css); ++ if (bfqg != NULL) ++ bfq_group_chain_link(bfqd, css, bfqg); ++ else ++ bfqg = bfqd->root_group; ++ ++ return bfqg; ++} ++ ++/** ++ * bfq_bfqq_move - migrate @bfqq to @bfqg. ++ * @bfqd: queue descriptor. ++ * @bfqq: the queue to move. ++ * @entity: @bfqq's entity. ++ * @bfqg: the group to move to. ++ * ++ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating ++ * it on the new one. Avoid putting the entity on the old group idle tree. ++ * ++ * Must be called under the queue lock; the cgroup owning @bfqg must ++ * not disappear (by now this just means that we are called under ++ * rcu_read_lock()). 
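++ *
++ * In outline: deactivate the queue (or drop it from the idle tree) in
++ * the old group, repoint entity->parent and entity->sched_data at @bfqg,
++ * then reactivate it if it was busy and still has queued requests.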
++ */ ++static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ struct bfq_entity *entity, struct bfq_group *bfqg) ++{ ++ int busy, resume; ++ ++ busy = bfq_bfqq_busy(bfqq); ++ resume = !RB_EMPTY_ROOT(&bfqq->sort_list); ++ ++ BUG_ON(resume && !entity->on_st); ++ BUG_ON(busy && !resume && entity->on_st && ++ bfqq != bfqd->in_service_queue); ++ ++ if (busy) { ++ BUG_ON(atomic_read(&bfqq->ref) < 2); ++ ++ if (!resume) ++ bfq_del_bfqq_busy(bfqd, bfqq, 0); ++ else ++ bfq_deactivate_bfqq(bfqd, bfqq, 0); ++ } else if (entity->on_st) ++ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); ++ ++ /* ++ * Here we use a reference to bfqg. We don't need a refcounter ++ * as the cgroup reference will not be dropped, so that its ++ * destroy() callback will not be invoked. ++ */ ++ entity->parent = bfqg->my_entity; ++ entity->sched_data = &bfqg->sched_data; ++ ++ if (busy && resume) ++ bfq_activate_bfqq(bfqd, bfqq); ++ ++ if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver) ++ bfq_schedule_dispatch(bfqd); ++} ++ ++/** ++ * __bfq_bic_change_cgroup - move @bic to @cgroup. ++ * @bfqd: the queue descriptor. ++ * @bic: the bic to move. ++ * @cgroup: the cgroup to move to. ++ * ++ * Move bic to cgroup, assuming that bfqd->queue is locked; the caller ++ * has to make sure that the reference to cgroup is valid across the call. ++ * ++ * NOTE: an alternative approach might have been to store the current ++ * cgroup in bfqq and getting a reference to it, reducing the lookup ++ * time here, at the price of slightly more complex code. ++ */ ++static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, ++ struct bfq_io_cq *bic, ++ struct cgroup_subsys_state *css) ++{ ++ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0); ++ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1); ++ struct bfq_entity *entity; ++ struct bfq_group *bfqg; ++ struct bfqio_cgroup *bgrp; ++ ++ bgrp = css_to_bfqio(css); ++ ++ bfqg = bfq_find_alloc_group(bfqd, css); ++ if (async_bfqq != NULL) { ++ entity = &async_bfqq->entity; ++ ++ if (entity->sched_data != &bfqg->sched_data) { ++ bic_set_bfqq(bic, NULL, 0); ++ bfq_log_bfqq(bfqd, async_bfqq, ++ "bic_change_group: %p %d", ++ async_bfqq, atomic_read(&async_bfqq->ref)); ++ bfq_put_queue(async_bfqq); ++ } ++ } ++ ++ if (sync_bfqq != NULL) { ++ entity = &sync_bfqq->entity; ++ if (entity->sched_data != &bfqg->sched_data) ++ bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); ++ } ++ ++ return bfqg; ++} ++ ++/** ++ * bfq_bic_change_cgroup - move @bic to @cgroup. ++ * @bic: the bic being migrated. ++ * @cgroup: the destination cgroup. ++ * ++ * When the task owning @bic is moved to @cgroup, @bic is immediately ++ * moved into its new parent group. ++ */ ++static void bfq_bic_change_cgroup(struct bfq_io_cq *bic, ++ struct cgroup_subsys_state *css) ++{ ++ struct bfq_data *bfqd; ++ unsigned long uninitialized_var(flags); ++ ++ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), ++ &flags); ++ if (bfqd != NULL) { ++ __bfq_bic_change_cgroup(bfqd, bic, css); ++ bfq_put_bfqd_unlock(bfqd, &flags); ++ } ++} ++ ++/** ++ * bfq_bic_update_cgroup - update the cgroup of @bic. ++ * @bic: the @bic to update. ++ * ++ * Make sure that @bic is enqueued in the cgroup of the current task. ++ * We need this in addition to moving bics during the cgroup attach ++ * phase because the task owning @bic could be at its first disk ++ * access or we may end up in the root cgroup as the result of a ++ * memory allocation failure and here we try to move to the right ++ * group. 
++ * ++ * Must be called under the queue lock. It is safe to use the returned ++ * value even after the rcu_read_unlock() as the migration/destruction ++ * paths act under the queue lock too. IOW it is impossible to race with ++ * group migration/destruction and end up with an invalid group as: ++ * a) here cgroup has not yet been destroyed, nor its destroy callback ++ * has started execution, as current holds a reference to it, ++ * b) if it is destroyed after rcu_read_unlock() [after current is ++ * migrated to a different cgroup] its attach() callback will have ++ * taken care of remove all the references to the old cgroup data. ++ */ ++static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic) ++{ ++ struct bfq_data *bfqd = bic_to_bfqd(bic); ++ struct bfq_group *bfqg; ++ struct cgroup_subsys_state *css; ++ ++ BUG_ON(bfqd == NULL); ++ ++ rcu_read_lock(); ++ css = task_css(current, bfqio_cgrp_id); ++ bfqg = __bfq_bic_change_cgroup(bfqd, bic, css); ++ rcu_read_unlock(); ++ ++ return bfqg; ++} ++ ++/** ++ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. ++ * @st: the service tree being flushed. ++ */ ++static inline void bfq_flush_idle_tree(struct bfq_service_tree *st) ++{ ++ struct bfq_entity *entity = st->first_idle; ++ ++ for (; entity != NULL; entity = st->first_idle) ++ __bfq_deactivate_entity(entity, 0); ++} ++ ++/** ++ * bfq_reparent_leaf_entity - move leaf entity to the root_group. ++ * @bfqd: the device data structure with the root group. ++ * @entity: the entity to move. ++ */ ++static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ ++ BUG_ON(bfqq == NULL); ++ bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); ++ return; ++} ++ ++/** ++ * bfq_reparent_active_entities - move to the root group all active ++ * entities. ++ * @bfqd: the device data structure with the root group. ++ * @bfqg: the group to move from. ++ * @st: the service tree with the entities. ++ * ++ * Needs queue_lock to be taken and reference to be valid over the call. ++ */ ++static inline void bfq_reparent_active_entities(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, ++ struct bfq_service_tree *st) ++{ ++ struct rb_root *active = &st->active; ++ struct bfq_entity *entity = NULL; ++ ++ if (!RB_EMPTY_ROOT(&st->active)) ++ entity = bfq_entity_of(rb_first(active)); ++ ++ for (; entity != NULL; entity = bfq_entity_of(rb_first(active))) ++ bfq_reparent_leaf_entity(bfqd, entity); ++ ++ if (bfqg->sched_data.in_service_entity != NULL) ++ bfq_reparent_leaf_entity(bfqd, ++ bfqg->sched_data.in_service_entity); ++ ++ return; ++} ++ ++/** ++ * bfq_destroy_group - destroy @bfqg. ++ * @bgrp: the bfqio_cgroup containing @bfqg. ++ * @bfqg: the group being destroyed. ++ * ++ * Destroy @bfqg, making sure that it is not referenced from its parent. ++ */ ++static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) ++{ ++ struct bfq_data *bfqd; ++ struct bfq_service_tree *st; ++ struct bfq_entity *entity = bfqg->my_entity; ++ unsigned long uninitialized_var(flags); ++ int i; ++ ++ hlist_del(&bfqg->group_node); ++ ++ /* ++ * Empty all service_trees belonging to this group before ++ * deactivating the group itself. 
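++ * (BFQ keeps one service tree per I/O priority class - RT, BE and IDLE -
++ * which is why the loop below runs BFQ_IOPRIO_CLASSES times.)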
++ */ ++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { ++ st = bfqg->sched_data.service_tree + i; ++ ++ /* ++ * The idle tree may still contain bfq_queues belonging ++ * to exited task because they never migrated to a different ++ * cgroup from the one being destroyed now. No one else ++ * can access them so it's safe to act without any lock. ++ */ ++ bfq_flush_idle_tree(st); ++ ++ /* ++ * It may happen that some queues are still active ++ * (busy) upon group destruction (if the corresponding ++ * processes have been forced to terminate). We move ++ * all the leaf entities corresponding to these queues ++ * to the root_group. ++ * Also, it may happen that the group has an entity ++ * in service, which is disconnected from the active ++ * tree: it must be moved, too. ++ * There is no need to put the sync queues, as the ++ * scheduler has taken no reference. ++ */ ++ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); ++ if (bfqd != NULL) { ++ bfq_reparent_active_entities(bfqd, bfqg, st); ++ bfq_put_bfqd_unlock(bfqd, &flags); ++ } ++ BUG_ON(!RB_EMPTY_ROOT(&st->active)); ++ BUG_ON(!RB_EMPTY_ROOT(&st->idle)); ++ } ++ BUG_ON(bfqg->sched_data.next_in_service != NULL); ++ BUG_ON(bfqg->sched_data.in_service_entity != NULL); ++ ++ /* ++ * We may race with device destruction, take extra care when ++ * dereferencing bfqg->bfqd. ++ */ ++ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); ++ if (bfqd != NULL) { ++ hlist_del(&bfqg->bfqd_node); ++ __bfq_deactivate_entity(entity, 0); ++ bfq_put_async_queues(bfqd, bfqg); ++ bfq_put_bfqd_unlock(bfqd, &flags); ++ } ++ BUG_ON(entity->tree != NULL); ++ ++ /* ++ * No need to defer the kfree() to the end of the RCU grace ++ * period: we are called from the destroy() callback of our ++ * cgroup, so we can be sure that no one is a) still using ++ * this cgroup or b) doing lookups in it. ++ */ ++ kfree(bfqg); ++} ++ ++static void bfq_end_wr_async(struct bfq_data *bfqd) ++{ ++ struct hlist_node *tmp; ++ struct bfq_group *bfqg; ++ ++ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node) ++ bfq_end_wr_async_queues(bfqd, bfqg); ++ bfq_end_wr_async_queues(bfqd, bfqd->root_group); ++} ++ ++/** ++ * bfq_disconnect_groups - disconnect @bfqd from all its groups. ++ * @bfqd: the device descriptor being exited. ++ * ++ * When the device exits we just make sure that no lookup can return ++ * the now unused group structures. They will be deallocated on cgroup ++ * destruction. ++ */ ++static void bfq_disconnect_groups(struct bfq_data *bfqd) ++{ ++ struct hlist_node *tmp; ++ struct bfq_group *bfqg; ++ ++ bfq_log(bfqd, "disconnect_groups beginning"); ++ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node) { ++ hlist_del(&bfqg->bfqd_node); ++ ++ __bfq_deactivate_entity(bfqg->my_entity, 0); ++ ++ /* ++ * Don't remove from the group hash, just set an ++ * invalid key. No lookups can race with the ++ * assignment as bfqd is being destroyed; this ++ * implies also that new elements cannot be added ++ * to the list. 
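++ * (Lookups do key = rcu_dereference(bfqg->bfqd) and compare the key
++ * against their bfqd, so publishing NULL here simply makes every lookup
++ * miss from now on.)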
++ */ ++ rcu_assign_pointer(bfqg->bfqd, NULL); ++ ++ bfq_log(bfqd, "disconnect_groups: put async for group %p", ++ bfqg); ++ bfq_put_async_queues(bfqd, bfqg); ++ } ++} ++ ++static inline void bfq_free_root_group(struct bfq_data *bfqd) ++{ ++ struct bfqio_cgroup *bgrp = &bfqio_root_cgroup; ++ struct bfq_group *bfqg = bfqd->root_group; ++ ++ bfq_put_async_queues(bfqd, bfqg); ++ ++ spin_lock_irq(&bgrp->lock); ++ hlist_del_rcu(&bfqg->group_node); ++ spin_unlock_irq(&bgrp->lock); ++ ++ /* ++ * No need to synchronize_rcu() here: since the device is gone ++ * there cannot be any read-side access to its root_group. ++ */ ++ kfree(bfqg); ++} ++ ++static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) ++{ ++ struct bfq_group *bfqg; ++ struct bfqio_cgroup *bgrp; ++ int i; ++ ++ bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node); ++ if (bfqg == NULL) ++ return NULL; ++ ++ bfqg->entity.parent = NULL; ++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) ++ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; ++ ++ bgrp = &bfqio_root_cgroup; ++ spin_lock_irq(&bgrp->lock); ++ rcu_assign_pointer(bfqg->bfqd, bfqd); ++ hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data); ++ spin_unlock_irq(&bgrp->lock); ++ ++ return bfqg; ++} ++ ++#define SHOW_FUNCTION(__VAR) \ ++static u64 bfqio_cgroup_##__VAR##_read(struct cgroup_subsys_state *css, \ ++ struct cftype *cftype) \ ++{ \ ++ struct bfqio_cgroup *bgrp = css_to_bfqio(css); \ ++ u64 ret = -ENODEV; \ ++ \ ++ mutex_lock(&bfqio_mutex); \ ++ if (bfqio_is_removed(bgrp)) \ ++ goto out_unlock; \ ++ \ ++ spin_lock_irq(&bgrp->lock); \ ++ ret = bgrp->__VAR; \ ++ spin_unlock_irq(&bgrp->lock); \ ++ \ ++out_unlock: \ ++ mutex_unlock(&bfqio_mutex); \ ++ return ret; \ ++} ++ ++SHOW_FUNCTION(weight); ++SHOW_FUNCTION(ioprio); ++SHOW_FUNCTION(ioprio_class); ++#undef SHOW_FUNCTION ++ ++#define STORE_FUNCTION(__VAR, __MIN, __MAX) \ ++static int bfqio_cgroup_##__VAR##_write(struct cgroup_subsys_state *css,\ ++ struct cftype *cftype, \ ++ u64 val) \ ++{ \ ++ struct bfqio_cgroup *bgrp = css_to_bfqio(css); \ ++ struct bfq_group *bfqg; \ ++ int ret = -EINVAL; \ ++ \ ++ if (val < (__MIN) || val > (__MAX)) \ ++ return ret; \ ++ \ ++ ret = -ENODEV; \ ++ mutex_lock(&bfqio_mutex); \ ++ if (bfqio_is_removed(bgrp)) \ ++ goto out_unlock; \ ++ ret = 0; \ ++ \ ++ spin_lock_irq(&bgrp->lock); \ ++ bgrp->__VAR = (unsigned short)val; \ ++ hlist_for_each_entry(bfqg, &bgrp->group_data, group_node) { \ ++ /* \ ++ * Setting the ioprio_changed flag of the entity \ ++ * to 1 with new_##__VAR == ##__VAR would re-set \ ++ * the value of the weight to its ioprio mapping. \ ++ * Set the flag only if necessary. \ ++ */ \ ++ if ((unsigned short)val != bfqg->entity.new_##__VAR) { \ ++ bfqg->entity.new_##__VAR = (unsigned short)val; \ ++ /* \ ++ * Make sure that the above new value has been \ ++ * stored in bfqg->entity.new_##__VAR before \ ++ * setting the ioprio_changed flag. In fact, \ ++ * this flag may be read asynchronously (in \ ++ * critical sections protected by a different \ ++ * lock than that held here), and finding this \ ++ * flag set may cause the execution of the code \ ++ * for updating parameters whose value may \ ++ * depend also on bfqg->entity.new_##__VAR (in \ ++ * __bfq_entity_update_weight_prio). \ ++ * This barrier makes sure that the new value \ ++ * of bfqg->entity.new_##__VAR is correctly \ ++ * seen in that code. 
\ ++ */ \ ++ smp_wmb(); \ ++ bfqg->entity.ioprio_changed = 1; \ ++ } \ ++ } \ ++ spin_unlock_irq(&bgrp->lock); \ ++ \ ++out_unlock: \ ++ mutex_unlock(&bfqio_mutex); \ ++ return ret; \ ++} ++ ++STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT); ++STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1); ++STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE); ++#undef STORE_FUNCTION ++ ++static struct cftype bfqio_files[] = { ++ { ++ .name = "weight", ++ .read_u64 = bfqio_cgroup_weight_read, ++ .write_u64 = bfqio_cgroup_weight_write, ++ }, ++ { ++ .name = "ioprio", ++ .read_u64 = bfqio_cgroup_ioprio_read, ++ .write_u64 = bfqio_cgroup_ioprio_write, ++ }, ++ { ++ .name = "ioprio_class", ++ .read_u64 = bfqio_cgroup_ioprio_class_read, ++ .write_u64 = bfqio_cgroup_ioprio_class_write, ++ }, ++ { }, /* terminate */ ++}; ++ ++static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys_state ++ *parent_css) ++{ ++ struct bfqio_cgroup *bgrp; ++ ++ if (parent_css != NULL) { ++ bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL); ++ if (bgrp == NULL) ++ return ERR_PTR(-ENOMEM); ++ } else ++ bgrp = &bfqio_root_cgroup; ++ ++ spin_lock_init(&bgrp->lock); ++ INIT_HLIST_HEAD(&bgrp->group_data); ++ bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO; ++ bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS; ++ ++ return &bgrp->css; ++} ++ ++/* ++ * We cannot support shared io contexts, as we have no means to support ++ * two tasks with the same ioc in two different groups without major rework ++ * of the main bic/bfqq data structures. By now we allow a task to change ++ * its cgroup only if it's the only owner of its ioc; the drawback of this ++ * behavior is that a group containing a task that forked using CLONE_IO ++ * will not be destroyed until the tasks sharing the ioc die. ++ */ ++static int bfqio_can_attach(struct cgroup_subsys_state *css, ++ struct cgroup_taskset *tset) ++{ ++ struct task_struct *task; ++ struct io_context *ioc; ++ int ret = 0; ++ ++ cgroup_taskset_for_each(task, tset) { ++ /* ++ * task_lock() is needed to avoid races with ++ * exit_io_context() ++ */ ++ task_lock(task); ++ ioc = task->io_context; ++ if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) ++ /* ++ * ioc == NULL means that the task is either too ++ * young or exiting: if it has still no ioc the ++ * ioc can't be shared, if the task is exiting the ++ * attach will fail anyway, no matter what we ++ * return here. ++ */ ++ ret = -EINVAL; ++ task_unlock(task); ++ if (ret) ++ break; ++ } ++ ++ return ret; ++} ++ ++static void bfqio_attach(struct cgroup_subsys_state *css, ++ struct cgroup_taskset *tset) ++{ ++ struct task_struct *task; ++ struct io_context *ioc; ++ struct io_cq *icq; ++ ++ /* ++ * IMPORTANT NOTE: The move of more than one process at a time to a ++ * new group has not yet been tested. ++ */ ++ cgroup_taskset_for_each(task, tset) { ++ ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); ++ if (ioc) { ++ /* ++ * Handle cgroup change here. 
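++ * Walk every io_cq of this task's io_context and, for the queues that
++ * are managed by bfq (the elevator-name check below), migrate the
++ * attached bic into the destination cgroup.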
++ */ ++ rcu_read_lock(); ++ hlist_for_each_entry_rcu(icq, &ioc->icq_list, ioc_node) ++ if (!strncmp( ++ icq->q->elevator->type->elevator_name, ++ "bfq", ELV_NAME_MAX)) ++ bfq_bic_change_cgroup(icq_to_bic(icq), ++ css); ++ rcu_read_unlock(); ++ put_io_context(ioc); ++ } ++ } ++} ++ ++static void bfqio_destroy(struct cgroup_subsys_state *css) ++{ ++ struct bfqio_cgroup *bgrp = css_to_bfqio(css); ++ struct hlist_node *tmp; ++ struct bfq_group *bfqg; ++ ++ /* ++ * Since we are destroying the cgroup, there are no more tasks ++ * referencing it, and all the RCU grace periods that may have ++ * referenced it are ended (as the destruction of the parent ++ * cgroup is RCU-safe); bgrp->group_data will not be accessed by ++ * anything else and we don't need any synchronization. ++ */ ++ hlist_for_each_entry_safe(bfqg, tmp, &bgrp->group_data, group_node) ++ bfq_destroy_group(bgrp, bfqg); ++ ++ BUG_ON(!hlist_empty(&bgrp->group_data)); ++ ++ kfree(bgrp); ++} ++ ++static int bfqio_css_online(struct cgroup_subsys_state *css) ++{ ++ struct bfqio_cgroup *bgrp = css_to_bfqio(css); ++ ++ mutex_lock(&bfqio_mutex); ++ bgrp->online = true; ++ mutex_unlock(&bfqio_mutex); ++ ++ return 0; ++} ++ ++static void bfqio_css_offline(struct cgroup_subsys_state *css) ++{ ++ struct bfqio_cgroup *bgrp = css_to_bfqio(css); ++ ++ mutex_lock(&bfqio_mutex); ++ bgrp->online = false; ++ mutex_unlock(&bfqio_mutex); ++} ++ ++struct cgroup_subsys bfqio_cgrp_subsys = { ++ .css_alloc = bfqio_create, ++ .css_online = bfqio_css_online, ++ .css_offline = bfqio_css_offline, ++ .can_attach = bfqio_can_attach, ++ .attach = bfqio_attach, ++ .css_free = bfqio_destroy, ++ .legacy_cftypes = bfqio_files, ++}; ++#else ++static inline void bfq_init_entity(struct bfq_entity *entity, ++ struct bfq_group *bfqg) ++{ ++ entity->weight = entity->new_weight; ++ entity->orig_weight = entity->new_weight; ++ entity->ioprio = entity->new_ioprio; ++ entity->ioprio_class = entity->new_ioprio_class; ++ entity->sched_data = &bfqg->sched_data; ++} ++ ++static inline struct bfq_group * ++bfq_bic_update_cgroup(struct bfq_io_cq *bic) ++{ ++ struct bfq_data *bfqd = bic_to_bfqd(bic); ++ return bfqd->root_group; ++} ++ ++static inline void bfq_bfqq_move(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ struct bfq_entity *entity, ++ struct bfq_group *bfqg) ++{ ++} ++ ++static void bfq_end_wr_async(struct bfq_data *bfqd) ++{ ++ bfq_end_wr_async_queues(bfqd, bfqd->root_group); ++} ++ ++static inline void bfq_disconnect_groups(struct bfq_data *bfqd) ++{ ++ bfq_put_async_queues(bfqd, bfqd->root_group); ++} ++ ++static inline void bfq_free_root_group(struct bfq_data *bfqd) ++{ ++ kfree(bfqd->root_group); ++} ++ ++static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) ++{ ++ struct bfq_group *bfqg; ++ int i; ++ ++ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); ++ if (bfqg == NULL) ++ return NULL; ++ ++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) ++ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; ++ ++ return bfqg; ++} ++#endif +diff -Nur linux-4.1.3/block/bfq.h linux-xbian-imx6/block/bfq.h +--- linux-4.1.3/block/bfq.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/block/bfq.h 2015-07-27 23:13:03.604123194 +0200 +@@ -0,0 +1,811 @@ ++/* ++ * BFQ-v7r7 for 4.0.0: data structures and common functions prototypes. 
++ *
++ * Based on ideas and code from CFQ:
++ * Copyright (C) 2003 Jens Axboe
++ *
++ * Copyright (C) 2008 Fabio Checconi
++ * Paolo Valente
++ *
++ * Copyright (C) 2010 Paolo Valente
++ */
++
++#ifndef _BFQ_H
++#define _BFQ_H
++
++#include <linux/blktrace_api.h>
++#include <linux/hrtimer.h>
++#include <linux/ioprio.h>
++#include <linux/rbtree.h>
++
++#define BFQ_IOPRIO_CLASSES 3
++#define BFQ_CL_IDLE_TIMEOUT (HZ/5)
++
++#define BFQ_MIN_WEIGHT 1
++#define BFQ_MAX_WEIGHT 1000
++
++#define BFQ_DEFAULT_QUEUE_IOPRIO 4
++
++#define BFQ_DEFAULT_GRP_WEIGHT 10
++#define BFQ_DEFAULT_GRP_IOPRIO 0
++#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
++
++struct bfq_entity;
++
++/**
++ * struct bfq_service_tree - per ioprio_class service tree.
++ * @active: tree for active entities (i.e., those backlogged).
++ * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i).
++ * @first_idle: idle entity with minimum F_i.
++ * @last_idle: idle entity with maximum F_i.
++ * @vtime: scheduler virtual time.
++ * @wsum: scheduler weight sum; active and idle entities contribute to it.
++ *
++ * Each service tree represents a B-WF2Q+ scheduler on its own. Each
++ * ioprio_class has its own independent scheduler, and so its own
++ * bfq_service_tree. All the fields are protected by the queue lock
++ * of the containing bfqd.
++ */
++struct bfq_service_tree {
++	struct rb_root active;
++	struct rb_root idle;
++
++	struct bfq_entity *first_idle;
++	struct bfq_entity *last_idle;
++
++	u64 vtime;
++	unsigned long wsum;
++};
++
++/**
++ * struct bfq_sched_data - multi-class scheduler.
++ * @in_service_entity: entity in service.
++ * @next_in_service: head-of-the-line entity in the scheduler.
++ * @service_tree: array of service trees, one per ioprio_class.
++ *
++ * bfq_sched_data is the basic scheduler queue. It supports three
++ * ioprio_classes, and can be used either as a toplevel queue or as
++ * an intermediate queue on a hierarchical setup.
++ * @next_in_service points to the active entity of the sched_data
++ * service trees that will be scheduled next.
++ *
++ * The supported ioprio_classes are the same as in CFQ, in descending
++ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
++ * Requests from higher priority queues are served before all the
++ * requests from lower priority queues; among requests of the same
++ * queue requests are served according to B-WF2Q+.
++ * All the fields are protected by the queue lock of the containing bfqd.
++ */
++struct bfq_sched_data {
++	struct bfq_entity *in_service_entity;
++	struct bfq_entity *next_in_service;
++	struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
++};
++
++/**
++ * struct bfq_weight_counter - counter of the number of all active entities
++ * with a given weight.
++ * @weight: weight of the entities that this counter refers to.
++ * @num_active: number of active entities with this weight.
++ * @weights_node: weights tree member (see bfq_data's @queue_weights_tree
++ * and @group_weights_tree).
++ */
++struct bfq_weight_counter {
++	short int weight;
++	unsigned int num_active;
++	struct rb_node weights_node;
++};
++
++/**
++ * struct bfq_entity - schedulable entity.
++ * @rb_node: service_tree member.
++ * @weight_counter: pointer to the weight counter associated with this entity.
++ * @on_st: flag, true if the entity is on a tree (either the active or
++ * the idle one of its service_tree).
++ * @finish: B-WF2Q+ finish timestamp (aka F_i).
++ * @start: B-WF2Q+ start timestamp (aka S_i).
++ * @tree: tree the entity is enqueued into; %NULL if not on a tree.
++ * @min_start: minimum start time of the (active) subtree rooted at ++ * this entity; used for O(log N) lookups into active trees. ++ * @service: service received during the last round of service. ++ * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. ++ * @weight: weight of the queue ++ * @parent: parent entity, for hierarchical scheduling. ++ * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the ++ * associated scheduler queue, %NULL on leaf nodes. ++ * @sched_data: the scheduler queue this entity belongs to. ++ * @ioprio: the ioprio in use. ++ * @new_weight: when a weight change is requested, the new weight value. ++ * @orig_weight: original weight, used to implement weight boosting ++ * @new_ioprio: when an ioprio change is requested, the new ioprio value. ++ * @ioprio_class: the ioprio_class in use. ++ * @new_ioprio_class: when an ioprio_class change is requested, the new ++ * ioprio_class value. ++ * @ioprio_changed: flag, true when the user requested a weight, ioprio or ++ * ioprio_class change. ++ * ++ * A bfq_entity is used to represent either a bfq_queue (leaf node in the ++ * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each ++ * entity belongs to the sched_data of the parent group in the cgroup ++ * hierarchy. Non-leaf entities have also their own sched_data, stored ++ * in @my_sched_data. ++ * ++ * Each entity stores independently its priority values; this would ++ * allow different weights on different devices, but this ++ * functionality is not exported to userspace by now. Priorities and ++ * weights are updated lazily, first storing the new values into the ++ * new_* fields, then setting the @ioprio_changed flag. As soon as ++ * there is a transition in the entity state that allows the priority ++ * update to take place the effective and the requested priority ++ * values are synchronized. ++ * ++ * Unless cgroups are used, the weight value is calculated from the ++ * ioprio to export the same interface as CFQ. When dealing with ++ * ``well-behaved'' queues (i.e., queues that do not spend too much ++ * time to consume their budget and have true sequential behavior, and ++ * when there are no external factors breaking anticipation) the ++ * relative weights at each level of the cgroups hierarchy should be ++ * guaranteed. All the fields are protected by the queue lock of the ++ * containing bfqd. ++ */ ++struct bfq_entity { ++ struct rb_node rb_node; ++ struct bfq_weight_counter *weight_counter; ++ ++ int on_st; ++ ++ u64 finish; ++ u64 start; ++ ++ struct rb_root *tree; ++ ++ u64 min_start; ++ ++ unsigned long service, budget; ++ unsigned short weight, new_weight; ++ unsigned short orig_weight; ++ ++ struct bfq_entity *parent; ++ ++ struct bfq_sched_data *my_sched_data; ++ struct bfq_sched_data *sched_data; ++ ++ unsigned short ioprio, new_ioprio; ++ unsigned short ioprio_class, new_ioprio_class; ++ ++ int ioprio_changed; ++}; ++ ++struct bfq_group; ++ ++/** ++ * struct bfq_queue - leaf schedulable entity. ++ * @ref: reference counter. ++ * @bfqd: parent bfq_data. ++ * @new_bfqq: shared bfq_queue if queue is cooperating with ++ * one or more other queues. ++ * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree). ++ * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree). ++ * @sort_list: sorted list of pending requests. ++ * @next_rq: if fifo isn't expired, next request to serve. ++ * @queued: nr of requests queued in @sort_list. ++ * @allocated: currently allocated requests. 
++ * @meta_pending: pending metadata requests. ++ * @fifo: fifo list of requests in sort_list. ++ * @entity: entity representing this queue in the scheduler. ++ * @max_budget: maximum budget allowed from the feedback mechanism. ++ * @budget_timeout: budget expiration (in jiffies). ++ * @dispatched: number of requests on the dispatch list or inside driver. ++ * @flags: status flags. ++ * @bfqq_list: node for active/idle bfqq list inside our bfqd. ++ * @burst_list_node: node for the device's burst list. ++ * @seek_samples: number of seeks sampled ++ * @seek_total: sum of the distances of the seeks sampled ++ * @seek_mean: mean seek distance ++ * @last_request_pos: position of the last request enqueued ++ * @requests_within_timer: number of consecutive pairs of request completion ++ * and arrival, such that the queue becomes idle ++ * after the completion, but the next request arrives ++ * within an idle time slice; used only if the queue's ++ * IO_bound has been cleared. ++ * @pid: pid of the process owning the queue, used for logging purposes. ++ * @last_wr_start_finish: start time of the current weight-raising period if ++ * the @bfq-queue is being weight-raised, otherwise ++ * finish time of the last weight-raising period ++ * @wr_cur_max_time: current max raising time for this queue ++ * @soft_rt_next_start: minimum time instant such that, only if a new ++ * request is enqueued after this time instant in an ++ * idle @bfq_queue with no outstanding requests, then ++ * the task associated with the queue it is deemed as ++ * soft real-time (see the comments to the function ++ * bfq_bfqq_softrt_next_start()) ++ * @last_idle_bklogged: time of the last transition of the @bfq_queue from ++ * idle to backlogged ++ * @service_from_backlogged: cumulative service received from the @bfq_queue ++ * since the last transition from idle to ++ * backlogged ++ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the ++ * queue is shared ++ * ++ * A bfq_queue is a leaf request queue; it can be associated with an ++ * io_context or more, if it is async or shared between cooperating ++ * processes. @cgroup holds a reference to the cgroup, to be sure that it ++ * does not disappear while a bfqq still references it (mostly to avoid ++ * races between request issuing and task migration followed by cgroup ++ * destruction). ++ * All the fields are protected by the queue lock of the containing bfqd. ++ */ ++struct bfq_queue { ++ atomic_t ref; ++ struct bfq_data *bfqd; ++ ++ /* fields for cooperating queues handling */ ++ struct bfq_queue *new_bfqq; ++ struct rb_node pos_node; ++ struct rb_root *pos_root; ++ ++ struct rb_root sort_list; ++ struct request *next_rq; ++ int queued[2]; ++ int allocated[2]; ++ int meta_pending; ++ struct list_head fifo; ++ ++ struct bfq_entity entity; ++ ++ unsigned long max_budget; ++ unsigned long budget_timeout; ++ ++ int dispatched; ++ ++ unsigned int flags; ++ ++ struct list_head bfqq_list; ++ ++ struct hlist_node burst_list_node; ++ ++ unsigned int seek_samples; ++ u64 seek_total; ++ sector_t seek_mean; ++ sector_t last_request_pos; ++ ++ unsigned int requests_within_timer; ++ ++ pid_t pid; ++ struct bfq_io_cq *bic; ++ ++ /* weight-raising fields */ ++ unsigned long wr_cur_max_time; ++ unsigned long soft_rt_next_start; ++ unsigned long last_wr_start_finish; ++ unsigned int wr_coeff; ++ unsigned long last_idle_bklogged; ++ unsigned long service_from_backlogged; ++}; ++ ++/** ++ * struct bfq_ttime - per process thinktime stats. 
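++ * @last_end_request: completion time (in jiffies) of the process' last
++ * request; reference instant for the thinktime samples below.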
++ * @ttime_total: total process thinktime ++ * @ttime_samples: number of thinktime samples ++ * @ttime_mean: average process thinktime ++ */ ++struct bfq_ttime { ++ unsigned long last_end_request; ++ ++ unsigned long ttime_total; ++ unsigned long ttime_samples; ++ unsigned long ttime_mean; ++}; ++ ++/** ++ * struct bfq_io_cq - per (request_queue, io_context) structure. ++ * @icq: associated io_cq structure ++ * @bfqq: array of two process queues, the sync and the async ++ * @ttime: associated @bfq_ttime struct ++ * @wr_time_left: snapshot of the time left before weight raising ends ++ * for the sync queue associated to this process; this ++ * snapshot is taken to remember this value while the weight ++ * raising is suspended because the queue is merged with a ++ * shared queue, and is used to set @raising_cur_max_time ++ * when the queue is split from the shared queue and its ++ * weight is raised again ++ * @saved_idle_window: same purpose as the previous field for the idle ++ * window ++ * @saved_IO_bound: same purpose as the previous two fields for the I/O ++ * bound classification of a queue ++ * @saved_in_large_burst: same purpose as the previous fields for the ++ * value of the field keeping the queue's belonging ++ * to a large burst ++ * @was_in_burst_list: true if the queue belonged to a burst list ++ * before its merge with another cooperating queue ++ * @cooperations: counter of consecutive successful queue merges underwent ++ * by any of the process' @bfq_queues ++ * @failed_cooperations: counter of consecutive failed queue merges of any ++ * of the process' @bfq_queues ++ */ ++struct bfq_io_cq { ++ struct io_cq icq; /* must be the first member */ ++ struct bfq_queue *bfqq[2]; ++ struct bfq_ttime ttime; ++ int ioprio; ++ ++ unsigned int wr_time_left; ++ bool saved_idle_window; ++ bool saved_IO_bound; ++ ++ bool saved_in_large_burst; ++ bool was_in_burst_list; ++ ++ unsigned int cooperations; ++ unsigned int failed_cooperations; ++}; ++ ++enum bfq_device_speed { ++ BFQ_BFQD_FAST, ++ BFQ_BFQD_SLOW, ++}; ++ ++/** ++ * struct bfq_data - per device data structure. ++ * @queue: request queue for the managed device. ++ * @root_group: root bfq_group for the device. ++ * @rq_pos_tree: rbtree sorted by next_request position, used when ++ * determining if two or more queues have interleaving ++ * requests (see bfq_close_cooperator()). ++ * @active_numerous_groups: number of bfq_groups containing more than one ++ * active @bfq_entity. ++ * @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by ++ * weight. Used to keep track of whether all @bfq_queues ++ * have the same weight. The tree contains one counter ++ * for each distinct weight associated to some active ++ * and not weight-raised @bfq_queue (see the comments to ++ * the functions bfq_weights_tree_[add|remove] for ++ * further details). ++ * @group_weights_tree: rbtree of non-queue @bfq_entity weight counters, sorted ++ * by weight. Used to keep track of whether all ++ * @bfq_groups have the same weight. The tree contains ++ * one counter for each distinct weight associated to ++ * some active @bfq_group (see the comments to the ++ * functions bfq_weights_tree_[add|remove] for further ++ * details). ++ * @busy_queues: number of bfq_queues containing requests (including the ++ * queue in service, even if it is idling). 
++ * @busy_in_flight_queues: number of @bfq_queues containing pending or ++ * in-flight requests, plus the @bfq_queue in ++ * service, even if idle but waiting for the ++ * possible arrival of its next sync request. This ++ * field is updated only if the device is rotational, ++ * but used only if the device is also NCQ-capable. ++ * The reason why the field is updated also for non- ++ * NCQ-capable rotational devices is related to the ++ * fact that the value of @hw_tag may be set also ++ * later than when busy_in_flight_queues may need to ++ * be incremented for the first time(s). Taking also ++ * this possibility into account, to avoid unbalanced ++ * increments/decrements, would imply more overhead ++ * than just updating busy_in_flight_queues ++ * regardless of the value of @hw_tag. ++ * @const_seeky_busy_in_flight_queues: number of constantly-seeky @bfq_queues ++ * (that is, seeky queues that expired ++ * for budget timeout at least once) ++ * containing pending or in-flight ++ * requests, including the in-service ++ * @bfq_queue if constantly seeky. This ++ * field is updated only if the device ++ * is rotational, but used only if the ++ * device is also NCQ-capable (see the ++ * comments to @busy_in_flight_queues). ++ * @wr_busy_queues: number of weight-raised busy @bfq_queues. ++ * @queued: number of queued requests. ++ * @rq_in_driver: number of requests dispatched and waiting for completion. ++ * @sync_flight: number of sync requests in the driver. ++ * @max_rq_in_driver: max number of reqs in driver in the last ++ * @hw_tag_samples completed requests. ++ * @hw_tag_samples: nr of samples used to calculate hw_tag. ++ * @hw_tag: flag set to one if the driver is showing a queueing behavior. ++ * @budgets_assigned: number of budgets assigned. ++ * @idle_slice_timer: timer set when idling for the next sequential request ++ * from the queue in service. ++ * @unplug_work: delayed work to restart dispatching on the request queue. ++ * @in_service_queue: bfq_queue in service. ++ * @in_service_bic: bfq_io_cq (bic) associated with the @in_service_queue. ++ * @last_position: on-disk position of the last served request. ++ * @last_budget_start: beginning of the last budget. ++ * @last_idling_start: beginning of the last idle slice. ++ * @peak_rate: peak transfer rate observed for a budget. ++ * @peak_rate_samples: number of samples used to calculate @peak_rate. ++ * @bfq_max_budget: maximum budget allotted to a bfq_queue before ++ * rescheduling. ++ * @group_list: list of all the bfq_groups active on the device. ++ * @active_list: list of all the bfq_queues active on the device. ++ * @idle_list: list of all the bfq_queues idle on the device. ++ * @bfq_quantum: max number of requests dispatched per dispatch round. ++ * @bfq_fifo_expire: timeout for async/sync requests; when it expires ++ * requests are served in fifo order. ++ * @bfq_back_penalty: weight of backward seeks wrt forward ones. ++ * @bfq_back_max: maximum allowed backward seek. ++ * @bfq_slice_idle: maximum idling time. ++ * @bfq_user_max_budget: user-configured max budget value ++ * (0 for auto-tuning). ++ * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to ++ * async queues. 
++ * @bfq_timeout: timeout for bfq_queues to consume their budget; used
++ * to prevent seeky queues from imposing long latencies on well-
++ * behaved ones (this also implies that seeky queues cannot
++ * receive guarantees in the service domain; after a timeout
++ * they are charged for the whole allocated budget, to try
++ * to preserve a behavior reasonably fair among them, but
++ * without service-domain guarantees).
++ * @bfq_coop_thresh: number of queue merges after which a @bfq_queue is
++ * no longer granted any weight-raising.
++ * @bfq_failed_cooperations: number of consecutive failed cooperation
++ * chances after which weight-raising is restored
++ * to a queue subject to more than bfq_coop_thresh
++ * queue merges.
++ * @bfq_requests_within_timer: number of consecutive requests that must be
++ * issued within the idle time slice to re-enable
++ * idling for a queue which was marked as
++ * non-I/O-bound (see the definition of the
++ * IO_bound flag for further details).
++ * @last_ins_in_burst: last time at which a queue entered the current
++ * burst of queues being activated shortly after
++ * each other; for more details about this and the
++ * following parameters related to a burst of
++ * activations, see the comments to the function
++ * @bfq_handle_burst.
++ * @bfq_burst_interval: reference time interval used to decide whether a
++ * queue has been activated shortly after
++ * @last_ins_in_burst.
++ * @burst_size: number of queues in the current burst of queue activations.
++ * @bfq_large_burst_thresh: maximum burst size above which the current
++ * queue-activation burst is deemed as 'large'.
++ * @large_burst: true if a large queue-activation burst is in progress.
++ * @burst_list: head of the burst list (as for the above fields, more details
++ * in the comments to the function bfq_handle_burst).
++ * @low_latency: if set to true, low-latency heuristics are enabled.
++ * @bfq_wr_coeff: maximum factor by which the weight of a weight-raised
++ * queue is multiplied.
++ * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies).
++ * @bfq_wr_rt_max_time: maximum duration for soft real-time processes.
++ * @bfq_wr_min_idle_time: minimum idle period after which weight-raising
++ * may be reactivated for a queue (in jiffies).
++ * @bfq_wr_min_inter_arr_async: minimum period between request arrivals
++ * after which weight-raising may be
++ * reactivated for an already busy queue
++ * (in jiffies).
++ * @bfq_wr_max_softrt_rate: max service rate for a soft real-time queue,
++ * in sectors per second.
++ * @RT_prod: cached value of the product R*T, used for automatically
++ * computing the maximum duration of weight raising.
++ * @device_speed: device-speed class for the low-latency heuristic.
++ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions.
++ *
++ * All the fields are protected by the @queue lock.
++ */ ++struct bfq_data { ++ struct request_queue *queue; ++ ++ struct bfq_group *root_group; ++ struct rb_root rq_pos_tree; ++ ++#ifdef CONFIG_CGROUP_BFQIO ++ int active_numerous_groups; ++#endif ++ ++ struct rb_root queue_weights_tree; ++ struct rb_root group_weights_tree; ++ ++ int busy_queues; ++ int busy_in_flight_queues; ++ int const_seeky_busy_in_flight_queues; ++ int wr_busy_queues; ++ int queued; ++ int rq_in_driver; ++ int sync_flight; ++ ++ int max_rq_in_driver; ++ int hw_tag_samples; ++ int hw_tag; ++ ++ int budgets_assigned; ++ ++ struct timer_list idle_slice_timer; ++ struct work_struct unplug_work; ++ ++ struct bfq_queue *in_service_queue; ++ struct bfq_io_cq *in_service_bic; ++ ++ sector_t last_position; ++ ++ ktime_t last_budget_start; ++ ktime_t last_idling_start; ++ int peak_rate_samples; ++ u64 peak_rate; ++ unsigned long bfq_max_budget; ++ ++ struct hlist_head group_list; ++ struct list_head active_list; ++ struct list_head idle_list; ++ ++ unsigned int bfq_quantum; ++ unsigned int bfq_fifo_expire[2]; ++ unsigned int bfq_back_penalty; ++ unsigned int bfq_back_max; ++ unsigned int bfq_slice_idle; ++ u64 bfq_class_idle_last_service; ++ ++ unsigned int bfq_user_max_budget; ++ unsigned int bfq_max_budget_async_rq; ++ unsigned int bfq_timeout[2]; ++ ++ unsigned int bfq_coop_thresh; ++ unsigned int bfq_failed_cooperations; ++ unsigned int bfq_requests_within_timer; ++ ++ unsigned long last_ins_in_burst; ++ unsigned long bfq_burst_interval; ++ int burst_size; ++ unsigned long bfq_large_burst_thresh; ++ bool large_burst; ++ struct hlist_head burst_list; ++ ++ bool low_latency; ++ ++ /* parameters of the low_latency heuristics */ ++ unsigned int bfq_wr_coeff; ++ unsigned int bfq_wr_max_time; ++ unsigned int bfq_wr_rt_max_time; ++ unsigned int bfq_wr_min_idle_time; ++ unsigned long bfq_wr_min_inter_arr_async; ++ unsigned int bfq_wr_max_softrt_rate; ++ u64 RT_prod; ++ enum bfq_device_speed device_speed; ++ ++ struct bfq_queue oom_bfqq; ++}; ++ ++enum bfqq_state_flags { ++ BFQ_BFQQ_FLAG_busy = 0, /* has requests or is in service */ ++ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ ++ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ ++ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ ++ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ ++ BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ ++ BFQ_BFQQ_FLAG_sync, /* synchronous queue */ ++ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ ++ BFQ_BFQQ_FLAG_IO_bound, /* ++ * bfqq has timed-out at least once ++ * having consumed at most 2/10 of ++ * its budget ++ */ ++ BFQ_BFQQ_FLAG_in_large_burst, /* ++ * bfqq activated in a large burst, ++ * see comments to bfq_handle_burst. 
++ */ ++ BFQ_BFQQ_FLAG_constantly_seeky, /* ++ * bfqq has proved to be slow and ++ * seeky until budget timeout ++ */ ++ BFQ_BFQQ_FLAG_softrt_update, /* ++ * may need softrt-next-start ++ * update ++ */ ++ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ ++ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */ ++ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */ ++}; ++ ++#define BFQ_BFQQ_FNS(name) \ ++static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ ++{ \ ++ (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \ ++} \ ++static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \ ++{ \ ++ (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \ ++} \ ++static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ ++{ \ ++ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ ++} ++ ++BFQ_BFQQ_FNS(busy); ++BFQ_BFQQ_FNS(wait_request); ++BFQ_BFQQ_FNS(must_alloc); ++BFQ_BFQQ_FNS(fifo_expire); ++BFQ_BFQQ_FNS(idle_window); ++BFQ_BFQQ_FNS(prio_changed); ++BFQ_BFQQ_FNS(sync); ++BFQ_BFQQ_FNS(budget_new); ++BFQ_BFQQ_FNS(IO_bound); ++BFQ_BFQQ_FNS(in_large_burst); ++BFQ_BFQQ_FNS(constantly_seeky); ++BFQ_BFQQ_FNS(coop); ++BFQ_BFQQ_FNS(split_coop); ++BFQ_BFQQ_FNS(just_split); ++BFQ_BFQQ_FNS(softrt_update); ++#undef BFQ_BFQQ_FNS ++ ++/* Logging facilities. */ ++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ ++ blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) ++ ++#define bfq_log(bfqd, fmt, args...) \ ++ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) ++ ++/* Expiration reasons. */ ++enum bfqq_expiration { ++ BFQ_BFQQ_TOO_IDLE = 0, /* ++ * queue has been idling for ++ * too long ++ */ ++ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ ++ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ ++ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ ++}; ++ ++#ifdef CONFIG_CGROUP_BFQIO ++/** ++ * struct bfq_group - per (device, cgroup) data structure. ++ * @entity: schedulable entity to insert into the parent group sched_data. ++ * @sched_data: own sched_data, to contain child entities (they may be ++ * both bfq_queues and bfq_groups). ++ * @group_node: node to be inserted into the bfqio_cgroup->group_data ++ * list of the containing cgroup's bfqio_cgroup. ++ * @bfqd_node: node to be inserted into the @bfqd->group_list list ++ * of the groups active on the same device; used for cleanup. ++ * @bfqd: the bfq_data for the device this group acts upon. ++ * @async_bfqq: array of async queues for all the tasks belonging to ++ * the group, one queue per ioprio value per ioprio_class, ++ * except for the idle class that has only one queue. ++ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). ++ * @my_entity: pointer to @entity, %NULL for the toplevel group; used ++ * to avoid too many special cases during group creation/ ++ * migration. ++ * @active_entities: number of active entities belonging to the group; ++ * unused for the root group. Used to know whether there ++ * are groups with more than one active @bfq_entity ++ * (see the comments to the function ++ * bfq_bfqq_must_not_expire()). ++ * ++ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup ++ * there is a set of bfq_groups, each one collecting the lower-level ++ * entities belonging to the group that are acting on the same device. ++ * ++ * Locking works as follows: ++ * o @group_node is protected by the bfqio_cgroup lock, and is accessed ++ * via RCU from its readers. 
++ * o @bfqd is protected by the queue lock, RCU is used to access it ++ * from the readers. ++ * o All the other fields are protected by the @bfqd queue lock. ++ */ ++struct bfq_group { ++ struct bfq_entity entity; ++ struct bfq_sched_data sched_data; ++ ++ struct hlist_node group_node; ++ struct hlist_node bfqd_node; ++ ++ void *bfqd; ++ ++ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; ++ struct bfq_queue *async_idle_bfqq; ++ ++ struct bfq_entity *my_entity; ++ ++ int active_entities; ++}; ++ ++/** ++ * struct bfqio_cgroup - bfq cgroup data structure. ++ * @css: subsystem state for bfq in the containing cgroup. ++ * @online: flag marked when the subsystem is inserted. ++ * @weight: cgroup weight. ++ * @ioprio: cgroup ioprio. ++ * @ioprio_class: cgroup ioprio_class. ++ * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data. ++ * @group_data: list containing the bfq_group belonging to this cgroup. ++ * ++ * @group_data is accessed using RCU, with @lock protecting the updates, ++ * @ioprio and @ioprio_class are protected by @lock. ++ */ ++struct bfqio_cgroup { ++ struct cgroup_subsys_state css; ++ bool online; ++ ++ unsigned short weight, ioprio, ioprio_class; ++ ++ spinlock_t lock; ++ struct hlist_head group_data; ++}; ++#else ++struct bfq_group { ++ struct bfq_sched_data sched_data; ++ ++ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; ++ struct bfq_queue *async_idle_bfqq; ++}; ++#endif ++ ++static inline struct bfq_service_tree * ++bfq_entity_service_tree(struct bfq_entity *entity) ++{ ++ struct bfq_sched_data *sched_data = entity->sched_data; ++ unsigned int idx = entity->ioprio_class - 1; ++ ++ BUG_ON(idx >= BFQ_IOPRIO_CLASSES); ++ BUG_ON(sched_data == NULL); ++ ++ return sched_data->service_tree + idx; ++} ++ ++static inline struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, ++ bool is_sync) ++{ ++ return bic->bfqq[is_sync]; ++} ++ ++static inline void bic_set_bfqq(struct bfq_io_cq *bic, ++ struct bfq_queue *bfqq, bool is_sync) ++{ ++ bic->bfqq[is_sync] = bfqq; ++} ++ ++static inline struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic) ++{ ++ return bic->icq.q->elevator->elevator_data; ++} ++ ++/** ++ * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. ++ * @ptr: a pointer to a bfqd. ++ * @flags: storage for the flags to be saved. ++ * ++ * This function allows bfqg->bfqd to be protected by the ++ * queue lock of the bfqd they reference; the pointer is dereferenced ++ * under RCU, so the storage for bfqd is assured to be safe as long ++ * as the RCU read side critical section does not end. After the ++ * bfqd->queue->queue_lock is taken the pointer is rechecked, to be ++ * sure that no other writer accessed it. If we raced with a writer, ++ * the function returns NULL, with the queue unlocked, otherwise it ++ * returns the dereferenced pointer, with the queue locked. 
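++ *
++ * A minimal usage sketch (purely illustrative; bfqg stands for any
++ * bfq_group whose @bfqd field is published via RCU):
++ *
++ *	unsigned long flags;
++ *	struct bfq_data *bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
++ *
++ *	if (bfqd != NULL) {
++ *		(here the queue lock is held and bfqg->bfqd is stable)
++ *		bfq_put_bfqd_unlock(bfqd, &flags);
++ *	}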
++ */ ++static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr, ++ unsigned long *flags) ++{ ++ struct bfq_data *bfqd; ++ ++ rcu_read_lock(); ++ bfqd = rcu_dereference(*(struct bfq_data **)ptr); ++ ++ if (bfqd != NULL) { ++ spin_lock_irqsave(bfqd->queue->queue_lock, *flags); ++ if (*ptr == bfqd) ++ goto out; ++ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); ++ } ++ ++ bfqd = NULL; ++out: ++ rcu_read_unlock(); ++ return bfqd; ++} ++ ++static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd, ++ unsigned long *flags) ++{ ++ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); ++} ++ ++static void bfq_changed_ioprio(struct bfq_io_cq *bic); ++static void bfq_put_queue(struct bfq_queue *bfqq); ++static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); ++static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, int is_sync, ++ struct bfq_io_cq *bic, gfp_t gfp_mask); ++static void bfq_end_wr_async_queues(struct bfq_data *bfqd, ++ struct bfq_group *bfqg); ++static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); ++static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); ++ ++#endif /* _BFQ_H */ +diff -Nur linux-4.1.3/block/bfq-ioc.c linux-xbian-imx6/block/bfq-ioc.c +--- linux-4.1.3/block/bfq-ioc.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/block/bfq-ioc.c 2015-07-27 23:13:03.600137415 +0200 +@@ -0,0 +1,36 @@ ++/* ++ * BFQ: I/O context handling. ++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ * ++ * Copyright (C) 2010 Paolo Valente ++ */ ++ ++/** ++ * icq_to_bic - convert iocontext queue structure to bfq_io_cq. ++ * @icq: the iocontext queue. ++ */ ++static inline struct bfq_io_cq *icq_to_bic(struct io_cq *icq) ++{ ++ /* bic->icq is the first member, %NULL will convert to %NULL */ ++ return container_of(icq, struct bfq_io_cq, icq); ++} ++ ++/** ++ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd. ++ * @bfqd: the lookup key. ++ * @ioc: the io_context of the process doing I/O. ++ * ++ * Queue lock must be held. ++ */ ++static inline struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd, ++ struct io_context *ioc) ++{ ++ if (ioc) ++ return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue)); ++ return NULL; ++} +diff -Nur linux-4.1.3/block/bfq-iosched.c linux-xbian-imx6/block/bfq-iosched.c +--- linux-4.1.3/block/bfq-iosched.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/block/bfq-iosched.c 2015-07-27 23:13:03.600137415 +0200 +@@ -0,0 +1,4223 @@ ++/* ++ * Budget Fair Queueing (BFQ) disk scheduler. ++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ * ++ * Copyright (C) 2010 Paolo Valente ++ * ++ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ ++ * file. ++ * ++ * BFQ is a proportional-share storage-I/O scheduling algorithm based on ++ * the slice-by-slice service scheme of CFQ. But BFQ assigns budgets, ++ * measured in number of sectors, to processes instead of time slices. The ++ * device is not granted to the in-service process for a given time slice, ++ * but until it has exhausted its assigned budget. This change from the time ++ * to the service domain allows BFQ to distribute the device throughput ++ * among processes as desired, without any distortion due to ZBR, workload ++ * fluctuations or other factors. 
BFQ uses an ad hoc internal scheduler,
++ * called B-WF2Q+, to schedule processes according to their budgets. More
++ * precisely, BFQ schedules queues associated to processes. Thanks to the
++ * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to
++ * I/O-bound processes issuing sequential requests (to boost the
++ * throughput), and yet guarantee a low latency to interactive and soft
++ * real-time applications.
++ *
++ * BFQ is described in [1], where a reference to the initial, more
++ * theoretical paper on BFQ can also be found. The interested reader can find
++ * in the latter paper full details on the main algorithm, as well as
++ * formulas of the guarantees and formal proofs of all the properties.
++ * With respect to the version of BFQ presented in these papers, this
++ * implementation adds a few more heuristics, such as the one that
++ * guarantees a low latency to soft real-time applications, and a
++ * hierarchical extension based on H-WF2Q+.
++ *
++ * B-WF2Q+ is based on WF2Q+, which is described in [2], together with
++ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N)
++ * complexity derives from the one introduced with EEVDF in [3].
++ *
++ * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness
++ * with the BFQ Disk I/O Scheduler'',
++ * Proceedings of the 5th Annual International Systems and Storage
++ * Conference (SYSTOR '12), June 2012.
++ *
++ * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf
++ *
++ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing
++ * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689,
++ * Oct 1997.
++ *
++ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz
++ *
++ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline
++ * First: A Flexible and Accurate Mechanism for Proportional Share
++ * Resource Allocation,'' technical report.
++ *
++ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf
++ */
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/blkdev.h>
++#include <linux/cgroup.h>
++#include <linux/elevator.h>
++#include <linux/jiffies.h>
++#include <linux/rbtree.h>
++#include <linux/ioprio.h>
++#include "bfq.h"
++#include "blk.h"
++
++/* Max number of dispatches in one round of service. */
++static const int bfq_quantum = 4;
++
++/* Expiration time of sync (0) and async (1) requests, in jiffies. */
++static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
++
++/* Maximum backwards seek, in KiB. */
++static const int bfq_back_max = 16 * 1024;
++
++/* Penalty of a backwards seek, in number of sectors. */
++static const int bfq_back_penalty = 2;
++
++/* Idling period duration, in jiffies. */
++static int bfq_slice_idle = HZ / 125;
++
++/* Default maximum budget values, in sectors and number of requests. */
++static const int bfq_default_max_budget = 16 * 1024;
++static const int bfq_max_budget_async_rq = 4;
++
++/*
++ * Async to sync throughput distribution is controlled as follows:
++ * when an async request is served, the entity is charged the number
++ * of sectors of the request, multiplied by the factor below
++ */
++static const int bfq_async_charge_factor = 10;
++
++/* Default timeout values, in jiffies, approximating CFQ defaults. */
++static const int bfq_timeout_sync = HZ / 8;
++static int bfq_timeout_async = HZ / 25;
++
++struct kmem_cache *bfq_pool;
++
++/* Below this threshold (in ms), we consider thinktime immediate. */
++#define BFQ_MIN_TT 2
++
++/* hw_tag detection: parallel requests threshold and min samples needed.
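++ * A sketch of the intended use (the details live in the hw_tag update
++ * logic below): hw_tag is raised only after at least BFQ_HW_QUEUE_SAMPLES
++ * completed requests have been sampled, with the maximum number of
++ * requests observed in flight exceeding BFQ_HW_QUEUE_THRESHOLD.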
*/ ++#define BFQ_HW_QUEUE_THRESHOLD 4 ++#define BFQ_HW_QUEUE_SAMPLES 32 ++ ++#define BFQQ_SEEK_THR (sector_t)(8 * 1024) ++#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) ++ ++/* Min samples used for peak rate estimation (for autotuning). */ ++#define BFQ_PEAK_RATE_SAMPLES 32 ++ ++/* Shift used for peak rate fixed precision calculations. */ ++#define BFQ_RATE_SHIFT 16 ++ ++/* ++ * By default, BFQ computes the duration of the weight raising for ++ * interactive applications automatically, using the following formula: ++ * duration = (R / r) * T, where r is the peak rate of the device, and ++ * R and T are two reference parameters. ++ * In particular, R is the peak rate of the reference device (see below), ++ * and T is a reference time: given the systems that are likely to be ++ * installed on the reference device according to its speed class, T is ++ * about the maximum time needed, under BFQ and while reading two files in ++ * parallel, to load typical large applications on these systems. ++ * In practice, the slower/faster the device at hand is, the more/less it ++ * takes to load applications with respect to the reference device. ++ * Accordingly, the longer/shorter BFQ grants weight raising to interactive ++ * applications. ++ * ++ * BFQ uses four different reference pairs (R, T), depending on: ++ * . whether the device is rotational or non-rotational; ++ * . whether the device is slow, such as old or portable HDDs, as well as ++ * SD cards, or fast, such as newer HDDs and SSDs. ++ * ++ * The device's speed class is dynamically (re)detected in ++ * bfq_update_peak_rate() every time the estimated peak rate is updated. ++ * ++ * In the following definitions, R_slow[0]/R_fast[0] and T_slow[0]/T_fast[0] ++ * are the reference values for a slow/fast rotational device, whereas ++ * R_slow[1]/R_fast[1] and T_slow[1]/T_fast[1] are the reference values for ++ * a slow/fast non-rotational device. Finally, device_speed_thresh are the ++ * thresholds used to switch between speed classes. ++ * Both the reference peak rates and the thresholds are measured in ++ * sectors/usec, left-shifted by BFQ_RATE_SHIFT. ++ */ ++static int R_slow[2] = {1536, 10752}; ++static int R_fast[2] = {17415, 34791}; ++/* ++ * To improve readability, a conversion function is used to initialize the ++ * following arrays, which entails that they can be initialized only in a ++ * function. ++ */ ++static int T_slow[2]; ++static int T_fast[2]; ++static int device_speed_thresh[2]; ++ ++#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ ++ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) ++ ++#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0]) ++#define RQ_BFQQ(rq) ((rq)->elv.priv[1]) ++ ++static inline void bfq_schedule_dispatch(struct bfq_data *bfqd); ++ ++#include "bfq-ioc.c" ++#include "bfq-sched.c" ++#include "bfq-cgroup.c" ++ ++#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\ ++ IOPRIO_CLASS_IDLE) ++#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\ ++ IOPRIO_CLASS_RT) ++ ++#define bfq_sample_valid(samples) ((samples) > 80) ++ ++/* ++ * We regard a request as SYNC, if either it's a read or has the SYNC bit ++ * set (in which case it could also be a direct WRITE). ++ */ ++static inline int bfq_bio_sync(struct bio *bio) ++{ ++ if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * Scheduler run of queue, if there are requests pending and no one in the ++ * driver that will restart queueing. 
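++ * Restarting is delegated to kblockd through @unplug_work, which
++ * eventually re-runs the request queue (see the field documentation in
++ * bfq.h).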
++ */
++static inline void bfq_schedule_dispatch(struct bfq_data *bfqd)
++{
++	if (bfqd->queued != 0) {
++		bfq_log(bfqd, "schedule dispatch");
++		kblockd_schedule_work(&bfqd->unplug_work);
++	}
++}
++
++/*
++ * Lifted from AS - choose which of rq1 and rq2 is best served now.
++ * We choose the request that is closest to the head right now. Distance
++ * behind the head is penalized and only allowed to a certain extent.
++ */
++static struct request *bfq_choose_req(struct bfq_data *bfqd,
++				      struct request *rq1,
++				      struct request *rq2,
++				      sector_t last)
++{
++	sector_t s1, s2, d1 = 0, d2 = 0;
++	unsigned long back_max;
++#define BFQ_RQ1_WRAP	0x01 /* request 1 wraps */
++#define BFQ_RQ2_WRAP	0x02 /* request 2 wraps */
++	unsigned wrap = 0; /* bit mask: requests behind the disk head? */
++
++	if (rq1 == NULL || rq1 == rq2)
++		return rq2;
++	if (rq2 == NULL)
++		return rq1;
++
++	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
++		return rq1;
++	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
++		return rq2;
++	if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
++		return rq1;
++	else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META))
++		return rq2;
++
++	s1 = blk_rq_pos(rq1);
++	s2 = blk_rq_pos(rq2);
++
++	/*
++	 * By definition, 1KiB is 2 sectors.
++	 */
++	back_max = bfqd->bfq_back_max * 2;
++
++	/*
++	 * Strict one way elevator _except_ in the case where we allow
++	 * short backward seeks which are biased as twice the cost of a
++	 * similar forward seek.
++	 */
++	if (s1 >= last)
++		d1 = s1 - last;
++	else if (s1 + back_max >= last)
++		d1 = (last - s1) * bfqd->bfq_back_penalty;
++	else
++		wrap |= BFQ_RQ1_WRAP;
++
++	if (s2 >= last)
++		d2 = s2 - last;
++	else if (s2 + back_max >= last)
++		d2 = (last - s2) * bfqd->bfq_back_penalty;
++	else
++		wrap |= BFQ_RQ2_WRAP;
++
++	/* Found required data */
++
++	/*
++	 * By doing switch() on the bit mask "wrap" we avoid having to
++	 * check two variables for all permutations: --> faster!
++	 */
++	switch (wrap) {
++	case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
++		if (d1 < d2)
++			return rq1;
++		else if (d2 < d1)
++			return rq2;
++		else {
++			if (s1 >= s2)
++				return rq1;
++			else
++				return rq2;
++		}
++
++	case BFQ_RQ2_WRAP:
++		return rq1;
++	case BFQ_RQ1_WRAP:
++		return rq2;
++	case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */
++	default:
++		/*
++		 * Since both rqs are wrapped,
++		 * start with the one that's further behind head
++		 * (--> only *one* back seek required),
++		 * since back seek takes more time than forward.
++		 */
++		if (s1 <= s2)
++			return rq1;
++		else
++			return rq2;
++	}
++}
++
++static struct bfq_queue *
++bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
++		       sector_t sector, struct rb_node **ret_parent,
++		       struct rb_node ***rb_link)
++{
++	struct rb_node **p, *parent;
++	struct bfq_queue *bfqq = NULL;
++
++	parent = NULL;
++	p = &root->rb_node;
++	while (*p) {
++		struct rb_node **n;
++
++		parent = *p;
++		bfqq = rb_entry(parent, struct bfq_queue, pos_node);
++
++		/*
++		 * Sort strictly based on sector. Smallest to the left,
++		 * largest to the right.
++		 */
++		if (sector > blk_rq_pos(bfqq->next_rq))
++			n = &(*p)->rb_right;
++		else if (sector < blk_rq_pos(bfqq->next_rq))
++			n = &(*p)->rb_left;
++		else
++			break;
++		p = n;
++		bfqq = NULL;
++	}
++
++	*ret_parent = parent;
++	if (rb_link)
++		*rb_link = p;
++
++	bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d",
++		(long long unsigned)sector,
++		bfqq != NULL ?
bfqq->pid : 0); ++ ++ return bfqq; ++} ++ ++static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ struct rb_node **p, *parent; ++ struct bfq_queue *__bfqq; ++ ++ if (bfqq->pos_root != NULL) { ++ rb_erase(&bfqq->pos_node, bfqq->pos_root); ++ bfqq->pos_root = NULL; ++ } ++ ++ if (bfq_class_idle(bfqq)) ++ return; ++ if (!bfqq->next_rq) ++ return; ++ ++ bfqq->pos_root = &bfqd->rq_pos_tree; ++ __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, ++ blk_rq_pos(bfqq->next_rq), &parent, &p); ++ if (__bfqq == NULL) { ++ rb_link_node(&bfqq->pos_node, parent, p); ++ rb_insert_color(&bfqq->pos_node, bfqq->pos_root); ++ } else ++ bfqq->pos_root = NULL; ++} ++ ++/* ++ * Tell whether there are active queues or groups with differentiated weights. ++ */ ++static inline bool bfq_differentiated_weights(struct bfq_data *bfqd) ++{ ++ BUG_ON(!bfqd->hw_tag); ++ /* ++ * For weights to differ, at least one of the trees must contain ++ * at least two nodes. ++ */ ++ return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) && ++ (bfqd->queue_weights_tree.rb_node->rb_left || ++ bfqd->queue_weights_tree.rb_node->rb_right) ++#ifdef CONFIG_CGROUP_BFQIO ++ ) || ++ (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) && ++ (bfqd->group_weights_tree.rb_node->rb_left || ++ bfqd->group_weights_tree.rb_node->rb_right) ++#endif ++ ); ++} ++ ++/* ++ * If the weight-counter tree passed as input contains no counter for ++ * the weight of the input entity, then add that counter; otherwise just ++ * increment the existing counter. ++ * ++ * Note that weight-counter trees contain few nodes in mostly symmetric ++ * scenarios. For example, if all queues have the same weight, then the ++ * weight-counter tree for the queues may contain at most one node. ++ * This holds even if low_latency is on, because weight-raised queues ++ * are not inserted in the tree. ++ * In most scenarios, the rate at which nodes are created/destroyed ++ * should be low too. ++ */ ++static void bfq_weights_tree_add(struct bfq_data *bfqd, ++ struct bfq_entity *entity, ++ struct rb_root *root) ++{ ++ struct rb_node **new = &(root->rb_node), *parent = NULL; ++ ++ /* ++ * Do not insert if: ++ * - the device does not support queueing; ++ * - the entity is already associated with a counter, which happens if: ++ * 1) the entity is associated with a queue, 2) a request arrival ++ * has caused the queue to become both non-weight-raised, and hence ++ * change its weight, and backlogged; in this respect, each ++ * of the two events causes an invocation of this function, ++ * 3) this is the invocation of this function caused by the second ++ * event. This second invocation is actually useless, and we handle ++ * this fact by exiting immediately. More efficient or clearer ++ * solutions might possibly be adopted. 
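++ *
++ * As a concrete example (with made-up weights): if three queues with
++ * weights 100, 100 and 200 are active, the tree holds two counters, with
++ * num_active equal to 2 and 1 respectively; a fourth active queue of
++ * weight 200 would simply bump the second counter to 2.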
++ */
++	if (!bfqd->hw_tag || entity->weight_counter)
++		return;
++
++	while (*new) {
++		struct bfq_weight_counter *__counter = container_of(*new,
++						struct bfq_weight_counter,
++						weights_node);
++		parent = *new;
++
++		if (entity->weight == __counter->weight) {
++			entity->weight_counter = __counter;
++			goto inc_counter;
++		}
++		if (entity->weight < __counter->weight)
++			new = &((*new)->rb_left);
++		else
++			new = &((*new)->rb_right);
++	}
++
++	entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
++					 GFP_ATOMIC);
++	/*
++	 * If the atomic allocation fails, just skip the accounting for
++	 * this entity instead of dereferencing a NULL pointer.
++	 */
++	if (!entity->weight_counter)
++		return;
++	entity->weight_counter->weight = entity->weight;
++	rb_link_node(&entity->weight_counter->weights_node, parent, new);
++	rb_insert_color(&entity->weight_counter->weights_node, root);
++
++inc_counter:
++	entity->weight_counter->num_active++;
++}
++
++/*
++ * Decrement the weight counter associated with the entity, and, if the
++ * counter reaches 0, remove the counter from the tree.
++ * See the comments to the function bfq_weights_tree_add() for considerations
++ * about overhead.
++ */
++static void bfq_weights_tree_remove(struct bfq_data *bfqd,
++				    struct bfq_entity *entity,
++				    struct rb_root *root)
++{
++	/*
++	 * Check whether the entity is actually associated with a counter.
++	 * In fact, the device may not be considered NCQ-capable for a while,
++	 * which implies that no insertion in the weight trees is performed,
++	 * after which the device may start to be deemed NCQ-capable, and hence
++	 * this function may start to be invoked. This may cause the function
++	 * to be invoked for entities that are not associated with any counter.
++	 */
++	if (!entity->weight_counter)
++		return;
++
++	BUG_ON(RB_EMPTY_ROOT(root));
++	BUG_ON(entity->weight_counter->weight != entity->weight);
++
++	BUG_ON(!entity->weight_counter->num_active);
++	entity->weight_counter->num_active--;
++	if (entity->weight_counter->num_active > 0)
++		goto reset_entity_pointer;
++
++	rb_erase(&entity->weight_counter->weights_node, root);
++	kfree(entity->weight_counter);
++
++reset_entity_pointer:
++	entity->weight_counter = NULL;
++}
++
++static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
++					struct bfq_queue *bfqq,
++					struct request *last)
++{
++	struct rb_node *rbnext = rb_next(&last->rb_node);
++	struct rb_node *rbprev = rb_prev(&last->rb_node);
++	struct request *next = NULL, *prev = NULL;
++
++	BUG_ON(RB_EMPTY_NODE(&last->rb_node));
++
++	if (rbprev != NULL)
++		prev = rb_entry_rq(rbprev);
++
++	if (rbnext != NULL)
++		next = rb_entry_rq(rbnext);
++	else {
++		rbnext = rb_first(&bfqq->sort_list);
++		if (rbnext && rbnext != &last->rb_node)
++			next = rb_entry_rq(rbnext);
++	}
++
++	return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last));
++}
++
++/* see the definition of bfq_async_charge_factor for details */
++static inline unsigned long bfq_serv_to_charge(struct request *rq,
++					       struct bfq_queue *bfqq)
++{
++	return blk_rq_sectors(rq) *
++		(1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->wr_coeff == 1) *
++		bfq_async_charge_factor));
++}
++
++/**
++ * bfq_updated_next_req - update the queue after a new next_rq selection.
++ * @bfqd: the device data the queue belongs to.
++ * @bfqq: the queue to update.
++ *
++ * If the first request of a queue changes we make sure that the queue
++ * has enough budget to serve at least its first request (if the
++ * request has grown). We do this because if the queue does not have
++ * enough budget for its first request, it has to go through two dispatch
++ * rounds to actually get it dispatched.
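++ *
++ * As an illustrative example (numbers made up): if @max_budget of a sync
++ * queue is 8192 sectors and its new first request is charged 16384
++ * sectors by bfq_serv_to_charge(), the entity budget is raised to
++ * max(8192, 16384) == 16384, so that a single dispatch round suffices.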
++ */ ++static void bfq_updated_next_req(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ struct bfq_service_tree *st = bfq_entity_service_tree(entity); ++ struct request *next_rq = bfqq->next_rq; ++ unsigned long new_budget; ++ ++ if (next_rq == NULL) ++ return; ++ ++ if (bfqq == bfqd->in_service_queue) ++ /* ++ * In order not to break guarantees, budgets cannot be ++ * changed after an entity has been selected. ++ */ ++ return; ++ ++ BUG_ON(entity->tree != &st->active); ++ BUG_ON(entity == entity->sched_data->in_service_entity); ++ ++ new_budget = max_t(unsigned long, bfqq->max_budget, ++ bfq_serv_to_charge(next_rq, bfqq)); ++ if (entity->budget != new_budget) { ++ entity->budget = new_budget; ++ bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", ++ new_budget); ++ bfq_activate_bfqq(bfqd, bfqq); ++ } ++} ++ ++static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd) ++{ ++ u64 dur; ++ ++ if (bfqd->bfq_wr_max_time > 0) ++ return bfqd->bfq_wr_max_time; ++ ++ dur = bfqd->RT_prod; ++ do_div(dur, bfqd->peak_rate); ++ ++ return dur; ++} ++ ++static inline unsigned ++bfq_bfqq_cooperations(struct bfq_queue *bfqq) ++{ ++ return bfqq->bic ? bfqq->bic->cooperations : 0; ++} ++ ++static inline void ++bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) ++{ ++ if (bic->saved_idle_window) ++ bfq_mark_bfqq_idle_window(bfqq); ++ else ++ bfq_clear_bfqq_idle_window(bfqq); ++ if (bic->saved_IO_bound) ++ bfq_mark_bfqq_IO_bound(bfqq); ++ else ++ bfq_clear_bfqq_IO_bound(bfqq); ++ /* Assuming that the flag in_large_burst is already correctly set */ ++ if (bic->wr_time_left && bfqq->bfqd->low_latency && ++ !bfq_bfqq_in_large_burst(bfqq) && ++ bic->cooperations < bfqq->bfqd->bfq_coop_thresh) { ++ /* ++ * Start a weight raising period with the duration given by ++ * the raising_time_left snapshot. ++ */ ++ if (bfq_bfqq_busy(bfqq)) ++ bfqq->bfqd->wr_busy_queues++; ++ bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff; ++ bfqq->wr_cur_max_time = bic->wr_time_left; ++ bfqq->last_wr_start_finish = jiffies; ++ bfqq->entity.ioprio_changed = 1; ++ } ++ /* ++ * Clear wr_time_left to prevent bfq_bfqq_save_state() from ++ * getting confused about the queue's need of a weight-raising ++ * period. ++ */ ++ bic->wr_time_left = 0; ++} ++ ++/* Must be called with the queue_lock held. 
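++ * The returned value counts only the references held by the owning
++ * process(es): for instance (illustrative numbers), with ref == 4, two
++ * allocated requests and on_st == 1, process_refs == 4 - 2 - 1 == 1.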
*/ ++static int bfqq_process_refs(struct bfq_queue *bfqq) ++{ ++ int process_refs, io_refs; ++ ++ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; ++ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; ++ BUG_ON(process_refs < 0); ++ return process_refs; ++} ++ ++/* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */ ++static inline void bfq_reset_burst_list(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ struct bfq_queue *item; ++ struct hlist_node *n; ++ ++ hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node) ++ hlist_del_init(&item->burst_list_node); ++ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); ++ bfqd->burst_size = 1; ++} ++ ++/* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */ ++static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ /* Increment burst size to take into account also bfqq */ ++ bfqd->burst_size++; ++ ++ if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) { ++ struct bfq_queue *pos, *bfqq_item; ++ struct hlist_node *n; ++ ++ /* ++ * Enough queues have been activated shortly after each ++ * other to consider this burst as large. ++ */ ++ bfqd->large_burst = true; ++ ++ /* ++ * We can now mark all queues in the burst list as ++ * belonging to a large burst. ++ */ ++ hlist_for_each_entry(bfqq_item, &bfqd->burst_list, ++ burst_list_node) ++ bfq_mark_bfqq_in_large_burst(bfqq_item); ++ bfq_mark_bfqq_in_large_burst(bfqq); ++ ++ /* ++ * From now on, and until the current burst finishes, any ++ * new queue being activated shortly after the last queue ++ * was inserted in the burst can be immediately marked as ++ * belonging to a large burst. So the burst list is not ++ * needed any more. Remove it. ++ */ ++ hlist_for_each_entry_safe(pos, n, &bfqd->burst_list, ++ burst_list_node) ++ hlist_del_init(&pos->burst_list_node); ++ } else /* burst not yet large: add bfqq to the burst list */ ++ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); ++} ++ ++/* ++ * If many queues happen to become active shortly after each other, then, ++ * to help the processes associated to these queues get their job done as ++ * soon as possible, it is usually better to not grant either weight-raising ++ * or device idling to these queues. In this comment we describe, firstly, ++ * the reasons why this fact holds, and, secondly, the next function, which ++ * implements the main steps needed to properly mark these queues so that ++ * they can then be treated in a different way. ++ * ++ * As for the terminology, we say that a queue becomes active, i.e., ++ * switches from idle to backlogged, either when it is created (as a ++ * consequence of the arrival of an I/O request), or, if already existing, ++ * when a new request for the queue arrives while the queue is idle. ++ * Bursts of activations, i.e., activations of different queues occurring ++ * shortly after each other, are typically caused by services or applications ++ * that spawn or reactivate many parallel threads/processes. Examples are ++ * systemd during boot or git grep. ++ * ++ * These services or applications benefit mostly from a high throughput: ++ * the quicker the requests of the activated queues are cumulatively served, ++ * the sooner the target job of these queues gets completed. As a consequence, ++ * weight-raising any of these queues, which also implies idling the device ++ * for it, is almost always counterproductive: in most cases it just lowers ++ * throughput. 
++ * ++ * On the other hand, a burst of activations may be also caused by the start ++ * of an application that does not consist in a lot of parallel I/O-bound ++ * threads. In fact, with a complex application, the burst may be just a ++ * consequence of the fact that several processes need to be executed to ++ * start-up the application. To start an application as quickly as possible, ++ * the best thing to do is to privilege the I/O related to the application ++ * with respect to all other I/O. Therefore, the best strategy to start as ++ * quickly as possible an application that causes a burst of activations is ++ * to weight-raise all the queues activated during the burst. This is the ++ * exact opposite of the best strategy for the other type of bursts. ++ * ++ * In the end, to take the best action for each of the two cases, the two ++ * types of bursts need to be distinguished. Fortunately, this seems ++ * relatively easy to do, by looking at the sizes of the bursts. In ++ * particular, we found a threshold such that bursts with a larger size ++ * than that threshold are apparently caused only by services or commands ++ * such as systemd or git grep. For brevity, hereafter we call just 'large' ++ * these bursts. BFQ *does not* weight-raise queues whose activations occur ++ * in a large burst. In addition, for each of these queues BFQ performs or ++ * does not perform idling depending on which choice boosts the throughput ++ * most. The exact choice depends on the device and request pattern at ++ * hand. ++ * ++ * Turning back to the next function, it implements all the steps needed ++ * to detect the occurrence of a large burst and to properly mark all the ++ * queues belonging to it (so that they can then be treated in a different ++ * way). This goal is achieved by maintaining a special "burst list" that ++ * holds, temporarily, the queues that belong to the burst in progress. The ++ * list is then used to mark these queues as belonging to a large burst if ++ * the burst does become large. The main steps are the following. ++ * ++ * . when the very first queue is activated, the queue is inserted into the ++ * list (as it could be the first queue in a possible burst) ++ * ++ * . if the current burst has not yet become large, and a queue Q that does ++ * not yet belong to the burst is activated shortly after the last time ++ * at which a new queue entered the burst list, then the function appends ++ * Q to the burst list ++ * ++ * . if, as a consequence of the previous step, the burst size reaches ++ * the large-burst threshold, then ++ * ++ * . all the queues in the burst list are marked as belonging to a ++ * large burst ++ * ++ * . the burst list is deleted; in fact, the burst list already served ++ * its purpose (keeping temporarily track of the queues in a burst, ++ * so as to be able to mark them as belonging to a large burst in the ++ * previous sub-step), and now is not needed any more ++ * ++ * . the device enters a large-burst mode ++ * ++ * . if a queue Q that does not belong to the burst is activated while ++ * the device is in large-burst mode and shortly after the last time ++ * at which a queue either entered the burst list or was marked as ++ * belonging to the current large burst, then Q is immediately marked ++ * as belonging to a large burst. ++ * ++ * . 
++ * . if a queue Q that does not belong to the burst is activated a while
++ *   later, i.e., not shortly after the last time at which a queue either
++ *   entered the burst list or was marked as belonging to the current
++ *   large burst, then the current burst is deemed as finished and:
++ *
++ *   . the large-burst mode is reset if set
++ *
++ *   . the burst list is emptied
++ *
++ *   . Q is inserted in the burst list, as Q may be the first queue
++ *     in a possible new burst (then the burst list contains just Q
++ *     after this step).
++ */
++static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++			     bool idle_for_long_time)
++{
++	/*
++	 * If bfqq happened to be activated in a burst, but has been idle
++	 * for at least as long as an interactive queue, then we assume
++	 * that, in the overall I/O initiated in the burst, the I/O
++	 * associated to bfqq is finished. So bfqq does not need to be
++	 * treated as a queue belonging to a burst anymore. Accordingly,
++	 * we reset bfqq's in_large_burst flag if set, and remove bfqq
++	 * from the burst list if it's there. We do not, however, decrement
++	 * burst_size, because the fact that bfqq no longer needs to belong
++	 * to the burst list does not invalidate the fact that bfqq may
++	 * have been activated during the current burst.
++	 */
++	if (idle_for_long_time) {
++		hlist_del_init(&bfqq->burst_list_node);
++		bfq_clear_bfqq_in_large_burst(bfqq);
++	}
++
++	/*
++	 * If bfqq is already in the burst list or is part of a large
++	 * burst, then there is nothing else to do.
++	 */
++	if (!hlist_unhashed(&bfqq->burst_list_node) ||
++	    bfq_bfqq_in_large_burst(bfqq))
++		return;
++
++	/*
++	 * If bfqq's activation happens late enough, then the current
++	 * burst is finished, and related data structures must be reset.
++	 *
++	 * In this respect, consider the special case where bfqq is the very
++	 * first queue being activated. In this case, last_ins_in_burst is
++	 * not yet significant when we get here. But it is easy to verify
++	 * that, whether or not the following condition is true, bfqq will
++	 * end up being inserted into the burst list. In particular the
++	 * list will happen to contain only bfqq. And this is exactly what
++	 * has to happen, as bfqq may be the first queue in a possible
++	 * burst.
++	 */
++	if (time_is_before_jiffies(bfqd->last_ins_in_burst +
++	    bfqd->bfq_burst_interval)) {
++		bfqd->large_burst = false;
++		bfq_reset_burst_list(bfqd, bfqq);
++		return;
++	}
++
++	/*
++	 * If we get here, then bfqq is being activated shortly after the
++	 * last queue. So, if the current burst is also large, we can mark
++	 * bfqq as belonging to this large burst immediately.
++	 */
++	if (bfqd->large_burst) {
++		bfq_mark_bfqq_in_large_burst(bfqq);
++		return;
++	}
++
++	/*
++	 * If we get here, then a large-burst state has not yet been
++	 * reached, but bfqq is being activated shortly after the last
++	 * queue. Then we add bfqq to the burst.
++	 */
++	bfq_add_to_burst(bfqd, bfqq);
++}
++
++static void bfq_add_request(struct request *rq)
++{
++	struct bfq_queue *bfqq = RQ_BFQQ(rq);
++	struct bfq_entity *entity = &bfqq->entity;
++	struct bfq_data *bfqd = bfqq->bfqd;
++	struct request *next_rq, *prev;
++	unsigned long old_wr_coeff = bfqq->wr_coeff;
++	bool interactive = false;
++
++	bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
++	bfqq->queued[rq_is_sync(rq)]++;
++	bfqd->queued++;
++
++	elv_rb_add(&bfqq->sort_list, rq);
++
++	/*
++	 * Check if this request is a better next-serve candidate.
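++	 * (A summary of what is assumed here about bfq_choose_req: given
++	 * the old next_rq and the new rq, it returns the one deemed
++	 * cheaper to reach from bfqd->last_position, i.e., from where the
++	 * head is expected to be after the last dispatch; see its
++	 * definition for the exact tie-breaking rules.)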
++	 */
++	prev = bfqq->next_rq;
++	next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
++	BUG_ON(next_rq == NULL);
++	bfqq->next_rq = next_rq;
++
++	/*
++	 * Adjust priority tree position, if next_rq changes.
++	 */
++	if (prev != bfqq->next_rq)
++		bfq_rq_pos_tree_add(bfqd, bfqq);
++
++	if (!bfq_bfqq_busy(bfqq)) {
++		bool soft_rt, coop_or_in_burst,
++		     idle_for_long_time = time_is_before_jiffies(
++						bfqq->budget_timeout +
++						bfqd->bfq_wr_min_idle_time);
++
++		if (bfq_bfqq_sync(bfqq)) {
++			bool already_in_burst =
++			   !hlist_unhashed(&bfqq->burst_list_node) ||
++			   bfq_bfqq_in_large_burst(bfqq);
++			bfq_handle_burst(bfqd, bfqq, idle_for_long_time);
++			/*
++			 * If bfqq was not already in the current burst,
++			 * then, at this point, bfqq either has been
++			 * added to the current burst or has caused the
++			 * current burst to terminate. In particular, in
++			 * the second case, bfqq has become the first
++			 * queue in a possible new burst.
++			 * In both cases last_ins_in_burst needs to be
++			 * moved forward.
++			 */
++			if (!already_in_burst)
++				bfqd->last_ins_in_burst = jiffies;
++		}
++
++		coop_or_in_burst = bfq_bfqq_in_large_burst(bfqq) ||
++			bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh;
++		soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
++			!coop_or_in_burst &&
++			time_is_before_jiffies(bfqq->soft_rt_next_start);
++		interactive = !coop_or_in_burst && idle_for_long_time;
++		entity->budget = max_t(unsigned long, bfqq->max_budget,
++				       bfq_serv_to_charge(next_rq, bfqq));
++
++		if (!bfq_bfqq_IO_bound(bfqq)) {
++			if (time_before(jiffies,
++					RQ_BIC(rq)->ttime.last_end_request +
++					bfqd->bfq_slice_idle)) {
++				bfqq->requests_within_timer++;
++				if (bfqq->requests_within_timer >=
++				    bfqd->bfq_requests_within_timer)
++					bfq_mark_bfqq_IO_bound(bfqq);
++			} else
++				bfqq->requests_within_timer = 0;
++		}
++
++		if (!bfqd->low_latency)
++			goto add_bfqq_busy;
++
++		if (bfq_bfqq_just_split(bfqq))
++			goto set_ioprio_changed;
++
++		/*
++		 * If the queue:
++		 * - is not being boosted,
++		 * - has been idle for enough time,
++		 * - is not a sync queue or is linked to a bfq_io_cq (it is
++		 *   shared "by its nature" or it is not shared and its
++		 *   requests have not been redirected to a shared queue)
++		 * start a weight-raising period.
++		 */
++		if (old_wr_coeff == 1 && (interactive || soft_rt) &&
++		    (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) {
++			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
++			if (interactive)
++				bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
++			else
++				bfqq->wr_cur_max_time =
++					bfqd->bfq_wr_rt_max_time;
++			bfq_log_bfqq(bfqd, bfqq,
++				     "wrais starting at %lu, rais_max_time %u",
++				     jiffies,
++				     jiffies_to_msecs(bfqq->wr_cur_max_time));
++		} else if (old_wr_coeff > 1) {
++			if (interactive)
++				bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
++			else if (coop_or_in_burst ||
++				 (bfqq->wr_cur_max_time ==
++				  bfqd->bfq_wr_rt_max_time &&
++				  !soft_rt)) {
++				bfqq->wr_coeff = 1;
++				bfq_log_bfqq(bfqd, bfqq,
++					     "wrais ending at %lu, rais_max_time %u",
++					     jiffies,
++					     jiffies_to_msecs(bfqq->
++						     wr_cur_max_time));
++			} else if (time_before(
++					bfqq->last_wr_start_finish +
++					bfqq->wr_cur_max_time,
++					jiffies +
++					bfqd->bfq_wr_rt_max_time) &&
++				   soft_rt) {
++				/*
++				 * The remaining weight-raising time is lower
++				 * than bfqd->bfq_wr_rt_max_time, which means
++				 * that the application is enjoying weight
++				 * raising either because deemed soft-rt in
++				 * the near past, or because deemed interactive
++				 * long ago.
++ * In both cases, resetting now the current ++ * remaining weight-raising time for the ++ * application to the weight-raising duration ++ * for soft rt applications would not cause any ++ * latency increase for the application (as the ++ * new duration would be higher than the ++ * remaining time). ++ * ++ * In addition, the application is now meeting ++ * the requirements for being deemed soft rt. ++ * In the end we can correctly and safely ++ * (re)charge the weight-raising duration for ++ * the application with the weight-raising ++ * duration for soft rt applications. ++ * ++ * In particular, doing this recharge now, i.e., ++ * before the weight-raising period for the ++ * application finishes, reduces the probability ++ * of the following negative scenario: ++ * 1) the weight of a soft rt application is ++ * raised at startup (as for any newly ++ * created application), ++ * 2) since the application is not interactive, ++ * at a certain time weight-raising is ++ * stopped for the application, ++ * 3) at that time the application happens to ++ * still have pending requests, and hence ++ * is destined to not have a chance to be ++ * deemed soft rt before these requests are ++ * completed (see the comments to the ++ * function bfq_bfqq_softrt_next_start() ++ * for details on soft rt detection), ++ * 4) these pending requests experience a high ++ * latency because the application is not ++ * weight-raised while they are pending. ++ */ ++ bfqq->last_wr_start_finish = jiffies; ++ bfqq->wr_cur_max_time = ++ bfqd->bfq_wr_rt_max_time; ++ } ++ } ++set_ioprio_changed: ++ if (old_wr_coeff != bfqq->wr_coeff) ++ entity->ioprio_changed = 1; ++add_bfqq_busy: ++ bfqq->last_idle_bklogged = jiffies; ++ bfqq->service_from_backlogged = 0; ++ bfq_clear_bfqq_softrt_update(bfqq); ++ bfq_add_bfqq_busy(bfqd, bfqq); ++ } else { ++ if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) && ++ time_is_before_jiffies( ++ bfqq->last_wr_start_finish + ++ bfqd->bfq_wr_min_inter_arr_async)) { ++ bfqq->wr_coeff = bfqd->bfq_wr_coeff; ++ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); ++ ++ bfqd->wr_busy_queues++; ++ entity->ioprio_changed = 1; ++ bfq_log_bfqq(bfqd, bfqq, ++ "non-idle wrais starting at %lu, rais_max_time %u", ++ jiffies, ++ jiffies_to_msecs(bfqq->wr_cur_max_time)); ++ } ++ if (prev != bfqq->next_rq) ++ bfq_updated_next_req(bfqd, bfqq); ++ } ++ ++ if (bfqd->low_latency && ++ (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive)) ++ bfqq->last_wr_start_finish = jiffies; ++} ++ ++static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, ++ struct bio *bio) ++{ ++ struct task_struct *tsk = current; ++ struct bfq_io_cq *bic; ++ struct bfq_queue *bfqq; ++ ++ bic = bfq_bic_lookup(bfqd, tsk->io_context); ++ if (bic == NULL) ++ return NULL; ++ ++ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); ++ if (bfqq != NULL) ++ return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio)); ++ ++ return NULL; ++} ++ ++static void bfq_activate_request(struct request_queue *q, struct request *rq) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ ++ bfqd->rq_in_driver++; ++ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); ++ bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", ++ (long long unsigned)bfqd->last_position); ++} ++ ++static inline void bfq_deactivate_request(struct request_queue *q, ++ struct request *rq) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ ++ BUG_ON(bfqd->rq_in_driver == 0); ++ bfqd->rq_in_driver--; ++} ++ ++static void bfq_remove_request(struct 
request *rq) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ struct bfq_data *bfqd = bfqq->bfqd; ++ const int sync = rq_is_sync(rq); ++ ++ if (bfqq->next_rq == rq) { ++ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); ++ bfq_updated_next_req(bfqd, bfqq); ++ } ++ ++ list_del_init(&rq->queuelist); ++ BUG_ON(bfqq->queued[sync] == 0); ++ bfqq->queued[sync]--; ++ bfqd->queued--; ++ elv_rb_del(&bfqq->sort_list, rq); ++ ++ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { ++ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) ++ bfq_del_bfqq_busy(bfqd, bfqq, 1); ++ /* ++ * Remove queue from request-position tree as it is empty. ++ */ ++ if (bfqq->pos_root != NULL) { ++ rb_erase(&bfqq->pos_node, bfqq->pos_root); ++ bfqq->pos_root = NULL; ++ } ++ } ++ ++ if (rq->cmd_flags & REQ_META) { ++ BUG_ON(bfqq->meta_pending == 0); ++ bfqq->meta_pending--; ++ } ++} ++ ++static int bfq_merge(struct request_queue *q, struct request **req, ++ struct bio *bio) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct request *__rq; ++ ++ __rq = bfq_find_rq_fmerge(bfqd, bio); ++ if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) { ++ *req = __rq; ++ return ELEVATOR_FRONT_MERGE; ++ } ++ ++ return ELEVATOR_NO_MERGE; ++} ++ ++static void bfq_merged_request(struct request_queue *q, struct request *req, ++ int type) ++{ ++ if (type == ELEVATOR_FRONT_MERGE && ++ rb_prev(&req->rb_node) && ++ blk_rq_pos(req) < ++ blk_rq_pos(container_of(rb_prev(&req->rb_node), ++ struct request, rb_node))) { ++ struct bfq_queue *bfqq = RQ_BFQQ(req); ++ struct bfq_data *bfqd = bfqq->bfqd; ++ struct request *prev, *next_rq; ++ ++ /* Reposition request in its sort_list */ ++ elv_rb_del(&bfqq->sort_list, req); ++ elv_rb_add(&bfqq->sort_list, req); ++ /* Choose next request to be served for bfqq */ ++ prev = bfqq->next_rq; ++ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, req, ++ bfqd->last_position); ++ BUG_ON(next_rq == NULL); ++ bfqq->next_rq = next_rq; ++ /* ++ * If next_rq changes, update both the queue's budget to ++ * fit the new request and the queue's position in its ++ * rq_pos_tree. ++ */ ++ if (prev != bfqq->next_rq) { ++ bfq_updated_next_req(bfqd, bfqq); ++ bfq_rq_pos_tree_add(bfqd, bfqq); ++ } ++ } ++} ++ ++static void bfq_merged_requests(struct request_queue *q, struct request *rq, ++ struct request *next) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ ++ /* ++ * Reposition in fifo if next is older than rq. 
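++	 * (Since next is about to be removed, having been merged into rq,
++	 * rq inherits next's earlier list position and fifo_time, so the
++	 * overall FIFO expiration order is preserved; an illustrative
++	 * restatement of the check below.)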
++ */ ++ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && ++ time_before(next->fifo_time, rq->fifo_time)) { ++ list_move(&rq->queuelist, &next->queuelist); ++ rq->fifo_time = next->fifo_time; ++ } ++ ++ if (bfqq->next_rq == next) ++ bfqq->next_rq = rq; ++ ++ bfq_remove_request(next); ++} ++ ++/* Must be called with bfqq != NULL */ ++static inline void bfq_bfqq_end_wr(struct bfq_queue *bfqq) ++{ ++ BUG_ON(bfqq == NULL); ++ if (bfq_bfqq_busy(bfqq)) ++ bfqq->bfqd->wr_busy_queues--; ++ bfqq->wr_coeff = 1; ++ bfqq->wr_cur_max_time = 0; ++ /* Trigger a weight change on the next activation of the queue */ ++ bfqq->entity.ioprio_changed = 1; ++} ++ ++static void bfq_end_wr_async_queues(struct bfq_data *bfqd, ++ struct bfq_group *bfqg) ++{ ++ int i, j; ++ ++ for (i = 0; i < 2; i++) ++ for (j = 0; j < IOPRIO_BE_NR; j++) ++ if (bfqg->async_bfqq[i][j] != NULL) ++ bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]); ++ if (bfqg->async_idle_bfqq != NULL) ++ bfq_bfqq_end_wr(bfqg->async_idle_bfqq); ++} ++ ++static void bfq_end_wr(struct bfq_data *bfqd) ++{ ++ struct bfq_queue *bfqq; ++ ++ spin_lock_irq(bfqd->queue->queue_lock); ++ ++ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) ++ bfq_bfqq_end_wr(bfqq); ++ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) ++ bfq_bfqq_end_wr(bfqq); ++ bfq_end_wr_async(bfqd); ++ ++ spin_unlock_irq(bfqd->queue->queue_lock); ++} ++ ++static inline sector_t bfq_io_struct_pos(void *io_struct, bool request) ++{ ++ if (request) ++ return blk_rq_pos(io_struct); ++ else ++ return ((struct bio *)io_struct)->bi_iter.bi_sector; ++} ++ ++static inline sector_t bfq_dist_from(sector_t pos1, ++ sector_t pos2) ++{ ++ if (pos1 >= pos2) ++ return pos1 - pos2; ++ else ++ return pos2 - pos1; ++} ++ ++static inline int bfq_rq_close_to_sector(void *io_struct, bool request, ++ sector_t sector) ++{ ++ return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <= ++ BFQQ_SEEK_THR; ++} ++ ++static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector) ++{ ++ struct rb_root *root = &bfqd->rq_pos_tree; ++ struct rb_node *parent, *node; ++ struct bfq_queue *__bfqq; ++ ++ if (RB_EMPTY_ROOT(root)) ++ return NULL; ++ ++ /* ++ * First, if we find a request starting at the end of the last ++ * request, choose it. ++ */ ++ __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL); ++ if (__bfqq != NULL) ++ return __bfqq; ++ ++ /* ++ * If the exact sector wasn't found, the parent of the NULL leaf ++ * will contain the closest sector (rq_pos_tree sorted by ++ * next_request position). 
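++	 * (Purely illustrative example, with assumed sector numbers: if
++	 * the tree holds queues whose next requests start at sectors 512,
++	 * 900 and 2048, a lookup for sector 1000 finds no exact match and
++	 * leaves parent at, say, the node for 900; if 900 is not within
++	 * BFQQ_SEEK_THR of 1000, the rb_next() step below tries the next
++	 * node, 2048, before giving up.)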
++ */ ++ __bfqq = rb_entry(parent, struct bfq_queue, pos_node); ++ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) ++ return __bfqq; ++ ++ if (blk_rq_pos(__bfqq->next_rq) < sector) ++ node = rb_next(&__bfqq->pos_node); ++ else ++ node = rb_prev(&__bfqq->pos_node); ++ if (node == NULL) ++ return NULL; ++ ++ __bfqq = rb_entry(node, struct bfq_queue, pos_node); ++ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) ++ return __bfqq; ++ ++ return NULL; ++} ++ ++/* ++ * bfqd - obvious ++ * cur_bfqq - passed in so that we don't decide that the current queue ++ * is closely cooperating with itself ++ * sector - used as a reference point to search for a close queue ++ */ ++static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, ++ struct bfq_queue *cur_bfqq, ++ sector_t sector) ++{ ++ struct bfq_queue *bfqq; ++ ++ if (bfq_class_idle(cur_bfqq)) ++ return NULL; ++ if (!bfq_bfqq_sync(cur_bfqq)) ++ return NULL; ++ if (BFQQ_SEEKY(cur_bfqq)) ++ return NULL; ++ ++ /* If device has only one backlogged bfq_queue, don't search. */ ++ if (bfqd->busy_queues == 1) ++ return NULL; ++ ++ /* ++ * We should notice if some of the queues are cooperating, e.g. ++ * working closely on the same area of the disk. In that case, ++ * we can group them together and don't waste time idling. ++ */ ++ bfqq = bfqq_close(bfqd, sector); ++ if (bfqq == NULL || bfqq == cur_bfqq) ++ return NULL; ++ ++ /* ++ * Do not merge queues from different bfq_groups. ++ */ ++ if (bfqq->entity.parent != cur_bfqq->entity.parent) ++ return NULL; ++ ++ /* ++ * It only makes sense to merge sync queues. ++ */ ++ if (!bfq_bfqq_sync(bfqq)) ++ return NULL; ++ if (BFQQ_SEEKY(bfqq)) ++ return NULL; ++ ++ /* ++ * Do not merge queues of different priority classes. ++ */ ++ if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq)) ++ return NULL; ++ ++ return bfqq; ++} ++ ++static struct bfq_queue * ++bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) ++{ ++ int process_refs, new_process_refs; ++ struct bfq_queue *__bfqq; ++ ++ /* ++ * If there are no process references on the new_bfqq, then it is ++ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain ++ * may have dropped their last reference (not just their last process ++ * reference). ++ */ ++ if (!bfqq_process_refs(new_bfqq)) ++ return NULL; ++ ++ /* Avoid a circular list and skip interim queue merges. */ ++ while ((__bfqq = new_bfqq->new_bfqq)) { ++ if (__bfqq == bfqq) ++ return NULL; ++ new_bfqq = __bfqq; ++ } ++ ++ process_refs = bfqq_process_refs(bfqq); ++ new_process_refs = bfqq_process_refs(new_bfqq); ++ /* ++ * If the process for the bfqq has gone away, there is no ++ * sense in merging the queues. ++ */ ++ if (process_refs == 0 || new_process_refs == 0) ++ return NULL; ++ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", ++ new_bfqq->pid); ++ ++ /* ++ * Merging is just a redirection: the requests of the process ++ * owning one of the two queues are redirected to the other queue. ++ * The latter queue, in its turn, is set as shared if this is the ++ * first time that the requests of some process are redirected to ++ * it. ++ * ++ * We redirect bfqq to new_bfqq and not the opposite, because we ++ * are in the context of the process owning bfqq, hence we have ++ * the io_cq of this process. So we can immediately configure this ++ * io_cq to redirect the requests of the process to new_bfqq. 
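++	 * (For instance, with two hypothetical processes A and B owning
++	 * bfqq and new_bfqq respectively, A's future requests are simply
++	 * enqueued in new_bfqq from now on; an illustrative restatement
++	 * of the redirection described above.)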
++	 *
++	 * NOTE, even if new_bfqq coincides with the in-service queue, the
++	 * io_cq of new_bfqq is not available, because, if the in-service
++	 * queue is shared, bfqd->in_service_bic may not point to the
++	 * io_cq of the in-service queue.
++	 * Redirecting the requests of the process owning bfqq to the
++	 * currently in-service queue is in any case the best option, as
++	 * we feed the in-service queue with new requests close to the
++	 * last request served and, by doing so, hopefully increase the
++	 * throughput.
++	 */
++	bfqq->new_bfqq = new_bfqq;
++	atomic_add(process_refs, &new_bfqq->ref);
++	return new_bfqq;
++}
++
++/*
++ * Attempt to schedule a merge of bfqq with the currently in-service queue
++ * or with a close queue among the scheduled queues.
++ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue
++ * structure otherwise.
++ *
++ * The OOM queue is not allowed to participate in cooperation: in fact, since
++ * the requests temporarily redirected to the OOM queue could be redirected
++ * again to dedicated queues at any time, the state needed to correctly
++ * handle merging with the OOM queue would be quite complex and expensive
++ * to maintain. Besides, in such a critical condition as out of memory, the
++ * benefits of queue merging may be of little relevance, or even negligible.
++ */
++static struct bfq_queue *
++bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++		     void *io_struct, bool request)
++{
++	struct bfq_queue *in_service_bfqq, *new_bfqq;
++
++	if (bfqq->new_bfqq)
++		return bfqq->new_bfqq;
++
++	if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
++		return NULL;
++
++	in_service_bfqq = bfqd->in_service_queue;
++
++	if (in_service_bfqq == NULL || in_service_bfqq == bfqq ||
++	    !bfqd->in_service_bic ||
++	    unlikely(in_service_bfqq == &bfqd->oom_bfqq))
++		goto check_scheduled;
++
++	if (bfq_class_idle(in_service_bfqq) || bfq_class_idle(bfqq))
++		goto check_scheduled;
++
++	if (bfq_class_rt(in_service_bfqq) != bfq_class_rt(bfqq))
++		goto check_scheduled;
++
++	if (in_service_bfqq->entity.parent != bfqq->entity.parent)
++		goto check_scheduled;
++
++	if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
++	    bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) {
++		new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
++		if (new_bfqq != NULL)
++			return new_bfqq; /* Merge with in-service queue */
++	}
++
++	/*
++	 * Check whether there is a cooperator among currently scheduled
++	 * queues. The only thing we need is that the bio/request is not
++	 * NULL, as we need it to establish whether a cooperator exists.
++	 */
++check_scheduled:
++	new_bfqq = bfq_close_cooperator(bfqd, bfqq,
++					bfq_io_struct_pos(io_struct, request));
++	if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq))
++		return bfq_setup_merge(bfqq, new_bfqq);
++
++	return NULL;
++}
++
++static inline void
++bfq_bfqq_save_state(struct bfq_queue *bfqq)
++{
++	/*
++	 * If bfqq->bic == NULL, the queue is already shared or its requests
++	 * have already been redirected to a shared queue; both idle window
++	 * and weight raising state have already been saved. Do nothing.
++	 */
++	if (bfqq->bic == NULL)
++		return;
++	if (bfqq->bic->wr_time_left)
++		/*
++		 * This is the queue of a just-started process, and would
++		 * deserve weight raising: we set wr_time_left to the full
++		 * weight-raising duration to trigger weight-raising when
++		 * and if the queue is split and the first request of the
++		 * queue is enqueued.
++ */ ++ bfqq->bic->wr_time_left = bfq_wr_duration(bfqq->bfqd); ++ else if (bfqq->wr_coeff > 1) { ++ unsigned long wr_duration = ++ jiffies - bfqq->last_wr_start_finish; ++ /* ++ * It may happen that a queue's weight raising period lasts ++ * longer than its wr_cur_max_time, as weight raising is ++ * handled only when a request is enqueued or dispatched (it ++ * does not use any timer). If the weight raising period is ++ * about to end, don't save it. ++ */ ++ if (bfqq->wr_cur_max_time <= wr_duration) ++ bfqq->bic->wr_time_left = 0; ++ else ++ bfqq->bic->wr_time_left = ++ bfqq->wr_cur_max_time - wr_duration; ++ /* ++ * The bfq_queue is becoming shared or the requests of the ++ * process owning the queue are being redirected to a shared ++ * queue. Stop the weight raising period of the queue, as in ++ * both cases it should not be owned by an interactive or ++ * soft real-time application. ++ */ ++ bfq_bfqq_end_wr(bfqq); ++ } else ++ bfqq->bic->wr_time_left = 0; ++ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); ++ bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); ++ bfqq->bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); ++ bfqq->bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); ++ bfqq->bic->cooperations++; ++ bfqq->bic->failed_cooperations = 0; ++} ++ ++static inline void ++bfq_get_bic_reference(struct bfq_queue *bfqq) ++{ ++ /* ++ * If bfqq->bic has a non-NULL value, the bic to which it belongs ++ * is about to begin using a shared bfq_queue. ++ */ ++ if (bfqq->bic) ++ atomic_long_inc(&bfqq->bic->icq.ioc->refcount); ++} ++ ++static void ++bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, ++ struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) ++{ ++ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", ++ (long unsigned)new_bfqq->pid); ++ /* Save weight raising and idle window of the merged queues */ ++ bfq_bfqq_save_state(bfqq); ++ bfq_bfqq_save_state(new_bfqq); ++ if (bfq_bfqq_IO_bound(bfqq)) ++ bfq_mark_bfqq_IO_bound(new_bfqq); ++ bfq_clear_bfqq_IO_bound(bfqq); ++ /* ++ * Grab a reference to the bic, to prevent it from being destroyed ++ * before being possibly touched by a bfq_split_bfqq(). ++ */ ++ bfq_get_bic_reference(bfqq); ++ bfq_get_bic_reference(new_bfqq); ++ /* ++ * Merge queues (that is, let bic redirect its requests to new_bfqq) ++ */ ++ bic_set_bfqq(bic, new_bfqq, 1); ++ bfq_mark_bfqq_coop(new_bfqq); ++ /* ++ * new_bfqq now belongs to at least two bics (it is a shared queue): ++ * set new_bfqq->bic to NULL. bfqq either: ++ * - does not belong to any bic any more, and hence bfqq->bic must ++ * be set to NULL, or ++ * - is a queue whose owning bics have already been redirected to a ++ * different queue, hence the queue is destined to not belong to ++ * any bic soon and bfqq->bic is already NULL (therefore the next ++ * assignment causes no harm). 
++ */ ++ new_bfqq->bic = NULL; ++ bfqq->bic = NULL; ++ bfq_put_queue(bfqq); ++} ++ ++static inline void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq) ++{ ++ struct bfq_io_cq *bic = bfqq->bic; ++ struct bfq_data *bfqd = bfqq->bfqd; ++ ++ if (bic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) { ++ bic->failed_cooperations++; ++ if (bic->failed_cooperations >= bfqd->bfq_failed_cooperations) ++ bic->cooperations = 0; ++ } ++} ++ ++static int bfq_allow_merge(struct request_queue *q, struct request *rq, ++ struct bio *bio) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct bfq_io_cq *bic; ++ struct bfq_queue *bfqq, *new_bfqq; ++ ++ /* ++ * Disallow merge of a sync bio into an async request. ++ */ ++ if (bfq_bio_sync(bio) && !rq_is_sync(rq)) ++ return 0; ++ ++ /* ++ * Lookup the bfqq that this bio will be queued with. Allow ++ * merge only if rq is queued there. ++ * Queue lock is held here. ++ */ ++ bic = bfq_bic_lookup(bfqd, current->io_context); ++ if (bic == NULL) ++ return 0; ++ ++ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); ++ /* ++ * We take advantage of this function to perform an early merge ++ * of the queues of possible cooperating processes. ++ */ ++ if (bfqq != NULL) { ++ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false); ++ if (new_bfqq != NULL) { ++ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq); ++ /* ++ * If we get here, the bio will be queued in the ++ * shared queue, i.e., new_bfqq, so use new_bfqq ++ * to decide whether bio and rq can be merged. ++ */ ++ bfqq = new_bfqq; ++ } else ++ bfq_bfqq_increase_failed_cooperations(bfqq); ++ } ++ ++ return bfqq == RQ_BFQQ(rq); ++} ++ ++static void __bfq_set_in_service_queue(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ if (bfqq != NULL) { ++ bfq_mark_bfqq_must_alloc(bfqq); ++ bfq_mark_bfqq_budget_new(bfqq); ++ bfq_clear_bfqq_fifo_expire(bfqq); ++ ++ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; ++ ++ bfq_log_bfqq(bfqd, bfqq, ++ "set_in_service_queue, cur-budget = %lu", ++ bfqq->entity.budget); ++ } ++ ++ bfqd->in_service_queue = bfqq; ++} ++ ++/* ++ * Get and set a new queue for service. ++ */ ++static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd) ++{ ++ struct bfq_queue *bfqq = bfq_get_next_queue(bfqd); ++ ++ __bfq_set_in_service_queue(bfqd, bfqq); ++ return bfqq; ++} ++ ++/* ++ * If enough samples have been computed, return the current max budget ++ * stored in bfqd, which is dynamically updated according to the ++ * estimated disk peak rate; otherwise return the default max budget ++ */ ++static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) ++{ ++ if (bfqd->budgets_assigned < 194) ++ return bfq_default_max_budget; ++ else ++ return bfqd->bfq_max_budget; ++} ++ ++/* ++ * Return min budget, which is a fraction of the current or default ++ * max budget (trying with 1/32) ++ */ ++static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) ++{ ++ if (bfqd->budgets_assigned < 194) ++ return bfq_default_max_budget / 32; ++ else ++ return bfqd->bfq_max_budget / 32; ++} ++ ++static void bfq_arm_slice_timer(struct bfq_data *bfqd) ++{ ++ struct bfq_queue *bfqq = bfqd->in_service_queue; ++ struct bfq_io_cq *bic; ++ unsigned long sl; ++ ++ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); ++ ++ /* Processes have exited, don't wait. 
*/
++	bic = bfqd->in_service_bic;
++	if (bic == NULL || atomic_read(&bic->icq.ioc->active_ref) == 0)
++		return;
++
++	bfq_mark_bfqq_wait_request(bfqq);
++
++	/*
++	 * We don't want to idle for seeks, but we do want to allow
++	 * fair distribution of slice time for a process doing back-to-back
++	 * seeks. So allow a little bit of time for it to submit a new rq.
++	 *
++	 * To prevent processes with (partly) seeky workloads from
++	 * being too ill-treated, grant them a small fraction of the
++	 * assigned budget before reducing the waiting time to
++	 * BFQ_MIN_TT. This happened to help reduce latency.
++	 */
++	sl = bfqd->bfq_slice_idle;
++	/*
++	 * Unless the queue is being weight-raised, grant only minimum idle
++	 * time if the queue either has been seeky for long enough or has
++	 * already proved to be constantly seeky.
++	 */
++	if (bfq_sample_valid(bfqq->seek_samples) &&
++	    ((BFQQ_SEEKY(bfqq) && bfqq->entity.service >
++				  bfq_max_budget(bfqq->bfqd) / 8) ||
++	      bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1)
++		sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
++	else if (bfqq->wr_coeff > 1)
++		sl = sl * 3;
++	bfqd->last_idling_start = ktime_get();
++	mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
++	bfq_log(bfqd, "arm idle: %u/%u ms",
++		jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
++}
++
++/*
++ * Set the maximum time for the in-service queue to consume its
++ * budget. This prevents seeky processes from lowering the disk
++ * throughput (always guaranteed with a time slice scheme as in CFQ).
++ */
++static void bfq_set_budget_timeout(struct bfq_data *bfqd)
++{
++	struct bfq_queue *bfqq = bfqd->in_service_queue;
++	unsigned int timeout_coeff;
++	if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time)
++		timeout_coeff = 1;
++	else
++		timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
++
++	bfqd->last_budget_start = ktime_get();
++
++	bfq_clear_bfqq_budget_new(bfqq);
++	bfqq->budget_timeout = jiffies +
++		bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff;
++
++	bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u",
++		jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] *
++		timeout_coeff));
++}
++
++/*
++ * Move request from internal lists to the request queue dispatch list.
++ */
++static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
++{
++	struct bfq_data *bfqd = q->elevator->elevator_data;
++	struct bfq_queue *bfqq = RQ_BFQQ(rq);
++
++	/*
++	 * For consistency, the next instruction should have been executed
++	 * after removing the request from the queue and dispatching it.
++	 * We instead execute this instruction before bfq_remove_request()
++	 * (and hence introduce a temporary inconsistency), for efficiency.
++	 * In fact, in a forced_dispatch, this prevents the two counters
++	 * related to bfqq->dispatched from being uselessly decremented if
++	 * bfqq is not in service, and then incremented again after
++	 * bfqq->dispatched itself is incremented.
++	 */
++	bfqq->dispatched++;
++	bfq_remove_request(rq);
++	elv_dispatch_sort(q, rq);
++
++	if (bfq_bfqq_sync(bfqq))
++		bfqd->sync_flight++;
++}
++
++/*
++ * Return expired entry, or NULL to just start from scratch in rbtree.
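++ * (That is, a request is returned only once jiffies has moved past its
++ * rq->fifo_time; an illustrative restatement of the checks below.)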
++ */ ++static struct request *bfq_check_fifo(struct bfq_queue *bfqq) ++{ ++ struct request *rq = NULL; ++ ++ if (bfq_bfqq_fifo_expire(bfqq)) ++ return NULL; ++ ++ bfq_mark_bfqq_fifo_expire(bfqq); ++ ++ if (list_empty(&bfqq->fifo)) ++ return NULL; ++ ++ rq = rq_entry_fifo(bfqq->fifo.next); ++ ++ if (time_before(jiffies, rq->fifo_time)) ++ return NULL; ++ ++ return rq; ++} ++ ++static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ return entity->budget - entity->service; ++} ++ ++static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ BUG_ON(bfqq != bfqd->in_service_queue); ++ ++ __bfq_bfqd_reset_in_service(bfqd); ++ ++ /* ++ * If this bfqq is shared between multiple processes, check ++ * to make sure that those processes are still issuing I/Os ++ * within the mean seek distance. If not, it may be time to ++ * break the queues apart again. ++ */ ++ if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) ++ bfq_mark_bfqq_split_coop(bfqq); ++ ++ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { ++ /* ++ * Overloading budget_timeout field to store the time ++ * at which the queue remains with no backlog; used by ++ * the weight-raising mechanism. ++ */ ++ bfqq->budget_timeout = jiffies; ++ bfq_del_bfqq_busy(bfqd, bfqq, 1); ++ } else { ++ bfq_activate_bfqq(bfqd, bfqq); ++ /* ++ * Resort priority tree of potential close cooperators. ++ */ ++ bfq_rq_pos_tree_add(bfqd, bfqq); ++ } ++} ++ ++/** ++ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior. ++ * @bfqd: device data. ++ * @bfqq: queue to update. ++ * @reason: reason for expiration. ++ * ++ * Handle the feedback on @bfqq budget. See the body for detailed ++ * comments. ++ */ ++static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ enum bfqq_expiration reason) ++{ ++ struct request *next_rq; ++ unsigned long budget, min_budget; ++ ++ budget = bfqq->max_budget; ++ min_budget = bfq_min_budget(bfqd); ++ ++ BUG_ON(bfqq != bfqd->in_service_queue); ++ ++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu", ++ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); ++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu", ++ budget, bfq_min_budget(bfqd)); ++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", ++ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); ++ ++ if (bfq_bfqq_sync(bfqq)) { ++ switch (reason) { ++ /* ++ * Caveat: in all the following cases we trade latency ++ * for throughput. ++ */ ++ case BFQ_BFQQ_TOO_IDLE: ++ /* ++ * This is the only case where we may reduce ++ * the budget: if there is no request of the ++ * process still waiting for completion, then ++ * we assume (tentatively) that the timer has ++ * expired because the batch of requests of ++ * the process could have been served with a ++ * smaller budget. Hence, betting that ++ * process will behave in the same way when it ++ * becomes backlogged again, we reduce its ++ * next budget. As long as we guess right, ++ * this budget cut reduces the latency ++ * experienced by the process. ++ * ++ * However, if there are still outstanding ++ * requests, then the process may have not yet ++ * issued its next request just because it is ++ * still waiting for the completion of some of ++ * the still outstanding ones. So in this ++ * subcase we do not reduce its budget, on the ++ * contrary we increase it to possibly boost ++ * the throughput, as discussed in the ++ * comments to the BUDGET_TIMEOUT case. 
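++			 * As a purely illustrative example, with
++			 * assumed numbers: for min_budget = 64 and a
++			 * current budget of 1024, a queue with no
++			 * outstanding requests has its budget cut to
++			 * 1024 - 4 * 64 = 768, whereas a queue with
++			 * outstanding requests has it doubled to 2048
++			 * (capped by bfqd->bfq_max_budget).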
++			 */
++			if (bfqq->dispatched > 0) /* still outstanding reqs */
++				budget = min(budget * 2, bfqd->bfq_max_budget);
++			else {
++				if (budget > 5 * min_budget)
++					budget -= 4 * min_budget;
++				else
++					budget = min_budget;
++			}
++			break;
++		case BFQ_BFQQ_BUDGET_TIMEOUT:
++			/*
++			 * We double the budget here because: 1) it
++			 * gives the chance to boost the throughput if
++			 * this is not a seeky process (which may have
++			 * bumped into this timeout because of, e.g.,
++			 * ZBR), 2) together with charge_full_budget
++			 * it helps give seeky processes higher
++			 * timestamps, and hence be served less
++			 * frequently.
++			 */
++			budget = min(budget * 2, bfqd->bfq_max_budget);
++			break;
++		case BFQ_BFQQ_BUDGET_EXHAUSTED:
++			/*
++			 * The process still has backlog, and did not
++			 * let either the budget timeout or the disk
++			 * idling timeout expire. Hence it is not
++			 * seeky, has a short thinktime and may be
++			 * happy with a higher budget too. So
++			 * definitely increase the budget of this good
++			 * candidate to boost the disk throughput.
++			 */
++			budget = min(budget * 4, bfqd->bfq_max_budget);
++			break;
++		case BFQ_BFQQ_NO_MORE_REQUESTS:
++			/*
++			 * Leave the budget unchanged.
++			 */
++		default:
++			return;
++		}
++	} else /* async queue */
++		/* async queues always get the maximum possible budget
++		 * (their ability to dispatch is limited by
++		 * @bfqd->bfq_max_budget_async_rq).
++		 */
++		budget = bfqd->bfq_max_budget;
++
++	bfqq->max_budget = budget;
++
++	if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 &&
++	    bfqq->max_budget > bfqd->bfq_max_budget)
++		bfqq->max_budget = bfqd->bfq_max_budget;
++
++	/*
++	 * Make sure that we have enough budget for the next request.
++	 * Since the finish time of the bfqq must be kept in sync with
++	 * the budget, be sure to call __bfq_bfqq_expire() after the
++	 * update.
++	 */
++	next_rq = bfqq->next_rq;
++	if (next_rq != NULL)
++		bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
++					    bfq_serv_to_charge(next_rq, bfqq));
++	else
++		bfqq->entity.budget = bfqq->max_budget;
++
++	bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu",
++		     next_rq != NULL ? blk_rq_sectors(next_rq) : 0,
++		     bfqq->entity.budget);
++}
++
++static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout)
++{
++	unsigned long max_budget;
++
++	/*
++	 * The max_budget calculated when autotuning is equal to the
++	 * number of sectors transferred in timeout_sync at the
++	 * estimated peak rate.
++	 */
++	max_budget = (unsigned long)(peak_rate * 1000 *
++				     timeout >> BFQ_RATE_SHIFT);
++
++	return max_budget;
++}
++
++/*
++ * In addition to updating the peak rate, checks whether the process
++ * is "slow", and returns 1 if so. This slow flag is used, in addition
++ * to the budget timeout, to reduce the amount of service provided to
++ * seeky processes, and hence reduce their chances of lowering the
++ * throughput. See the code for more details.
++ */
++static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++				int compensate, enum bfqq_expiration reason)
++{
++	u64 bw, usecs, expected, timeout;
++	ktime_t delta;
++	int update = 0;
++
++	if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq))
++		return 0;
++
++	if (compensate)
++		delta = bfqd->last_idling_start;
++	else
++		delta = ktime_get();
++	delta = ktime_sub(delta, bfqd->last_budget_start);
++	usecs = ktime_to_us(delta);
++
++	/* Don't trust short/unrealistic values. */
++	if (usecs < 100 || usecs >= LONG_MAX)
++		return 0;
++
++	/*
++	 * Calculate the bandwidth for the last slice.
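++	 * (Purely illustrative example, with assumed numbers: 2048
++	 * sectors served in 10000 usecs correspond to about 0.2
++	 * sectors/usec, i.e., roughly 100 MB/s with 512-byte sectors.)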
We use a 64-bit
++	 * value to store the peak rate, in sectors per usec in fixed
++	 * point math. We do so to have enough precision in the estimate
++	 * and to avoid overflows.
++	 */
++	bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT;
++	do_div(bw, (unsigned long)usecs);
++
++	timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
++
++	/*
++	 * Use only long (> 20ms) intervals to filter out spikes for
++	 * the peak rate estimation.
++	 */
++	if (usecs > 20000) {
++		if (bw > bfqd->peak_rate ||
++		   (!BFQQ_SEEKY(bfqq) &&
++		    reason == BFQ_BFQQ_BUDGET_TIMEOUT)) {
++			bfq_log(bfqd, "measured bw =%llu", bw);
++			/*
++			 * To smooth oscillations use a low-pass filter with
++			 * alpha=7/8, i.e.,
++			 * new_rate = (7/8) * old_rate + (1/8) * bw
++			 */
++			do_div(bw, 8);
++			if (bw == 0)
++				return 0;
++			bfqd->peak_rate *= 7;
++			do_div(bfqd->peak_rate, 8);
++			bfqd->peak_rate += bw;
++			update = 1;
++			bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate);
++		}
++
++		update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1;
++
++		if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES)
++			bfqd->peak_rate_samples++;
++
++		if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES &&
++		    update) {
++			int dev_type = blk_queue_nonrot(bfqd->queue);
++			if (bfqd->bfq_user_max_budget == 0) {
++				bfqd->bfq_max_budget =
++					bfq_calc_max_budget(bfqd->peak_rate,
++							    timeout);
++				bfq_log(bfqd, "new max_budget=%lu",
++					bfqd->bfq_max_budget);
++			}
++			if (bfqd->device_speed == BFQ_BFQD_FAST &&
++			    bfqd->peak_rate < device_speed_thresh[dev_type]) {
++				bfqd->device_speed = BFQ_BFQD_SLOW;
++				bfqd->RT_prod = R_slow[dev_type] *
++						T_slow[dev_type];
++			} else if (bfqd->device_speed == BFQ_BFQD_SLOW &&
++			    bfqd->peak_rate > device_speed_thresh[dev_type]) {
++				bfqd->device_speed = BFQ_BFQD_FAST;
++				bfqd->RT_prod = R_fast[dev_type] *
++						T_fast[dev_type];
++			}
++		}
++	}
++
++	/*
++	 * If the process has been served for too short a time
++	 * interval to let its possible sequential accesses prevail over
++	 * the initial seek time needed to move the disk head to the
++	 * first sector it requested, then give the process a chance
++	 * and for the moment return false.
++	 */
++	if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8)
++		return 0;
++
++	/*
++	 * A process is considered ``slow'' (i.e., seeky, so that we
++	 * cannot treat it fairly in the service domain, as it would
++	 * slow down the other processes too much) if, when a slice
++	 * ends for whatever reason, it has received service at a
++	 * rate that would not be high enough to complete the budget
++	 * before the budget timeout expiration.
++	 */
++	expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT;
++
++	/*
++	 * Caveat: processes doing IO in the slower disk zones will
++	 * tend to be slow(er) even if not seeky. And the estimated
++	 * peak rate will actually be an average over the disk
++	 * surface. Hence, to not be too harsh with unlucky processes,
++	 * we keep a budget/3 margin of safety before declaring a
++	 * process slow.
++	 */
++	return expected > (4 * bfqq->entity.budget) / 3;
++}
++
++/*
++ * To be deemed soft real-time, an application must meet two
++ * requirements. First, the application must not require an average
++ * bandwidth higher than the approximate bandwidth required to play back
++ * or record a compressed high-definition video.
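++ * (As a rough, assumed reference figure: such compressed video streams
++ * typically require no more than a few MB/s.)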
++ * The next function is invoked on the completion of the last request of a
++ * batch, to compute the next-start time instant, soft_rt_next_start, such
++ * that, if the next request of the application does not arrive before
++ * soft_rt_next_start, then the above requirement on the bandwidth is met.
++ *
++ * The second requirement is that the request pattern of the application is
++ * isochronous, i.e., that, after issuing a request or a batch of requests,
++ * the application stops issuing new requests until all its pending requests
++ * have been completed. After that, the application may issue a new batch,
++ * and so on.
++ * For this reason the next function is invoked to compute
++ * soft_rt_next_start only for applications that meet this requirement,
++ * whereas soft_rt_next_start is set to infinity for applications that do
++ * not.
++ *
++ * Unfortunately, even a greedy application may happen to behave in an
++ * isochronous way if the CPU load is high. In fact, the application may
++ * stop issuing requests while the CPUs are busy serving other processes,
++ * then restart, then stop again for a while, and so on. In addition, if
++ * the disk achieves a low enough throughput with the request pattern
++ * issued by the application (e.g., because the request pattern is random
++ * and/or the device is slow), then the application may meet the above
++ * bandwidth requirement too. To prevent such a greedy application from
++ * being deemed soft real-time, a further rule is used in the computation of
++ * soft_rt_next_start: soft_rt_next_start must be higher than the current
++ * time plus the maximum time for which the arrival of a request is waited
++ * when a sync queue becomes idle, namely bfqd->bfq_slice_idle.
++ * This filters out greedy applications, as the latter instead issue their
++ * next request as soon as possible after the last one has been completed
++ * (in contrast, when a batch of requests is completed, a soft real-time
++ * application spends some time processing data).
++ *
++ * Unfortunately, the last filter may easily generate false positives if
++ * only bfqd->bfq_slice_idle is used as a reference time interval and one
++ * or both of the following cases occur:
++ * 1) HZ is so low that the duration of a jiffy is comparable to or higher
++ *    than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with
++ *    HZ=100.
++ * 2) jiffies, instead of increasing at a constant rate, may stop increasing
++ *    for a while, then suddenly 'jump' by several units to recover the lost
++ *    increments. This seems to happen, e.g., inside virtual machines.
++ * To address this issue, we do not use as a reference time interval just
++ * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In
++ * particular we add the minimum number of jiffies for which the filter
++ * seems to be quite precise also in embedded systems and KVM/QEMU virtual
++ * machines.
++ */
++static inline unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
++						       struct bfq_queue *bfqq)
++{
++	return max(bfqq->last_idle_bklogged +
++		   HZ * bfqq->service_from_backlogged /
++		   bfqd->bfq_wr_max_softrt_rate,
++		   jiffies + bfqq->bfqd->bfq_slice_idle + 4);
++}
++
++/*
++ * Return the largest-possible time instant such that, for as long as possible,
++ * the current time will be lower than this time instant according to the macro
++ * time_is_before_jiffies().
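++ * (For instance, purely illustratively: with a 32-bit unsigned long,
++ * now + ULONG_MAX / 2 lies about 2^31 - 1 jiffies after now, the
++ * farthest instant that the wraparound-safe comparison performed by
++ * time_is_before_jiffies() still considers to be in the future.)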
++ */
++static inline unsigned long bfq_infinity_from_now(unsigned long now)
++{
++	return now + ULONG_MAX / 2;
++}
++
++/**
++ * bfq_bfqq_expire - expire a queue.
++ * @bfqd: device owning the queue.
++ * @bfqq: the queue to expire.
++ * @compensate: if true, compensate for the time spent idling.
++ * @reason: the reason causing the expiration.
++ *
++ * If the process associated to the queue is slow (i.e., seeky), or in
++ * case of budget timeout, or, finally, if it is async, we
++ * artificially charge it an entire budget (independently of the
++ * actual service it received). As a consequence, the queue will get
++ * higher timestamps than the correct ones upon reactivation, and
++ * hence it will be rescheduled as if it had received more service
++ * than what it actually received. In the end, this class of processes
++ * will receive less service in proportion to how slowly they consume
++ * their budgets (and hence how seriously they tend to lower the
++ * throughput).
++ *
++ * In contrast, when a queue expires because it has been idling for
++ * too long or because it exhausted its budget, we do not touch the
++ * amount of service it has received. Hence, when the queue is
++ * reactivated and its timestamps updated, the latter will be in sync
++ * with the actual service received by the queue until expiration.
++ *
++ * Charging a full budget to the first type of queues and the exact
++ * service to the others has the effect of using the WF2Q+ policy to
++ * schedule the former on a timeslice basis, without violating the
++ * service domain guarantees of the latter.
++ */
++static void bfq_bfqq_expire(struct bfq_data *bfqd,
++			    struct bfq_queue *bfqq,
++			    int compensate,
++			    enum bfqq_expiration reason)
++{
++	int slow;
++	BUG_ON(bfqq != bfqd->in_service_queue);
++
++	/* Update disk peak rate for autotuning and check whether the
++	 * process is slow (see bfq_update_peak_rate).
++	 */
++	slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason);
++
++	/*
++	 * As explained above, 'punish' slow (i.e., seeky), timed-out
++	 * and async queues, to favor sequential sync workloads.
++	 *
++	 * Processes doing I/O in the slower disk zones will tend to be
++	 * slow(er) even if not seeky. Hence, since the estimated peak
++	 * rate is actually an average over the disk surface, these
++	 * processes may time out just out of bad luck. To avoid punishing
++	 * them we do not charge a full budget to a process that
++	 * succeeded in consuming at least 2/3 of its budget.
++	 */
++	if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
++		     bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3))
++		bfq_bfqq_charge_full_budget(bfqq);
++
++	bfqq->service_from_backlogged += bfqq->entity.service;
++
++	if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
++	    !bfq_bfqq_constantly_seeky(bfqq)) {
++		bfq_mark_bfqq_constantly_seeky(bfqq);
++		if (!blk_queue_nonrot(bfqd->queue))
++			bfqd->const_seeky_busy_in_flight_queues++;
++	}
++
++	if (reason == BFQ_BFQQ_TOO_IDLE &&
++	    bfqq->entity.service <= 2 * bfqq->entity.budget / 10)
++		bfq_clear_bfqq_IO_bound(bfqq);
++
++	if (bfqd->low_latency && bfqq->wr_coeff == 1)
++		bfqq->last_wr_start_finish = jiffies;
++
++	if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 &&
++	    RB_EMPTY_ROOT(&bfqq->sort_list)) {
++		/*
++		 * If we get here, and there are no outstanding requests,
++		 * then the request pattern is isochronous (see the comments
++		 * to the function bfq_bfqq_softrt_next_start()). Hence we
++		 * can compute soft_rt_next_start.
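++		 * (Illustrative example with assumed values: with HZ = 250,
++		 * bfq_wr_max_softrt_rate = 7000 sectors/sec and 1400
++		 * sectors served since the queue last emptied, the next
++		 * batch is allowed no earlier than
++		 * last_idle_bklogged + 50 jiffies, i.e., 200 ms later.)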
If, instead, the queue ++ * still has outstanding requests, then we have to wait ++ * for the completion of all the outstanding requests to ++ * discover whether the request pattern is actually ++ * isochronous. ++ */ ++ if (bfqq->dispatched == 0) ++ bfqq->soft_rt_next_start = ++ bfq_bfqq_softrt_next_start(bfqd, bfqq); ++ else { ++ /* ++ * The application is still waiting for the ++ * completion of one or more requests: ++ * prevent it from possibly being incorrectly ++ * deemed as soft real-time by setting its ++ * soft_rt_next_start to infinity. In fact, ++ * without this assignment, the application ++ * would be incorrectly deemed as soft ++ * real-time if: ++ * 1) it issued a new request before the ++ * completion of all its in-flight ++ * requests, and ++ * 2) at that time, its soft_rt_next_start ++ * happened to be in the past. ++ */ ++ bfqq->soft_rt_next_start = ++ bfq_infinity_from_now(jiffies); ++ /* ++ * Schedule an update of soft_rt_next_start to when ++ * the task may be discovered to be isochronous. ++ */ ++ bfq_mark_bfqq_softrt_update(bfqq); ++ } ++ } ++ ++ bfq_log_bfqq(bfqd, bfqq, ++ "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, ++ slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); ++ ++ /* ++ * Increase, decrease or leave budget unchanged according to ++ * reason. ++ */ ++ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); ++ __bfq_bfqq_expire(bfqd, bfqq); ++} ++ ++/* ++ * Budget timeout is not implemented through a dedicated timer, but ++ * just checked on request arrivals and completions, as well as on ++ * idle timer expirations. ++ */ ++static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) ++{ ++ if (bfq_bfqq_budget_new(bfqq) || ++ time_before(jiffies, bfqq->budget_timeout)) ++ return 0; ++ return 1; ++} ++ ++/* ++ * If we expire a queue that is waiting for the arrival of a new ++ * request, we may prevent the fictitious timestamp back-shifting that ++ * allows the guarantees of the queue to be preserved (see [1] for ++ * this tricky aspect). Hence we return true only if this condition ++ * does not hold, or if the queue is slow enough to deserve only to be ++ * kicked off for preserving a high throughput. ++*/ ++static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) ++{ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "may_budget_timeout: wait_request %d left %d timeout %d", ++ bfq_bfqq_wait_request(bfqq), ++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, ++ bfq_bfqq_budget_timeout(bfqq)); ++ ++ return (!bfq_bfqq_wait_request(bfqq) || ++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3) ++ && ++ bfq_bfqq_budget_timeout(bfqq); ++} ++ ++/* ++ * Device idling is allowed only for the queues for which this function ++ * returns true. For this reason, the return value of this function plays a ++ * critical role for both throughput boosting and service guarantees. The ++ * return value is computed through a logical expression. In this rather ++ * long comment, we try to briefly describe all the details and motivations ++ * behind the components of this logical expression. ++ * ++ * First, the expression is false if bfqq is not sync, or if: bfqq happened ++ * to become active during a large burst of queue activations, and the ++ * pattern of requests bfqq contains boosts the throughput if bfqq is ++ * expired. In fact, queues that became active during a large burst benefit ++ * only from throughput, as discussed in the comments to bfq_handle_burst. 
++ * In this respect, expiring bfqq certainly boosts the throughput on NCQ- ++ * capable flash-based devices, whereas, on rotational devices, it boosts ++ * the throughput only if bfqq contains random requests. ++ * ++ * On the opposite end, if (a) bfqq is sync, (b) the above burst-related ++ * condition does not hold, and (c) bfqq is being weight-raised, then the ++ * expression always evaluates to true, as device idling is instrumental ++ * for preserving low-latency guarantees (see [1]). If, instead, conditions ++ * (a) and (b) do hold, but (c) does not, then the expression evaluates to ++ * true only if: (1) bfqq is I/O-bound and has a non-null idle window, and ++ * (2) at least one of the following two conditions holds. ++ * The first condition is that the device is not performing NCQ, because ++ * idling the device most certainly boosts the throughput if this condition ++ * holds and bfqq is I/O-bound and has been granted a non-null idle window. ++ * The second compound condition is made of the logical AND of two components. ++ * ++ * The first component is true only if there is no weight-raised busy ++ * queue. This guarantees that the device is not idled for a sync non- ++ * weight-raised queue when there are busy weight-raised queues. The former ++ * is then expired immediately if empty. Combined with the timestamping ++ * rules of BFQ (see [1] for details), this causes sync non-weight-raised ++ * queues to get a lower number of requests served, and hence to ask for a ++ * lower number of requests from the request pool, before the busy weight- ++ * raised queues get served again. ++ * ++ * This is beneficial for the processes associated with weight-raised ++ * queues, when the request pool is saturated (e.g., in the presence of ++ * write hogs). In fact, if the processes associated with the other queues ++ * ask for requests at a lower rate, then weight-raised processes have a ++ * higher probability to get a request from the pool immediately (or at ++ * least soon) when they need one. Hence they have a higher probability to ++ * actually get a fraction of the disk throughput proportional to their ++ * high weight. This is especially true with NCQ-capable drives, which ++ * enqueue several requests in advance and further reorder internally- ++ * queued requests. ++ * ++ * In the end, mistreating non-weight-raised queues when there are busy ++ * weight-raised queues seems to mitigate starvation problems in the ++ * presence of heavy write workloads and NCQ, and hence to guarantee a ++ * higher application and system responsiveness in these hostile scenarios. ++ * ++ * If the first component of the compound condition is instead true, i.e., ++ * there is no weight-raised busy queue, then the second component of the ++ * compound condition takes into account service-guarantee and throughput ++ * issues related to NCQ (recall that the compound condition is evaluated ++ * only if the device is detected as supporting NCQ). ++ * ++ * As for service guarantees, allowing the drive to enqueue more than one ++ * request at a time, and hence delegating de facto final scheduling ++ * decisions to the drive's internal scheduler, causes loss of control on ++ * the actual request service order. 
In this respect, when the drive is
++ * allowed to enqueue more than one request at a time, the service
++ * distribution enforced by the drive's internal scheduler is likely to
++ * coincide with the desired device-throughput distribution only in the
++ * following, perfectly symmetric, scenario:
++ * 1) all active queues have the same weight,
++ * 2) all active groups at the same level in the groups tree have the same
++ *    weight,
++ * 3) all active groups at the same level in the groups tree have the same
++ *    number of children.
++ *
++ * Even in such a scenario, sequential I/O may still receive a preferential
++ * treatment, but this is not likely to be a big issue with flash-based
++ * devices, because of their non-dramatic loss of throughput with random
++ * I/O. Things do differ with HDDs, for which additional care is taken, as
++ * explained after completing the discussion for flash-based devices.
++ *
++ * Unfortunately, keeping the necessary state for evaluating exactly the
++ * above symmetry conditions would be quite complex and time-consuming.
++ * Therefore BFQ evaluates instead the following stronger sub-conditions,
++ * for which it is much easier to maintain the needed state:
++ * 1) all active queues have the same weight,
++ * 2) all active groups have the same weight,
++ * 3) all active groups have at most one active child each.
++ * In particular, the last two conditions are always true if hierarchical
++ * support and the cgroups interface are not enabled, hence no state needs
++ * to be maintained in this case.
++ *
++ * According to the above considerations, the second component of the
++ * compound condition evaluates to true if any of the above symmetry
++ * sub-conditions does not hold, or the device is not flash-based. Therefore,
++ * if the first component is also true, then idling is allowed for a sync
++ * queue. These are the only sub-conditions considered if the device is
++ * flash-based, as, for such a device, it is sensible to force idling only
++ * for service-guarantee issues. In fact, as for throughput, idling
++ * NCQ-capable flash-based devices would not boost the throughput even
++ * with sequential I/O; rather it would lower the throughput in proportion
++ * to how fast the device is. In the end, (only) if all three
++ * sub-conditions hold and the device is flash-based, the compound
++ * condition evaluates to false and therefore no idling is performed.
++ *
++ * As already said, things change with a rotational device, where idling
++ * boosts the throughput with sequential I/O (even with NCQ). Hence, for
++ * such a device the second component of the compound condition evaluates
++ * to true also if the following additional sub-condition does not hold:
++ * the queue is constantly seeky. Unfortunately, this different behavior
++ * with respect to flash-based devices causes an additional asymmetry: if
++ * some sync queues enjoy idling and some other sync queues do not, then
++ * the latter get a low share of the device throughput, simply because the
++ * former get many requests served after being set as in service, whereas
++ * the latter do not. As a consequence, to guarantee the desired throughput
++ * distribution, on HDDs the compound expression evaluates to true (and
++ * hence device idling is performed) also if the following last symmetry
++ * condition does not hold: no other queue is benefiting from idling.
++ * Also, this last condition is actually replaced with a simpler-to-maintain
++ * and stronger condition: there is no busy queue which is not constantly
++ * seeky (and hence may also benefit from idling).
++ *
++ * To sum up, when all the required symmetry and throughput-boosting
++ * sub-conditions hold, the second component of the compound condition
++ * evaluates to false, and hence no idling is performed. This helps to
++ * keep the drives' internal queues full on NCQ-capable devices, and hence
++ * to boost the throughput, without causing 'almost' any loss of service
++ * guarantees. The 'almost' follows from the fact that, if the internal
++ * queue of one such device is filled while all the sub-conditions hold,
++ * but at some point in time some sub-condition ceases to hold, then it may
++ * become impossible to let requests be served in the new desired order
++ * until all the requests already queued in the device have been served.
++ */
++static inline bool bfq_bfqq_must_not_expire(struct bfq_queue *bfqq)
++{
++	struct bfq_data *bfqd = bfqq->bfqd;
++#ifdef CONFIG_CGROUP_BFQIO
++#define symmetric_scenario	  (!bfqd->active_numerous_groups && \
++				   !bfq_differentiated_weights(bfqd))
++#else
++#define symmetric_scenario	  (!bfq_differentiated_weights(bfqd))
++#endif
++#define cond_for_seeky_on_ncq_hdd (bfq_bfqq_constantly_seeky(bfqq) && \
++				   bfqd->busy_in_flight_queues == \
++				   bfqd->const_seeky_busy_in_flight_queues)
++
++#define cond_for_expiring_in_burst	(bfq_bfqq_in_large_burst(bfqq) && \
++					 bfqd->hw_tag && \
++					 (blk_queue_nonrot(bfqd->queue) || \
++					  bfq_bfqq_constantly_seeky(bfqq)))
++
++/*
++ * Condition for expiring a non-weight-raised queue (and hence not idling
++ * the device).
++ */
++#define cond_for_expiring_non_wr  (bfqd->hw_tag && \
++				   (bfqd->wr_busy_queues > 0 || \
++				    (symmetric_scenario && \
++				     (blk_queue_nonrot(bfqd->queue) || \
++				      cond_for_seeky_on_ncq_hdd))))
++
++	return bfq_bfqq_sync(bfqq) &&
++		!cond_for_expiring_in_burst &&
++		(bfqq->wr_coeff > 1 ||
++		 (bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_idle_window(bfqq) &&
++		  !cond_for_expiring_non_wr)
++		);
++}
++
++/*
++ * If the in-service queue is empty but sync, and the function
++ * bfq_bfqq_must_not_expire returns true, then:
++ * 1) the queue must remain in service and cannot be expired, and
++ * 2) the disk must be idled to wait for the possible arrival of a new
++ *    request for the queue.
++ * See the comments to the function bfq_bfqq_must_not_expire for the reasons
++ * why performing device idling is the best choice to boost the throughput
++ * and preserve service guarantees when bfq_bfqq_must_not_expire itself
++ * returns true.
++ */
++static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
++{
++	struct bfq_data *bfqd = bfqq->bfqd;
++
++	return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 &&
++	       bfq_bfqq_must_not_expire(bfqq);
++}
++
++/*
++ * Select a queue for service. If we have a current queue in service,
++ * check whether to continue servicing it, or retrieve and set a new one.
++ */ ++static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) ++{ ++ struct bfq_queue *bfqq; ++ struct request *next_rq; ++ enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; ++ ++ bfqq = bfqd->in_service_queue; ++ if (bfqq == NULL) ++ goto new_queue; ++ ++ bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); ++ ++ if (bfq_may_expire_for_budg_timeout(bfqq) && ++ !timer_pending(&bfqd->idle_slice_timer) && ++ !bfq_bfqq_must_idle(bfqq)) ++ goto expire; ++ ++ next_rq = bfqq->next_rq; ++ /* ++ * If bfqq has requests queued and it has enough budget left to ++ * serve them, keep the queue, otherwise expire it. ++ */ ++ if (next_rq != NULL) { ++ if (bfq_serv_to_charge(next_rq, bfqq) > ++ bfq_bfqq_budget_left(bfqq)) { ++ reason = BFQ_BFQQ_BUDGET_EXHAUSTED; ++ goto expire; ++ } else { ++ /* ++ * The idle timer may be pending because we may ++ * not disable disk idling even when a new request ++ * arrives. ++ */ ++ if (timer_pending(&bfqd->idle_slice_timer)) { ++ /* ++ * If we get here: 1) at least a new request ++ * has arrived but we have not disabled the ++ * timer because the request was too small, ++ * 2) then the block layer has unplugged ++ * the device, causing the dispatch to be ++ * invoked. ++ * ++ * Since the device is unplugged, now the ++ * requests are probably large enough to ++ * provide a reasonable throughput. ++ * So we disable idling. ++ */ ++ bfq_clear_bfqq_wait_request(bfqq); ++ del_timer(&bfqd->idle_slice_timer); ++ } ++ goto keep_queue; ++ } ++ } ++ ++ /* ++ * No requests pending. If the in-service queue still has requests ++ * in flight (possibly waiting for a completion) or is idling for a ++ * new request, then keep it. ++ */ ++ if (timer_pending(&bfqd->idle_slice_timer) || ++ (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) { ++ bfqq = NULL; ++ goto keep_queue; ++ } ++ ++ reason = BFQ_BFQQ_NO_MORE_REQUESTS; ++expire: ++ bfq_bfqq_expire(bfqd, bfqq, 0, reason); ++new_queue: ++ bfqq = bfq_set_in_service_queue(bfqd); ++ bfq_log(bfqd, "select_queue: new queue %d returned", ++ bfqq != NULL ? bfqq->pid : 0); ++keep_queue: ++ return bfqq; ++} ++ ++static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */ ++ bfq_log_bfqq(bfqd, bfqq, ++ "raising period dur %u/%u msec, old coeff %u, w %d(%d)", ++ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), ++ jiffies_to_msecs(bfqq->wr_cur_max_time), ++ bfqq->wr_coeff, ++ bfqq->entity.weight, bfqq->entity.orig_weight); ++ ++ BUG_ON(bfqq != bfqd->in_service_queue && entity->weight != ++ entity->orig_weight * bfqq->wr_coeff); ++ if (entity->ioprio_changed) ++ bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); ++ ++ /* ++ * If the queue was activated in a burst, or ++ * too much time has elapsed from the beginning ++ * of this weight-raising period, or the queue has ++ * exceeded the acceptable number of cooperations, ++ * then end weight raising. 
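++		 * For instance, with the default bfq_coop_thresh = 2 set
++		 * in bfq_init_queue() below, two detected cooperations are
++		 * already enough to make the following check end the
++		 * weight-raising period early.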
++		 */
++		if (bfq_bfqq_in_large_burst(bfqq) ||
++		    bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh ||
++		    time_is_before_jiffies(bfqq->last_wr_start_finish +
++					   bfqq->wr_cur_max_time)) {
++			bfqq->last_wr_start_finish = jiffies;
++			bfq_log_bfqq(bfqd, bfqq,
++				     "wrais ending at %lu, rais_max_time %u",
++				     bfqq->last_wr_start_finish,
++				     jiffies_to_msecs(bfqq->wr_cur_max_time));
++			bfq_bfqq_end_wr(bfqq);
++		}
++	}
++	/* Update weight both if it must be raised and if it must be lowered */
++	if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
++		__bfq_entity_update_weight_prio(
++			bfq_entity_service_tree(entity),
++			entity);
++}
++
++/*
++ * Dispatch one request from bfqq, moving it to the request queue
++ * dispatch list.
++ */
++static int bfq_dispatch_request(struct bfq_data *bfqd,
++				struct bfq_queue *bfqq)
++{
++	int dispatched = 0;
++	struct request *rq;
++	unsigned long service_to_charge;
++
++	BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
++
++	/* Follow expired path, else get first next available. */
++	rq = bfq_check_fifo(bfqq);
++	if (rq == NULL)
++		rq = bfqq->next_rq;
++	service_to_charge = bfq_serv_to_charge(rq, bfqq);
++
++	if (service_to_charge > bfq_bfqq_budget_left(bfqq)) {
++		/*
++		 * This may happen if the next rq is chosen in fifo order
++		 * instead of sector order. The budget is properly
++		 * dimensioned to be always sufficient to serve the next
++		 * request only if it is chosen in sector order. The reason
++		 * is that it would be quite inefficient and of little use
++		 * to always make sure that the budget is large enough to
++		 * serve even the possible next rq in fifo order.
++		 * In fact, requests are seldom served in fifo order.
++		 *
++		 * Expire the queue for budget exhaustion, and make sure
++		 * that the next budget is enough to serve the next
++		 * request, even if it comes from the fifo expired path.
++		 */
++		bfqq->next_rq = rq;
++		/*
++		 * Since this dispatch failed, make sure that a new one
++		 * will be performed.
++		 */
++		if (!bfqd->rq_in_driver)
++			bfq_schedule_dispatch(bfqd);
++		goto expire;
++	}
++
++	/* Finally, insert request into driver dispatch list. */
++	bfq_bfqq_served(bfqq, service_to_charge);
++	bfq_dispatch_insert(bfqd->queue, rq);
++
++	bfq_update_wr_data(bfqd, bfqq);
++
++	bfq_log_bfqq(bfqd, bfqq,
++			"dispatched %u sec req (%llu), budg left %lu",
++			blk_rq_sectors(rq),
++			(long long unsigned)blk_rq_pos(rq),
++			bfq_bfqq_budget_left(bfqq));
++
++	dispatched++;
++
++	if (bfqd->in_service_bic == NULL) {
++		atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount);
++		bfqd->in_service_bic = RQ_BIC(rq);
++	}
++
++	if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) &&
++	    dispatched >= bfqd->bfq_max_budget_async_rq) ||
++	    bfq_class_idle(bfqq)))
++		goto expire;
++
++	return dispatched;
++
++expire:
++	bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED);
++	return dispatched;
++}
++
++static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq)
++{
++	int dispatched = 0;
++
++	while (bfqq->next_rq != NULL) {
++		bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq);
++		dispatched++;
++	}
++
++	BUG_ON(!list_empty(&bfqq->fifo));
++	return dispatched;
++}
++
++/*
++ * Drain our current requests.
++ * Used for barriers and when switching io schedulers on-the-fly.
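++ * For example, a scheduler switch through
++ * /sys/block/<dev>/queue/scheduler makes the block layer drain the
++ * elevator (via elv_drain_elevator() in this kernel), which calls
++ * bfq_dispatch_requests() with its force flag set and lands here, so
++ * everything queued must be moved to the dispatch list unconditionally.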
++ */
++static int bfq_forced_dispatch(struct bfq_data *bfqd)
++{
++	struct bfq_queue *bfqq, *n;
++	struct bfq_service_tree *st;
++	int dispatched = 0;
++
++	bfqq = bfqd->in_service_queue;
++	if (bfqq != NULL)
++		__bfq_bfqq_expire(bfqd, bfqq);
++
++	/*
++	 * Loop through classes, and be careful to leave the scheduler
++	 * in a consistent state, as feedback mechanisms and vtime
++	 * updates cannot be disabled during the process.
++	 */
++	list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) {
++		st = bfq_entity_service_tree(&bfqq->entity);
++
++		dispatched += __bfq_forced_dispatch_bfqq(bfqq);
++		bfqq->max_budget = bfq_max_budget(bfqd);
++
++		bfq_forget_idle(st);
++	}
++
++	BUG_ON(bfqd->busy_queues != 0);
++
++	return dispatched;
++}
++
++static int bfq_dispatch_requests(struct request_queue *q, int force)
++{
++	struct bfq_data *bfqd = q->elevator->elevator_data;
++	struct bfq_queue *bfqq;
++	int max_dispatch;
++
++	bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues);
++	if (bfqd->busy_queues == 0)
++		return 0;
++
++	if (unlikely(force))
++		return bfq_forced_dispatch(bfqd);
++
++	bfqq = bfq_select_queue(bfqd);
++	if (bfqq == NULL)
++		return 0;
++
++	max_dispatch = bfqd->bfq_quantum;
++	if (bfq_class_idle(bfqq))
++		max_dispatch = 1;
++
++	if (!bfq_bfqq_sync(bfqq))
++		max_dispatch = bfqd->bfq_max_budget_async_rq;
++
++	if (bfqq->dispatched >= max_dispatch) {
++		if (bfqd->busy_queues > 1)
++			return 0;
++		if (bfqq->dispatched >= 4 * max_dispatch)
++			return 0;
++	}
++
++	if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq))
++		return 0;
++
++	bfq_clear_bfqq_wait_request(bfqq);
++	BUG_ON(timer_pending(&bfqd->idle_slice_timer));
++
++	if (!bfq_dispatch_request(bfqd, bfqq))
++		return 0;
++
++	bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d (max_disp %d)",
++			bfqq->pid, max_dispatch);
++
++	return 1;
++}
++
++/*
++ * Task holds one reference to the queue, dropped when task exits. Each rq
++ * in-flight on this queue also holds a reference, dropped when rq is freed.
++ *
++ * Queue lock must be held here.
++ */
++static void bfq_put_queue(struct bfq_queue *bfqq)
++{
++	struct bfq_data *bfqd = bfqq->bfqd;
++
++	BUG_ON(atomic_read(&bfqq->ref) <= 0);
++
++	bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq,
++		     atomic_read(&bfqq->ref));
++	if (!atomic_dec_and_test(&bfqq->ref))
++		return;
++
++	BUG_ON(rb_first(&bfqq->sort_list) != NULL);
++	BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0);
++	BUG_ON(bfqq->entity.tree != NULL);
++	BUG_ON(bfq_bfqq_busy(bfqq));
++	BUG_ON(bfqd->in_service_queue == bfqq);
++
++	if (bfq_bfqq_sync(bfqq))
++		/*
++		 * The fact that this queue is being destroyed does not
++		 * invalidate the fact that this queue may have been
++		 * activated during the current burst. As a consequence,
++		 * although the queue does not exist anymore, and hence
++		 * needs to be removed from the burst list if present,
++		 * the burst size must not be decremented.
++		 */
++		hlist_del_init(&bfqq->burst_list_node);
++
++	bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq);
++
++	kmem_cache_free(bfq_pool, bfqq);
++}
++
++static void bfq_put_cooperator(struct bfq_queue *bfqq)
++{
++	struct bfq_queue *__bfqq, *next;
++
++	/*
++	 * If this queue was scheduled to merge with another queue, be
++	 * sure to drop the reference taken on that queue (and others in
++	 * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs.
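++	 * If, e.g., the chain is bfqq->new_bfqq == Q2 and
++	 * Q2->new_bfqq == Q3, one reference is dropped on Q2 and one on
++	 * Q3; the walk below also guards against a chain that loops back
++	 * to bfqq itself.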
++	 */
++	__bfqq = bfqq->new_bfqq;
++	while (__bfqq) {
++		if (__bfqq == bfqq)
++			break;
++		next = __bfqq->new_bfqq;
++		bfq_put_queue(__bfqq);
++		__bfqq = next;
++	}
++}
++
++static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++	if (bfqq == bfqd->in_service_queue) {
++		__bfq_bfqq_expire(bfqd, bfqq);
++		bfq_schedule_dispatch(bfqd);
++	}
++
++	bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq,
++		     atomic_read(&bfqq->ref));
++
++	bfq_put_cooperator(bfqq);
++
++	bfq_put_queue(bfqq);
++}
++
++static inline void bfq_init_icq(struct io_cq *icq)
++{
++	struct bfq_io_cq *bic = icq_to_bic(icq);
++
++	bic->ttime.last_end_request = jiffies;
++	/*
++	 * A newly created bic indicates that the process has just
++	 * started doing I/O, and is probably mapping into memory its
++	 * executable and libraries: it definitely needs weight raising.
++	 * There is however the possibility that the process performs,
++	 * for a while, I/O close to some other process. EQM intercepts
++	 * this behavior and may merge the queue corresponding to the
++	 * process with some other queue, BEFORE the weight of the queue
++	 * is raised. Merged queues are not weight-raised (they are assumed
++	 * to belong to processes that benefit only from high throughput).
++	 * If the merge is basically the consequence of an accident, then
++	 * the queue will be split soon and will get back its old weight.
++	 * It is then important to write down somewhere that this queue
++	 * does need weight raising, even if it never managed to have its
++	 * weight raised before being merged. To this purpose, we overload
++	 * the field wr_time_left and assign 1 to it, to mark the queue
++	 * as needing weight raising.
++	 */
++	bic->wr_time_left = 1;
++}
++
++static void bfq_exit_icq(struct io_cq *icq)
++{
++	struct bfq_io_cq *bic = icq_to_bic(icq);
++	struct bfq_data *bfqd = bic_to_bfqd(bic);
++
++	if (bic->bfqq[BLK_RW_ASYNC]) {
++		bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]);
++		bic->bfqq[BLK_RW_ASYNC] = NULL;
++	}
++
++	if (bic->bfqq[BLK_RW_SYNC]) {
++		/*
++		 * If the bic is using a shared queue, put the reference
++		 * taken on the io_context when the bic started using a
++		 * shared bfq_queue.
++		 */
++		if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC]))
++			put_io_context(icq->ioc);
++		bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
++		bic->bfqq[BLK_RW_SYNC] = NULL;
++	}
++}
++
++/*
++ * Update the entity prio values; note that the new values will not
++ * be used until the next (re)activation.
++ */
++static void bfq_init_prio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
++{
++	struct task_struct *tsk = current;
++	int ioprio_class;
++
++	if (!bfq_bfqq_prio_changed(bfqq))
++		return;
++
++	ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
++	switch (ioprio_class) {
++	default:
++		dev_err(bfqq->bfqd->queue->backing_dev_info.dev,
++			"bfq: bad prio class %d\n", ioprio_class);
++		/* fall through */
++	case IOPRIO_CLASS_NONE:
++		/*
++		 * No prio set, inherit CPU scheduling settings.
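++		 * E.g., a SCHED_NORMAL task at nice 0 gets, through
++		 * task_nice_ioprio() and task_nice_ioclass() below, prio
++		 * level 4 in the best-effort class.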
++ */ ++ bfqq->entity.new_ioprio = task_nice_ioprio(tsk); ++ bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk); ++ break; ++ case IOPRIO_CLASS_RT: ++ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio); ++ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT; ++ break; ++ case IOPRIO_CLASS_BE: ++ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio); ++ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE; ++ break; ++ case IOPRIO_CLASS_IDLE: ++ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE; ++ bfqq->entity.new_ioprio = 7; ++ bfq_clear_bfqq_idle_window(bfqq); ++ break; ++ } ++ ++ if (bfqq->entity.new_ioprio < 0 || ++ bfqq->entity.new_ioprio >= IOPRIO_BE_NR) { ++ printk(KERN_CRIT "bfq_init_prio_data: new_ioprio %d\n", ++ bfqq->entity.new_ioprio); ++ BUG(); ++ } ++ ++ bfqq->entity.ioprio_changed = 1; ++ ++ bfq_clear_bfqq_prio_changed(bfqq); ++} ++ ++static void bfq_changed_ioprio(struct bfq_io_cq *bic) ++{ ++ struct bfq_data *bfqd; ++ struct bfq_queue *bfqq, *new_bfqq; ++ struct bfq_group *bfqg; ++ unsigned long uninitialized_var(flags); ++ int ioprio = bic->icq.ioc->ioprio; ++ ++ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), ++ &flags); ++ /* ++ * This condition may trigger on a newly created bic, be sure to ++ * drop the lock before returning. ++ */ ++ if (unlikely(bfqd == NULL) || likely(bic->ioprio == ioprio)) ++ goto out; ++ ++ bfqq = bic->bfqq[BLK_RW_ASYNC]; ++ if (bfqq != NULL) { ++ bfqg = container_of(bfqq->entity.sched_data, struct bfq_group, ++ sched_data); ++ new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, bic, ++ GFP_ATOMIC); ++ if (new_bfqq != NULL) { ++ bic->bfqq[BLK_RW_ASYNC] = new_bfqq; ++ bfq_log_bfqq(bfqd, bfqq, ++ "changed_ioprio: bfqq %p %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ bfq_put_queue(bfqq); ++ } ++ } ++ ++ bfqq = bic->bfqq[BLK_RW_SYNC]; ++ if (bfqq != NULL) ++ bfq_mark_bfqq_prio_changed(bfqq); ++ ++ bic->ioprio = ioprio; ++ ++out: ++ bfq_put_bfqd_unlock(bfqd, &flags); ++} ++ ++static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ pid_t pid, int is_sync) ++{ ++ RB_CLEAR_NODE(&bfqq->entity.rb_node); ++ INIT_LIST_HEAD(&bfqq->fifo); ++ INIT_HLIST_NODE(&bfqq->burst_list_node); ++ ++ atomic_set(&bfqq->ref, 0); ++ bfqq->bfqd = bfqd; ++ ++ bfq_mark_bfqq_prio_changed(bfqq); ++ ++ if (is_sync) { ++ if (!bfq_class_idle(bfqq)) ++ bfq_mark_bfqq_idle_window(bfqq); ++ bfq_mark_bfqq_sync(bfqq); ++ } ++ bfq_mark_bfqq_IO_bound(bfqq); ++ ++ /* Tentative initial value to trade off between thr and lat */ ++ bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; ++ bfqq->pid = pid; ++ ++ bfqq->wr_coeff = 1; ++ bfqq->last_wr_start_finish = 0; ++ /* ++ * Set to the value for which bfqq will not be deemed as ++ * soft rt when it becomes backlogged. ++ */ ++ bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies); ++} ++ ++static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, ++ int is_sync, ++ struct bfq_io_cq *bic, ++ gfp_t gfp_mask) ++{ ++ struct bfq_queue *bfqq, *new_bfqq = NULL; ++ ++retry: ++ /* bic always exists here */ ++ bfqq = bic_to_bfqq(bic, is_sync); ++ ++ /* ++ * Always try a new alloc if we fall back to the OOM bfqq ++ * originally, since it should just be a temporary situation. 
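++	 * Note that the blocking allocation below drops the queue_lock,
++	 * so once the lock is re-taken the bic may meanwhile have been
++	 * bound to a queue by someone else: hence the lookup is restarted
++	 * from scratch via the retry label.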
++ */ ++ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { ++ bfqq = NULL; ++ if (new_bfqq != NULL) { ++ bfqq = new_bfqq; ++ new_bfqq = NULL; ++ } else if (gfp_mask & __GFP_WAIT) { ++ spin_unlock_irq(bfqd->queue->queue_lock); ++ new_bfqq = kmem_cache_alloc_node(bfq_pool, ++ gfp_mask | __GFP_ZERO, ++ bfqd->queue->node); ++ spin_lock_irq(bfqd->queue->queue_lock); ++ if (new_bfqq != NULL) ++ goto retry; ++ } else { ++ bfqq = kmem_cache_alloc_node(bfq_pool, ++ gfp_mask | __GFP_ZERO, ++ bfqd->queue->node); ++ } ++ ++ if (bfqq != NULL) { ++ bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync); ++ bfq_init_prio_data(bfqq, bic); ++ bfq_init_entity(&bfqq->entity, bfqg); ++ bfq_log_bfqq(bfqd, bfqq, "allocated"); ++ } else { ++ bfqq = &bfqd->oom_bfqq; ++ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); ++ } ++ } ++ ++ if (new_bfqq != NULL) ++ kmem_cache_free(bfq_pool, new_bfqq); ++ ++ return bfqq; ++} ++ ++static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, ++ int ioprio_class, int ioprio) ++{ ++ switch (ioprio_class) { ++ case IOPRIO_CLASS_RT: ++ return &bfqg->async_bfqq[0][ioprio]; ++ case IOPRIO_CLASS_NONE: ++ ioprio = IOPRIO_NORM; ++ /* fall through */ ++ case IOPRIO_CLASS_BE: ++ return &bfqg->async_bfqq[1][ioprio]; ++ case IOPRIO_CLASS_IDLE: ++ return &bfqg->async_idle_bfqq; ++ default: ++ BUG(); ++ } ++} ++ ++static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, int is_sync, ++ struct bfq_io_cq *bic, gfp_t gfp_mask) ++{ ++ const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio); ++ const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); ++ struct bfq_queue **async_bfqq = NULL; ++ struct bfq_queue *bfqq = NULL; ++ ++ if (!is_sync) { ++ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, ++ ioprio); ++ bfqq = *async_bfqq; ++ } ++ ++ if (bfqq == NULL) ++ bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, bic, gfp_mask); ++ ++ /* ++ * Pin the queue now that it's allocated, scheduler exit will ++ * prune it. ++ */ ++ if (!is_sync && *async_bfqq == NULL) { ++ atomic_inc(&bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ *async_bfqq = bfqq; ++ } ++ ++ atomic_inc(&bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, ++ atomic_read(&bfqq->ref)); ++ return bfqq; ++} ++ ++static void bfq_update_io_thinktime(struct bfq_data *bfqd, ++ struct bfq_io_cq *bic) ++{ ++ unsigned long elapsed = jiffies - bic->ttime.last_end_request; ++ unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); ++ ++ bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8; ++ bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8; ++ bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) / ++ bic->ttime.ttime_samples; ++} ++ ++static void bfq_update_io_seektime(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ struct request *rq) ++{ ++ sector_t sdist; ++ u64 total; ++ ++ if (bfqq->last_request_pos < blk_rq_pos(rq)) ++ sdist = blk_rq_pos(rq) - bfqq->last_request_pos; ++ else ++ sdist = bfqq->last_request_pos - blk_rq_pos(rq); ++ ++ /* ++ * Don't allow the seek distance to get too large from the ++ * odd fragment, pagein, etc. 
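++	 * As a rough illustration: past the first three samples, a queue
++	 * with seek_mean = 16384 sectors has any new sample clamped below
++	 * to 4 * 16384 + 131072 = 196608 sectors, so that a single huge
++	 * seek cannot dominate the 7/8-weighted average computed next.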
++ */ ++ if (bfqq->seek_samples == 0) /* first request, not really a seek */ ++ sdist = 0; ++ else if (bfqq->seek_samples <= 60) /* second & third seek */ ++ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024); ++ else ++ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64); ++ ++ bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8; ++ bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8; ++ total = bfqq->seek_total + (bfqq->seek_samples/2); ++ do_div(total, bfqq->seek_samples); ++ bfqq->seek_mean = (sector_t)total; ++ ++ bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist, ++ (u64)bfqq->seek_mean); ++} ++ ++/* ++ * Disable idle window if the process thinks too long or seeks so much that ++ * it doesn't matter. ++ */ ++static void bfq_update_idle_window(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ struct bfq_io_cq *bic) ++{ ++ int enable_idle; ++ ++ /* Don't idle for async or idle io prio class. */ ++ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) ++ return; ++ ++ /* Idle window just restored, statistics are meaningless. */ ++ if (bfq_bfqq_just_split(bfqq)) ++ return; ++ ++ enable_idle = bfq_bfqq_idle_window(bfqq); ++ ++ if (atomic_read(&bic->icq.ioc->active_ref) == 0 || ++ bfqd->bfq_slice_idle == 0 || ++ (bfqd->hw_tag && BFQQ_SEEKY(bfqq) && ++ bfqq->wr_coeff == 1)) ++ enable_idle = 0; ++ else if (bfq_sample_valid(bic->ttime.ttime_samples)) { ++ if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle && ++ bfqq->wr_coeff == 1) ++ enable_idle = 0; ++ else ++ enable_idle = 1; ++ } ++ bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d", ++ enable_idle); ++ ++ if (enable_idle) ++ bfq_mark_bfqq_idle_window(bfqq); ++ else ++ bfq_clear_bfqq_idle_window(bfqq); ++} ++ ++/* ++ * Called when a new fs request (rq) is added to bfqq. Check if there's ++ * something we should do about it. ++ */ ++static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ struct request *rq) ++{ ++ struct bfq_io_cq *bic = RQ_BIC(rq); ++ ++ if (rq->cmd_flags & REQ_META) ++ bfqq->meta_pending++; ++ ++ bfq_update_io_thinktime(bfqd, bic); ++ bfq_update_io_seektime(bfqd, bfqq, rq); ++ if (!BFQQ_SEEKY(bfqq) && bfq_bfqq_constantly_seeky(bfqq)) { ++ bfq_clear_bfqq_constantly_seeky(bfqq); ++ if (!blk_queue_nonrot(bfqd->queue)) { ++ BUG_ON(!bfqd->const_seeky_busy_in_flight_queues); ++ bfqd->const_seeky_busy_in_flight_queues--; ++ } ++ } ++ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || ++ !BFQQ_SEEKY(bfqq)) ++ bfq_update_idle_window(bfqd, bfqq, bic); ++ bfq_clear_bfqq_just_split(bfqq); ++ ++ bfq_log_bfqq(bfqd, bfqq, ++ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", ++ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), ++ (long long unsigned)bfqq->seek_mean); ++ ++ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); ++ ++ if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) { ++ int small_req = bfqq->queued[rq_is_sync(rq)] == 1 && ++ blk_rq_sectors(rq) < 32; ++ int budget_timeout = bfq_bfqq_budget_timeout(bfqq); ++ ++ /* ++ * There is just this request queued: if the request ++ * is small and the queue is not to be expired, then ++ * just exit. ++ * ++ * In this way, if the disk is being idled to wait for ++ * a new request from the in-service queue, we avoid ++ * unplugging the device and committing the disk to serve ++ * just a small request. 
++ * On the contrary, we wait for the block layer to decide when to
++ * unplug the device: hopefully, new requests will be merged to this
++ * one quickly, then the device will be unplugged and larger requests
++ * will be dispatched.
++ */
++		if (small_req && !budget_timeout)
++			return;
++
++		/*
++		 * A large enough request arrived, or the queue is to
++		 * be expired: in both cases disk idling is to be
++		 * stopped, so clear wait_request flag and reset
++		 * timer.
++		 */
++		bfq_clear_bfqq_wait_request(bfqq);
++		del_timer(&bfqd->idle_slice_timer);
++
++		/*
++		 * The queue is not empty, because a new request just
++		 * arrived. Hence we can safely expire the queue, in
++		 * case of budget timeout, without risking that the
++		 * timestamps of the queue are not updated correctly.
++		 * See [1] for more details.
++		 */
++		if (budget_timeout)
++			bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT);
++
++		/*
++		 * Let the request rip immediately, or let a new queue be
++		 * selected if bfqq has just been expired.
++		 */
++		__blk_run_queue(bfqd->queue);
++	}
++}
++
++static void bfq_insert_request(struct request_queue *q, struct request *rq)
++{
++	struct bfq_data *bfqd = q->elevator->elevator_data;
++	struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq;
++
++	assert_spin_locked(bfqd->queue->queue_lock);
++
++	/*
++	 * An unplug may trigger a requeue of a request from the device
++	 * driver: make sure we are in process context while trying to
++	 * merge two bfq_queues.
++	 */
++	if (!in_interrupt()) {
++		new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
++		if (new_bfqq != NULL) {
++			if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
++				new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
++			/*
++			 * Release the request's reference to the old bfqq
++			 * and make sure one is taken to the shared queue.
++			 */
++			new_bfqq->allocated[rq_data_dir(rq)]++;
++			bfqq->allocated[rq_data_dir(rq)]--;
++			atomic_inc(&new_bfqq->ref);
++			bfq_put_queue(bfqq);
++			if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
++				bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
++						bfqq, new_bfqq);
++			rq->elv.priv[1] = new_bfqq;
++			bfqq = new_bfqq;
++		} else
++			bfq_bfqq_increase_failed_cooperations(bfqq);
++	}
++
++	bfq_init_prio_data(bfqq, RQ_BIC(rq));
++
++	bfq_add_request(rq);
++
++	/*
++	 * Here a newly-created bfq_queue has already started a weight-raising
++	 * period: clear wr_time_left to prevent bfq_bfqq_save_state()
++	 * from assigning it a full weight-raising period. See the detailed
++	 * comments about this field in bfq_init_icq().
++	 */
++	if (bfqq->bic != NULL)
++		bfqq->bic->wr_time_left = 0;
++	rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
++	list_add_tail(&rq->queuelist, &bfqq->fifo);
++
++	bfq_rq_enqueued(bfqd, bfqq, rq);
++}
++
++static void bfq_update_hw_tag(struct bfq_data *bfqd)
++{
++	bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver,
++				     bfqd->rq_in_driver);
++
++	if (bfqd->hw_tag == 1)
++		return;
++
++	/*
++	 * This sample is valid if the number of outstanding requests
++	 * is large enough to allow a queueing behavior. Note that the
++	 * sum is not exact, as it's not taking into account deactivated
++	 * requests.
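++	 *
++	 * Assuming the values this version defines for
++	 * BFQ_HW_QUEUE_THRESHOLD (4) and BFQ_HW_QUEUE_SAMPLES (32),
++	 * hw_tag is set only after 32 valid samples, and only if more
++	 * than 4 requests have been observed in the driver at once.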
++ */ ++ if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD) ++ return; ++ ++ if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) ++ return; ++ ++ bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; ++ bfqd->max_rq_in_driver = 0; ++ bfqd->hw_tag_samples = 0; ++} ++ ++static void bfq_completed_request(struct request_queue *q, struct request *rq) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ struct bfq_data *bfqd = bfqq->bfqd; ++ bool sync = bfq_bfqq_sync(bfqq); ++ ++ bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)", ++ blk_rq_sectors(rq), sync); ++ ++ bfq_update_hw_tag(bfqd); ++ ++ BUG_ON(!bfqd->rq_in_driver); ++ BUG_ON(!bfqq->dispatched); ++ bfqd->rq_in_driver--; ++ bfqq->dispatched--; ++ ++ if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) { ++ bfq_weights_tree_remove(bfqd, &bfqq->entity, ++ &bfqd->queue_weights_tree); ++ if (!blk_queue_nonrot(bfqd->queue)) { ++ BUG_ON(!bfqd->busy_in_flight_queues); ++ bfqd->busy_in_flight_queues--; ++ if (bfq_bfqq_constantly_seeky(bfqq)) { ++ BUG_ON(!bfqd-> ++ const_seeky_busy_in_flight_queues); ++ bfqd->const_seeky_busy_in_flight_queues--; ++ } ++ } ++ } ++ ++ if (sync) { ++ bfqd->sync_flight--; ++ RQ_BIC(rq)->ttime.last_end_request = jiffies; ++ } ++ ++ /* ++ * If we are waiting to discover whether the request pattern of the ++ * task associated with the queue is actually isochronous, and ++ * both requisites for this condition to hold are satisfied, then ++ * compute soft_rt_next_start (see the comments to the function ++ * bfq_bfqq_softrt_next_start()). ++ */ ++ if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 && ++ RB_EMPTY_ROOT(&bfqq->sort_list)) ++ bfqq->soft_rt_next_start = ++ bfq_bfqq_softrt_next_start(bfqd, bfqq); ++ ++ /* ++ * If this is the in-service queue, check if it needs to be expired, ++ * or if we want to idle in case it has no pending requests. ++ */ ++ if (bfqd->in_service_queue == bfqq) { ++ if (bfq_bfqq_budget_new(bfqq)) ++ bfq_set_budget_timeout(bfqd); ++ ++ if (bfq_bfqq_must_idle(bfqq)) { ++ bfq_arm_slice_timer(bfqd); ++ goto out; ++ } else if (bfq_may_expire_for_budg_timeout(bfqq)) ++ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); ++ else if (RB_EMPTY_ROOT(&bfqq->sort_list) && ++ (bfqq->dispatched == 0 || ++ !bfq_bfqq_must_not_expire(bfqq))) ++ bfq_bfqq_expire(bfqd, bfqq, 0, ++ BFQ_BFQQ_NO_MORE_REQUESTS); ++ } ++ ++ if (!bfqd->rq_in_driver) ++ bfq_schedule_dispatch(bfqd); ++ ++out: ++ return; ++} ++ ++static inline int __bfq_may_queue(struct bfq_queue *bfqq) ++{ ++ if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) { ++ bfq_clear_bfqq_must_alloc(bfqq); ++ return ELV_MQUEUE_MUST; ++ } ++ ++ return ELV_MQUEUE_MAY; ++} ++ ++static int bfq_may_queue(struct request_queue *q, int rw) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct task_struct *tsk = current; ++ struct bfq_io_cq *bic; ++ struct bfq_queue *bfqq; ++ ++ /* ++ * Don't force setup of a queue from here, as a call to may_queue ++ * does not necessarily imply that a request actually will be ++ * queued. So just lookup a possibly existing queue, or return ++ * 'may queue' if that fails. ++ */ ++ bic = bfq_bic_lookup(bfqd, tsk->io_context); ++ if (bic == NULL) ++ return ELV_MQUEUE_MAY; ++ ++ bfqq = bic_to_bfqq(bic, rw_is_sync(rw)); ++ if (bfqq != NULL) { ++ bfq_init_prio_data(bfqq, bic); ++ ++ return __bfq_may_queue(bfqq); ++ } ++ ++ return ELV_MQUEUE_MAY; ++} ++ ++/* ++ * Queue lock held here. 
++ */ ++static void bfq_put_request(struct request *rq) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ ++ if (bfqq != NULL) { ++ const int rw = rq_data_dir(rq); ++ ++ BUG_ON(!bfqq->allocated[rw]); ++ bfqq->allocated[rw]--; ++ ++ rq->elv.priv[0] = NULL; ++ rq->elv.priv[1] = NULL; ++ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ bfq_put_queue(bfqq); ++ } ++} ++ ++/* ++ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this ++ * was the last process referring to said bfqq. ++ */ ++static struct bfq_queue * ++bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) ++{ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); ++ ++ put_io_context(bic->icq.ioc); ++ ++ if (bfqq_process_refs(bfqq) == 1) { ++ bfqq->pid = current->pid; ++ bfq_clear_bfqq_coop(bfqq); ++ bfq_clear_bfqq_split_coop(bfqq); ++ return bfqq; ++ } ++ ++ bic_set_bfqq(bic, NULL, 1); ++ ++ bfq_put_cooperator(bfqq); ++ ++ bfq_put_queue(bfqq); ++ return NULL; ++} ++ ++/* ++ * Allocate bfq data structures associated with this request. ++ */ ++static int bfq_set_request(struct request_queue *q, struct request *rq, ++ struct bio *bio, gfp_t gfp_mask) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq); ++ const int rw = rq_data_dir(rq); ++ const int is_sync = rq_is_sync(rq); ++ struct bfq_queue *bfqq; ++ struct bfq_group *bfqg; ++ unsigned long flags; ++ bool split = false; ++ ++ might_sleep_if(gfp_mask & __GFP_WAIT); ++ ++ bfq_changed_ioprio(bic); ++ ++ spin_lock_irqsave(q->queue_lock, flags); ++ ++ if (bic == NULL) ++ goto queue_fail; ++ ++ bfqg = bfq_bic_update_cgroup(bic); ++ ++new_queue: ++ bfqq = bic_to_bfqq(bic, is_sync); ++ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { ++ bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask); ++ bic_set_bfqq(bic, bfqq, is_sync); ++ if (split && is_sync) { ++ if ((bic->was_in_burst_list && bfqd->large_burst) || ++ bic->saved_in_large_burst) ++ bfq_mark_bfqq_in_large_burst(bfqq); ++ else { ++ bfq_clear_bfqq_in_large_burst(bfqq); ++ if (bic->was_in_burst_list) ++ hlist_add_head(&bfqq->burst_list_node, ++ &bfqd->burst_list); ++ } ++ } ++ } else { ++ /* If the queue was seeky for too long, break it apart. */ ++ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { ++ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); ++ bfqq = bfq_split_bfqq(bic, bfqq); ++ split = true; ++ if (!bfqq) ++ goto new_queue; ++ } ++ } ++ ++ bfqq->allocated[rw]++; ++ atomic_inc(&bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, ++ atomic_read(&bfqq->ref)); ++ ++ rq->elv.priv[0] = bic; ++ rq->elv.priv[1] = bfqq; ++ ++ /* ++ * If a bfq_queue has only one process reference, it is owned ++ * by only one bfq_io_cq: we can set the bic field of the ++ * bfq_queue to the address of that structure. Also, if the ++ * queue has just been split, mark a flag so that the ++ * information is available to the other scheduler hooks. ++ */ ++ if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) { ++ bfqq->bic = bic; ++ if (split) { ++ bfq_mark_bfqq_just_split(bfqq); ++ /* ++ * If the queue has just been split from a shared ++ * queue, restore the idle window and the possible ++ * weight raising period. 
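++			 * The state restored below is the one stashed
++			 * away by bfq_bfqq_save_state() while the queue
++			 * was shared, including the wr_time_left value
++			 * described in bfq_init_icq().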
++ */ ++ bfq_bfqq_resume_state(bfqq, bic); ++ } ++ } ++ ++ spin_unlock_irqrestore(q->queue_lock, flags); ++ ++ return 0; ++ ++queue_fail: ++ bfq_schedule_dispatch(bfqd); ++ spin_unlock_irqrestore(q->queue_lock, flags); ++ ++ return 1; ++} ++ ++static void bfq_kick_queue(struct work_struct *work) ++{ ++ struct bfq_data *bfqd = ++ container_of(work, struct bfq_data, unplug_work); ++ struct request_queue *q = bfqd->queue; ++ ++ spin_lock_irq(q->queue_lock); ++ __blk_run_queue(q); ++ spin_unlock_irq(q->queue_lock); ++} ++ ++/* ++ * Handler of the expiration of the timer running if the in-service queue ++ * is idling inside its time slice. ++ */ ++static void bfq_idle_slice_timer(unsigned long data) ++{ ++ struct bfq_data *bfqd = (struct bfq_data *)data; ++ struct bfq_queue *bfqq; ++ unsigned long flags; ++ enum bfqq_expiration reason; ++ ++ spin_lock_irqsave(bfqd->queue->queue_lock, flags); ++ ++ bfqq = bfqd->in_service_queue; ++ /* ++ * Theoretical race here: the in-service queue can be NULL or ++ * different from the queue that was idling if the timer handler ++ * spins on the queue_lock and a new request arrives for the ++ * current queue and there is a full dispatch cycle that changes ++ * the in-service queue. This can hardly happen, but in the worst ++ * case we just expire a queue too early. ++ */ ++ if (bfqq != NULL) { ++ bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); ++ if (bfq_bfqq_budget_timeout(bfqq)) ++ /* ++ * Also here the queue can be safely expired ++ * for budget timeout without wasting ++ * guarantees ++ */ ++ reason = BFQ_BFQQ_BUDGET_TIMEOUT; ++ else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) ++ /* ++ * The queue may not be empty upon timer expiration, ++ * because we may not disable the timer when the ++ * first request of the in-service queue arrives ++ * during disk idling. ++ */ ++ reason = BFQ_BFQQ_TOO_IDLE; ++ else ++ goto schedule_dispatch; ++ ++ bfq_bfqq_expire(bfqd, bfqq, 1, reason); ++ } ++ ++schedule_dispatch: ++ bfq_schedule_dispatch(bfqd); ++ ++ spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); ++} ++ ++static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) ++{ ++ del_timer_sync(&bfqd->idle_slice_timer); ++ cancel_work_sync(&bfqd->unplug_work); ++} ++ ++static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd, ++ struct bfq_queue **bfqq_ptr) ++{ ++ struct bfq_group *root_group = bfqd->root_group; ++ struct bfq_queue *bfqq = *bfqq_ptr; ++ ++ bfq_log(bfqd, "put_async_bfqq: %p", bfqq); ++ if (bfqq != NULL) { ++ bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); ++ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ bfq_put_queue(bfqq); ++ *bfqq_ptr = NULL; ++ } ++} ++ ++/* ++ * Release all the bfqg references to its async queues. If we are ++ * deallocating the group these queues may still contain requests, so ++ * we reparent them to the root cgroup (i.e., the only one that will ++ * exist for sure until all the requests on a device are gone). 
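++ * Per group this means 2 * IOPRIO_BE_NR class/level queues (RT and
++ * BE) plus the single idle-class queue: 17 queues in all with the
++ * usual IOPRIO_BE_NR = 8.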
++ */ ++static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) ++{ ++ int i, j; ++ ++ for (i = 0; i < 2; i++) ++ for (j = 0; j < IOPRIO_BE_NR; j++) ++ __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]); ++ ++ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); ++} ++ ++static void bfq_exit_queue(struct elevator_queue *e) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ struct request_queue *q = bfqd->queue; ++ struct bfq_queue *bfqq, *n; ++ ++ bfq_shutdown_timer_wq(bfqd); ++ ++ spin_lock_irq(q->queue_lock); ++ ++ BUG_ON(bfqd->in_service_queue != NULL); ++ list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) ++ bfq_deactivate_bfqq(bfqd, bfqq, 0); ++ ++ bfq_disconnect_groups(bfqd); ++ spin_unlock_irq(q->queue_lock); ++ ++ bfq_shutdown_timer_wq(bfqd); ++ ++ synchronize_rcu(); ++ ++ BUG_ON(timer_pending(&bfqd->idle_slice_timer)); ++ ++ bfq_free_root_group(bfqd); ++ kfree(bfqd); ++} ++ ++static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) ++{ ++ struct bfq_group *bfqg; ++ struct bfq_data *bfqd; ++ struct elevator_queue *eq; ++ ++ eq = elevator_alloc(q, e); ++ if (eq == NULL) ++ return -ENOMEM; ++ ++ bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node); ++ if (bfqd == NULL) { ++ kobject_put(&eq->kobj); ++ return -ENOMEM; ++ } ++ eq->elevator_data = bfqd; ++ ++ /* ++ * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. ++ * Grab a permanent reference to it, so that the normal code flow ++ * will not attempt to free it. ++ */ ++ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0); ++ atomic_inc(&bfqd->oom_bfqq.ref); ++ bfqd->oom_bfqq.entity.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO; ++ bfqd->oom_bfqq.entity.new_ioprio_class = IOPRIO_CLASS_BE; ++ /* ++ * Trigger weight initialization, according to ioprio, at the ++ * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio ++ * class won't be changed any more. 
++ */ ++ bfqd->oom_bfqq.entity.ioprio_changed = 1; ++ ++ bfqd->queue = q; ++ ++ spin_lock_irq(q->queue_lock); ++ q->elevator = eq; ++ spin_unlock_irq(q->queue_lock); ++ ++ bfqg = bfq_alloc_root_group(bfqd, q->node); ++ if (bfqg == NULL) { ++ kfree(bfqd); ++ kobject_put(&eq->kobj); ++ return -ENOMEM; ++ } ++ ++ bfqd->root_group = bfqg; ++ bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group); ++#ifdef CONFIG_CGROUP_BFQIO ++ bfqd->active_numerous_groups = 0; ++#endif ++ ++ init_timer(&bfqd->idle_slice_timer); ++ bfqd->idle_slice_timer.function = bfq_idle_slice_timer; ++ bfqd->idle_slice_timer.data = (unsigned long)bfqd; ++ ++ bfqd->rq_pos_tree = RB_ROOT; ++ bfqd->queue_weights_tree = RB_ROOT; ++ bfqd->group_weights_tree = RB_ROOT; ++ ++ INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); ++ ++ INIT_LIST_HEAD(&bfqd->active_list); ++ INIT_LIST_HEAD(&bfqd->idle_list); ++ INIT_HLIST_HEAD(&bfqd->burst_list); ++ ++ bfqd->hw_tag = -1; ++ ++ bfqd->bfq_max_budget = bfq_default_max_budget; ++ ++ bfqd->bfq_quantum = bfq_quantum; ++ bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0]; ++ bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1]; ++ bfqd->bfq_back_max = bfq_back_max; ++ bfqd->bfq_back_penalty = bfq_back_penalty; ++ bfqd->bfq_slice_idle = bfq_slice_idle; ++ bfqd->bfq_class_idle_last_service = 0; ++ bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; ++ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; ++ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; ++ ++ bfqd->bfq_coop_thresh = 2; ++ bfqd->bfq_failed_cooperations = 7000; ++ bfqd->bfq_requests_within_timer = 120; ++ ++ bfqd->bfq_large_burst_thresh = 11; ++ bfqd->bfq_burst_interval = msecs_to_jiffies(500); ++ ++ bfqd->low_latency = true; ++ ++ bfqd->bfq_wr_coeff = 20; ++ bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300); ++ bfqd->bfq_wr_max_time = 0; ++ bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000); ++ bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500); ++ bfqd->bfq_wr_max_softrt_rate = 7000; /* ++ * Approximate rate required ++ * to playback or record a ++ * high-definition compressed ++ * video. ++ */ ++ bfqd->wr_busy_queues = 0; ++ bfqd->busy_in_flight_queues = 0; ++ bfqd->const_seeky_busy_in_flight_queues = 0; ++ ++ /* ++ * Begin by assuming, optimistically, that the device peak rate is ++ * equal to the highest reference rate. ++ */ ++ bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] * ++ T_fast[blk_queue_nonrot(bfqd->queue)]; ++ bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)]; ++ bfqd->device_speed = BFQ_BFQD_FAST; ++ ++ return 0; ++} ++ ++static void bfq_slab_kill(void) ++{ ++ if (bfq_pool != NULL) ++ kmem_cache_destroy(bfq_pool); ++} ++ ++static int __init bfq_slab_setup(void) ++{ ++ bfq_pool = KMEM_CACHE(bfq_queue, 0); ++ if (bfq_pool == NULL) ++ return -ENOMEM; ++ return 0; ++} ++ ++static ssize_t bfq_var_show(unsigned int var, char *page) ++{ ++ return sprintf(page, "%d\n", var); ++} ++ ++static ssize_t bfq_var_store(unsigned long *var, const char *page, ++ size_t count) ++{ ++ unsigned long new_val; ++ int ret = kstrtoul(page, 10, &new_val); ++ ++ if (ret == 0) ++ *var = new_val; ++ ++ return count; ++} ++ ++static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ? 
++ jiffies_to_msecs(bfqd->bfq_wr_max_time) : ++ jiffies_to_msecs(bfq_wr_duration(bfqd))); ++} ++ ++static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) ++{ ++ struct bfq_queue *bfqq; ++ struct bfq_data *bfqd = e->elevator_data; ++ ssize_t num_char = 0; ++ ++ num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n", ++ bfqd->queued); ++ ++ spin_lock_irq(bfqd->queue->queue_lock); ++ ++ num_char += sprintf(page + num_char, "Active:\n"); ++ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { ++ num_char += sprintf(page + num_char, ++ "pid%d: weight %hu, nr_queued %d %d, dur %d/%u\n", ++ bfqq->pid, ++ bfqq->entity.weight, ++ bfqq->queued[0], ++ bfqq->queued[1], ++ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), ++ jiffies_to_msecs(bfqq->wr_cur_max_time)); ++ } ++ ++ num_char += sprintf(page + num_char, "Idle:\n"); ++ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { ++ num_char += sprintf(page + num_char, ++ "pid%d: weight %hu, dur %d/%u\n", ++ bfqq->pid, ++ bfqq->entity.weight, ++ jiffies_to_msecs(jiffies - ++ bfqq->last_wr_start_finish), ++ jiffies_to_msecs(bfqq->wr_cur_max_time)); ++ } ++ ++ spin_unlock_irq(bfqd->queue->queue_lock); ++ ++ return num_char; ++} ++ ++#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ ++static ssize_t __FUNC(struct elevator_queue *e, char *page) \ ++{ \ ++ struct bfq_data *bfqd = e->elevator_data; \ ++ unsigned int __data = __VAR; \ ++ if (__CONV) \ ++ __data = jiffies_to_msecs(__data); \ ++ return bfq_var_show(__data, (page)); \ ++} ++SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0); ++SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); ++SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); ++SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); ++SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); ++SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); ++SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); ++SHOW_FUNCTION(bfq_max_budget_async_rq_show, ++ bfqd->bfq_max_budget_async_rq, 0); ++SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); ++SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); ++SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); ++SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0); ++SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1); ++SHOW_FUNCTION(bfq_wr_min_idle_time_show, bfqd->bfq_wr_min_idle_time, 1); ++SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async, ++ 1); ++SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0); ++#undef SHOW_FUNCTION ++ ++#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ ++static ssize_t \ ++__FUNC(struct elevator_queue *e, const char *page, size_t count) \ ++{ \ ++ struct bfq_data *bfqd = e->elevator_data; \ ++ unsigned long uninitialized_var(__data); \ ++ int ret = bfq_var_store(&__data, (page), count); \ ++ if (__data < (MIN)) \ ++ __data = (MIN); \ ++ else if (__data > (MAX)) \ ++ __data = (MAX); \ ++ if (__CONV) \ ++ *(__PTR) = msecs_to_jiffies(__data); \ ++ else \ ++ *(__PTR) = __data; \ ++ return ret; \ ++} ++STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0); ++STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, ++ INT_MAX, 1); ++STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, ++ INT_MAX, 1); ++STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); 
++STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, ++ INT_MAX, 0); ++STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); ++STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, ++ 1, INT_MAX, 0); ++STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, ++ INT_MAX, 1); ++STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0); ++STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1); ++STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX, ++ 1); ++STORE_FUNCTION(bfq_wr_min_idle_time_store, &bfqd->bfq_wr_min_idle_time, 0, ++ INT_MAX, 1); ++STORE_FUNCTION(bfq_wr_min_inter_arr_async_store, ++ &bfqd->bfq_wr_min_inter_arr_async, 0, INT_MAX, 1); ++STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0, ++ INT_MAX, 0); ++#undef STORE_FUNCTION ++ ++/* do nothing for the moment */ ++static ssize_t bfq_weights_store(struct elevator_queue *e, ++ const char *page, size_t count) ++{ ++ return count; ++} ++ ++static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) ++{ ++ u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); ++ ++ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) ++ return bfq_calc_max_budget(bfqd->peak_rate, timeout); ++ else ++ return bfq_default_max_budget; ++} ++ ++static ssize_t bfq_max_budget_store(struct elevator_queue *e, ++ const char *page, size_t count) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ unsigned long uninitialized_var(__data); ++ int ret = bfq_var_store(&__data, (page), count); ++ ++ if (__data == 0) ++ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); ++ else { ++ if (__data > INT_MAX) ++ __data = INT_MAX; ++ bfqd->bfq_max_budget = __data; ++ } ++ ++ bfqd->bfq_user_max_budget = __data; ++ ++ return ret; ++} ++ ++static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, ++ const char *page, size_t count) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ unsigned long uninitialized_var(__data); ++ int ret = bfq_var_store(&__data, (page), count); ++ ++ if (__data < 1) ++ __data = 1; ++ else if (__data > INT_MAX) ++ __data = INT_MAX; ++ ++ bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); ++ if (bfqd->bfq_user_max_budget == 0) ++ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); ++ ++ return ret; ++} ++ ++static ssize_t bfq_low_latency_store(struct elevator_queue *e, ++ const char *page, size_t count) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ unsigned long uninitialized_var(__data); ++ int ret = bfq_var_store(&__data, (page), count); ++ ++ if (__data > 1) ++ __data = 1; ++ if (__data == 0 && bfqd->low_latency != 0) ++ bfq_end_wr(bfqd); ++ bfqd->low_latency = __data; ++ ++ return ret; ++} ++ ++#define BFQ_ATTR(name) \ ++ __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store) ++ ++static struct elv_fs_entry bfq_attrs[] = { ++ BFQ_ATTR(quantum), ++ BFQ_ATTR(fifo_expire_sync), ++ BFQ_ATTR(fifo_expire_async), ++ BFQ_ATTR(back_seek_max), ++ BFQ_ATTR(back_seek_penalty), ++ BFQ_ATTR(slice_idle), ++ BFQ_ATTR(max_budget), ++ BFQ_ATTR(max_budget_async_rq), ++ BFQ_ATTR(timeout_sync), ++ BFQ_ATTR(timeout_async), ++ BFQ_ATTR(low_latency), ++ BFQ_ATTR(wr_coeff), ++ BFQ_ATTR(wr_max_time), ++ BFQ_ATTR(wr_rt_max_time), ++ BFQ_ATTR(wr_min_idle_time), ++ BFQ_ATTR(wr_min_inter_arr_async), ++ BFQ_ATTR(wr_max_softrt_rate), ++ BFQ_ATTR(weights), ++ __ATTR_NULL ++}; ++ ++static struct elevator_type iosched_bfq = { ++ .ops = { ++ 
.elevator_merge_fn = bfq_merge, ++ .elevator_merged_fn = bfq_merged_request, ++ .elevator_merge_req_fn = bfq_merged_requests, ++ .elevator_allow_merge_fn = bfq_allow_merge, ++ .elevator_dispatch_fn = bfq_dispatch_requests, ++ .elevator_add_req_fn = bfq_insert_request, ++ .elevator_activate_req_fn = bfq_activate_request, ++ .elevator_deactivate_req_fn = bfq_deactivate_request, ++ .elevator_completed_req_fn = bfq_completed_request, ++ .elevator_former_req_fn = elv_rb_former_request, ++ .elevator_latter_req_fn = elv_rb_latter_request, ++ .elevator_init_icq_fn = bfq_init_icq, ++ .elevator_exit_icq_fn = bfq_exit_icq, ++ .elevator_set_req_fn = bfq_set_request, ++ .elevator_put_req_fn = bfq_put_request, ++ .elevator_may_queue_fn = bfq_may_queue, ++ .elevator_init_fn = bfq_init_queue, ++ .elevator_exit_fn = bfq_exit_queue, ++ }, ++ .icq_size = sizeof(struct bfq_io_cq), ++ .icq_align = __alignof__(struct bfq_io_cq), ++ .elevator_attrs = bfq_attrs, ++ .elevator_name = "bfq", ++ .elevator_owner = THIS_MODULE, ++}; ++ ++static int __init bfq_init(void) ++{ ++ /* ++ * Can be 0 on HZ < 1000 setups. ++ */ ++ if (bfq_slice_idle == 0) ++ bfq_slice_idle = 1; ++ ++ if (bfq_timeout_async == 0) ++ bfq_timeout_async = 1; ++ ++ if (bfq_slab_setup()) ++ return -ENOMEM; ++ ++ /* ++ * Times to load large popular applications for the typical systems ++ * installed on the reference devices (see the comments before the ++ * definitions of the two arrays). ++ */ ++ T_slow[0] = msecs_to_jiffies(2600); ++ T_slow[1] = msecs_to_jiffies(1000); ++ T_fast[0] = msecs_to_jiffies(5500); ++ T_fast[1] = msecs_to_jiffies(2000); ++ ++ /* ++ * Thresholds that determine the switch between speed classes (see ++ * the comments before the definition of the array). ++ */ ++ device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2; ++ device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2; ++ ++ elv_register(&iosched_bfq); ++ pr_info("BFQ I/O-scheduler version: v7r7"); ++ ++ return 0; ++} ++ ++static void __exit bfq_exit(void) ++{ ++ elv_unregister(&iosched_bfq); ++ bfq_slab_kill(); ++} ++ ++module_init(bfq_init); ++module_exit(bfq_exit); ++ ++MODULE_AUTHOR("Fabio Checconi, Paolo Valente"); ++MODULE_LICENSE("GPL"); +diff -Nur linux-4.1.3/block/bfq-sched.c linux-xbian-imx6/block/bfq-sched.c +--- linux-4.1.3/block/bfq-sched.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/block/bfq-sched.c 2015-07-27 23:13:03.604123194 +0200 +@@ -0,0 +1,1186 @@ ++/* ++ * BFQ: Hierarchical B-WF2Q+ scheduler. ++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ * ++ * Copyright (C) 2010 Paolo Valente ++ */ ++ ++#ifdef CONFIG_CGROUP_BFQIO ++#define for_each_entity(entity) \ ++ for (; entity != NULL; entity = entity->parent) ++ ++#define for_each_entity_safe(entity, parent) \ ++ for (; entity && ({ parent = entity->parent; 1; }); entity = parent) ++ ++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, ++ int extract, ++ struct bfq_data *bfqd); ++ ++static inline void bfq_update_budget(struct bfq_entity *next_in_service) ++{ ++ struct bfq_entity *bfqg_entity; ++ struct bfq_group *bfqg; ++ struct bfq_sched_data *group_sd; ++ ++ BUG_ON(next_in_service == NULL); ++ ++ group_sd = next_in_service->sched_data; ++ ++ bfqg = container_of(group_sd, struct bfq_group, sched_data); ++ /* ++ * bfq_group's my_entity field is not NULL only if the group ++ * is not the root group. 
We must not touch the root entity ++ * as it must never become an in-service entity. ++ */ ++ bfqg_entity = bfqg->my_entity; ++ if (bfqg_entity != NULL) ++ bfqg_entity->budget = next_in_service->budget; ++} ++ ++static int bfq_update_next_in_service(struct bfq_sched_data *sd) ++{ ++ struct bfq_entity *next_in_service; ++ ++ if (sd->in_service_entity != NULL) ++ /* will update/requeue at the end of service */ ++ return 0; ++ ++ /* ++ * NOTE: this can be improved in many ways, such as returning ++ * 1 (and thus propagating upwards the update) only when the ++ * budget changes, or caching the bfqq that will be scheduled ++ * next from this subtree. By now we worry more about ++ * correctness than about performance... ++ */ ++ next_in_service = bfq_lookup_next_entity(sd, 0, NULL); ++ sd->next_in_service = next_in_service; ++ ++ if (next_in_service != NULL) ++ bfq_update_budget(next_in_service); ++ ++ return 1; ++} ++ ++static inline void bfq_check_next_in_service(struct bfq_sched_data *sd, ++ struct bfq_entity *entity) ++{ ++ BUG_ON(sd->next_in_service != entity); ++} ++#else ++#define for_each_entity(entity) \ ++ for (; entity != NULL; entity = NULL) ++ ++#define for_each_entity_safe(entity, parent) \ ++ for (parent = NULL; entity != NULL; entity = parent) ++ ++static inline int bfq_update_next_in_service(struct bfq_sched_data *sd) ++{ ++ return 0; ++} ++ ++static inline void bfq_check_next_in_service(struct bfq_sched_data *sd, ++ struct bfq_entity *entity) ++{ ++} ++ ++static inline void bfq_update_budget(struct bfq_entity *next_in_service) ++{ ++} ++#endif ++ ++/* ++ * Shift for timestamp calculations. This actually limits the maximum ++ * service allowed in one timestamp delta (small shift values increase it), ++ * the maximum total weight that can be used for the queues in the system ++ * (big shift values increase it), and the period of virtual time ++ * wraparounds. ++ */ ++#define WFQ_SERVICE_SHIFT 22 ++ ++/** ++ * bfq_gt - compare two timestamps. ++ * @a: first ts. ++ * @b: second ts. ++ * ++ * Return @a > @b, dealing with wrapping correctly. ++ */ ++static inline int bfq_gt(u64 a, u64 b) ++{ ++ return (s64)(a - b) > 0; ++} ++ ++static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = NULL; ++ ++ BUG_ON(entity == NULL); ++ ++ if (entity->my_sched_data == NULL) ++ bfqq = container_of(entity, struct bfq_queue, entity); ++ ++ return bfqq; ++} ++ ++ ++/** ++ * bfq_delta - map service into the virtual time domain. ++ * @service: amount of service. ++ * @weight: scale factor (weight of an entity or weight sum). ++ */ ++static inline u64 bfq_delta(unsigned long service, ++ unsigned long weight) ++{ ++ u64 d = (u64)service << WFQ_SERVICE_SHIFT; ++ ++ do_div(d, weight); ++ return d; ++} ++ ++/** ++ * bfq_calc_finish - assign the finish time to an entity. ++ * @entity: the entity to act upon. ++ * @service: the service to be charged to the entity. 
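++ *
++ * The finish timestamp lives in the virtual time domain:
++ * finish = start + (service << WFQ_SERVICE_SHIFT) / weight. As a
++ * concrete instance, charging 100 sectors of service to an entity of
++ * weight 100 advances its finish time by exactly 1 << WFQ_SERVICE_SHIFT.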
++ */ ++static inline void bfq_calc_finish(struct bfq_entity *entity, ++ unsigned long service) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ ++ BUG_ON(entity->weight == 0); ++ ++ entity->finish = entity->start + ++ bfq_delta(service, entity->weight); ++ ++ if (bfqq != NULL) { ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "calc_finish: serv %lu, w %d", ++ service, entity->weight); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "calc_finish: start %llu, finish %llu, delta %llu", ++ entity->start, entity->finish, ++ bfq_delta(service, entity->weight)); ++ } ++} ++ ++/** ++ * bfq_entity_of - get an entity from a node. ++ * @node: the node field of the entity. ++ * ++ * Convert a node pointer to the relative entity. This is used only ++ * to simplify the logic of some functions and not as the generic ++ * conversion mechanism because, e.g., in the tree walking functions, ++ * the check for a %NULL value would be redundant. ++ */ ++static inline struct bfq_entity *bfq_entity_of(struct rb_node *node) ++{ ++ struct bfq_entity *entity = NULL; ++ ++ if (node != NULL) ++ entity = rb_entry(node, struct bfq_entity, rb_node); ++ ++ return entity; ++} ++ ++/** ++ * bfq_extract - remove an entity from a tree. ++ * @root: the tree root. ++ * @entity: the entity to remove. ++ */ ++static inline void bfq_extract(struct rb_root *root, ++ struct bfq_entity *entity) ++{ ++ BUG_ON(entity->tree != root); ++ ++ entity->tree = NULL; ++ rb_erase(&entity->rb_node, root); ++} ++ ++/** ++ * bfq_idle_extract - extract an entity from the idle tree. ++ * @st: the service tree of the owning @entity. ++ * @entity: the entity being removed. ++ */ ++static void bfq_idle_extract(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct rb_node *next; ++ ++ BUG_ON(entity->tree != &st->idle); ++ ++ if (entity == st->first_idle) { ++ next = rb_next(&entity->rb_node); ++ st->first_idle = bfq_entity_of(next); ++ } ++ ++ if (entity == st->last_idle) { ++ next = rb_prev(&entity->rb_node); ++ st->last_idle = bfq_entity_of(next); ++ } ++ ++ bfq_extract(&st->idle, entity); ++ ++ if (bfqq != NULL) ++ list_del(&bfqq->bfqq_list); ++} ++ ++/** ++ * bfq_insert - generic tree insertion. ++ * @root: tree root. ++ * @entity: entity to insert. ++ * ++ * This is used for the idle and the active tree, since they are both ++ * ordered by finish time. ++ */ ++static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) ++{ ++ struct bfq_entity *entry; ++ struct rb_node **node = &root->rb_node; ++ struct rb_node *parent = NULL; ++ ++ BUG_ON(entity->tree != NULL); ++ ++ while (*node != NULL) { ++ parent = *node; ++ entry = rb_entry(parent, struct bfq_entity, rb_node); ++ ++ if (bfq_gt(entry->finish, entity->finish)) ++ node = &parent->rb_left; ++ else ++ node = &parent->rb_right; ++ } ++ ++ rb_link_node(&entity->rb_node, parent, node); ++ rb_insert_color(&entity->rb_node, root); ++ ++ entity->tree = root; ++} ++ ++/** ++ * bfq_update_min - update the min_start field of a entity. ++ * @entity: the entity to update. ++ * @node: one of its children. ++ * ++ * This function is called when @entity may store an invalid value for ++ * min_start due to updates to the active tree. The function assumes ++ * that the subtree rooted at @node (which may be its left or its right ++ * child) has a valid min_start value. 
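++ *
++ * The invariant being maintained is min_start(node) =
++ * min(start(node), min_start(left child), min_start(right child)).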
++ */ ++static inline void bfq_update_min(struct bfq_entity *entity, ++ struct rb_node *node) ++{ ++ struct bfq_entity *child; ++ ++ if (node != NULL) { ++ child = rb_entry(node, struct bfq_entity, rb_node); ++ if (bfq_gt(entity->min_start, child->min_start)) ++ entity->min_start = child->min_start; ++ } ++} ++ ++/** ++ * bfq_update_active_node - recalculate min_start. ++ * @node: the node to update. ++ * ++ * @node may have changed position or one of its children may have moved, ++ * this function updates its min_start value. The left and right subtrees ++ * are assumed to hold a correct min_start value. ++ */ ++static inline void bfq_update_active_node(struct rb_node *node) ++{ ++ struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); ++ ++ entity->min_start = entity->start; ++ bfq_update_min(entity, node->rb_right); ++ bfq_update_min(entity, node->rb_left); ++} ++ ++/** ++ * bfq_update_active_tree - update min_start for the whole active tree. ++ * @node: the starting node. ++ * ++ * @node must be the deepest modified node after an update. This function ++ * updates its min_start using the values held by its children, assuming ++ * that they did not change, and then updates all the nodes that may have ++ * changed in the path to the root. The only nodes that may have changed ++ * are the ones in the path or their siblings. ++ */ ++static void bfq_update_active_tree(struct rb_node *node) ++{ ++ struct rb_node *parent; ++ ++up: ++ bfq_update_active_node(node); ++ ++ parent = rb_parent(node); ++ if (parent == NULL) ++ return; ++ ++ if (node == parent->rb_left && parent->rb_right != NULL) ++ bfq_update_active_node(parent->rb_right); ++ else if (parent->rb_left != NULL) ++ bfq_update_active_node(parent->rb_left); ++ ++ node = parent; ++ goto up; ++} ++ ++static void bfq_weights_tree_add(struct bfq_data *bfqd, ++ struct bfq_entity *entity, ++ struct rb_root *root); ++ ++static void bfq_weights_tree_remove(struct bfq_data *bfqd, ++ struct bfq_entity *entity, ++ struct rb_root *root); ++ ++ ++/** ++ * bfq_active_insert - insert an entity in the active tree of its ++ * group/device. ++ * @st: the service tree of the entity. ++ * @entity: the entity being inserted. ++ * ++ * The active tree is ordered by finish time, but an extra key is kept ++ * per each node, containing the minimum value for the start times of ++ * its children (and the node itself), so it's possible to search for ++ * the eligible node with the lowest finish time in logarithmic time. 
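++ *
++ * This is the classic augmented-rbtree technique: insertion preserves the
++ * finish-time ordering, after which min_start is refreshed bottom-up along
++ * the inserted node's path (see bfq_update_active_tree()).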
++ */ ++static void bfq_active_insert(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct rb_node *node = &entity->rb_node; ++#ifdef CONFIG_CGROUP_BFQIO ++ struct bfq_sched_data *sd = NULL; ++ struct bfq_group *bfqg = NULL; ++ struct bfq_data *bfqd = NULL; ++#endif ++ ++ bfq_insert(&st->active, entity); ++ ++ if (node->rb_left != NULL) ++ node = node->rb_left; ++ else if (node->rb_right != NULL) ++ node = node->rb_right; ++ ++ bfq_update_active_tree(node); ++ ++#ifdef CONFIG_CGROUP_BFQIO ++ sd = entity->sched_data; ++ bfqg = container_of(sd, struct bfq_group, sched_data); ++ BUG_ON(!bfqg); ++ bfqd = (struct bfq_data *)bfqg->bfqd; ++#endif ++ if (bfqq != NULL) ++ list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); ++#ifdef CONFIG_CGROUP_BFQIO ++ else { /* bfq_group */ ++ BUG_ON(!bfqd); ++ bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree); ++ } ++ if (bfqg != bfqd->root_group) { ++ BUG_ON(!bfqg); ++ BUG_ON(!bfqd); ++ bfqg->active_entities++; ++ if (bfqg->active_entities == 2) ++ bfqd->active_numerous_groups++; ++ } ++#endif ++} ++ ++/** ++ * bfq_ioprio_to_weight - calc a weight from an ioprio. ++ * @ioprio: the ioprio value to convert. ++ */ ++static inline unsigned short bfq_ioprio_to_weight(int ioprio) ++{ ++ BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); ++ return IOPRIO_BE_NR - ioprio; ++} ++ ++/** ++ * bfq_weight_to_ioprio - calc an ioprio from a weight. ++ * @weight: the weight value to convert. ++ * ++ * To preserve as mush as possible the old only-ioprio user interface, ++ * 0 is used as an escape ioprio value for weights (numerically) equal or ++ * larger than IOPRIO_BE_NR ++ */ ++static inline unsigned short bfq_weight_to_ioprio(int weight) ++{ ++ BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); ++ return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight; ++} ++ ++static inline void bfq_get_entity(struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ ++ if (bfqq != NULL) { ++ atomic_inc(&bfqq->ref); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ } ++} ++ ++/** ++ * bfq_find_deepest - find the deepest node that an extraction can modify. ++ * @node: the node being removed. ++ * ++ * Do the first step of an extraction in an rb tree, looking for the ++ * node that will replace @node, and returning the deepest node that ++ * the following modifications to the tree can touch. If @node is the ++ * last node in the tree return %NULL. ++ */ ++static struct rb_node *bfq_find_deepest(struct rb_node *node) ++{ ++ struct rb_node *deepest; ++ ++ if (node->rb_right == NULL && node->rb_left == NULL) ++ deepest = rb_parent(node); ++ else if (node->rb_right == NULL) ++ deepest = node->rb_left; ++ else if (node->rb_left == NULL) ++ deepest = node->rb_right; ++ else { ++ deepest = rb_next(node); ++ if (deepest->rb_right != NULL) ++ deepest = deepest->rb_right; ++ else if (rb_parent(deepest) != node) ++ deepest = rb_parent(deepest); ++ } ++ ++ return deepest; ++} ++ ++/** ++ * bfq_active_extract - remove an entity from the active tree. ++ * @st: the service_tree containing the tree. ++ * @entity: the entity being removed. 
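++ *
++ * min_start is refreshed starting from the deepest node whose subtree the
++ * rb_erase() rebalancing can have modified (see bfq_find_deepest()).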
++ */ ++static void bfq_active_extract(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct rb_node *node; ++#ifdef CONFIG_CGROUP_BFQIO ++ struct bfq_sched_data *sd = NULL; ++ struct bfq_group *bfqg = NULL; ++ struct bfq_data *bfqd = NULL; ++#endif ++ ++ node = bfq_find_deepest(&entity->rb_node); ++ bfq_extract(&st->active, entity); ++ ++ if (node != NULL) ++ bfq_update_active_tree(node); ++ ++#ifdef CONFIG_CGROUP_BFQIO ++ sd = entity->sched_data; ++ bfqg = container_of(sd, struct bfq_group, sched_data); ++ BUG_ON(!bfqg); ++ bfqd = (struct bfq_data *)bfqg->bfqd; ++#endif ++ if (bfqq != NULL) ++ list_del(&bfqq->bfqq_list); ++#ifdef CONFIG_CGROUP_BFQIO ++ else { /* bfq_group */ ++ BUG_ON(!bfqd); ++ bfq_weights_tree_remove(bfqd, entity, ++ &bfqd->group_weights_tree); ++ } ++ if (bfqg != bfqd->root_group) { ++ BUG_ON(!bfqg); ++ BUG_ON(!bfqd); ++ BUG_ON(!bfqg->active_entities); ++ bfqg->active_entities--; ++ if (bfqg->active_entities == 1) { ++ BUG_ON(!bfqd->active_numerous_groups); ++ bfqd->active_numerous_groups--; ++ } ++ } ++#endif ++} ++ ++/** ++ * bfq_idle_insert - insert an entity into the idle tree. ++ * @st: the service tree containing the tree. ++ * @entity: the entity to insert. ++ */ ++static void bfq_idle_insert(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct bfq_entity *first_idle = st->first_idle; ++ struct bfq_entity *last_idle = st->last_idle; ++ ++ if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish)) ++ st->first_idle = entity; ++ if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish)) ++ st->last_idle = entity; ++ ++ bfq_insert(&st->idle, entity); ++ ++ if (bfqq != NULL) ++ list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list); ++} ++ ++/** ++ * bfq_forget_entity - remove an entity from the wfq trees. ++ * @st: the service tree. ++ * @entity: the entity being removed. ++ * ++ * Update the device status and forget everything about @entity, putting ++ * the device reference to it, if it is a queue. Entities belonging to ++ * groups are not refcounted. ++ */ ++static void bfq_forget_entity(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct bfq_sched_data *sd; ++ ++ BUG_ON(!entity->on_st); ++ ++ entity->on_st = 0; ++ st->wsum -= entity->weight; ++ if (bfqq != NULL) { ++ sd = entity->sched_data; ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ bfq_put_queue(bfqq); ++ } ++} ++ ++/** ++ * bfq_put_idle_entity - release the idle tree ref of an entity. ++ * @st: service tree for the entity. ++ * @entity: the entity being released. ++ */ ++static void bfq_put_idle_entity(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ bfq_idle_extract(st, entity); ++ bfq_forget_entity(st, entity); ++} ++ ++/** ++ * bfq_forget_idle - update the idle tree if necessary. ++ * @st: the service tree to act upon. ++ * ++ * To preserve the global O(log N) complexity we only remove one entry here; ++ * as the idle tree will not grow indefinitely this can be done safely. 
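++ * (At most one expired entity is released per call, so the cost of the
++ * cleanup stays amortized across subsequent vtime updates.)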
++ */ ++static void bfq_forget_idle(struct bfq_service_tree *st) ++{ ++ struct bfq_entity *first_idle = st->first_idle; ++ struct bfq_entity *last_idle = st->last_idle; ++ ++ if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL && ++ !bfq_gt(last_idle->finish, st->vtime)) { ++ /* ++ * Forget the whole idle tree, increasing the vtime past ++ * the last finish time of idle entities. ++ */ ++ st->vtime = last_idle->finish; ++ } ++ ++ if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime)) ++ bfq_put_idle_entity(st, first_idle); ++} ++ ++static struct bfq_service_tree * ++__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_service_tree *new_st = old_st; ++ ++ if (entity->ioprio_changed) { ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ unsigned short prev_weight, new_weight; ++ struct bfq_data *bfqd = NULL; ++ struct rb_root *root; ++#ifdef CONFIG_CGROUP_BFQIO ++ struct bfq_sched_data *sd; ++ struct bfq_group *bfqg; ++#endif ++ ++ if (bfqq != NULL) ++ bfqd = bfqq->bfqd; ++#ifdef CONFIG_CGROUP_BFQIO ++ else { ++ sd = entity->my_sched_data; ++ bfqg = container_of(sd, struct bfq_group, sched_data); ++ BUG_ON(!bfqg); ++ bfqd = (struct bfq_data *)bfqg->bfqd; ++ BUG_ON(!bfqd); ++ } ++#endif ++ ++ BUG_ON(old_st->wsum < entity->weight); ++ old_st->wsum -= entity->weight; ++ ++ if (entity->new_weight != entity->orig_weight) { ++ if (entity->new_weight < BFQ_MIN_WEIGHT || ++ entity->new_weight > BFQ_MAX_WEIGHT) { ++ printk(KERN_CRIT "update_weight_prio: " ++ "new_weight %d\n", ++ entity->new_weight); ++ BUG(); ++ } ++ entity->orig_weight = entity->new_weight; ++ entity->ioprio = ++ bfq_weight_to_ioprio(entity->orig_weight); ++ } else if (entity->new_ioprio != entity->ioprio) { ++ entity->ioprio = entity->new_ioprio; ++ entity->orig_weight = ++ bfq_ioprio_to_weight(entity->ioprio); ++ } else ++ entity->new_weight = entity->orig_weight = ++ bfq_ioprio_to_weight(entity->ioprio); ++ ++ entity->ioprio_class = entity->new_ioprio_class; ++ entity->ioprio_changed = 0; ++ ++ /* ++ * NOTE: here we may be changing the weight too early, ++ * this will cause unfairness. The correct approach ++ * would have required additional complexity to defer ++ * weight changes to the proper time instants (i.e., ++ * when entity->finish <= old_st->vtime). ++ */ ++ new_st = bfq_entity_service_tree(entity); ++ ++ prev_weight = entity->weight; ++ new_weight = entity->orig_weight * ++ (bfqq != NULL ? bfqq->wr_coeff : 1); ++ /* ++ * If the weight of the entity changes, remove the entity ++ * from its old weight counter (if there is a counter ++ * associated with the entity), and add it to the counter ++ * associated with its new weight. ++ */ ++ if (prev_weight != new_weight) { ++ root = bfqq ? &bfqd->queue_weights_tree : ++ &bfqd->group_weights_tree; ++ bfq_weights_tree_remove(bfqd, entity, root); ++ } ++ entity->weight = new_weight; ++ /* ++ * Add the entity to its weights tree only if it is ++ * not associated with a weight-raised queue. ++ */ ++ if (prev_weight != new_weight && ++ (bfqq ? bfqq->wr_coeff == 1 : 1)) ++ /* If we get here, root has been initialized. */ ++ bfq_weights_tree_add(bfqd, entity, root); ++ ++ new_st->wsum += entity->weight; ++ ++ if (new_st != old_st) ++ entity->start = new_st->vtime; ++ } ++ ++ return new_st; ++} ++ ++/** ++ * bfq_bfqq_served - update the scheduler status after selection for ++ * service. ++ * @bfqq: the queue being served. ++ * @served: bytes to transfer. 
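++ * Each service tree on the path to the root sees its vtime advance by
++ * bfq_delta(served, st->wsum): e.g. (illustrative) 8192 bytes served
++ * against a weight sum of 200 advance that tree's vtime by
++ * (8192 << WFQ_SERVICE_SHIFT) / 200.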
++ * ++ * NOTE: this can be optimized, as the timestamps of upper level entities ++ * are synchronized every time a new bfqq is selected for service. By now, ++ * we keep it to better check consistency. ++ */ ++static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ struct bfq_service_tree *st; ++ ++ for_each_entity(entity) { ++ st = bfq_entity_service_tree(entity); ++ ++ entity->service += served; ++ BUG_ON(entity->service > entity->budget); ++ BUG_ON(st->wsum == 0); ++ ++ st->vtime += bfq_delta(served, st->wsum); ++ bfq_forget_idle(st); ++ } ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served); ++} ++ ++/** ++ * bfq_bfqq_charge_full_budget - set the service to the entity budget. ++ * @bfqq: the queue that needs a service update. ++ * ++ * When it's not possible to be fair in the service domain, because ++ * a queue is not consuming its budget fast enough (the meaning of ++ * fast depends on the timeout parameter), we charge it a full ++ * budget. In this way we should obtain a sort of time-domain ++ * fairness among all the seeky/slow queues. ++ */ ++static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); ++ ++ bfq_bfqq_served(bfqq, entity->budget - entity->service); ++} ++ ++/** ++ * __bfq_activate_entity - activate an entity. ++ * @entity: the entity being activated. ++ * ++ * Called whenever an entity is activated, i.e., it is not active and one ++ * of its children receives a new request, or has to be reactivated due to ++ * budget exhaustion. It uses the current budget of the entity (and the ++ * service received if @entity is active) of the queue to calculate its ++ * timestamps. ++ */ ++static void __bfq_activate_entity(struct bfq_entity *entity) ++{ ++ struct bfq_sched_data *sd = entity->sched_data; ++ struct bfq_service_tree *st = bfq_entity_service_tree(entity); ++ ++ if (entity == sd->in_service_entity) { ++ BUG_ON(entity->tree != NULL); ++ /* ++ * If we are requeueing the current entity we have ++ * to take care of not charging to it service it has ++ * not received. ++ */ ++ bfq_calc_finish(entity, entity->service); ++ entity->start = entity->finish; ++ sd->in_service_entity = NULL; ++ } else if (entity->tree == &st->active) { ++ /* ++ * Requeueing an entity due to a change of some ++ * next_in_service entity below it. We reuse the ++ * old start time. ++ */ ++ bfq_active_extract(st, entity); ++ } else if (entity->tree == &st->idle) { ++ /* ++ * Must be on the idle tree, bfq_idle_extract() will ++ * check for that. ++ */ ++ bfq_idle_extract(st, entity); ++ entity->start = bfq_gt(st->vtime, entity->finish) ? ++ st->vtime : entity->finish; ++ } else { ++ /* ++ * The finish time of the entity may be invalid, and ++ * it is in the past for sure, otherwise the queue ++ * would have been on the idle tree. ++ */ ++ entity->start = st->vtime; ++ st->wsum += entity->weight; ++ bfq_get_entity(entity); ++ ++ BUG_ON(entity->on_st); ++ entity->on_st = 1; ++ } ++ ++ st = __bfq_entity_update_weight_prio(st, entity); ++ bfq_calc_finish(entity, entity->budget); ++ bfq_active_insert(st, entity); ++} ++ ++/** ++ * bfq_activate_entity - activate an entity and its ancestors if necessary. ++ * @entity: the entity to activate. ++ * ++ * Activate @entity and all the entities on the path from it to the root. 
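++ *
++ * The upward walk stops at the first level that still has an entity in
++ * service, since that level and those above it will be updated when the
++ * in-service entity is rescheduled.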
++ */ ++static void bfq_activate_entity(struct bfq_entity *entity) ++{ ++ struct bfq_sched_data *sd; ++ ++ for_each_entity(entity) { ++ __bfq_activate_entity(entity); ++ ++ sd = entity->sched_data; ++ if (!bfq_update_next_in_service(sd)) ++ /* ++ * No need to propagate the activation to the ++ * upper entities, as they will be updated when ++ * the in-service entity is rescheduled. ++ */ ++ break; ++ } ++} ++ ++/** ++ * __bfq_deactivate_entity - deactivate an entity from its service tree. ++ * @entity: the entity to deactivate. ++ * @requeue: if false, the entity will not be put into the idle tree. ++ * ++ * Deactivate an entity, independently from its previous state. If the ++ * entity was not on a service tree just return, otherwise if it is on ++ * any scheduler tree, extract it from that tree, and if necessary ++ * and if the caller did not specify @requeue, put it on the idle tree. ++ * ++ * Return %1 if the caller should update the entity hierarchy, i.e., ++ * if the entity was in service or if it was the next_in_service for ++ * its sched_data; return %0 otherwise. ++ */ ++static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) ++{ ++ struct bfq_sched_data *sd = entity->sched_data; ++ struct bfq_service_tree *st = bfq_entity_service_tree(entity); ++ int was_in_service = entity == sd->in_service_entity; ++ int ret = 0; ++ ++ if (!entity->on_st) ++ return 0; ++ ++ BUG_ON(was_in_service && entity->tree != NULL); ++ ++ if (was_in_service) { ++ bfq_calc_finish(entity, entity->service); ++ sd->in_service_entity = NULL; ++ } else if (entity->tree == &st->active) ++ bfq_active_extract(st, entity); ++ else if (entity->tree == &st->idle) ++ bfq_idle_extract(st, entity); ++ else if (entity->tree != NULL) ++ BUG(); ++ ++ if (was_in_service || sd->next_in_service == entity) ++ ret = bfq_update_next_in_service(sd); ++ ++ if (!requeue || !bfq_gt(entity->finish, st->vtime)) ++ bfq_forget_entity(st, entity); ++ else ++ bfq_idle_insert(st, entity); ++ ++ BUG_ON(sd->in_service_entity == entity); ++ BUG_ON(sd->next_in_service == entity); ++ ++ return ret; ++} ++ ++/** ++ * bfq_deactivate_entity - deactivate an entity. ++ * @entity: the entity to deactivate. ++ * @requeue: true if the entity can be put on the idle tree ++ */ ++static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) ++{ ++ struct bfq_sched_data *sd; ++ struct bfq_entity *parent; ++ ++ for_each_entity_safe(entity, parent) { ++ sd = entity->sched_data; ++ ++ if (!__bfq_deactivate_entity(entity, requeue)) ++ /* ++ * The parent entity is still backlogged, and ++ * we don't need to update it as it is still ++ * in service. ++ */ ++ break; ++ ++ if (sd->next_in_service != NULL) ++ /* ++ * The parent entity is still backlogged and ++ * the budgets on the path towards the root ++ * need to be updated. ++ */ ++ goto update; ++ ++ /* ++ * If we reach there the parent is no more backlogged and ++ * we want to propagate the dequeue upwards. ++ */ ++ requeue = 1; ++ } ++ ++ return; ++ ++update: ++ entity = parent; ++ for_each_entity(entity) { ++ __bfq_activate_entity(entity); ++ ++ sd = entity->sched_data; ++ if (!bfq_update_next_in_service(sd)) ++ break; ++ } ++} ++ ++/** ++ * bfq_update_vtime - update vtime if necessary. ++ * @st: the service tree to act upon. ++ * ++ * If necessary update the service tree vtime to have at least one ++ * eligible entity, skipping to its start time. Assumes that the ++ * active tree of the device is not empty. 
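++ * The skip is safe: min_start at the root is by construction the smallest
++ * start time in the active tree, so at least one entity becomes eligible
++ * and none is overtaken.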
++ * ++ * NOTE: this hierarchical implementation updates vtimes quite often, ++ * we may end up with reactivated processes getting timestamps after a ++ * vtime skip done because we needed a ->first_active entity on some ++ * intermediate node. ++ */ ++static void bfq_update_vtime(struct bfq_service_tree *st) ++{ ++ struct bfq_entity *entry; ++ struct rb_node *node = st->active.rb_node; ++ ++ entry = rb_entry(node, struct bfq_entity, rb_node); ++ if (bfq_gt(entry->min_start, st->vtime)) { ++ st->vtime = entry->min_start; ++ bfq_forget_idle(st); ++ } ++} ++ ++/** ++ * bfq_first_active_entity - find the eligible entity with ++ * the smallest finish time ++ * @st: the service tree to select from. ++ * ++ * This function searches the first schedulable entity, starting from the ++ * root of the tree and going on the left every time on this side there is ++ * a subtree with at least one eligible (start >= vtime) entity. The path on ++ * the right is followed only if a) the left subtree contains no eligible ++ * entities and b) no eligible entity has been found yet. ++ */ ++static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) ++{ ++ struct bfq_entity *entry, *first = NULL; ++ struct rb_node *node = st->active.rb_node; ++ ++ while (node != NULL) { ++ entry = rb_entry(node, struct bfq_entity, rb_node); ++left: ++ if (!bfq_gt(entry->start, st->vtime)) ++ first = entry; ++ ++ BUG_ON(bfq_gt(entry->min_start, st->vtime)); ++ ++ if (node->rb_left != NULL) { ++ entry = rb_entry(node->rb_left, ++ struct bfq_entity, rb_node); ++ if (!bfq_gt(entry->min_start, st->vtime)) { ++ node = node->rb_left; ++ goto left; ++ } ++ } ++ if (first != NULL) ++ break; ++ node = node->rb_right; ++ } ++ ++ BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active)); ++ return first; ++} ++ ++/** ++ * __bfq_lookup_next_entity - return the first eligible entity in @st. ++ * @st: the service tree. ++ * ++ * Update the virtual time in @st and return the first eligible entity ++ * it contains. ++ */ ++static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, ++ bool force) ++{ ++ struct bfq_entity *entity, *new_next_in_service = NULL; ++ ++ if (RB_EMPTY_ROOT(&st->active)) ++ return NULL; ++ ++ bfq_update_vtime(st); ++ entity = bfq_first_active_entity(st); ++ BUG_ON(bfq_gt(entity->start, st->vtime)); ++ ++ /* ++ * If the chosen entity does not match with the sched_data's ++ * next_in_service and we are forcedly serving the IDLE priority ++ * class tree, bubble up budget update. ++ */ ++ if (unlikely(force && entity != entity->sched_data->next_in_service)) { ++ new_next_in_service = entity; ++ for_each_entity(new_next_in_service) ++ bfq_update_budget(new_next_in_service); ++ } ++ ++ return entity; ++} ++ ++/** ++ * bfq_lookup_next_entity - return the first eligible entity in @sd. ++ * @sd: the sched_data. ++ * @extract: if true the returned entity will be also extracted from @sd. ++ * ++ * NOTE: since we cache the next_in_service entity at each level of the ++ * hierarchy, the complexity of the lookup can be decreased with ++ * absolutely no effort just returning the cached next_in_service value; ++ * we prefer to do full lookups to test the consistency of * the data ++ * structures. 
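++ * (Returning the cached value would make each lookup O(1) per level; the
++ * full search doubles as a runtime consistency check, cf.
++ * bfq_check_next_in_service().)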
++ */ ++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, ++ int extract, ++ struct bfq_data *bfqd) ++{ ++ struct bfq_service_tree *st = sd->service_tree; ++ struct bfq_entity *entity; ++ int i = 0; ++ ++ BUG_ON(sd->in_service_entity != NULL); ++ ++ if (bfqd != NULL && ++ jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { ++ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, ++ true); ++ if (entity != NULL) { ++ i = BFQ_IOPRIO_CLASSES - 1; ++ bfqd->bfq_class_idle_last_service = jiffies; ++ sd->next_in_service = entity; ++ } ++ } ++ for (; i < BFQ_IOPRIO_CLASSES; i++) { ++ entity = __bfq_lookup_next_entity(st + i, false); ++ if (entity != NULL) { ++ if (extract) { ++ bfq_check_next_in_service(sd, entity); ++ bfq_active_extract(st + i, entity); ++ sd->in_service_entity = entity; ++ sd->next_in_service = NULL; ++ } ++ break; ++ } ++ } ++ ++ return entity; ++} ++ ++/* ++ * Get next queue for service. ++ */ ++static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) ++{ ++ struct bfq_entity *entity = NULL; ++ struct bfq_sched_data *sd; ++ struct bfq_queue *bfqq; ++ ++ BUG_ON(bfqd->in_service_queue != NULL); ++ ++ if (bfqd->busy_queues == 0) ++ return NULL; ++ ++ sd = &bfqd->root_group->sched_data; ++ for (; sd != NULL; sd = entity->my_sched_data) { ++ entity = bfq_lookup_next_entity(sd, 1, bfqd); ++ BUG_ON(entity == NULL); ++ entity->service = 0; ++ } ++ ++ bfqq = bfq_entity_to_bfqq(entity); ++ BUG_ON(bfqq == NULL); ++ ++ return bfqq; ++} ++ ++static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) ++{ ++ if (bfqd->in_service_bic != NULL) { ++ put_io_context(bfqd->in_service_bic->icq.ioc); ++ bfqd->in_service_bic = NULL; ++ } ++ ++ bfqd->in_service_queue = NULL; ++ del_timer(&bfqd->idle_slice_timer); ++} ++ ++static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ int requeue) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ ++ if (bfqq == bfqd->in_service_queue) ++ __bfq_bfqd_reset_in_service(bfqd); ++ ++ bfq_deactivate_entity(entity, requeue); ++} ++ ++static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ ++ bfq_activate_entity(entity); ++} ++ ++/* ++ * Called when the bfqq no longer has requests pending, remove it from ++ * the service tree. ++ */ ++static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ int requeue) ++{ ++ BUG_ON(!bfq_bfqq_busy(bfqq)); ++ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); ++ ++ bfq_log_bfqq(bfqd, bfqq, "del from busy"); ++ ++ bfq_clear_bfqq_busy(bfqq); ++ ++ BUG_ON(bfqd->busy_queues == 0); ++ bfqd->busy_queues--; ++ ++ if (!bfqq->dispatched) { ++ bfq_weights_tree_remove(bfqd, &bfqq->entity, ++ &bfqd->queue_weights_tree); ++ if (!blk_queue_nonrot(bfqd->queue)) { ++ BUG_ON(!bfqd->busy_in_flight_queues); ++ bfqd->busy_in_flight_queues--; ++ if (bfq_bfqq_constantly_seeky(bfqq)) { ++ BUG_ON(!bfqd-> ++ const_seeky_busy_in_flight_queues); ++ bfqd->const_seeky_busy_in_flight_queues--; ++ } ++ } ++ } ++ if (bfqq->wr_coeff > 1) ++ bfqd->wr_busy_queues--; ++ ++ bfq_deactivate_bfqq(bfqd, bfqq, requeue); ++} ++ ++/* ++ * Called when an inactive queue receives a new request. 
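++ * Mirrors bfq_del_bfqq_busy(): the queue re-enters the service hierarchy
++ * and the busy, weights and in-flight bookkeeping is updated in the
++ * opposite direction.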
++ */ ++static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ BUG_ON(bfq_bfqq_busy(bfqq)); ++ BUG_ON(bfqq == bfqd->in_service_queue); ++ ++ bfq_log_bfqq(bfqd, bfqq, "add to busy"); ++ ++ bfq_activate_bfqq(bfqd, bfqq); ++ ++ bfq_mark_bfqq_busy(bfqq); ++ bfqd->busy_queues++; ++ ++ if (!bfqq->dispatched) { ++ if (bfqq->wr_coeff == 1) ++ bfq_weights_tree_add(bfqd, &bfqq->entity, ++ &bfqd->queue_weights_tree); ++ if (!blk_queue_nonrot(bfqd->queue)) { ++ bfqd->busy_in_flight_queues++; ++ if (bfq_bfqq_constantly_seeky(bfqq)) ++ bfqd->const_seeky_busy_in_flight_queues++; ++ } ++ } ++ if (bfqq->wr_coeff > 1) ++ bfqd->wr_busy_queues++; ++} +diff -Nur linux-4.1.3/block/Kconfig.iosched linux-xbian-imx6/block/Kconfig.iosched +--- linux-4.1.3/block/Kconfig.iosched 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/block/Kconfig.iosched 2015-07-27 23:13:03.600137415 +0200 +@@ -39,6 +39,27 @@ + ---help--- + Enable group IO scheduling in CFQ. + ++config IOSCHED_BFQ ++ tristate "BFQ I/O scheduler" ++ default n ++ ---help--- ++ The BFQ I/O scheduler tries to distribute bandwidth among ++ all processes according to their weights. ++ It aims at distributing the bandwidth as desired, independently of ++ the disk parameters and with any workload. It also tries to ++ guarantee low latency to interactive and soft real-time ++ applications. If compiled built-in (saying Y here), BFQ can ++ be configured to support hierarchical scheduling. ++ ++config CGROUP_BFQIO ++ bool "BFQ hierarchical scheduling support" ++ depends on CGROUPS && IOSCHED_BFQ=y ++ default n ++ ---help--- ++ Enable hierarchical scheduling in BFQ, using the cgroups ++ filesystem interface. The name of the subsystem will be ++ bfqio. ++ + choice + prompt "Default I/O scheduler" + default DEFAULT_CFQ +@@ -52,6 +73,16 @@ + config DEFAULT_CFQ + bool "CFQ" if IOSCHED_CFQ=y + ++ config DEFAULT_BFQ ++ bool "BFQ" if IOSCHED_BFQ=y ++ help ++ Selects BFQ as the default I/O scheduler which will be ++ used by default for all block devices. ++ The BFQ I/O scheduler aims at distributing the bandwidth ++ as desired, independently of the disk parameters and with ++ any workload. It also tries to guarantee low latency to ++ interactive and soft real-time applications. ++ + config DEFAULT_NOOP + bool "No-op" + +@@ -61,6 +92,7 @@ + string + default "deadline" if DEFAULT_DEADLINE + default "cfq" if DEFAULT_CFQ ++ default "bfq" if DEFAULT_BFQ + default "noop" if DEFAULT_NOOP + + endmenu +diff -Nur linux-4.1.3/block/Makefile linux-xbian-imx6/block/Makefile +--- linux-4.1.3/block/Makefile 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/block/Makefile 2015-07-27 23:13:03.600137415 +0200 +@@ -18,6 +18,7 @@ + obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o + obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o + obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o ++obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o + + obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o + obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o +diff -Nur linux-4.1.3/Documentation/devicetree/bindings/mmc/mmc.txt linux-xbian-imx6/Documentation/devicetree/bindings/mmc/mmc.txt +--- linux-4.1.3/Documentation/devicetree/bindings/mmc/mmc.txt 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/Documentation/devicetree/bindings/mmc/mmc.txt 2015-07-27 23:12:59.335353684 +0200 +@@ -5,6 +5,8 @@ + Interpreted by the OF core: + - reg: Registers location and length. + - interrupts: Interrupts used by the MMC controller. ++- clocks: Clocks needed for the host controller, if any. 
++- clock-names: Goes with clocks above. + + Card detection: + If no property below is supplied, host native card detect is used. +@@ -43,6 +45,15 @@ + - dsr: Value the card's (optional) Driver Stage Register (DSR) should be + programmed with. Valid range: [0 .. 0xffff]. + ++Card power and reset control: ++The following properties can be specified for cases where the MMC ++peripheral needs additional reset, regulator and clock lines. It is for ++example common for WiFi/BT adapters to have these separate from the main ++MMC bus: ++ - card-reset-gpios: Specify GPIOs for card reset (reset active low) ++ - card-external-vcc-supply: Regulator to drive (independent) card VCC ++ - clock with name "card_ext_clock": External clock provided to the card ++ + *NOTE* on CD and WP polarity. To use common for all SD/MMC host controllers line + polarity properties, we have to fix the meaning of the "normal" and "inverted" + line levels. We choose to follow the SDHCI standard, which specifies both those +diff -Nur linux-4.1.3/Documentation/devicetree/bindings/vendor-prefixes.txt linux-xbian-imx6/Documentation/devicetree/bindings/vendor-prefixes.txt +--- linux-4.1.3/Documentation/devicetree/bindings/vendor-prefixes.txt 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/Documentation/devicetree/bindings/vendor-prefixes.txt 2015-07-27 23:12:59.518699524 +0200 +@@ -201,6 +201,7 @@ + variscite Variscite Ltd. + via VIA Technologies, Inc. + virtio Virtual I/O Device Specification, developed by the OASIS consortium ++vivante Vivante Corporation + voipac Voipac Technologies s.r.o. + winbond Winbond Electronics corp. + wlf Wolfson Microelectronics +diff -Nur linux-4.1.3/drivers/char/frandom.c linux-xbian-imx6/drivers/char/frandom.c +--- linux-4.1.3/drivers/char/frandom.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/drivers/char/frandom.c 2015-07-27 23:13:03.986757999 +0200 +@@ -0,0 +1,415 @@ ++/* ++** frandom.c ++** Fast pseudo-random generator ++** ++** (c) Copyright 2003-2011 Eli Billauer ++** http://www.billauer.co.il ++** ++** This program is free software; you can redistribute it and/or modify ++** it under the terms of the GNU General Public License as published by ++** the Free Software Foundation; either version 2 of the License, or ++** (at your option) any later version. 
++**
++
++*/
++
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/moduleparam.h>
++
++#include <linux/kernel.h>
++#include <linux/slab.h>
++#include <linux/fs.h>
++#include <linux/errno.h>
++#include <linux/types.h>
++#include <linux/cdev.h>
++#include <linux/device.h>
++
++#include <linux/random.h>
++#include <linux/workqueue.h>
++#include <linux/semaphore.h>
++#include <asm/uaccess.h>
++
++#define INTERNAL_SEED 0
++#define EXTERNAL_SEED 1
++
++#define FRANDOM_MAJOR 235
++#define FRANDOM_MINOR 11
++#define ERANDOM_MINOR 12
++
++static struct file_operations frandom_fops; /* Values assigned below */
++
++static int erandom_seeded = 0; /* Internal flag */
++
++static int frandom_major = FRANDOM_MAJOR;
++static int frandom_minor = FRANDOM_MINOR;
++static int erandom_minor = ERANDOM_MINOR;
++static int frandom_bufsize = 256;
++static int frandom_chunklimit = 0; /* =0 means unlimited */
++
++static struct cdev frandom_cdev;
++static struct cdev erandom_cdev;
++static struct class *frandom_class;
++struct device *frandom_device;
++struct device *erandom_device;
++
++MODULE_DESCRIPTION("Fast pseudo-random number generator");
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Eli Billauer");
++module_param(frandom_major, int, 0);
++module_param(frandom_minor, int, 0);
++module_param(erandom_minor, int, 0);
++module_param(frandom_bufsize, int, 0);
++module_param(frandom_chunklimit, int, 0);
++
++MODULE_PARM_DESC(frandom_major,"Major number of /dev/frandom and /dev/erandom");
++MODULE_PARM_DESC(frandom_minor,"Minor number of /dev/frandom");
++MODULE_PARM_DESC(erandom_minor,"Minor number of /dev/erandom");
++MODULE_PARM_DESC(frandom_bufsize,"Internal buffer size in bytes. Default is 256. Must be >= 256");
++MODULE_PARM_DESC(frandom_chunklimit,"Limit for read() blocks size. 0 (default) is unlimited, otherwise must be >= 256");
++
++struct frandom_state
++{
++	struct semaphore sem; /* Semaphore on the state structure */
++
++	u8 S[256]; /* The state array */
++	u8 i;
++	u8 j;
++
++	char *buf;
++};
++
++static struct frandom_state *erandom_state;
++
++static inline void swap_byte(u8 *a, u8 *b)
++{
++	u8 swapByte;
++
++	swapByte = *a;
++	*a = *b;
++	*b = swapByte;
++}
++
++static void init_rand_state(struct frandom_state *state, int seedflag);
++
++void erandom_get_random_bytes(char *buf, size_t count)
++{
++	struct frandom_state *state = erandom_state;
++	int k;
++
++	unsigned int i;
++	unsigned int j;
++	u8 *S;
++
++	/* If we fail to get the semaphore, we revert to external random data.
++	   Since semaphore blocking is expected to be very rare, and interrupts
++	   during these rare and very short periods of time even less frequent,
++	   we take the better-safe-than-sorry approach, and fill the buffer
++	   with some expensive random data, in case the caller wasn't aware of
++	   this possibility, and expects random data anyhow.
++	*/
++
++	if (down_interruptible(&state->sem)) {
++		get_random_bytes(buf, count);
++		return;
++	}
++
++	/* We seed erandom as late as possible, hoping that the kernel's main
++	   RNG is already restored in the boot sequence (not critical, but
++	   better).
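++	   (The first caller thus pays the one-time seeding cost; later calls
++	   only run the cheap arcfour-like update loop below.)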
++	*/
++
++	if (!erandom_seeded) {
++		erandom_seeded = 1;
++		init_rand_state(state, EXTERNAL_SEED);
++		printk(KERN_INFO "frandom: Seeded global generator now (used by erandom)\n");
++	}
++
++	i = state->i;
++	j = state->j;
++	S = state->S;
++
++	for (k=0; k<count; k++) {
++		i = (i + 1) & 0xff;
++		j = (j + S[i]) & 0xff;
++		swap_byte(&S[i], &S[j]);
++		*buf++ = S[(S[i] + S[j]) & 0xff];
++	}
++
++	state->i = i;
++	state->j = j;
++
++	up(&state->sem);
++}
++
++static void init_rand_state(struct frandom_state *state, int seedflag)
++{
++	unsigned int i, j, k;
++	u8 *S;
++	u8 *seed = state->buf;
++
++	if (seedflag == INTERNAL_SEED)
++		erandom_get_random_bytes(seed, 256);
++	else
++		get_random_bytes(seed, 256);
++
++	S = state->S;
++	for (i=0; i<256; i++)
++		*S++=i;
++
++	j=0;
++	S = state->S;
++
++	for (i=0; i<256; i++) {
++		j = (j + S[i] + *seed++) & 0xff;
++		swap_byte(&S[i], &S[j]);
++	}
++
++	/* It's considered good practice to discard the first 256 bytes
++	   generated. So we do it:
++	*/
++
++	i=0; j=0;
++	for (k=0; k<256; k++) {
++		i = (i + 1) & 0xff;
++		j = (j + S[i]) & 0xff;
++		swap_byte(&S[i], &S[j]);
++	}
++
++	state->i = i; /* Save state */
++	state->j = j;
++}
++
++static int frandom_open(struct inode *inode, struct file *filp)
++{
++
++	struct frandom_state *state;
++
++	int num = iminor(inode);
++
++	/* This should never happen, now that the minors are registered
++	 * explicitly
++	 */
++	if ((num != frandom_minor) && (num != erandom_minor)) return -ENODEV;
++
++	state = kmalloc(sizeof(struct frandom_state), GFP_KERNEL);
++	if (!state)
++		return -ENOMEM;
++
++	state->buf = kmalloc(frandom_bufsize, GFP_KERNEL);
++	if (!state->buf) {
++		kfree(state);
++		return -ENOMEM;
++	}
++
++	sema_init(&state->sem, 1); /* Init semaphore as a mutex */
++
++	if (num == frandom_minor)
++		init_rand_state(state, EXTERNAL_SEED);
++	else
++		init_rand_state(state, INTERNAL_SEED);
++
++	filp->private_data = state;
++
++	return 0; /* Success */
++}
++
++static int frandom_release(struct inode *inode, struct file *filp)
++{
++
++	struct frandom_state *state = filp->private_data;
++
++	kfree(state->buf);
++	kfree(state);
++
++	return 0;
++}
++
++static ssize_t frandom_read(struct file *filp, char *buf, size_t count,
++			    loff_t *f_pos)
++{
++	struct frandom_state *state = filp->private_data;
++	ssize_t ret;
++	int dobytes, k;
++	char *localbuf;
++
++	unsigned int i;
++	unsigned int j;
++	u8 *S;
++
++	if (down_interruptible(&state->sem))
++		return -ERESTARTSYS;
++
++	if ((frandom_chunklimit > 0) && (count > frandom_chunklimit))
++		count = frandom_chunklimit;
++
++	ret = count; /* It's either everything or an error... */
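++	/* (Note: a short read is never returned; the loop below either fills
++	   the whole, possibly clamped, request or fails with -EFAULT.) */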
++
++	i = state->i;
++	j = state->j;
++	S = state->S;
++
++	while (count) {
++		if (count > frandom_bufsize)
++			dobytes = frandom_bufsize;
++		else
++			dobytes = count;
++
++		localbuf = state->buf;
++
++		for (k=0; k<dobytes; k++) {
++			i = (i + 1) & 0xff;
++			j = (j + S[i]) & 0xff;
++			swap_byte(&S[i], &S[j]);
++			*localbuf++ = S[(S[i] + S[j]) & 0xff];
++		}
++
++		if (copy_to_user(buf, state->buf, dobytes)) {
++			ret = -EFAULT;
++			goto out;
++		}
++
++		buf += dobytes;
++		count -= dobytes;
++	}
++
++ out:
++	state->i = i;
++	state->j = j;
++
++	up(&state->sem);
++	return ret;
++}
++
++static struct file_operations frandom_fops = {
++	read: frandom_read,
++	open: frandom_open,
++	release: frandom_release,
++};
++
++static void frandom_cleanup_module(void) {
++	unregister_chrdev_region(MKDEV(frandom_major, erandom_minor), 1);
++	cdev_del(&erandom_cdev);
++	device_destroy(frandom_class, MKDEV(frandom_major, erandom_minor));
++
++	unregister_chrdev_region(MKDEV(frandom_major, frandom_minor), 1);
++	cdev_del(&frandom_cdev);
++	device_destroy(frandom_class, MKDEV(frandom_major, frandom_minor));
++	class_destroy(frandom_class);
++
++	kfree(erandom_state->buf);
++	kfree(erandom_state);
++}
++
++
++static int frandom_init_module(void)
++{
++	int result;
++
++	/* The buffer size MUST be at least 256 bytes, because we assume that
++	   minimal length in init_rand_state().
++	*/
++	if (frandom_bufsize < 256) {
++		printk(KERN_ERR "frandom: Refused to load because frandom_bufsize=%d < 256\n",frandom_bufsize);
++		return -EINVAL;
++	}
++	if ((frandom_chunklimit != 0) && (frandom_chunklimit < 256)) {
++		printk(KERN_ERR "frandom: Refused to load because frandom_chunklimit=%d < 256 and != 0\n",frandom_chunklimit);
++		return -EINVAL;
++	}
++
++	erandom_state = kmalloc(sizeof(struct frandom_state), GFP_KERNEL);
++	if (!erandom_state)
++		return -ENOMEM;
++
++	/* This specific buffer is only used for seeding, so we need
++	   256 bytes exactly */
++	erandom_state->buf = kmalloc(256, GFP_KERNEL);
++	if (!erandom_state->buf) {
++		kfree(erandom_state);
++		return -ENOMEM;
++	}
++
++	sema_init(&erandom_state->sem, 1); /* Init semaphore as a mutex */
++
++	erandom_seeded = 0;
++
++	frandom_class = class_create(THIS_MODULE, "fastrng");
++	if (IS_ERR(frandom_class)) {
++		result = PTR_ERR(frandom_class);
++		printk(KERN_WARNING "frandom: Failed to register class fastrng\n");
++		goto error0;
++	}
++
++	/*
++	 * Register your major, and accept a dynamic number.
This is the ++ * first thing to do, in order to avoid releasing other module's ++ * fops in frandom_cleanup_module() ++ */ ++ ++ cdev_init(&frandom_cdev, &frandom_fops); ++ frandom_cdev.owner = THIS_MODULE; ++ result = cdev_add(&frandom_cdev, MKDEV(frandom_major, frandom_minor), 1); ++ if (result) { ++ printk(KERN_WARNING "frandom: Failed to add cdev for /dev/frandom\n"); ++ goto error1; ++ } ++ ++ result = register_chrdev_region(MKDEV(frandom_major, frandom_minor), 1, "/dev/frandom"); ++ if (result < 0) { ++ printk(KERN_WARNING "frandom: can't get major/minor %d/%d\n", frandom_major, frandom_minor); ++ goto error2; ++ } ++ ++ frandom_device = device_create(frandom_class, NULL, MKDEV(frandom_major, frandom_minor), NULL, "frandom"); ++ ++ if (IS_ERR(frandom_device)) { ++ printk(KERN_WARNING "frandom: Failed to create frandom device\n"); ++ goto error3; ++ } ++ ++ cdev_init(&erandom_cdev, &frandom_fops); ++ erandom_cdev.owner = THIS_MODULE; ++ result = cdev_add(&erandom_cdev, MKDEV(frandom_major, erandom_minor), 1); ++ if (result) { ++ printk(KERN_WARNING "frandom: Failed to add cdev for /dev/erandom\n"); ++ goto error4; ++ } ++ ++ result = register_chrdev_region(MKDEV(frandom_major, erandom_minor), 1, "/dev/erandom"); ++ if (result < 0) { ++ printk(KERN_WARNING "frandom: can't get major/minor %d/%d\n", frandom_major, erandom_minor); ++ goto error5; ++ } ++ ++ erandom_device = device_create(frandom_class, NULL, MKDEV(frandom_major, erandom_minor), NULL, "erandom"); ++ ++ if (IS_ERR(erandom_device)) { ++ printk(KERN_WARNING "frandom: Failed to create erandom device\n"); ++ goto error6; ++ } ++ return 0; /* succeed */ ++ ++ error6: ++ unregister_chrdev_region(MKDEV(frandom_major, erandom_minor), 1); ++ error5: ++ cdev_del(&erandom_cdev); ++ error4: ++ device_destroy(frandom_class, MKDEV(frandom_major, frandom_minor)); ++ error3: ++ unregister_chrdev_region(MKDEV(frandom_major, frandom_minor), 1); ++ error2: ++ cdev_del(&frandom_cdev); ++ error1: ++ class_destroy(frandom_class); ++ error0: ++ kfree(erandom_state->buf); ++ kfree(erandom_state); ++ ++ return result; ++} ++ ++module_init(frandom_init_module); ++module_exit(frandom_cleanup_module); ++ ++EXPORT_SYMBOL(erandom_get_random_bytes); +diff -Nur linux-4.1.3/drivers/char/Makefile linux-xbian-imx6/drivers/char/Makefile +--- linux-4.1.3/drivers/char/Makefile 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/char/Makefile 2015-07-27 23:13:03.982772220 +0200 +@@ -2,6 +2,7 @@ + # Makefile for the kernel character device drivers. 
+ # + ++obj-m += frandom.o + obj-y += mem.o random.o + obj-$(CONFIG_TTY_PRINTK) += ttyprintk.o + obj-y += misc.o +diff -Nur linux-4.1.3/drivers/cpufreq/imx6q-cpufreq.c linux-xbian-imx6/drivers/cpufreq/imx6q-cpufreq.c +--- linux-4.1.3/drivers/cpufreq/imx6q-cpufreq.c 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/cpufreq/imx6q-cpufreq.c 2015-07-27 23:13:04.158146502 +0200 +@@ -20,6 +20,8 @@ + #define PU_SOC_VOLTAGE_HIGH 1275000 + #define FREQ_1P2_GHZ 1200000000 + ++extern int vpu352; ++ + static struct regulator *arm_reg; + static struct regulator *pu_reg; + static struct regulator *soc_reg; +@@ -251,6 +253,10 @@ + unsigned long volt = be32_to_cpup(val++); + if (freq_table[j].frequency == freq) { + imx6_soc_volt[soc_opp_count++] = volt; ++ if (vpu352 && freq == 792000) { ++ pr_info("VPU352: increase SOC/PU voltage for VPU352MHz\n"); ++ imx6_soc_volt[soc_opp_count-1] = 1250000; ++ } + break; + } + } +diff -Nur linux-4.1.3/drivers/crypto/caam/caamalg.c linux-xbian-imx6/drivers/crypto/caam/caamalg.c +--- linux-4.1.3/drivers/crypto/caam/caamalg.c 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/caamalg.c 2015-07-27 23:13:04.205975852 +0200 +@@ -1,7 +1,7 @@ + /* + * caam - Freescale FSL CAAM support for crypto API + * +- * Copyright 2008-2011 Freescale Semiconductor, Inc. ++ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. + * + * Based on talitos crypto API driver. + * +@@ -53,6 +53,7 @@ + #include "error.h" + #include "sg_sw_sec4.h" + #include "key_gen.h" ++#include + + /* + * crypto alg +@@ -60,68 +61,42 @@ + #define CAAM_CRA_PRIORITY 3000 + /* max key is sum of AES_MAX_KEY_SIZE, max split key size */ + #define CAAM_MAX_KEY_SIZE (AES_MAX_KEY_SIZE + \ +- CTR_RFC3686_NONCE_SIZE + \ + SHA512_DIGEST_SIZE * 2) + /* max IV is max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ + #define CAAM_MAX_IV_LENGTH 16 + + /* length of descriptors text */ ++#define DESC_JOB_IO_LEN (CAAM_CMD_SZ * 5 + CAAM_PTR_SZ * 3) ++ + #define DESC_AEAD_BASE (4 * CAAM_CMD_SZ) +-#define DESC_AEAD_ENC_LEN (DESC_AEAD_BASE + 15 * CAAM_CMD_SZ) +-#define DESC_AEAD_DEC_LEN (DESC_AEAD_BASE + 18 * CAAM_CMD_SZ) ++#define DESC_AEAD_ENC_LEN (DESC_AEAD_BASE + 16 * CAAM_CMD_SZ) ++#define DESC_AEAD_DEC_LEN (DESC_AEAD_BASE + 21 * CAAM_CMD_SZ) + #define DESC_AEAD_GIVENC_LEN (DESC_AEAD_ENC_LEN + 7 * CAAM_CMD_SZ) + +-/* Note: Nonce is counted in enckeylen */ +-#define DESC_AEAD_CTR_RFC3686_LEN (6 * CAAM_CMD_SZ) +- +-#define DESC_AEAD_NULL_BASE (3 * CAAM_CMD_SZ) +-#define DESC_AEAD_NULL_ENC_LEN (DESC_AEAD_NULL_BASE + 14 * CAAM_CMD_SZ) +-#define DESC_AEAD_NULL_DEC_LEN (DESC_AEAD_NULL_BASE + 17 * CAAM_CMD_SZ) +- +-#define DESC_GCM_BASE (3 * CAAM_CMD_SZ) +-#define DESC_GCM_ENC_LEN (DESC_GCM_BASE + 23 * CAAM_CMD_SZ) +-#define DESC_GCM_DEC_LEN (DESC_GCM_BASE + 19 * CAAM_CMD_SZ) +- +-#define DESC_RFC4106_BASE (3 * CAAM_CMD_SZ) +-#define DESC_RFC4106_ENC_LEN (DESC_RFC4106_BASE + 15 * CAAM_CMD_SZ) +-#define DESC_RFC4106_DEC_LEN (DESC_RFC4106_BASE + 14 * CAAM_CMD_SZ) +-#define DESC_RFC4106_GIVENC_LEN (DESC_RFC4106_BASE + 21 * CAAM_CMD_SZ) +- +-#define DESC_RFC4543_BASE (3 * CAAM_CMD_SZ) +-#define DESC_RFC4543_ENC_LEN (DESC_RFC4543_BASE + 25 * CAAM_CMD_SZ) +-#define DESC_RFC4543_DEC_LEN (DESC_RFC4543_BASE + 27 * CAAM_CMD_SZ) +-#define DESC_RFC4543_GIVENC_LEN (DESC_RFC4543_BASE + 30 * CAAM_CMD_SZ) +- + #define DESC_ABLKCIPHER_BASE (3 * CAAM_CMD_SZ) + #define DESC_ABLKCIPHER_ENC_LEN (DESC_ABLKCIPHER_BASE + \ + 20 * CAAM_CMD_SZ) + #define DESC_ABLKCIPHER_DEC_LEN (DESC_ABLKCIPHER_BASE + \ + 15 * CAAM_CMD_SZ) + 
+-#define DESC_MAX_USED_BYTES (DESC_RFC4543_GIVENC_LEN + \ ++#define DESC_MAX_USED_BYTES (DESC_AEAD_GIVENC_LEN + \ + CAAM_MAX_KEY_SIZE) + #define DESC_MAX_USED_LEN (DESC_MAX_USED_BYTES / CAAM_CMD_SZ) + + #ifdef DEBUG + /* for print_hex_dumps with line references */ ++#define xstr(s) str(s) ++#define str(s) #s + #define debug(format, arg...) printk(format, arg) + #else + #define debug(format, arg...) + #endif +-static struct list_head alg_list; + + /* Set DK bit in class 1 operation if shared */ + static inline void append_dec_op1(u32 *desc, u32 type) + { + u32 *jump_cmd, *uncond_jump_cmd; + +- /* DK bit is valid only for AES */ +- if ((type & OP_ALG_ALGSEL_MASK) != OP_ALG_ALGSEL_AES) { +- append_operation(desc, type | OP_ALG_AS_INITFINAL | +- OP_ALG_DECRYPT); +- return; +- } +- + jump_cmd = append_jump(desc, JUMP_TEST_ALL | JUMP_COND_SHRD); + append_operation(desc, type | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT); +@@ -133,26 +108,37 @@ + } + + /* ++ * Wait for completion of class 1 key loading before allowing ++ * error propagation ++ */ ++static inline void append_dec_shr_done(u32 *desc) ++{ ++ u32 *jump_cmd; ++ ++ jump_cmd = append_jump(desc, JUMP_CLASS_CLASS1 | JUMP_TEST_ALL); ++ set_jump_tgt_here(desc, jump_cmd); ++ append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); ++} ++ ++/* + * For aead functions, read payload and write payload, + * both of which are specified in req->src and req->dst + */ + static inline void aead_append_src_dst(u32 *desc, u32 msg_type) + { +- append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | + KEY_VLF | msg_type | FIFOLD_TYPE_LASTBOTH); ++ append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); + } + + /* + * For aead encrypt and decrypt, read iv for both classes + */ +-static inline void aead_append_ld_iv(u32 *desc, int ivsize, int ivoffset) ++static inline void aead_append_ld_iv(u32 *desc, int ivsize) + { +- append_seq_load(desc, ivsize, LDST_CLASS_1_CCB | +- LDST_SRCDST_BYTE_CONTEXT | +- (ivoffset << LDST_OFFSET_SHIFT)); +- append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO | +- (ivoffset << MOVE_OFFSET_SHIFT) | ivsize); ++ append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | ++ LDST_CLASS_1_CCB | ivsize); ++ append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO | ivsize); + } + + /* +@@ -198,309 +184,68 @@ + }; + + static void append_key_aead(u32 *desc, struct caam_ctx *ctx, +- int keys_fit_inline, bool is_rfc3686) ++ int keys_fit_inline) + { +- u32 *nonce; +- unsigned int enckeylen = ctx->enckeylen; +- +- /* +- * RFC3686 specific: +- * | ctx->key = {AUTH_KEY, ENC_KEY, NONCE} +- * | enckeylen = encryption key size + nonce size +- */ +- if (is_rfc3686) +- enckeylen -= CTR_RFC3686_NONCE_SIZE; +- + if (keys_fit_inline) { + append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, + ctx->split_key_len, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + append_key_as_imm(desc, (void *)ctx->key + +- ctx->split_key_pad_len, enckeylen, +- enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); ++ ctx->split_key_pad_len, ctx->enckeylen, ++ ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); + } else { + append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + append_key(desc, ctx->key_dma + ctx->split_key_pad_len, +- enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); +- } +- +- /* Load Counter into CONTEXT1 reg */ +- if (is_rfc3686) { +- nonce = (u32 *)((void *)ctx->key + ctx->split_key_pad_len + +- enckeylen); +- append_load_imm_u32(desc, *nonce, 
LDST_CLASS_IND_CCB | +- LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); +- append_move(desc, +- MOVE_SRC_OUTFIFO | +- MOVE_DEST_CLASS1CTX | +- (16 << MOVE_OFFSET_SHIFT) | +- (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); ++ ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); + } + } + + static void init_sh_desc_key_aead(u32 *desc, struct caam_ctx *ctx, +- int keys_fit_inline, bool is_rfc3686) ++ int keys_fit_inline) + { + u32 *key_jump_cmd; + +- /* Note: Context registers are saved. */ +- init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); +- +- /* Skip if already shared */ +- key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | +- JUMP_COND_SHRD); +- +- append_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); +- +- set_jump_tgt_here(desc, key_jump_cmd); +-} +- +-static int aead_null_set_sh_desc(struct crypto_aead *aead) +-{ +- struct aead_tfm *tfm = &aead->base.crt_aead; +- struct caam_ctx *ctx = crypto_aead_ctx(aead); +- struct device *jrdev = ctx->jrdev; +- bool keys_fit_inline = false; +- u32 *key_jump_cmd, *jump_cmd, *read_move_cmd, *write_move_cmd; +- u32 *desc; +- +- /* +- * Job Descriptor and Shared Descriptors +- * must all fit into the 64-word Descriptor h/w Buffer +- */ +- if (DESC_AEAD_NULL_ENC_LEN + DESC_JOB_IO_LEN + +- ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX) +- keys_fit_inline = true; +- +- /* aead_encrypt shared descriptor */ +- desc = ctx->sh_desc_enc; +- + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); +- if (keys_fit_inline) +- append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, +- ctx->split_key_len, CLASS_2 | +- KEY_DEST_MDHA_SPLIT | KEY_ENC); +- else +- append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 | +- KEY_DEST_MDHA_SPLIT | KEY_ENC); +- set_jump_tgt_here(desc, key_jump_cmd); +- +- /* cryptlen = seqoutlen - authsize */ +- append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); +- +- /* +- * NULL encryption; IV is zero +- * assoclen = (assoclen + cryptlen) - cryptlen +- */ +- append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG3, CAAM_CMD_SZ); +- +- /* read assoc before reading payload */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | +- KEY_VLF); +- +- /* Prepare to read and write cryptlen bytes */ +- append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); +- append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); +- +- /* +- * MOVE_LEN opcode is not available in all SEC HW revisions, +- * thus need to do some magic, i.e. self-patch the descriptor +- * buffer. 
+- */ +- read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | +- MOVE_DEST_MATH3 | +- (0x6 << MOVE_LEN_SHIFT)); +- write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | +- MOVE_DEST_DESCBUF | +- MOVE_WAITCOMP | +- (0x8 << MOVE_LEN_SHIFT)); +- +- /* Class 2 operation */ +- append_operation(desc, ctx->class2_alg_type | +- OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); +- +- /* Read and write cryptlen bytes */ +- aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); +- +- set_move_tgt_here(desc, read_move_cmd); +- set_move_tgt_here(desc, write_move_cmd); +- append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); +- append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO | +- MOVE_AUX_LS); +- +- /* Write ICV */ +- append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB | +- LDST_SRCDST_BYTE_CONTEXT); +- +- ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, +- desc_bytes(desc), +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { +- dev_err(jrdev, "unable to map shared descriptor\n"); +- return -ENOMEM; +- } +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, +- "aead null enc shdesc@"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, desc, +- desc_bytes(desc), 1); +-#endif +- +- /* +- * Job Descriptor and Shared Descriptors +- * must all fit into the 64-word Descriptor h/w Buffer +- */ +- keys_fit_inline = false; +- if (DESC_AEAD_NULL_DEC_LEN + DESC_JOB_IO_LEN + +- ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX) +- keys_fit_inline = true; +- +- desc = ctx->sh_desc_dec; + +- /* aead_decrypt shared descriptor */ +- init_sh_desc(desc, HDR_SHARE_SERIAL); ++ append_key_aead(desc, ctx, keys_fit_inline); + +- /* Skip if already shared */ +- key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | +- JUMP_COND_SHRD); +- if (keys_fit_inline) +- append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, +- ctx->split_key_len, CLASS_2 | +- KEY_DEST_MDHA_SPLIT | KEY_ENC); +- else +- append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 | +- KEY_DEST_MDHA_SPLIT | KEY_ENC); + set_jump_tgt_here(desc, key_jump_cmd); + +- /* Class 2 operation */ +- append_operation(desc, ctx->class2_alg_type | +- OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); +- +- /* assoclen + cryptlen = seqinlen - ivsize - authsize */ +- append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, +- ctx->authsize + tfm->ivsize); +- /* assoclen = (assoclen + cryptlen) - cryptlen */ +- append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); +- append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ); +- +- /* read assoc before reading payload */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | +- KEY_VLF); +- +- /* Prepare to read and write cryptlen bytes */ +- append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ); +- append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ); +- +- /* +- * MOVE_LEN opcode is not available in all SEC HW revisions, +- * thus need to do some magic, i.e. self-patch the descriptor +- * buffer. +- */ +- read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | +- MOVE_DEST_MATH2 | +- (0x6 << MOVE_LEN_SHIFT)); +- write_move_cmd = append_move(desc, MOVE_SRC_MATH2 | +- MOVE_DEST_DESCBUF | +- MOVE_WAITCOMP | +- (0x8 << MOVE_LEN_SHIFT)); +- +- /* Read and write cryptlen bytes */ +- aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); +- +- /* +- * Insert a NOP here, since we need at least 4 instructions between +- * code patching the descriptor buffer and the location being patched. 
+- */ +- jump_cmd = append_jump(desc, JUMP_TEST_ALL); +- set_jump_tgt_here(desc, jump_cmd); +- +- set_move_tgt_here(desc, read_move_cmd); +- set_move_tgt_here(desc, write_move_cmd); +- append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); +- append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO | +- MOVE_AUX_LS); +- append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); +- +- /* Load ICV */ +- append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS2 | +- FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV); +- +- ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, +- desc_bytes(desc), +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { +- dev_err(jrdev, "unable to map shared descriptor\n"); +- return -ENOMEM; +- } +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, +- "aead null dec shdesc@"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, desc, +- desc_bytes(desc), 1); +-#endif +- +- return 0; ++ /* Propagate errors from shared to job descriptor */ ++ append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); + } + + static int aead_set_sh_desc(struct crypto_aead *aead) + { + struct aead_tfm *tfm = &aead->base.crt_aead; + struct caam_ctx *ctx = crypto_aead_ctx(aead); +- struct crypto_tfm *ctfm = crypto_aead_tfm(aead); +- const char *alg_name = crypto_tfm_alg_name(ctfm); + struct device *jrdev = ctx->jrdev; +- bool keys_fit_inline; ++ bool keys_fit_inline = false; ++ u32 *key_jump_cmd, *jump_cmd; + u32 geniv, moveiv; +- u32 ctx1_iv_off = 0; + u32 *desc; +- const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == +- OP_ALG_AAI_CTR_MOD128); +- const bool is_rfc3686 = (ctr_mode && +- (strstr(alg_name, "rfc3686") != NULL)); + +- if (!ctx->authsize) ++ if (!ctx->enckeylen || !ctx->authsize) + return 0; + +- /* NULL encryption / decryption */ +- if (!ctx->enckeylen) +- return aead_null_set_sh_desc(aead); +- +- /* +- * AES-CTR needs to load IV in CONTEXT1 reg +- * at an offset of 128bits (16bytes) +- * CONTEXT1[255:128] = IV +- */ +- if (ctr_mode) +- ctx1_iv_off = 16; +- +- /* +- * RFC3686 specific: +- * CONTEXT1[255:128] = {NONCE, IV, COUNTER} +- */ +- if (is_rfc3686) +- ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE; +- + /* + * Job Descriptor and Shared Descriptors + * must all fit into the 64-word Descriptor h/w Buffer + */ +- keys_fit_inline = false; + if (DESC_AEAD_ENC_LEN + DESC_JOB_IO_LEN + +- ctx->split_key_pad_len + ctx->enckeylen + +- (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <= ++ ctx->split_key_pad_len + ctx->enckeylen <= + CAAM_DESC_BYTES_MAX) + keys_fit_inline = true; + + /* aead_encrypt shared descriptor */ + desc = ctx->sh_desc_enc; + +- /* Note: Context registers are saved. 
*/ +- init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); ++ init_sh_desc_key_aead(desc, ctx, keys_fit_inline); + + /* Class 2 operation */ + append_operation(desc, ctx->class2_alg_type | +@@ -512,21 +257,13 @@ + /* assoclen + cryptlen = seqinlen - ivsize */ + append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize); + +- /* assoclen = (assoclen + cryptlen) - cryptlen */ ++ /* assoclen + cryptlen = (assoclen + cryptlen) - cryptlen */ + append_math_sub(desc, VARSEQINLEN, REG2, REG3, CAAM_CMD_SZ); + + /* read assoc before reading payload */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | + KEY_VLF); +- aead_append_ld_iv(desc, tfm->ivsize, ctx1_iv_off); +- +- /* Load Counter into CONTEXT1 reg */ +- if (is_rfc3686) +- append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM | +- LDST_CLASS_1_CCB | +- LDST_SRCDST_BYTE_CONTEXT | +- ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << +- LDST_OFFSET_SHIFT)); ++ aead_append_ld_iv(desc, tfm->ivsize); + + /* Class 1 operation */ + append_operation(desc, ctx->class1_alg_type | +@@ -549,35 +286,46 @@ + return -ENOMEM; + } + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "aead enc shdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "aead enc shdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, + desc_bytes(desc), 1); + #endif + ++ dma_sync_single_for_cpu(jrdev, ctx->sh_desc_enc_dma, desc_bytes(desc), ++ DMA_TO_DEVICE); + /* + * Job Descriptor and Shared Descriptors + * must all fit into the 64-word Descriptor h/w Buffer + */ +- keys_fit_inline = false; + if (DESC_AEAD_DEC_LEN + DESC_JOB_IO_LEN + +- ctx->split_key_pad_len + ctx->enckeylen + +- (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <= ++ ctx->split_key_pad_len + ctx->enckeylen <= + CAAM_DESC_BYTES_MAX) + keys_fit_inline = true; + +- /* aead_decrypt shared descriptor */ + desc = ctx->sh_desc_dec; + +- /* Note: Context registers are saved. 
*/
+- init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686);
++ /* aead_decrypt shared descriptor */
++ init_sh_desc(desc, HDR_SHARE_SERIAL);
++
++ /* Skip if already shared */
++ key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
++ JUMP_COND_SHRD);
++
++ append_key_aead(desc, ctx, keys_fit_inline);
++
++ /* Only propagate error immediately if shared */
++ jump_cmd = append_jump(desc, JUMP_TEST_ALL);
++ set_jump_tgt_here(desc, key_jump_cmd);
++ append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD);
++ set_jump_tgt_here(desc, jump_cmd);
+
+ /* Class 2 operation */
+ append_operation(desc, ctx->class2_alg_type |
+ OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON);
+
+- /* assoclen + cryptlen = seqinlen - ivsize - authsize */
++ /* assoclen + cryptlen = seqinlen - ivsize */
+ append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM,
+- ctx->authsize + tfm->ivsize);
++ tfm->ivsize);
+ /* assoclen = (assoclen + cryptlen) - cryptlen */
+ append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+ append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ);
+@@ -586,22 +334,9 @@
+ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
+ KEY_VLF);
+
+- aead_append_ld_iv(desc, tfm->ivsize, ctx1_iv_off);
++ aead_append_ld_iv(desc, tfm->ivsize);
+
+- /* Load Counter into CONTEXT1 reg */
+- if (is_rfc3686)
+- append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
+- LDST_CLASS_1_CCB |
+- LDST_SRCDST_BYTE_CONTEXT |
+- ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+- LDST_OFFSET_SHIFT));
+-
+- /* Choose operation */
+- if (ctr_mode)
+- append_operation(desc, ctx->class1_alg_type |
+- OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT);
+- else
+- append_dec_op1(desc, ctx->class1_alg_type);
++ append_dec_op1(desc, ctx->class1_alg_type);
+
+ /* Read and write cryptlen bytes */
+ append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ);
+@@ -611,6 +346,7 @@
+ /* Load ICV */
+ append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS2 |
+ FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV);
++ append_dec_shr_done(desc);
+
+ ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc,
+ desc_bytes(desc),
+@@ -620,27 +356,26 @@
+ return -ENOMEM;
+ }
+ #ifdef DEBUG
+- print_hex_dump(KERN_ERR, "aead dec shdesc@"__stringify(__LINE__)": ",
++ print_hex_dump(KERN_ERR, "aead dec shdesc@"xstr(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc,
+ desc_bytes(desc), 1);
+ #endif
++ dma_sync_single_for_cpu(jrdev, ctx->sh_desc_dec_dma, desc_bytes(desc),
++ DMA_TO_DEVICE);
+
+ /*
+ * Job Descriptor and Shared Descriptors
+ * must all fit into the 64-word Descriptor h/w Buffer
+ */
+- keys_fit_inline = false;
+ if (DESC_AEAD_GIVENC_LEN + DESC_JOB_IO_LEN +
+- ctx->split_key_pad_len + ctx->enckeylen +
+- (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <=
++ ctx->split_key_pad_len + ctx->enckeylen <=
+ CAAM_DESC_BYTES_MAX)
+ keys_fit_inline = true;
+
+ /* aead_givencrypt shared descriptor */
+ desc = ctx->sh_desc_givenc;
+
+- /* Note: Context registers are saved. 
*/ +- init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); ++ init_sh_desc_key_aead(desc, ctx, keys_fit_inline); + + /* Generate IV */ + geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | +@@ -649,16 +384,13 @@ + append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | + LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); +- append_move(desc, MOVE_WAITCOMP | +- MOVE_SRC_INFIFO | MOVE_DEST_CLASS1CTX | +- (ctx1_iv_off << MOVE_OFFSET_SHIFT) | +- (tfm->ivsize << MOVE_LEN_SHIFT)); ++ append_move(desc, MOVE_SRC_INFIFO | ++ MOVE_DEST_CLASS1CTX | (tfm->ivsize << MOVE_LEN_SHIFT)); + append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); + + /* Copy IV to class 1 context */ +- append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_OUTFIFO | +- (ctx1_iv_off << MOVE_OFFSET_SHIFT) | +- (tfm->ivsize << MOVE_LEN_SHIFT)); ++ append_move(desc, MOVE_SRC_CLASS1CTX | ++ MOVE_DEST_OUTFIFO | (tfm->ivsize << MOVE_LEN_SHIFT)); + + /* Return to encryption */ + append_operation(desc, ctx->class2_alg_type | +@@ -674,7 +406,7 @@ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | + KEY_VLF); + +- /* Copy iv from outfifo to class 2 fifo */ ++ /* Copy iv from class 1 ctx to class 2 fifo*/ + moveiv = NFIFOENTRY_STYPE_OFIFO | NFIFOENTRY_DEST_CLASS2 | + NFIFOENTRY_DTYPE_MSG | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT); + append_load_imm_u32(desc, moveiv, LDST_CLASS_IND_CCB | +@@ -682,14 +414,6 @@ + append_load_imm_u32(desc, tfm->ivsize, LDST_CLASS_2_CCB | + LDST_SRCDST_WORD_DATASZ_REG | LDST_IMM); + +- /* Load Counter into CONTEXT1 reg */ +- if (is_rfc3686) +- append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM | +- LDST_CLASS_1_CCB | +- LDST_SRCDST_BYTE_CONTEXT | +- ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << +- LDST_OFFSET_SHIFT)); +- + /* Class 1 operation */ + append_operation(desc, ctx->class1_alg_type | + OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); +@@ -717,10 +441,12 @@ + return -ENOMEM; + } + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "aead givenc shdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "aead givenc shdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, + desc_bytes(desc), 1); + #endif ++ dma_sync_single_for_cpu(jrdev, ctx->sh_desc_givenc_dma, ++ desc_bytes(desc), DMA_TO_DEVICE); + + return 0; + } +@@ -736,977 +462,84 @@ + return 0; + } + +-static int gcm_set_sh_desc(struct crypto_aead *aead) ++static u32 gen_split_aead_key(struct caam_ctx *ctx, const u8 *key_in, ++ u32 authkeylen) + { +- struct aead_tfm *tfm = &aead->base.crt_aead; ++ return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len, ++ ctx->split_key_pad_len, key_in, authkeylen, ++ ctx->alg_op); ++} ++ ++static int aead_setkey(struct crypto_aead *aead, ++ const u8 *key, unsigned int keylen) ++{ ++ /* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */ ++ static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 }; + struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct device *jrdev = ctx->jrdev; +- bool keys_fit_inline = false; +- u32 *key_jump_cmd, *zero_payload_jump_cmd, +- *zero_assoc_jump_cmd1, *zero_assoc_jump_cmd2; +- u32 *desc; +- +- if (!ctx->enckeylen || !ctx->authsize) +- return 0; +- +- /* +- * AES GCM encrypt shared descriptor +- * Job Descriptor and Shared Descriptor +- * must fit into the 64-word Descriptor h/w Buffer +- */ +- if (DESC_GCM_ENC_LEN + DESC_JOB_IO_LEN + +- ctx->enckeylen <= CAAM_DESC_BYTES_MAX) +- keys_fit_inline = true; ++ struct rtattr *rta = (void *)key; ++ struct crypto_authenc_key_param *param; ++ unsigned int authkeylen; ++ 
unsigned int enckeylen; ++ int ret = 0; + +- desc = ctx->sh_desc_enc; ++ param = RTA_DATA(rta); ++ enckeylen = be32_to_cpu(param->enckeylen); + +- init_sh_desc(desc, HDR_SHARE_SERIAL); ++ key += RTA_ALIGN(rta->rta_len); ++ keylen -= RTA_ALIGN(rta->rta_len); + +- /* skip key loading if they are loaded due to sharing */ +- key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | +- JUMP_COND_SHRD | JUMP_COND_SELF); +- if (keys_fit_inline) +- append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, +- ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); +- else +- append_key(desc, ctx->key_dma, ctx->enckeylen, +- CLASS_1 | KEY_DEST_CLASS_REG); +- set_jump_tgt_here(desc, key_jump_cmd); ++ if (keylen < enckeylen) ++ goto badkey; + +- /* class 1 operation */ +- append_operation(desc, ctx->class1_alg_type | +- OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); ++ authkeylen = keylen - enckeylen; + +- /* cryptlen = seqoutlen - authsize */ +- append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); ++ if (keylen > CAAM_MAX_KEY_SIZE) ++ goto badkey; + +- /* assoclen + cryptlen = seqinlen - ivsize */ +- append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize); ++ /* Pick class 2 key length from algorithm submask */ ++ ctx->split_key_len = mdpadlen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >> ++ OP_ALG_ALGSEL_SHIFT] * 2; ++ ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16); + +- /* assoclen = (assoclen + cryptlen) - cryptlen */ +- append_math_sub(desc, REG1, REG2, REG3, CAAM_CMD_SZ); ++#ifdef DEBUG ++ printk(KERN_ERR "keylen %d enckeylen %d authkeylen %d\n", ++ keylen, enckeylen, authkeylen); ++ printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n", ++ ctx->split_key_len, ctx->split_key_pad_len); ++ print_hex_dump(KERN_ERR, "key in @"xstr(__LINE__)": ", ++ DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); ++#endif + +- /* if cryptlen is ZERO jump to zero-payload commands */ +- append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); +- zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | +- JUMP_COND_MATH_Z); +- /* read IV */ +- append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | +- FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); +- +- /* if assoclen is ZERO, skip reading the assoc data */ +- append_math_add(desc, VARSEQINLEN, ZERO, REG1, CAAM_CMD_SZ); +- zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | +- JUMP_COND_MATH_Z); +- +- /* read assoc data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); +- set_jump_tgt_here(desc, zero_assoc_jump_cmd1); ++ ret = gen_split_aead_key(ctx, key, authkeylen); ++ if (ret) { ++ goto badkey; ++ } + +- append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); ++ /* postpend encryption key to auth split key */ ++ memcpy(ctx->key + ctx->split_key_pad_len, key + authkeylen, enckeylen); + +- /* write encrypted data */ +- append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); ++ ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len + ++ enckeylen, DMA_TO_DEVICE); ++ if (dma_mapping_error(jrdev, ctx->key_dma)) { ++ dev_err(jrdev, "unable to map key i/o memory\n"); ++ return -ENOMEM; ++ } ++#ifdef DEBUG ++ print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ", ++ DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, ++ ctx->split_key_pad_len + enckeylen, 1); ++#endif ++ dma_sync_single_for_device(jrdev, ctx->key_dma, ++ ctx->split_key_pad_len + enckeylen, ++ DMA_TO_DEVICE); + +- /* read payload data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | 
FIFOLDST_VLF | +- FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); +- +- /* jump the zero-payload commands */ +- append_jump(desc, JUMP_TEST_ALL | 7); +- +- /* zero-payload commands */ +- set_jump_tgt_here(desc, zero_payload_jump_cmd); +- +- /* if assoclen is ZERO, jump to IV reading - is the only input data */ +- append_math_add(desc, VARSEQINLEN, ZERO, REG1, CAAM_CMD_SZ); +- zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL | +- JUMP_COND_MATH_Z); +- /* read IV */ +- append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | +- FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); +- +- /* read assoc data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1); +- +- /* jump to ICV writing */ +- append_jump(desc, JUMP_TEST_ALL | 2); +- +- /* read IV - is the only input data */ +- set_jump_tgt_here(desc, zero_assoc_jump_cmd2); +- append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | +- FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | +- FIFOLD_TYPE_LAST1); +- +- /* write ICV */ +- append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | +- LDST_SRCDST_BYTE_CONTEXT); +- +- ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, +- desc_bytes(desc), +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { +- dev_err(jrdev, "unable to map shared descriptor\n"); +- return -ENOMEM; +- } +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, "gcm enc shdesc@"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, desc, +- desc_bytes(desc), 1); +-#endif +- +- /* +- * Job Descriptor and Shared Descriptors +- * must all fit into the 64-word Descriptor h/w Buffer +- */ +- keys_fit_inline = false; +- if (DESC_GCM_DEC_LEN + DESC_JOB_IO_LEN + +- ctx->enckeylen <= CAAM_DESC_BYTES_MAX) +- keys_fit_inline = true; +- +- desc = ctx->sh_desc_dec; +- +- init_sh_desc(desc, HDR_SHARE_SERIAL); +- +- /* skip key loading if they are loaded due to sharing */ +- key_jump_cmd = append_jump(desc, JUMP_JSL | +- JUMP_TEST_ALL | JUMP_COND_SHRD | +- JUMP_COND_SELF); +- if (keys_fit_inline) +- append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, +- ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); +- else +- append_key(desc, ctx->key_dma, ctx->enckeylen, +- CLASS_1 | KEY_DEST_CLASS_REG); +- set_jump_tgt_here(desc, key_jump_cmd); +- +- /* class 1 operation */ +- append_operation(desc, ctx->class1_alg_type | +- OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); +- +- /* assoclen + cryptlen = seqinlen - ivsize - icvsize */ +- append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, +- ctx->authsize + tfm->ivsize); +- +- /* assoclen = (assoclen + cryptlen) - cryptlen */ +- append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); +- append_math_sub(desc, REG1, REG3, REG2, CAAM_CMD_SZ); +- +- /* read IV */ +- append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | +- FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); +- +- /* jump to zero-payload command if cryptlen is zero */ +- append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ); +- zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | +- JUMP_COND_MATH_Z); +- +- append_math_add(desc, VARSEQINLEN, ZERO, REG1, CAAM_CMD_SZ); +- /* if asoclen is ZERO, skip reading assoc data */ +- zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | +- JUMP_COND_MATH_Z); +- /* read assoc data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); +- set_jump_tgt_here(desc, zero_assoc_jump_cmd1); +- +- append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ); +- +- /* store encrypted 
data */ +- append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); +- +- /* read payload data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); +- +- /* jump the zero-payload commands */ +- append_jump(desc, JUMP_TEST_ALL | 4); +- +- /* zero-payload command */ +- set_jump_tgt_here(desc, zero_payload_jump_cmd); +- +- /* if assoclen is ZERO, jump to ICV reading */ +- append_math_add(desc, VARSEQINLEN, ZERO, REG1, CAAM_CMD_SZ); +- zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL | +- JUMP_COND_MATH_Z); +- /* read assoc data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); +- set_jump_tgt_here(desc, zero_assoc_jump_cmd2); +- +- /* read ICV */ +- append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 | +- FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); +- +- ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, +- desc_bytes(desc), +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { +- dev_err(jrdev, "unable to map shared descriptor\n"); +- return -ENOMEM; +- } +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, "gcm dec shdesc@"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, desc, +- desc_bytes(desc), 1); +-#endif +- +- return 0; +-} +- +-static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) +-{ +- struct caam_ctx *ctx = crypto_aead_ctx(authenc); +- +- ctx->authsize = authsize; +- gcm_set_sh_desc(authenc); +- +- return 0; +-} +- +-static int rfc4106_set_sh_desc(struct crypto_aead *aead) +-{ +- struct aead_tfm *tfm = &aead->base.crt_aead; +- struct caam_ctx *ctx = crypto_aead_ctx(aead); +- struct device *jrdev = ctx->jrdev; +- bool keys_fit_inline = false; +- u32 *key_jump_cmd, *move_cmd, *write_iv_cmd; +- u32 *desc; +- u32 geniv; +- +- if (!ctx->enckeylen || !ctx->authsize) +- return 0; +- +- /* +- * RFC4106 encrypt shared descriptor +- * Job Descriptor and Shared Descriptor +- * must fit into the 64-word Descriptor h/w Buffer +- */ +- if (DESC_RFC4106_ENC_LEN + DESC_JOB_IO_LEN + +- ctx->enckeylen <= CAAM_DESC_BYTES_MAX) +- keys_fit_inline = true; +- +- desc = ctx->sh_desc_enc; +- +- init_sh_desc(desc, HDR_SHARE_SERIAL); +- +- /* Skip key loading if it is loaded due to sharing */ +- key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | +- JUMP_COND_SHRD); +- if (keys_fit_inline) +- append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, +- ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); +- else +- append_key(desc, ctx->key_dma, ctx->enckeylen, +- CLASS_1 | KEY_DEST_CLASS_REG); +- set_jump_tgt_here(desc, key_jump_cmd); +- +- /* Class 1 operation */ +- append_operation(desc, ctx->class1_alg_type | +- OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); +- +- /* cryptlen = seqoutlen - authsize */ +- append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); +- append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); +- +- /* assoclen + cryptlen = seqinlen - ivsize */ +- append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize); +- +- /* assoclen = (assoclen + cryptlen) - cryptlen */ +- append_math_sub(desc, VARSEQINLEN, REG2, REG3, CAAM_CMD_SZ); +- +- /* Read Salt */ +- append_fifo_load_as_imm(desc, (void *)(ctx->key + ctx->enckeylen), +- 4, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV); +- /* Read AES-GCM-ESP IV */ +- append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | +- FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); +- +- /* Read assoc data */ +- append_seq_fifo_load(desc, 0, 
FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); +- +- /* Will read cryptlen bytes */ +- append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); +- +- /* Write encrypted data */ +- append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); +- +- /* Read payload data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); +- +- /* Write ICV */ +- append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | +- LDST_SRCDST_BYTE_CONTEXT); +- +- ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, +- desc_bytes(desc), +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { +- dev_err(jrdev, "unable to map shared descriptor\n"); +- return -ENOMEM; +- } +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, "rfc4106 enc shdesc@"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, desc, +- desc_bytes(desc), 1); +-#endif +- +- /* +- * Job Descriptor and Shared Descriptors +- * must all fit into the 64-word Descriptor h/w Buffer +- */ +- keys_fit_inline = false; +- if (DESC_RFC4106_DEC_LEN + DESC_JOB_IO_LEN + +- ctx->enckeylen <= CAAM_DESC_BYTES_MAX) +- keys_fit_inline = true; +- +- desc = ctx->sh_desc_dec; +- +- init_sh_desc(desc, HDR_SHARE_SERIAL); +- +- /* Skip key loading if it is loaded due to sharing */ +- key_jump_cmd = append_jump(desc, JUMP_JSL | +- JUMP_TEST_ALL | JUMP_COND_SHRD); +- if (keys_fit_inline) +- append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, +- ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); +- else +- append_key(desc, ctx->key_dma, ctx->enckeylen, +- CLASS_1 | KEY_DEST_CLASS_REG); +- set_jump_tgt_here(desc, key_jump_cmd); +- +- /* Class 1 operation */ +- append_operation(desc, ctx->class1_alg_type | +- OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); +- +- /* assoclen + cryptlen = seqinlen - ivsize - icvsize */ +- append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, +- ctx->authsize + tfm->ivsize); +- +- /* assoclen = (assoclen + cryptlen) - cryptlen */ +- append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); +- append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ); +- +- /* Will write cryptlen bytes */ +- append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); +- +- /* Read Salt */ +- append_fifo_load_as_imm(desc, (void *)(ctx->key + ctx->enckeylen), +- 4, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV); +- /* Read AES-GCM-ESP IV */ +- append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | +- FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); +- +- /* Read assoc data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); +- +- /* Will read cryptlen bytes */ +- append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ); +- +- /* Store payload data */ +- append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); +- +- /* Read encrypted data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); +- +- /* Read ICV */ +- append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 | +- FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); +- +- ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, +- desc_bytes(desc), +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { +- dev_err(jrdev, "unable to map shared descriptor\n"); +- return -ENOMEM; +- } +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, "rfc4106 dec shdesc@"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, desc, +- desc_bytes(desc), 1); +-#endif +- 
+- /* +- * Job Descriptor and Shared Descriptors +- * must all fit into the 64-word Descriptor h/w Buffer +- */ +- keys_fit_inline = false; +- if (DESC_RFC4106_GIVENC_LEN + DESC_JOB_IO_LEN + +- ctx->split_key_pad_len + ctx->enckeylen <= +- CAAM_DESC_BYTES_MAX) +- keys_fit_inline = true; +- +- /* rfc4106_givencrypt shared descriptor */ +- desc = ctx->sh_desc_givenc; +- +- init_sh_desc(desc, HDR_SHARE_SERIAL); +- +- /* Skip key loading if it is loaded due to sharing */ +- key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | +- JUMP_COND_SHRD); +- if (keys_fit_inline) +- append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, +- ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); +- else +- append_key(desc, ctx->key_dma, ctx->enckeylen, +- CLASS_1 | KEY_DEST_CLASS_REG); +- set_jump_tgt_here(desc, key_jump_cmd); +- +- /* Generate IV */ +- geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | +- NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | +- NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT); +- append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | +- LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); +- append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); +- move_cmd = append_move(desc, MOVE_SRC_INFIFO | MOVE_DEST_DESCBUF | +- (tfm->ivsize << MOVE_LEN_SHIFT)); +- append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); +- +- /* Copy generated IV to OFIFO */ +- write_iv_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_OUTFIFO | +- (tfm->ivsize << MOVE_LEN_SHIFT)); +- +- /* Class 1 operation */ +- append_operation(desc, ctx->class1_alg_type | +- OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); +- +- /* ivsize + cryptlen = seqoutlen - authsize */ +- append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); +- +- /* assoclen = seqinlen - (ivsize + cryptlen) */ +- append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG3, CAAM_CMD_SZ); +- +- /* Will write ivsize + cryptlen */ +- append_math_add(desc, VARSEQOUTLEN, REG3, REG0, CAAM_CMD_SZ); +- +- /* Read Salt and generated IV */ +- append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV | +- FIFOLD_TYPE_FLUSH1 | IMMEDIATE | 12); +- /* Append Salt */ +- append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4); +- set_move_tgt_here(desc, move_cmd); +- set_move_tgt_here(desc, write_iv_cmd); +- /* Blank commands. Will be overwritten by generated IV. 
*/ +- append_cmd(desc, 0x00000000); +- append_cmd(desc, 0x00000000); +- /* End of blank commands */ +- +- /* No need to reload iv */ +- append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_SKIP); +- +- /* Read assoc data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); +- +- /* Will read cryptlen */ +- append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); +- +- /* Store generated IV and encrypted data */ +- append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); +- +- /* Read payload data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); +- +- /* Write ICV */ +- append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | +- LDST_SRCDST_BYTE_CONTEXT); +- +- ctx->sh_desc_givenc_dma = dma_map_single(jrdev, desc, +- desc_bytes(desc), +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) { +- dev_err(jrdev, "unable to map shared descriptor\n"); +- return -ENOMEM; +- } +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, +- "rfc4106 givenc shdesc@"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, desc, +- desc_bytes(desc), 1); +-#endif +- +- return 0; +-} +- +-static int rfc4106_setauthsize(struct crypto_aead *authenc, +- unsigned int authsize) +-{ +- struct caam_ctx *ctx = crypto_aead_ctx(authenc); +- +- ctx->authsize = authsize; +- rfc4106_set_sh_desc(authenc); +- +- return 0; +-} +- +-static int rfc4543_set_sh_desc(struct crypto_aead *aead) +-{ +- struct aead_tfm *tfm = &aead->base.crt_aead; +- struct caam_ctx *ctx = crypto_aead_ctx(aead); +- struct device *jrdev = ctx->jrdev; +- bool keys_fit_inline = false; +- u32 *key_jump_cmd, *write_iv_cmd, *write_aad_cmd; +- u32 *read_move_cmd, *write_move_cmd; +- u32 *desc; +- u32 geniv; +- +- if (!ctx->enckeylen || !ctx->authsize) +- return 0; +- +- /* +- * RFC4543 encrypt shared descriptor +- * Job Descriptor and Shared Descriptor +- * must fit into the 64-word Descriptor h/w Buffer +- */ +- if (DESC_RFC4543_ENC_LEN + DESC_JOB_IO_LEN + +- ctx->enckeylen <= CAAM_DESC_BYTES_MAX) +- keys_fit_inline = true; +- +- desc = ctx->sh_desc_enc; +- +- init_sh_desc(desc, HDR_SHARE_SERIAL); +- +- /* Skip key loading if it is loaded due to sharing */ +- key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | +- JUMP_COND_SHRD); +- if (keys_fit_inline) +- append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, +- ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); +- else +- append_key(desc, ctx->key_dma, ctx->enckeylen, +- CLASS_1 | KEY_DEST_CLASS_REG); +- set_jump_tgt_here(desc, key_jump_cmd); +- +- /* Class 1 operation */ +- append_operation(desc, ctx->class1_alg_type | +- OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); +- +- /* Load AES-GMAC ESP IV into Math1 register */ +- append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_WORD_DECO_MATH1 | +- LDST_CLASS_DECO | tfm->ivsize); +- +- /* Wait the DMA transaction to finish */ +- append_jump(desc, JUMP_TEST_ALL | JUMP_COND_CALM | +- (1 << JUMP_OFFSET_SHIFT)); +- +- /* Overwrite blank immediate AES-GMAC ESP IV data */ +- write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | +- (tfm->ivsize << MOVE_LEN_SHIFT)); +- +- /* Overwrite blank immediate AAD data */ +- write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | +- (tfm->ivsize << MOVE_LEN_SHIFT)); +- +- /* cryptlen = seqoutlen - authsize */ +- append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); +- +- /* assoclen = (seqinlen - ivsize) - cryptlen */ +- 
append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG3, CAAM_CMD_SZ); +- +- /* Read Salt and AES-GMAC ESP IV */ +- append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | +- FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize)); +- /* Append Salt */ +- append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4); +- set_move_tgt_here(desc, write_iv_cmd); +- /* Blank commands. Will be overwritten by AES-GMAC ESP IV. */ +- append_cmd(desc, 0x00000000); +- append_cmd(desc, 0x00000000); +- /* End of blank commands */ +- +- /* Read assoc data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_AAD); +- +- /* Will read cryptlen bytes */ +- append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); +- +- /* Will write cryptlen bytes */ +- append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); +- +- /* +- * MOVE_LEN opcode is not available in all SEC HW revisions, +- * thus need to do some magic, i.e. self-patch the descriptor +- * buffer. +- */ +- read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 | +- (0x6 << MOVE_LEN_SHIFT)); +- write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | +- (0x8 << MOVE_LEN_SHIFT)); +- +- /* Authenticate AES-GMAC ESP IV */ +- append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | +- FIFOLD_TYPE_AAD | tfm->ivsize); +- set_move_tgt_here(desc, write_aad_cmd); +- /* Blank commands. Will be overwritten by AES-GMAC ESP IV. */ +- append_cmd(desc, 0x00000000); +- append_cmd(desc, 0x00000000); +- /* End of blank commands */ +- +- /* Read and write cryptlen bytes */ +- aead_append_src_dst(desc, FIFOLD_TYPE_AAD); +- +- set_move_tgt_here(desc, read_move_cmd); +- set_move_tgt_here(desc, write_move_cmd); +- append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); +- /* Move payload data to OFIFO */ +- append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO); +- +- /* Write ICV */ +- append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | +- LDST_SRCDST_BYTE_CONTEXT); +- +- ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, +- desc_bytes(desc), +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { +- dev_err(jrdev, "unable to map shared descriptor\n"); +- return -ENOMEM; +- } +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, "rfc4543 enc shdesc@"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, desc, +- desc_bytes(desc), 1); +-#endif +- +- /* +- * Job Descriptor and Shared Descriptors +- * must all fit into the 64-word Descriptor h/w Buffer +- */ +- keys_fit_inline = false; +- if (DESC_RFC4543_DEC_LEN + DESC_JOB_IO_LEN + +- ctx->enckeylen <= CAAM_DESC_BYTES_MAX) +- keys_fit_inline = true; +- +- desc = ctx->sh_desc_dec; +- +- init_sh_desc(desc, HDR_SHARE_SERIAL); +- +- /* Skip key loading if it is loaded due to sharing */ +- key_jump_cmd = append_jump(desc, JUMP_JSL | +- JUMP_TEST_ALL | JUMP_COND_SHRD); +- if (keys_fit_inline) +- append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, +- ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); +- else +- append_key(desc, ctx->key_dma, ctx->enckeylen, +- CLASS_1 | KEY_DEST_CLASS_REG); +- set_jump_tgt_here(desc, key_jump_cmd); +- +- /* Class 1 operation */ +- append_operation(desc, ctx->class1_alg_type | +- OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); +- +- /* Load AES-GMAC ESP IV into Math1 register */ +- append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_WORD_DECO_MATH1 | +- LDST_CLASS_DECO | tfm->ivsize); +- +- /* Wait the DMA transaction to finish */ +- append_jump(desc, JUMP_TEST_ALL | JUMP_COND_CALM | +- (1 << 
JUMP_OFFSET_SHIFT)); +- +- /* assoclen + cryptlen = (seqinlen - ivsize) - icvsize */ +- append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, ctx->authsize); +- +- /* Overwrite blank immediate AES-GMAC ESP IV data */ +- write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | +- (tfm->ivsize << MOVE_LEN_SHIFT)); +- +- /* Overwrite blank immediate AAD data */ +- write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | +- (tfm->ivsize << MOVE_LEN_SHIFT)); +- +- /* assoclen = (assoclen + cryptlen) - cryptlen */ +- append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); +- append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ); +- +- /* +- * MOVE_LEN opcode is not available in all SEC HW revisions, +- * thus need to do some magic, i.e. self-patch the descriptor +- * buffer. +- */ +- read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 | +- (0x6 << MOVE_LEN_SHIFT)); +- write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | +- (0x8 << MOVE_LEN_SHIFT)); +- +- /* Read Salt and AES-GMAC ESP IV */ +- append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | +- FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize)); +- /* Append Salt */ +- append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4); +- set_move_tgt_here(desc, write_iv_cmd); +- /* Blank commands. Will be overwritten by AES-GMAC ESP IV. */ +- append_cmd(desc, 0x00000000); +- append_cmd(desc, 0x00000000); +- /* End of blank commands */ +- +- /* Read assoc data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_AAD); +- +- /* Will read cryptlen bytes */ +- append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ); +- +- /* Will write cryptlen bytes */ +- append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ); +- +- /* Authenticate AES-GMAC ESP IV */ +- append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | +- FIFOLD_TYPE_AAD | tfm->ivsize); +- set_move_tgt_here(desc, write_aad_cmd); +- /* Blank commands. Will be overwritten by AES-GMAC ESP IV. 
*/ +- append_cmd(desc, 0x00000000); +- append_cmd(desc, 0x00000000); +- /* End of blank commands */ +- +- /* Store payload data */ +- append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); +- +- /* In-snoop cryptlen data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | FIFOLDST_VLF | +- FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST2FLUSH1); +- +- set_move_tgt_here(desc, read_move_cmd); +- set_move_tgt_here(desc, write_move_cmd); +- append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); +- /* Move payload data to OFIFO */ +- append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO); +- append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); +- +- /* Read ICV */ +- append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 | +- FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); +- +- ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, +- desc_bytes(desc), +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { +- dev_err(jrdev, "unable to map shared descriptor\n"); +- return -ENOMEM; +- } +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, "rfc4543 dec shdesc@"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, desc, +- desc_bytes(desc), 1); +-#endif +- +- /* +- * Job Descriptor and Shared Descriptors +- * must all fit into the 64-word Descriptor h/w Buffer +- */ +- keys_fit_inline = false; +- if (DESC_RFC4543_GIVENC_LEN + DESC_JOB_IO_LEN + +- ctx->enckeylen <= CAAM_DESC_BYTES_MAX) +- keys_fit_inline = true; +- +- /* rfc4543_givencrypt shared descriptor */ +- desc = ctx->sh_desc_givenc; +- +- init_sh_desc(desc, HDR_SHARE_SERIAL); +- +- /* Skip key loading if it is loaded due to sharing */ +- key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | +- JUMP_COND_SHRD); +- if (keys_fit_inline) +- append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, +- ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); +- else +- append_key(desc, ctx->key_dma, ctx->enckeylen, +- CLASS_1 | KEY_DEST_CLASS_REG); +- set_jump_tgt_here(desc, key_jump_cmd); +- +- /* Generate IV */ +- geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | +- NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | +- NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT); +- append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | +- LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); +- append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); +- /* Move generated IV to Math1 register */ +- append_move(desc, MOVE_SRC_INFIFO | MOVE_DEST_MATH1 | +- (tfm->ivsize << MOVE_LEN_SHIFT)); +- append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); +- +- /* Overwrite blank immediate AES-GMAC IV data */ +- write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | +- (tfm->ivsize << MOVE_LEN_SHIFT)); +- +- /* Overwrite blank immediate AAD data */ +- write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | +- (tfm->ivsize << MOVE_LEN_SHIFT)); +- +- /* Copy generated IV to OFIFO */ +- append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_OUTFIFO | +- (tfm->ivsize << MOVE_LEN_SHIFT)); +- +- /* Class 1 operation */ +- append_operation(desc, ctx->class1_alg_type | +- OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); +- +- /* ivsize + cryptlen = seqoutlen - authsize */ +- append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); +- +- /* assoclen = seqinlen - (ivsize + cryptlen) */ +- append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG3, CAAM_CMD_SZ); +- +- /* Will write ivsize + cryptlen */ +- append_math_add(desc, VARSEQOUTLEN, REG3, REG0, CAAM_CMD_SZ); +- +- /* +- * MOVE_LEN opcode is not available in all SEC HW revisions, +- * thus need to do some magic, 
i.e. self-patch the descriptor +- * buffer. +- */ +- read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 | +- (0x6 << MOVE_LEN_SHIFT)); +- write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | +- (0x8 << MOVE_LEN_SHIFT)); +- +- /* Read Salt and AES-GMAC generated IV */ +- append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | +- FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize)); +- /* Append Salt */ +- append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4); +- set_move_tgt_here(desc, write_iv_cmd); +- /* Blank commands. Will be overwritten by AES-GMAC generated IV. */ +- append_cmd(desc, 0x00000000); +- append_cmd(desc, 0x00000000); +- /* End of blank commands */ +- +- /* No need to reload iv */ +- append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_SKIP); +- +- /* Read assoc data */ +- append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | +- FIFOLD_TYPE_AAD); +- +- /* Will read cryptlen */ +- append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); +- +- /* Authenticate AES-GMAC IV */ +- append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | +- FIFOLD_TYPE_AAD | tfm->ivsize); +- set_move_tgt_here(desc, write_aad_cmd); +- /* Blank commands. Will be overwritten by AES-GMAC IV. */ +- append_cmd(desc, 0x00000000); +- append_cmd(desc, 0x00000000); +- /* End of blank commands */ +- +- /* Read and write cryptlen bytes */ +- aead_append_src_dst(desc, FIFOLD_TYPE_AAD); +- +- set_move_tgt_here(desc, read_move_cmd); +- set_move_tgt_here(desc, write_move_cmd); +- append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); +- /* Move payload data to OFIFO */ +- append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO); +- +- /* Write ICV */ +- append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | +- LDST_SRCDST_BYTE_CONTEXT); +- +- ctx->sh_desc_givenc_dma = dma_map_single(jrdev, desc, +- desc_bytes(desc), +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) { +- dev_err(jrdev, "unable to map shared descriptor\n"); +- return -ENOMEM; +- } +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, +- "rfc4543 givenc shdesc@"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, desc, +- desc_bytes(desc), 1); +-#endif +- +- return 0; +-} +- +-static int rfc4543_setauthsize(struct crypto_aead *authenc, +- unsigned int authsize) +-{ +- struct caam_ctx *ctx = crypto_aead_ctx(authenc); +- +- ctx->authsize = authsize; +- rfc4543_set_sh_desc(authenc); +- +- return 0; +-} +- +-static u32 gen_split_aead_key(struct caam_ctx *ctx, const u8 *key_in, +- u32 authkeylen) +-{ +- return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len, +- ctx->split_key_pad_len, key_in, authkeylen, +- ctx->alg_op); +-} +- +-static int aead_setkey(struct crypto_aead *aead, +- const u8 *key, unsigned int keylen) +-{ +- /* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */ +- static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 }; +- struct caam_ctx *ctx = crypto_aead_ctx(aead); +- struct device *jrdev = ctx->jrdev; +- struct crypto_authenc_keys keys; +- int ret = 0; +- +- if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) +- goto badkey; +- +- /* Pick class 2 key length from algorithm submask */ +- ctx->split_key_len = mdpadlen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >> +- OP_ALG_ALGSEL_SHIFT] * 2; +- ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16); +- +- if (ctx->split_key_pad_len + keys.enckeylen > CAAM_MAX_KEY_SIZE) +- goto badkey; +- +-#ifdef DEBUG +- printk(KERN_ERR "keylen %d enckeylen %d authkeylen 
%d\n", +- keys.authkeylen + keys.enckeylen, keys.enckeylen, +- keys.authkeylen); +- printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n", +- ctx->split_key_len, ctx->split_key_pad_len); +- print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); +-#endif +- +- ret = gen_split_aead_key(ctx, keys.authkey, keys.authkeylen); +- if (ret) { +- goto badkey; +- } +- +- /* postpend encryption key to auth split key */ +- memcpy(ctx->key + ctx->split_key_pad_len, keys.enckey, keys.enckeylen); +- +- ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len + +- keys.enckeylen, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->key_dma)) { +- dev_err(jrdev, "unable to map key i/o memory\n"); +- return -ENOMEM; +- } +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, +- ctx->split_key_pad_len + keys.enckeylen, 1); +-#endif +- +- ctx->enckeylen = keys.enckeylen; ++ ctx->enckeylen = enckeylen; + + ret = aead_set_sh_desc(aead); + if (ret) { + dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len + +- keys.enckeylen, DMA_TO_DEVICE); ++ enckeylen, DMA_TO_DEVICE); + } + + return ret; +@@ -1715,154 +548,20 @@ + return -EINVAL; + } + +-static int gcm_setkey(struct crypto_aead *aead, +- const u8 *key, unsigned int keylen) +-{ +- struct caam_ctx *ctx = crypto_aead_ctx(aead); +- struct device *jrdev = ctx->jrdev; +- int ret = 0; +- +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); +-#endif +- +- memcpy(ctx->key, key, keylen); +- ctx->key_dma = dma_map_single(jrdev, ctx->key, keylen, +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->key_dma)) { +- dev_err(jrdev, "unable to map key i/o memory\n"); +- return -ENOMEM; +- } +- ctx->enckeylen = keylen; +- +- ret = gcm_set_sh_desc(aead); +- if (ret) { +- dma_unmap_single(jrdev, ctx->key_dma, ctx->enckeylen, +- DMA_TO_DEVICE); +- } +- +- return ret; +-} +- +-static int rfc4106_setkey(struct crypto_aead *aead, +- const u8 *key, unsigned int keylen) +-{ +- struct caam_ctx *ctx = crypto_aead_ctx(aead); +- struct device *jrdev = ctx->jrdev; +- int ret = 0; +- +- if (keylen < 4) +- return -EINVAL; +- +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); +-#endif +- +- memcpy(ctx->key, key, keylen); +- +- /* +- * The last four bytes of the key material are used as the salt value +- * in the nonce. Update the AES key length. +- */ +- ctx->enckeylen = keylen - 4; +- +- ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->enckeylen, +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->key_dma)) { +- dev_err(jrdev, "unable to map key i/o memory\n"); +- return -ENOMEM; +- } +- +- ret = rfc4106_set_sh_desc(aead); +- if (ret) { +- dma_unmap_single(jrdev, ctx->key_dma, ctx->enckeylen, +- DMA_TO_DEVICE); +- } +- +- return ret; +-} +- +-static int rfc4543_setkey(struct crypto_aead *aead, +- const u8 *key, unsigned int keylen) +-{ +- struct caam_ctx *ctx = crypto_aead_ctx(aead); +- struct device *jrdev = ctx->jrdev; +- int ret = 0; +- +- if (keylen < 4) +- return -EINVAL; +- +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); +-#endif +- +- memcpy(ctx->key, key, keylen); +- +- /* +- * The last four bytes of the key material are used as the salt value +- * in the nonce. Update the AES key length. 
+- */ +- ctx->enckeylen = keylen - 4; +- +- ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->enckeylen, +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->key_dma)) { +- dev_err(jrdev, "unable to map key i/o memory\n"); +- return -ENOMEM; +- } +- +- ret = rfc4543_set_sh_desc(aead); +- if (ret) { +- dma_unmap_single(jrdev, ctx->key_dma, ctx->enckeylen, +- DMA_TO_DEVICE); +- } +- +- return ret; +-} +- + static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, + const u8 *key, unsigned int keylen) + { + struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher); +- struct ablkcipher_tfm *crt = &ablkcipher->base.crt_ablkcipher; +- struct crypto_tfm *tfm = crypto_ablkcipher_tfm(ablkcipher); +- const char *alg_name = crypto_tfm_alg_name(tfm); ++ struct ablkcipher_tfm *tfm = &ablkcipher->base.crt_ablkcipher; + struct device *jrdev = ctx->jrdev; + int ret = 0; +- u32 *key_jump_cmd; ++ u32 *key_jump_cmd, *jump_cmd; + u32 *desc; +- u32 *nonce; +- u32 geniv; +- u32 ctx1_iv_off = 0; +- const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == +- OP_ALG_AAI_CTR_MOD128); +- const bool is_rfc3686 = (ctr_mode && +- (strstr(alg_name, "rfc3686") != NULL)); + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "key in @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); + #endif +- /* +- * AES-CTR needs to load IV in CONTEXT1 reg +- * at an offset of 128bits (16bytes) +- * CONTEXT1[255:128] = IV +- */ +- if (ctr_mode) +- ctx1_iv_off = 16; +- +- /* +- * RFC3686 specific: +- * | CONTEXT1[255:128] = {NONCE, IV, COUNTER} +- * | *key = {KEY, NONCE} +- */ +- if (is_rfc3686) { +- ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE; +- keylen -= CTR_RFC3686_NONCE_SIZE; +- } + + memcpy(ctx->key, key, keylen); + ctx->key_dma = dma_map_single(jrdev, ctx->key, keylen, +@@ -1872,10 +571,11 @@ + return -ENOMEM; + } + ctx->enckeylen = keylen; ++ dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE); + + /* ablkcipher_encrypt shared descriptor */ + desc = ctx->sh_desc_enc; +- init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); ++ init_sh_desc(desc, HDR_SHARE_SERIAL); + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); +@@ -1885,32 +585,20 @@ + ctx->enckeylen, CLASS_1 | + KEY_DEST_CLASS_REG); + +- /* Load nonce into CONTEXT1 reg */ +- if (is_rfc3686) { +- nonce = (u32 *)(key + keylen); +- append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB | +- LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); +- append_move(desc, MOVE_WAITCOMP | +- MOVE_SRC_OUTFIFO | +- MOVE_DEST_CLASS1CTX | +- (16 << MOVE_OFFSET_SHIFT) | +- (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); +- } +- + set_jump_tgt_here(desc, key_jump_cmd); + +- /* Load iv */ +- append_seq_load(desc, crt->ivsize, LDST_SRCDST_BYTE_CONTEXT | +- LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); +- +- /* Load counter into CONTEXT1 reg */ +- if (is_rfc3686) +- append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM | +- LDST_CLASS_1_CCB | +- LDST_SRCDST_BYTE_CONTEXT | +- ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << +- LDST_OFFSET_SHIFT)); ++ /* Propagate errors from shared to job descriptor */ ++ append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); + ++ /* load IV */ ++ if (strncmp(ablkcipher->base.__crt_alg->cra_name, "ctr(aes)", 8) == 0) { ++ append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | ++ LDST_CLASS_1_CCB | tfm->ivsize | ++ (16 << LDST_OFFSET_SHIFT)); ++ } else { ++ append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | 
++ LDST_CLASS_1_CCB | tfm->ivsize); ++ } + /* Load operation */ + append_operation(desc, ctx->class1_alg_type | + OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); +@@ -1926,15 +614,17 @@ + return -ENOMEM; + } + #ifdef DEBUG +- print_hex_dump(KERN_ERR, +- "ablkcipher enc shdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ablkcipher enc shdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, + desc_bytes(desc), 1); + #endif ++ dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, ++ desc_bytes(desc), DMA_TO_DEVICE); ++ + /* ablkcipher_decrypt shared descriptor */ + desc = ctx->sh_desc_dec; + +- init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); ++ init_sh_desc(desc, HDR_SHARE_SERIAL); + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); +@@ -1944,133 +634,49 @@ + ctx->enckeylen, CLASS_1 | + KEY_DEST_CLASS_REG); + +- /* Load nonce into CONTEXT1 reg */ +- if (is_rfc3686) { +- nonce = (u32 *)(key + keylen); +- append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB | +- LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); +- append_move(desc, MOVE_WAITCOMP | +- MOVE_SRC_OUTFIFO | +- MOVE_DEST_CLASS1CTX | +- (16 << MOVE_OFFSET_SHIFT) | +- (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); +- } +- ++ /* For aead, only propagate error immediately if shared */ ++ jump_cmd = append_jump(desc, JUMP_TEST_ALL); + set_jump_tgt_here(desc, key_jump_cmd); ++ append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); ++ set_jump_tgt_here(desc, jump_cmd); + + /* load IV */ +- append_seq_load(desc, crt->ivsize, LDST_SRCDST_BYTE_CONTEXT | +- LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); +- +- /* Load counter into CONTEXT1 reg */ +- if (is_rfc3686) +- append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM | +- LDST_CLASS_1_CCB | +- LDST_SRCDST_BYTE_CONTEXT | +- ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << +- LDST_OFFSET_SHIFT)); ++ if (strncmp(ablkcipher->base.__crt_alg->cra_name, "ctr(aes)", 8) == 0) { ++ append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | ++ LDST_CLASS_1_CCB | tfm->ivsize | ++ (16 << LDST_OFFSET_SHIFT)); + +- /* Choose operation */ +- if (ctr_mode) + append_operation(desc, ctx->class1_alg_type | +- OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT); +- else ++ OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT); ++ } else { ++ append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | ++ LDST_CLASS_1_CCB | tfm->ivsize); ++ ++ /* Choose operation */ + append_dec_op1(desc, ctx->class1_alg_type); ++ } + + /* Perform operation */ + ablkcipher_append_src_dst(desc); + ++ /* Wait for key to load before allowing propagating error */ ++ append_dec_shr_done(desc); ++ + ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, + desc_bytes(desc), + DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { ++ if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { + dev_err(jrdev, "unable to map shared descriptor\n"); + return -ENOMEM; + } + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, +- "ablkcipher dec shdesc@"__stringify(__LINE__)": ", +- DUMP_PREFIX_ADDRESS, 16, 4, desc, +- desc_bytes(desc), 1); +-#endif +- /* ablkcipher_givencrypt shared descriptor */ +- desc = ctx->sh_desc_givenc; +- +- init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); +- /* Skip if already shared */ +- key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | +- JUMP_COND_SHRD); +- +- /* Load class1 key only */ +- append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, +- ctx->enckeylen, CLASS_1 | +- KEY_DEST_CLASS_REG); +- +- /* Load Nonce into CONTEXT1 reg */ +- if (is_rfc3686) { +- nonce = (u32 
*)(key + keylen); +- append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB | +- LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); +- append_move(desc, MOVE_WAITCOMP | +- MOVE_SRC_OUTFIFO | +- MOVE_DEST_CLASS1CTX | +- (16 << MOVE_OFFSET_SHIFT) | +- (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); +- } +- set_jump_tgt_here(desc, key_jump_cmd); +- +- /* Generate IV */ +- geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | +- NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | +- NFIFOENTRY_PTYPE_RND | (crt->ivsize << NFIFOENTRY_DLEN_SHIFT); +- append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | +- LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); +- append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); +- append_move(desc, MOVE_WAITCOMP | +- MOVE_SRC_INFIFO | +- MOVE_DEST_CLASS1CTX | +- (crt->ivsize << MOVE_LEN_SHIFT) | +- (ctx1_iv_off << MOVE_OFFSET_SHIFT)); +- append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); +- +- /* Copy generated IV to memory */ +- append_seq_store(desc, crt->ivsize, +- LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | +- (ctx1_iv_off << LDST_OFFSET_SHIFT)); +- +- /* Load Counter into CONTEXT1 reg */ +- if (is_rfc3686) +- append_load_imm_u32(desc, (u32)1, LDST_IMM | +- LDST_CLASS_1_CCB | +- LDST_SRCDST_BYTE_CONTEXT | +- ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << +- LDST_OFFSET_SHIFT)); +- +- if (ctx1_iv_off) +- append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NCP | +- (1 << JUMP_OFFSET_SHIFT)); +- +- /* Load operation */ +- append_operation(desc, ctx->class1_alg_type | +- OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); +- +- /* Perform operation */ +- ablkcipher_append_src_dst(desc); +- +- ctx->sh_desc_givenc_dma = dma_map_single(jrdev, desc, +- desc_bytes(desc), +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) { +- dev_err(jrdev, "unable to map shared descriptor\n"); +- return -ENOMEM; +- } +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, +- "ablkcipher givenc shdesc@" __stringify(__LINE__) ": ", ++ print_hex_dump(KERN_ERR, "ablkcipher dec shdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, + desc_bytes(desc), 1); + #endif ++ dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, ++ desc_bytes(desc), DMA_TO_DEVICE); + + return ret; + } +@@ -2195,19 +801,22 @@ + edesc = (struct aead_edesc *)((char *)desc - + offsetof(struct aead_edesc, hw_desc)); + +- if (err) +- caam_jr_strstatus(jrdev, err); ++ if (err) { ++ char tmp[CAAM_ERROR_STR_MAX]; ++ ++ dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); ++ } + + aead_unmap(jrdev, edesc, req); + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "assoc @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "assoc @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->assoc), + req->assoclen , 1); +- print_hex_dump(KERN_ERR, "dstiv @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "dstiv @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src) - ivsize, + edesc->src_nents ? 100 : ivsize, 1); +- print_hex_dump(KERN_ERR, "dst @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "dst @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src), + edesc->src_nents ? 
100 : req->cryptlen + + ctx->authsize + 4, 1); +@@ -2235,16 +844,19 @@ + offsetof(struct aead_edesc, hw_desc)); + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "dstiv @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "dstiv @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->iv, + ivsize, 1); +- print_hex_dump(KERN_ERR, "dst @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "dst @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->dst), +- req->cryptlen - ctx->authsize, 1); ++ req->cryptlen, 1); + #endif + +- if (err) +- caam_jr_strstatus(jrdev, err); ++ if (err) { ++ char tmp[CAAM_ERROR_STR_MAX]; ++ ++ dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); ++ } + + aead_unmap(jrdev, edesc, req); + +@@ -2255,7 +867,7 @@ + err = -EBADMSG; + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "iphdrout@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "iphdrout@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, + ((char *)sg_virt(req->assoc) - sizeof(struct iphdr)), + sizeof(struct iphdr) + req->assoclen + +@@ -2263,7 +875,7 @@ + ctx->authsize + 36, 1); + if (!err && edesc->sec4_sg_bytes) { + struct scatterlist *sg = sg_last(req->src, edesc->src_nents); +- print_hex_dump(KERN_ERR, "sglastout@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "sglastout@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(sg), + sg->length + ctx->authsize + 16, 1); + } +@@ -2289,14 +901,17 @@ + edesc = (struct ablkcipher_edesc *)((char *)desc - + offsetof(struct ablkcipher_edesc, hw_desc)); + +- if (err) +- caam_jr_strstatus(jrdev, err); ++ if (err) { ++ char tmp[CAAM_ERROR_STR_MAX]; ++ ++ dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); ++ } + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "dstiv @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "dstiv @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->info, + edesc->src_nents > 1 ? 100 : ivsize, 1); +- print_hex_dump(KERN_ERR, "dst @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "dst @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src), + edesc->dst_nents > 1 ? 100 : req->nbytes, 1); + #endif +@@ -2321,14 +936,17 @@ + + edesc = (struct ablkcipher_edesc *)((char *)desc - + offsetof(struct ablkcipher_edesc, hw_desc)); +- if (err) +- caam_jr_strstatus(jrdev, err); ++ if (err) { ++ char tmp[CAAM_ERROR_STR_MAX]; ++ ++ dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); ++ } + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "dstiv @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "dstiv @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->info, + ivsize, 1); +- print_hex_dump(KERN_ERR, "dst @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "dst @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src), + edesc->dst_nents > 1 ? 100 : req->nbytes, 1); + #endif +@@ -2355,38 +973,29 @@ + u32 out_options = 0, in_options; + dma_addr_t dst_dma, src_dma; + int len, sec4_sg_index = 0; +- bool is_gcm = false; + + #ifdef DEBUG + debug("assoclen %d cryptlen %d authsize %d\n", + req->assoclen, req->cryptlen, authsize); +- print_hex_dump(KERN_ERR, "assoc @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "assoc @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->assoc), + req->assoclen , 1); +- print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "presciv@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->iv, + edesc->src_nents ? 
100 : ivsize, 1); +- print_hex_dump(KERN_ERR, "src @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "src @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src), + edesc->src_nents ? 100 : req->cryptlen, 1); +- print_hex_dump(KERN_ERR, "shrdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "shrdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sh_desc, + desc_bytes(sh_desc), 1); + #endif + +- if (((ctx->class1_alg_type & OP_ALG_ALGSEL_MASK) == +- OP_ALG_ALGSEL_AES) && +- ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == OP_ALG_AAI_GCM)) +- is_gcm = true; +- + len = desc_len(sh_desc); + init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE); + + if (all_contig) { +- if (is_gcm) +- src_dma = edesc->iv_dma; +- else +- src_dma = sg_dma_address(req->assoc); ++ src_dma = sg_dma_address(req->assoc); + in_options = 0; + } else { + src_dma = edesc->sec4_sg_dma; +@@ -2394,9 +1003,12 @@ + (edesc->src_nents ? : 1); + in_options = LDST_SGF; + } +- +- append_seq_in_ptr(desc, src_dma, req->assoclen + ivsize + req->cryptlen, +- in_options); ++ if (encrypt) ++ append_seq_in_ptr(desc, src_dma, req->assoclen + ivsize + ++ req->cryptlen - authsize, in_options); ++ else ++ append_seq_in_ptr(desc, src_dma, req->assoclen + ivsize + ++ req->cryptlen, in_options); + + if (likely(req->src == req->dst)) { + if (all_contig) { +@@ -2417,8 +1029,7 @@ + } + } + if (encrypt) +- append_seq_out_ptr(desc, dst_dma, req->cryptlen + authsize, +- out_options); ++ append_seq_out_ptr(desc, dst_dma, req->cryptlen, out_options); + else + append_seq_out_ptr(desc, dst_dma, req->cryptlen - authsize, + out_options); +@@ -2440,53 +1051,43 @@ + u32 out_options = 0, in_options; + dma_addr_t dst_dma, src_dma; + int len, sec4_sg_index = 0; +- bool is_gcm = false; + + #ifdef DEBUG + debug("assoclen %d cryptlen %d authsize %d\n", + req->assoclen, req->cryptlen, authsize); +- print_hex_dump(KERN_ERR, "assoc @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "assoc @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->assoc), + req->assoclen , 1); +- print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "presciv@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->iv, ivsize, 1); +- print_hex_dump(KERN_ERR, "src @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "src @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src), + edesc->src_nents > 1 ? 
100 : req->cryptlen, 1); +- print_hex_dump(KERN_ERR, "shrdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "shrdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sh_desc, + desc_bytes(sh_desc), 1); + #endif + +- if (((ctx->class1_alg_type & OP_ALG_ALGSEL_MASK) == +- OP_ALG_ALGSEL_AES) && +- ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == OP_ALG_AAI_GCM)) +- is_gcm = true; +- + len = desc_len(sh_desc); + init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE); + + if (contig & GIV_SRC_CONTIG) { +- if (is_gcm) +- src_dma = edesc->iv_dma; +- else +- src_dma = sg_dma_address(req->assoc); ++ src_dma = sg_dma_address(req->assoc); + in_options = 0; + } else { + src_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->assoc_nents + 1 + edesc->src_nents; + in_options = LDST_SGF; + } +- append_seq_in_ptr(desc, src_dma, req->assoclen + ivsize + req->cryptlen, +- in_options); ++ append_seq_in_ptr(desc, src_dma, req->assoclen + ivsize + ++ req->cryptlen - authsize, in_options); + + if (contig & GIV_DST_CONTIG) { + dst_dma = edesc->iv_dma; + } else { + if (likely(req->src == req->dst)) { + dst_dma = src_dma + sizeof(struct sec4_sg_entry) * +- (edesc->assoc_nents + +- (is_gcm ? 1 + edesc->src_nents : 0)); ++ edesc->assoc_nents; + out_options = LDST_SGF; + } else { + dst_dma = edesc->sec4_sg_dma + +@@ -2496,8 +1097,7 @@ + } + } + +- append_seq_out_ptr(desc, dst_dma, ivsize + req->cryptlen + authsize, +- out_options); ++ append_seq_out_ptr(desc, dst_dma, ivsize + req->cryptlen, out_options); + } + + /* +@@ -2516,10 +1116,10 @@ + int len, sec4_sg_index = 0; + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "presciv@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->info, + ivsize, 1); +- print_hex_dump(KERN_ERR, "src @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "src @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src), + edesc->src_nents ? 100 : req->nbytes, 1); + #endif +@@ -2532,7 +1132,7 @@ + in_options = 0; + } else { + src_dma = edesc->sec4_sg_dma; +- sec4_sg_index += edesc->src_nents + 1; ++ sec4_sg_index += (iv_contig ? 0 : 1) + edesc->src_nents; + in_options = LDST_SGF; + } + append_seq_in_ptr(desc, src_dma, req->nbytes + ivsize, in_options); +@@ -2558,59 +1158,10 @@ + } + + /* +- * Fill in ablkcipher givencrypt job descriptor +- */ +-static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr, +- struct ablkcipher_edesc *edesc, +- struct ablkcipher_request *req, +- bool iv_contig) +-{ +- struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req); +- int ivsize = crypto_ablkcipher_ivsize(ablkcipher); +- u32 *desc = edesc->hw_desc; +- u32 out_options, in_options; +- dma_addr_t dst_dma, src_dma; +- int len, sec4_sg_index = 0; +- +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ", +- DUMP_PREFIX_ADDRESS, 16, 4, req->info, +- ivsize, 1); +- print_hex_dump(KERN_ERR, "src @" __stringify(__LINE__) ": ", +- DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src), +- edesc->src_nents ? 
100 : req->nbytes, 1); +-#endif +- +- len = desc_len(sh_desc); +- init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE); +- +- if (!edesc->src_nents) { +- src_dma = sg_dma_address(req->src); +- in_options = 0; +- } else { +- src_dma = edesc->sec4_sg_dma; +- sec4_sg_index += edesc->src_nents; +- in_options = LDST_SGF; +- } +- append_seq_in_ptr(desc, src_dma, req->nbytes, in_options); +- +- if (iv_contig) { +- dst_dma = edesc->iv_dma; +- out_options = 0; +- } else { +- dst_dma = edesc->sec4_sg_dma + +- sec4_sg_index * sizeof(struct sec4_sg_entry); +- out_options = LDST_SGF; +- } +- append_seq_out_ptr(desc, dst_dma, req->nbytes + ivsize, out_options); +-} +- +-/* + * allocate and map the aead extended descriptor + */ + static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, +- int desc_bytes, bool *all_contig_ptr, +- bool encrypt) ++ int desc_bytes, bool *all_contig_ptr) + { + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct caam_ctx *ctx = crypto_aead_ctx(aead); +@@ -2625,26 +1176,15 @@ + bool assoc_chained = false, src_chained = false, dst_chained = false; + int ivsize = crypto_aead_ivsize(aead); + int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; +- unsigned int authsize = ctx->authsize; +- bool is_gcm = false; + + assoc_nents = sg_count(req->assoc, req->assoclen, &assoc_chained); ++ src_nents = sg_count(req->src, req->cryptlen, &src_chained); + +- if (unlikely(req->dst != req->src)) { +- src_nents = sg_count(req->src, req->cryptlen, &src_chained); +- dst_nents = sg_count(req->dst, +- req->cryptlen + +- (encrypt ? authsize : (-authsize)), +- &dst_chained); +- } else { +- src_nents = sg_count(req->src, +- req->cryptlen + +- (encrypt ? authsize : 0), +- &src_chained); +- } ++ if (unlikely(req->dst != req->src)) ++ dst_nents = sg_count(req->dst, req->cryptlen, &dst_chained); + + sgc = dma_map_sg_chained(jrdev, req->assoc, assoc_nents ? : 1, +- DMA_TO_DEVICE, assoc_chained); ++ DMA_BIDIRECTIONAL, assoc_chained); + if (likely(req->src == req->dst)) { + sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1, + DMA_BIDIRECTIONAL, src_chained); +@@ -2655,43 +1195,23 @@ + DMA_FROM_DEVICE, dst_chained); + } + ++ /* Check if data are contiguous */ + iv_dma = dma_map_single(jrdev, req->iv, ivsize, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, iv_dma)) { +- dev_err(jrdev, "unable to map IV\n"); +- return ERR_PTR(-ENOMEM); +- } +- +- if (((ctx->class1_alg_type & OP_ALG_ALGSEL_MASK) == +- OP_ALG_ALGSEL_AES) && +- ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == OP_ALG_AAI_GCM)) +- is_gcm = true; +- +- /* +- * Check if data are contiguous. +- * GCM expected input sequence: IV, AAD, text +- * All other - expected input sequence: AAD, IV, text +- */ +- if (is_gcm) +- all_contig = (!assoc_nents && +- iv_dma + ivsize == sg_dma_address(req->assoc) && +- !src_nents && sg_dma_address(req->assoc) + +- req->assoclen == sg_dma_address(req->src)); +- else +- all_contig = (!assoc_nents && sg_dma_address(req->assoc) + +- req->assoclen == iv_dma && !src_nents && +- iv_dma + ivsize == sg_dma_address(req->src)); +- if (!all_contig) { ++ if (assoc_nents || sg_dma_address(req->assoc) + req->assoclen != ++ iv_dma || src_nents || iv_dma + ivsize != ++ sg_dma_address(req->src)) { ++ all_contig = false; + assoc_nents = assoc_nents ? : 1; + src_nents = src_nents ? 
: 1; + sec4_sg_len = assoc_nents + 1 + src_nents; + } +- + sec4_sg_len += dst_nents; + + sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); ++ dma_sync_single_for_device(jrdev, iv_dma, ivsize, DMA_TO_DEVICE); + + /* allocate space for base edesc and hw desc commands, link tables */ +- edesc = kmalloc(sizeof(struct aead_edesc) + desc_bytes + ++ edesc = kzalloc(sizeof(struct aead_edesc) + desc_bytes + + sec4_sg_bytes, GFP_DMA | flags); + if (!edesc) { + dev_err(jrdev, "could not allocate extended descriptor\n"); +@@ -2708,46 +1228,32 @@ + edesc->sec4_sg_bytes = sec4_sg_bytes; + edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + + desc_bytes; ++ edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, ++ sec4_sg_bytes, DMA_TO_DEVICE); + *all_contig_ptr = all_contig; + + sec4_sg_index = 0; + if (!all_contig) { +- if (!is_gcm) { +- sg_to_sec4_sg(req->assoc, +- assoc_nents, +- edesc->sec4_sg + +- sec4_sg_index, 0); +- sec4_sg_index += assoc_nents; +- } +- ++ sg_to_sec4_sg(req->assoc, ++ (assoc_nents ? : 1), ++ edesc->sec4_sg + ++ sec4_sg_index, 0); ++ sec4_sg_index += assoc_nents ? : 1; + dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index, + iv_dma, ivsize, 0); + sec4_sg_index += 1; +- +- if (is_gcm) { +- sg_to_sec4_sg(req->assoc, +- assoc_nents, +- edesc->sec4_sg + +- sec4_sg_index, 0); +- sec4_sg_index += assoc_nents; +- } +- + sg_to_sec4_sg_last(req->src, +- src_nents, ++ (src_nents ? : 1), + edesc->sec4_sg + + sec4_sg_index, 0); +- sec4_sg_index += src_nents; ++ sec4_sg_index += src_nents ? : 1; + } + if (dst_nents) { + sg_to_sec4_sg_last(req->dst, dst_nents, + edesc->sec4_sg + sec4_sg_index, 0); + } +- edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, +- sec4_sg_bytes, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { +- dev_err(jrdev, "unable to map S/G table\n"); +- return ERR_PTR(-ENOMEM); +- } ++ dma_sync_single_for_device(jrdev, edesc->sec4_sg_dma, sec4_sg_bytes, ++ DMA_TO_DEVICE); + + return edesc; + } +@@ -2762,9 +1268,11 @@ + u32 *desc; + int ret = 0; + ++ req->cryptlen += ctx->authsize; ++ + /* allocate extended descriptor */ + edesc = aead_edesc_alloc(req, DESC_JOB_IO_LEN * +- CAAM_CMD_SZ, &all_contig, true); ++ CAAM_CMD_SZ, &all_contig); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + +@@ -2772,7 +1280,7 @@ + init_aead_job(ctx->sh_desc_enc, ctx->sh_desc_enc_dma, edesc, req, + all_contig, true); + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "aead jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, + desc_bytes(edesc->hw_desc), 1); + #endif +@@ -2801,12 +1309,12 @@ + + /* allocate extended descriptor */ + edesc = aead_edesc_alloc(req, DESC_JOB_IO_LEN * +- CAAM_CMD_SZ, &all_contig, false); ++ CAAM_CMD_SZ, &all_contig); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "dec src@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "dec src@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src), + req->cryptlen, 1); + #endif +@@ -2815,7 +1323,7 @@ + init_aead_job(ctx->sh_desc_dec, + ctx->sh_desc_dec_dma, edesc, req, all_contig, false); + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "aead jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, + desc_bytes(edesc->hw_desc), 1); + #endif +@@ -2853,17 +1361,15 @@ + int ivsize = crypto_aead_ivsize(aead); + bool assoc_chained = false, src_chained = false, 
dst_chained = false; + int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; +- bool is_gcm = false; + + assoc_nents = sg_count(req->assoc, req->assoclen, &assoc_chained); + src_nents = sg_count(req->src, req->cryptlen, &src_chained); + + if (unlikely(req->dst != req->src)) +- dst_nents = sg_count(req->dst, req->cryptlen + ctx->authsize, +- &dst_chained); ++ dst_nents = sg_count(req->dst, req->cryptlen, &dst_chained); + + sgc = dma_map_sg_chained(jrdev, req->assoc, assoc_nents ? : 1, +- DMA_TO_DEVICE, assoc_chained); ++ DMA_BIDIRECTIONAL, assoc_chained); + if (likely(req->src == req->dst)) { + sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1, + DMA_BIDIRECTIONAL, src_chained); +@@ -2874,64 +1380,32 @@ + DMA_FROM_DEVICE, dst_chained); + } + ++ /* Check if data are contiguous */ + iv_dma = dma_map_single(jrdev, greq->giv, ivsize, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, iv_dma)) { +- dev_err(jrdev, "unable to map IV\n"); +- return ERR_PTR(-ENOMEM); +- } +- +- if (((ctx->class1_alg_type & OP_ALG_ALGSEL_MASK) == +- OP_ALG_ALGSEL_AES) && +- ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == OP_ALG_AAI_GCM)) +- is_gcm = true; +- +- /* +- * Check if data are contiguous. +- * GCM expected input sequence: IV, AAD, text +- * All other - expected input sequence: AAD, IV, text +- */ +- +- if (is_gcm) { +- if (assoc_nents || iv_dma + ivsize != +- sg_dma_address(req->assoc) || src_nents || +- sg_dma_address(req->assoc) + req->assoclen != +- sg_dma_address(req->src)) +- contig &= ~GIV_SRC_CONTIG; +- } else { +- if (assoc_nents || +- sg_dma_address(req->assoc) + req->assoclen != iv_dma || +- src_nents || iv_dma + ivsize != sg_dma_address(req->src)) +- contig &= ~GIV_SRC_CONTIG; +- } +- ++ if (assoc_nents || sg_dma_address(req->assoc) + req->assoclen != ++ iv_dma || src_nents || iv_dma + ivsize != sg_dma_address(req->src)) ++ contig &= ~GIV_SRC_CONTIG; + if (dst_nents || iv_dma + ivsize != sg_dma_address(req->dst)) + contig &= ~GIV_DST_CONTIG; +- ++ if (unlikely(req->src != req->dst)) { ++ dst_nents = dst_nents ? : 1; ++ sec4_sg_len += 1; ++ } + if (!(contig & GIV_SRC_CONTIG)) { + assoc_nents = assoc_nents ? : 1; + src_nents = src_nents ? : 1; + sec4_sg_len += assoc_nents + 1 + src_nents; +- if (req->src == req->dst && +- (src_nents || iv_dma + ivsize != sg_dma_address(req->src))) +- contig &= ~GIV_DST_CONTIG; +- } +- +- /* +- * Add new sg entries for GCM output sequence. +- * Expected output sequence: IV, encrypted text. +- */ +- if (is_gcm && req->src == req->dst && !(contig & GIV_DST_CONTIG)) +- sec4_sg_len += 1 + src_nents; +- +- if (unlikely(req->src != req->dst)) { +- dst_nents = dst_nents ? 
: 1; +- sec4_sg_len += 1 + dst_nents; ++ if (likely(req->src == req->dst)) ++ contig &= ~GIV_DST_CONTIG; + } ++ sec4_sg_len += dst_nents; + + sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); + ++ dma_sync_single_for_device(jrdev, iv_dma, ivsize, DMA_TO_DEVICE); ++ + /* allocate space for base edesc and hw desc commands, link tables */ +- edesc = kmalloc(sizeof(struct aead_edesc) + desc_bytes + ++ edesc = kzalloc(sizeof(struct aead_edesc) + desc_bytes + + sec4_sg_bytes, GFP_DMA | flags); + if (!edesc) { + dev_err(jrdev, "could not allocate extended descriptor\n"); +@@ -2948,40 +1422,24 @@ + edesc->sec4_sg_bytes = sec4_sg_bytes; + edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + + desc_bytes; ++ edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, ++ sec4_sg_bytes, DMA_TO_DEVICE); + *contig_ptr = contig; + + sec4_sg_index = 0; + if (!(contig & GIV_SRC_CONTIG)) { +- if (!is_gcm) { +- sg_to_sec4_sg(req->assoc, assoc_nents, +- edesc->sec4_sg + sec4_sg_index, 0); +- sec4_sg_index += assoc_nents; +- } +- ++ sg_to_sec4_sg(req->assoc, assoc_nents, ++ edesc->sec4_sg + ++ sec4_sg_index, 0); ++ sec4_sg_index += assoc_nents; + dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index, + iv_dma, ivsize, 0); + sec4_sg_index += 1; +- +- if (is_gcm) { +- sg_to_sec4_sg(req->assoc, assoc_nents, +- edesc->sec4_sg + sec4_sg_index, 0); +- sec4_sg_index += assoc_nents; +- } +- + sg_to_sec4_sg_last(req->src, src_nents, + edesc->sec4_sg + + sec4_sg_index, 0); + sec4_sg_index += src_nents; + } +- +- if (is_gcm && req->src == req->dst && !(contig & GIV_DST_CONTIG)) { +- dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index, +- iv_dma, ivsize, 0); +- sec4_sg_index += 1; +- sg_to_sec4_sg_last(req->src, src_nents, +- edesc->sec4_sg + sec4_sg_index, 0); +- } +- + if (unlikely(req->src != req->dst && !(contig & GIV_DST_CONTIG))) { + dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index, + iv_dma, ivsize, 0); +@@ -2989,12 +1447,8 @@ + sg_to_sec4_sg_last(req->dst, dst_nents, + edesc->sec4_sg + sec4_sg_index, 0); + } +- edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, +- sec4_sg_bytes, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { +- dev_err(jrdev, "unable to map S/G table\n"); +- return ERR_PTR(-ENOMEM); +- } ++ dma_sync_single_for_device(jrdev, edesc->sec4_sg_dma, sec4_sg_bytes, ++ DMA_TO_DEVICE); + + return edesc; + } +@@ -3010,6 +1464,8 @@ + u32 *desc; + int ret = 0; + ++ req->cryptlen += ctx->authsize; ++ + /* allocate extended descriptor */ + edesc = aead_giv_edesc_alloc(areq, DESC_JOB_IO_LEN * + CAAM_CMD_SZ, &contig); +@@ -3018,7 +1474,7 @@ + return PTR_ERR(edesc); + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "giv src@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "giv src@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src), + req->cryptlen, 1); + #endif +@@ -3027,7 +1483,7 @@ + init_aead_giv_job(ctx->sh_desc_givenc, + ctx->sh_desc_givenc_dma, edesc, req, contig); + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "aead jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, + desc_bytes(edesc->hw_desc), 1); + #endif +@@ -3044,11 +1500,6 @@ + return ret; + } + +-static int aead_null_givencrypt(struct aead_givcrypt_request *areq) +-{ +- return aead_encrypt(&areq->areq); +-} +- + /* + * allocate and map the ablkcipher extended descriptor for ablkcipher + */ +@@ -3086,16 +1537,12 @@ + DMA_FROM_DEVICE, dst_chained); + } + +- iv_dma = dma_map_single(jrdev, req->info, 
ivsize, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, iv_dma)) { +- dev_err(jrdev, "unable to map IV\n"); +- return ERR_PTR(-ENOMEM); +- } +- + /* + * Check if iv can be contiguous with source and destination. + * If so, include it. If not, create scatterlist. + */ ++ iv_dma = dma_map_single(jrdev, req->info, ivsize, DMA_TO_DEVICE); ++ dma_sync_single_for_device(jrdev, iv_dma, ivsize, DMA_TO_DEVICE); + if (!src_nents && iv_dma + ivsize == sg_dma_address(req->src)) + iv_contig = true; + else +@@ -3104,7 +1551,7 @@ + sizeof(struct sec4_sg_entry); + + /* allocate space for base edesc and hw desc commands, link tables */ +- edesc = kmalloc(sizeof(struct ablkcipher_edesc) + desc_bytes + ++ edesc = kzalloc(sizeof(struct ablkcipher_edesc) + desc_bytes + + sec4_sg_bytes, GFP_DMA | flags); + if (!edesc) { + dev_err(jrdev, "could not allocate extended descriptor\n"); +@@ -3134,15 +1581,13 @@ + + edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, + sec4_sg_bytes, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { +- dev_err(jrdev, "unable to map S/G table\n"); +- return ERR_PTR(-ENOMEM); +- } +- + edesc->iv_dma = iv_dma; + ++ dma_sync_single_for_device(jrdev, edesc->sec4_sg_dma, sec4_sg_bytes, ++ DMA_TO_DEVICE); ++ + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "ablkcipher sec4_sg@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ablkcipher sec4_sg@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, edesc->sec4_sg, + sec4_sg_bytes, 1); + #endif +@@ -3171,7 +1616,7 @@ + init_ablkcipher_job(ctx->sh_desc_enc, + ctx->sh_desc_enc_dma, edesc, req, iv_contig); + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "ablkcipher jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ablkcipher jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, + desc_bytes(edesc->hw_desc), 1); + #endif +@@ -3209,7 +1654,7 @@ + ctx->sh_desc_dec_dma, edesc, req, iv_contig); + desc = edesc->hw_desc; + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "ablkcipher jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ablkcipher jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, + desc_bytes(edesc->hw_desc), 1); + #endif +@@ -3225,291 +1670,28 @@ + return ret; + } + +-/* +- * allocate and map the ablkcipher extended descriptor +- * for ablkcipher givencrypt +- */ +-static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( +- struct skcipher_givcrypt_request *greq, +- int desc_bytes, +- bool *iv_contig_out) +-{ +- struct ablkcipher_request *req = &greq->creq; +- struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req); +- struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher); +- struct device *jrdev = ctx->jrdev; +- gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | +- CRYPTO_TFM_REQ_MAY_SLEEP)) ? +- GFP_KERNEL : GFP_ATOMIC; +- int src_nents, dst_nents = 0, sec4_sg_bytes; +- struct ablkcipher_edesc *edesc; +- dma_addr_t iv_dma = 0; +- bool iv_contig = false; +- int sgc; +- int ivsize = crypto_ablkcipher_ivsize(ablkcipher); +- bool src_chained = false, dst_chained = false; +- int sec4_sg_index; +- +- src_nents = sg_count(req->src, req->nbytes, &src_chained); +- +- if (unlikely(req->dst != req->src)) +- dst_nents = sg_count(req->dst, req->nbytes, &dst_chained); +- +- if (likely(req->src == req->dst)) { +- sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1, +- DMA_BIDIRECTIONAL, src_chained); +- } else { +- sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? 
: 1, +- DMA_TO_DEVICE, src_chained); +- sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? : 1, +- DMA_FROM_DEVICE, dst_chained); +- } +- +- /* +- * Check if iv can be contiguous with source and destination. +- * If so, include it. If not, create scatterlist. +- */ +- iv_dma = dma_map_single(jrdev, greq->giv, ivsize, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, iv_dma)) { +- dev_err(jrdev, "unable to map IV\n"); +- return ERR_PTR(-ENOMEM); +- } +- +- if (!dst_nents && iv_dma + ivsize == sg_dma_address(req->dst)) +- iv_contig = true; +- else +- dst_nents = dst_nents ? : 1; +- sec4_sg_bytes = ((iv_contig ? 0 : 1) + src_nents + dst_nents) * +- sizeof(struct sec4_sg_entry); +- +- /* allocate space for base edesc and hw desc commands, link tables */ +- edesc = kmalloc(sizeof(*edesc) + desc_bytes + +- sec4_sg_bytes, GFP_DMA | flags); +- if (!edesc) { +- dev_err(jrdev, "could not allocate extended descriptor\n"); +- return ERR_PTR(-ENOMEM); +- } +- +- edesc->src_nents = src_nents; +- edesc->src_chained = src_chained; +- edesc->dst_nents = dst_nents; +- edesc->dst_chained = dst_chained; +- edesc->sec4_sg_bytes = sec4_sg_bytes; +- edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + +- desc_bytes; +- +- sec4_sg_index = 0; +- if (src_nents) { +- sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0); +- sec4_sg_index += src_nents; +- } +- +- if (!iv_contig) { +- dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index, +- iv_dma, ivsize, 0); +- sec4_sg_index += 1; +- sg_to_sec4_sg_last(req->dst, dst_nents, +- edesc->sec4_sg + sec4_sg_index, 0); +- } +- +- edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, +- sec4_sg_bytes, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { +- dev_err(jrdev, "unable to map S/G table\n"); +- return ERR_PTR(-ENOMEM); +- } +- edesc->iv_dma = iv_dma; +- +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, +- "ablkcipher sec4_sg@" __stringify(__LINE__) ": ", +- DUMP_PREFIX_ADDRESS, 16, 4, edesc->sec4_sg, +- sec4_sg_bytes, 1); +-#endif +- +- *iv_contig_out = iv_contig; +- return edesc; +-} +- +-static int ablkcipher_givencrypt(struct skcipher_givcrypt_request *creq) +-{ +- struct ablkcipher_request *req = &creq->creq; +- struct ablkcipher_edesc *edesc; +- struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req); +- struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher); +- struct device *jrdev = ctx->jrdev; +- bool iv_contig; +- u32 *desc; +- int ret = 0; +- +- /* allocate extended descriptor */ +- edesc = ablkcipher_giv_edesc_alloc(creq, DESC_JOB_IO_LEN * +- CAAM_CMD_SZ, &iv_contig); +- if (IS_ERR(edesc)) +- return PTR_ERR(edesc); +- +- /* Create and submit job descriptor*/ +- init_ablkcipher_giv_job(ctx->sh_desc_givenc, ctx->sh_desc_givenc_dma, +- edesc, req, iv_contig); +-#ifdef DEBUG +- print_hex_dump(KERN_ERR, +- "ablkcipher jobdesc@" __stringify(__LINE__) ": ", +- DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, +- desc_bytes(edesc->hw_desc), 1); +-#endif +- desc = edesc->hw_desc; +- ret = caam_jr_enqueue(jrdev, desc, ablkcipher_encrypt_done, req); +- +- if (!ret) { +- ret = -EINPROGRESS; +- } else { +- ablkcipher_unmap(jrdev, edesc, req); +- kfree(edesc); +- } +- +- return ret; +-} +- + #define template_aead template_u.aead + #define template_ablkcipher template_u.ablkcipher + struct caam_alg_template { + char name[CRYPTO_MAX_ALG_NAME]; +- char driver_name[CRYPTO_MAX_ALG_NAME]; +- unsigned int blocksize; +- u32 type; +- union { +- struct ablkcipher_alg ablkcipher; +- struct aead_alg aead; +- struct blkcipher_alg blkcipher; 
+- struct cipher_alg cipher; +- struct compress_alg compress; +- struct rng_alg rng; +- } template_u; +- u32 class1_alg_type; +- u32 class2_alg_type; +- u32 alg_op; +-}; +- +-static struct caam_alg_template driver_algs[] = { +- /* single-pass ipsec_esp descriptor */ +- { +- .name = "authenc(hmac(md5),ecb(cipher_null))", +- .driver_name = "authenc-hmac-md5-ecb-cipher_null-caam", +- .blocksize = NULL_BLOCK_SIZE, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = aead_setkey, +- .setauthsize = aead_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_null_givencrypt, +- .geniv = "", +- .ivsize = NULL_IV_SIZE, +- .maxauthsize = MD5_DIGEST_SIZE, +- }, +- .class1_alg_type = 0, +- .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, +- .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, +- }, +- { +- .name = "authenc(hmac(sha1),ecb(cipher_null))", +- .driver_name = "authenc-hmac-sha1-ecb-cipher_null-caam", +- .blocksize = NULL_BLOCK_SIZE, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = aead_setkey, +- .setauthsize = aead_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_null_givencrypt, +- .geniv = "", +- .ivsize = NULL_IV_SIZE, +- .maxauthsize = SHA1_DIGEST_SIZE, +- }, +- .class1_alg_type = 0, +- .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, +- .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, +- }, +- { +- .name = "authenc(hmac(sha224),ecb(cipher_null))", +- .driver_name = "authenc-hmac-sha224-ecb-cipher_null-caam", +- .blocksize = NULL_BLOCK_SIZE, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = aead_setkey, +- .setauthsize = aead_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_null_givencrypt, +- .geniv = "", +- .ivsize = NULL_IV_SIZE, +- .maxauthsize = SHA224_DIGEST_SIZE, +- }, +- .class1_alg_type = 0, +- .class2_alg_type = OP_ALG_ALGSEL_SHA224 | +- OP_ALG_AAI_HMAC_PRECOMP, +- .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, +- }, +- { +- .name = "authenc(hmac(sha256),ecb(cipher_null))", +- .driver_name = "authenc-hmac-sha256-ecb-cipher_null-caam", +- .blocksize = NULL_BLOCK_SIZE, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = aead_setkey, +- .setauthsize = aead_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_null_givencrypt, +- .geniv = "", +- .ivsize = NULL_IV_SIZE, +- .maxauthsize = SHA256_DIGEST_SIZE, +- }, +- .class1_alg_type = 0, +- .class2_alg_type = OP_ALG_ALGSEL_SHA256 | +- OP_ALG_AAI_HMAC_PRECOMP, +- .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, +- }, +- { +- .name = "authenc(hmac(sha384),ecb(cipher_null))", +- .driver_name = "authenc-hmac-sha384-ecb-cipher_null-caam", +- .blocksize = NULL_BLOCK_SIZE, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = aead_setkey, +- .setauthsize = aead_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_null_givencrypt, +- .geniv = "", +- .ivsize = NULL_IV_SIZE, +- .maxauthsize = SHA384_DIGEST_SIZE, +- }, +- .class1_alg_type = 0, +- .class2_alg_type = OP_ALG_ALGSEL_SHA384 | +- OP_ALG_AAI_HMAC_PRECOMP, +- .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, +- }, +- { +- .name = "authenc(hmac(sha512),ecb(cipher_null))", +- .driver_name = "authenc-hmac-sha512-ecb-cipher_null-caam", +- .blocksize = NULL_BLOCK_SIZE, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = aead_setkey, +- .setauthsize = aead_setauthsize, +- 
.encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_null_givencrypt, +- .geniv = "", +- .ivsize = NULL_IV_SIZE, +- .maxauthsize = SHA512_DIGEST_SIZE, +- }, +- .class1_alg_type = 0, +- .class2_alg_type = OP_ALG_ALGSEL_SHA512 | +- OP_ALG_AAI_HMAC_PRECOMP, +- .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, +- }, ++ char driver_name[CRYPTO_MAX_ALG_NAME]; ++ unsigned int blocksize; ++ u32 type; ++ union { ++ struct ablkcipher_alg ablkcipher; ++ struct aead_alg aead; ++ struct blkcipher_alg blkcipher; ++ struct cipher_alg cipher; ++ struct compress_alg compress; ++ struct rng_alg rng; ++ } template_u; ++ u32 class1_alg_type; ++ u32 class2_alg_type; ++ u32 alg_op; ++}; ++ ++static struct caam_alg_template driver_algs[] = { ++ /* single-pass ipsec_esp descriptor */ + { + .name = "authenc(hmac(md5),cbc(aes))", + .driver_name = "authenc-hmac-md5-cbc-aes-caam", +@@ -3865,188 +2047,81 @@ + OP_ALG_AAI_HMAC_PRECOMP, + .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, + }, ++ /* ablkcipher descriptor */ + { +- .name = "authenc(hmac(md5),rfc3686(ctr(aes)))", +- .driver_name = "authenc-hmac-md5-rfc3686-ctr-aes-caam", +- .blocksize = 1, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = aead_setkey, +- .setauthsize = aead_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_givencrypt, +- .geniv = "", +- .ivsize = CTR_RFC3686_IV_SIZE, +- .maxauthsize = MD5_DIGEST_SIZE, +- }, +- .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CTR_MOD128, +- .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, +- .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, +- }, +- { +- .name = "authenc(hmac(sha1),rfc3686(ctr(aes)))", +- .driver_name = "authenc-hmac-sha1-rfc3686-ctr-aes-caam", +- .blocksize = 1, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = aead_setkey, +- .setauthsize = aead_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_givencrypt, +- .geniv = "", +- .ivsize = CTR_RFC3686_IV_SIZE, +- .maxauthsize = SHA1_DIGEST_SIZE, +- }, +- .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CTR_MOD128, +- .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, +- .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, +- }, +- { +- .name = "authenc(hmac(sha224),rfc3686(ctr(aes)))", +- .driver_name = "authenc-hmac-sha224-rfc3686-ctr-aes-caam", +- .blocksize = 1, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = aead_setkey, +- .setauthsize = aead_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_givencrypt, +- .geniv = "", +- .ivsize = CTR_RFC3686_IV_SIZE, +- .maxauthsize = SHA224_DIGEST_SIZE, ++ .name = "ecb(des)", ++ .driver_name = "ecb-des-caam", ++ .blocksize = DES_BLOCK_SIZE, ++ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, ++ .template_ablkcipher = { ++ .setkey = ablkcipher_setkey, ++ .encrypt = ablkcipher_encrypt, ++ .decrypt = ablkcipher_decrypt, ++ .geniv = "eseqiv", ++ .min_keysize = DES_KEY_SIZE, ++ .max_keysize = DES_KEY_SIZE, ++ .ivsize = DES_BLOCK_SIZE, + }, +- .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CTR_MOD128, +- .class2_alg_type = OP_ALG_ALGSEL_SHA224 | +- OP_ALG_AAI_HMAC_PRECOMP, +- .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, ++ .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_ECB, + }, + { +- .name = "authenc(hmac(sha256),rfc3686(ctr(aes)))", +- .driver_name = "authenc-hmac-sha256-rfc3686-ctr-aes-caam", +- .blocksize = 1, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = 
aead_setkey, +- .setauthsize = aead_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_givencrypt, +- .geniv = "", +- .ivsize = CTR_RFC3686_IV_SIZE, +- .maxauthsize = SHA256_DIGEST_SIZE, +- }, +- .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CTR_MOD128, +- .class2_alg_type = OP_ALG_ALGSEL_SHA256 | +- OP_ALG_AAI_HMAC_PRECOMP, +- .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, ++ .name = "ecb(arc4)", ++ .driver_name = "ecb-arc4-caam", ++ .blocksize = ARC4_BLOCK_SIZE, ++ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, ++ .template_ablkcipher = { ++ .setkey = ablkcipher_setkey, ++ .encrypt = ablkcipher_encrypt, ++ .decrypt = ablkcipher_decrypt, ++ .geniv = "eseqiv", ++ .min_keysize = ARC4_MIN_KEY_SIZE, ++ .max_keysize = ARC4_MAX_KEY_SIZE, ++ .ivsize = ARC4_BLOCK_SIZE, ++ }, ++ .class1_alg_type = OP_ALG_ALGSEL_ARC4 | OP_ALG_AAI_ECB + }, + { +- .name = "authenc(hmac(sha384),rfc3686(ctr(aes)))", +- .driver_name = "authenc-hmac-sha384-rfc3686-ctr-aes-caam", +- .blocksize = 1, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = aead_setkey, +- .setauthsize = aead_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_givencrypt, +- .geniv = "", +- .ivsize = CTR_RFC3686_IV_SIZE, +- .maxauthsize = SHA384_DIGEST_SIZE, ++ .name = "ecb(aes)", ++ .driver_name = "ecb-aes-caam", ++ .blocksize = AES_BLOCK_SIZE, ++ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, ++ .template_ablkcipher = { ++ .setkey = ablkcipher_setkey, ++ .encrypt = ablkcipher_encrypt, ++ .decrypt = ablkcipher_decrypt, ++ .geniv = "eseqiv", ++ .min_keysize = AES_MIN_KEY_SIZE, ++ .max_keysize = AES_MAX_KEY_SIZE, ++ .ivsize = AES_BLOCK_SIZE, + }, +- .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CTR_MOD128, +- .class2_alg_type = OP_ALG_ALGSEL_SHA384 | +- OP_ALG_AAI_HMAC_PRECOMP, +- .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, ++ .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_ECB, + }, + { +- .name = "authenc(hmac(sha512),rfc3686(ctr(aes)))", +- .driver_name = "authenc-hmac-sha512-rfc3686-ctr-aes-caam", +- .blocksize = 1, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = aead_setkey, +- .setauthsize = aead_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_givencrypt, +- .geniv = "", +- .ivsize = CTR_RFC3686_IV_SIZE, +- .maxauthsize = SHA512_DIGEST_SIZE, ++ .name = "ctr(aes)", ++ .driver_name = "ctr-aes-caam", ++ .blocksize = AES_BLOCK_SIZE, ++ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, ++ .template_ablkcipher = { ++ .setkey = ablkcipher_setkey, ++ .encrypt = ablkcipher_encrypt, ++ .decrypt = ablkcipher_decrypt, ++ .geniv = "eseqiv", ++ .min_keysize = AES_MIN_KEY_SIZE, ++ .max_keysize = AES_MAX_KEY_SIZE, ++ .ivsize = AES_BLOCK_SIZE, + }, + .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CTR_MOD128, +- .class2_alg_type = OP_ALG_ALGSEL_SHA512 | +- OP_ALG_AAI_HMAC_PRECOMP, +- .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, +- }, +- { +- .name = "rfc4106(gcm(aes))", +- .driver_name = "rfc4106-gcm-aes-caam", +- .blocksize = 1, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = rfc4106_setkey, +- .setauthsize = rfc4106_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_givencrypt, +- .geniv = "", +- .ivsize = 8, +- .maxauthsize = AES_BLOCK_SIZE, +- }, +- .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM, +- }, +- { +- .name = "rfc4543(gcm(aes))", +- .driver_name = "rfc4543-gcm-aes-caam", +- .blocksize = 1, +- .type = CRYPTO_ALG_TYPE_AEAD, +- 
.template_aead = { +- .setkey = rfc4543_setkey, +- .setauthsize = rfc4543_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = aead_givencrypt, +- .geniv = "", +- .ivsize = 8, +- .maxauthsize = AES_BLOCK_SIZE, +- }, +- .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM, +- }, +- /* Galois Counter Mode */ +- { +- .name = "gcm(aes)", +- .driver_name = "gcm-aes-caam", +- .blocksize = 1, +- .type = CRYPTO_ALG_TYPE_AEAD, +- .template_aead = { +- .setkey = gcm_setkey, +- .setauthsize = gcm_setauthsize, +- .encrypt = aead_encrypt, +- .decrypt = aead_decrypt, +- .givencrypt = NULL, +- .geniv = "", +- .ivsize = 12, +- .maxauthsize = AES_BLOCK_SIZE, +- }, +- .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM, + }, +- /* ablkcipher descriptor */ + { + .name = "cbc(aes)", + .driver_name = "cbc-aes-caam", + .blocksize = AES_BLOCK_SIZE, +- .type = CRYPTO_ALG_TYPE_GIVCIPHER, ++ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .template_ablkcipher = { + .setkey = ablkcipher_setkey, + .encrypt = ablkcipher_encrypt, + .decrypt = ablkcipher_decrypt, +- .givencrypt = ablkcipher_givencrypt, +- .geniv = "", ++ .geniv = "eseqiv", + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, +@@ -4054,16 +2129,31 @@ + .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, + }, + { ++ .name = "ecb(des3_ede)", ++ .driver_name = "ecb-des3-caam", ++ .blocksize = DES3_EDE_BLOCK_SIZE, ++ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, ++ .template_ablkcipher = { ++ .setkey = ablkcipher_setkey, ++ .encrypt = ablkcipher_encrypt, ++ .decrypt = ablkcipher_decrypt, ++ .geniv = "eseqiv", ++ .min_keysize = DES3_EDE_KEY_SIZE, ++ .max_keysize = DES3_EDE_KEY_SIZE, ++ .ivsize = DES3_EDE_BLOCK_SIZE, ++ }, ++ .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_ECB, ++ }, ++ { + .name = "cbc(des3_ede)", + .driver_name = "cbc-3des-caam", + .blocksize = DES3_EDE_BLOCK_SIZE, +- .type = CRYPTO_ALG_TYPE_GIVCIPHER, ++ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .template_ablkcipher = { + .setkey = ablkcipher_setkey, + .encrypt = ablkcipher_encrypt, + .decrypt = ablkcipher_decrypt, +- .givencrypt = ablkcipher_givencrypt, +- .geniv = "", ++ .geniv = "eseqiv", + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, +@@ -4074,58 +2164,23 @@ + .name = "cbc(des)", + .driver_name = "cbc-des-caam", + .blocksize = DES_BLOCK_SIZE, +- .type = CRYPTO_ALG_TYPE_GIVCIPHER, ++ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .template_ablkcipher = { + .setkey = ablkcipher_setkey, + .encrypt = ablkcipher_encrypt, + .decrypt = ablkcipher_decrypt, +- .givencrypt = ablkcipher_givencrypt, +- .geniv = "", ++ .geniv = "eseqiv", + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + }, + .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, +- }, +- { +- .name = "ctr(aes)", +- .driver_name = "ctr-aes-caam", +- .blocksize = 1, +- .type = CRYPTO_ALG_TYPE_ABLKCIPHER, +- .template_ablkcipher = { +- .setkey = ablkcipher_setkey, +- .encrypt = ablkcipher_encrypt, +- .decrypt = ablkcipher_decrypt, +- .geniv = "chainiv", +- .min_keysize = AES_MIN_KEY_SIZE, +- .max_keysize = AES_MAX_KEY_SIZE, +- .ivsize = AES_BLOCK_SIZE, +- }, +- .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CTR_MOD128, +- }, +- { +- .name = "rfc3686(ctr(aes))", +- .driver_name = "rfc3686-ctr-aes-caam", +- .blocksize = 1, +- .type = CRYPTO_ALG_TYPE_GIVCIPHER, +- .template_ablkcipher = { +- .setkey = ablkcipher_setkey, +- .encrypt = ablkcipher_encrypt, +- .decrypt = 
ablkcipher_decrypt, +- .givencrypt = ablkcipher_givencrypt, +- .geniv = "", +- .min_keysize = AES_MIN_KEY_SIZE + +- CTR_RFC3686_NONCE_SIZE, +- .max_keysize = AES_MAX_KEY_SIZE + +- CTR_RFC3686_NONCE_SIZE, +- .ivsize = CTR_RFC3686_IV_SIZE, +- }, +- .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CTR_MOD128, + } + }; + + struct caam_crypto_alg { + struct list_head entry; ++ struct device *ctrldev; + int class1_alg_type; + int class2_alg_type; + int alg_op; +@@ -4138,12 +2193,14 @@ + struct caam_crypto_alg *caam_alg = + container_of(alg, struct caam_crypto_alg, crypto_alg); + struct caam_ctx *ctx = crypto_tfm_ctx(tfm); ++ struct caam_drv_private *priv = dev_get_drvdata(caam_alg->ctrldev); ++ int tgt_jr = atomic_inc_return(&priv->tfm_count); + +- ctx->jrdev = caam_jr_alloc(); +- if (IS_ERR(ctx->jrdev)) { +- pr_err("Job Ring Device allocation for transform failed\n"); +- return PTR_ERR(ctx->jrdev); +- } ++ /* ++ * distribute tfms across job rings to ensure in-order ++ * crypto request processing per tfm ++ */ ++ ctx->jrdev = priv->algapi_jr[(tgt_jr / 2) % priv->num_jrs_for_algapi]; + + /* copy descriptor header template value */ + ctx->class1_alg_type = OP_TYPE_CLASS1_ALG | caam_alg->class1_alg_type; +@@ -4170,31 +2227,57 @@ + dma_unmap_single(ctx->jrdev, ctx->sh_desc_givenc_dma, + desc_bytes(ctx->sh_desc_givenc), + DMA_TO_DEVICE); +- if (ctx->key_dma && +- !dma_mapping_error(ctx->jrdev, ctx->key_dma)) +- dma_unmap_single(ctx->jrdev, ctx->key_dma, +- ctx->enckeylen + ctx->split_key_pad_len, +- DMA_TO_DEVICE); +- +- caam_jr_free(ctx->jrdev); + } + + static void __exit caam_algapi_exit(void) + { + ++ struct device_node *dev_node; ++ struct platform_device *pdev; ++ struct device *ctrldev; ++ struct caam_drv_private *priv; + struct caam_crypto_alg *t_alg, *n; ++ int i, err; ++ ++ dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); ++ if (!dev_node) { ++ dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0"); ++ if (!dev_node) ++ return; ++ } ++ ++ pdev = of_find_device_by_node(dev_node); ++ if (!pdev) { ++ of_node_put(dev_node); ++ return; ++ } + +- if (!alg_list.next) ++ ctrldev = &pdev->dev; ++ priv = dev_get_drvdata(ctrldev); ++ ++ if (!priv->alg_list.next) { ++ of_node_put(dev_node); + return; ++ } + +- list_for_each_entry_safe(t_alg, n, &alg_list, entry) { ++ list_for_each_entry_safe(t_alg, n, &priv->alg_list, entry) { + crypto_unregister_alg(&t_alg->crypto_alg); + list_del(&t_alg->entry); + kfree(t_alg); + } ++ ++ for (i = 0; i < priv->total_jobrs; i++) { ++ err = caam_jr_deregister(priv->algapi_jr[i]); ++ if (err < 0) ++ break; ++ } ++ kfree(priv->algapi_jr); ++ ++ of_node_put(dev_node); + } + +-static struct caam_crypto_alg *caam_alg_alloc(struct caam_alg_template ++static struct caam_crypto_alg *caam_alg_alloc(struct device *ctrldev, ++ struct caam_alg_template + *template) + { + struct caam_crypto_alg *t_alg; +@@ -4202,7 +2285,7 @@ + + t_alg = kzalloc(sizeof(struct caam_crypto_alg), GFP_KERNEL); + if (!t_alg) { +- pr_err("failed to allocate t_alg\n"); ++ dev_err(ctrldev, "failed to allocate t_alg\n"); + return ERR_PTR(-ENOMEM); + } + +@@ -4218,13 +2301,13 @@ + alg->cra_blocksize = template->blocksize; + alg->cra_alignmask = 0; + alg->cra_ctxsize = sizeof(struct caam_ctx); +- alg->cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY | +- template->type; ++ alg->cra_flags = CRYPTO_ALG_ASYNC | template->type; ++ ++#ifdef CRYPTO_ALG_KERN_DRIVER_ONLY ++ alg->cra_flags |= CRYPTO_ALG_KERN_DRIVER_ONLY; ++#endif ++ + switch (template->type) { +- case 
CRYPTO_ALG_TYPE_GIVCIPHER: +- alg->cra_type = &crypto_givcipher_type; +- alg->cra_ablkcipher = template->template_ablkcipher; +- break; + case CRYPTO_ALG_TYPE_ABLKCIPHER: + alg->cra_type = &crypto_ablkcipher_type; + alg->cra_ablkcipher = template->template_ablkcipher; +@@ -4238,6 +2321,7 @@ + t_alg->class1_alg_type = template->class1_alg_type; + t_alg->class2_alg_type = template->class2_alg_type; + t_alg->alg_op = template->alg_op; ++ t_alg->ctrldev = ctrldev; + + return t_alg; + } +@@ -4246,9 +2330,11 @@ + { + struct device_node *dev_node; + struct platform_device *pdev; +- struct device *ctrldev; +- void *priv; +- int i = 0, err = 0; ++ struct device *ctrldev, **jrdev; ++ struct caam_drv_private *priv; ++ int i = 0, err = 0, md_limit = 0; ++ int des_inst, aes_inst, md_inst; ++ u64 cha_inst; + + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); + if (!dev_node) { +@@ -4265,42 +2351,117 @@ + + ctrldev = &pdev->dev; + priv = dev_get_drvdata(ctrldev); +- of_node_put(dev_node); + +- /* +- * If priv is NULL, it's probably because the caam driver wasn't +- * properly initialized (e.g. RNG4 init failed). Thus, bail out here. +- */ +- if (!priv) +- return -ENODEV; ++ INIT_LIST_HEAD(&priv->alg_list); ++ ++ jrdev = kmalloc(sizeof(*jrdev) * priv->total_jobrs, GFP_ATOMIC); ++ if (!jrdev) { ++ of_node_put(dev_node); ++ return -ENOMEM; ++ } + ++ for (i = 0; i < priv->total_jobrs; i++) { ++ err = caam_jr_register(ctrldev, &jrdev[i]); ++ if (err < 0) ++ break; ++ } ++ if (err < 0 && i == 0) { ++ dev_err(ctrldev, "algapi error in job ring registration: %d\n", ++ err); ++ of_node_put(dev_node); ++ kfree(jrdev); ++ return err; ++ } + +- INIT_LIST_HEAD(&alg_list); ++ priv->num_jrs_for_algapi = i; ++ priv->algapi_jr = jrdev; ++ atomic_set(&priv->tfm_count, -1); ++ ++ /* ++ * register crypto algorithms the device supports ++ * first, detect presence of DES, AES, and MD blocks. If MD present, ++ * determine limit of supported digest size ++ */ ++ cha_inst = rd_reg64(&priv->ctrl->perfmon.cha_num); ++ des_inst = (cha_inst & CHA_ID_DES_MASK) >> CHA_ID_DES_SHIFT; ++ aes_inst = (cha_inst & CHA_ID_AES_MASK) >> CHA_ID_AES_SHIFT; ++ md_inst = (cha_inst & CHA_ID_MD_MASK) >> CHA_ID_MD_SHIFT; ++ if (md_inst) { ++ md_limit = SHA512_DIGEST_SIZE; ++ if ((rd_reg64(&priv->ctrl->perfmon.cha_id) & CHA_ID_MD_MASK) ++ == CHA_ID_MD_LP256) /* LP256 limits digest size */ ++ md_limit = SHA256_DIGEST_SIZE; ++ } + +- /* register crypto algorithms the device supports */ + for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { +- /* TODO: check if h/w supports alg */ + struct caam_crypto_alg *t_alg; ++ bool done = false; ++ ++authencesn: ++ /* ++ * All registrable algs in this module require a blockcipher ++ * All aead algs require message digests, so check them for ++ * instantiation and size. 
++ */ ++ if (driver_algs[i].type == CRYPTO_ALG_TYPE_AEAD) { ++ /* If no MD instantiated, or MD too small, skip */ ++ if ((!md_inst) || ++ (driver_algs[i].template_aead.maxauthsize > ++ md_limit)) ++ continue; ++ } ++ /* If DES alg, and CHA not instantiated, skip */ ++ if ((driver_algs[i].class1_alg_type & OP_ALG_ALGSEL_3DES) || ++ (driver_algs[i].class1_alg_type & OP_ALG_ALGSEL_DES)) ++ if (!des_inst) ++ continue; ++ /* If AES alg, and CHA not instantiated, skip */ ++ if (driver_algs[i].class1_alg_type & OP_ALG_ALGSEL_AES) ++ if (!aes_inst) ++ continue; + +- t_alg = caam_alg_alloc(&driver_algs[i]); ++ t_alg = caam_alg_alloc(ctrldev, &driver_algs[i]); + if (IS_ERR(t_alg)) { + err = PTR_ERR(t_alg); +- pr_warn("%s alg allocation failed\n", +- driver_algs[i].driver_name); ++ dev_warn(ctrldev, "%s alg allocation failed\n", ++ driver_algs[i].driver_name); + continue; + } + + err = crypto_register_alg(&t_alg->crypto_alg); + if (err) { +- pr_warn("%s alg registration failed\n", ++ dev_warn(ctrldev, "%s alg registration failed\n", + t_alg->crypto_alg.cra_driver_name); + kfree(t_alg); +- } else +- list_add_tail(&t_alg->entry, &alg_list); ++ } else { ++ list_add_tail(&t_alg->entry, &priv->alg_list); ++ dev_info(ctrldev, "%s\n", ++ t_alg->crypto_alg.cra_driver_name); ++ ++ if (driver_algs[i].type == CRYPTO_ALG_TYPE_AEAD && ++ !memcmp(driver_algs[i].name, "authenc", 7) && ++ !done) { ++ char *name; ++ ++ name = driver_algs[i].name; ++ memmove(name + 10, name + 7, strlen(name) - 7); ++ memcpy(name + 7, "esn", 3); ++ ++ name = driver_algs[i].driver_name; ++ memmove(name + 10, name + 7, strlen(name) - 7); ++ memcpy(name + 7, "esn", 3); ++ ++ done = true; ++ goto authencesn; ++ } ++ } + } +- if (!list_empty(&alg_list)) +- pr_info("caam algorithms registered in /proc/crypto\n"); + ++ if (!list_empty(&priv->alg_list)) ++ dev_info(ctrldev, "%s algorithms registered in /proc/crypto\n", ++ (char *)of_get_property(dev_node, "compatible", NULL)); ++ ++ of_node_put(dev_node); + return err; + } + +diff -Nur linux-4.1.3/drivers/crypto/caam/caamhash.c linux-xbian-imx6/drivers/crypto/caam/caamhash.c +--- linux-4.1.3/drivers/crypto/caam/caamhash.c 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/caamhash.c 2015-07-27 23:13:04.209961631 +0200 +@@ -1,7 +1,7 @@ + /* + * caam - Freescale FSL CAAM support for ahash functions of crypto API + * +- * Copyright 2011 Freescale Semiconductor, Inc. ++ * Copyright (C) 2011-2013 Freescale Semiconductor, Inc. + * + * Based on caamalg.c crypto API driver. + * +@@ -62,6 +62,7 @@ + #include "error.h" + #include "sg_sw_sec4.h" + #include "key_gen.h" ++#include + + #define CAAM_CRA_PRIORITY 3000 + +@@ -72,6 +73,8 @@ + #define CAAM_MAX_HASH_DIGEST_SIZE SHA512_DIGEST_SIZE + + /* length of descriptors text */ ++#define DESC_JOB_IO_LEN (CAAM_CMD_SZ * 5 + CAAM_PTR_SZ * 3) ++ + #define DESC_AHASH_BASE (4 * CAAM_CMD_SZ) + #define DESC_AHASH_UPDATE_LEN (6 * CAAM_CMD_SZ) + #define DESC_AHASH_UPDATE_FIRST_LEN (DESC_AHASH_BASE + 4 * CAAM_CMD_SZ) +@@ -89,14 +92,13 @@ + + #ifdef DEBUG + /* for print_hex_dumps with line references */ ++#define xstr(s) str(s) ++#define str(s) #s + #define debug(format, arg...) printk(format, arg) + #else + #define debug(format, arg...) 
+ #endif + +- +-static struct list_head hash_list; +- + /* ahash per-session context */ + struct caam_hash_ctx { + struct device *jrdev; +@@ -115,6 +117,7 @@ + u8 key[CAAM_MAX_HASH_KEY_SIZE]; + dma_addr_t key_dma; + int ctx_len; ++ unsigned int key_len; + unsigned int split_key_len; + unsigned int split_key_pad_len; + }; +@@ -137,20 +140,13 @@ + /* Common job descriptor seq in/out ptr routines */ + + /* Map state->caam_ctx, and append seq_out_ptr command that points to it */ +-static inline int map_seq_out_ptr_ctx(u32 *desc, struct device *jrdev, +- struct caam_hash_state *state, +- int ctx_len) ++static inline void map_seq_out_ptr_ctx(u32 *desc, struct device *jrdev, ++ struct caam_hash_state *state, ++ int ctx_len) + { + state->ctx_dma = dma_map_single(jrdev, state->caam_ctx, + ctx_len, DMA_FROM_DEVICE); +- if (dma_mapping_error(jrdev, state->ctx_dma)) { +- dev_err(jrdev, "unable to map ctx\n"); +- return -ENOMEM; +- } +- + append_seq_out_ptr(desc, state->ctx_dma, ctx_len, 0); +- +- return 0; + } + + /* Map req->result, and append seq_out_ptr command that points to it */ +@@ -173,6 +169,7 @@ + dma_addr_t buf_dma; + + buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE); ++ dma_sync_single_for_device(jrdev, buf_dma, buflen, DMA_TO_DEVICE); + dma_to_sec4_sg_one(sec4_sg, buf_dma, buflen, 0); + + return buf_dma; +@@ -208,19 +205,17 @@ + } + + /* Map state->caam_ctx, and add it to link table */ +-static inline int ctx_map_to_sec4_sg(u32 *desc, struct device *jrdev, +- struct caam_hash_state *state, int ctx_len, +- struct sec4_sg_entry *sec4_sg, u32 flag) ++static inline void ctx_map_to_sec4_sg(u32 *desc, struct device *jrdev, ++ struct caam_hash_state *state, ++ int ctx_len, ++ struct sec4_sg_entry *sec4_sg, ++ u32 flag) + { + state->ctx_dma = dma_map_single(jrdev, state->caam_ctx, ctx_len, flag); +- if (dma_mapping_error(jrdev, state->ctx_dma)) { +- dev_err(jrdev, "unable to map ctx\n"); +- return -ENOMEM; +- } +- ++ if ((flag == DMA_TO_DEVICE) || (flag == DMA_BIDIRECTIONAL)) ++ dma_sync_single_for_device(jrdev, state->ctx_dma, ctx_len, ++ flag); + dma_to_sec4_sg_one(sec4_sg, state->ctx_dma, ctx_len, 0); +- +- return 0; + } + + /* Common shared descriptor commands */ +@@ -231,6 +226,13 @@ + KEY_DEST_MDHA_SPLIT | KEY_ENC); + } + ++static inline void append_key_axcbc(u32 *desc, struct caam_hash_ctx *ctx) ++{ ++ append_key_as_imm(desc, ctx->key, ctx->key_len, ++ ctx->key_len, CLASS_1 | ++ KEY_DEST_CLASS_REG); ++} ++ + /* Append key if it has been set */ + static inline void init_sh_desc_key_ahash(u32 *desc, struct caam_hash_ctx *ctx) + { +@@ -252,6 +254,25 @@ + append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); + } + ++static inline void init_sh_desc_key_axcbc(u32 *desc, struct caam_hash_ctx *ctx) ++{ ++ u32 *key_jump_cmd; ++ ++ init_sh_desc(desc, HDR_SHARE_SERIAL); ++ ++ if (ctx->key_len) { ++ key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | ++ JUMP_COND_SHRD); ++ ++ append_key_axcbc(desc, ctx); ++ ++ set_jump_tgt_here(desc, key_jump_cmd); ++ } ++ ++ /* Propagate errors from shared to job descriptor */ ++ append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); ++ ++} + /* + * For ahash read data from seqin following state->caam_ctx, + * and write resulting class2 context to seqout, which may be state->caam_ctx +@@ -271,6 +292,20 @@ + LDST_SRCDST_BYTE_CONTEXT); + } + ++static inline void axcbc_append_load_str(u32 *desc, int digestsize) ++{ ++ /* Calculate remaining bytes to read */ ++ append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); ++ ++ /* Read remaining bytes 
*/ ++ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_LAST1 | ++ FIFOLD_TYPE_MSG | KEY_VLF); ++ ++ /* Store class1 context bytes */ ++ append_seq_store(desc, digestsize, LDST_CLASS_1_CCB | ++ LDST_SRCDST_BYTE_CONTEXT); ++} ++ + /* + * For ahash update, final and finup, import context, read and write to seqout + */ +@@ -293,6 +328,27 @@ + ahash_append_load_str(desc, digestsize); + } + ++/* ++ * For ahash update, final and finup, import context, read and write to seqout ++ */ ++static inline void axcbc_ctx_data_to_out(u32 *desc, u32 op, u32 state, ++ int digestsize, ++ struct caam_hash_ctx *ctx) ++{ ++ init_sh_desc_key_axcbc(desc, ctx); ++ ++ /* Import context from software */ ++ append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | ++ LDST_CLASS_1_CCB | ctx->ctx_len); ++ ++ /* Class 1 operation */ ++ append_operation(desc, op | state | OP_ALG_ENCRYPT); ++ ++ /* ++ * Load from buf and/or src and write to req->result or state->context ++ */ ++ axcbc_append_load_str(desc, digestsize); ++} + /* For ahash firsts and digest, read and write to seqout */ + static inline void ahash_data_to_out(u32 *desc, u32 op, u32 state, + int digestsize, struct caam_hash_ctx *ctx) +@@ -308,6 +364,21 @@ + ahash_append_load_str(desc, digestsize); + } + ++/* For ahash firsts and digest, read and write to seqout */ ++static inline void axcbc_data_to_out(u32 *desc, u32 op, u32 state, ++ int digestsize, struct caam_hash_ctx *ctx) ++{ ++ init_sh_desc_key_axcbc(desc, ctx); ++ ++ /* Class 1 operation */ ++ append_operation(desc, op | state | OP_ALG_ENCRYPT); ++ ++ /* ++ * Load from buf and/or src and write to req->result or state->context ++ */ ++ axcbc_append_load_str(desc, digestsize); ++} ++ + static int ahash_set_sh_desc(struct crypto_ahash *ahash) + { + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); +@@ -342,8 +413,7 @@ + return -ENOMEM; + } + #ifdef DEBUG +- print_hex_dump(KERN_ERR, +- "ahash update shdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ahash update shdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); + #endif + +@@ -361,10 +431,11 @@ + return -ENOMEM; + } + #ifdef DEBUG +- print_hex_dump(KERN_ERR, +- "ahash update first shdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ahash update first shdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); + #endif ++ dma_sync_single_for_device(jrdev, ctx->sh_desc_update_first_dma, ++ desc_bytes(desc), DMA_TO_DEVICE); + + /* ahash_final shared descriptor */ + desc = ctx->sh_desc_fin; +@@ -379,10 +450,12 @@ + return -ENOMEM; + } + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "ahash final shdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ahash final shdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, + desc_bytes(desc), 1); + #endif ++ dma_sync_single_for_device(jrdev, ctx->sh_desc_fin_dma, ++ desc_bytes(desc), DMA_TO_DEVICE); + + /* ahash_finup shared descriptor */ + desc = ctx->sh_desc_finup; +@@ -397,10 +470,12 @@ + return -ENOMEM; + } + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "ahash finup shdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ahash finup shdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, + desc_bytes(desc), 1); + #endif ++ dma_sync_single_for_device(jrdev, ctx->sh_desc_finup_dma, ++ desc_bytes(desc), DMA_TO_DEVICE); + + /* ahash_digest shared descriptor */ + desc = ctx->sh_desc_digest; +@@ -416,15 +491,134 @@ + return -ENOMEM; + } + #ifdef DEBUG +- print_hex_dump(KERN_ERR, +- "ahash 
digest shdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ahash digest shdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, + desc_bytes(desc), 1); + #endif ++ dma_sync_single_for_device(jrdev, ctx->sh_desc_digest_dma, ++ desc_bytes(desc), DMA_TO_DEVICE); + + return 0; + } + ++static int axcbc_set_sh_desc(struct crypto_ahash *ahash) ++{ ++ struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); ++ int digestsize = crypto_ahash_digestsize(ahash); ++ struct device *jrdev = ctx->jrdev; ++ u32 have_key = 0; ++ u32 *desc; ++ ++ /* ahash_update shared descriptor */ ++ desc = ctx->sh_desc_update; ++ ++ init_sh_desc(desc, HDR_SHARE_SERIAL); ++ ++ /* Import context from software */ ++ append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | ++ LDST_CLASS_1_CCB | ctx->ctx_len); ++ ++ /* Class 1 operation */ ++ append_operation(desc, ctx->alg_type | OP_ALG_AS_UPDATE | ++ OP_ALG_ENCRYPT); ++ ++ /* Load data and write to result or context */ ++ axcbc_append_load_str(desc, ctx->ctx_len); ++ ++ ctx->sh_desc_update_dma = dma_map_single(jrdev, desc, desc_bytes(desc), ++ DMA_TO_DEVICE); ++ if (dma_mapping_error(jrdev, ctx->sh_desc_update_dma)) { ++ dev_err(jrdev, "unable to map shared descriptor\n"); ++ return -ENOMEM; ++ } ++#ifdef DEBUG ++ print_hex_dump(KERN_ERR, "ahash update shdesc@"xstr(__LINE__)": ", ++ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); ++#endif ++ ++ /* ahash_update_first shared descriptor */ ++ desc = ctx->sh_desc_update_first; ++ ++ axcbc_data_to_out(desc, have_key | ctx->alg_type, OP_ALG_AS_INIT, ++ ctx->ctx_len, ctx); ++ ++ ctx->sh_desc_update_first_dma = dma_map_single(jrdev, desc, ++ desc_bytes(desc), ++ DMA_TO_DEVICE); ++ if (dma_mapping_error(jrdev, ctx->sh_desc_update_first_dma)) { ++ dev_err(jrdev, "unable to map shared descriptor\n"); ++ return -ENOMEM; ++ } ++#ifdef DEBUG ++ print_hex_dump(KERN_ERR, "ahash update first shdesc@"xstr(__LINE__)": ", ++ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); ++#endif ++ dma_sync_single_for_device(jrdev, ctx->sh_desc_update_first_dma, ++ desc_bytes(desc), DMA_TO_DEVICE); ++ ++ /* ahash_final shared descriptor */ ++ desc = ctx->sh_desc_fin; ++ ++ axcbc_ctx_data_to_out(desc, have_key | ctx->alg_type, ++ OP_ALG_AS_FINALIZE, digestsize, ctx); ++ ++ ctx->sh_desc_fin_dma = dma_map_single(jrdev, desc, desc_bytes(desc), ++ DMA_TO_DEVICE); ++ if (dma_mapping_error(jrdev, ctx->sh_desc_fin_dma)) { ++ dev_err(jrdev, "unable to map shared descriptor\n"); ++ return -ENOMEM; ++ } ++#ifdef DEBUG ++ print_hex_dump(KERN_ERR, "ahash final shdesc@"xstr(__LINE__)": ", ++ DUMP_PREFIX_ADDRESS, 16, 4, desc, ++ desc_bytes(desc), 1); ++#endif ++ dma_sync_single_for_device(jrdev, ctx->sh_desc_fin_dma, ++ desc_bytes(desc), DMA_TO_DEVICE); ++ ++ /* ahash_finup shared descriptor */ ++ desc = ctx->sh_desc_finup; ++ ++ axcbc_ctx_data_to_out(desc, have_key | ctx->alg_type, ++ OP_ALG_AS_FINALIZE, digestsize, ctx); ++ ++ ctx->sh_desc_finup_dma = dma_map_single(jrdev, desc, desc_bytes(desc), ++ DMA_TO_DEVICE); ++ if (dma_mapping_error(jrdev, ctx->sh_desc_finup_dma)) { ++ dev_err(jrdev, "unable to map shared descriptor\n"); ++ return -ENOMEM; ++ } ++#ifdef DEBUG ++ print_hex_dump(KERN_ERR, "ahash finup shdesc@"xstr(__LINE__)": ", ++ DUMP_PREFIX_ADDRESS, 16, 4, desc, ++ desc_bytes(desc), 1); ++#endif ++ dma_sync_single_for_device(jrdev, ctx->sh_desc_finup_dma, ++ desc_bytes(desc), DMA_TO_DEVICE); ++ ++ /* ahash_digest shared descriptor */ ++ desc = ctx->sh_desc_digest; ++ ++ axcbc_data_to_out(desc, have_key | ctx->alg_type, 
OP_ALG_AS_INITFINAL, ++ digestsize, ctx); ++ ++ ctx->sh_desc_digest_dma = dma_map_single(jrdev, desc, ++ desc_bytes(desc), ++ DMA_TO_DEVICE); ++ if (dma_mapping_error(jrdev, ctx->sh_desc_digest_dma)) { ++ dev_err(jrdev, "unable to map shared descriptor\n"); ++ return -ENOMEM; ++ } ++#ifdef DEBUG ++ print_hex_dump(KERN_ERR, "ahash digest shdesc@"xstr(__LINE__)": ", ++ DUMP_PREFIX_ADDRESS, 16, 4, desc, ++ desc_bytes(desc), 1); ++#endif ++ dma_sync_single_for_device(jrdev, ctx->sh_desc_digest_dma, ++ desc_bytes(desc), DMA_TO_DEVICE); ++ ++ return 0; ++} + static int gen_split_hash_key(struct caam_hash_ctx *ctx, const u8 *key_in, + u32 keylen) + { +@@ -458,6 +652,8 @@ + kfree(desc); + return -ENOMEM; + } ++ dma_sync_single_for_device(jrdev, src_dma, *keylen, DMA_TO_DEVICE); ++ + dst_dma = dma_map_single(jrdev, (void *)key_out, digestsize, + DMA_FROM_DEVICE); + if (dma_mapping_error(jrdev, dst_dma)) { +@@ -478,9 +674,9 @@ + LDST_SRCDST_BYTE_CONTEXT); + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "key_in@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "key_in@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, key_in, *keylen, 1); +- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); + #endif + +@@ -493,17 +689,17 @@ + wait_for_completion_interruptible(&result.completion); + ret = result.err; + #ifdef DEBUG +- print_hex_dump(KERN_ERR, +- "digested key@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "digested key@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, key_in, + digestsize, 1); + #endif + } ++ *keylen = digestsize; ++ + dma_unmap_single(jrdev, src_dma, *keylen, DMA_TO_DEVICE); ++ dma_sync_single_for_cpu(jrdev, dst_dma, digestsize, DMA_FROM_DEVICE); + dma_unmap_single(jrdev, dst_dma, digestsize, DMA_FROM_DEVICE); + +- *keylen = digestsize; +- + kfree(desc); + + return ret; +@@ -545,7 +741,7 @@ + #ifdef DEBUG + printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n", + ctx->split_key_len, ctx->split_key_pad_len); +- print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "key in @"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); + #endif + +@@ -557,11 +753,14 @@ + DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, ctx->key_dma)) { + dev_err(jrdev, "unable to map key i/o memory\n"); +- ret = -ENOMEM; +- goto map_err; ++ return -ENOMEM; + } ++ ++ dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->split_key_pad_len, ++ DMA_TO_DEVICE); ++ + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, + ctx->split_key_pad_len, 1); + #endif +@@ -572,7 +771,6 @@ + DMA_TO_DEVICE); + } + +-map_err: + kfree(hashed_key); + return ret; + badkey: +@@ -581,6 +779,25 @@ + return -EINVAL; + } + ++static int axcbc_setkey(struct crypto_ahash *ahash, ++ const u8 *key, unsigned int keylen) ++{ ++ struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); ++ int ret = 0; ++ ++ ctx->key_len = keylen; ++ memcpy(ctx->key, key, keylen); ++ ++#ifdef DEBUG ++ print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ", ++ DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, ++ ctx->key_len, 1); ++#endif ++ ++ ret = axcbc_set_sh_desc(ahash); ++ ++ return ret; ++} + /* + * ahash_edesc - s/w-extended ahash descriptor + * @dst_dma: physical mapped address of req->result +@@ -608,8 +825,11 @@ + if (edesc->src_nents) + dma_unmap_sg_chained(dev, 
req->src, edesc->src_nents, + DMA_TO_DEVICE, edesc->chained); +- if (edesc->dst_dma) ++ if (edesc->dst_dma) { ++ dma_sync_single_for_cpu(dev, edesc->dst_dma, dst_len, ++ DMA_FROM_DEVICE); + dma_unmap_single(dev, edesc->dst_dma, dst_len, DMA_FROM_DEVICE); ++ } + + if (edesc->sec4_sg_bytes) + dma_unmap_single(dev, edesc->sec4_sg_dma, +@@ -624,8 +844,12 @@ + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + +- if (state->ctx_dma) ++ if (state->ctx_dma) { ++ if ((flag == DMA_FROM_DEVICE) || (flag == DMA_BIDIRECTIONAL)) ++ dma_sync_single_for_cpu(dev, state->ctx_dma, ++ ctx->ctx_len, flag); + dma_unmap_single(dev, state->ctx_dma, ctx->ctx_len, flag); ++ } + ahash_unmap(dev, edesc, req, dst_len); + } + +@@ -645,18 +869,21 @@ + + edesc = (struct ahash_edesc *)((char *)desc - + offsetof(struct ahash_edesc, hw_desc)); +- if (err) +- caam_jr_strstatus(jrdev, err); ++ if (err) { ++ char tmp[CAAM_ERROR_STR_MAX]; ++ ++ dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); ++ } + + ahash_unmap(jrdev, edesc, req, digestsize); + kfree(edesc); + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "ctx@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ctx@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, + ctx->ctx_len, 1); + if (req->result) +- print_hex_dump(KERN_ERR, "result@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "result@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->result, + digestsize, 1); + #endif +@@ -680,18 +907,21 @@ + + edesc = (struct ahash_edesc *)((char *)desc - + offsetof(struct ahash_edesc, hw_desc)); +- if (err) +- caam_jr_strstatus(jrdev, err); ++ if (err) { ++ char tmp[CAAM_ERROR_STR_MAX]; ++ ++ dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); ++ } + + ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL); + kfree(edesc); + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "ctx@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ctx@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, + ctx->ctx_len, 1); + if (req->result) +- print_hex_dump(KERN_ERR, "result@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "result@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->result, + digestsize, 1); + #endif +@@ -715,18 +945,21 @@ + + edesc = (struct ahash_edesc *)((char *)desc - + offsetof(struct ahash_edesc, hw_desc)); +- if (err) +- caam_jr_strstatus(jrdev, err); ++ if (err) { ++ char tmp[CAAM_ERROR_STR_MAX]; + +- ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_TO_DEVICE); ++ dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); ++ } ++ ++ ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); + kfree(edesc); + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "ctx@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ctx@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, + ctx->ctx_len, 1); + if (req->result) +- print_hex_dump(KERN_ERR, "result@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "result@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->result, + digestsize, 1); + #endif +@@ -750,18 +983,21 @@ + + edesc = (struct ahash_edesc *)((char *)desc - + offsetof(struct ahash_edesc, hw_desc)); +- if (err) +- caam_jr_strstatus(jrdev, err); ++ if (err) { ++ char tmp[CAAM_ERROR_STR_MAX]; ++ ++ dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); ++ } + +- ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_FROM_DEVICE); ++ ahash_unmap_ctx(jrdev, edesc, req, 
ctx->ctx_len, DMA_TO_DEVICE); + kfree(edesc); + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "ctx@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ctx@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, + ctx->ctx_len, 1); + if (req->result) +- print_hex_dump(KERN_ERR, "result@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "result@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->result, + digestsize, 1); + #endif +@@ -807,7 +1043,7 @@ + * allocate space for base edesc and hw desc commands, + * link tables + */ +- edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + ++ edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + + sec4_sg_bytes, GFP_DMA | flags); + if (!edesc) { + dev_err(jrdev, +@@ -820,11 +1056,12 @@ + edesc->sec4_sg_bytes = sec4_sg_bytes; + edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) + + DESC_JOB_IO_LEN; ++ edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, ++ sec4_sg_bytes, ++ DMA_TO_DEVICE); + +- ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len, +- edesc->sec4_sg, DMA_BIDIRECTIONAL); +- if (ret) +- return ret; ++ ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len, ++ edesc->sec4_sg, DMA_BIDIRECTIONAL); + + state->buf_dma = try_buf_map_to_sec4_sg(jrdev, + edesc->sec4_sg + 1, +@@ -851,21 +1088,16 @@ + init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | + HDR_REVERSE); + +- edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, +- sec4_sg_bytes, +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { +- dev_err(jrdev, "unable to map S/G table\n"); +- return -ENOMEM; +- } +- + append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len + + to_hash, LDST_SGF); + + append_seq_out_ptr(desc, state->ctx_dma, ctx->ctx_len, 0); + ++ dma_sync_single_for_device(jrdev, edesc->sec4_sg_dma, ++ sec4_sg_bytes, DMA_TO_DEVICE); ++ + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, + desc_bytes(desc), 1); + #endif +@@ -885,9 +1117,9 @@ + *next_buflen = last_buflen; + } + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "buf@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "buf@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1); +- print_hex_dump(KERN_ERR, "next buf@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "next buf@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, next_buf, + *next_buflen, 1); + #endif +@@ -918,7 +1150,7 @@ + sec4_sg_bytes = (1 + (buflen ? 
1 : 0)) * sizeof(struct sec4_sg_entry); + + /* allocate space for base edesc and hw desc commands, link tables */ +- edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + ++ edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + + sec4_sg_bytes, GFP_DMA | flags); + if (!edesc) { + dev_err(jrdev, "could not allocate extended descriptor\n"); +@@ -932,37 +1164,29 @@ + edesc->sec4_sg_bytes = sec4_sg_bytes; + edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) + + DESC_JOB_IO_LEN; ++ edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, ++ sec4_sg_bytes, DMA_TO_DEVICE); + edesc->src_nents = 0; + +- ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len, +- edesc->sec4_sg, DMA_TO_DEVICE); +- if (ret) +- return ret; ++ ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len, edesc->sec4_sg, ++ DMA_TO_DEVICE); + + state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, + buf, state->buf_dma, buflen, + last_buflen); + (edesc->sec4_sg + sec4_sg_bytes - 1)->len |= SEC4_SG_LEN_FIN; + +- edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, +- sec4_sg_bytes, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { +- dev_err(jrdev, "unable to map S/G table\n"); +- return -ENOMEM; +- } +- + append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len + buflen, + LDST_SGF); + + edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result, + digestsize); +- if (dma_mapping_error(jrdev, edesc->dst_dma)) { +- dev_err(jrdev, "unable to map dst\n"); +- return -ENOMEM; +- } ++ ++ dma_sync_single_for_device(jrdev, edesc->sec4_sg_dma, sec4_sg_bytes, ++ DMA_TO_DEVICE); + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); + #endif + +@@ -1005,7 +1229,7 @@ + sizeof(struct sec4_sg_entry); + + /* allocate space for base edesc and hw desc commands, link tables */ +- edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + ++ edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + + sec4_sg_bytes, GFP_DMA | flags); + if (!edesc) { + dev_err(jrdev, "could not allocate extended descriptor\n"); +@@ -1021,11 +1245,11 @@ + edesc->sec4_sg_bytes = sec4_sg_bytes; + edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) + + DESC_JOB_IO_LEN; ++ edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, ++ sec4_sg_bytes, DMA_TO_DEVICE); + +- ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len, +- edesc->sec4_sg, DMA_TO_DEVICE); +- if (ret) +- return ret; ++ ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len, edesc->sec4_sg, ++ DMA_TO_DEVICE); + + state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, + buf, state->buf_dma, buflen, +@@ -1034,25 +1258,17 @@ + src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + + sec4_sg_src_index, chained); + +- edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, +- sec4_sg_bytes, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { +- dev_err(jrdev, "unable to map S/G table\n"); +- return -ENOMEM; +- } +- + append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len + + buflen + req->nbytes, LDST_SGF); + + edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result, + digestsize); +- if (dma_mapping_error(jrdev, edesc->dst_dma)) { +- dev_err(jrdev, "unable to map dst\n"); +- return -ENOMEM; +- } ++ ++ dma_sync_single_for_device(jrdev, edesc->sec4_sg_dma, sec4_sg_bytes, ++ DMA_TO_DEVICE); + + #ifdef DEBUG +- 
print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); + #endif + +@@ -1091,7 +1307,7 @@ + sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry); + + /* allocate space for base edesc and hw desc commands, link tables */ +- edesc = kmalloc(sizeof(struct ahash_edesc) + sec4_sg_bytes + ++ edesc = kzalloc(sizeof(struct ahash_edesc) + sec4_sg_bytes + + DESC_JOB_IO_LEN, GFP_DMA | flags); + if (!edesc) { + dev_err(jrdev, "could not allocate extended descriptor\n"); +@@ -1099,6 +1315,8 @@ + } + edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) + + DESC_JOB_IO_LEN; ++ edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, ++ sec4_sg_bytes, DMA_TO_DEVICE); + edesc->sec4_sg_bytes = sec4_sg_bytes; + edesc->src_nents = src_nents; + edesc->chained = chained; +@@ -1109,12 +1327,6 @@ + + if (src_nents) { + sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0); +- edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, +- sec4_sg_bytes, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { +- dev_err(jrdev, "unable to map S/G table\n"); +- return -ENOMEM; +- } + src_dma = edesc->sec4_sg_dma; + options = LDST_SGF; + } else { +@@ -1123,15 +1335,14 @@ + } + append_seq_in_ptr(desc, src_dma, req->nbytes, options); + ++ dma_sync_single_for_device(jrdev, edesc->sec4_sg_dma, ++ edesc->sec4_sg_bytes, DMA_TO_DEVICE); ++ + edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result, + digestsize); +- if (dma_mapping_error(jrdev, edesc->dst_dma)) { +- dev_err(jrdev, "unable to map dst\n"); +- return -ENOMEM; +- } + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); + #endif + +@@ -1165,7 +1376,7 @@ + int sh_len; + + /* allocate space for base edesc and hw desc commands, link tables */ +- edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN, ++ edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN, + GFP_DMA | flags); + if (!edesc) { + dev_err(jrdev, "could not allocate extended descriptor\n"); +@@ -1178,23 +1389,17 @@ + init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE); + + state->buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, state->buf_dma)) { +- dev_err(jrdev, "unable to map src\n"); +- return -ENOMEM; +- } + + append_seq_in_ptr(desc, state->buf_dma, buflen, 0); + + edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result, + digestsize); +- if (dma_mapping_error(jrdev, edesc->dst_dma)) { +- dev_err(jrdev, "unable to map dst\n"); +- return -ENOMEM; +- } + edesc->src_nents = 0; + ++ dma_sync_single_for_device(jrdev, state->buf_dma, buflen, ++ DMA_TO_DEVICE); + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); + #endif + +@@ -1245,7 +1450,7 @@ + * allocate space for base edesc and hw desc commands, + * link tables + */ +- edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + ++ edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + + sec4_sg_bytes, GFP_DMA | flags); + if (!edesc) { + dev_err(jrdev, +@@ -1258,7 +1463,9 @@ + edesc->sec4_sg_bytes = sec4_sg_bytes; + edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) + + DESC_JOB_IO_LEN; +- edesc->dst_dma = 
0; ++ edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, ++ sec4_sg_bytes, ++ DMA_TO_DEVICE); + + state->buf_dma = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, + buf, *buflen); +@@ -1276,22 +1483,14 @@ + init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | + HDR_REVERSE); + +- edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, +- sec4_sg_bytes, +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { +- dev_err(jrdev, "unable to map S/G table\n"); +- return -ENOMEM; +- } +- + append_seq_in_ptr(desc, edesc->sec4_sg_dma, to_hash, LDST_SGF); + +- ret = map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len); +- if (ret) +- return ret; ++ map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len); + ++ dma_sync_single_for_device(jrdev, edesc->sec4_sg_dma, ++ sec4_sg_bytes, DMA_TO_DEVICE); + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, + desc_bytes(desc), 1); + #endif +@@ -1314,9 +1513,9 @@ + *next_buflen = 0; + } + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "buf@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "buf@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1); +- print_hex_dump(KERN_ERR, "next buf@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "next buf@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, next_buf, + *next_buflen, 1); + #endif +@@ -1352,7 +1551,7 @@ + sizeof(struct sec4_sg_entry); + + /* allocate space for base edesc and hw desc commands, link tables */ +- edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + ++ edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + + sec4_sg_bytes, GFP_DMA | flags); + if (!edesc) { + dev_err(jrdev, "could not allocate extended descriptor\n"); +@@ -1368,6 +1567,8 @@ + edesc->sec4_sg_bytes = sec4_sg_bytes; + edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) + + DESC_JOB_IO_LEN; ++ edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, ++ sec4_sg_bytes, DMA_TO_DEVICE); + + state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, buf, + state->buf_dma, buflen, +@@ -1376,25 +1577,17 @@ + src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + 1, + chained); + +- edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, +- sec4_sg_bytes, DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { +- dev_err(jrdev, "unable to map S/G table\n"); +- return -ENOMEM; +- } +- + append_seq_in_ptr(desc, edesc->sec4_sg_dma, buflen + + req->nbytes, LDST_SGF); + + edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result, + digestsize); +- if (dma_mapping_error(jrdev, edesc->dst_dma)) { +- dev_err(jrdev, "unable to map dst\n"); +- return -ENOMEM; +- } ++ ++ dma_sync_single_for_device(jrdev, edesc->sec4_sg_dma, sec4_sg_bytes, ++ DMA_TO_DEVICE); + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); + #endif + +@@ -1447,7 +1640,7 @@ + * allocate space for base edesc and hw desc commands, + * link tables + */ +- edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + ++ edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + + sec4_sg_bytes, GFP_DMA | flags); + if (!edesc) { + dev_err(jrdev, +@@ -1460,19 +1653,13 @@ + edesc->sec4_sg_bytes = sec4_sg_bytes; + edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) + + DESC_JOB_IO_LEN; +- edesc->dst_dma = 0; 
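The hunks above and below all repeat one transformation: the sec4 scatter/gather table is now mapped early, the dma_mapping_error() checks are dropped, and an explicit dma_sync_single_for_device() is added once the CPU has finished writing the table. A minimal sketch of that pattern for the non-coherent i.MX6, using names from the surrounding driver code (illustrative only, not part of the patch):

/* Map first; on i.MX6 the mapping alone does not guarantee coherency */
edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
				    sec4_sg_bytes, DMA_TO_DEVICE);

/* The CPU writes the scatter/gather entries after the mapping... */
sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0);

/* ...so clean the cache lines before handing the table to the CAAM DMA */
dma_sync_single_for_device(jrdev, edesc->sec4_sg_dma,
			   sec4_sg_bytes, DMA_TO_DEVICE);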
++ edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, ++ sec4_sg_bytes, ++ DMA_TO_DEVICE); + + if (src_nents) { + sg_to_sec4_sg_last(req->src, src_nents, + edesc->sec4_sg, 0); +- edesc->sec4_sg_dma = dma_map_single(jrdev, +- edesc->sec4_sg, +- sec4_sg_bytes, +- DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { +- dev_err(jrdev, "unable to map S/G table\n"); +- return -ENOMEM; +- } + src_dma = edesc->sec4_sg_dma; + options = LDST_SGF; + } else { +@@ -1491,12 +1678,12 @@ + + append_seq_in_ptr(desc, src_dma, to_hash, options); + +- ret = map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len); +- if (ret) +- return ret; ++ map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len); + ++ dma_sync_single_for_device(jrdev, edesc->sec4_sg_dma, ++ sec4_sg_bytes, DMA_TO_DEVICE); + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, + desc_bytes(desc), 1); + #endif +@@ -1521,7 +1708,7 @@ + req->nbytes, 0); + } + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "next buf@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "next buf@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, next_buf, + *next_buflen, 1); + #endif +@@ -1735,10 +1922,33 @@ + .alg_type = OP_ALG_ALGSEL_MD5, + .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, + }, ++ { ++ .name = "xcbc(aes)", ++ .driver_name = "xcbc-aes-caam", ++ .hmac_name = "xcbc(aes)", ++ .hmac_driver_name = "xcbc-aes-caam", ++ .blocksize = XCBC_MAC_BLOCK_WORDS * 4, ++ .template_ahash = { ++ .init = ahash_init, ++ .update = ahash_update, ++ .final = ahash_final, ++ .finup = ahash_finup, ++ .digest = ahash_digest, ++ .export = ahash_export, ++ .import = ahash_import, ++ .setkey = axcbc_setkey, ++ .halg = { ++ .digestsize = XCBC_MAC_DIGEST_SIZE, ++ }, ++ }, ++ .alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_XCBC_MAC, ++ .alg_op = OP_ALG_ALGSEL_AES, ++ }, + }; + + struct caam_hash_alg { + struct list_head entry; ++ struct device *ctrldev; + int alg_type; + int alg_op; + struct ahash_alg ahash_alg; +@@ -1755,6 +1965,7 @@ + struct caam_hash_alg *caam_hash = + container_of(alg, struct caam_hash_alg, ahash_alg); + struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); ++ struct caam_drv_private *priv = dev_get_drvdata(caam_hash->ctrldev); + /* Sizes for MDHA running digests: MD5, SHA1, 224, 256, 384, 512 */ + static const u8 runninglen[] = { HASH_MSG_LEN + MD5_DIGEST_SIZE, + HASH_MSG_LEN + SHA1_DIGEST_SIZE, +@@ -1762,17 +1973,15 @@ + HASH_MSG_LEN + SHA256_DIGEST_SIZE, + HASH_MSG_LEN + 64, + HASH_MSG_LEN + SHA512_DIGEST_SIZE }; ++ int tgt_jr = atomic_inc_return(&priv->tfm_count); + int ret = 0; + + /* +- * Get a Job ring from Job Ring driver to ensure in-order ++ * distribute tfms across job rings to ensure in-order + * crypto request processing per tfm + */ +- ctx->jrdev = caam_jr_alloc(); +- if (IS_ERR(ctx->jrdev)) { +- pr_err("Job Ring Device allocation for transform failed\n"); +- return PTR_ERR(ctx->jrdev); +- } ++ ctx->jrdev = priv->jrdev[tgt_jr % priv->total_jobrs]; ++ + /* copy descriptor header template value */ + ctx->alg_type = OP_TYPE_CLASS2_ALG | caam_hash->alg_type; + ctx->alg_op = OP_TYPE_CLASS2_ALG | caam_hash->alg_op; +@@ -1788,6 +1997,39 @@ + return ret; + } + ++static int caam_axcbc_cra_init(struct crypto_tfm *tfm) ++{ ++ struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); ++ struct crypto_alg *base = tfm->__crt_alg; ++ struct hash_alg_common *halg = ++ container_of(base, struct hash_alg_common, base); ++ struct ahash_alg *alg 
= ++ container_of(halg, struct ahash_alg, halg); ++ struct caam_hash_alg *caam_hash = ++ container_of(alg, struct caam_hash_alg, ahash_alg); ++ struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); ++ struct caam_drv_private *priv = dev_get_drvdata(caam_hash->ctrldev); ++ int tgt_jr = atomic_inc_return(&priv->tfm_count); ++ int ret = 0; ++ ++ /* ++ * distribute tfms across job rings to ensure in-order ++ * crypto request processing per tfm ++ */ ++ ctx->jrdev = priv->jrdev[tgt_jr % priv->total_jobrs]; ++ ++ /* copy descriptor header template value */ ++ ctx->alg_type = OP_TYPE_CLASS1_ALG | caam_hash->alg_type; ++ ctx->alg_op = OP_TYPE_CLASS1_ALG | caam_hash->alg_op; ++ ++ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), ++ sizeof(struct caam_hash_state)); ++ ++ ret = axcbc_set_sh_desc(ahash); ++ ++ return ret; ++} ++ + static void caam_hash_cra_exit(struct crypto_tfm *tfm) + { + struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); +@@ -1815,35 +2057,57 @@ + !dma_mapping_error(ctx->jrdev, ctx->sh_desc_finup_dma)) + dma_unmap_single(ctx->jrdev, ctx->sh_desc_finup_dma, + desc_bytes(ctx->sh_desc_finup), DMA_TO_DEVICE); +- +- caam_jr_free(ctx->jrdev); + } + + static void __exit caam_algapi_hash_exit(void) + { ++ struct device_node *dev_node; ++ struct platform_device *pdev; ++ struct device *ctrldev; ++ struct caam_drv_private *priv; + struct caam_hash_alg *t_alg, *n; + +- if (!hash_list.next) ++ dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); ++ if (!dev_node) { ++ dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0"); ++ if (!dev_node) ++ return; ++ } ++ ++ pdev = of_find_device_by_node(dev_node); ++ if (!pdev) { ++ of_node_put(dev_node); + return; ++ } + +- list_for_each_entry_safe(t_alg, n, &hash_list, entry) { ++ ctrldev = &pdev->dev; ++ priv = dev_get_drvdata(ctrldev); ++ ++ if (!priv->hash_list.next) { ++ of_node_put(dev_node); ++ return; ++ } ++ ++ list_for_each_entry_safe(t_alg, n, &priv->hash_list, entry) { + crypto_unregister_ahash(&t_alg->ahash_alg); + list_del(&t_alg->entry); + kfree(t_alg); + } ++ ++ of_node_put(dev_node); + } + + static struct caam_hash_alg * +-caam_hash_alloc(struct caam_hash_template *template, ++caam_hash_alloc(struct device *ctrldev, struct caam_hash_template *template, + bool keyed) + { + struct caam_hash_alg *t_alg; + struct ahash_alg *halg; + struct crypto_alg *alg; + +- t_alg = kzalloc(sizeof(struct caam_hash_alg), GFP_KERNEL); ++ t_alg = kzalloc(sizeof(struct caam_hash_alg), GFP_ATOMIC); + if (!t_alg) { +- pr_err("failed to allocate t_alg\n"); ++ dev_err(ctrldev, "failed to allocate t_alg\n"); + return ERR_PTR(-ENOMEM); + } + +@@ -1863,7 +2127,11 @@ + template->driver_name); + } + alg->cra_module = THIS_MODULE; +- alg->cra_init = caam_hash_cra_init; ++ ++ if (strstr(alg->cra_name, "xcbc") > 0) ++ alg->cra_init = caam_axcbc_cra_init; ++ else ++ alg->cra_init = caam_hash_cra_init; + alg->cra_exit = caam_hash_cra_exit; + alg->cra_ctxsize = sizeof(struct caam_hash_ctx); + alg->cra_priority = CAAM_CRA_PRIORITY; +@@ -1874,6 +2142,7 @@ + + t_alg->alg_type = template->alg_type; + t_alg->alg_op = template->alg_op; ++ t_alg->ctrldev = ctrldev; + + return t_alg; + } +@@ -1883,8 +2152,9 @@ + struct device_node *dev_node; + struct platform_device *pdev; + struct device *ctrldev; +- void *priv; +- int i = 0, err = 0; ++ struct caam_drv_private *priv; ++ int i = 0, err = 0, md_limit = 0, md_inst; ++ u64 cha_inst; + + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); + if (!dev_node) { +@@ -1898,60 +2168,68 @@ + of_node_put(dev_node); + 
return -ENODEV; + } +- + ctrldev = &pdev->dev; + priv = dev_get_drvdata(ctrldev); +- of_node_put(dev_node); + +- /* +- * If priv is NULL, it's probably because the caam driver wasn't +- * properly initialized (e.g. RNG4 init failed). Thus, bail out here. +- */ +- if (!priv) +- return -ENODEV; ++ INIT_LIST_HEAD(&priv->hash_list); + +- INIT_LIST_HEAD(&hash_list); ++ atomic_set(&priv->tfm_count, -1); ++ ++ /* register algorithms the device supports */ ++ cha_inst = rd_reg64(&priv->ctrl->perfmon.cha_num); ++ md_inst = (cha_inst & CHA_ID_MD_MASK) >> CHA_ID_MD_SHIFT; ++ if (md_inst) { ++ md_limit = SHA512_DIGEST_SIZE; ++ if ((rd_reg64(&priv->ctrl->perfmon.cha_id) & CHA_ID_MD_MASK) ++ == CHA_ID_MD_LP256) /* LP256 limits digest size */ ++ md_limit = SHA256_DIGEST_SIZE; ++ } + +- /* register crypto algorithms the device supports */ + for (i = 0; i < ARRAY_SIZE(driver_hash); i++) { +- /* TODO: check if h/w supports alg */ + struct caam_hash_alg *t_alg; + ++ /* If no MD instantiated, or MD too small, skip */ ++ if ((!md_inst) || ++ (driver_hash[i].template_ahash.halg.digestsize > ++ md_limit)) ++ continue; ++ + /* register hmac version */ +- t_alg = caam_hash_alloc(&driver_hash[i], true); ++ t_alg = caam_hash_alloc(ctrldev, &driver_hash[i], true); + if (IS_ERR(t_alg)) { + err = PTR_ERR(t_alg); +- pr_warn("%s alg allocation failed\n", +- driver_hash[i].driver_name); ++ dev_warn(ctrldev, "%s alg allocation failed\n", ++ driver_hash[i].driver_name); + continue; + } + + err = crypto_register_ahash(&t_alg->ahash_alg); + if (err) { +- pr_warn("%s alg registration failed\n", ++ dev_warn(ctrldev, "%s alg registration failed\n", + t_alg->ahash_alg.halg.base.cra_driver_name); + kfree(t_alg); + } else +- list_add_tail(&t_alg->entry, &hash_list); ++ list_add_tail(&t_alg->entry, &priv->hash_list); + + /* register unkeyed version */ +- t_alg = caam_hash_alloc(&driver_hash[i], false); ++ t_alg = caam_hash_alloc(ctrldev, &driver_hash[i], false); + if (IS_ERR(t_alg)) { + err = PTR_ERR(t_alg); +- pr_warn("%s alg allocation failed\n", +- driver_hash[i].driver_name); ++ dev_warn(ctrldev, "%s alg allocation failed\n", ++ driver_hash[i].driver_name); + continue; + } + + err = crypto_register_ahash(&t_alg->ahash_alg); + if (err) { +- pr_warn("%s alg registration failed\n", ++ dev_warn(ctrldev, "%s alg registration failed\n", + t_alg->ahash_alg.halg.base.cra_driver_name); + kfree(t_alg); + } else +- list_add_tail(&t_alg->entry, &hash_list); ++ list_add_tail(&t_alg->entry, &priv->hash_list); + } + ++ of_node_put(dev_node); + return err; + } + +diff -Nur linux-4.1.3/drivers/crypto/caam/caam_keyblob.c linux-xbian-imx6/drivers/crypto/caam/caam_keyblob.c +--- linux-4.1.3/drivers/crypto/caam/caam_keyblob.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/drivers/crypto/caam/caam_keyblob.c 2015-07-27 23:13:04.205975852 +0200 +@@ -0,0 +1,687 @@ ++/* ++ * Key blob driver based on CAAM hardware ++ * ++ * Copyright (C) 2015 Freescale Semiconductor, Inc. ++ */ ++ ++#include ++#include ++ ++#include "compat.h" ++#include "regs.h" ++#include "jr.h" ++#include "desc.h" ++#include "intern.h" ++#include "sm.h" ++#include "caam_keyblob.h" ++ ++#define INITIAL_DESCSZ 16 /* size of tmp buffer for descriptor const. 
*/
++
++/**
++ * struct kb_device - the metadata of the caam key blob device node
++ * @misc_dev: the actual misc device
++ * @jr_dev: the CAAM job ring device used to run key blob jobs
++ */
++struct kb_device {
++	struct miscdevice misc_dev;
++	struct device *jr_dev;
++};
++
++/*
++ * Pseudo-synchronous ring access functions for carrying out key
++ * encapsulation and decapsulation
++ */
++
++struct sm_key_job_result {
++	int error;
++	struct completion completion;
++};
++
++
++static struct kb_device *kb_dev;
++
++static struct kb_device *kb_device_create(void);
++static int kb_device_destroy(struct kb_device *kb_dev);
++static int kb_open(struct inode *inode, struct file *file);
++static int kb_release(struct inode *inode, struct file *file);
++static void sm_key_job_done(struct device *dev, u32 *desc,
++			u32 err, void *context);
++static int gen_mem_encap(struct device *jr_dev, void __user *secretbuf,
++			 int keylen, void __user *kmodbuf, void __user *outbuf);
++static int gen_mem_decap(struct device *jr_dev, void __user *keyblobbuf,
++			 int bloblen, void __user *kmodbuf, void __user *outbuf);
++static long kb_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
++static int caam_keyblob_probe(struct platform_device *pdev);
++static int caam_keyblob_remove(struct platform_device *pdev);
++
++static int kb_open(struct inode *inode, struct file *file)
++{
++	struct miscdevice *miscdev = file->private_data;
++	struct kb_device *dev = container_of(miscdev, struct kb_device, misc_dev);
++	struct device *jr_dev;
++
++	if (!dev->jr_dev) {
++		jr_dev = caam_jr_alloc();
++		if (IS_ERR(jr_dev)) {
++			pr_err("Job Ring Device allocation for transform failed\n");
++			return -ENOMEM;
++		}
++		pr_info("Allocated a job ring device\n");
++		dev->jr_dev = jr_dev;
++	}
++	else {
++		pr_err("Already created a job ring device\n");
++		return -EPERM;
++	}
++
++	return 0;
++}
++
++static int kb_release(struct inode *inode, struct file *file)
++{
++	struct miscdevice *miscdev = file->private_data;
++	struct kb_device *dev = container_of(miscdev, struct kb_device, misc_dev);
++
++	if (dev && dev->jr_dev) {
++		caam_jr_free(dev->jr_dev);
++		pr_info("Freed a job ring device\n");
++		dev->jr_dev = NULL;
++	}
++	return 0;
++}
++
++static void sm_key_job_done(struct device *dev, u32 *desc,
++			u32 err, void *context)
++{
++	struct sm_key_job_result *res = context;
++
++	res->error = err; /* save off the error for postprocessing */
++	complete(&res->completion); /* mark us complete */
++}
++
++/*
++ * Construct a blob encapsulation job descriptor
++ *
++ * This function dynamically constructs a blob encapsulation job descriptor
++ * from the following arguments:
++ *
++ * - desc	pointer to a pointer to the descriptor generated by this
++ *		function. Caller will be responsible to kfree() this
++ *		descriptor after execution.
++ * - keymod	Physical pointer to a key modifier, which must reside in a
++ *		contiguous piece of memory. Modifier will be assumed to be
++ *		8 bytes long for a blob of type SM_SECMEM, or 16 bytes long
++ *		for a blob of type SM_GENMEM (see blobtype argument).
++ * - secretbuf	Physical pointer to a secret, normally a black or red key,
++ *		possibly residing within an accessible secure memory page,
++ *		of the secret to be encapsulated to an output blob.
++ * - outbuf	Physical pointer to the destination buffer to receive the
++ *		encapsulated output. This buffer will need to be 48 bytes
++ *		larger than the input because of the added encapsulation data.
++ * The generated descriptor will account for the increase in size, ++ * but the caller must also account for this increase in the ++ * buffer allocator. ++ * - secretsz Size of input secret, in bytes. This is limited to 65536 ++ * less the size of blob overhead, since the length embeds into ++ * DECO pointer in/out instructions. ++ * - keycolor Determines if the source data is covered (black key) or ++ * plaintext (red key). RED_KEY or BLACK_KEY are defined in ++ * for this purpose. ++ * - blobtype Determine if encapsulated blob should be a secure memory ++ * blob (SM_SECMEM), with partition data embedded with key ++ * material, or a general memory blob (SM_GENMEM). ++ * - auth If BLACK_KEY source is covered via AES-CCM, specify ++ * KEY_COVER_CCM, else uses AES-ECB (KEY_COVER_ECB). ++ * ++ * Upon completion, desc points to a buffer containing a CAAM job ++ * descriptor which encapsulates data into an externally-storable blob ++ * suitable for use across power cycles. ++ * ++ * This is an example of a black key encapsulation job into a general memory ++ * blob. Notice the 16-byte key modifier in the LOAD instruction. Also note ++ * the output 48 bytes longer than the input: ++ * ++ * [00] B0800008 jobhdr: stidx=0 len=8 ++ * [01] 14400010 ld: ccb2-key len=16 offs=0 ++ * [02] 08144891 ptr->@0x08144891 ++ * [03] F800003A seqoutptr: len=58 ++ * [04] 01000000 out_ptr->@0x01000000 ++ * [05] F000000A seqinptr: len=10 ++ * [06] 09745090 in_ptr->@0x09745090 ++ * [07] 870D0004 operation: encap blob reg=memory, black, format=normal ++ * ++ * This is an example of a red key encapsulation job for storing a red key ++ * into a secure memory blob. Note the 8 byte modifier on the 12 byte offset ++ * in the LOAD instruction; this accounts for blob permission storage: ++ * ++ * [00] B0800008 jobhdr: stidx=0 len=8 ++ * [01] 14400C08 ld: ccb2-key len=8 offs=12 ++ * [02] 087D0784 ptr->@0x087d0784 ++ * [03] F8000050 seqoutptr: len=80 ++ * [04] 09251BB2 out_ptr->@0x09251bb2 ++ * [05] F0000020 seqinptr: len=32 ++ * [06] 40000F31 in_ptr->@0x40000f31 ++ * [07] 870D0008 operation: encap blob reg=memory, red, sec_mem, ++ * format=normal ++ * ++ * Note: this function only generates 32-bit pointers at present, and should ++ * be refactored using a scheme that allows both 32 and 64 bit addressing ++ */ ++ ++static int blob_encap_jobdesc(u32 **desc, dma_addr_t keymod, ++ void *secretbuf, dma_addr_t outbuf, ++ u16 secretsz, u8 keycolor, u8 blobtype, u8 auth) ++{ ++ u32 *tdesc, tmpdesc[INITIAL_DESCSZ]; ++ u16 dsize, idx; ++ ++ memset(tmpdesc, 0, INITIAL_DESCSZ * sizeof(u32)); ++ idx = 1; ++ ++ /* ++ * Key modifier works differently for secure/general memory blobs ++ * This accounts for the permission/protection data encapsulated ++ * within the blob if a secure memory blob is requested ++ */ ++ if (blobtype == SM_SECMEM) ++ tmpdesc[idx++] = CMD_LOAD | LDST_CLASS_2_CCB | ++ LDST_SRCDST_BYTE_KEY | ++ ((12 << LDST_OFFSET_SHIFT) & LDST_OFFSET_MASK) ++ | (8 & LDST_LEN_MASK); ++ else /* is general memory blob */ ++ tmpdesc[idx++] = CMD_LOAD | LDST_CLASS_2_CCB | ++ LDST_SRCDST_BYTE_KEY | (16 & LDST_LEN_MASK); ++ ++ tmpdesc[idx++] = (u32)keymod; ++ ++ /* ++ * Encapsulation output must include space for blob key encryption ++ * key and MAC tag ++ */ ++ tmpdesc[idx++] = CMD_SEQ_OUT_PTR | (secretsz + BLOB_OVERHEAD); ++ tmpdesc[idx++] = (u32)outbuf; ++ ++ /* Input data, should be somewhere in secure memory */ ++ tmpdesc[idx++] = CMD_SEQ_IN_PTR | secretsz; ++ tmpdesc[idx++] = (u32)secretbuf; ++ ++ /* Set blob encap, then color */ ++ 
tmpdesc[idx] = CMD_OPERATION | OP_TYPE_ENCAP_PROTOCOL | OP_PCLID_BLOB; ++ ++ if (blobtype == SM_SECMEM) ++ tmpdesc[idx] |= OP_PCL_BLOB_PTXT_SECMEM; ++ ++ if (auth == KEY_COVER_CCM) ++ tmpdesc[idx] |= OP_PCL_BLOB_EKT; ++ ++ if (keycolor == BLACK_KEY) ++ tmpdesc[idx] |= OP_PCL_BLOB_BLACK; ++ ++ idx++; ++ tmpdesc[0] = CMD_DESC_HDR | HDR_ONE | (idx & HDR_DESCLEN_MASK); ++ dsize = idx * sizeof(u32); ++ ++ tdesc = kmalloc(dsize, GFP_KERNEL | GFP_DMA); ++ if (tdesc == NULL) ++ return 0; ++ ++ memcpy(tdesc, tmpdesc, dsize); ++ *desc = tdesc; ++ return dsize; ++} ++ ++/* ++ * Construct a blob decapsulation job descriptor ++ * ++ * This function dynamically constructs a blob decapsulation job descriptor ++ * from the following arguments: ++ * ++ * - desc pointer to a pointer to the descriptor generated by this ++ * function. Caller will be responsible to kfree() this ++ * descriptor after execution. ++ * - keymod Physical pointer to a key modifier, which must reside in a ++ * contiguous piece of memory. Modifier will be assumed to be ++ * 8 bytes long for a blob of type SM_SECMEM, or 16 bytes long ++ * for a blob of type SM_GENMEM (see blobtype argument). ++ * - blobbuf Physical pointer (into external memory) of the blob to ++ * be decapsulated. Blob must reside in a contiguous memory ++ * segment. ++ * - outbuf Physical pointer of the decapsulated output, possibly into ++ * a location within a secure memory page. Must be contiguous. ++ * - secretsz Size of encapsulated secret in bytes (not the size of the ++ * input blob). ++ * - keycolor Determines if decapsulated content is encrypted (BLACK_KEY) ++ * or left as plaintext (RED_KEY). ++ * - blobtype Determine if encapsulated blob should be a secure memory ++ * blob (SM_SECMEM), with partition data embedded with key ++ * material, or a general memory blob (SM_GENMEM). ++ * - auth If decapsulation path is specified by BLACK_KEY, then if ++ * AES-CCM is requested for key covering use KEY_COVER_CCM, else ++ * use AES-ECB (KEY_COVER_ECB). ++ * ++ * Upon completion, desc points to a buffer containing a CAAM job descriptor ++ * that decapsulates a key blob from external memory into a black (encrypted) ++ * key or red (plaintext) content. ++ * ++ * This is an example of a black key decapsulation job from a general memory ++ * blob. Notice the 16-byte key modifier in the LOAD instruction. ++ * ++ * [00] B0800008 jobhdr: stidx=0 len=8 ++ * [01] 14400010 ld: ccb2-key len=16 offs=0 ++ * [02] 08A63B7F ptr->@0x08a63b7f ++ * [03] F8000010 seqoutptr: len=16 ++ * [04] 01000000 out_ptr->@0x01000000 ++ * [05] F000003A seqinptr: len=58 ++ * [06] 01000010 in_ptr->@0x01000010 ++ * [07] 860D0004 operation: decap blob reg=memory, black, format=normal ++ * ++ * This is an example of a red key decapsulation job for restoring a red key ++ * from a secure memory blob. 
Note the 8 byte modifier on the 12 byte offset
++ * in the LOAD instruction:
++ *
++ * [00] B0800008 jobhdr: stidx=0 len=8
++ * [01] 14400C08 ld: ccb2-key len=8 offs=12
++ * [02] 01000000 ptr->@0x01000000
++ * [03] F8000020 seqoutptr: len=32
++ * [04] 400000E6 out_ptr->@0x400000e6
++ * [05] F0000050 seqinptr: len=80
++ * [06] 08F0C0EA in_ptr->@0x08f0c0ea
++ * [07] 860D0008 operation: decap blob reg=memory, red, sec_mem,
++ *               format=normal
++ *
++ * Note: this function only generates 32-bit pointers at present, and should
++ * be refactored using a scheme that allows both 32 and 64 bit addressing
++ */
++
++static int blob_decap_jobdesc(u32 **desc, dma_addr_t keymod, dma_addr_t blobbuf,
++			      u8 *outbuf, u16 secretsz, u8 keycolor,
++			      u8 blobtype, u8 auth)
++{
++	u32 *tdesc, tmpdesc[INITIAL_DESCSZ];
++	u16 dsize, idx;
++
++	memset(tmpdesc, 0, INITIAL_DESCSZ * sizeof(u32));
++	idx = 1;
++
++	/* Load key modifier */
++	if (blobtype == SM_SECMEM)
++		tmpdesc[idx++] = CMD_LOAD | LDST_CLASS_2_CCB |
++				 LDST_SRCDST_BYTE_KEY |
++				 ((12 << LDST_OFFSET_SHIFT) & LDST_OFFSET_MASK)
++				 | (8 & LDST_LEN_MASK);
++	else /* is general memory blob */
++		tmpdesc[idx++] = CMD_LOAD | LDST_CLASS_2_CCB |
++				 LDST_SRCDST_BYTE_KEY | (16 & LDST_LEN_MASK);
++
++	tmpdesc[idx++] = (u32)keymod;
++
++	/* Compensate BKEK + MAC tag over size of encapsulated secret */
++	tmpdesc[idx++] = CMD_SEQ_IN_PTR | (secretsz + BLOB_OVERHEAD);
++	tmpdesc[idx++] = (u32)blobbuf;
++	tmpdesc[idx++] = CMD_SEQ_OUT_PTR | secretsz;
++	tmpdesc[idx++] = (u32)outbuf;
++
++	/* Decapsulate from secure memory partition to black blob */
++	tmpdesc[idx] = CMD_OPERATION | OP_TYPE_DECAP_PROTOCOL | OP_PCLID_BLOB;
++
++	if (blobtype == SM_SECMEM)
++		tmpdesc[idx] |= OP_PCL_BLOB_PTXT_SECMEM;
++
++	if (auth == KEY_COVER_CCM)
++		tmpdesc[idx] |= OP_PCL_BLOB_EKT;
++
++	if (keycolor == BLACK_KEY)
++		tmpdesc[idx] |= OP_PCL_BLOB_BLACK;
++
++	idx++;
++	tmpdesc[0] = CMD_DESC_HDR | HDR_ONE | (idx & HDR_DESCLEN_MASK);
++	dsize = idx * sizeof(u32);
++
++	tdesc = kmalloc(dsize, GFP_KERNEL | GFP_DMA);
++	if (tdesc == NULL)
++		return 0;
++
++	memcpy(tdesc, tmpdesc, dsize);
++	*desc = tdesc;
++	return dsize;
++}
++
++
++
++static int gen_mem_encap(struct device *jr_dev, void __user *secretbuf,
++			 int keylen, void __user *kmodbuf, void __user *outbuf)
++{
++	int retval = 0;
++	u32 dsize;
++	u32 __iomem *encapdesc = NULL;
++	dma_addr_t secret_dma = 0, keymod_dma = 0, outbuf_dma = 0;
++	u8 __iomem *lsecret = NULL, *lkeymod = NULL, *loutbuf = NULL;
++	struct sm_key_job_result testres;
++
++	/* Build/map/flush the secret */
++	lsecret = kmalloc(keylen, GFP_KERNEL | GFP_DMA);
++	if (!lsecret) {
++		dev_err(jr_dev, "%s: can't alloc for key\n", __func__);
++		retval = -ENOMEM;
++		goto out;
++	}
++	if (copy_from_user(lsecret, secretbuf, keylen)) {
++		dev_err(jr_dev, "%s: can't copy for key\n", __func__);
++		retval = -EFAULT;
++		goto out;
++	}
++	secret_dma = dma_map_single(jr_dev, lsecret, keylen,
++				    DMA_TO_DEVICE);
++
++	/* Build/map/flush the key modifier */
++	lkeymod = kmalloc(GENMEM_KEYMOD_LEN, GFP_KERNEL | GFP_DMA);
++	if (!lkeymod) {
++		dev_err(jr_dev, "%s: can't alloc for keymod\n", __func__);
++		retval = -ENOMEM;
++		goto out;
++	}
++	if (copy_from_user(lkeymod, kmodbuf, GENMEM_KEYMOD_LEN)) {
++		dev_err(jr_dev, "%s: can't copy for keymod\n", __func__);
++		retval = -EFAULT;
++		goto out;
++	}
++	keymod_dma = dma_map_single(jr_dev, lkeymod, GENMEM_KEYMOD_LEN,
++				    DMA_TO_DEVICE);
++
++	loutbuf = kmalloc(keylen + BLOB_OVERHEAD, GFP_KERNEL | GFP_DMA);
++	if (!loutbuf) {
++		dev_err(jr_dev, "%s: can't alloc for output\n", __func__);
++		retval = -ENOMEM;
++		goto out;
++	}
++	outbuf_dma = dma_map_single(jr_dev, loutbuf, keylen + BLOB_OVERHEAD,
++				    DMA_FROM_DEVICE);
++
++	/* Build the encapsulation job descriptor */
++	dsize = blob_encap_jobdesc(&encapdesc, keymod_dma, (void *)secret_dma, outbuf_dma,
++				   keylen, RED_KEY, SM_GENMEM, KEY_COVER_ECB);
++	if (!dsize) {
++		dev_err(jr_dev, "can't alloc an encapsulation descriptor\n");
++		retval = -ENOMEM;
++		goto out;
++	}
++	init_completion(&testres.completion);
++
++	retval = caam_jr_enqueue(jr_dev, encapdesc, sm_key_job_done,
++				 &testres);
++	if (!retval) {
++		wait_for_completion_interruptible(&testres.completion);
++		dev_info(jr_dev, "job ring return %d\n", testres.error);
++		if (!testres.error) {
++			dma_sync_single_for_cpu(jr_dev, outbuf_dma, keylen + BLOB_OVERHEAD,
++						DMA_FROM_DEVICE);
++
++			if (copy_to_user(outbuf, loutbuf, keylen + BLOB_OVERHEAD)) {
++				retval = -EFAULT;
++				dev_err(jr_dev, "can't copy for output\n");
++				goto out;
++			}
++		}
++		retval = testres.error;
++	}
++
++out:
++	if (outbuf_dma)
++		dma_unmap_single(jr_dev, outbuf_dma, keylen + BLOB_OVERHEAD,
++				 DMA_FROM_DEVICE);
++	if (keymod_dma)
++		dma_unmap_single(jr_dev, keymod_dma, GENMEM_KEYMOD_LEN, DMA_TO_DEVICE);
++	if (secret_dma)
++		dma_unmap_single(jr_dev, secret_dma, keylen, DMA_TO_DEVICE);
++	kfree(encapdesc);
++	kfree(lkeymod);
++	kfree(lsecret);
++	kfree(loutbuf);
++
++	return retval;
++}
++
++static int gen_mem_decap(struct device *jr_dev, void __user *keyblobbuf,
++			 int bloblen, void __user *kmodbuf, void __user *outbuf)
++{
++	int retval = 0;
++	int keylen = bloblen - BLOB_OVERHEAD;
++	u32 dsize;
++	dma_addr_t keyblob_dma = 0, keymod_dma = 0, outbuf_dma = 0;
++	u8 __iomem *lkeyblob = NULL, *lkeymod = NULL, *loutbuf = NULL;
++	struct sm_key_job_result testres;
++	u32 __iomem *decapdesc = NULL;
++
++	/* Build/map/flush the key blob */
++	lkeyblob = kmalloc(bloblen, GFP_KERNEL | GFP_DMA);
++	if (!lkeyblob) {
++		dev_err(jr_dev, "%s: can't alloc for keyblob\n", __func__);
++		retval = -ENOMEM;
++		goto out;
++	}
++	if (copy_from_user(lkeyblob, keyblobbuf, bloblen)) {
++		dev_err(jr_dev, "%s: can't copy for keyblob\n", __func__);
++		retval = -EFAULT;
++		goto out;
++	}
++	keyblob_dma = dma_map_single(jr_dev, lkeyblob, bloblen,
++				     DMA_TO_DEVICE);
++
++	/* Build/map/flush the key modifier */
++	lkeymod = kmalloc(GENMEM_KEYMOD_LEN, GFP_KERNEL | GFP_DMA);
++	if (!lkeymod) {
++		dev_err(jr_dev, "%s: can't alloc for keymod\n", __func__);
++		retval = -ENOMEM;
++		goto out;
++	}
++	if (copy_from_user(lkeymod, kmodbuf, GENMEM_KEYMOD_LEN)) {
++		dev_err(jr_dev, "%s: can't copy for keymod\n", __func__);
++		retval = -EFAULT;
++		goto out;
++	}
++	keymod_dma = dma_map_single(jr_dev, lkeymod, GENMEM_KEYMOD_LEN,
++				    DMA_TO_DEVICE);
++
++	loutbuf = kmalloc(keylen, GFP_KERNEL | GFP_DMA);
++	if (!loutbuf) {
++		dev_err(jr_dev, "%s: can't alloc for outbuf\n", __func__);
++		retval = -ENOMEM;
++		goto out;
++	}
++	outbuf_dma = dma_map_single(jr_dev, loutbuf, keylen,
++				    DMA_FROM_DEVICE);
++
++	/* Build the decapsulation job descriptor */
++	dsize = blob_decap_jobdesc(&decapdesc, keymod_dma, keyblob_dma, (u8 *)outbuf_dma,
++				   keylen, RED_KEY, SM_GENMEM, KEY_COVER_ECB);
++	if (!dsize) {
++		dev_err(jr_dev, "can't alloc a decapsulation descriptor\n");
++		retval = -ENOMEM;
++		goto out;
++	}
++
++	init_completion(&testres.completion);
++
++	retval = caam_jr_enqueue(jr_dev, decapdesc, sm_key_job_done,
++				 &testres);
++	if (!retval) {
++		wait_for_completion_interruptible(&testres.completion);
++		dev_info(jr_dev, "job ring return %d\n", testres.error);
++		if (!testres.error) {
++			dma_sync_single_for_cpu(jr_dev, outbuf_dma, keylen,
++						DMA_FROM_DEVICE);
++
++			if (copy_to_user(outbuf, loutbuf, keylen)) {
++				retval = -EFAULT;
++				goto out;
++			}
++		}
++		retval = testres.error;
++	}
++
++out:
++	if (outbuf_dma)
++		dma_unmap_single(jr_dev, outbuf_dma, keylen,
++				 DMA_FROM_DEVICE);
++	if (keymod_dma)
++		dma_unmap_single(jr_dev, keymod_dma, GENMEM_KEYMOD_LEN,
++				 DMA_TO_DEVICE);
++	if (keyblob_dma)
++		dma_unmap_single(jr_dev, keyblob_dma, bloblen,
++				 DMA_TO_DEVICE);
++	kfree(decapdesc);
++	kfree(lkeymod);
++	kfree(lkeyblob);
++	kfree(loutbuf);
++
++	return retval;
++}
++
++
++static long kb_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
++{
++	int retval = 0;
++	struct caam_kb_data kb_data;
++	struct miscdevice *miscdev = file->private_data;
++	struct kb_device *dev = container_of(miscdev, struct kb_device, misc_dev);
++
++	if (copy_from_user(&kb_data, (void *)arg, sizeof(kb_data))) {
++		retval = -EFAULT;
++		goto err;
++	}
++
++	if (!kb_data.rawkey || !kb_data.keyblob ||
++	    (kb_data.rawkey_len + BLOB_OVERHEAD != kb_data.keyblob_len) ||
++	    (kb_data.keymod_len != GENMEM_KEYMOD_LEN)) {
++		retval = -EINVAL;
++		goto err;
++	}
++
++	printk(KERN_INFO "%s: rawkey_len %zu, keyblob_len %zu\n",
++	       __func__, kb_data.rawkey_len, kb_data.keyblob_len);
++
++	switch (cmd) {
++	case CAAM_KB_ENCRYPT:
++	{
++		retval = gen_mem_encap(dev->jr_dev, kb_data.rawkey, kb_data.rawkey_len,
++				       kb_data.keymod, kb_data.keyblob);
++		break;
++	}
++	case CAAM_KB_DECRYPT:
++	{
++		retval = gen_mem_decap(dev->jr_dev, kb_data.keyblob, kb_data.keyblob_len,
++				       kb_data.keymod, kb_data.rawkey);
++		break;
++	}
++	default:
++		return -ENOTTY;
++	}
++
++err:
++	return retval;
++}
++
++static const struct file_operations kb_fops = {
++	.owner = THIS_MODULE,
++	.open = kb_open,
++	.release = kb_release,
++	.unlocked_ioctl = kb_ioctl,
++};
++
++static struct kb_device *kb_device_create(void)
++{
++	struct kb_device *idev;
++	int ret;
++
++	idev = kzalloc(sizeof(struct kb_device), GFP_KERNEL);
++	if (!idev)
++		return ERR_PTR(-ENOMEM);
++
++	idev->misc_dev.minor = MISC_DYNAMIC_MINOR;
++	idev->misc_dev.name = "caam_kb";
++	idev->misc_dev.fops = &kb_fops;
++	idev->misc_dev.parent = NULL;
++	ret = misc_register(&idev->misc_dev);
++	if (ret) {
++		pr_err("caam_kb: failed to register misc device.\n");
++		return ERR_PTR(ret);
++	}
++
++	return idev;
++}
++
++static int kb_device_destroy(struct kb_device *kb_dev)
++{
++	if ((kb_dev) && (kb_dev->jr_dev)) {
++		caam_jr_free(kb_dev->jr_dev);
++		kb_dev->jr_dev = NULL;
++	}
++
++	if (kb_dev)
++		misc_deregister(&kb_dev->misc_dev);
++
++	return 0;
++}
++/*
++ * Probe key blob device
++ */
++static int caam_keyblob_probe(struct platform_device *pdev)
++{
++	int err;
++
++	dev_dbg(&pdev->dev, "%s enter\n", __func__);
++	kb_dev = kb_device_create();
++	if (IS_ERR_OR_NULL(kb_dev)) {
++		err = PTR_ERR(kb_dev);
++		goto err;
++	}
++	return 0;
++err:
++	return err;
++}
++
++/*
++ * Remove key blob device
++ */
++static int caam_keyblob_remove(struct platform_device *pdev)
++{
++	kb_device_destroy(kb_dev);
++	return 0;
++}
++
++static struct of_device_id caam_keyblob_match[] = {
++	{
++		.compatible = "fsl,sec-v4.0-keyblob",
++	},
++	{
++		.compatible = "fsl,sec4.0-keyblob",
++	},
++	{},
++};
++
++MODULE_DEVICE_TABLE(of, caam_keyblob_match);
++
++static struct platform_driver caam_keyblob_driver = {
++	.driver = {
++		.name = "caam_keyblob",
++		.owner = THIS_MODULE,
++		.of_match_table = caam_keyblob_match,
++	},
++	.probe = caam_keyblob_probe,
++	.remove = caam_keyblob_remove,
++};
++
++static int __init keyblob_driver_init(void)
++{
++	return platform_driver_register(&caam_keyblob_driver);
++}
++
++static void __exit keyblob_driver_exit(void)
++{
++	platform_driver_unregister(&caam_keyblob_driver);
++}
++
++module_init(keyblob_driver_init);
++module_exit(keyblob_driver_exit);
++
++
++MODULE_LICENSE("Dual BSD/GPL");
++MODULE_DESCRIPTION("FSL CAAM Secure Memory / Keystore");
++MODULE_AUTHOR("Freescale Semiconductor - NMSG/MAD");
+diff -Nur linux-4.1.3/drivers/crypto/caam/caam_keyblob.h linux-xbian-imx6/drivers/crypto/caam/caam_keyblob.h
+--- linux-4.1.3/drivers/crypto/caam/caam_keyblob.h	1970-01-01 01:00:00.000000000 +0100
++++ linux-xbian-imx6/drivers/crypto/caam/caam_keyblob.h	2015-07-27 23:13:04.205975852 +0200
+@@ -0,0 +1,45 @@
++/*
++ * CAAM public-level include definitions for the key blob
++ *
++ * Copyright (C) 2015 Freescale Semiconductor, Inc.
++ */
++
++#ifndef CAAM_KEYBLOB_H
++#define CAAM_KEYBLOB_H
++
++
++#include
++#include
++
++struct caam_kb_data {
++	char *rawkey;
++	size_t rawkey_len;
++	char *keyblob;
++	size_t keyblob_len;
++	char *keymod;
++	size_t keymod_len;
++};
++
++
++#define CAAM_KB_MAGIC 'I'
++
++/**
++ * DOC: CAAM_KB_ENCRYPT - generate a key blob from a raw key
++ *
++ * Takes a caam_kb_data struct and returns it with the key blob.
++ */
++#define CAAM_KB_ENCRYPT _IOWR(CAAM_KB_MAGIC, 0, \
++		struct caam_kb_data)
++
++/**
++ * DOC: CAAM_KB_DECRYPT - get keys from a key blob
++ *
++ * Takes a caam_kb_data struct and returns it with the raw key.
++ */
++#define CAAM_KB_DECRYPT _IOWR(CAAM_KB_MAGIC, 1, struct caam_kb_data)
++
++#ifndef GENMEM_KEYMOD_LEN
++#define GENMEM_KEYMOD_LEN 16
++#endif
++
++#endif /* CAAM_KEYBLOB_H */
+diff -Nur linux-4.1.3/drivers/crypto/caam/caamrng.c linux-xbian-imx6/drivers/crypto/caam/caamrng.c
+--- linux-4.1.3/drivers/crypto/caam/caamrng.c	2015-07-21 19:10:33.000000000 +0200
++++ linux-xbian-imx6/drivers/crypto/caam/caamrng.c	2015-07-27 23:13:04.209961631 +0200
+@@ -1,7 +1,7 @@
+ /*
+  * caam - Freescale FSL CAAM support for hw_random
+  *
+- * Copyright 2011 Freescale Semiconductor, Inc.
++ * Copyright (C) 2011-2013 Freescale Semiconductor, Inc.
+  *
+  * Based on caamalg.c crypto API driver.
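For reference, a hypothetical user-space caller of the ioctl interface defined in caam_keyblob.h above might look as follows. This is a sketch, not part of the patch: the /dev/caam_kb node name comes from kb_device_create(), the 48-byte BLOB_OVERHEAD is taken from the encapsulation comment in caam_keyblob.c, the key modifier value is arbitrary, and error handling is abbreviated.

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "caam_keyblob.h"

#define BLOB_OVERHEAD 48	/* blob key encryption key + MAC tag */

static int wrap_key(const char *rawkey, size_t keylen, char *blob)
{
	struct caam_kb_data kb;
	char keymod[GENMEM_KEYMOD_LEN] = "my-key-modifier";	/* arbitrary 16 bytes */
	int fd, ret;

	memset(&kb, 0, sizeof(kb));
	kb.rawkey = (char *)rawkey;
	kb.rawkey_len = keylen;
	kb.keyblob = blob;		/* caller provides keylen + 48 bytes */
	kb.keyblob_len = keylen + BLOB_OVERHEAD;
	kb.keymod = keymod;
	kb.keymod_len = GENMEM_KEYMOD_LEN;

	fd = open("/dev/caam_kb", O_RDWR);
	if (fd < 0)
		return -1;
	ret = ioctl(fd, CAAM_KB_ENCRYPT, &kb);	/* 0 on success */
	close(fd);
	return ret;
}

CAAM_KB_DECRYPT is the mirror image: pass the blob in kb.keyblob and receive the raw key back in kb.rawkey, with the same length relationship enforced by kb_ioctl().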
+ * +@@ -80,9 +80,12 @@ + + static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd) + { +- if (bd->addr) ++ if (bd->addr) { ++ dma_sync_single_for_cpu(jrdev, bd->addr, RN_BUF_SIZE, ++ DMA_FROM_DEVICE); + dma_unmap_single(jrdev, bd->addr, RN_BUF_SIZE, + DMA_FROM_DEVICE); ++ } + } + + static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx) +@@ -103,11 +106,18 @@ + bd = (struct buf_data *)((char *)desc - + offsetof(struct buf_data, hw_desc)); + +- if (err) +- caam_jr_strstatus(jrdev, err); ++ if (err) { ++ char tmp[CAAM_ERROR_STR_MAX]; ++ ++ dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); ++ } + + atomic_set(&bd->empty, BUF_NOT_EMPTY); + complete(&bd->filled); ++ ++ /* Buffer refilled, invalidate cache */ ++ dma_sync_single_for_cpu(jrdev, bd->addr, RN_BUF_SIZE, DMA_FROM_DEVICE); ++ + #ifdef DEBUG + print_hex_dump(KERN_ERR, "rng refreshed buf@: ", + DUMP_PREFIX_ADDRESS, 16, 4, bd->buf, RN_BUF_SIZE, 1); +@@ -185,7 +195,7 @@ + max - copied_idx, false); + } + +-static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx) ++static inline void rng_create_sh_desc(struct caam_rng_ctx *ctx) + { + struct device *jrdev = ctx->jrdev; + u32 *desc = ctx->sh_desc; +@@ -203,18 +213,16 @@ + + ctx->sh_desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), + DMA_TO_DEVICE); +- if (dma_mapping_error(jrdev, ctx->sh_desc_dma)) { +- dev_err(jrdev, "unable to map shared descriptor\n"); +- return -ENOMEM; +- } ++ dma_sync_single_for_device(jrdev, ctx->sh_desc_dma, desc_bytes(desc), ++ DMA_TO_DEVICE); ++ + #ifdef DEBUG + print_hex_dump(KERN_ERR, "rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4, + desc, desc_bytes(desc), 1); + #endif +- return 0; + } + +-static inline int rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id) ++static inline void rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id) + { + struct device *jrdev = ctx->jrdev; + struct buf_data *bd = &ctx->bufs[buf_id]; +@@ -225,17 +233,12 @@ + HDR_REVERSE); + + bd->addr = dma_map_single(jrdev, bd->buf, RN_BUF_SIZE, DMA_FROM_DEVICE); +- if (dma_mapping_error(jrdev, bd->addr)) { +- dev_err(jrdev, "unable to map dst\n"); +- return -ENOMEM; +- } + + append_seq_out_ptr_intlen(desc, bd->addr, RN_BUF_SIZE, 0); + #ifdef DEBUG + print_hex_dump(KERN_ERR, "rng job desc@: ", DUMP_PREFIX_ADDRESS, 16, 4, + desc, desc_bytes(desc), 1); + #endif +- return 0; + } + + static void caam_cleanup(struct hwrng *rng) +@@ -252,44 +255,67 @@ + rng_unmap_ctx(rng_ctx); + } + +-static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id) ++#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_TEST ++static inline void test_len(struct hwrng *rng, size_t len, bool wait) + { +- struct buf_data *bd = &ctx->bufs[buf_id]; +- int err; ++ u8 *buf; ++ int real_len; + +- err = rng_create_job_desc(ctx, buf_id); +- if (err) +- return err; ++ buf = kzalloc(sizeof(u8) * len, GFP_KERNEL); ++ real_len = rng->read(rng, buf, len, wait); ++ if (real_len == 0 && wait) ++ pr_err("WAITING FAILED\n"); ++ pr_info("wanted %d bytes, got %d\n", len, real_len); ++ print_hex_dump(KERN_INFO, "random bytes@: ", DUMP_PREFIX_ADDRESS, ++ 16, 4, buf, real_len, 1); ++ kfree(buf); ++} + +- atomic_set(&bd->empty, BUF_EMPTY); +- submit_job(ctx, buf_id == ctx->current_buf); +- wait_for_completion(&bd->filled); ++static inline void test_mode_once(struct hwrng *rng, bool wait) ++{ ++#define TEST_CHUNK (RN_BUF_SIZE / 4) + +- return 0; ++ test_len(rng, TEST_CHUNK, wait); ++ test_len(rng, RN_BUF_SIZE * 2, wait); ++ test_len(rng, RN_BUF_SIZE * 2 - TEST_CHUNK, wait); + } + +-static int 
caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev) ++static inline void test_mode(struct hwrng *rng, bool wait) + { +- int err; +- +- ctx->jrdev = jrdev; ++#define TEST_PASS 1 ++ int i; + +- err = rng_create_sh_desc(ctx); +- if (err) +- return err; ++ for (i = 0; i < TEST_PASS; i++) ++ test_mode_once(rng, wait); ++} + +- ctx->current_buf = 0; +- ctx->cur_buf_idx = 0; ++static void self_test(struct hwrng *rng) ++{ ++ pr_info("testing without waiting\n"); ++ test_mode(rng, false); ++ pr_info("testing with waiting\n"); ++ test_mode(rng, true); ++} ++#endif + +- err = caam_init_buf(ctx, 0); +- if (err) +- return err; ++static void caam_init_buf(struct caam_rng_ctx *ctx, int buf_id) ++{ ++ struct buf_data *bd = &ctx->bufs[buf_id]; + +- err = caam_init_buf(ctx, 1); +- if (err) +- return err; ++ rng_create_job_desc(ctx, buf_id); ++ atomic_set(&bd->empty, BUF_EMPTY); ++ submit_job(ctx, buf_id == ctx->current_buf); ++ wait_for_completion(&bd->filled); ++} + +- return 0; ++static void caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev) ++{ ++ ctx->jrdev = jrdev; ++ rng_create_sh_desc(ctx); ++ ctx->current_buf = 0; ++ ctx->cur_buf_idx = 0; ++ caam_init_buf(ctx, 0); ++ caam_init_buf(ctx, 1); + } + + static struct hwrng caam_rng = { +@@ -300,19 +326,15 @@ + + static void __exit caam_rng_exit(void) + { +- caam_jr_free(rng_ctx->jrdev); + hwrng_unregister(&caam_rng); +- kfree(rng_ctx); + } + + static int __init caam_rng_init(void) + { +- struct device *dev; + struct device_node *dev_node; + struct platform_device *pdev; + struct device *ctrldev; +- void *priv; +- int err; ++ struct caam_drv_private *priv; + + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); + if (!dev_node) { +@@ -322,35 +344,26 @@ + } + + pdev = of_find_device_by_node(dev_node); +- if (!pdev) { +- of_node_put(dev_node); ++ if (!pdev) + return -ENODEV; +- } + + ctrldev = &pdev->dev; + priv = dev_get_drvdata(ctrldev); + of_node_put(dev_node); + +- /* +- * If priv is NULL, it's probably because the caam driver wasn't +- * properly initialized (e.g. RNG4 init failed). Thus, bail out here. 
+- */ +- if (!priv) ++ /* Check RNG present in hardware before registration */ ++ if (!(rd_reg64(&priv->ctrl->perfmon.cha_num) & CHA_ID_RNG_MASK)) + return -ENODEV; + +- dev = caam_jr_alloc(); +- if (IS_ERR(dev)) { +- pr_err("Job Ring Device allocation for transform failed\n"); +- return PTR_ERR(dev); +- } +- rng_ctx = kmalloc(sizeof(struct caam_rng_ctx), GFP_DMA); +- if (!rng_ctx) +- return -ENOMEM; +- err = caam_init_rng(rng_ctx, dev); +- if (err) +- return err; ++ rng_ctx = kmalloc(sizeof(struct caam_rng_ctx), GFP_KERNEL | GFP_DMA); ++ ++ caam_init_rng(rng_ctx, priv->jrdev[0]); ++ ++#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_TEST ++ self_test(&caam_rng); ++#endif + +- dev_info(dev, "registering rng-caam\n"); ++ dev_info(priv->jrdev[0], "registering rng-caam\n"); + return hwrng_register(&caam_rng); + } + +diff -Nur linux-4.1.3/drivers/crypto/caam/compat.h linux-xbian-imx6/drivers/crypto/caam/compat.h +--- linux-4.1.3/drivers/crypto/caam/compat.h 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/compat.h 2015-07-27 23:13:04.209961631 +0200 +@@ -14,6 +14,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -23,12 +25,15 @@ + #include + #include + #include ++#include ++ ++#ifdef CONFIG_ARM /* needs the clock control subsystem */ ++#include ++#endif + #include + + #include +-#include + #include +-#include + #include + #include + #include +diff -Nur linux-4.1.3/drivers/crypto/caam/ctrl.c linux-xbian-imx6/drivers/crypto/caam/ctrl.c +--- linux-4.1.3/drivers/crypto/caam/ctrl.c 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/ctrl.c 2015-07-27 23:13:04.209961631 +0200 +@@ -1,405 +1,260 @@ +-/* * CAAM control-plane driver backend ++/* ++ * CAAM control-plane driver backend + * Controller-level driver, kernel property detection, initialization + * +- * Copyright 2008-2012 Freescale Semiconductor, Inc. ++ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. + */ + +-#include +-#include +-#include +- + #include "compat.h" + #include "regs.h" + #include "intern.h" + #include "jr.h" + #include "desc_constr.h" + #include "error.h" ++#include "ctrl.h" ++#include "sm.h" ++#include + +-/* +- * Descriptor to instantiate RNG State Handle 0 in normal mode and +- * load the JDKEK, TDKEK and TDSK registers +- */ +-static void build_instantiation_desc(u32 *desc, int handle, int do_sk) +-{ +- u32 *jump_cmd, op_flags; +- +- init_job_desc(desc, 0); +- +- op_flags = OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG | +- (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT; +- +- /* INIT RNG in non-test mode */ +- append_operation(desc, op_flags); +- +- if (!handle && do_sk) { +- /* +- * For SH0, Secure Keys must be generated as well +- */ ++/* Used to capture the array of job rings */ ++struct device **caam_jr_dev; + +- /* wait for done */ +- jump_cmd = append_jump(desc, JUMP_CLASS_CLASS1); +- set_jump_tgt_here(desc, jump_cmd); ++static int caam_remove(struct platform_device *pdev) ++{ ++ struct device *ctrldev; ++ struct caam_drv_private *ctrlpriv; ++ struct caam_drv_private_jr *jrpriv; ++ struct caam_full __iomem *topregs; ++ int ring, ret = 0; + +- /* +- * load 1 to clear written reg: +- * resets the done interrrupt and returns the RNG to idle. 
+- */ +- append_load_imm_u32(desc, 1, LDST_SRCDST_WORD_CLRW); ++ ctrldev = &pdev->dev; ++ ctrlpriv = dev_get_drvdata(ctrldev); ++ topregs = (struct caam_full __iomem *)ctrlpriv->ctrl; + +- /* Initialize State Handle */ +- append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG | +- OP_ALG_AAI_RNG4_SK); ++ /* shut down JobRs */ ++ for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) { ++ ret |= caam_jr_shutdown(ctrlpriv->jrdev[ring]); ++ jrpriv = dev_get_drvdata(ctrlpriv->jrdev[ring]); ++ irq_dispose_mapping(jrpriv->irq); + } + +- append_jump(desc, JUMP_CLASS_CLASS1 | JUMP_TYPE_HALT); +-} ++ /* Shut down debug views */ ++#ifdef CONFIG_DEBUG_FS ++ debugfs_remove_recursive(ctrlpriv->dfs_root); ++#endif + +-/* Descriptor for deinstantiation of State Handle 0 of the RNG block. */ +-static void build_deinstantiation_desc(u32 *desc, int handle) +-{ +- init_job_desc(desc, 0); ++ /* Unmap controller region */ ++ iounmap(&topregs->ctrl); + +- /* Uninstantiate State Handle 0 */ +- append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG | +- (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INITFINAL); ++#ifdef CONFIG_ARM ++ /* shut clocks off before finalizing shutdown */ ++ clk_disable(ctrlpriv->caam_ipg); ++ clk_disable(ctrlpriv->caam_mem); ++ clk_disable(ctrlpriv->caam_aclk); ++#endif ++ ++ kfree(ctrlpriv->jrdev); ++ kfree(ctrlpriv); + +- append_jump(desc, JUMP_CLASS_CLASS1 | JUMP_TYPE_HALT); ++ return ret; + } + + /* +- * run_descriptor_deco0 - runs a descriptor on DECO0, under direct control of +- * the software (no JR/QI used). +- * @ctrldev - pointer to device +- * @status - descriptor status, after being run +- * +- * Return: - 0 if no error occurred +- * - -ENODEV if the DECO couldn't be acquired +- * - -EAGAIN if an error occurred while executing the descriptor ++ * Descriptor to instantiate RNG State Handle 0 in normal mode and ++ * load the JDKEK, TDKEK and TDSK registers + */ +-static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, +- u32 *status) ++static void build_instantiation_desc(u32 *desc) + { +- struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev); +- struct caam_ctrl __iomem *ctrl = ctrlpriv->ctrl; +- struct caam_deco __iomem *deco = ctrlpriv->deco; +- unsigned int timeout = 100000; +- u32 deco_dbg_reg, flags; +- int i; +- ++ u32 *jump_cmd; + +- if (ctrlpriv->virt_en == 1) { +- setbits32(&ctrl->deco_rsr, DECORSR_JR0); +- +- while (!(rd_reg32(&ctrl->deco_rsr) & DECORSR_VALID) && +- --timeout) +- cpu_relax(); +- +- timeout = 100000; +- } +- +- setbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); +- +- while (!(rd_reg32(&ctrl->deco_rq) & DECORR_DEN0) && +- --timeout) +- cpu_relax(); ++ init_job_desc(desc, 0); + +- if (!timeout) { +- dev_err(ctrldev, "failed to acquire DECO 0\n"); +- clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); +- return -ENODEV; +- } ++ /* INIT RNG in non-test mode */ ++ append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG | ++ OP_ALG_AS_INIT); + +- for (i = 0; i < desc_len(desc); i++) +- wr_reg32(&deco->descbuf[i], *(desc + i)); ++ /* wait for done */ ++ jump_cmd = append_jump(desc, JUMP_CLASS_CLASS1); ++ set_jump_tgt_here(desc, jump_cmd); + +- flags = DECO_JQCR_WHL; + /* +- * If the descriptor length is longer than 4 words, then the +- * FOUR bit in JRCTRL register must be set. ++ * load 1 to clear written reg: ++ * resets the done interrupt and returns the RNG to idle. 
+ */ +- if (desc_len(desc) >= 4) +- flags |= DECO_JQCR_FOUR; ++ append_load_imm_u32(desc, 1, LDST_SRCDST_WORD_CLRW); + +- /* Instruct the DECO to execute it */ +- wr_reg32(&deco->jr_ctl_hi, flags); +- +- timeout = 10000000; +- do { +- deco_dbg_reg = rd_reg32(&deco->desc_dbg); +- /* +- * If an error occured in the descriptor, then +- * the DECO status field will be set to 0x0D +- */ +- if ((deco_dbg_reg & DESC_DBG_DECO_STAT_MASK) == +- DESC_DBG_DECO_STAT_HOST_ERR) +- break; +- cpu_relax(); +- } while ((deco_dbg_reg & DESC_DBG_DECO_STAT_VALID) && --timeout); +- +- *status = rd_reg32(&deco->op_status_hi) & +- DECO_OP_STATUS_HI_ERR_MASK; +- +- if (ctrlpriv->virt_en == 1) +- clrbits32(&ctrl->deco_rsr, DECORSR_JR0); +- +- /* Mark the DECO as free */ +- clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); +- +- if (!timeout) +- return -EAGAIN; +- +- return 0; + } + +-/* +- * instantiate_rng - builds and executes a descriptor on DECO0, +- * which initializes the RNG block. +- * @ctrldev - pointer to device +- * @state_handle_mask - bitmask containing the instantiation status +- * for the RNG4 state handles which exist in +- * the RNG4 block: 1 if it's been instantiated +- * by an external entry, 0 otherwise. +- * @gen_sk - generate data to be loaded into the JDKEK, TDKEK and TDSK; +- * Caution: this can be done only once; if the keys need to be +- * regenerated, a POR is required +- * +- * Return: - 0 if no error occurred +- * - -ENOMEM if there isn't enough memory to allocate the descriptor +- * - -ENODEV if DECO0 couldn't be acquired +- * - -EAGAIN if an error occurred when executing the descriptor +- * f.i. there was a RNG hardware error due to not "good enough" +- * entropy being aquired. +- */ +-static int instantiate_rng(struct device *ctrldev, int state_handle_mask, +- int gen_sk) ++static void generate_secure_keys_desc(u32 *desc) + { +- struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev); +- struct caam_ctrl __iomem *ctrl; +- u32 *desc, status, rdsta_val; +- int ret = 0, sh_idx; +- +- ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; +- desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL); +- if (!desc) +- return -ENOMEM; ++ /* generate secure keys (non-test) */ ++ append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG | ++ OP_ALG_RNG4_SK); ++} + +- for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) { +- /* +- * If the corresponding bit is set, this state handle +- * was initialized by somebody else, so it's left alone. +- */ +- if ((1 << sh_idx) & state_handle_mask) +- continue; ++struct instantiate_result { ++ struct completion completion; ++ int err; ++}; + +- /* Create the descriptor for instantiating RNG State Handle */ +- build_instantiation_desc(desc, sh_idx, gen_sk); ++static void rng4_init_done(struct device *dev, u32 *desc, u32 err, ++ void *context) ++{ ++ struct instantiate_result *instantiation = context; + +- /* Try to run it through DECO0 */ +- ret = run_descriptor_deco0(ctrldev, desc, &status); ++ if (err) { ++ char tmp[CAAM_ERROR_STR_MAX]; + +- /* +- * If ret is not 0, or descriptor status is not 0, then +- * something went wrong. No need to try the next state +- * handle (if available), bail out here. +- * Also, if for some reason, the State Handle didn't get +- * instantiated although the descriptor has finished +- * without any error (HW optimizations for later +- * CAAM eras), then try again. 
+- */ +- rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; +- if (status || !(rdsta_val & (1 << sh_idx))) +- ret = -EAGAIN; +- if (ret) +- break; +- dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx); +- /* Clear the contents before recreating the descriptor */ +- memset(desc, 0x00, CAAM_CMD_SZ * 7); ++ dev_err(dev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); + } + +- kfree(desc); +- +- return ret; ++ instantiation->err = err; ++ complete(&instantiation->completion); + } + +-/* +- * deinstantiate_rng - builds and executes a descriptor on DECO0, +- * which deinitializes the RNG block. +- * @ctrldev - pointer to device +- * @state_handle_mask - bitmask containing the instantiation status +- * for the RNG4 state handles which exist in +- * the RNG4 block: 1 if it's been instantiated +- * +- * Return: - 0 if no error occurred +- * - -ENOMEM if there isn't enough memory to allocate the descriptor +- * - -ENODEV if DECO0 couldn't be acquired +- * - -EAGAIN if an error occurred when executing the descriptor +- */ +-static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask) ++static int instantiate_rng(struct device *jrdev, u32 keys_generated) + { +- u32 *desc, status; +- int sh_idx, ret = 0; ++ struct instantiate_result instantiation; + +- desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL); +- if (!desc) ++ dma_addr_t desc_dma; ++ u32 *desc; ++ int ret; ++ ++ desc = kmalloc(CAAM_CMD_SZ * 6, GFP_KERNEL | GFP_DMA); ++ if (!desc) { ++ dev_err(jrdev, "cannot allocate RNG init descriptor memory\n"); + return -ENOMEM; +- +- for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) { +- /* +- * If the corresponding bit is set, then it means the state +- * handle was initialized by us, and thus it needs to be +- * deintialized as well +- */ +- if ((1 << sh_idx) & state_handle_mask) { +- /* +- * Create the descriptor for deinstantating this state +- * handle +- */ +- build_deinstantiation_desc(desc, sh_idx); +- +- /* Try to run it through DECO0 */ +- ret = run_descriptor_deco0(ctrldev, desc, &status); +- +- if (ret || status) { +- dev_err(ctrldev, +- "Failed to deinstantiate RNG4 SH%d\n", +- sh_idx); +- break; +- } +- dev_info(ctrldev, "Deinstantiated RNG4 SH%d\n", sh_idx); +- } + } + +- kfree(desc); +- +- return ret; +-} ++ build_instantiation_desc(desc); + +-static int caam_remove(struct platform_device *pdev) +-{ +- struct device *ctrldev; +- struct caam_drv_private *ctrlpriv; +- struct caam_ctrl __iomem *ctrl; +- int ring, ret = 0; +- +- ctrldev = &pdev->dev; +- ctrlpriv = dev_get_drvdata(ctrldev); +- ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; +- +- /* Remove platform devices for JobRs */ +- for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) { +- if (ctrlpriv->jrpdev[ring]) +- of_device_unregister(ctrlpriv->jrpdev[ring]); ++ /* If keys have not been generated, add op code to generate key. */ ++ if (!keys_generated) ++ generate_secure_keys_desc(desc); ++ ++ desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); ++ dma_sync_single_for_device(jrdev, desc_dma, desc_bytes(desc), ++ DMA_TO_DEVICE); ++ init_completion(&instantiation.completion); ++ ret = caam_jr_enqueue(jrdev, desc, rng4_init_done, &instantiation); ++ if (!ret) { ++ wait_for_completion_interruptible(&instantiation.completion); ++ ret = instantiation.err; ++ if (ret) ++ dev_err(jrdev, "unable to instantiate RNG\n"); + } + +- /* De-initialize RNG state handles initialized by this driver. 
*/
+- if (ctrlpriv->rng4_sh_init)
+- deinstantiate_rng(ctrldev, ctrlpriv->rng4_sh_init);
+-
+- /* Shut down debug views */
+-#ifdef CONFIG_DEBUG_FS
+- debugfs_remove_recursive(ctrlpriv->dfs_root);
+-#endif
++ dma_unmap_single(jrdev, desc_dma, desc_bytes(desc), DMA_TO_DEVICE);
+
+- /* Unmap controller region */
+- iounmap(&ctrl);
++ kfree(desc);
+
+ return ret;
+ }
+
+ /*
+- * kick_trng - sets the various parameters for enabling the initialization
+- * of the RNG4 block in CAAM
+- * @pdev - pointer to the platform device
+- * @ent_delay - Defines the length (in system clocks) of each entropy sample.
++ * By default, the TRNG runs for 200 clocks per sample;
++ * 1600 clocks per sample generates better entropy.
+ */
+-static void kick_trng(struct platform_device *pdev, int ent_delay)
++static void kick_trng(struct platform_device *pdev)
+ {
+ struct device *ctrldev = &pdev->dev;
+ struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev);
+- struct caam_ctrl __iomem *ctrl;
++ struct caam_full __iomem *topregs;
+ struct rng4tst __iomem *r4tst;
+ u32 val;
+
+- ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl;
+- r4tst = &ctrl->r4tst[0];
++ topregs = (struct caam_full __iomem *)ctrlpriv->ctrl;
++ r4tst = &topregs->ctrl.r4tst[0];
+
++ val = rd_reg32(&r4tst->rtmctl);
+ /* put RNG4 into program mode */
+ setbits32(&r4tst->rtmctl, RTMCTL_PRGM);
+-
+- /*
+- * Performance-wise, it does not make sense to
+- * set the delay to a value that is lower
+- * than the last one that worked (i.e. the state handles
+- * were instantiated properly. Thus, instead of wasting
+- * time trying to set the values controlling the sample
+- * frequency, the function simply returns.
+- */
+- val = (rd_reg32(&r4tst->rtsdctl) & RTSDCTL_ENT_DLY_MASK)
+- >> RTSDCTL_ENT_DLY_SHIFT;
+- if (ent_delay <= val) {
+- /* put RNG4 into run mode */
+- clrbits32(&r4tst->rtmctl, RTMCTL_PRGM);
+- return;
+- }
+-
++ /* Set clocks per sample to the default, and divider to zero */
+ val = rd_reg32(&r4tst->rtsdctl);
+ val = (val & ~RTSDCTL_ENT_DLY_MASK) |
+- (ent_delay << RTSDCTL_ENT_DLY_SHIFT);
++ (RNG4_ENT_CLOCKS_SAMPLE << RTSDCTL_ENT_DLY_SHIFT);
+ wr_reg32(&r4tst->rtsdctl, val);
+- /* min. freq. count, equal to 1/4 of the entropy sample length */
+- wr_reg32(&r4tst->rtfrqmin, ent_delay >> 2);
+- /* disable maximum frequency count */
+- wr_reg32(&r4tst->rtfrqmax, RTFRQMAX_DISABLE);
+- /* read the control register */
+- val = rd_reg32(&r4tst->rtmctl);
+- /*
+- * select raw sampling in both entropy shifter
+- * and statistical checker
+- */
+- setbits32(&val, RTMCTL_SAMP_MODE_RAW_ES_SC);
++ /* min. freq. count */
++ wr_reg32(&r4tst->rtfrqmin, RNG4_ENT_CLOCKS_SAMPLE / 4);
++ /* max. freq. count */
++ wr_reg32(&r4tst->rtfrqmax, RNG4_ENT_CLOCKS_SAMPLE * 8);
+ /* put RNG4 into run mode */
+- clrbits32(&val, RTMCTL_PRGM);
+- /* write back the control register */
+- wr_reg32(&r4tst->rtmctl, val);
++ clrbits32(&r4tst->rtmctl, RTMCTL_PRGM);
+ }
+
+ /**
+ * caam_get_era() - Return the ERA of the SEC on SoC, based
+- * on "sec-era" propery in the DTS. This property is updated by u-boot.
++ * on the SEC_VID register.
++ * Returns the ERA number (1..6) or -ENOTSUPP if the ERA is unknown.
++ * @caam_id - the value of the SEC_VID register
+ **/
+-int caam_get_era(void)
++int caam_get_era(u64 caam_id)
+ {
+- struct device_node *caam_node;
+- for_each_compatible_node(caam_node, NULL, "fsl,sec-v4.0") {
+- const uint32_t *prop = (uint32_t *)of_get_property(caam_node,
+- "fsl,sec-era",
+- NULL);
+- return prop ? 
*prop : -ENOTSUPP; +- } ++ struct sec_vid *sec_vid = (struct sec_vid *)&caam_id; ++ static const struct { ++ u16 ip_id; ++ u8 maj_rev; ++ u8 era; ++ } caam_eras[] = { ++ {0x0A10, 1, 1}, ++ {0x0A10, 2, 2}, ++ {0x0A12, 1, 3}, ++ {0x0A14, 1, 3}, ++ {0x0A14, 2, 4}, ++ {0x0A16, 1, 4}, ++ {0x0A11, 1, 4}, ++ {0x0A10, 3, 4}, ++ {0x0A18, 1, 4}, ++ {0x0A11, 2, 5}, ++ {0x0A12, 2, 5}, ++ {0x0A13, 1, 5}, ++ {0x0A1C, 1, 5}, ++ {0x0A12, 4, 6}, ++ {0x0A13, 2, 6}, ++ {0x0A16, 2, 6}, ++ {0x0A18, 2, 6}, ++ {0x0A1A, 1, 6}, ++ {0x0A1C, 2, 6}, ++ {0x0A17, 1, 6} ++ }; ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(caam_eras); i++) ++ if (caam_eras[i].ip_id == sec_vid->ip_id && ++ caam_eras[i].maj_rev == sec_vid->maj_rev) ++ return caam_eras[i].era; + + return -ENOTSUPP; + } + EXPORT_SYMBOL(caam_get_era); + ++/* ++ * Return a job ring device. This is available so outside ++ * entities can gain direct access to the job ring. For now, ++ * this function returns the first job ring (at index 0). ++ */ ++struct device *caam_get_jrdev(void) ++{ ++ return caam_jr_dev[0]; ++} ++EXPORT_SYMBOL(caam_get_jrdev); ++ ++ + /* Probe routine for CAAM top (controller) level */ + static int caam_probe(struct platform_device *pdev) + { +- int ret, ring, rspec, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN; ++ int ret, ring, rspec; + u64 caam_id; + struct device *dev; + struct device_node *nprop, *np; + struct caam_ctrl __iomem *ctrl; ++ struct caam_full __iomem *topregs; ++ struct snvs_full __iomem *snvsregs; + struct caam_drv_private *ctrlpriv; + #ifdef CONFIG_DEBUG_FS + struct caam_perfmon *perfmon; + #endif +- u32 scfgr, comp_params; +- u32 cha_vid_ls; +- int pg_size; +- int BLOCK_OFFSET = 0; + +- ctrlpriv = devm_kzalloc(&pdev->dev, sizeof(struct caam_drv_private), +- GFP_KERNEL); ++ ctrlpriv = kzalloc(sizeof(struct caam_drv_private), GFP_KERNEL); + if (!ctrlpriv) + return -ENOMEM; + +@@ -415,71 +270,128 @@ + dev_err(dev, "caam: of_iomap() failed\n"); + return -ENOMEM; + } +- /* Finding the page size for using the CTPR_MS register */ +- comp_params = rd_reg32(&ctrl->perfmon.comp_parms_ms); +- pg_size = (comp_params & CTPR_MS_PG_SZ_MASK) >> CTPR_MS_PG_SZ_SHIFT; +- +- /* Allocating the BLOCK_OFFSET based on the supported page size on +- * the platform +- */ +- if (pg_size == 0) +- BLOCK_OFFSET = PG_SIZE_4K; +- else +- BLOCK_OFFSET = PG_SIZE_64K; +- + ctrlpriv->ctrl = (struct caam_ctrl __force *)ctrl; +- ctrlpriv->assure = (struct caam_assurance __force *) +- ((uint8_t *)ctrl + +- BLOCK_OFFSET * ASSURE_BLOCK_NUMBER +- ); +- ctrlpriv->deco = (struct caam_deco __force *) +- ((uint8_t *)ctrl + +- BLOCK_OFFSET * DECO_BLOCK_NUMBER +- ); ++ ++ /* topregs used to derive pointers to CAAM sub-blocks only */ ++ topregs = (struct caam_full __iomem *)ctrl; + + /* Get the IRQ of the controller (for security violations only) */ +- ctrlpriv->secvio_irq = irq_of_parse_and_map(nprop, 0); ++ ctrlpriv->secvio_irq = of_irq_to_resource(nprop, 0, NULL); ++ ++ /* Get SNVS register Page */ ++ np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-caam-snvs"); ++ ++ if (!np) ++ return -ENODEV; ++ ++ snvsregs = of_iomap(np, 0); ++ ctrlpriv->snvs = snvsregs; ++ /* Get CAAM-SM node and of_iomap() and save */ ++ np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-caam-sm"); ++ ++ if (!np) ++ return -ENODEV; ++ ++ ctrlpriv->sm_base = of_iomap(np, 0); ++ ctrlpriv->sm_size = 0x3fff; ++ ++/* ++ * ARM targets tend to have clock control subsystems that can ++ * enable/disable clocking to our device. 
Turn clocking on to proceed
++ */
++#ifdef CONFIG_ARM
++ ctrlpriv->caam_ipg = devm_clk_get(&ctrlpriv->pdev->dev, "caam_ipg");
++ if (IS_ERR(ctrlpriv->caam_ipg)) {
++ ret = PTR_ERR(ctrlpriv->caam_ipg);
++ dev_err(&ctrlpriv->pdev->dev,
++ "can't identify CAAM ipg clk: %d\n", ret);
++ return -ENODEV;
++ }
++ ctrlpriv->caam_mem = devm_clk_get(&ctrlpriv->pdev->dev, "caam_mem");
++ if (IS_ERR(ctrlpriv->caam_mem)) {
++ ret = PTR_ERR(ctrlpriv->caam_mem);
++ dev_err(&ctrlpriv->pdev->dev,
++ "can't identify CAAM secure mem clk: %d\n", ret);
++ return -ENODEV;
++ }
++ ctrlpriv->caam_aclk = devm_clk_get(&ctrlpriv->pdev->dev, "caam_aclk");
++ if (IS_ERR(ctrlpriv->caam_aclk)) {
++ ret = PTR_ERR(ctrlpriv->caam_aclk);
++ dev_err(&ctrlpriv->pdev->dev,
++ "can't identify CAAM aclk clk: %d\n", ret);
++ return -ENODEV;
++ }
++
++ ret = clk_prepare(ctrlpriv->caam_ipg);
++ if (ret < 0) {
++ dev_err(&pdev->dev, "can't prepare CAAM ipg clock: %d\n", ret);
++ return -ENODEV;
++ }
++ ret = clk_prepare(ctrlpriv->caam_mem);
++ if (ret < 0) {
++ dev_err(&pdev->dev, "can't prepare CAAM secure mem clock: %d\n", ret);
++ return -ENODEV;
++ }
++ ret = clk_prepare(ctrlpriv->caam_aclk);
++ if (ret < 0) {
++ dev_err(&pdev->dev, "can't prepare CAAM aclk clock: %d\n", ret);
++ return -ENODEV;
++ }
++
++ ret = clk_enable(ctrlpriv->caam_ipg);
++ if (ret < 0) {
++ dev_err(&pdev->dev, "can't enable CAAM ipg clock: %d\n", ret);
++ return -ENODEV;
++ }
++ ret = clk_enable(ctrlpriv->caam_mem);
++ if (ret < 0) {
++ dev_err(&pdev->dev, "can't enable CAAM secure mem clock: %d\n", ret);
++ return -ENODEV;
++ }
++ ret = clk_enable(ctrlpriv->caam_aclk);
++ if (ret < 0) {
++ dev_err(&pdev->dev, "can't enable CAAM aclk clock: %d\n", ret);
++ return -ENODEV;
++ }
++
++ pr_debug("%s caam_ipg clock:%d\n", __func__,
++ (int)clk_get_rate(ctrlpriv->caam_ipg));
++ pr_debug("%s caam_mem clock:%d\n", __func__,
++ (int)clk_get_rate(ctrlpriv->caam_mem));
++ pr_debug("%s caam_aclk clock:%d\n", __func__,
++ (int)clk_get_rate(ctrlpriv->caam_aclk));
++#endif
+
+ /*
+ * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
+ * long pointers in master configuration register
+ */
+- setbits32(&ctrl->mcr, MCFGR_WDENABLE |
++ setbits32(&topregs->ctrl.mcr, MCFGR_WDENABLE |
+ (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0));
+
++#ifdef CONFIG_ARCH_MX6
+ /*
+- * Read the Compile Time paramters and SCFGR to determine
+- * if Virtualization is enabled for this platform
++ * ERRATA: mx6 devices have an issue wherein AXI bus transactions
++ * may not occur in the correct order. This isn't a problem running
++ * single descriptors, but can be if running multiple concurrent
++ * descriptors. Reworking the driver to throttle to single requests
++ * is impractical, thus the workaround is to limit the AXI pipeline
++ * to a depth of 1 (from its default of 4) to preclude this situation
++ * from occurring.
+ */ +- scfgr = rd_reg32(&ctrl->scfgr); +- +- ctrlpriv->virt_en = 0; +- if (comp_params & CTPR_MS_VIRT_EN_INCL) { +- /* VIRT_EN_INCL = 1 & VIRT_EN_POR = 1 or +- * VIRT_EN_INCL = 1 & VIRT_EN_POR = 0 & SCFGR_VIRT_EN = 1 +- */ +- if ((comp_params & CTPR_MS_VIRT_EN_POR) || +- (!(comp_params & CTPR_MS_VIRT_EN_POR) && +- (scfgr & SCFGR_VIRT_EN))) +- ctrlpriv->virt_en = 1; +- } else { +- /* VIRT_EN_INCL = 0 && VIRT_EN_POR_VALUE = 1 */ +- if (comp_params & CTPR_MS_VIRT_EN_POR) +- ctrlpriv->virt_en = 1; +- } +- +- if (ctrlpriv->virt_en == 1) +- setbits32(&ctrl->jrstart, JRSTART_JR0_START | +- JRSTART_JR1_START | JRSTART_JR2_START | +- JRSTART_JR3_START); ++ wr_reg32(&topregs->ctrl.mcr, ++ (rd_reg32(&topregs->ctrl.mcr) & ~(MCFGR_AXIPIPE_MASK)) | ++ ((1 << MCFGR_AXIPIPE_SHIFT) & MCFGR_AXIPIPE_MASK)); ++#endif + ++ /* Set DMA masks according to platform ranging */ + if (sizeof(dma_addr_t) == sizeof(u64)) +- if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) +- dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); ++ if (of_device_is_compatible(nprop, "fsl,sec-v4.0")) ++ dma_set_mask(dev, DMA_BIT_MASK(40)); + else +- dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36)); ++ dma_set_mask(dev, DMA_BIT_MASK(36)); + else +- dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); ++ dma_set_mask(dev, DMA_BIT_MASK(32)); + + /* + * Detect and enable JobRs +@@ -487,51 +399,65 @@ + * for all, then go probe each one. + */ + rspec = 0; +- for_each_available_child_of_node(nprop, np) +- if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") || +- of_device_is_compatible(np, "fsl,sec4.0-job-ring")) ++ for_each_compatible_node(np, NULL, "fsl,sec-v4.0-job-ring") ++ rspec++; ++ if (!rspec) { ++ /* for backward compatible with device trees */ ++ for_each_compatible_node(np, NULL, "fsl,sec4.0-job-ring") + rspec++; ++ } + +- ctrlpriv->jrpdev = devm_kzalloc(&pdev->dev, +- sizeof(struct platform_device *) * rspec, +- GFP_KERNEL); +- if (ctrlpriv->jrpdev == NULL) { +- iounmap(&ctrl); ++ ctrlpriv->jrdev = kzalloc(sizeof(struct device *) * rspec, GFP_KERNEL); ++ if (ctrlpriv->jrdev == NULL) { ++ iounmap(&topregs->ctrl); + return -ENOMEM; + } + + ring = 0; + ctrlpriv->total_jobrs = 0; +- for_each_available_child_of_node(nprop, np) +- if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") || +- of_device_is_compatible(np, "fsl,sec4.0-job-ring")) { +- ctrlpriv->jrpdev[ring] = +- of_platform_device_create(np, NULL, dev); +- if (!ctrlpriv->jrpdev[ring]) { +- pr_warn("JR%d Platform device creation error\n", +- ring); +- continue; ++ for_each_compatible_node(np, NULL, "fsl,sec-v4.0-job-ring") { ++ ret = caam_jr_probe(pdev, np, ring); ++ if (ret < 0) { ++ /* ++ * Job ring not found, error out. At some ++ * point, we should enhance job ring handling ++ * to allow for non-consecutive job rings to ++ * be found. ++ */ ++ pr_err("fsl,sec-v4.0-job-ring not found "); ++ pr_err("(ring %d)\n", ring); ++ return ret; ++ } ++ ctrlpriv->total_jobrs++; ++ ring++; ++ } ++ ++ if (!ring) { ++ for_each_compatible_node(np, NULL, "fsl,sec4.0-job-ring") { ++ ret = caam_jr_probe(pdev, np, ring); ++ if (ret < 0) { ++ /* ++ * Job ring not found, error out. At some ++ * point, we should enhance job ring handling ++ * to allow for non-consecutive job rings to ++ * be found. 
++ */ ++ pr_err("fsl,sec4.0-job-ring not found "); ++ pr_err("(ring %d)\n", ring); ++ return ret; + } +- ctrlpriv->jr[ring] = (struct caam_job_ring __force *) +- ((uint8_t *)ctrl + +- (ring + JR_BLOCK_NUMBER) * +- BLOCK_OFFSET +- ); + ctrlpriv->total_jobrs++; + ring++; ++ } + } + + /* Check to see if QI present. If so, enable */ +- ctrlpriv->qi_present = +- !!(rd_reg32(&ctrl->perfmon.comp_parms_ms) & +- CTPR_MS_QI_MASK); ++ ctrlpriv->qi_present = !!(rd_reg64(&topregs->ctrl.perfmon.comp_parms) & ++ CTPR_QI_MASK); + if (ctrlpriv->qi_present) { +- ctrlpriv->qi = (struct caam_queue_if __force *) +- ((uint8_t *)ctrl + +- BLOCK_OFFSET * QI_BLOCK_NUMBER +- ); ++ ctrlpriv->qi = (struct caam_queue_if __force *)&topregs->qi; + /* This is all that's required to physically enable QI */ +- wr_reg32(&ctrlpriv->qi->qi_control_lo, QICTL_DQEN); ++ wr_reg32(&topregs->qi.qi_control_lo, QICTL_DQEN); + } + + /* If no QI and no rings specified, quit and go home */ +@@ -541,81 +467,53 @@ + return -ENOMEM; + } + +- cha_vid_ls = rd_reg32(&ctrl->perfmon.cha_id_ls); +- + /* +- * If SEC has RNG version >= 4 and RNG state handle has not been +- * already instantiated, do RNG instantiation ++ * RNG4 based SECs (v5+ | >= i.MX6) need special initialization prior ++ * to executing any descriptors. If there's a problem with init, ++ * remove other subsystems and return; internal padding functions ++ * cannot run without an RNG. This procedure assumes a single RNG4 ++ * instance. + */ +- if ((cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) { +- ctrlpriv->rng4_sh_init = +- rd_reg32(&ctrl->r4tst[0].rdsta); ++ if ((rd_reg64(&topregs->ctrl.perfmon.cha_id) & CHA_ID_RNG_MASK) ++ == CHA_ID_RNG_4) { ++ struct rng4tst __iomem *r4tst; ++ u32 rdsta, rng_if, rng_skvn; ++ + /* +- * If the secure keys (TDKEK, JDKEK, TDSK), were already +- * generated, signal this to the function that is instantiating +- * the state handles. An error would occur if RNG4 attempts +- * to regenerate these keys before the next POR. ++ * Check to see if the RNG has already been instantiated. ++ * If either the state 0 or 1 instantiated flags are set, ++ * then don't continue on and try to instantiate the RNG ++ * again. + */ +- gen_sk = ctrlpriv->rng4_sh_init & RDSTA_SKVN ? 0 : 1; +- ctrlpriv->rng4_sh_init &= RDSTA_IFMASK; +- do { +- int inst_handles = +- rd_reg32(&ctrl->r4tst[0].rdsta) & +- RDSTA_IFMASK; +- /* +- * If either SH were instantiated by somebody else +- * (e.g. u-boot) then it is assumed that the entropy +- * parameters are properly set and thus the function +- * setting these (kick_trng(...)) is skipped. +- * Also, if a handle was instantiated, do not change +- * the TRNG parameters. +- */ +- if (!(ctrlpriv->rng4_sh_init || inst_handles)) { +- dev_info(dev, +- "Entropy delay = %u\n", +- ent_delay); +- kick_trng(pdev, ent_delay); +- ent_delay += 400; ++ r4tst = &topregs->ctrl.r4tst[0]; ++ rdsta = rd_reg32(&r4tst->rdsta); /* Read RDSTA register */ ++ ++ /* Check IF bit for non-deterministic instantiation */ ++ rng_if = rdsta & RDSTA_IF; ++ ++ /* Check SKVN bit for non-deterministic key generation */ ++ rng_skvn = rdsta & RDSTA_SKVN; ++ if (!rng_if) { ++ kick_trng(pdev); ++ ret = instantiate_rng(ctrlpriv->jrdev[0], rng_skvn); ++ if (ret) { ++ caam_remove(pdev); ++ return -ENODEV; + } +- /* +- * if instantiate_rng(...) fails, the loop will rerun +- * and the kick_trng(...) function will modfiy the +- * upper and lower limits of the entropy sampling +- * interval, leading to a sucessful initialization of +- * the RNG. 
+- */ +- ret = instantiate_rng(dev, inst_handles, +- gen_sk); +- if (ret == -EAGAIN) +- /* +- * if here, the loop will rerun, +- * so don't hog the CPU +- */ +- cpu_relax(); +- } while ((ret == -EAGAIN) && (ent_delay < RTSDCTL_ENT_DLY_MAX)); +- if (ret) { +- dev_err(dev, "failed to instantiate RNG"); +- caam_remove(pdev); +- return ret; ++ ctrlpriv->rng_inst++; + } +- /* +- * Set handles init'ed by this module as the complement of the +- * already initialized ones +- */ +- ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK; +- +- /* Enable RDB bit so that RNG works faster */ +- setbits32(&ctrl->scfgr, SCFGR_RDBENABLE); + } + + /* NOTE: RTIC detection ought to go here, around Si time */ + +- caam_id = (u64)rd_reg32(&ctrl->perfmon.caam_id_ms) << 32 | +- (u64)rd_reg32(&ctrl->perfmon.caam_id_ls); ++ /* Initialize queue allocator lock */ ++ spin_lock_init(&ctrlpriv->jr_alloc_lock); ++ ++ caam_id = rd_reg64(&topregs->ctrl.perfmon.caam_id); + + /* Report "alive" for developer to see */ + dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id, +- caam_get_era()); ++ caam_get_era(caam_id)); + dev_info(dev, "job rings = %d, qi = %d\n", + ctrlpriv->total_jobrs, ctrlpriv->qi_present); + +@@ -627,7 +525,7 @@ + */ + perfmon = (struct caam_perfmon __force *)&ctrl->perfmon; + +- ctrlpriv->dfs_root = debugfs_create_dir(dev_name(dev), NULL); ++ ctrlpriv->dfs_root = debugfs_create_dir("caam", NULL); + ctrlpriv->ctl = debugfs_create_dir("ctl", ctrlpriv->dfs_root); + + /* Controller-level - performance monitor counters */ +@@ -716,6 +614,7 @@ + static struct platform_driver caam_driver = { + .driver = { + .name = "caam", ++ .owner = THIS_MODULE, + .of_match_table = caam_match, + }, + .probe = caam_probe, +diff -Nur linux-4.1.3/drivers/crypto/caam/ctrl.h linux-xbian-imx6/drivers/crypto/caam/ctrl.h +--- linux-4.1.3/drivers/crypto/caam/ctrl.h 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/ctrl.h 2015-07-27 23:13:04.209961631 +0200 +@@ -8,6 +8,6 @@ + #define CTRL_H + + /* Prototypes for backend-level services exposed to APIs */ +-int caam_get_era(void); ++int caam_get_era(u64 caam_id); + + #endif /* CTRL_H */ +diff -Nur linux-4.1.3/drivers/crypto/caam/desc_constr.h linux-xbian-imx6/drivers/crypto/caam/desc_constr.h +--- linux-4.1.3/drivers/crypto/caam/desc_constr.h 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/desc_constr.h 2015-07-27 23:13:04.209961631 +0200 +@@ -10,7 +10,6 @@ + #define CAAM_CMD_SZ sizeof(u32) + #define CAAM_PTR_SZ sizeof(dma_addr_t) + #define CAAM_DESC_BYTES_MAX (CAAM_CMD_SZ * MAX_CAAM_DESCSIZE) +-#define DESC_JOB_IO_LEN (CAAM_CMD_SZ * 5 + CAAM_PTR_SZ * 3) + + #ifdef DEBUG + #define PRINT_POS do { printk(KERN_DEBUG "%02d: %s\n", desc_len(desc),\ +@@ -111,26 +110,6 @@ + (*desc)++; + } + +-#define append_u32 append_cmd +- +-static inline void append_u64(u32 *desc, u64 data) +-{ +- u32 *offset = desc_end(desc); +- +- *offset = upper_32_bits(data); +- *(++offset) = lower_32_bits(data); +- +- (*desc) += 2; +-} +- +-/* Write command without affecting header, and return pointer to next word */ +-static inline u32 *write_cmd(u32 *desc, u32 command) +-{ +- *desc = command; +- +- return desc + 1; +-} +- + static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len, + u32 command) + { +@@ -143,8 +122,7 @@ + unsigned int len, u32 command) + { + append_cmd(desc, command); +- if (!(command & (SQIN_RTO | SQIN_PRE))) +- append_ptr(desc, ptr); ++ append_ptr(desc, ptr); + append_cmd(desc, len); + } + +@@ -155,29 +133,21 @@ + 
append_data(desc, data, len); + } + +-#define APPEND_CMD_RET(cmd, op) \ +-static inline u32 *append_##cmd(u32 *desc, u32 options) \ +-{ \ +- u32 *cmd = desc_end(desc); \ +- PRINT_POS; \ +- append_cmd(desc, CMD_##op | options); \ +- return cmd; \ ++static inline u32 *append_jump(u32 *desc, u32 options) ++{ ++ u32 *cmd = desc_end(desc); ++ ++ PRINT_POS; ++ append_cmd(desc, CMD_JUMP | options); ++ ++ return cmd; + } +-APPEND_CMD_RET(jump, JUMP) +-APPEND_CMD_RET(move, MOVE) + + static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd) + { + *jump_cmd = *jump_cmd | (desc_len(desc) - (jump_cmd - desc)); + } + +-static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd) +-{ +- *move_cmd &= ~MOVE_OFFSET_MASK; +- *move_cmd = *move_cmd | ((desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & +- MOVE_OFFSET_MASK); +-} +- + #define APPEND_CMD(cmd, op) \ + static inline void append_##cmd(u32 *desc, u32 options) \ + { \ +@@ -185,6 +155,7 @@ + append_cmd(desc, CMD_##op | options); \ + } + APPEND_CMD(operation, OPERATION) ++APPEND_CMD(move, MOVE) + + #define APPEND_CMD_LEN(cmd, op) \ + static inline void append_##cmd(u32 *desc, unsigned int len, u32 options) \ +@@ -192,8 +163,6 @@ + PRINT_POS; \ + append_cmd(desc, CMD_##op | len | options); \ + } +- +-APPEND_CMD_LEN(seq_load, SEQ_LOAD) + APPEND_CMD_LEN(seq_store, SEQ_STORE) + APPEND_CMD_LEN(seq_fifo_load, SEQ_FIFO_LOAD) + APPEND_CMD_LEN(seq_fifo_store, SEQ_FIFO_STORE) +@@ -207,36 +176,17 @@ + } + APPEND_CMD_PTR(key, KEY) + APPEND_CMD_PTR(load, LOAD) ++APPEND_CMD_PTR(store, STORE) + APPEND_CMD_PTR(fifo_load, FIFO_LOAD) + APPEND_CMD_PTR(fifo_store, FIFO_STORE) + +-static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len, +- u32 options) +-{ +- u32 cmd_src; +- +- cmd_src = options & LDST_SRCDST_MASK; +- +- append_cmd(desc, CMD_STORE | options | len); +- +- /* The following options do not require pointer */ +- if (!(cmd_src == LDST_SRCDST_WORD_DESCBUF_SHARED || +- cmd_src == LDST_SRCDST_WORD_DESCBUF_JOB || +- cmd_src == LDST_SRCDST_WORD_DESCBUF_JOB_WE || +- cmd_src == LDST_SRCDST_WORD_DESCBUF_SHARED_WE)) +- append_ptr(desc, ptr); +-} +- + #define APPEND_SEQ_PTR_INTLEN(cmd, op) \ + static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \ + unsigned int len, \ + u32 options) \ + { \ + PRINT_POS; \ +- if (options & (SQIN_RTO | SQIN_PRE)) \ +- append_cmd(desc, CMD_SEQ_##op##_PTR | len | options); \ +- else \ +- append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \ ++ append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \ + } + APPEND_SEQ_PTR_INTLEN(in, IN) + APPEND_SEQ_PTR_INTLEN(out, OUT) +@@ -309,7 +259,7 @@ + */ + #define APPEND_MATH(op, desc, dest, src_0, src_1, len) \ + append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \ +- MATH_SRC0_##src_0 | MATH_SRC1_##src_1 | (u32)len); ++ MATH_SRC0_##src_0 | MATH_SRC1_##src_1 | (u32) (len & MATH_LEN_MASK)); + + #define append_math_add(desc, dest, src0, src1, len) \ + APPEND_MATH(ADD, desc, dest, src0, src1, len) +@@ -329,15 +279,13 @@ + APPEND_MATH(LSHIFT, desc, dest, src0, src1, len) + #define append_math_rshift(desc, dest, src0, src1, len) \ + APPEND_MATH(RSHIFT, desc, dest, src0, src1, len) +-#define append_math_ldshift(desc, dest, src0, src1, len) \ +- APPEND_MATH(SHLD, desc, dest, src0, src1, len) + + /* Exactly one source is IMM. 
Data is passed in as u32 value */ + #define APPEND_MATH_IMM_u32(op, desc, dest, src_0, src_1, data) \ + do { \ + APPEND_MATH(op, desc, dest, src_0, src_1, CAAM_CMD_SZ); \ + append_cmd(desc, data); \ +-} while (0) ++} while (0); + + #define append_math_add_imm_u32(desc, dest, src0, src1, data) \ + APPEND_MATH_IMM_u32(ADD, desc, dest, src0, src1, data) +@@ -357,34 +305,3 @@ + APPEND_MATH_IMM_u32(LSHIFT, desc, dest, src0, src1, data) + #define append_math_rshift_imm_u32(desc, dest, src0, src1, data) \ + APPEND_MATH_IMM_u32(RSHIFT, desc, dest, src0, src1, data) +- +-/* Exactly one source is IMM. Data is passed in as u64 value */ +-#define APPEND_MATH_IMM_u64(op, desc, dest, src_0, src_1, data) \ +-do { \ +- u32 upper = (data >> 16) >> 16; \ +- APPEND_MATH(op, desc, dest, src_0, src_1, CAAM_CMD_SZ * 2 | \ +- (upper ? 0 : MATH_IFB)); \ +- if (upper) \ +- append_u64(desc, data); \ +- else \ +- append_u32(desc, data); \ +-} while (0) +- +-#define append_math_add_imm_u64(desc, dest, src0, src1, data) \ +- APPEND_MATH_IMM_u64(ADD, desc, dest, src0, src1, data) +-#define append_math_sub_imm_u64(desc, dest, src0, src1, data) \ +- APPEND_MATH_IMM_u64(SUB, desc, dest, src0, src1, data) +-#define append_math_add_c_imm_u64(desc, dest, src0, src1, data) \ +- APPEND_MATH_IMM_u64(ADDC, desc, dest, src0, src1, data) +-#define append_math_sub_b_imm_u64(desc, dest, src0, src1, data) \ +- APPEND_MATH_IMM_u64(SUBB, desc, dest, src0, src1, data) +-#define append_math_and_imm_u64(desc, dest, src0, src1, data) \ +- APPEND_MATH_IMM_u64(AND, desc, dest, src0, src1, data) +-#define append_math_or_imm_u64(desc, dest, src0, src1, data) \ +- APPEND_MATH_IMM_u64(OR, desc, dest, src0, src1, data) +-#define append_math_xor_imm_u64(desc, dest, src0, src1, data) \ +- APPEND_MATH_IMM_u64(XOR, desc, dest, src0, src1, data) +-#define append_math_lshift_imm_u64(desc, dest, src0, src1, data) \ +- APPEND_MATH_IMM_u64(LSHIFT, desc, dest, src0, src1, data) +-#define append_math_rshift_imm_u64(desc, dest, src0, src1, data) \ +- APPEND_MATH_IMM_u64(RSHIFT, desc, dest, src0, src1, data) +diff -Nur linux-4.1.3/drivers/crypto/caam/desc.h linux-xbian-imx6/drivers/crypto/caam/desc.h +--- linux-4.1.3/drivers/crypto/caam/desc.h 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/desc.h 2015-07-27 23:13:04.209961631 +0200 +@@ -2,19 +2,35 @@ + * CAAM descriptor composition header + * Definitions to support CAAM descriptor instruction generation + * +- * Copyright 2008-2011 Freescale Semiconductor, Inc. ++ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. + */ + + #ifndef DESC_H + #define DESC_H + ++/* ++ * 16-byte hardware scatter/gather table ++ * An 8-byte table exists in the hardware spec, but has never been ++ * implemented to date. The 8/16 option is selected at RTL-compile-time. 
++ * and this selection is visible in the Compile Time Parameters Register ++ */ ++ ++#define SEC4_SG_LEN_EXT 0x80000000 /* Entry points to table */ ++#define SEC4_SG_LEN_FIN 0x40000000 /* Last ent in table */ ++#define SEC4_SG_BPID_MASK 0x000000ff ++#define SEC4_SG_BPID_SHIFT 16 ++#define SEC4_SG_LEN_MASK 0x3fffffff /* Excludes EXT and FINAL */ ++#define SEC4_SG_OFFS_MASK 0x00001fff ++ + struct sec4_sg_entry { ++#ifdef CONFIG_64BIT + u64 ptr; +-#define SEC4_SG_LEN_FIN 0x40000000 +-#define SEC4_SG_LEN_EXT 0x80000000 ++#else ++ u32 reserved; ++ u32 ptr; ++#endif + u32 len; +- u8 reserved; +- u8 buf_pool_id; ++ u16 buf_pool_id; + u16 offset; + }; + +@@ -231,12 +247,7 @@ + #define LDST_SRCDST_WORD_PKHA_B_SZ (0x11 << LDST_SRCDST_SHIFT) + #define LDST_SRCDST_WORD_PKHA_N_SZ (0x12 << LDST_SRCDST_SHIFT) + #define LDST_SRCDST_WORD_PKHA_E_SZ (0x13 << LDST_SRCDST_SHIFT) +-#define LDST_SRCDST_WORD_CLASS_CTX (0x20 << LDST_SRCDST_SHIFT) + #define LDST_SRCDST_WORD_DESCBUF (0x40 << LDST_SRCDST_SHIFT) +-#define LDST_SRCDST_WORD_DESCBUF_JOB (0x41 << LDST_SRCDST_SHIFT) +-#define LDST_SRCDST_WORD_DESCBUF_SHARED (0x42 << LDST_SRCDST_SHIFT) +-#define LDST_SRCDST_WORD_DESCBUF_JOB_WE (0x45 << LDST_SRCDST_SHIFT) +-#define LDST_SRCDST_WORD_DESCBUF_SHARED_WE (0x46 << LDST_SRCDST_SHIFT) + #define LDST_SRCDST_WORD_INFO_FIFO (0x7a << LDST_SRCDST_SHIFT) + + /* Offset in source/destination */ +@@ -321,6 +332,7 @@ + /* Continue - Not the last FIFO store to come */ + #define FIFOST_CONT_SHIFT 23 + #define FIFOST_CONT_MASK (1 << FIFOST_CONT_SHIFT) ++#define FIFOST_CONT_MASK (1 << FIFOST_CONT_SHIFT) + + /* + * Extended Length - use 32-bit extended length that +@@ -370,7 +382,6 @@ + #define FIFOLD_TYPE_LAST2FLUSH1 (0x05 << FIFOLD_TYPE_SHIFT) + #define FIFOLD_TYPE_LASTBOTH (0x06 << FIFOLD_TYPE_SHIFT) + #define FIFOLD_TYPE_LASTBOTHFL (0x07 << FIFOLD_TYPE_SHIFT) +-#define FIFOLD_TYPE_NOINFOFIFO (0x0F << FIFOLD_TYPE_SHIFT) + + #define FIFOLDST_LEN_MASK 0xffff + #define FIFOLDST_EXT_LEN_MASK 0xffffffff +@@ -1092,6 +1103,23 @@ + #define OP_PCL_PKPROT_ECC 0x0002 + #define OP_PCL_PKPROT_F2M 0x0001 + ++/* Blob protocol protinfo bits */ ++#define OP_PCL_BLOB_TK 0x0200 ++#define OP_PCL_BLOB_EKT 0x0100 ++ ++#define OP_PCL_BLOB_K2KR_MEM 0x0000 ++#define OP_PCL_BLOB_K2KR_C1KR 0x0010 ++#define OP_PCL_BLOB_K2KR_C2KR 0x0030 ++#define OP_PCL_BLOB_K2KR_AFHAS 0x0050 ++#define OP_PCL_BLOB_K2KR_C2KR_SPLIT 0x0070 ++ ++#define OP_PCL_BLOB_PTXT_SECMEM 0x0008 ++#define OP_PCL_BLOB_BLACK 0x0004 ++ ++#define OP_PCL_BLOB_FMT_NORMAL 0x0000 ++#define OP_PCL_BLOB_FMT_MSTR 0x0002 ++#define OP_PCL_BLOB_FMT_TEST 0x0003 ++ + /* For non-protocol/alg-only op commands */ + #define OP_ALG_TYPE_SHIFT 24 + #define OP_ALG_TYPE_MASK (0x7 << OP_ALG_TYPE_SHIFT) +@@ -1154,15 +1182,8 @@ + + /* randomizer AAI set */ + #define OP_ALG_AAI_RNG (0x00 << OP_ALG_AAI_SHIFT) +-#define OP_ALG_AAI_RNG_NZB (0x10 << OP_ALG_AAI_SHIFT) +-#define OP_ALG_AAI_RNG_OBP (0x20 << OP_ALG_AAI_SHIFT) +- +-/* RNG4 AAI set */ +-#define OP_ALG_AAI_RNG4_SH_0 (0x00 << OP_ALG_AAI_SHIFT) +-#define OP_ALG_AAI_RNG4_SH_1 (0x01 << OP_ALG_AAI_SHIFT) +-#define OP_ALG_AAI_RNG4_PS (0x40 << OP_ALG_AAI_SHIFT) +-#define OP_ALG_AAI_RNG4_AI (0x80 << OP_ALG_AAI_SHIFT) +-#define OP_ALG_AAI_RNG4_SK (0x100 << OP_ALG_AAI_SHIFT) ++#define OP_ALG_AAI_RNG_NOZERO (0x10 << OP_ALG_AAI_SHIFT) ++#define OP_ALG_AAI_RNG_ODD (0x20 << OP_ALG_AAI_SHIFT) + + /* hmac/smac AAI set */ + #define OP_ALG_AAI_HASH (0x00 << OP_ALG_AAI_SHIFT) +@@ -1184,6 +1205,12 @@ + #define OP_ALG_AAI_GSM (0x10 << OP_ALG_AAI_SHIFT) + #define OP_ALG_AAI_EDGE 
(0x20 << OP_ALG_AAI_SHIFT) + ++/* RNG4 set */ ++#define OP_ALG_RNG4_SHIFT 4 ++#define OP_ALG_RNG4_MASK (0x1f3 << OP_ALG_RNG4_SHIFT) ++ ++#define OP_ALG_RNG4_SK (0x100 << OP_ALG_RNG4_SHIFT) ++ + #define OP_ALG_AS_SHIFT 2 + #define OP_ALG_AS_MASK (0x3 << OP_ALG_AS_SHIFT) + #define OP_ALG_AS_UPDATE (0 << OP_ALG_AS_SHIFT) +@@ -1300,10 +1327,10 @@ + #define SQOUT_SGF 0x01000000 + + /* Appends to a previous pointer */ +-#define SQOUT_PRE SQIN_PRE ++#define SQOUT_PRE 0x00800000 + + /* Restore sequence with pointer/length */ +-#define SQOUT_RTO SQIN_RTO ++#define SQOUT_RTO 0x00200000 + + /* Use extended length following pointer */ + #define SQOUT_EXT 0x00400000 +@@ -1365,7 +1392,6 @@ + #define MOVE_DEST_MATH3 (0x07 << MOVE_DEST_SHIFT) + #define MOVE_DEST_CLASS1INFIFO (0x08 << MOVE_DEST_SHIFT) + #define MOVE_DEST_CLASS2INFIFO (0x09 << MOVE_DEST_SHIFT) +-#define MOVE_DEST_INFIFO_NOINFO (0x0a << MOVE_DEST_SHIFT) + #define MOVE_DEST_PK_A (0x0c << MOVE_DEST_SHIFT) + #define MOVE_DEST_CLASS1KEY (0x0d << MOVE_DEST_SHIFT) + #define MOVE_DEST_CLASS2KEY (0x0e << MOVE_DEST_SHIFT) +@@ -1418,7 +1444,6 @@ + #define MATH_SRC0_REG2 (0x02 << MATH_SRC0_SHIFT) + #define MATH_SRC0_REG3 (0x03 << MATH_SRC0_SHIFT) + #define MATH_SRC0_IMM (0x04 << MATH_SRC0_SHIFT) +-#define MATH_SRC0_DPOVRD (0x07 << MATH_SRC0_SHIFT) + #define MATH_SRC0_SEQINLEN (0x08 << MATH_SRC0_SHIFT) + #define MATH_SRC0_SEQOUTLEN (0x09 << MATH_SRC0_SHIFT) + #define MATH_SRC0_VARSEQINLEN (0x0a << MATH_SRC0_SHIFT) +@@ -1433,7 +1458,6 @@ + #define MATH_SRC1_REG2 (0x02 << MATH_SRC1_SHIFT) + #define MATH_SRC1_REG3 (0x03 << MATH_SRC1_SHIFT) + #define MATH_SRC1_IMM (0x04 << MATH_SRC1_SHIFT) +-#define MATH_SRC1_DPOVRD (0x07 << MATH_SRC0_SHIFT) + #define MATH_SRC1_INFIFO (0x0a << MATH_SRC1_SHIFT) + #define MATH_SRC1_OUTFIFO (0x0b << MATH_SRC1_SHIFT) + #define MATH_SRC1_ONE (0x0c << MATH_SRC1_SHIFT) +@@ -1609,13 +1633,28 @@ + #define NFIFOENTRY_PLEN_SHIFT 0 + #define NFIFOENTRY_PLEN_MASK (0xFF << NFIFOENTRY_PLEN_SHIFT) + +-/* Append Load Immediate Command */ +-#define FD_CMD_APPEND_LOAD_IMMEDIATE 0x80000000 ++/* ++ * PDB internal definitions ++ */ ++ ++/* IPSec ESP CBC Encap/Decap Options */ ++#define PDBOPTS_ESPCBC_ARSNONE 0x00 /* no antireplay window */ ++#define PDBOPTS_ESPCBC_ARS32 0x40 /* 32-entry antireplay window */ ++#define PDBOPTS_ESPCBC_ARS64 0xc0 /* 64-entry antireplay window */ ++#define PDBOPTS_ESPCBC_IVSRC 0x20 /* IV comes from internal random gen */ ++#define PDBOPTS_ESPCBC_ESN 0x10 /* extended sequence included */ ++#define PDBOPTS_ESPCBC_OUTFMT 0x08 /* output only decapsulation (decap) */ ++#define PDBOPTS_ESPCBC_IPHDRSRC 0x08 /* IP header comes from PDB (encap) */ ++#define PDBOPTS_ESPCBC_INCIPHDR 0x04 /* Prepend IP header to output frame */ ++#define PDBOPTS_ESPCBC_IPVSN 0x02 /* process IPv6 header */ ++#define PDBOPTS_ESPCBC_TUNNEL 0x01 /* tunnel mode next-header byte */ ++ ++#define ARC4_BLOCK_SIZE 1 ++#define ARC4_MAX_KEY_SIZE 256 ++#define ARC4_MIN_KEY_SIZE 1 + +-/* Set SEQ LIODN equal to the Non-SEQ LIODN for the job */ +-#define FD_CMD_SET_SEQ_LIODN_EQUAL_NONSEQ_LIODN 0x40000000 ++#define XCBC_MAC_DIGEST_SIZE 16 ++#define XCBC_MAC_BLOCK_WORDS 16 + +-/* Frame Descriptor Command for Replacement Job Descriptor */ +-#define FD_CMD_REPLACE_JOB_DESC 0x20000000 + + #endif /* DESC_H */ +diff -Nur linux-4.1.3/drivers/crypto/caam/error.c linux-xbian-imx6/drivers/crypto/caam/error.c +--- linux-4.1.3/drivers/crypto/caam/error.c 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/error.c 2015-07-27 23:13:04.213947410 +0200 
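/*
 * Aside: a minimal sketch of filling a single-entry SEC4 scatter/gather
 * table using the 32-bit sec4_sg_entry layout and the SEC4_SG_* flags the
 * desc.h hunk above defines. The helper name is illustrative; only the
 * struct fields and flag macros come from this patch.
 */
#include "desc.h"	/* struct sec4_sg_entry, SEC4_SG_* macros above */

static inline void sec4_sg_one_entry(struct sec4_sg_entry *sg, u32 dma,
				     u32 len, u16 offset)
{
	sg->reserved = 0;			/* upper word unused in 32-bit builds */
	sg->ptr = dma;				/* bus address of the data buffer */
	sg->len = (len & SEC4_SG_LEN_MASK) |	/* length, EXT/FIN bits excluded */
		  SEC4_SG_LEN_FIN;		/* mark as the final entry */
	sg->buf_pool_id = 0;			/* no buffer pool in use */
	sg->offset = offset & SEC4_SG_OFFS_MASK;
}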
+@@ -11,243 +11,264 @@ + #include "jr.h" + #include "error.h" + +-static const struct { +- u8 value; +- const char *error_text; +-} desc_error_list[] = { +- { 0x00, "No error." }, +- { 0x01, "SGT Length Error. The descriptor is trying to read more data than is contained in the SGT table." }, +- { 0x02, "SGT Null Entry Error." }, +- { 0x03, "Job Ring Control Error. There is a bad value in the Job Ring Control register." }, +- { 0x04, "Invalid Descriptor Command. The Descriptor Command field is invalid." }, +- { 0x05, "Reserved." }, +- { 0x06, "Invalid KEY Command" }, +- { 0x07, "Invalid LOAD Command" }, +- { 0x08, "Invalid STORE Command" }, +- { 0x09, "Invalid OPERATION Command" }, +- { 0x0A, "Invalid FIFO LOAD Command" }, +- { 0x0B, "Invalid FIFO STORE Command" }, +- { 0x0C, "Invalid MOVE/MOVE_LEN Command" }, +- { 0x0D, "Invalid JUMP Command. A nonlocal JUMP Command is invalid because the target is not a Job Header Command, or the jump is from a Trusted Descriptor to a Job Descriptor, or because the target Descriptor contains a Shared Descriptor." }, +- { 0x0E, "Invalid MATH Command" }, +- { 0x0F, "Invalid SIGNATURE Command" }, +- { 0x10, "Invalid Sequence Command. A SEQ IN PTR OR SEQ OUT PTR Command is invalid or a SEQ KEY, SEQ LOAD, SEQ FIFO LOAD, or SEQ FIFO STORE decremented the input or output sequence length below 0. This error may result if a built-in PROTOCOL Command has encountered a malformed PDU." }, +- { 0x11, "Skip data type invalid. The type must be 0xE or 0xF."}, +- { 0x12, "Shared Descriptor Header Error" }, +- { 0x13, "Header Error. Invalid length or parity, or certain other problems." }, +- { 0x14, "Burster Error. Burster has gotten to an illegal state" }, +- { 0x15, "Context Register Length Error. The descriptor is trying to read or write past the end of the Context Register. A SEQ LOAD or SEQ STORE with the VLF bit set was executed with too large a length in the variable length register (VSOL for SEQ STORE or VSIL for SEQ LOAD)." }, +- { 0x16, "DMA Error" }, +- { 0x17, "Reserved." }, +- { 0x1A, "Job failed due to JR reset" }, +- { 0x1B, "Job failed due to Fail Mode" }, +- { 0x1C, "DECO Watchdog timer timeout error" }, +- { 0x1D, "DECO tried to copy a key from another DECO but the other DECO's Key Registers were locked" }, +- { 0x1E, "DECO attempted to copy data from a DECO that had an unmasked Descriptor error" }, +- { 0x1F, "LIODN error. DECO was trying to share from itself or from another DECO but the two Non-SEQ LIODN values didn't match or the 'shared from' DECO's Descriptor required that the SEQ LIODNs be the same and they aren't." }, +- { 0x20, "DECO has completed a reset initiated via the DRR register" }, +- { 0x21, "Nonce error. When using EKT (CCM) key encryption option in the FIFO STORE Command, the Nonce counter reached its maximum value and this encryption mode can no longer be used." }, +- { 0x22, "Meta data is too large (> 511 bytes) for TLS decap (input frame; block ciphers) and IPsec decap (output frame, when doing the next header byte update) and DCRC (output frame)." 
}, +- { 0x23, "Read Input Frame error" }, +- { 0x24, "JDKEK, TDKEK or TDSK not loaded error" }, +- { 0x80, "DNR (do not run) error" }, +- { 0x81, "undefined protocol command" }, +- { 0x82, "invalid setting in PDB" }, +- { 0x83, "Anti-replay LATE error" }, +- { 0x84, "Anti-replay REPLAY error" }, +- { 0x85, "Sequence number overflow" }, +- { 0x86, "Sigver invalid signature" }, +- { 0x87, "DSA Sign Illegal test descriptor" }, +- { 0x88, "Protocol Format Error - A protocol has seen an error in the format of data received. When running RSA, this means that formatting with random padding was used, and did not follow the form: 0x00, 0x02, 8-to-N bytes of non-zero pad, 0x00, F data." }, +- { 0x89, "Protocol Size Error - A protocol has seen an error in size. When running RSA, pdb size N < (size of F) when no formatting is used; or pdb size N < (F + 11) when formatting is used." }, +- { 0xC1, "Blob Command error: Undefined mode" }, +- { 0xC2, "Blob Command error: Secure Memory Blob mode error" }, +- { 0xC4, "Blob Command error: Black Blob key or input size error" }, +- { 0xC5, "Blob Command error: Invalid key destination" }, +- { 0xC8, "Blob Command error: Trusted/Secure mode error" }, +- { 0xF0, "IPsec TTL or hop limit field either came in as 0, or was decremented to 0" }, +- { 0xF1, "3GPP HFN matches or exceeds the Threshold" }, +-}; +- +-static const char * const cha_id_list[] = { +- "", +- "AES", +- "DES", +- "ARC4", +- "MDHA", +- "RNG", +- "SNOW f8", +- "Kasumi f8/9", +- "PKHA", +- "CRCA", +- "SNOW f9", +- "ZUCE", +- "ZUCA", +-}; +- +-static const char * const err_id_list[] = { +- "No error.", +- "Mode error.", +- "Data size error.", +- "Key size error.", +- "PKHA A memory size error.", +- "PKHA B memory size error.", +- "Data arrived out of sequence error.", +- "PKHA divide-by-zero error.", +- "PKHA modulus even error.", +- "DES key parity error.", +- "ICV check failed.", +- "Hardware error.", +- "Unsupported CCM AAD size.", +- "Class 1 CHA is not reset", +- "Invalid CHA combination was selected", +- "Invalid CHA selected.", +-}; +- +-static const char * const rng_err_id_list[] = { +- "", +- "", +- "", +- "Instantiate", +- "Not instantiated", +- "Test instantiate", +- "Prediction resistance", +- "Prediction resistance and test request", +- "Uninstantiate", +- "Secure key generation", +-}; ++#define SPRINTFCAT(str, format, param, max_alloc) \ ++{ \ ++ char *tmp; \ ++ \ ++ tmp = kmalloc(sizeof(format) + max_alloc, GFP_ATOMIC); \ ++ if (likely(tmp)) { \ ++ sprintf(tmp, format, param); \ ++ strcat(str, tmp); \ ++ kfree(tmp); \ ++ } else { \ ++ strcat(str, "kmalloc failure in SPRINTFCAT"); \ ++ } \ ++} + +-static void report_ccb_status(struct device *jrdev, const u32 status, +- const char *error) ++static void report_jump_idx(u32 status, char *outstr) + { +- u8 cha_id = (status & JRSTA_CCBERR_CHAID_MASK) >> +- JRSTA_CCBERR_CHAID_SHIFT; +- u8 err_id = status & JRSTA_CCBERR_ERRID_MASK; + u8 idx = (status & JRSTA_DECOERR_INDEX_MASK) >> + JRSTA_DECOERR_INDEX_SHIFT; +- char *idx_str; +- const char *cha_str = "unidentified cha_id value 0x"; +- char cha_err_code[3] = { 0 }; +- const char *err_str = "unidentified err_id value 0x"; +- char err_err_code[3] = { 0 }; + + if (status & JRSTA_DECOERR_JUMP) +- idx_str = "jump tgt desc idx"; ++ strcat(outstr, "jump tgt desc idx "); + else +- idx_str = "desc idx"; ++ strcat(outstr, "desc idx "); + +- if (cha_id < ARRAY_SIZE(cha_id_list)) +- cha_str = cha_id_list[cha_id]; +- else +- snprintf(cha_err_code, sizeof(cha_err_code), "%02x", cha_id); ++ SPRINTFCAT(outstr, 
"%d: ", idx, sizeof("255")); ++} ++ ++static void report_ccb_status(u32 status, char *outstr) ++{ ++ static const char * const cha_id_list[] = { ++ "", ++ "AES", ++ "DES", ++ "ARC4", ++ "MDHA", ++ "RNG", ++ "SNOW f8", ++ "Kasumi f8/9", ++ "PKHA", ++ "CRCA", ++ "SNOW f9", ++ "ZUCE", ++ "ZUCA", ++ }; ++ static const char * const err_id_list[] = { ++ "No error.", ++ "Mode error.", ++ "Data size error.", ++ "Key size error.", ++ "PKHA A memory size error.", ++ "PKHA B memory size error.", ++ "Data arrived out of sequence error.", ++ "PKHA divide-by-zero error.", ++ "PKHA modulus even error.", ++ "DES key parity error.", ++ "ICV check failed.", ++ "Hardware error.", ++ "Unsupported CCM AAD size.", ++ "Class 1 CHA is not reset", ++ "Invalid CHA combination was selected", ++ "Invalid CHA selected.", ++ }; ++ static const char * const rng_err_id_list[] = { ++ "", ++ "", ++ "", ++ "Instantiate", ++ "Not instantiated", ++ "Test instantiate", ++ "Prediction resistance", ++ "Prediction resistance and test request", ++ "Uninstantiate", ++ "Secure key generation", ++ }; ++ u8 cha_id = (status & JRSTA_CCBERR_CHAID_MASK) >> ++ JRSTA_CCBERR_CHAID_SHIFT; ++ u8 err_id = status & JRSTA_CCBERR_ERRID_MASK; ++ ++ report_jump_idx(status, outstr); ++ ++ if (cha_id < ARRAY_SIZE(cha_id_list)) { ++ SPRINTFCAT(outstr, "%s: ", cha_id_list[cha_id], ++ strlen(cha_id_list[cha_id])); ++ } else { ++ SPRINTFCAT(outstr, "unidentified cha_id value 0x%02x: ", ++ cha_id, sizeof("ff")); ++ } + + if ((cha_id << JRSTA_CCBERR_CHAID_SHIFT) == JRSTA_CCBERR_CHAID_RNG && + err_id < ARRAY_SIZE(rng_err_id_list) && + strlen(rng_err_id_list[err_id])) { + /* RNG-only error */ +- err_str = rng_err_id_list[err_id]; +- } else if (err_id < ARRAY_SIZE(err_id_list)) +- err_str = err_id_list[err_id]; +- else +- snprintf(err_err_code, sizeof(err_err_code), "%02x", err_id); +- +- /* +- * CCB ICV check failures are part of normal operation life; +- * we leave the upper layers to do what they want with them. +- */ +- if (err_id != JRSTA_CCBERR_ERRID_ICVCHK) +- dev_err(jrdev, "%08x: %s: %s %d: %s%s: %s%s\n", +- status, error, idx_str, idx, +- cha_str, cha_err_code, +- err_str, err_err_code); ++ SPRINTFCAT(outstr, "%s", rng_err_id_list[err_id], ++ strlen(rng_err_id_list[err_id])); ++ } else if (err_id < ARRAY_SIZE(err_id_list)) { ++ SPRINTFCAT(outstr, "%s", err_id_list[err_id], ++ strlen(err_id_list[err_id])); ++ } else { ++ SPRINTFCAT(outstr, "unidentified err_id value 0x%02x", ++ err_id, sizeof("ff")); ++ } + } + +-static void report_jump_status(struct device *jrdev, const u32 status, +- const char *error) ++static void report_jump_status(u32 status, char *outstr) + { +- dev_err(jrdev, "%08x: %s: %s() not implemented\n", +- status, error, __func__); ++ SPRINTFCAT(outstr, "%s() not implemented", __func__, sizeof(__func__)); + } + +-static void report_deco_status(struct device *jrdev, const u32 status, +- const char *error) ++static void report_deco_status(u32 status, char *outstr) + { +- u8 err_id = status & JRSTA_DECOERR_ERROR_MASK; +- u8 idx = (status & JRSTA_DECOERR_INDEX_MASK) >> +- JRSTA_DECOERR_INDEX_SHIFT; +- char *idx_str; +- const char *err_str = "unidentified error value 0x"; +- char err_err_code[3] = { 0 }; ++ static const struct { ++ u8 value; ++ char *error_text; ++ } desc_error_list[] = { ++ { 0x00, "No error." }, ++ { 0x01, "SGT Length Error. The descriptor is trying to read " ++ "more data than is contained in the SGT table." }, ++ { 0x02, "SGT Null Entry Error." }, ++ { 0x03, "Job Ring Control Error. 
There is a bad value in the " ++ "Job Ring Control register." }, ++ { 0x04, "Invalid Descriptor Command. The Descriptor Command " ++ "field is invalid." }, ++ { 0x05, "Reserved." }, ++ { 0x06, "Invalid KEY Command" }, ++ { 0x07, "Invalid LOAD Command" }, ++ { 0x08, "Invalid STORE Command" }, ++ { 0x09, "Invalid OPERATION Command" }, ++ { 0x0A, "Invalid FIFO LOAD Command" }, ++ { 0x0B, "Invalid FIFO STORE Command" }, ++ { 0x0C, "Invalid MOVE/MOVE_LEN Command" }, ++ { 0x0D, "Invalid JUMP Command. A nonlocal JUMP Command is " ++ "invalid because the target is not a Job Header " ++ "Command, or the jump is from a Trusted Descriptor to " ++ "a Job Descriptor, or because the target Descriptor " ++ "contains a Shared Descriptor." }, ++ { 0x0E, "Invalid MATH Command" }, ++ { 0x0F, "Invalid SIGNATURE Command" }, ++ { 0x10, "Invalid Sequence Command. A SEQ IN PTR OR SEQ OUT PTR " ++ "Command is invalid or a SEQ KEY, SEQ LOAD, SEQ FIFO " ++ "LOAD, or SEQ FIFO STORE decremented the input or " ++ "output sequence length below 0. This error may result " ++ "if a built-in PROTOCOL Command has encountered a " ++ "malformed PDU." }, ++ { 0x11, "Skip data type invalid. The type must be 0xE or 0xF."}, ++ { 0x12, "Shared Descriptor Header Error" }, ++ { 0x13, "Header Error. Invalid length or parity, or certain " ++ "other problems." }, ++ { 0x14, "Burster Error. Burster has gotten to an illegal " ++ "state" }, ++ { 0x15, "Context Register Length Error. The descriptor is " ++ "trying to read or write past the end of the Context " ++ "Register. A SEQ LOAD or SEQ STORE with the VLF bit " ++ "set was executed with too large a length in the " ++ "variable length register (VSOL for SEQ STORE or VSIL " ++ "for SEQ LOAD)." }, ++ { 0x16, "DMA Error" }, ++ { 0x17, "Reserved." }, ++ { 0x1A, "Job failed due to JR reset" }, ++ { 0x1B, "Job failed due to Fail Mode" }, ++ { 0x1C, "DECO Watchdog timer timeout error" }, ++ { 0x1D, "DECO tried to copy a key from another DECO but the " ++ "other DECO's Key Registers were locked" }, ++ { 0x1E, "DECO attempted to copy data from a DECO that had an " ++ "unmasked Descriptor error" }, ++ { 0x1F, "LIODN error. DECO was trying to share from itself or " ++ "from another DECO but the two Non-SEQ LIODN values " ++ "didn't match or the 'shared from' DECO's Descriptor " ++ "required that the SEQ LIODNs be the same and they " ++ "aren't." }, ++ { 0x20, "DECO has completed a reset initiated via the DRR " ++ "register" }, ++ { 0x21, "Nonce error. When using EKT (CCM) key encryption " ++ "option in the FIFO STORE Command, the Nonce counter " ++ "reached its maximum value and this encryption mode " ++ "can no longer be used." }, ++ { 0x22, "Meta data is too large (> 511 bytes) for TLS decap " ++ "(input frame; block ciphers) and IPsec decap (output " ++ "frame, when doing the next header byte update) and " ++ "DCRC (output frame)." }, ++ { 0x23, "Read Input Frame error" }, ++ { 0x24, "JDKEK, TDKEK or TDSK not loaded error" }, ++ { 0x80, "DNR (do not run) error" }, ++ { 0x81, "undefined protocol command" }, ++ { 0x82, "invalid setting in PDB" }, ++ { 0x83, "Anti-replay LATE error" }, ++ { 0x84, "Anti-replay REPLAY error" }, ++ { 0x85, "Sequence number overflow" }, ++ { 0x86, "Sigver invalid signature" }, ++ { 0x87, "DSA Sign Illegal test descriptor" }, ++ { 0x88, "Protocol Format Error - A protocol has seen an error " ++ "in the format of data received. 
When running RSA, " ++ "this means that formatting with random padding was " ++ "used, and did not follow the form: 0x00, 0x02, 8-to-N " ++ "bytes of non-zero pad, 0x00, F data." }, ++ { 0x89, "Protocol Size Error - A protocol has seen an error in " ++ "size. When running RSA, pdb size N < (size of F) when " ++ "no formatting is used; or pdb size N < (F + 11) when " ++ "formatting is used." }, ++ { 0xC1, "Blob Command error: Undefined mode" }, ++ { 0xC2, "Blob Command error: Secure Memory Blob mode error" }, ++ { 0xC4, "Blob Command error: Black Blob key or input size " ++ "error" }, ++ { 0xC5, "Blob Command error: Invalid key destination" }, ++ { 0xC8, "Blob Command error: Trusted/Secure mode error" }, ++ { 0xF0, "IPsec TTL or hop limit field either came in as 0, " ++ "or was decremented to 0" }, ++ { 0xF1, "3GPP HFN matches or exceeds the Threshold" }, ++ }; ++ u8 desc_error = status & JRSTA_DECOERR_ERROR_MASK; + int i; + +- if (status & JRSTA_DECOERR_JUMP) +- idx_str = "jump tgt desc idx"; +- else +- idx_str = "desc idx"; ++ report_jump_idx(status, outstr); + + for (i = 0; i < ARRAY_SIZE(desc_error_list); i++) +- if (desc_error_list[i].value == err_id) ++ if (desc_error_list[i].value == desc_error) + break; + +- if (i != ARRAY_SIZE(desc_error_list) && desc_error_list[i].error_text) +- err_str = desc_error_list[i].error_text; +- else +- snprintf(err_err_code, sizeof(err_err_code), "%02x", err_id); +- +- dev_err(jrdev, "%08x: %s: %s %d: %s%s\n", +- status, error, idx_str, idx, err_str, err_err_code); ++ if (i != ARRAY_SIZE(desc_error_list) && desc_error_list[i].error_text) { ++ SPRINTFCAT(outstr, "%s", desc_error_list[i].error_text, ++ strlen(desc_error_list[i].error_text)); ++ } else { ++ SPRINTFCAT(outstr, "unidentified error value 0x%02x", ++ desc_error, sizeof("ff")); ++ } + } + +-static void report_jr_status(struct device *jrdev, const u32 status, +- const char *error) ++static void report_jr_status(u32 status, char *outstr) + { +- dev_err(jrdev, "%08x: %s: %s() not implemented\n", +- status, error, __func__); ++ SPRINTFCAT(outstr, "%s() not implemented", __func__, sizeof(__func__)); + } + +-static void report_cond_code_status(struct device *jrdev, const u32 status, +- const char *error) ++static void report_cond_code_status(u32 status, char *outstr) + { +- dev_err(jrdev, "%08x: %s: %s() not implemented\n", +- status, error, __func__); ++ SPRINTFCAT(outstr, "%s() not implemented", __func__, sizeof(__func__)); + } + +-void caam_jr_strstatus(struct device *jrdev, u32 status) ++char *caam_jr_strstatus(char *outstr, u32 status) + { + static const struct stat_src { +- void (*report_ssed)(struct device *jrdev, const u32 status, +- const char *error); +- const char *error; +- } status_src[16] = { ++ void (*report_ssed)(u32 status, char *outstr); ++ char *error; ++ } status_src[] = { + { NULL, "No error" }, + { NULL, NULL }, + { report_ccb_status, "CCB" }, + { report_jump_status, "Jump" }, + { report_deco_status, "DECO" }, +- { NULL, "Queue Manager Interface" }, ++ { NULL, NULL }, + { report_jr_status, "Job Ring" }, + { report_cond_code_status, "Condition Code" }, +- { NULL, NULL }, +- { NULL, NULL }, +- { NULL, NULL }, +- { NULL, NULL }, +- { NULL, NULL }, +- { NULL, NULL }, +- { NULL, NULL }, +- { NULL, NULL }, + }; + u32 ssrc = status >> JRSTA_SSRC_SHIFT; +- const char *error = status_src[ssrc].error; + +- /* +- * If there is an error handling function, call it to report the error. +- * Otherwise print the error source name. 
+- */ ++ sprintf(outstr, "%s: ", status_src[ssrc].error); ++ + if (status_src[ssrc].report_ssed) +- status_src[ssrc].report_ssed(jrdev, status, error); +- else if (error) +- dev_err(jrdev, "%d: %s\n", ssrc, error); +- else +- dev_err(jrdev, "%d: unknown error source\n", ssrc); ++ status_src[ssrc].report_ssed(status, outstr); ++ ++ return outstr; + } + EXPORT_SYMBOL(caam_jr_strstatus); +diff -Nur linux-4.1.3/drivers/crypto/caam/error.h linux-xbian-imx6/drivers/crypto/caam/error.h +--- linux-4.1.3/drivers/crypto/caam/error.h 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/error.h 2015-07-27 23:13:04.213947410 +0200 +@@ -7,5 +7,5 @@ + #ifndef CAAM_ERROR_H + #define CAAM_ERROR_H + #define CAAM_ERROR_STR_MAX 302 +-void caam_jr_strstatus(struct device *jrdev, u32 status); ++extern char *caam_jr_strstatus(char *outstr, u32 status); + #endif /* CAAM_ERROR_H */ +diff -Nur linux-4.1.3/drivers/crypto/caam/intern.h linux-xbian-imx6/drivers/crypto/caam/intern.h +--- linux-4.1.3/drivers/crypto/caam/intern.h 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/intern.h 2015-07-27 23:13:04.213947410 +0200 +@@ -2,13 +2,19 @@ + * CAAM/SEC 4.x driver backend + * Private/internal definitions between modules + * +- * Copyright 2008-2011 Freescale Semiconductor, Inc. ++ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. + * + */ + + #ifndef INTERN_H + #define INTERN_H + ++#define JOBR_UNASSIGNED 0 ++#define JOBR_ASSIGNED 1 ++ ++/* Default clock/sample settings for an RNG4 entropy source */ ++#define RNG4_ENT_CLOCKS_SAMPLE 1600 ++ + /* Currently comes from Kconfig param as a ^2 (driver-required) */ + #define JOBR_DEPTH (1 << CONFIG_CRYPTO_DEV_FSL_CAAM_RINGSIZE) + +@@ -37,15 +43,13 @@ + + /* Private sub-storage for a single JobR */ + struct caam_drv_private_jr { +- struct list_head list_node; /* Job Ring device list */ +- struct device *dev; ++ struct device *parentdev; /* points back to controller dev */ ++ struct platform_device *jr_pdev;/* points to platform device for JR */ + int ridx; + struct caam_job_ring __iomem *rregs; /* JobR's register space */ + struct tasklet_struct irqtask; + int irq; /* One per queue */ +- +- /* Number of scatterlist crypt transforms active on the JobR */ +- atomic_t tfm_count ____cacheline_aligned; ++ int assign; /* busy/free */ + + /* Job ring info */ + int ringsize; /* Size of rings (assume input = output) */ +@@ -66,15 +70,20 @@ + struct caam_drv_private { + + struct device *dev; +- struct platform_device **jrpdev; /* Alloc'ed array per sub-device */ ++ struct device *smdev; ++ struct device *secviodev; ++ struct device **jrdev; /* Alloc'ed array per sub-device */ ++ spinlock_t jr_alloc_lock; + struct platform_device *pdev; + + /* Physical-presence section */ +- struct caam_ctrl __iomem *ctrl; /* controller region */ +- struct caam_deco __iomem *deco; /* DECO/CCB views */ +- struct caam_assurance __iomem *assure; +- struct caam_queue_if __iomem *qi; /* QI control region */ +- struct caam_job_ring __iomem *jr[4]; /* JobR's register space */ ++ struct caam_ctrl *ctrl; /* controller region */ ++ struct caam_deco **deco; /* DECO/CCB views */ ++ struct caam_assurance *ac; ++ struct caam_queue_if *qi; /* QI control region */ ++ struct snvs_full __iomem *snvs; /* SNVS HP+LP register space */ ++ dma_addr_t __iomem *sm_base; /* Secure memory storage base */ ++ u32 sm_size; + + /* + * Detected geometry block. 
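With this rework, caam_jr_strstatus() no longer prints anything itself: it renders the decoded status into a caller-supplied buffer and returns that buffer. A minimal caller sketch (the callback name is hypothetical; the pattern mirrors the key_gen.c hunk later in this patch) — the buffer must be CAAM_ERROR_STR_MAX bytes, since the report helpers concatenate into it without bounds checks:

#include "error.h"

/* Hypothetical job-completion callback using the reworked API */
static void example_done(struct device *dev, u32 *desc, u32 status,
			 void *arg)
{
	if (status) {
		char outstr[CAAM_ERROR_STR_MAX];

		dev_err(dev, "%08x: %s\n", status,
			caam_jr_strstatus(outstr, status));
	}
}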
Filled in from device tree if powerpc, +@@ -83,14 +92,22 @@ + u8 total_jobrs; /* Total Job Rings in device */ + u8 qi_present; /* Nonzero if QI present in device */ + int secvio_irq; /* Security violation interrupt number */ +- int virt_en; /* Virtualization enabled in CAAM */ +- +-#define RNG4_MAX_HANDLES 2 +- /* RNG4 block */ +- u32 rng4_sh_init; /* This bitmap shows which of the State +- Handles of the RNG4 block are initialized +- by this driver */ ++ int rng_inst; /* Total instantiated RNGs */ + ++ /* which jr allocated to scatterlist crypto */ ++ atomic_t tfm_count ____cacheline_aligned; ++ int num_jrs_for_algapi; ++ struct device **algapi_jr; ++ /* list of registered crypto algorithms (mk generic context handle?) */ ++ struct list_head alg_list; ++ /* list of registered hash algorithms (mk generic context handle?) */ ++ struct list_head hash_list; ++ ++#ifdef CONFIG_ARM ++ struct clk *caam_ipg; ++ struct clk *caam_mem; ++ struct clk *caam_aclk; ++#endif + /* + * debugfs entries for developer view into driver/device + * variables at runtime. +diff -Nur linux-4.1.3/drivers/crypto/caam/jr.c linux-xbian-imx6/drivers/crypto/caam/jr.c +--- linux-4.1.3/drivers/crypto/caam/jr.c 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/jr.c 2015-07-27 23:13:04.213947410 +0200 +@@ -2,125 +2,15 @@ + * CAAM/SEC 4.x transport/backend driver + * JobR backend functionality + * +- * Copyright 2008-2012 Freescale Semiconductor, Inc. ++ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. + */ + +-#include +-#include +- + #include "compat.h" + #include "regs.h" + #include "jr.h" + #include "desc.h" + #include "intern.h" + +-struct jr_driver_data { +- /* List of Physical JobR's with the Driver */ +- struct list_head jr_list; +- spinlock_t jr_alloc_lock; /* jr_list lock */ +-} ____cacheline_aligned; +- +-static struct jr_driver_data driver_data; +- +-static int caam_reset_hw_jr(struct device *dev) +-{ +- struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); +- unsigned int timeout = 100000; +- +- /* +- * mask interrupts since we are going to poll +- * for reset completion status +- */ +- setbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); +- +- /* initiate flush (required prior to reset) */ +- wr_reg32(&jrp->rregs->jrcommand, JRCR_RESET); +- while (((rd_reg32(&jrp->rregs->jrintstatus) & JRINT_ERR_HALT_MASK) == +- JRINT_ERR_HALT_INPROGRESS) && --timeout) +- cpu_relax(); +- +- if ((rd_reg32(&jrp->rregs->jrintstatus) & JRINT_ERR_HALT_MASK) != +- JRINT_ERR_HALT_COMPLETE || timeout == 0) { +- dev_err(dev, "failed to flush job ring %d\n", jrp->ridx); +- return -EIO; +- } +- +- /* initiate reset */ +- timeout = 100000; +- wr_reg32(&jrp->rregs->jrcommand, JRCR_RESET); +- while ((rd_reg32(&jrp->rregs->jrcommand) & JRCR_RESET) && --timeout) +- cpu_relax(); +- +- if (timeout == 0) { +- dev_err(dev, "failed to reset job ring %d\n", jrp->ridx); +- return -EIO; +- } +- +- /* unmask interrupts */ +- clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); +- +- return 0; +-} +- +-/* +- * Shutdown JobR independent of platform property code +- */ +-int caam_jr_shutdown(struct device *dev) +-{ +- struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); +- dma_addr_t inpbusaddr, outbusaddr; +- int ret; +- +- ret = caam_reset_hw_jr(dev); +- +- tasklet_kill(&jrp->irqtask); +- +- /* Release interrupt */ +- free_irq(jrp->irq, dev); +- +- /* Free rings */ +- inpbusaddr = rd_reg64(&jrp->rregs->inpring_base); +- outbusaddr = rd_reg64(&jrp->rregs->outring_base); +- dma_free_coherent(dev, sizeof(dma_addr_t) * 
JOBR_DEPTH, +- jrp->inpring, inpbusaddr); +- dma_free_coherent(dev, sizeof(struct jr_outentry) * JOBR_DEPTH, +- jrp->outring, outbusaddr); +- kfree(jrp->entinfo); +- +- return ret; +-} +- +-static int caam_jr_remove(struct platform_device *pdev) +-{ +- int ret; +- struct device *jrdev; +- struct caam_drv_private_jr *jrpriv; +- +- jrdev = &pdev->dev; +- jrpriv = dev_get_drvdata(jrdev); +- +- /* +- * Return EBUSY if job ring already allocated. +- */ +- if (atomic_read(&jrpriv->tfm_count)) { +- dev_err(jrdev, "Device is busy\n"); +- return -EBUSY; +- } +- +- /* Remove the node from Physical JobR list maintained by driver */ +- spin_lock(&driver_data.jr_alloc_lock); +- list_del(&jrpriv->list_node); +- spin_unlock(&driver_data.jr_alloc_lock); +- +- /* Release ring */ +- ret = caam_jr_shutdown(jrdev); +- if (ret) +- dev_err(jrdev, "Failed to shut down job ring\n"); +- irq_dispose_mapping(jrpriv->irq); +- +- return ret; +-} +- + /* Main per-ring interrupt handler */ + static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) + { +@@ -168,6 +58,9 @@ + void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg); + u32 *userdesc, userstatus; + void *userarg; ++ dma_addr_t outbusaddr; ++ ++ outbusaddr = rd_reg64(&jrp->rregs->outring_base); + + while (rd_reg32(&jrp->rregs->outring_used)) { + +@@ -177,10 +70,15 @@ + + sw_idx = tail = jrp->tail; + hw_idx = jrp->out_ring_read_index; ++ dma_sync_single_for_cpu(dev, outbusaddr, ++ sizeof(struct jr_outentry) * JOBR_DEPTH, ++ DMA_FROM_DEVICE); + + for (i = 0; CIRC_CNT(head, tail + i, JOBR_DEPTH) >= 1; i++) { + sw_idx = (tail + i) & (JOBR_DEPTH - 1); + ++ smp_read_barrier_depends(); ++ + if (jrp->outring[hw_idx].desc == + jrp->entinfo[sw_idx].desc_addr_dma) + break; /* found */ +@@ -202,6 +100,8 @@ + userdesc = jrp->entinfo[sw_idx].desc_addr_virt; + userstatus = jrp->outring[hw_idx].jrstatus; + ++ smp_mb(); ++ + /* set done */ + wr_reg32(&jrp->rregs->outring_rmvd, 1); + +@@ -216,6 +116,7 @@ + if (sw_idx == tail) { + do { + tail = (tail + 1) & (JOBR_DEPTH - 1); ++ smp_read_barrier_depends(); + } while (CIRC_CNT(head, tail, JOBR_DEPTH) >= 1 && + jrp->entinfo[tail].desc_addr_dma == 0); + +@@ -233,57 +134,70 @@ + } + + /** +- * caam_jr_alloc() - Alloc a job ring for someone to use as needed. +- * +- * returns : pointer to the newly allocated physical +- * JobR dev can be written to if successful. ++ * caam_jr_register() - Alloc a ring for someone to use as needed. Returns ++ * an ordinal of the rings allocated, else returns -ENODEV if no rings ++ * are available. ++ * @ctrldev: points to the controller level dev (parent) that ++ * owns rings available for use. ++ * @dev: points to where a pointer to the newly allocated queue's ++ * dev can be written to if successful. 
+ **/ +-struct device *caam_jr_alloc(void) ++int caam_jr_register(struct device *ctrldev, struct device **rdev) + { +- struct caam_drv_private_jr *jrpriv, *min_jrpriv = NULL; +- struct device *dev = NULL; +- int min_tfm_cnt = INT_MAX; +- int tfm_cnt; +- +- spin_lock(&driver_data.jr_alloc_lock); +- +- if (list_empty(&driver_data.jr_list)) { +- spin_unlock(&driver_data.jr_alloc_lock); +- return ERR_PTR(-ENODEV); +- } +- +- list_for_each_entry(jrpriv, &driver_data.jr_list, list_node) { +- tfm_cnt = atomic_read(&jrpriv->tfm_count); +- if (tfm_cnt < min_tfm_cnt) { +- min_tfm_cnt = tfm_cnt; +- min_jrpriv = jrpriv; ++ struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev); ++ struct caam_drv_private_jr *jrpriv = NULL; ++ int ring; ++ ++ /* Lock, if free ring - assign, unlock */ ++ spin_lock(&ctrlpriv->jr_alloc_lock); ++ for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) { ++ jrpriv = dev_get_drvdata(ctrlpriv->jrdev[ring]); ++ if (jrpriv->assign == JOBR_UNASSIGNED) { ++ jrpriv->assign = JOBR_ASSIGNED; ++ *rdev = ctrlpriv->jrdev[ring]; ++ spin_unlock(&ctrlpriv->jr_alloc_lock); ++ return ring; + } +- if (!min_tfm_cnt) +- break; + } + +- if (min_jrpriv) { +- atomic_inc(&min_jrpriv->tfm_count); +- dev = min_jrpriv->dev; +- } +- spin_unlock(&driver_data.jr_alloc_lock); ++ /* If assigned, write dev where caller needs it */ ++ spin_unlock(&ctrlpriv->jr_alloc_lock); ++ *rdev = NULL; + +- return dev; ++ return -ENODEV; + } +-EXPORT_SYMBOL(caam_jr_alloc); ++EXPORT_SYMBOL(caam_jr_register); + + /** +- * caam_jr_free() - Free the Job Ring +- * @rdev - points to the dev that identifies the Job ring to +- * be released. ++ * caam_jr_deregister() - Deregister an API and release the queue. ++ * Returns 0 if OK, -EBUSY if queue still contains pending entries ++ * or unprocessed results at the time of the call ++ * @dev - points to the dev that identifies the queue to ++ * be released. + **/ +-void caam_jr_free(struct device *rdev) ++int caam_jr_deregister(struct device *rdev) + { + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(rdev); ++ struct caam_drv_private *ctrlpriv; ++ ++ /* Get the owning controller's private space */ ++ ctrlpriv = dev_get_drvdata(jrpriv->parentdev); ++ ++ /* ++ * Make sure ring empty before release ++ */ ++ if (rd_reg32(&jrpriv->rregs->outring_used) || ++ (rd_reg32(&jrpriv->rregs->inpring_avail) != JOBR_DEPTH)) ++ return -EBUSY; + +- atomic_dec(&jrpriv->tfm_count); ++ /* Release ring */ ++ spin_lock(&ctrlpriv->jr_alloc_lock); ++ jrpriv->assign = JOBR_UNASSIGNED; ++ spin_unlock(&ctrlpriv->jr_alloc_lock); ++ ++ return 0; + } +-EXPORT_SYMBOL(caam_jr_free); ++EXPORT_SYMBOL(caam_jr_deregister); + + /** + * caam_jr_enqueue() - Enqueue a job descriptor head. 
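The allocator model changes here from mainline's least-loaded caam_jr_alloc()/caam_jr_free() to exclusive per-caller assignment. A sketch of a client under the new model, assuming ctrldev points at the CAAM controller's struct device:

#include "jr.h"

/* Sketch: claim a free ring, use it, release it */
static int example_use_ring(struct device *ctrldev)
{
	struct device *jrdev;
	int ring, err;

	ring = caam_jr_register(ctrldev, &jrdev);
	if (ring < 0)
		return ring;		/* -ENODEV: every ring assigned */

	/* ... build descriptors, caam_jr_enqueue(jrdev, ...) ... */

	err = caam_jr_deregister(jrdev);
	if (err)			/* -EBUSY while jobs are in flight */
		dev_warn(jrdev, "ring %d still busy\n", ring);

	return err;
}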
Returns 0 if OK, +@@ -321,7 +235,7 @@ + struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); + struct caam_jrentry_info *head_entry; + int head, tail, desc_size; +- dma_addr_t desc_dma; ++ dma_addr_t desc_dma, inpbusaddr; + + desc_size = (*desc & HDR_JD_LENGTH_MASK) * sizeof(u32); + desc_dma = dma_map_single(dev, desc, desc_size, DMA_TO_DEVICE); +@@ -330,6 +244,13 @@ + return -EIO; + } + ++ dma_sync_single_for_device(dev, desc_dma, desc_size, DMA_TO_DEVICE); ++ ++ inpbusaddr = rd_reg64(&jrp->rregs->inpring_base); ++ dma_sync_single_for_device(dev, inpbusaddr, ++ sizeof(dma_addr_t) * JOBR_DEPTH, ++ DMA_TO_DEVICE); ++ + spin_lock_bh(&jrp->inplock); + + head = jrp->head; +@@ -351,12 +272,18 @@ + + jrp->inpring[jrp->inp_ring_write_index] = desc_dma; + ++ dma_sync_single_for_device(dev, inpbusaddr, ++ sizeof(dma_addr_t) * JOBR_DEPTH, ++ DMA_TO_DEVICE); ++ + smp_wmb(); + + jrp->inp_ring_write_index = (jrp->inp_ring_write_index + 1) & + (JOBR_DEPTH - 1); + jrp->head = (head + 1) & (JOBR_DEPTH - 1); + ++ wmb(); ++ + wr_reg32(&jrp->rregs->inpring_jobadd, 1); + + spin_unlock_bh(&jrp->inplock); +@@ -365,6 +292,46 @@ + } + EXPORT_SYMBOL(caam_jr_enqueue); + ++static int caam_reset_hw_jr(struct device *dev) ++{ ++ struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); ++ unsigned int timeout = 100000; ++ ++ /* ++ * mask interrupts since we are going to poll ++ * for reset completion status ++ */ ++ setbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); ++ ++ /* initiate flush (required prior to reset) */ ++ wr_reg32(&jrp->rregs->jrcommand, JRCR_RESET); ++ while (((rd_reg32(&jrp->rregs->jrintstatus) & JRINT_ERR_HALT_MASK) == ++ JRINT_ERR_HALT_INPROGRESS) && --timeout) ++ cpu_relax(); ++ ++ if ((rd_reg32(&jrp->rregs->jrintstatus) & JRINT_ERR_HALT_MASK) != ++ JRINT_ERR_HALT_COMPLETE || timeout == 0) { ++ dev_err(dev, "failed to flush job ring %d\n", jrp->ridx); ++ return -EIO; ++ } ++ ++ /* initiate reset */ ++ timeout = 100000; ++ wr_reg32(&jrp->rregs->jrcommand, JRCR_RESET); ++ while ((rd_reg32(&jrp->rregs->jrcommand) & JRCR_RESET) && --timeout) ++ cpu_relax(); ++ ++ if (timeout == 0) { ++ dev_err(dev, "failed to reset job ring %d\n", jrp->ridx); ++ return -EIO; ++ } ++ ++ /* unmask interrupts */ ++ clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); ++ ++ return 0; ++} ++ + /* + * Init JobR independent of platform property detection + */ +@@ -380,32 +347,34 @@ + + /* Connect job ring interrupt handler. 
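The enqueue contract is unchanged by this patch: the callback fires from the ring's tasklet with the raw status word. A synchronous-submit sketch, mirroring the split_key_result pattern this patch uses in key_gen.c (the names here are illustrative):

#include <linux/completion.h>

struct example_result {
	struct completion comp;
	u32 err;		/* raw JR status word from the callback */
};

static void example_cbk(struct device *dev, u32 *desc, u32 status,
			void *arg)
{
	struct example_result *res = arg;

	res->err = status;
	complete(&res->comp);
}

static int example_submit(struct device *jrdev, u32 *desc)
{
	struct example_result res;
	int ret;

	init_completion(&res.comp);
	ret = caam_jr_enqueue(jrdev, desc, example_cbk, &res);
	if (ret)
		return ret;
	wait_for_completion_interruptible(&res.comp);
	return res.err;
}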
*/ + error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED, +- dev_name(dev), dev); ++ "caam-jobr", dev); + if (error) { + dev_err(dev, "can't connect JobR %d interrupt (%d)\n", + jrp->ridx, jrp->irq); +- goto out_kill_deq; ++ irq_dispose_mapping(jrp->irq); ++ jrp->irq = 0; ++ return -EINVAL; + } + + error = caam_reset_hw_jr(dev); + if (error) +- goto out_free_irq; ++ return error; + +- error = -ENOMEM; + jrp->inpring = dma_alloc_coherent(dev, sizeof(dma_addr_t) * JOBR_DEPTH, + &inpbusaddr, GFP_KERNEL); +- if (!jrp->inpring) +- goto out_free_irq; + + jrp->outring = dma_alloc_coherent(dev, sizeof(struct jr_outentry) * + JOBR_DEPTH, &outbusaddr, GFP_KERNEL); +- if (!jrp->outring) +- goto out_free_inpring; + + jrp->entinfo = kzalloc(sizeof(struct caam_jrentry_info) * JOBR_DEPTH, + GFP_KERNEL); +- if (!jrp->entinfo) +- goto out_free_outring; ++ ++ if ((jrp->inpring == NULL) || (jrp->outring == NULL) || ++ (jrp->entinfo == NULL)) { ++ dev_err(dev, "can't allocate job rings for %d\n", ++ jrp->ridx); ++ return -ENOMEM; ++ } + + for (i = 0; i < JOBR_DEPTH; i++) + jrp->entinfo[i].desc_addr_dma = !0; +@@ -431,120 +400,123 @@ + (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) | + (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT)); + ++ jrp->assign = JOBR_UNASSIGNED; + return 0; ++} + +-out_free_outring: +- dma_free_coherent(dev, sizeof(struct jr_outentry) * JOBR_DEPTH, +- jrp->outring, outbusaddr); +-out_free_inpring: ++/* ++ * Shutdown JobR independent of platform property code ++ */ ++int caam_jr_shutdown(struct device *dev) ++{ ++ struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); ++ dma_addr_t inpbusaddr, outbusaddr; ++ int ret; ++ ++ ret = caam_reset_hw_jr(dev); ++ ++ tasklet_kill(&jrp->irqtask); ++ ++ /* Release interrupt */ ++ free_irq(jrp->irq, dev); ++ ++ /* Free rings */ ++ inpbusaddr = rd_reg64(&jrp->rregs->inpring_base); ++ outbusaddr = rd_reg64(&jrp->rregs->outring_base); + dma_free_coherent(dev, sizeof(dma_addr_t) * JOBR_DEPTH, + jrp->inpring, inpbusaddr); +- dev_err(dev, "can't allocate job rings for %d\n", jrp->ridx); +-out_free_irq: +- free_irq(jrp->irq, dev); +-out_kill_deq: +- tasklet_kill(&jrp->irqtask); +- return error; +-} ++ dma_free_coherent(dev, sizeof(struct jr_outentry) * JOBR_DEPTH, ++ jrp->outring, outbusaddr); ++ kfree(jrp->entinfo); ++ of_device_unregister(jrp->jr_pdev); + ++ return ret; ++} + + /* +- * Probe routine for each detected JobR subsystem. ++ * Probe routine for each detected JobR subsystem. It assumes that ++ * property detection was picked up externally. 
+ */ +-static int caam_jr_probe(struct platform_device *pdev) ++int caam_jr_probe(struct platform_device *pdev, struct device_node *np, ++ int ring) + { +- struct device *jrdev; +- struct device_node *nprop; +- struct caam_job_ring __iomem *ctrl; ++ struct device *ctrldev, *jrdev; ++ struct platform_device *jr_pdev; ++ struct caam_drv_private *ctrlpriv; + struct caam_drv_private_jr *jrpriv; +- static int total_jobrs; ++ const __be32 *jroffset_addr; ++ u32 jroffset; + int error; + +- jrdev = &pdev->dev; +- jrpriv = devm_kmalloc(jrdev, sizeof(struct caam_drv_private_jr), +- GFP_KERNEL); +- if (!jrpriv) ++ ctrldev = &pdev->dev; ++ ctrlpriv = dev_get_drvdata(ctrldev); ++ ++ jrpriv = kmalloc(sizeof(struct caam_drv_private_jr), ++ GFP_KERNEL); ++ if (jrpriv == NULL) { ++ dev_err(ctrldev, "can't alloc private mem for job ring %d\n", ++ ring); + return -ENOMEM; ++ } ++ jrpriv->parentdev = ctrldev; /* point back to parent */ ++ jrpriv->ridx = ring; /* save ring identity relative to detection */ + +- dev_set_drvdata(jrdev, jrpriv); ++ /* ++ * Derive a pointer to the detected JobRs regs ++ * Driver has already iomapped the entire space, we just ++ * need to add in the offset to this JobR. Don't know if I ++ * like this long-term, but it'll run ++ */ ++ jroffset_addr = of_get_property(np, "reg", NULL); + +- /* save ring identity relative to detection */ +- jrpriv->ridx = total_jobrs++; ++ if (jroffset_addr == NULL) { ++ kfree(jrpriv); ++ return -EINVAL; ++ } + +- nprop = pdev->dev.of_node; +- /* Get configuration properties from device tree */ +- /* First, get register page */ +- ctrl = of_iomap(nprop, 0); +- if (!ctrl) { +- dev_err(jrdev, "of_iomap() failed\n"); +- return -ENOMEM; ++ /* ++ * Fix the endianness of this value read from the device ++ * tree if running on ARM. 
++ */ ++ jroffset = be32_to_cpup(jroffset_addr); ++ ++ jrpriv->rregs = (struct caam_job_ring __iomem *)((void *)ctrlpriv->ctrl ++ + jroffset); ++ ++ /* Build a local dev for each detected queue */ ++ jr_pdev = of_platform_device_create(np, NULL, ctrldev); ++ if (jr_pdev == NULL) { ++ kfree(jrpriv); ++ return -EINVAL; + } + +- jrpriv->rregs = (struct caam_job_ring __force *)ctrl; ++ jrpriv->jr_pdev = jr_pdev; ++ jrdev = &jr_pdev->dev; ++ dev_set_drvdata(jrdev, jrpriv); ++ ctrlpriv->jrdev[ring] = jrdev; + + if (sizeof(dma_addr_t) == sizeof(u64)) +- if (of_device_is_compatible(nprop, "fsl,sec-v5.0-job-ring")) +- dma_set_mask_and_coherent(jrdev, DMA_BIT_MASK(40)); ++ if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring")) ++ dma_set_mask(jrdev, DMA_BIT_MASK(40)); + else +- dma_set_mask_and_coherent(jrdev, DMA_BIT_MASK(36)); ++ dma_set_mask(jrdev, DMA_BIT_MASK(36)); + else +- dma_set_mask_and_coherent(jrdev, DMA_BIT_MASK(32)); ++ dma_set_mask(jrdev, DMA_BIT_MASK(32)); + + /* Identify the interrupt */ +- jrpriv->irq = irq_of_parse_and_map(nprop, 0); ++ jrpriv->irq = of_irq_to_resource(np, 0, NULL); ++ if (jrpriv->irq <= 0) { ++ kfree(jrpriv); ++ return -EINVAL; ++ } + + /* Now do the platform independent part */ + error = caam_jr_init(jrdev); /* now turn on hardware */ + if (error) { +- irq_dispose_mapping(jrpriv->irq); ++ of_device_unregister(jr_pdev); ++ kfree(jrpriv); + return error; + } + +- jrpriv->dev = jrdev; +- spin_lock(&driver_data.jr_alloc_lock); +- list_add_tail(&jrpriv->list_node, &driver_data.jr_list); +- spin_unlock(&driver_data.jr_alloc_lock); +- +- atomic_set(&jrpriv->tfm_count, 0); +- +- return 0; +-} +- +-static struct of_device_id caam_jr_match[] = { +- { +- .compatible = "fsl,sec-v4.0-job-ring", +- }, +- { +- .compatible = "fsl,sec4.0-job-ring", +- }, +- {}, +-}; +-MODULE_DEVICE_TABLE(of, caam_jr_match); +- +-static struct platform_driver caam_jr_driver = { +- .driver = { +- .name = "caam_jr", +- .of_match_table = caam_jr_match, +- }, +- .probe = caam_jr_probe, +- .remove = caam_jr_remove, +-}; +- +-static int __init jr_driver_init(void) +-{ +- spin_lock_init(&driver_data.jr_alloc_lock); +- INIT_LIST_HEAD(&driver_data.jr_list); +- return platform_driver_register(&caam_jr_driver); +-} +- +-static void __exit jr_driver_exit(void) +-{ +- platform_driver_unregister(&caam_jr_driver); ++ return error; + } +- +-module_init(jr_driver_init); +-module_exit(jr_driver_exit); +- +-MODULE_LICENSE("GPL"); +-MODULE_DESCRIPTION("FSL CAAM JR request backend"); +-MODULE_AUTHOR("Freescale Semiconductor - NMG/STC"); +diff -Nur linux-4.1.3/drivers/crypto/caam/jr.h linux-xbian-imx6/drivers/crypto/caam/jr.h +--- linux-4.1.3/drivers/crypto/caam/jr.h 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/jr.h 2015-07-27 23:13:04.213947410 +0200 +@@ -1,18 +1,22 @@ + /* + * CAAM public-level include definitions for the JobR backend + * +- * Copyright 2008-2011 Freescale Semiconductor, Inc. ++ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. 
+ */ + + #ifndef JR_H + #define JR_H + + /* Prototypes for backend-level services exposed to APIs */ +-struct device *caam_jr_alloc(void); +-void caam_jr_free(struct device *rdev); ++int caam_jr_register(struct device *ctrldev, struct device **rdev); ++int caam_jr_deregister(struct device *rdev); + int caam_jr_enqueue(struct device *dev, u32 *desc, + void (*cbk)(struct device *dev, u32 *desc, u32 status, + void *areq), + void *areq); + ++extern int caam_jr_probe(struct platform_device *pdev, struct device_node *np, ++ int ring); ++extern int caam_jr_shutdown(struct device *dev); ++extern struct device *caam_get_jrdev(void); + #endif /* JR_H */ +diff -Nur linux-4.1.3/drivers/crypto/caam/Kconfig linux-xbian-imx6/drivers/crypto/caam/Kconfig +--- linux-4.1.3/drivers/crypto/caam/Kconfig 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/Kconfig 2015-07-27 23:13:04.205975852 +0200 +@@ -1,32 +1,19 @@ + config CRYPTO_DEV_FSL_CAAM + tristate "Freescale CAAM-Multicore driver backend" +- depends on FSL_SOC ++ depends on FSL_SOC || ARCH_MXC + help + Enables the driver module for Freescale's Cryptographic Accelerator + and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). +- This module creates job ring devices, and configures h/w ++ This module adds a job ring operation interface, and configures h/w + to operate as a DPAA component automatically, depending + on h/w feature availability. + + To compile this driver as a module, choose M here: the module + will be called caam. + +-config CRYPTO_DEV_FSL_CAAM_JR +- tristate "Freescale CAAM Job Ring driver backend" +- depends on CRYPTO_DEV_FSL_CAAM +- default y +- help +- Enables the driver module for Job Rings which are part of +- Freescale's Cryptographic Accelerator +- and Assurance Module (CAAM). This module adds a job ring operation +- interface. +- +- To compile this driver as a module, choose M here: the module +- will be called caam_jr. +- + config CRYPTO_DEV_FSL_CAAM_RINGSIZE + int "Job Ring size" +- depends on CRYPTO_DEV_FSL_CAAM_JR ++ depends on CRYPTO_DEV_FSL_CAAM + range 2 9 + default "9" + help +@@ -44,7 +31,7 @@ + + config CRYPTO_DEV_FSL_CAAM_INTC + bool "Job Ring interrupt coalescing" +- depends on CRYPTO_DEV_FSL_CAAM_JR ++ depends on CRYPTO_DEV_FSL_CAAM + default n + help + Enable the Job Ring's interrupt coalescing feature. +@@ -75,7 +62,7 @@ + + config CRYPTO_DEV_FSL_CAAM_CRYPTO_API + tristate "Register algorithm implementations with the Crypto API" +- depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR ++ depends on CRYPTO_DEV_FSL_CAAM + default y + select CRYPTO_ALGAPI + select CRYPTO_AUTHENC +@@ -89,7 +76,7 @@ + + config CRYPTO_DEV_FSL_CAAM_AHASH_API + tristate "Register hash algorithm implementations with Crypto API" +- depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR ++ depends on CRYPTO_DEV_FSL_CAAM + default y + select CRYPTO_HASH + help +@@ -101,7 +88,7 @@ + + config CRYPTO_DEV_FSL_CAAM_RNG_API + tristate "Register caam device for hwrng API" +- depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR ++ depends on CRYPTO_DEV_FSL_CAAM + default y + select CRYPTO_RNG + select HW_RANDOM +@@ -112,6 +99,54 @@ + To compile this as a module, choose M here: the module + will be called caamrng. + ++config CRYPTO_DEV_FSL_CAAM_RNG_TEST ++ boolean "Test caam rng" ++ depends on CRYPTO_DEV_FSL_CAAM_RNG_API ++ default n ++ help ++ Selecting this will enable self-test for caam rng. 
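Since the standalone caam_jr platform driver is removed, something must walk the controller's device-tree children and call caam_jr_probe() once per ring. That loop lives in ctrl.c, outside these hunks; a sketch of its presumed shape, keyed on the same "fsl,sec-v4.0-job-ring" compatible string used in the probe above:

#include <linux/of.h>
#include <linux/of_platform.h>

/* Sketch only: the real enumeration is in ctrl.c, not shown here */
static int example_probe_rings(struct platform_device *pdev)
{
	struct device_node *np;
	int ring = 0, err;

	for_each_child_of_node(pdev->dev.of_node, np) {
		if (!of_device_is_compatible(np, "fsl,sec-v4.0-job-ring"))
			continue;
		err = caam_jr_probe(pdev, np, ring);
		if (err) {
			of_node_put(np);
			return err;
		}
		ring++;
	}
	return ring ? 0 : -ENODEV;
}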
++ ++config CRYPTO_DEV_FSL_CAAM_SM ++ tristate "CAAM Secure Memory / Keystore API (EXPERIMENTAL)" ++ default n ++ help ++ Enables use of a prototype kernel-level Keystore API with CAAM ++ Secure Memory for insertion/extraction of bus-protected secrets. ++ ++config CRYPTO_DEV_FSL_CAAM_SM_SLOTSIZE ++ int "Size of each keystore slot in Secure Memory" ++ depends on CRYPTO_DEV_FSL_CAAM_SM ++ range 5 9 ++ default 7 ++ help ++ Select size of allocation units to divide Secure Memory pages into ++ (the size of a "slot" as referenced inside the API code). ++ Established as powers of two. ++ Examples: ++ 5 => 32 bytes ++ 6 => 64 bytes ++ 7 => 128 bytes ++ 8 => 256 bytes ++ 9 => 512 bytes ++ ++config CRYPTO_DEV_FSL_CAAM_SM_TEST ++ tristate "CAAM Secure Memory - Keystore Test/Example (EXPERIMENTAL)" ++ depends on CRYPTO_DEV_FSL_CAAM_SM ++ default n ++ help ++ Example thread to exercise the Keystore API and to verify that ++ stored and recovered secrets can be used for general purpose ++ encryption/decryption. ++ ++config CRYPTO_DEV_FSL_CAAM_SECVIO ++ tristate "CAAM/SNVS Security Violation Handler (EXPERIMENTAL)" ++ depends on CRYPTO_DEV_FSL_CAAM ++ default n ++ help ++ Enables installation of an interrupt handler with registrable ++ handler functions which can be specified to act on the consequences ++ of a security violation. ++ + config CRYPTO_DEV_FSL_CAAM_DEBUG + bool "Enable debug output in CAAM driver" + depends on CRYPTO_DEV_FSL_CAAM +@@ -119,3 +154,19 @@ + help + Selecting this will enable printing of various debug + information in the CAAM driver. ++ ++config CRYPTO_DEV_FSL_CAAM_KEYBLOB ++ tristate "Freescale CAAM memory keyblob driver backend" ++ depends on CRYPTO_DEV_FSL_CAAM ++ depends on CRYPTO_DEV_FSL_CAAM_JR ++ default y ++ help ++ Enables the driver module for Key Blob which are part of ++ Freescale's Cryptographic Accelerator ++ and Assurance Module (CAAM). This module adds a key blob operation ++ interface. ++ ++ To compile this driver as a module, choose M here: the module ++ will be called caam_keyblob. ++ ++ +diff -Nur linux-4.1.3/drivers/crypto/caam/key_gen.c linux-xbian-imx6/drivers/crypto/caam/key_gen.c +--- linux-4.1.3/drivers/crypto/caam/key_gen.c 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/key_gen.c 2015-07-27 23:13:04.213947410 +0200 +@@ -1,7 +1,7 @@ + /* + * CAAM/SEC 4.x functions for handling key-generation jobs + * +- * Copyright 2008-2011 Freescale Semiconductor, Inc. ++ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. 
+ * + */ + #include "compat.h" +@@ -19,8 +19,11 @@ + dev_err(dev, "%s %d: err 0x%x\n", __func__, __LINE__, err); + #endif + +- if (err) +- caam_jr_strstatus(dev, err); ++ if (err) { ++ char tmp[CAAM_ERROR_STR_MAX]; ++ ++ dev_err(dev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); ++ } + + res->err = err; + +@@ -48,29 +51,24 @@ + u32 *desc; + struct split_key_result result; + dma_addr_t dma_addr_in, dma_addr_out; +- int ret = -ENOMEM; ++ int ret = 0; + + desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); + if (!desc) { + dev_err(jrdev, "unable to allocate key input memory\n"); +- return ret; ++ return -ENOMEM; + } + ++ init_job_desc(desc, 0); ++ + dma_addr_in = dma_map_single(jrdev, (void *)key_in, keylen, + DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, dma_addr_in)) { + dev_err(jrdev, "unable to map key input memory\n"); +- goto out_free; ++ kfree(desc); ++ return -ENOMEM; + } +- +- dma_addr_out = dma_map_single(jrdev, key_out, split_key_pad_len, +- DMA_FROM_DEVICE); +- if (dma_mapping_error(jrdev, dma_addr_out)) { +- dev_err(jrdev, "unable to map key output memory\n"); +- goto out_unmap_in; +- } +- +- init_job_desc(desc, 0); ++ dma_sync_single_for_device(jrdev, dma_addr_in, keylen, DMA_TO_DEVICE); + append_key(desc, dma_addr_in, keylen, CLASS_2 | KEY_DEST_CLASS_REG); + + /* Sets MDHA up into an HMAC-INIT */ +@@ -91,9 +89,9 @@ + LDST_CLASS_2_CCB | FIFOST_TYPE_SPLIT_KEK); + + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, key_in, keylen, 1); +- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); + #endif + +@@ -106,12 +104,13 @@ + wait_for_completion_interruptible(&result.completion); + ret = result.err; + #ifdef DEBUG +- print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", ++ print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, key_out, + split_key_pad_len, 1); + #endif + } +- ++ dma_sync_single_for_cpu(jrdev, dma_addr_out, split_key_pad_len, ++ DMA_FROM_DEVICE); + dma_unmap_single(jrdev, dma_addr_out, split_key_pad_len, + DMA_FROM_DEVICE); + out_unmap_in: +diff -Nur linux-4.1.3/drivers/crypto/caam/Makefile linux-xbian-imx6/drivers/crypto/caam/Makefile +--- linux-4.1.3/drivers/crypto/caam/Makefile 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/Makefile 2015-07-27 23:13:04.205975852 +0200 +@@ -1,15 +1,14 @@ + # + # Makefile for the CAAM backend and dependent components + # +-ifeq ($(CONFIG_CRYPTO_DEV_FSL_CAAM_DEBUG), y) +- EXTRA_CFLAGS := -DDEBUG +-endif + + obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o +-obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_JR) += caam_jr.o + obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o + obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o + obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o ++obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_SM) += sm_store.o ++obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_SM_TEST) += sm_test.o ++obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_SECVIO) += secvio.o ++obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_KEYBLOB) += caam_keyblob.o + +-caam-objs := ctrl.o +-caam_jr-objs := jr.o key_gen.o error.o ++caam-objs := ctrl.o jr.o error.o key_gen.o +diff -Nur linux-4.1.3/drivers/crypto/caam/pdb.h linux-xbian-imx6/drivers/crypto/caam/pdb.h +--- linux-4.1.3/drivers/crypto/caam/pdb.h 2015-07-21 19:10:33.000000000 +0200 ++++ 
linux-xbian-imx6/drivers/crypto/caam/pdb.h 2015-07-27 23:13:04.213947410 +0200 +@@ -44,7 +44,6 @@ + #define PDBOPTS_ESP_IPHDRSRC 0x08 /* IP header comes from PDB (encap) */ + #define PDBOPTS_ESP_INCIPHDR 0x04 /* Prepend IP header to output frame */ + #define PDBOPTS_ESP_IPVSN 0x02 /* process IPv6 header */ +-#define PDBOPTS_ESP_AOFL 0x04 /* adjust out frame len (decap, SEC>=5.3)*/ + #define PDBOPTS_ESP_TUNNEL 0x01 /* tunnel mode next-header byte */ + #define PDBOPTS_ESP_IPV6 0x02 /* ip header version is V6 */ + #define PDBOPTS_ESP_DIFFSERV 0x40 /* copy TOS/TC from inner iphdr */ +diff -Nur linux-4.1.3/drivers/crypto/caam/regs.h linux-xbian-imx6/drivers/crypto/caam/regs.h +--- linux-4.1.3/drivers/crypto/caam/regs.h 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/regs.h 2015-07-27 23:13:04.213947410 +0200 +@@ -1,7 +1,7 @@ + /* + * CAAM hardware register-level view + * +- * Copyright 2008-2011 Freescale Semiconductor, Inc. ++ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. + */ + + #ifndef REGS_H +@@ -74,17 +74,22 @@ + #endif + #else + #ifdef __LITTLE_ENDIAN +-#define wr_reg32(reg, data) __raw_writel(data, reg) +-#define rd_reg32(reg) __raw_readl(reg) ++#define wr_reg32(reg, data) writel(data, reg) ++#define rd_reg32(reg) readl(reg) + #ifdef CONFIG_64BIT +-#define wr_reg64(reg, data) __raw_writeq(data, reg) +-#define rd_reg64(reg) __raw_readq(reg) ++#define wr_reg64(reg, data) writeq(data, reg) ++#define rd_reg64(reg) readq(reg) + #endif + #endif + #endif + ++#ifdef CONFIG_ARM ++/* These are common macros for Power, put here for ARMs */ ++#define setbits32(_addr, _v) writel((readl(_addr) | (_v)), (_addr)) ++#define clrbits32(_addr, _v) writel((readl(_addr) & ~(_v)), (_addr)) ++#endif ++ + #ifndef CONFIG_64BIT +-#ifdef __BIG_ENDIAN + static inline void wr_reg64(u64 __iomem *reg, u64 data) + { + wr_reg32((u32 __iomem *)reg, (data & 0xffffffff00000000ull) >> 32); +@@ -96,21 +101,6 @@ + return (((u64)rd_reg32((u32 __iomem *)reg)) << 32) | + ((u64)rd_reg32((u32 __iomem *)reg + 1)); + } +-#else +-#ifdef __LITTLE_ENDIAN +-static inline void wr_reg64(u64 __iomem *reg, u64 data) +-{ +- wr_reg32((u32 __iomem *)reg + 1, (data & 0xffffffff00000000ull) >> 32); +- wr_reg32((u32 __iomem *)reg, data & 0x00000000ffffffffull); +-} +- +-static inline u64 rd_reg64(u64 __iomem *reg) +-{ +- return (((u64)rd_reg32((u32 __iomem *)reg + 1)) << 32) | +- ((u64)rd_reg32((u32 __iomem *)reg)); +-} +-#endif +-#endif + #endif + + /* +@@ -123,6 +113,98 @@ + } __packed; + + /* ++ * CHA version ID / instantiation bitfields ++ * Defined for use within cha_id in perfmon ++ * Note that the same shift/mask selectors can be used to pull out number ++ * of instantiated blocks within cha_num in perfmon, the locations are ++ * the same. 
++ */ ++ ++/* Job Ring */ ++#define CHA_ID_JR_SHIFT 60 ++#define CHA_ID_JR_MASK (0xfull << CHA_ID_JR_SHIFT) ++ ++/* DEscriptor COntroller */ ++#define CHA_ID_DECO_SHIFT 56 ++#define CHA_ID_DECO_MASK (0xfull << CHA_ID_DECO_SHIFT) ++#define CHA_NUM_DECONUM_SHIFT 56 /* legacy definition */ ++#define CHA_NUM_DECONUM_MASK (0xfull << CHA_NUM_DECONUM_SHIFT) ++ ++/* ZUC-Authentication */ ++#define CHA_ID_ZA_SHIFT 44 ++#define CHA_ID_ZA_MASK (0xfull << CHA_ID_ZA_SHIFT) ++ ++/* ZUC-Encryption */ ++#define CHA_ID_ZE_SHIFT 40 ++#define CHA_ID_ZE_MASK (0xfull << CHA_ID_ZE_SHIFT) ++ ++/* SNOW f9 */ ++#define CHA_ID_SNW9_SHIFT 36 ++#define CHA_ID_SNW9_MASK (0xfull << CHA_ID_SNW9_SHIFT) ++ ++/* CRC */ ++#define CHA_ID_CRC_SHIFT 32 ++#define CHA_ID_CRC_MASK (0xfull << CHA_ID_CRC_SHIFT) ++ ++/* Public Key */ ++#define CHA_ID_PK_SHIFT 28 ++#define CHA_ID_PK_MASK (0xfull << CHA_ID_PK_SHIFT) ++ ++/* Kasumi */ ++#define CHA_ID_KAS_SHIFT 24 ++#define CHA_ID_KAS_MASK (0xfull << CHA_ID_KAS_SHIFT) ++ ++/* SNOW f8 */ ++#define CHA_ID_SNW8_SHIFT 20 ++#define CHA_ID_SNW8_MASK (0xfull << CHA_ID_SNW8_SHIFT) ++ ++/* ++ * Random Generator ++ * RNG4 = FIPS-verification-compliant, requires init kickstart for use ++ */ ++#define CHA_ID_RNG_SHIFT 16 ++#define CHA_ID_RNG_MASK (0xfull << CHA_ID_RNG_SHIFT) ++#define CHA_ID_RNG_A (0x1ull << CHA_ID_RNG_SHIFT) ++#define CHA_ID_RNG_B (0x2ull << CHA_ID_RNG_SHIFT) ++#define CHA_ID_RNG_C (0x3ull << CHA_ID_RNG_SHIFT) ++#define CHA_ID_RNG_4 (0x4ull << CHA_ID_RNG_SHIFT) ++ ++/* ++ * Message Digest ++ * LP256 = Low Power (MD5/SHA1/SHA224/SHA256 + HMAC) ++ * LP512 = Low Power (LP256 + SHA384/SHA512) ++ * HP = High Power (LP512 + SMAC) ++ */ ++#define CHA_ID_MD_SHIFT 12 ++#define CHA_ID_MD_MASK (0xfull << CHA_ID_MD_SHIFT) ++#define CHA_ID_MD_LP256 (0x0ull << CHA_ID_MD_SHIFT) ++#define CHA_ID_MD_LP512 (0x1ull << CHA_ID_MD_SHIFT) ++#define CHA_ID_MD_HP (0x2ull << CHA_ID_MD_SHIFT) ++ ++/* ARC4 Streamcipher */ ++#define CHA_ID_ARC4_SHIFT 8 ++#define CHA_ID_ARC4_MASK (0xfull << CHA_ID_ARC4_SHIFT) ++#define CHA_ID_ARC4_LP (0x0ull << CHA_ID_ARC4_SHIFT) ++#define CHA_ID_ARC4_HP (0x1ull << CHA_ID_ARC4_SHIFT) ++ ++/* DES Blockcipher Accelerator */ ++#define CHA_ID_DES_SHIFT 4 ++#define CHA_ID_DES_MASK (0xfull << CHA_ID_DES_SHIFT) ++ ++/* ++ * AES Blockcipher + Combo Mode Accelerator ++ * LP = Low Power (includes ECB/CBC/CFB128/OFB/CTR/CCM/CMAC/XCBC-MAC) ++ * HP = High Power (LP + CBCXCBC/CTRXCBC/XTS/GCM) ++ * DIFFPWR = ORed in if differential-power-analysis resistance implemented ++ */ ++#define CHA_ID_AES_SHIFT 0 ++#define CHA_ID_AES_MASK (0xfull << CHA_ID_AES_SHIFT) ++#define CHA_ID_AES_LP (0x3ull << CHA_ID_AES_SHIFT) ++#define CHA_ID_AES_HP (0x4ull << CHA_ID_AES_SHIFT) ++#define CHA_ID_AES_DIFFPWR (0x1ull << CHA_ID_AES_SHIFT) ++ ++ ++/* + * caam_perfmon - Performance Monitor/Secure Memory Status/ + * CAAM Global Status/Component Version IDs + * +@@ -130,45 +212,8 @@ + */ + + /* Number of DECOs */ +-#define CHA_NUM_MS_DECONUM_SHIFT 24 +-#define CHA_NUM_MS_DECONUM_MASK (0xfull << CHA_NUM_MS_DECONUM_SHIFT) +- +-/* CHA Version IDs */ +-#define CHA_ID_LS_AES_SHIFT 0 +-#define CHA_ID_LS_AES_MASK (0xfull << CHA_ID_LS_AES_SHIFT) +- +-#define CHA_ID_LS_DES_SHIFT 4 +-#define CHA_ID_LS_DES_MASK (0xfull << CHA_ID_LS_DES_SHIFT) +- +-#define CHA_ID_LS_ARC4_SHIFT 8 +-#define CHA_ID_LS_ARC4_MASK (0xfull << CHA_ID_LS_ARC4_SHIFT) +- +-#define CHA_ID_LS_MD_SHIFT 12 +-#define CHA_ID_LS_MD_MASK (0xfull << CHA_ID_LS_MD_SHIFT) +- +-#define CHA_ID_LS_RNG_SHIFT 16 +-#define CHA_ID_LS_RNG_MASK (0xfull << 
CHA_ID_LS_RNG_SHIFT) +- +-#define CHA_ID_LS_SNW8_SHIFT 20 +-#define CHA_ID_LS_SNW8_MASK (0xfull << CHA_ID_LS_SNW8_SHIFT) +- +-#define CHA_ID_LS_KAS_SHIFT 24 +-#define CHA_ID_LS_KAS_MASK (0xfull << CHA_ID_LS_KAS_SHIFT) +- +-#define CHA_ID_LS_PK_SHIFT 28 +-#define CHA_ID_LS_PK_MASK (0xfull << CHA_ID_LS_PK_SHIFT) +- +-#define CHA_ID_MS_CRC_SHIFT 0 +-#define CHA_ID_MS_CRC_MASK (0xfull << CHA_ID_MS_CRC_SHIFT) +- +-#define CHA_ID_MS_SNW9_SHIFT 4 +-#define CHA_ID_MS_SNW9_MASK (0xfull << CHA_ID_MS_SNW9_SHIFT) +- +-#define CHA_ID_MS_DECO_SHIFT 24 +-#define CHA_ID_MS_DECO_MASK (0xfull << CHA_ID_MS_DECO_SHIFT) +- +-#define CHA_ID_MS_JR_SHIFT 28 +-#define CHA_ID_MS_JR_MASK (0xfull << CHA_ID_MS_JR_SHIFT) ++#define CHA_NUM_DECONUM_SHIFT 56 ++#define CHA_NUM_DECONUM_MASK (0xfull << CHA_NUM_DECONUM_SHIFT) + + struct sec_vid { + u16 ip_id; +@@ -176,6 +221,10 @@ + u8 min_rev; + }; + ++#define SEC_VID_IPID_SHIFT 16 ++#define SEC_VID_MAJ_SHIFT 8 ++#define SEC_VID_MAJ_MASK 0xFF00 ++ + struct caam_perfmon { + /* Performance Monitor Registers f00-f9f */ + u64 req_dequeued; /* PC_REQ_DEQ - Dequeued Requests */ +@@ -188,36 +237,89 @@ + u64 rsvd[13]; + + /* CAAM Hardware Instantiation Parameters fa0-fbf */ +- u32 cha_rev_ms; /* CRNR - CHA Rev No. Most significant half*/ +- u32 cha_rev_ls; /* CRNR - CHA Rev No. Least significant half*/ +-#define CTPR_MS_QI_SHIFT 25 +-#define CTPR_MS_QI_MASK (0x1ull << CTPR_MS_QI_SHIFT) +-#define CTPR_MS_VIRT_EN_INCL 0x00000001 +-#define CTPR_MS_VIRT_EN_POR 0x00000002 +-#define CTPR_MS_PG_SZ_MASK 0x10 +-#define CTPR_MS_PG_SZ_SHIFT 4 +- u32 comp_parms_ms; /* CTPR - Compile Parameters Register */ +- u32 comp_parms_ls; /* CTPR - Compile Parameters Register */ +- u64 rsvd1[2]; ++ u64 cha_rev; /* CRNR - CHA Revision Number */ ++#define CTPR_QI_SHIFT 57 ++#define CTPR_QI_MASK (0x1ull << CTPR_QI_SHIFT) ++ u64 comp_parms; /* CTPR - Compile Parameters Register */ ++ ++ /* Secure Memory State Visibility */ ++ u32 rsvd1; ++ u32 smstatus; /* Secure memory status */ ++ u32 rsvd2; ++ u32 smpartown; /* Secure memory partition owner */ + + /* CAAM Global Status fc0-fdf */ + u64 faultaddr; /* FAR - Fault Address */ + u32 faultliodn; /* FALR - Fault Address LIODN */ + u32 faultdetail; /* FADR - Fault Addr Detail */ +- u32 rsvd2; ++ u32 rsvd3; + u32 status; /* CSTA - CAAM Status */ +- u64 rsvd3; ++ u32 smpart; /* Secure Memory Partition Parameters */ ++ u32 smvid; /* Secure Memory Version ID */ + + /* Component Instantiation Parameters fe0-fff */ + u32 rtic_id; /* RVID - RTIC Version ID */ + u32 ccb_id; /* CCBVID - CCB Version ID */ +- u32 cha_id_ms; /* CHAVID - CHA Version ID Most Significant*/ +- u32 cha_id_ls; /* CHAVID - CHA Version ID Least Significant*/ +- u32 cha_num_ms; /* CHANUM - CHA Number Most Significant */ +- u32 cha_num_ls; /* CHANUM - CHA Number Least Significant*/ +- u32 caam_id_ms; /* CAAMVID - CAAM Version ID MS */ +- u32 caam_id_ls; /* CAAMVID - CAAM Version ID LS */ +-}; ++ u64 cha_id; /* CHAVID - CHA Version ID */ ++ u64 cha_num; /* CHANUM - CHA Number */ ++ u64 caam_id; /* CAAMVID - CAAM Version ID */ ++}; ++ ++#define SMSTATUS_PART_SHIFT 28 ++#define SMSTATUS_PART_MASK (0xf << SMSTATUS_PART_SHIFT) ++#define SMSTATUS_PAGE_SHIFT 16 ++#define SMSTATUS_PAGE_MASK (0x7ff << SMSTATUS_PAGE_SHIFT) ++#define SMSTATUS_MID_SHIFT 8 ++#define SMSTATUS_MID_MASK (0x3f << SMSTATUS_MID_SHIFT) ++#define SMSTATUS_ACCERR_SHIFT 4 ++#define SMSTATUS_ACCERR_MASK (0xf << SMSTATUS_ACCERR_SHIFT) ++#define SMSTATUS_ACCERR_NONE 0 ++#define SMSTATUS_ACCERR_ALLOC 1 /* Page not allocated */ ++#define 
SMSTATUS_ACCESS_ID 2 /* Not granted by ID */ ++#define SMSTATUS_ACCESS_WRITE 3 /* Writes not allowed */ ++#define SMSTATUS_ACCESS_READ 4 /* Reads not allowed */ ++#define SMSTATUS_ACCESS_NONKEY 6 /* Non-key reads not allowed */ ++#define SMSTATUS_ACCESS_BLOB 9 /* Blob access not allowed */ ++#define SMSTATUS_ACCESS_DESCB 10 /* Descriptor Blob access spans pages */ ++#define SMSTATUS_ACCESS_NON_SM 11 /* Outside Secure Memory range */ ++#define SMSTATUS_ACCESS_XPAGE 12 /* Access crosses pages */ ++#define SMSTATUS_ACCESS_INITPG 13 /* Page still initializing */ ++#define SMSTATUS_STATE_SHIFT 0 ++#define SMSTATUS_STATE_MASK (0xf << SMSTATUS_STATE_SHIFT) ++#define SMSTATUS_STATE_RESET 0 ++#define SMSTATUS_STATE_INIT 1 ++#define SMSTATUS_STATE_NORMAL 2 ++#define SMSTATUS_STATE_FAIL 3 ++ ++/* up to 15 rings, 2 bits shifted by ring number */ ++#define SMPARTOWN_RING_SHIFT 2 ++#define SMPARTOWN_RING_MASK 3 ++#define SMPARTOWN_AVAILABLE 0 ++#define SMPARTOWN_NOEXIST 1 ++#define SMPARTOWN_UNAVAILABLE 2 ++#define SMPARTOWN_OURS 3 ++ ++/* Maximum number of pages possible */ ++#define SMPART_MAX_NUMPG_SHIFT 16 ++#define SMPART_MAX_NUMPG_MASK (0x3f << SMPART_MAX_NUMPG_SHIFT) ++ ++/* Maximum partition number */ ++#define SMPART_MAX_PNUM_SHIFT 12 ++#define SMPART_MAX_PNUM_MASK (0xf << SMPART_MAX_PNUM_SHIFT) ++ ++/* Highest possible page number */ ++#define SMPART_MAX_PG_SHIFT 0 ++#define SMPART_MAX_PG_MASK (0x3f << SMPART_MAX_PG_SHIFT) ++ ++/* Max size of a page */ ++#define SMVID_PG_SIZE_SHIFT 16 ++#define SMVID_PG_SIZE_MASK (0x7 << SMVID_PG_SIZE_SHIFT) ++ ++/* Major/Minor Version ID */ ++#define SMVID_MAJ_VERS_SHIFT 8 ++#define SMVID_MAJ_VERS (0xf << SMVID_MAJ_VERS_SHIFT) ++#define SMVID_MIN_VERS_SHIFT 0 ++#define SMVID_MIN_VERS (0xf << SMVID_MIN_VERS_SHIFT) + + /* LIODN programming for DMA configuration */ + #define MSTRID_LOCK_LIODN 0x80000000 +@@ -270,17 +372,7 @@ + + /* RNG4 TRNG test registers */ + struct rng4tst { +-#define RTMCTL_PRGM 0x00010000 /* 1 -> program mode, 0 -> run mode */ +-#define RTMCTL_SAMP_MODE_VON_NEUMANN_ES_SC 0 /* use von Neumann data in +- both entropy shifter and +- statistical checker */ +-#define RTMCTL_SAMP_MODE_RAW_ES_SC 1 /* use raw data in both +- entropy shifter and +- statistical checker */ +-#define RTMCTL_SAMP_MODE_VON_NEUMANN_ES_RAW_SC 2 /* use von Neumann data in +- entropy shifter, raw data +- in statistical checker */ +-#define RTMCTL_SAMP_MODE_INVALID 3 /* invalid combination */ ++#define RTMCTL_PRGM 0x00010000 /* 1 -> program mode, 0 -> run mode */ + u32 rtmctl; /* misc. control register */ + u32 rtscmisc; /* statistical check misc. register */ + u32 rtpkrrng; /* poker range register */ +@@ -290,26 +382,22 @@ + }; + #define RTSDCTL_ENT_DLY_SHIFT 16 + #define RTSDCTL_ENT_DLY_MASK (0xffff << RTSDCTL_ENT_DLY_SHIFT) +-#define RTSDCTL_ENT_DLY_MIN 3200 +-#define RTSDCTL_ENT_DLY_MAX 12800 + u32 rtsdctl; /* seed control register */ + union { + u32 rtsblim; /* PRGM=1: sparse bit limit register */ + u32 rttotsam; /* PRGM=0: total samples register */ + }; + u32 rtfrqmin; /* frequency count min. limit register */ +-#define RTFRQMAX_DISABLE (1 << 20) + union { + u32 rtfrqmax; /* PRGM=1: freq. count max. limit register */ + u32 rtfrqcnt; /* PRGM=0: freq. 
count register */ + }; + u32 rsvd1[40]; +-#define RDSTA_SKVT 0x80000000 +-#define RDSTA_SKVN 0x40000000 +-#define RDSTA_IF0 0x00000001 +-#define RDSTA_IF1 0x00000002 +-#define RDSTA_IFMASK (RDSTA_IF1 | RDSTA_IF0) +- u32 rdsta; ++#define RDSTA_IF 0x00000003 /* state handle instantiated flags 0 and 1 */ ++#define RDSTA_SKVN 0x40000000 /* Secure Key Valid Non-Test mode */ ++#define RDSTA_SKVT 0x80000000 /* Secure Key Valid Test. non-test mode */ ++#define RDSTA_TF 0x00000300 /* State handle instantiated Test-mode */ ++ u32 rdsta; /* DRNG status register */ + u32 rsvd2[15]; + }; + +@@ -340,12 +428,9 @@ + /* Bus Access Configuration Section 010-11f */ + /* Read/Writable */ + struct masterid jr_mid[4]; /* JRxLIODNR - JobR LIODN setup */ +- u32 rsvd3[11]; +- u32 jrstart; /* JRSTART - Job Ring Start Register */ ++ u32 rsvd3[12]; + struct masterid rtic_mid[4]; /* RTICxLIODNR - RTIC LIODN setup */ +- u32 rsvd4[5]; +- u32 deco_rsr; /* DECORSR - Deco Request Source */ +- u32 rsvd11; ++ u32 rsvd4[7]; + u32 deco_rq; /* DECORR - DECO Request */ + struct partid deco_mid[5]; /* DECOxLIODNR - 1 per DECO */ + u32 rsvd5[22]; +@@ -386,11 +471,6 @@ + #define MCFGR_DMA_RESET 0x10000000 + #define MCFGR_LONG_PTR 0x00010000 /* Use >32-bit desc addressing */ + #define SCFGR_RDBENABLE 0x00000400 +-#define SCFGR_VIRT_EN 0x00008000 +-#define DECORR_RQD0ENABLE 0x00000001 /* Enable DECO0 for direct access */ +-#define DECORSR_JR0 0x00000001 /* JR to supply TZ, SDID, ICID */ +-#define DECORSR_VALID 0x80000000 +-#define DECORR_DEN0 0x00010000 /* DECO0 available for access*/ + + /* AXI read cache control */ + #define MCFGR_ARCACHE_SHIFT 12 +@@ -407,12 +487,6 @@ + #define MCFGR_AXIPRI 0x00000008 /* Assert AXI priority sideband */ + #define MCFGR_BURST_64 0x00000001 /* Max burst size */ + +-/* JRSTART register offsets */ +-#define JRSTART_JR0_START 0x00000001 /* Start Job ring 0 */ +-#define JRSTART_JR1_START 0x00000002 /* Start Job ring 1 */ +-#define JRSTART_JR2_START 0x00000004 /* Start Job ring 2 */ +-#define JRSTART_JR3_START 0x00000008 /* Start Job ring 3 */ +- + /* + * caam_job_ring - direct job ring setup + * 1-4 possible per instantiation, base + 1000/2000/3000/4000 +@@ -455,7 +529,18 @@ + u32 rsvd11; + u32 jrcommand; /* JRCRx - JobR command */ + +- u32 rsvd12[932]; ++ u32 rsvd12[33]; ++ ++ /* Secure Memory Configuration - if you have it */ ++ u32 sm_cmd; /* SMCJRx - Secure memory command */ ++ u32 rsvd13; ++ u32 sm_status; /* SMCSJRx - Secure memory status */ ++ u32 rsvd14; ++ u32 sm_perm; /* SMAPJRx - Secure memory access perms */ ++ u32 sm_group2; /* SMAP2JRx - Secure memory access group 2 */ ++ u32 sm_group1; /* SMAP1JRx - Secure memory access group 1 */ ++ ++ u32 rsvd15[891]; + + /* Performance Monitor f00-fff */ + struct caam_perfmon perfmon; +@@ -578,6 +663,62 @@ + + #define JRCR_RESET 0x01 + ++/* secure memory command */ ++#define SMC_PAGE_SHIFT 16 ++#define SMC_PAGE_MASK (0xffff << SMC_PAGE_SHIFT) ++#define SMC_PART_SHIFT 8 ++#define SMC_PART_MASK (0x0f << SMC_PART_SHIFT) ++#define SMC_CMD_SHIFT 0 ++#define SMC_CMD_MASK (0x0f << SMC_CMD_SHIFT) ++ ++#define SMC_CMD_ALLOC_PAGE 0x01 /* allocate page to this partition */ ++#define SMC_CMD_DEALLOC_PAGE 0x02 /* deallocate page from partition */ ++#define SMC_CMD_DEALLOC_PART 0x03 /* deallocate partition */ ++#define SMC_CMD_PAGE_INQUIRY 0x05 /* find partition associate with page */ ++ ++/* secure memory (command) status */ ++#define SMCS_PAGE_SHIFT 16 ++#define SMCS_PAGE_MASK (0x0fff << SMCS_PAGE_SHIFT) ++#define SMCS_CMDERR_SHIFT 14 ++#define SMCS_CMDERR_MASK (3 
<< SMCS_CMDERR_SHIFT) ++#define SMCS_ALCERR_SHIFT 12 ++#define SMCS_ALCERR_MASK (3 << SMCS_ALCERR_SHIFT) ++#define SMCS_PGOWN_SHIFT 6 ++#define SMCS_PGWON_MASK (3 << SMCS_PGOWN_SHIFT) ++#define SMCS_PART_SHIFT 0 ++#define SMCS_PART_MASK (0xf << SMCS_PART_SHIFT) ++ ++#define SMCS_CMDERR_NONE 0 ++#define SMCS_CMDERR_INCOMP 1 /* Command not yet complete */ ++#define SMCS_CMDERR_SECFAIL 2 /* Security failure occurred */ ++#define SMCS_CMDERR_OVERFLOW 3 /* Command overflow */ ++ ++#define SMCS_ALCERR_NONE 0 ++#define SMCS_ALCERR_PSPERR 1 /* Partion marked PSP (dealloc only) */ ++#define SMCS_ALCERR_PAGEAVAIL 2 /* Page not available */ ++#define SMCS_ALCERR_PARTOWN 3 /* Partition ownership error */ ++ ++#define SMCS_PGOWN_AVAIL 0 /* Page is available */ ++#define SMCS_PGOWN_NOEXIST 1 /* Page initializing or nonexistent */ ++#define SMCS_PGOWN_NOOWN 2 /* Page owned by another processor */ ++#define SMCS_PGOWN_OWNED 3 /* Page belongs to this processor */ ++ ++/* secure memory access permissions */ ++#define SMCS_PERM_KEYMOD_SHIFT 16 ++#define SMCA_PERM_KEYMOD_MASK (0xff << SMCS_PERM_KEYMOD_SHIFT) ++#define SMCA_PERM_CSP_ZERO 0x8000 /* Zero when deallocated or released */ ++#define SMCA_PERM_PSP_LOCK 0x4000 /* Part./pages can't be deallocated */ ++#define SMCA_PERM_PERM_LOCK 0x2000 /* Lock permissions */ ++#define SMCA_PERM_GRP_LOCK 0x1000 /* Lock access groups */ ++#define SMCA_PERM_RINGID_SHIFT 10 ++#define SMCA_PERM_RINGID_MASK (3 << SMCA_PERM_RINGID_SHIFT) ++#define SMCA_PERM_G2_BLOB 0x0080 /* Group 2 blob import/export */ ++#define SMCA_PERM_G2_WRITE 0x0020 /* Group 2 write */ ++#define SMCA_PERM_G2_READ 0x0010 /* Group 2 read */ ++#define SMCA_PERM_G1_BLOB 0x0008 /* Group 1... */ ++#define SMCA_PERM_G1_WRITE 0x0002 ++#define SMCA_PERM_G1_READ 0x0001 ++ + /* + * caam_assurance - Assurance Controller View + * base + 0x6000 padded out to 0x1000 +@@ -746,7 +887,6 @@ + u32 jr_ctl_hi; /* CxJRR - JobR Control Register @800 */ + u32 jr_ctl_lo; + u64 jr_descaddr; /* CxDADR - JobR Descriptor Address */ +-#define DECO_OP_STATUS_HI_ERR_MASK 0xF00000FF + u32 op_status_hi; /* DxOPSTA - DECO Operation Status */ + u32 op_status_lo; + u32 rsvd24[2]; +@@ -760,21 +900,36 @@ + struct deco_sg_table sctr_tbl[4]; /* DxSTR - Scatter Tables */ + u32 rsvd29[48]; + u32 descbuf[64]; /* DxDESB - Descriptor buffer */ +- u32 rscvd30[193]; +-#define DESC_DBG_DECO_STAT_HOST_ERR 0x00D00000 +-#define DESC_DBG_DECO_STAT_VALID 0x80000000 +-#define DESC_DBG_DECO_STAT_MASK 0x00F00000 +- u32 desc_dbg; /* DxDDR - DECO Debug Register */ +- u32 rsvd31[126]; +-}; +- +-#define DECO_JQCR_WHL 0x20000000 +-#define DECO_JQCR_FOUR 0x10000000 +- +-#define JR_BLOCK_NUMBER 1 +-#define ASSURE_BLOCK_NUMBER 6 +-#define QI_BLOCK_NUMBER 7 +-#define DECO_BLOCK_NUMBER 8 +-#define PG_SIZE_4K 0x1000 +-#define PG_SIZE_64K 0x10000 ++ u32 rsvd30[320]; ++}; ++ ++/* ++ * Current top-level view of memory map is: ++ * ++ * 0x0000 - 0x0fff - CAAM Top-Level Control ++ * 0x1000 - 0x1fff - Job Ring 0 ++ * 0x2000 - 0x2fff - Job Ring 1 ++ * 0x3000 - 0x3fff - Job Ring 2 ++ * 0x4000 - 0x4fff - Job Ring 3 ++ * 0x5000 - 0x5fff - (unused) ++ * 0x6000 - 0x6fff - Assurance Controller ++ * 0x7000 - 0x7fff - Queue Interface ++ * 0x8000 - 0x8fff - DECO-CCB 0 ++ * 0x9000 - 0x9fff - DECO-CCB 1 ++ * 0xa000 - 0xafff - DECO-CCB 2 ++ * 0xb000 - 0xbfff - DECO-CCB 3 ++ * 0xc000 - 0xcfff - DECO-CCB 4 ++ * ++ * caam_full describes the full register view of CAAM if useful, ++ * although many configurations may choose to implement parts of ++ * the register map separately, in differing 
privilege regions ++ */ ++struct caam_full { ++ struct caam_ctrl __iomem ctrl; ++ struct caam_job_ring jr[4]; ++ u64 rsvd[512]; ++ struct caam_assurance assure; ++ struct caam_queue_if qi; ++}; ++ + #endif /* REGS_H */ +diff -Nur linux-4.1.3/drivers/crypto/caam/secvio.c linux-xbian-imx6/drivers/crypto/caam/secvio.c +--- linux-4.1.3/drivers/crypto/caam/secvio.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/drivers/crypto/caam/secvio.c 2015-07-27 23:13:04.213947410 +0200 +@@ -0,0 +1,290 @@ ++ ++/* ++ * SNVS Security Violation Handler ++ * Copyright (C) 2012-2015 Freescale Semiconductor, Inc., All Rights Reserved ++ */ ++ ++#include "compat.h" ++#include "intern.h" ++#include "secvio.h" ++#include "regs.h" ++ ++/* ++ * These names are associated with each violation handler. ++ * The source names were taken from MX6, and are based on recommendations ++ * for most common SoCs. ++ */ ++static const u8 *violation_src_name[] = { ++ "CAAM Internal Security Violation", ++ "JTAG Alarm", ++ "Watchdog", ++ "(reserved)", ++ "External Boot", ++ "External Tamper Detect", ++}; ++ ++/* These names help describe security monitor state for the console */ ++static const u8 *snvs_ssm_state_name[] = { ++ "init", ++ "hard fail", ++ "(undef:2)", ++ "soft fail", ++ "(undef:4)", ++ "(undef:5)", ++ "(undef:6)", ++ "(undef:7)", ++ "transition", ++ "check", ++ "(undef:10)", ++ "non-secure", ++ "(undef:12)", ++ "trusted", ++ "(undef:14)", ++ "secure", ++}; ++ ++/* Top-level security violation interrupt */ ++static irqreturn_t snvs_secvio_interrupt(int irq, void *snvsdev) ++{ ++ struct device *dev = snvsdev; ++ struct snvs_secvio_drv_private *svpriv = dev_get_drvdata(dev); ++ ++ /* Check the HP secvio status register */ ++ svpriv->irqcause = rd_reg32(&svpriv->svregs->hp.secvio_status) & ++ HP_SECVIOST_SECVIOMASK; ++ ++ if (!svpriv->irqcause) ++ return IRQ_NONE; ++ ++ /* Now ACK cause */ ++ setbits32(&svpriv->svregs->hp.secvio_status, svpriv->irqcause); ++ ++ /* And run deferred service */ ++ preempt_disable(); ++ tasklet_schedule(&svpriv->irqtask[smp_processor_id()]); ++ preempt_enable(); ++ ++ return IRQ_HANDLED; ++} ++ ++/* Deferred service handler. Tasklet arg is simply the SNVS dev */ ++static void snvs_secvio_dispatch(unsigned long indev) ++{ ++ struct device *dev = (struct device *)indev; ++ struct snvs_secvio_drv_private *svpriv = dev_get_drvdata(dev); ++ unsigned long flags; ++ int i; ++ ++ ++ /* Look through stored causes, call each handler if exists */ ++ for (i = 0; i < MAX_SECVIO_SOURCES; i++) ++ if (svpriv->irqcause & (1 << i)) { ++ spin_lock_irqsave(&svpriv->svlock, flags); ++ svpriv->intsrc[i].handler(dev, i, ++ svpriv->intsrc[i].ext); ++ spin_unlock_irqrestore(&svpriv->svlock, flags); ++ }; ++ ++ /* Re-enable now-serviced interrupts */ ++ setbits32(&svpriv->svregs->hp.secvio_intcfg, svpriv->irqcause); ++} ++ ++/* ++ * Default cause handler, used in lieu of an application-defined handler. ++ * All it does at this time is print a console message. It could force a halt. 
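++ *
++ * A platform driver can override this per cause with
++ * snvs_secvio_install_handler(); a minimal sketch (the tamper_handler
++ * callback and tamper_ctx argument here are hypothetical, not part of
++ * this driver):
++ *
++ *	snvs_secvio_install_handler(snvsdev, SECVIO_CAUSE_TAMPER_DETECT,
++ *				    tamper_handler, NULL, tamper_ctx);
++ *
++ * Passing NULL for the description keeps the default cause name.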
++ */ ++static void snvs_secvio_default(struct device *dev, u32 cause, void *ext) ++{ ++ struct snvs_secvio_drv_private *svpriv = dev_get_drvdata(dev); ++ ++ dev_err(dev, "Unhandled Security Violation Interrupt %d = %s\n", ++ cause, svpriv->intsrc[cause].intname); ++} ++ ++/* ++ * Install an application-defined handler for a specified cause ++ * Arguments: ++ * - dev points to SNVS-owning device ++ * - cause interrupt source cause ++ * - handler application-defined handler, gets called with dev ++ * source cause, and locally-defined handler argument ++ * - cause_description points to a string to override the default cause ++ * name, this can be used as an alternate for error ++ * messages and such. If left NULL, the default ++ * description string is used. ++ * - ext pointer to any extra data needed by the handler. ++ */ ++int snvs_secvio_install_handler(struct device *dev, enum secvio_cause cause, ++ void (*handler)(struct device *dev, u32 cause, ++ void *ext), ++ u8 *cause_description, void *ext) ++{ ++ unsigned long flags; ++ struct snvs_secvio_drv_private *svpriv; ++ ++ svpriv = dev_get_drvdata(dev); ++ ++ if ((handler == NULL) || (cause > SECVIO_CAUSE_SOURCE_5)) ++ return -EINVAL; ++ ++ spin_lock_irqsave(&svpriv->svlock, flags); ++ svpriv->intsrc[cause].handler = handler; ++ if (cause_description != NULL) ++ svpriv->intsrc[cause].intname = cause_description; ++ if (ext != NULL) ++ svpriv->intsrc[cause].ext = ext; ++ spin_unlock_irqrestore(&svpriv->svlock, flags); ++ ++ return 0; ++} ++EXPORT_SYMBOL(snvs_secvio_install_handler); ++ ++/* ++ * Remove an application-defined handler for a specified cause (and, by ++ * implication, restore the "default". ++ * Arguments: ++ * - dev points to SNVS-owning device ++ * - cause interrupt source cause ++ */ ++int snvs_secvio_remove_handler(struct device *dev, enum secvio_cause cause) ++{ ++ unsigned long flags; ++ struct snvs_secvio_drv_private *svpriv; ++ ++ svpriv = dev_get_drvdata(dev); ++ ++ if (cause > SECVIO_CAUSE_SOURCE_5) ++ return -EINVAL; ++ ++ spin_lock_irqsave(&svpriv->svlock, flags); ++ svpriv->intsrc[cause].intname = violation_src_name[cause]; ++ svpriv->intsrc[cause].handler = snvs_secvio_default; ++ svpriv->intsrc[cause].ext = NULL; ++ spin_unlock_irqrestore(&svpriv->svlock, flags); ++ return 0; ++} ++EXPORT_SYMBOL(snvs_secvio_remove_handler); ++ ++static int snvs_secvio_remove(struct platform_device *pdev) ++{ ++ struct device *svdev; ++ struct snvs_secvio_drv_private *svpriv; ++ int i; ++ ++ svdev = &pdev->dev; ++ svpriv = dev_get_drvdata(svdev); ++ ++ /* Set all sources to nonfatal */ ++ wr_reg32(&svpriv->svregs->hp.secvio_intcfg, 0); ++ ++ /* Remove tasklets and release interrupt */ ++ for_each_possible_cpu(i) ++ tasklet_kill(&svpriv->irqtask[i]); ++ ++ free_irq(svpriv->irq, svdev); ++ iounmap(svpriv->svregs); ++ kfree(svpriv); ++ ++ return 0; ++} ++ ++static int snvs_secvio_probe(struct platform_device *pdev) ++{ ++ struct device *svdev; ++ struct snvs_secvio_drv_private *svpriv; ++ struct device_node *np, *npirq; ++ struct snvs_full __iomem *snvsregs; ++ int i, error; ++ u32 hpstate; ++ ++ svpriv = kzalloc(sizeof(struct snvs_secvio_drv_private), GFP_KERNEL); ++ if (!svpriv) ++ return -ENOMEM; ++ ++ svdev = &pdev->dev; ++ dev_set_drvdata(svdev, svpriv); ++ svpriv->pdev = pdev; ++ np = pdev->dev.of_node; ++ ++ npirq = of_find_compatible_node(NULL, NULL, "fsl,imx6q-caam-secvio"); ++ if (!npirq) { ++ dev_err(svdev, "can't identify secvio interrupt\n"); ++ kfree(svpriv); ++ return -EINVAL; ++ } ++ svpriv->irq = 
irq_of_parse_and_map(npirq, 0); ++ if (svpriv->irq <= 0) { ++ kfree(svpriv); ++ return -EINVAL; ++ } ++ ++ snvsregs = of_iomap(np, 0); ++ if (!snvsregs) { ++ dev_err(svdev, "register mapping failed\n"); ++ return -ENOMEM; ++ } ++ svpriv->svregs = (struct snvs_full __force *)snvsregs; ++ ++ /* Device data set up. Now init interrupt source descriptions */ ++ for (i = 0; i < MAX_SECVIO_SOURCES; i++) { ++ svpriv->intsrc[i].intname = violation_src_name[i]; ++ svpriv->intsrc[i].handler = snvs_secvio_default; ++ } ++ /* Connect main handler */ ++ for_each_possible_cpu(i) ++ tasklet_init(&svpriv->irqtask[i], snvs_secvio_dispatch, ++ (unsigned long)svdev); ++ ++ error = request_irq(svpriv->irq, snvs_secvio_interrupt, ++ IRQF_SHARED, "snvs-secvio", svdev); ++ if (error) { ++ dev_err(svdev, "can't connect secvio interrupt\n"); ++ irq_dispose_mapping(svpriv->irq); ++ svpriv->irq = 0; ++ iounmap(svpriv->svregs); ++ kfree(svpriv); ++ return -EINVAL; ++ } ++ ++ /* ++ * Configure all sources as fatal violations except LP section, ++ * source #5 (typically used as an external tamper detect), and ++ * source #3 (typically unused). Whenever the transition to ++ * secure mode has occurred, these will now be "fatal" violations ++ */ ++ wr_reg32(&svpriv->svregs->hp.secvio_intcfg, ++ HP_SECVIO_INTEN_SRC4 | HP_SECVIO_INTEN_SRC2 | ++ HP_SECVIO_INTEN_SRC1 | HP_SECVIO_INTEN_SRC0); ++ ++ hpstate = (rd_reg32(&svpriv->svregs->hp.status) & ++ HP_STATUS_SSM_ST_MASK) >> HP_STATUS_SSM_ST_SHIFT; ++ dev_info(svdev, "violation handlers armed - %s state\n", ++ snvs_ssm_state_name[hpstate]); ++ ++ return 0; ++} ++ ++static struct of_device_id snvs_secvio_match[] = { ++ { ++ .compatible = "fsl,imx6q-caam-snvs", ++ }, ++ {}, ++}; ++MODULE_DEVICE_TABLE(of, snvs_secvio_match); ++ ++static struct platform_driver snvs_secvio_driver = { ++ .driver = { ++ .name = "snvs-secvio", ++ .owner = THIS_MODULE, ++ .of_match_table = snvs_secvio_match, ++ }, ++ .probe = snvs_secvio_probe, ++ .remove = snvs_secvio_remove, ++}; ++ ++module_platform_driver(snvs_secvio_driver); ++ ++MODULE_LICENSE("Dual BSD/GPL"); ++MODULE_DESCRIPTION("FSL SNVS Security Violation Handler"); ++MODULE_AUTHOR("Freescale Semiconductor - MCU"); ++ +diff -Nur linux-4.1.3/drivers/crypto/caam/secvio.h linux-xbian-imx6/drivers/crypto/caam/secvio.h +--- linux-4.1.3/drivers/crypto/caam/secvio.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/drivers/crypto/caam/secvio.h 2015-07-27 23:13:04.213947410 +0200 +@@ -0,0 +1,66 @@ ++ ++/* ++ * CAAM Security Violation Handler ++ * Copyright (C) 2012-2014 Freescale Semiconductor, Inc., All Rights Reserved ++ */ ++ ++#ifndef SECVIO_H ++#define SECVIO_H ++ ++#include "snvsregs.h" ++ ++ ++/* ++ * Defines the published interfaces to install/remove application-specified ++ * handlers for catching violations ++ */ ++ ++#define MAX_SECVIO_SOURCES 6 ++ ++/* these are the untranslated causes */ ++enum secvio_cause { ++ SECVIO_CAUSE_SOURCE_0, ++ SECVIO_CAUSE_SOURCE_1, ++ SECVIO_CAUSE_SOURCE_2, ++ SECVIO_CAUSE_SOURCE_3, ++ SECVIO_CAUSE_SOURCE_4, ++ SECVIO_CAUSE_SOURCE_5 ++}; ++ ++/* These are common "recommended" cause definitions for most devices */ ++#define SECVIO_CAUSE_CAAM_VIOLATION SECVIO_CAUSE_SOURCE_0 ++#define SECVIO_CAUSE_JTAG_ALARM SECVIO_CAUSE_SOURCE_1 ++#define SECVIO_CAUSE_WATCHDOG SECVIO_CAUSE_SOURCE_2 ++#define SECVIO_CAUSE_EXTERNAL_BOOT SECVIO_CAUSE_SOURCE_4 ++#define SECVIO_CAUSE_TAMPER_DETECT SECVIO_CAUSE_SOURCE_5 ++ ++int snvs_secvio_install_handler(struct device *dev, enum secvio_cause cause, ++ void (*handler)(struct 
device *dev, u32 cause, ++ void *ext), ++ u8 *cause_description, void *ext); ++int snvs_secvio_remove_handler(struct device *dev, enum secvio_cause cause); ++ ++/* ++ * Private data definitions for the secvio "driver" ++ */ ++ ++struct secvio_int_src { ++ const u8 *intname; /* Points to a descriptive name for source */ ++ void *ext; /* Extended data to pass to the handler */ ++ void (*handler)(struct device *dev, u32 cause, void *ext); ++}; ++ ++struct snvs_secvio_drv_private { ++ struct platform_device *pdev; ++ spinlock_t svlock ____cacheline_aligned; ++ struct tasklet_struct irqtask[NR_CPUS]; ++ struct snvs_full __iomem *svregs; /* both HP and LP domains */ ++ int irq; ++ u32 irqcause; /* stashed cause of violation interrupt */ ++ ++ /* Registered handlers for each violation */ ++ struct secvio_int_src intsrc[MAX_SECVIO_SOURCES]; ++ ++}; ++ ++#endif /* SECVIO_H */ +diff -Nur linux-4.1.3/drivers/crypto/caam/sg_sw_sec4.h linux-xbian-imx6/drivers/crypto/caam/sg_sw_sec4.h +--- linux-4.1.3/drivers/crypto/caam/sg_sw_sec4.h 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/crypto/caam/sg_sw_sec4.h 2015-07-27 23:13:04.213947410 +0200 +@@ -1,7 +1,7 @@ + /* + * CAAM/SEC 4.x functions for using scatterlists in caam driver + * +- * Copyright 2008-2011 Freescale Semiconductor, Inc. ++ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. + * + */ + +@@ -91,13 +91,22 @@ + { + if (unlikely(chained)) { + int i; ++ struct scatterlist *tsg = sg; ++ ++ /* We use a local copy of the sg pointer to avoid moving the ++ * head of the list pointed to by sg as we wall the list. ++ */ + for (i = 0; i < nents; i++) { +- dma_map_sg(dev, sg, 1, dir); +- sg = sg_next(sg); ++ dma_map_sg(dev, tsg, 1, dir); ++ tsg = sg_next(tsg); + } + } else { + dma_map_sg(dev, sg, nents, dir); + } ++ ++ if ((dir == DMA_TO_DEVICE) || (dir == DMA_BIDIRECTIONAL)) ++ dma_sync_sg_for_device(dev, sg, nents, dir); ++ + return nents; + } + +@@ -105,6 +114,9 @@ + unsigned int nents, enum dma_data_direction dir, + bool chained) + { ++ if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)) ++ dma_sync_sg_for_cpu(dev, sg, nents, dir); ++ + if (unlikely(chained)) { + int i; + for (i = 0; i < nents; i++) { +@@ -116,3 +128,41 @@ + } + return nents; + } ++ ++/* Copy from len bytes of sg to dest, starting from beginning */ ++static inline void sg_copy(u8 *dest, struct scatterlist *sg, unsigned int len) ++{ ++ struct scatterlist *current_sg = sg; ++ int cpy_index = 0, next_cpy_index = current_sg->length; ++ ++ while (next_cpy_index < len) { ++ memcpy(dest + cpy_index, (u8 *) sg_virt(current_sg), ++ current_sg->length); ++ current_sg = sg_next(current_sg); ++ cpy_index = next_cpy_index; ++ next_cpy_index += current_sg->length; ++ } ++ if (cpy_index < len) ++ memcpy(dest + cpy_index, (u8 *) sg_virt(current_sg), ++ len - cpy_index); ++} ++ ++/* Copy sg data, from to_skip to end, to dest */ ++static inline void sg_copy_part(u8 *dest, struct scatterlist *sg, ++ int to_skip, unsigned int end) ++{ ++ struct scatterlist *current_sg = sg; ++ int sg_index, cpy_index; ++ ++ sg_index = current_sg->length; ++ while (sg_index <= to_skip) { ++ current_sg = sg_next(current_sg); ++ sg_index += current_sg->length; ++ } ++ cpy_index = sg_index - to_skip; ++ memcpy(dest, (u8 *) sg_virt(current_sg) + ++ current_sg->length - cpy_index, cpy_index); ++ current_sg = sg_next(current_sg); ++ if (end - sg_index) ++ sg_copy(dest + cpy_index, current_sg, end - sg_index); ++} +diff -Nur linux-4.1.3/drivers/crypto/caam/sm.h 
linux-xbian-imx6/drivers/crypto/caam/sm.h +--- linux-4.1.3/drivers/crypto/caam/sm.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/drivers/crypto/caam/sm.h 2015-07-27 23:13:04.213947410 +0200 +@@ -0,0 +1,88 @@ ++ ++/* ++ * CAAM Secure Memory/Keywrap API Definitions ++ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. ++ */ ++ ++#ifndef SM_H ++#define SM_H ++ ++ ++/* Storage access permissions */ ++#define SM_PERM_READ 0x01 ++#define SM_PERM_WRITE 0x02 ++#define SM_PERM_BLOB 0x03 ++ ++ ++/* Keystore maintenance functions */ ++void sm_init_keystore(struct device *dev); ++u32 sm_detect_keystore_units(struct device *dev); ++int sm_establish_keystore(struct device *dev, u32 unit); ++void sm_release_keystore(struct device *dev, u32 unit); ++void caam_sm_shutdown(struct platform_device *pdev); ++int caam_sm_example_init(struct platform_device *pdev); ++ ++/* Keystore accessor functions */ ++extern int sm_keystore_slot_alloc(struct device *dev, u32 unit, u32 size, ++ u32 *slot); ++extern int sm_keystore_slot_dealloc(struct device *dev, u32 unit, u32 slot); ++extern int sm_keystore_slot_load(struct device *dev, u32 unit, u32 slot, ++ const u8 *key_data, u32 key_length); ++extern int sm_keystore_slot_read(struct device *dev, u32 unit, u32 slot, ++ u32 key_length, u8 *key_data); ++extern int sm_keystore_slot_encapsulate(struct device *dev, u32 unit, ++ u32 inslot, u32 outslot, u16 secretlen, ++ u8 *keymod, u16 keymodlen); ++extern int sm_keystore_slot_decapsulate(struct device *dev, u32 unit, ++ u32 inslot, u32 outslot, u16 secretlen, ++ u8 *keymod, u16 keymodlen); ++ ++/* Data structure to hold per-slot information */ ++struct keystore_data_slot_info { ++ u8 allocated; /* Track slot assignments */ ++ u32 key_length; /* Size of the key */ ++}; ++ ++/* Data structure to hold keystore information */ ++struct keystore_data { ++ void *base_address; /* Base of the Secure Partition */ ++ u32 slot_count; /* Number of slots in the keystore */ ++ struct keystore_data_slot_info *slot; /* Per-slot information */ ++}; ++ ++/* store the detected attributes of a secure memory page */ ++struct sm_page_descriptor { ++ u16 phys_pagenum; /* may be discontiguous */ ++ u16 own_part; /* Owning partition */ ++ void *pg_base; /* Calculated virtual address */ ++ struct keystore_data *ksdata; ++}; ++ ++struct caam_drv_private_sm { ++ struct device *parentdev; /* this ends up as the controller */ ++ struct device *smringdev; /* ring that owns this instance */ ++ spinlock_t kslock ____cacheline_aligned; ++ ++ /* Default parameters for geometry */ ++ u32 max_pages; /* maximum pages this instance can support */ ++ u32 top_partition; /* highest partition number in this instance */ ++ u32 top_page; /* highest page number in this instance */ ++ u32 page_size; /* page size */ ++ u32 slot_size; /* selected size of each storage block */ ++ ++ /* Partition/Page Allocation Map */ ++ u32 localpages; /* Number of pages we can access */ ++ struct sm_page_descriptor *pagedesc; /* Allocated per-page */ ++ ++ /* Installed handlers for keystore access */ ++ int (*data_init)(struct device *dev, u32 unit); ++ void (*data_cleanup)(struct device *dev, u32 unit); ++ int (*slot_alloc)(struct device *dev, u32 unit, u32 size, u32 *slot); ++ int (*slot_dealloc)(struct device *dev, u32 unit, u32 slot); ++ void *(*slot_get_address)(struct device *dev, u32 unit, u32 handle); ++ u32 (*slot_get_base)(struct device *dev, u32 unit, u32 handle); ++ u32 (*slot_get_offset)(struct device *dev, u32 unit, u32 handle); ++ u32 
(*slot_get_slot_size)(struct device *dev, u32 unit, u32 handle); ++}; ++ ++#endif /* SM_H */ +diff -Nur linux-4.1.3/drivers/crypto/caam/sm_store.c linux-xbian-imx6/drivers/crypto/caam/sm_store.c +--- linux-4.1.3/drivers/crypto/caam/sm_store.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/drivers/crypto/caam/sm_store.c 2015-07-27 23:13:04.213947410 +0200 +@@ -0,0 +1,896 @@ ++ ++/* ++ * CAAM Secure Memory Storage Interface ++ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. ++ * ++ * Loosely based on the SHW Keystore API for SCC/SCC2 ++ * Experimental implementation and NOT intended for upstream use. Expect ++ * this interface to be amended significantly in the future once it becomes ++ * integrated into live applications. ++ * ++ * Known issues: ++ * ++ * - Executes one instance of an secure memory "driver". This is tied to the ++ * fact that job rings can't run as standalone instances in the present ++ * configuration. ++ * ++ * - It does not expose a userspace interface. The value of a userspace ++ * interface for access to secrets is a point for further architectural ++ * discussion. ++ * ++ * - Partition/permission management is not part of this interface. It ++ * depends on some level of "knowledge" agreed upon between bootloader, ++ * provisioning applications, and OS-hosted software (which uses this ++ * driver). ++ * ++ * - No means of identifying the location or purpose of secrets managed by ++ * this interface exists; "slot location" and format of a given secret ++ * needs to be agreed upon between bootloader, provisioner, and OS-hosted ++ * application. ++ */ ++ ++#include "compat.h" ++#include "regs.h" ++#include "jr.h" ++#include "desc.h" ++#include "intern.h" ++#include "error.h" ++#include "sm.h" ++ ++#ifdef SM_DEBUG_CONT ++void sm_show_page(struct device *dev, struct sm_page_descriptor *pgdesc) ++{ ++ struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev); ++ u32 i, *smdata; ++ ++ dev_info(dev, "physical page %d content at 0x%08x\n", ++ pgdesc->phys_pagenum, pgdesc->pg_base); ++ smdata = pgdesc->pg_base; ++ for (i = 0; i < (smpriv->page_size / sizeof(u32)); i += 4) ++ dev_info(dev, "[0x%08x] 0x%08x 0x%08x 0x%08x 0x%08x\n", ++ (u32)&smdata[i], smdata[i], smdata[i+1], smdata[i+2], ++ smdata[i+3]); ++} ++#endif ++ ++/* ++ * Construct a secure memory blob encapsulation job descriptor ++ * ++ * - desc pointer to hold new (to be allocated) pointer to the generated ++ * descriptor for later use. Calling thread can kfree the ++ * descriptor after execution. ++ * - keymod Physical pointer to key modifier (contiguous piece). ++ * - keymodsz Size of key modifier in bytes (should normally be 8). ++ * - secretbuf Physical pointer (within an accessible secure memory page) ++ * of the secret to be encapsulated. ++ * - outbuf Physical pointer (within an accessible secure memory page) ++ * of the encapsulated output. This will be larger than the ++ * input secret because of the added encapsulation data. ++ * - secretsz Size of input secret, in bytes. ++ * - auth If nonzero, use AES-CCM for encapsulation, else use ECB ++ * ++ * Note: this uses 32-bit pointers at present ++ */ ++#define INITIAL_DESCSZ 16 /* size of tmp buffer for descriptor const. 
*/
++static int blob_encap_desc(u32 **desc, dma_addr_t keymod, u16 keymodsz,
++			   dma_addr_t secretbuf, dma_addr_t outbuf,
++			   u16 secretsz, bool auth)
++{
++	u32 *tdesc, tmpdesc[INITIAL_DESCSZ];
++	u16 dsize, idx;
++
++	memset(tmpdesc, 0, INITIAL_DESCSZ * sizeof(u32));
++	idx = 1;
++
++	/* Load key modifier */
++	tmpdesc[idx++] = CMD_LOAD | LDST_CLASS_2_CCB | LDST_SRCDST_BYTE_KEY |
++			 ((12 << LDST_OFFSET_SHIFT) & LDST_OFFSET_MASK) |
++			 (keymodsz & LDST_LEN_MASK);
++
++	tmpdesc[idx++] = (u32)keymod;
++
++	/* Encapsulate to secure memory */
++	tmpdesc[idx++] = CMD_SEQ_IN_PTR | secretsz;
++	tmpdesc[idx++] = (u32)secretbuf;
++
++	/* Add space for BKEK and MAC tag */
++	tmpdesc[idx++] = CMD_SEQ_OUT_PTR | (secretsz + (32 + 16));
++
++	tmpdesc[idx++] = (u32)outbuf;
++	tmpdesc[idx] = CMD_OPERATION | OP_TYPE_ENCAP_PROTOCOL | OP_PCLID_BLOB |
++		       OP_PCL_BLOB_PTXT_SECMEM;
++	if (auth)
++		tmpdesc[idx] |= OP_PCL_BLOB_EKT;
++
++	idx++;
++	tmpdesc[0] = CMD_DESC_HDR | HDR_ONE | (idx & HDR_DESCLEN_MASK);
++	dsize = idx * sizeof(u32);
++
++	tdesc = kmalloc(dsize, GFP_KERNEL | GFP_DMA);
++	if (tdesc == NULL)
++		return 0;
++
++	memcpy(tdesc, tmpdesc, dsize);
++	*desc = tdesc;
++	return dsize;
++}
++
++/*
++ * Construct a secure memory blob decapsulation job descriptor
++ *
++ * - desc	pointer to hold new (to be allocated) pointer to the generated
++ *		descriptor for later use. Calling thread can kfree the
++ *		descriptor after execution.
++ * - keymod	Physical pointer to key modifier (contiguous piece).
++ * - keymodsz	Size of key modifier in bytes (should normally be 16).
++ * - blobbuf	Physical pointer (within an accessible secure memory page)
++ *		of the blob to be decapsulated.
++ * - outbuf	Physical pointer (within an accessible secure memory page)
++ *		of the decapsulated output.
++ * - blobsz	Size of input blob, in bytes.
++ * - auth If nonzero, assume AES-CCM for decapsulation, else use ECB ++ * ++ * Note: this uses 32-bit pointers at present ++ */ ++static int blob_decap_desc(u32 **desc, dma_addr_t keymod, u16 keymodsz, ++ dma_addr_t blobbuf, dma_addr_t outbuf, ++ u16 blobsz, bool auth) ++{ ++ u32 *tdesc, tmpdesc[INITIAL_DESCSZ]; ++ u16 dsize, idx; ++ ++ memset(tmpdesc, 0, INITIAL_DESCSZ * sizeof(u32)); ++ idx = 1; ++ ++ /* Load key modifier */ ++ tmpdesc[idx++] = CMD_LOAD | LDST_CLASS_2_CCB | LDST_SRCDST_BYTE_KEY | ++ ((12 << LDST_OFFSET_SHIFT) & LDST_OFFSET_MASK) | ++ (keymodsz & LDST_LEN_MASK); ++ ++ tmpdesc[idx++] = (u32)keymod; ++ ++ /* Compensate BKEK + MAC tag */ ++ tmpdesc[idx++] = CMD_SEQ_IN_PTR | (blobsz + 32 + 16); ++ ++ tmpdesc[idx++] = (u32)blobbuf; ++ tmpdesc[idx++] = CMD_SEQ_OUT_PTR | blobsz; ++ tmpdesc[idx++] = (u32)outbuf; ++ ++ /* Decapsulate from secure memory partition to black blob */ ++ tmpdesc[idx] = CMD_OPERATION | OP_TYPE_DECAP_PROTOCOL | OP_PCLID_BLOB | ++ OP_PCL_BLOB_PTXT_SECMEM | OP_PCL_BLOB_BLACK; ++ if (auth) ++ tmpdesc[idx] |= OP_PCL_BLOB_EKT; ++ ++ idx++; ++ tmpdesc[0] = CMD_DESC_HDR | HDR_ONE | (idx & HDR_DESCLEN_MASK); ++ dsize = idx * sizeof(u32); ++ ++ tdesc = kmalloc(dsize, GFP_KERNEL | GFP_DMA); ++ if (tdesc == NULL) ++ return 0; ++ ++ memcpy(tdesc, tmpdesc, dsize); ++ *desc = tdesc; ++ return dsize; ++} ++ ++/* ++ * Pseudo-synchronous ring access functions for carrying out key ++ * encapsulation and decapsulation ++ */ ++ ++struct sm_key_job_result { ++ int error; ++ struct completion completion; ++}; ++ ++void sm_key_job_done(struct device *dev, u32 *desc, u32 err, void *context) ++{ ++ struct sm_key_job_result *res = context; ++ ++ res->error = err; /* save off the error for postprocessing */ ++ complete(&res->completion); /* mark us complete */ ++} ++ ++static int sm_key_job(struct device *ksdev, u32 *jobdesc) ++{ ++ struct sm_key_job_result testres; ++ struct caam_drv_private_sm *kspriv; ++ int rtn = 0; ++ ++ kspriv = dev_get_drvdata(ksdev); ++ ++ init_completion(&testres.completion); ++ ++ rtn = caam_jr_enqueue(kspriv->smringdev, jobdesc, sm_key_job_done, ++ &testres); ++ if (!rtn) { ++ wait_for_completion_interruptible(&testres.completion); ++ rtn = testres.error; ++ } ++ return rtn; ++} ++ ++/* ++ * Following section establishes the default methods for keystore access ++ * They are NOT intended for use external to this module ++ * ++ * In the present version, these are the only means for the higher-level ++ * interface to deal with the mechanics of accessing the phyiscal keystore ++ */ ++ ++ ++int slot_alloc(struct device *dev, u32 unit, u32 size, u32 *slot) ++{ ++ struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev); ++ struct keystore_data *ksdata = smpriv->pagedesc[unit].ksdata; ++ u32 i; ++#ifdef SM_DEBUG ++ dev_info(dev, "slot_alloc(): requesting slot for %d bytes\n", size); ++#endif ++ ++ if (size > smpriv->slot_size) ++ return -EKEYREJECTED; ++ ++ for (i = 0; i < ksdata->slot_count; i++) { ++ if (ksdata->slot[i].allocated == 0) { ++ ksdata->slot[i].allocated = 1; ++ (*slot) = i; ++#ifdef SM_DEBUG ++ dev_info(dev, "slot_alloc(): new slot %d allocated\n", ++ *slot); ++#endif ++ return 0; ++ } ++ } ++ ++ return -ENOSPC; ++} ++EXPORT_SYMBOL(slot_alloc); ++ ++int slot_dealloc(struct device *dev, u32 unit, u32 slot) ++{ ++ struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev); ++ struct keystore_data *ksdata = smpriv->pagedesc[unit].ksdata; ++ u8 __iomem *slotdata; ++ ++#ifdef SM_DEBUG ++ dev_info(dev, "slot_dealloc(): releasing slot %d\n", slot); 
++#endif ++ if (slot >= ksdata->slot_count) ++ return -EINVAL; ++ slotdata = ksdata->base_address + slot * smpriv->slot_size; ++ ++ if (ksdata->slot[slot].allocated == 1) { ++ /* Forcibly overwrite the data from the keystore */ ++ memset(ksdata->base_address + slot * smpriv->slot_size, 0, ++ smpriv->slot_size); ++ ++ ksdata->slot[slot].allocated = 0; ++#ifdef SM_DEBUG ++ dev_info(dev, "slot_dealloc(): slot %d released\n", slot); ++#endif ++ return 0; ++ } ++ ++ return -EINVAL; ++} ++EXPORT_SYMBOL(slot_dealloc); ++ ++void *slot_get_address(struct device *dev, u32 unit, u32 slot) ++{ ++ struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev); ++ struct keystore_data *ksdata = smpriv->pagedesc[unit].ksdata; ++ ++ if (slot >= ksdata->slot_count) ++ return NULL; ++ ++#ifdef SM_DEBUG ++ dev_info(dev, "slot_get_address(): slot %d is 0x%08x\n", slot, ++ (u32)ksdata->base_address + slot * smpriv->slot_size); ++#endif ++ ++ return ksdata->base_address + slot * smpriv->slot_size; ++} ++ ++u32 slot_get_base(struct device *dev, u32 unit, u32 slot) ++{ ++ struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev); ++ struct keystore_data *ksdata = smpriv->pagedesc[unit].ksdata; ++ ++ /* ++ * There could potentially be more than one secure partition object ++ * associated with this keystore. For now, there is just one. ++ */ ++ ++ (void)slot; ++ ++#ifdef SM_DEBUG ++ dev_info(dev, "slot_get_base(): slot %d = 0x%08x\n", ++ slot, (u32)ksdata->base_address); ++#endif ++ ++ return (u32)(ksdata->base_address); ++} ++ ++u32 slot_get_offset(struct device *dev, u32 unit, u32 slot) ++{ ++ struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev); ++ struct keystore_data *ksdata = smpriv->pagedesc[unit].ksdata; ++ ++ if (slot >= ksdata->slot_count) ++ return -EINVAL; ++ ++#ifdef SM_DEBUG ++ dev_info(dev, "slot_get_offset(): slot %d = %d\n", slot, ++ slot * smpriv->slot_size); ++#endif ++ ++ return slot * smpriv->slot_size; ++} ++ ++u32 slot_get_slot_size(struct device *dev, u32 unit, u32 slot) ++{ ++ struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev); ++ ++ ++#ifdef SM_DEBUG ++ dev_info(dev, "slot_get_slot_size(): slot %d = %d\n", slot, ++ smpriv->slot_size); ++#endif ++ /* All slots are the same size in the default implementation */ ++ return smpriv->slot_size; ++} ++ ++ ++ ++int kso_init_data(struct device *dev, u32 unit) ++{ ++ struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev); ++ int retval = -EINVAL; ++ struct keystore_data *keystore_data = NULL; ++ u32 slot_count; ++ u32 keystore_data_size; ++ ++ /* ++ * Calculate the required size of the keystore data structure, based ++ * on the number of keys that can fit in the partition. ++ */ ++ slot_count = smpriv->page_size / smpriv->slot_size; ++#ifdef SM_DEBUG ++ dev_info(dev, "kso_init_data: %d slots initializing\n", slot_count); ++#endif ++ ++ keystore_data_size = sizeof(struct keystore_data) + ++ slot_count * ++ sizeof(struct keystore_data_slot_info); ++ ++ keystore_data = kzalloc(keystore_data_size, GFP_KERNEL); ++ ++ if (keystore_data == NULL) { ++ retval = -ENOSPC; ++ goto out; ++ } ++ ++#ifdef SM_DEBUG ++ dev_info(dev, "kso_init_data: keystore data size = %d\n", ++ keystore_data_size); ++#endif ++ ++ /* ++ * Place the slot information structure directly after the keystore data ++ * structure. 
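++	 * The single allocation is therefore laid out as
++	 *
++	 *   [ struct keystore_data | slot_count * struct keystore_data_slot_info ]
++	 *
++	 * which is why keystore_data->slot can simply point at
++	 * (keystore_data + 1).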
++	 */
++	keystore_data->slot = (struct keystore_data_slot_info *)
++			      (keystore_data + 1);
++	keystore_data->slot_count = slot_count;
++
++	smpriv->pagedesc[unit].ksdata = keystore_data;
++	smpriv->pagedesc[unit].ksdata->base_address =
++		smpriv->pagedesc[unit].pg_base;
++
++	retval = 0;
++
++out:
++	if (retval != 0)
++		kfree(keystore_data);
++
++	return retval;
++}
++
++void kso_cleanup_data(struct device *dev, u32 unit)
++{
++	struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev);
++
++	/* Release the allocated keystore management data */
++	kfree(smpriv->pagedesc[unit].ksdata);
++
++	return;
++}
++
++
++
++/*
++ * Keystore management section
++ */
++
++void sm_init_keystore(struct device *dev)
++{
++	struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev);
++
++	smpriv->data_init = kso_init_data;
++	smpriv->data_cleanup = kso_cleanup_data;
++	smpriv->slot_alloc = slot_alloc;
++	smpriv->slot_dealloc = slot_dealloc;
++	smpriv->slot_get_address = slot_get_address;
++	smpriv->slot_get_base = slot_get_base;
++	smpriv->slot_get_offset = slot_get_offset;
++	smpriv->slot_get_slot_size = slot_get_slot_size;
++#ifdef SM_DEBUG
++	dev_info(dev, "sm_init_keystore(): handlers installed\n");
++#endif
++}
++EXPORT_SYMBOL(sm_init_keystore);
++
++/* Return available pages/units */
++u32 sm_detect_keystore_units(struct device *dev)
++{
++	struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev);
++
++	return smpriv->localpages;
++}
++EXPORT_SYMBOL(sm_detect_keystore_units);
++
++/*
++ * Do any keystore specific initializations
++ */
++int sm_establish_keystore(struct device *dev, u32 unit)
++{
++	struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev);
++
++#ifdef SM_DEBUG
++	dev_info(dev, "sm_establish_keystore(): unit %d initializing\n", unit);
++#endif
++
++	if (smpriv->data_init == NULL)
++		return -EINVAL;
++
++	/* Call the data_init function for any user setup */
++	return smpriv->data_init(dev, unit);
++}
++EXPORT_SYMBOL(sm_establish_keystore);
++
++void sm_release_keystore(struct device *dev, u32 unit)
++{
++	struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev);
++
++#ifdef SM_DEBUG
++	dev_info(dev, "sm_release_keystore(): unit %d releasing\n", unit);
++#endif
++	if ((smpriv != NULL) && (smpriv->data_cleanup != NULL))
++		smpriv->data_cleanup(dev, unit);
++
++	return;
++}
++EXPORT_SYMBOL(sm_release_keystore);
++
++/*
++ * The subsequent interface (sm_keystore_*) forms the accessor interface to
++ * the keystore
++ */
++int sm_keystore_slot_alloc(struct device *dev, u32 unit, u32 size, u32 *slot)
++{
++	struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev);
++	int retval = -EINVAL;
++
++	spin_lock(&smpriv->kslock);
++
++	if ((smpriv->slot_alloc == NULL) ||
++	    (smpriv->pagedesc[unit].ksdata == NULL))
++		goto out;
++
++	retval = smpriv->slot_alloc(dev, unit, size, slot);
++
++out:
++	spin_unlock(&smpriv->kslock);
++	return retval;
++}
++EXPORT_SYMBOL(sm_keystore_slot_alloc);
++
++int sm_keystore_slot_dealloc(struct device *dev, u32 unit, u32 slot)
++{
++	struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev);
++	int retval = -EINVAL;
++
++	spin_lock(&smpriv->kslock);
++
++	if ((smpriv->slot_dealloc == NULL) ||
++	    (smpriv->pagedesc[unit].ksdata == NULL))
++		goto out;
++
++	retval = smpriv->slot_dealloc(dev, unit, slot);
++out:
++	spin_unlock(&smpriv->kslock);
++	return retval;
++}
++EXPORT_SYMBOL(sm_keystore_slot_dealloc);
++
++int sm_keystore_slot_load(struct device *dev, u32 unit, u32 slot,
++			  const u8 *key_data, u32 key_length)
++{
++	struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev);
++	int retval = -EINVAL;
++	u32 slot_size;
++	u32 i;
++	u8 __iomem *slot_location;
++
++	spin_lock(&smpriv->kslock);
++
++	slot_size = smpriv->slot_get_slot_size(dev, unit, slot);
++
++	if (key_length > slot_size) {
++		retval = -EFBIG;
++		goto out;
++	}
++
++	slot_location = smpriv->slot_get_address(dev, unit, slot);
++
++	for (i = 0; i < key_length; i++)
++		slot_location[i] = key_data[i];
++
++	retval = 0;
++
++out:
++	spin_unlock(&smpriv->kslock);
++	return retval;
++}
++EXPORT_SYMBOL(sm_keystore_slot_load);
++
++int sm_keystore_slot_read(struct device *dev, u32 unit, u32 slot,
++			  u32 key_length, u8 *key_data)
++{
++	struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev);
++	int retval = -EINVAL;
++	u8 __iomem *slot_addr;
++	u32 slot_size;
++
++	spin_lock(&smpriv->kslock);
++
++	slot_addr = smpriv->slot_get_address(dev, unit, slot);
++	slot_size = smpriv->slot_get_slot_size(dev, unit, slot);
++
++	if (key_length > slot_size) {
++		retval = -EKEYREJECTED;
++		goto out;
++	}
++
++	memcpy(key_data, slot_addr, key_length);
++	retval = 0;
++
++out:
++	spin_unlock(&smpriv->kslock);
++	return retval;
++}
++EXPORT_SYMBOL(sm_keystore_slot_read);
++
++int sm_keystore_slot_encapsulate(struct device *dev, u32 unit, u32 inslot,
++				 u32 outslot, u16 secretlen, u8 *keymod,
++				 u16 keymodlen)
++{
++	struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev);
++	int retval = 0;
++	u32 slot_length, dsize, jstat;
++	u32 __iomem *encapdesc = NULL;
++	u8 __iomem *lkeymod, *inpslotaddr, *outslotaddr;
++	dma_addr_t keymod_dma;
++
++	/* Ensure that the full blob will fit in the key slot */
++	slot_length = smpriv->slot_get_slot_size(dev, unit, outslot);
++	if ((secretlen + 48) > slot_length) {
++		retval = -ENOSPC;
++		goto out;
++	}
++
++	/* Get the base addresses of both keystore slots */
++	inpslotaddr = (u8 *)smpriv->slot_get_address(dev, unit, inslot);
++	outslotaddr = (u8 *)smpriv->slot_get_address(dev, unit, outslot);
++
++	/* Build the key modifier */
++	lkeymod = kmalloc(keymodlen, GFP_KERNEL | GFP_DMA);
++	memcpy(lkeymod, keymod, keymodlen);
++	keymod_dma = dma_map_single(dev, lkeymod, keymodlen, DMA_TO_DEVICE);
++	dma_sync_single_for_device(dev, keymod_dma, keymodlen, DMA_TO_DEVICE);
++
++	/* Build the encapsulation job descriptor */
++	dsize = blob_encap_desc(&encapdesc, keymod_dma, keymodlen,
++				__pa(inpslotaddr), __pa(outslotaddr),
++				secretlen, 0);
++	if (!dsize) {
++		dev_err(dev, "can't alloc an encap descriptor\n");
++		retval = -ENOMEM;
++		goto out;
++	}
++	jstat = sm_key_job(dev, encapdesc);
++
++	dma_unmap_single(dev, keymod_dma, keymodlen, DMA_TO_DEVICE);
++	kfree(encapdesc);
++
++out:
++	return retval;
++
++}
++EXPORT_SYMBOL(sm_keystore_slot_encapsulate);
++
++int sm_keystore_slot_decapsulate(struct device *dev, u32 unit, u32 inslot,
++				 u32 outslot, u16 secretlen, u8 *keymod,
++				 u16 keymodlen)
++{
++	struct caam_drv_private_sm *smpriv = dev_get_drvdata(dev);
++	int retval = 0;
++	u32 slot_length, dsize, jstat;
++	u32 __iomem *decapdesc = NULL;
++	u8 __iomem *lkeymod, *inpslotaddr, *outslotaddr;
++	dma_addr_t keymod_dma;
++
++	/* Ensure that the decap data will fit in the key slot */
++	slot_length = smpriv->slot_get_slot_size(dev, unit, outslot);
++	if (secretlen > slot_length) {
++		retval = -ENOSPC;
++		goto out;
++	}
++
++	/* Get the base addresses of both keystore slots */
++	inpslotaddr = (u8 *)smpriv->slot_get_address(dev, unit, 
inslot); ++ outslotaddr = (u8 *)smpriv->slot_get_address(dev, unit, outslot); ++ ++ /* Build the key modifier */ ++ lkeymod = kmalloc(keymodlen, GFP_KERNEL | GFP_DMA); ++ memcpy(lkeymod, keymod, keymodlen); ++ keymod_dma = dma_map_single(dev, lkeymod, keymodlen, DMA_TO_DEVICE); ++ dma_sync_single_for_device(dev, keymod_dma, keymodlen, DMA_TO_DEVICE); ++ ++ /* Build the decapsulation job descriptor */ ++ dsize = blob_decap_desc(&decapdesc, keymod_dma, keymodlen, ++ __pa(inpslotaddr), __pa(outslotaddr), ++ secretlen, 0); ++ if (!dsize) { ++ dev_err(dev, "can't alloc a decap descriptor\n"); ++ retval = -ENOMEM; ++ goto out; ++ } ++ jstat = sm_key_job(dev, decapdesc); ++ ++ dma_unmap_single(dev, keymod_dma, keymodlen, DMA_TO_DEVICE); ++ kfree(decapdesc); ++ ++out: ++ return retval; ++ ++} ++EXPORT_SYMBOL(sm_keystore_slot_decapsulate); ++ ++ ++/* ++ * Initialization/shutdown subsystem ++ * Assumes statically-invoked startup/shutdown from the controller driver ++ * for the present time, to be reworked when a device tree becomes ++ * available. This code will not modularize in present form. ++ * ++ * Also, simply uses ring 0 for execution at the present ++ */ ++ ++int caam_sm_startup(struct platform_device *pdev) ++{ ++ struct device *ctrldev, *smdev; ++ struct caam_drv_private *ctrlpriv; ++ struct caam_drv_private_sm *smpriv; ++ struct caam_drv_private_jr *jrpriv; /* need this for reg page */ ++ struct platform_device *sm_pdev; ++ struct sm_page_descriptor *lpagedesc; ++ u32 page, pgstat, lpagect, detectedpage; ++ ++ struct device_node *np; ++ ctrldev = &pdev->dev; ++ ctrlpriv = dev_get_drvdata(ctrldev); ++ ++ /* ++ * Set up the private block for secure memory ++ * Only one instance is possible ++ */ ++ smpriv = kzalloc(sizeof(struct caam_drv_private_sm), GFP_KERNEL); ++ if (smpriv == NULL) { ++ dev_err(ctrldev, "can't alloc private mem for secure memory\n"); ++ return -ENOMEM; ++ } ++ smpriv->parentdev = ctrldev; /* copy of parent dev is handy */ ++ ++ /* Create the dev */ ++#ifdef CONFIG_OF ++ np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-caam-sm"); ++ sm_pdev = of_platform_device_create(np, "caam_sm", ctrldev); ++#else ++ sm_pdev = platform_device_register_data(ctrldev, "caam_sm", 0, ++ smpriv, ++ sizeof(struct caam_drv_private_sm)); ++#endif ++ if (sm_pdev == NULL) { ++ kfree(smpriv); ++ return -EINVAL; ++ } ++ smdev = &sm_pdev->dev; ++ dev_set_drvdata(smdev, smpriv); ++ ctrlpriv->smdev = smdev; ++ ++ /* ++ * Collect configuration limit data for reference ++ * This batch comes from the partition data/vid registers in perfmon ++ */ ++ smpriv->max_pages = ((rd_reg32(&ctrlpriv->ctrl->perfmon.smpart) ++ & SMPART_MAX_NUMPG_MASK) >> ++ SMPART_MAX_NUMPG_SHIFT) + 1; ++ smpriv->top_partition = ((rd_reg32(&ctrlpriv->ctrl->perfmon.smpart) ++ & SMPART_MAX_PNUM_MASK) >> ++ SMPART_MAX_PNUM_SHIFT) + 1; ++ smpriv->top_page = ((rd_reg32(&ctrlpriv->ctrl->perfmon.smpart) ++ & SMPART_MAX_PG_MASK) >> SMPART_MAX_PG_SHIFT) + 1; ++ smpriv->page_size = 1024 << ((rd_reg32(&ctrlpriv->ctrl->perfmon.smvid) ++ & SMVID_PG_SIZE_MASK) >> SMVID_PG_SIZE_SHIFT); ++ smpriv->slot_size = 1 << CONFIG_CRYPTO_DEV_FSL_CAAM_SM_SLOTSIZE; ++ ++#ifdef SM_DEBUG ++ dev_info(smdev, "max pages = %d, top partition = %d\n", ++ smpriv->max_pages, smpriv->top_partition); ++ dev_info(smdev, "top page = %d, page size = %d (total = %d)\n", ++ smpriv->top_page, smpriv->page_size, ++ smpriv->top_page * smpriv->page_size); ++ dev_info(smdev, "selected slot size = %d\n", smpriv->slot_size); ++#endif ++ ++ /* ++ * Now probe for partitions/pages to 
which we have access. Note that
++	 * these have likely been set up by a bootloader or platform
++	 * provisioning application, so we have to assume that we "inherit"
++	 * a configuration and work within the constraints of what it might be.
++	 *
++	 * Assume use of the zeroth ring in the present iteration (until
++	 * we can divorce the controller and ring drivers, and then assign
++	 * an SM instance to any ring instance).
++	 */
++	smpriv->smringdev = ctrlpriv->jrdev[0];
++	jrpriv = dev_get_drvdata(smpriv->smringdev);
++	lpagect = 0;
++	lpagedesc = kzalloc(sizeof(struct sm_page_descriptor)
++			    * smpriv->max_pages, GFP_KERNEL);
++	if (lpagedesc == NULL) {
++		kfree(smpriv);
++		return -ENOMEM;
++	}
++
++	for (page = 0; page < smpriv->max_pages; page++) {
++		wr_reg32(&jrpriv->rregs->sm_cmd,
++			 ((page << SMC_PAGE_SHIFT) & SMC_PAGE_MASK) |
++			 (SMC_CMD_PAGE_INQUIRY & SMC_CMD_MASK));
++		pgstat = rd_reg32(&jrpriv->rregs->sm_status);
++		if (((pgstat & SMCS_PGWON_MASK) >> SMCS_PGOWN_SHIFT)
++		    == SMCS_PGOWN_OWNED) { /* our page? */
++			lpagedesc[page].phys_pagenum =
++				(pgstat & SMCS_PAGE_MASK) >> SMCS_PAGE_SHIFT;
++			lpagedesc[page].own_part =
++				(pgstat & SMCS_PART_MASK) >> SMCS_PART_SHIFT;
++			lpagedesc[page].pg_base = ctrlpriv->sm_base +
++				((smpriv->page_size * page) / sizeof(u32));
++			lpagect++;
++#ifdef SM_DEBUG
++			dev_info(smdev,
++				 "physical page %d, owning partition = %d\n",
++				 lpagedesc[page].phys_pagenum,
++				 lpagedesc[page].own_part);
++#endif
++		}
++	}
++
++	smpriv->pagedesc = kzalloc(sizeof(struct sm_page_descriptor) * lpagect,
++				   GFP_KERNEL);
++	if (smpriv->pagedesc == NULL) {
++		kfree(lpagedesc);
++		kfree(smpriv);
++		return -ENOMEM;
++	}
++	smpriv->localpages = lpagect;
++
++	detectedpage = 0;
++	for (page = 0; page < smpriv->max_pages; page++) {
++		if (lpagedesc[page].pg_base != NULL) {	/* e.g. 
live entry */
++			memcpy(&smpriv->pagedesc[detectedpage],
++			       &lpagedesc[page],
++			       sizeof(struct sm_page_descriptor));
++#ifdef SM_DEBUG_CONT
++			sm_show_page(smdev, &smpriv->pagedesc[detectedpage]);
++#endif
++			detectedpage++;
++		}
++	}
++
++	kfree(lpagedesc);
++
++	sm_init_keystore(smdev);
++
++	return 0;
++}
++
++void caam_sm_shutdown(struct platform_device *pdev)
++{
++	struct device *ctrldev, *smdev;
++	struct caam_drv_private *priv;
++	struct caam_drv_private_sm *smpriv;
++
++	ctrldev = &pdev->dev;
++	priv = dev_get_drvdata(ctrldev);
++	smdev = priv->smdev;
++	smpriv = dev_get_drvdata(smdev);
++
++	kfree(smpriv->pagedesc);
++	kfree(smpriv);
++}
++EXPORT_SYMBOL(caam_sm_shutdown);
++#ifdef CONFIG_OF
++static void __exit caam_sm_exit(void)
++{
++	struct device_node *dev_node;
++	struct platform_device *pdev;
++
++	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
++	if (!dev_node) {
++		dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
++		if (!dev_node)
++			return;
++	}
++
++	pdev = of_find_device_by_node(dev_node);
++	if (!pdev)
++		return;
++
++	of_node_put(dev_node);
++
++	caam_sm_shutdown(pdev);
++
++	return;
++}
++
++static int __init caam_sm_init(void)
++{
++	struct device_node *dev_node;
++	struct platform_device *pdev;
++
++	/*
++	 * Do of_find_compatible_node() then of_find_device_by_node()
++	 * once a functional device tree is available
++	 */
++	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
++	if (!dev_node) {
++		dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
++		if (!dev_node)
++			return -ENODEV;
++	}
++
++	pdev = of_find_device_by_node(dev_node);
++	if (!pdev)
++		return -ENODEV;
++
++	of_node_get(dev_node);
++
++	caam_sm_startup(pdev);
++
++	return 0;
++}
++
++module_init(caam_sm_init);
++module_exit(caam_sm_exit);
++
++MODULE_LICENSE("Dual BSD/GPL");
++MODULE_DESCRIPTION("FSL CAAM Secure Memory / Keystore");
++MODULE_AUTHOR("Freescale Semiconductor - NMSG/MAD");
++#endif
+diff -Nur linux-4.1.3/drivers/crypto/caam/sm_test.c linux-xbian-imx6/drivers/crypto/caam/sm_test.c
+--- linux-4.1.3/drivers/crypto/caam/sm_test.c	1970-01-01 01:00:00.000000000 +0100
++++ linux-xbian-imx6/drivers/crypto/caam/sm_test.c	2015-07-27 23:13:04.213947410 +0200
+@@ -0,0 +1,844 @@
++/*
++ * Secure Memory / Keystore Exemplification Module
++ * Copyright (C) 2013 Freescale Semiconductor, Inc. All Rights Reserved
++ *
++ * Serves as a functional example, and as a self-contained unit test for
++ * the functionality contained in sm_store.c.
++ *
++ * The example function, caam_sm_example_init(), runs a thread that:
++ *
++ * - initializes a set of fixed keys
++ * - stores one copy in clear buffers
++ * - stores them again in secure memory
++ * - extracts stored keys back out for use
++ * - initializes 3 data buffers for a test:
++ *   (1) containing cleartext
++ *   (2) to hold ciphertext encrypted with an extracted black key
++ *   (3) to hold extracted cleartext decrypted with an equivalent clear key
++ *
++ * The function then builds simple job descriptors that reference the key
++ * material and buffers as initialized, and executes an encryption job
++ * with a black key, and a decryption job using the same key held in the
++ * clear. The output of the decryption job is compared to the original
++ * cleartext; if they don't compare correctly, one can assume a key problem
++ * exists, and the function will exit with an error.
++ *
++ * This module can use a substantial amount of refactoring, which may occur
++ * after the API gets some mileage. 
Furthermore, expect this module to ++ * eventually disappear once the API is integrated into "real" software. ++ */ ++ ++#include "compat.h" ++#include "intern.h" ++#include "desc.h" ++#include "error.h" ++#include "jr.h" ++#include "sm.h" ++ ++static u8 skeymod[] = { ++ 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, ++ 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 ++}; ++static u8 symkey[] = { ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, ++ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, ++ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, ++ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f ++}; ++ ++static u8 symdata[] = { ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x0f, 0x06, 0x07, ++ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, ++ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, ++ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, ++ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, ++ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, ++ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, ++ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, ++ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, ++ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, ++ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, ++ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, ++ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, ++ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, ++ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, ++ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, ++ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, ++ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, ++ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, ++ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, ++ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, ++ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, ++ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, ++ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, ++ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, ++ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, ++ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff ++}; ++ ++static int mk_job_desc(u32 *desc, dma_addr_t key, u16 keysz, dma_addr_t indata, ++ dma_addr_t outdata, u16 sz, u32 cipherdir, u32 keymode) ++{ ++ desc[1] = CMD_KEY | CLASS_1 | (keysz & KEY_LENGTH_MASK) | keymode; ++ desc[2] = (u32)key; ++ desc[3] = CMD_OPERATION | OP_TYPE_CLASS1_ALG | OP_ALG_AAI_ECB | ++ cipherdir; ++ desc[4] = CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | ++ FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1 | sz; ++ desc[5] = (u32)indata; ++ desc[6] = CMD_FIFO_STORE | FIFOST_TYPE_MESSAGE_DATA | sz; ++ desc[7] = (u32)outdata; ++ ++ desc[0] = CMD_DESC_HDR | HDR_ONE | (8 & HDR_DESCLEN_MASK); ++ return 8 * sizeof(u32); ++} ++ ++struct exec_test_result { ++ int error; ++ struct completion completion; ++}; ++ ++void exec_test_done(struct device *dev, u32 *desc, u32 err, void *context) ++{ ++ struct exec_test_result *res = context; ++ ++ if (err) { ++ char tmp[CAAM_ERROR_STR_MAX]; ++ dev_err(dev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err)); ++ } ++ ++ res->error = err; ++ complete(&res->completion); ++} ++ ++static int exec_test_job(struct device *ksdev, u32 *jobdesc) ++{ ++ struct exec_test_result testres; ++ struct caam_drv_private_sm *kspriv; ++ int rtn = 0; ++ ++ kspriv = dev_get_drvdata(ksdev); ++ ++ init_completion(&testres.completion); ++ ++ rtn = 
caam_jr_enqueue(kspriv->smringdev, jobdesc, exec_test_done, ++ &testres); ++ if (!rtn) { ++ wait_for_completion_interruptible(&testres.completion); ++ rtn = testres.error; ++ } ++ return rtn; ++} ++ ++ ++int caam_sm_example_init(struct platform_device *pdev) ++{ ++ struct device *ctrldev, *ksdev; ++ struct caam_drv_private *ctrlpriv; ++ struct caam_drv_private_sm *kspriv; ++ u32 unit, units, jdescsz; ++ int stat, jstat, rtnval = 0; ++ u8 __iomem *syminp, *symint, *symout = NULL; ++ dma_addr_t syminp_dma, symint_dma, symout_dma; ++ u8 __iomem *black_key_des, *black_key_aes128; ++ u8 __iomem *black_key_aes256; ++ dma_addr_t black_key_des_dma, black_key_aes128_dma; ++ dma_addr_t black_key_aes256_dma; ++ u8 __iomem *clear_key_des, *clear_key_aes128, *clear_key_aes256; ++ dma_addr_t clear_key_des_dma, clear_key_aes128_dma; ++ dma_addr_t clear_key_aes256_dma; ++ u32 __iomem *jdesc; ++ u32 keyslot_des, keyslot_aes128, keyslot_aes256 = 0; ++ ++ jdesc = NULL; ++ black_key_des = black_key_aes128 = black_key_aes256 = NULL; ++ clear_key_des = clear_key_aes128 = clear_key_aes256 = NULL; ++ ++ /* We can lose this cruft once we can get a pdev by name */ ++ ctrldev = &pdev->dev; ++ ctrlpriv = dev_get_drvdata(ctrldev); ++ ksdev = ctrlpriv->smdev; ++ kspriv = dev_get_drvdata(ksdev); ++ if (kspriv == NULL) ++ return -ENODEV; ++ ++ /* Now that we have the dev for the single SM instance, connect */ ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "caam_sm_test_init() running\n"); ++#endif ++ /* Probe to see what keystores are available to us */ ++ units = sm_detect_keystore_units(ksdev); ++ if (!units) ++ dev_err(ksdev, "caam_sm_test: no keystore units available\n"); ++ ++ /* ++ * MX6 bootloader stores some stuff in unit 0, so let's ++ * use 1 or above ++ */ ++ if (units < 2) { ++ dev_err(ksdev, "caam_sm_test: insufficient keystore units\n"); ++ return -ENODEV; ++ } ++ unit = 1; ++ ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "caam_sm_test: %d keystore units available\n", units); ++#endif ++ ++ /* Initialize/Establish Keystore */ ++ sm_establish_keystore(ksdev, unit); /* Initalize store in #1 */ ++ ++ /* ++ * Top of main test thread ++ */ ++ ++ /* Allocate test data blocks (input, intermediate, output) */ ++ syminp = kmalloc(256, GFP_KERNEL | GFP_DMA); ++ symint = kmalloc(256, GFP_KERNEL | GFP_DMA); ++ symout = kmalloc(256, GFP_KERNEL | GFP_DMA); ++ if ((syminp == NULL) || (symint == NULL) || (symout == NULL)) { ++ rtnval = -ENOMEM; ++ dev_err(ksdev, "caam_sm_test: can't get test data buffers\n"); ++ goto freemem; ++ } ++ ++ /* Allocate storage for 3 black keys: encapsulated 8, 16, 32 */ ++ black_key_des = kmalloc(16, GFP_KERNEL | GFP_DMA); /* padded to 16... 
*/ ++ black_key_aes128 = kmalloc(16, GFP_KERNEL | GFP_DMA); ++ black_key_aes256 = kmalloc(16, GFP_KERNEL | GFP_DMA); ++ if ((black_key_des == NULL) || (black_key_aes128 == NULL) || ++ (black_key_aes256 == NULL)) { ++ rtnval = -ENOMEM; ++ dev_err(ksdev, "caam_sm_test: can't black key buffers\n"); ++ goto freemem; ++ } ++ ++ clear_key_des = kmalloc(8, GFP_KERNEL | GFP_DMA); ++ clear_key_aes128 = kmalloc(16, GFP_KERNEL | GFP_DMA); ++ clear_key_aes256 = kmalloc(32, GFP_KERNEL | GFP_DMA); ++ if ((clear_key_des == NULL) || (clear_key_aes128 == NULL) || ++ (clear_key_aes256 == NULL)) { ++ rtnval = -ENOMEM; ++ dev_err(ksdev, "caam_sm_test: can't get clear key buffers\n"); ++ goto freemem; ++ } ++ ++ /* Allocate storage for job descriptor */ ++ jdesc = kmalloc(8 * sizeof(u32), GFP_KERNEL | GFP_DMA); ++ if (jdesc == NULL) { ++ rtnval = -ENOMEM; ++ dev_err(ksdev, "caam_sm_test: can't get descriptor buffers\n"); ++ goto freemem; ++ } ++ ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "caam_sm_test: all buffers allocated\n"); ++#endif ++ ++ /* Load up input data block, clear outputs */ ++ memcpy(syminp, symdata, 256); ++ memset(symint, 0, 256); ++ memset(symout, 0, 256); ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ syminp[0], syminp[1], syminp[2], syminp[3], ++ syminp[4], syminp[5], syminp[6], syminp[7]); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symint[0], symint[1], symint[2], symint[3], ++ symint[4], symint[5], symint[6], symint[7]); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symout[0], symout[1], symout[2], symout[3], ++ symout[4], symout[5], symout[6], symout[7]); ++ ++ dev_info(ksdev, "caam_sm_test: data buffers initialized\n"); ++#endif ++ ++ /* Load up clear keys */ ++ memcpy(clear_key_des, symkey, 8); ++ memcpy(clear_key_aes128, symkey, 16); ++ memcpy(clear_key_aes256, symkey, 32); ++ ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "caam_sm_test: all clear keys loaded\n"); ++#endif ++ ++ /* ++ * Place clear keys in keystore. ++ * All the interesting stuff happens here. 
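++	 *
++	 * For each of the three key sizes the sequence below is the same:
++	 * sm_keystore_slot_alloc() then sm_keystore_slot_load() to place
++	 * the clear key, sm_keystore_slot_encapsulate() and
++	 * sm_keystore_slot_decapsulate() in place to convert it, and
++	 * finally sm_keystore_slot_read() to extract the resulting
++	 * black key.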
++	 */
++	/* 8 byte DES key */
++	stat = sm_keystore_slot_alloc(ksdev, unit, 8, &keyslot_des);
++	if (stat)
++		goto freemem;
++#ifdef SM_TEST_DETAIL
++	dev_info(ksdev, "caam_sm_test: 8 byte key slot in %d\n", keyslot_des);
++#endif
++	stat = sm_keystore_slot_load(ksdev, unit, keyslot_des, clear_key_des,
++				     8);
++	if (stat) {
++#ifdef SM_TEST_DETAIL
++		dev_info(ksdev, "caam_sm_test: can't load 8 byte key in %d\n",
++			 keyslot_des);
++#endif
++		sm_keystore_slot_dealloc(ksdev, unit, keyslot_des);
++		goto freemem;
++	}
++
++	/* 16 byte AES key */
++	stat = sm_keystore_slot_alloc(ksdev, unit, 16, &keyslot_aes128);
++	if (stat) {
++		sm_keystore_slot_dealloc(ksdev, unit, keyslot_des);
++		goto freemem;
++	}
++#ifdef SM_TEST_DETAIL
++	dev_info(ksdev, "caam_sm_test: 16 byte key slot in %d\n",
++		 keyslot_aes128);
++#endif
++	stat = sm_keystore_slot_load(ksdev, unit, keyslot_aes128,
++				     clear_key_aes128, 16);
++	if (stat) {
++#ifdef SM_TEST_DETAIL
++		dev_info(ksdev, "caam_sm_test: can't load 16 byte key in %d\n",
++			 keyslot_aes128);
++#endif
++		sm_keystore_slot_dealloc(ksdev, unit, keyslot_aes128);
++		sm_keystore_slot_dealloc(ksdev, unit, keyslot_des);
++		goto freemem;
++	}
++
++	/* 32 byte AES key */
++	stat = sm_keystore_slot_alloc(ksdev, unit, 32, &keyslot_aes256);
++	if (stat) {
++		sm_keystore_slot_dealloc(ksdev, unit, keyslot_aes128);
++		sm_keystore_slot_dealloc(ksdev, unit, keyslot_des);
++		goto freemem;
++	}
++#ifdef SM_TEST_DETAIL
++	dev_info(ksdev, "caam_sm_test: 32 byte key slot in %d\n",
++		 keyslot_aes256);
++#endif
++	stat = sm_keystore_slot_load(ksdev, unit, keyslot_aes256,
++				     clear_key_aes256, 32);
++	if (stat) {
++#ifdef SM_TEST_DETAIL
++		dev_info(ksdev, "caam_sm_test: can't load 32 byte key in %d\n",
++			 keyslot_aes256);
++#endif
++		sm_keystore_slot_dealloc(ksdev, unit, keyslot_aes256);
++		sm_keystore_slot_dealloc(ksdev, unit, keyslot_aes128);
++		sm_keystore_slot_dealloc(ksdev, unit, keyslot_des);
++		goto freemem;
++	}
++
++	/* Encapsulate all keys as SM blobs */
++	stat = sm_keystore_slot_encapsulate(ksdev, unit, keyslot_des,
++					    keyslot_des, 8, skeymod, 8);
++	if (stat) {
++		dev_info(ksdev, "caam_sm_test: can't encapsulate DES key\n");
++		goto freekeys;
++	}
++
++	stat = sm_keystore_slot_encapsulate(ksdev, unit, keyslot_aes128,
++					    keyslot_aes128, 16, skeymod, 8);
++	if (stat) {
++		dev_info(ksdev, "caam_sm_test: can't encapsulate AES128 key\n");
++		goto freekeys;
++	}
++
++	stat = sm_keystore_slot_encapsulate(ksdev, unit, keyslot_aes256,
++					    keyslot_aes256, 32, skeymod, 8);
++	if (stat) {
++		dev_info(ksdev, "caam_sm_test: can't encapsulate AES256 key\n");
++		goto freekeys;
++	}
++
++	/* Now decapsulate as black key blobs */
++	stat = sm_keystore_slot_decapsulate(ksdev, unit, keyslot_des,
++					    keyslot_des, 8, skeymod, 8);
++	if (stat) {
++		dev_info(ksdev, "caam_sm_test: can't decapsulate DES key\n");
++		goto freekeys;
++	}
++
++	stat = sm_keystore_slot_decapsulate(ksdev, unit, keyslot_aes128,
++					    keyslot_aes128, 16, skeymod, 8);
++	if (stat) {
++		dev_info(ksdev, "caam_sm_test: can't decapsulate AES128 key\n");
++		goto freekeys;
++	}
++
++	stat = sm_keystore_slot_decapsulate(ksdev, unit, keyslot_aes256,
++					    keyslot_aes256, 32, skeymod, 8);
++	if (stat) {
++		dev_info(ksdev, "caam_sm_test: can't decapsulate AES256 key\n");
++		goto freekeys;
++	}
++
++	/* Extract 8/16/32 byte black keys */
++	sm_keystore_slot_read(ksdev, unit, keyslot_des, 8, black_key_des);
++	sm_keystore_slot_read(ksdev, unit, keyslot_aes128, 16,
++			      black_key_aes128);
++	sm_keystore_slot_read(ksdev, unit, keyslot_aes256, 32,
++			      
black_key_aes256); ++ ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "caam_sm_test: all black keys extracted\n"); ++#endif ++ ++ /* DES encrypt using 8 byte black key */ ++ black_key_des_dma = dma_map_single(ksdev, black_key_des, 8, ++ DMA_TO_DEVICE); ++ dma_sync_single_for_device(ksdev, black_key_des_dma, 8, DMA_TO_DEVICE); ++ syminp_dma = dma_map_single(ksdev, syminp, 256, DMA_TO_DEVICE); ++ dma_sync_single_for_device(ksdev, syminp_dma, 256, DMA_TO_DEVICE); ++ symint_dma = dma_map_single(ksdev, symint, 256, DMA_FROM_DEVICE); ++ ++ jdescsz = mk_job_desc(jdesc, black_key_des_dma, 8, syminp_dma, ++ symint_dma, 256, ++ OP_ALG_ENCRYPT | OP_ALG_ALGSEL_DES, 0); ++ ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "jobdesc:\n"); ++ dev_info(ksdev, "0x%08x\n", jdesc[0]); ++ dev_info(ksdev, "0x%08x\n", jdesc[1]); ++ dev_info(ksdev, "0x%08x\n", jdesc[2]); ++ dev_info(ksdev, "0x%08x\n", jdesc[3]); ++ dev_info(ksdev, "0x%08x\n", jdesc[4]); ++ dev_info(ksdev, "0x%08x\n", jdesc[5]); ++ dev_info(ksdev, "0x%08x\n", jdesc[6]); ++ dev_info(ksdev, "0x%08x\n", jdesc[7]); ++#endif ++ ++ jstat = exec_test_job(ksdev, jdesc); ++ ++ dma_sync_single_for_cpu(ksdev, symint_dma, 256, DMA_FROM_DEVICE); ++ dma_unmap_single(ksdev, symint_dma, 256, DMA_FROM_DEVICE); ++ dma_unmap_single(ksdev, syminp_dma, 256, DMA_TO_DEVICE); ++ dma_unmap_single(ksdev, black_key_des_dma, 8, DMA_TO_DEVICE); ++ ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "input block:\n"); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ syminp[0], syminp[1], syminp[2], syminp[3], ++ syminp[4], syminp[5], syminp[6], syminp[7]); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ syminp[8], syminp[9], syminp[10], syminp[11], ++ syminp[12], syminp[13], syminp[14], syminp[15]); ++ dev_info(ksdev, "intermediate block:\n"); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symint[0], symint[1], symint[2], symint[3], ++ symint[4], symint[5], symint[6], symint[7]); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symint[8], symint[9], symint[10], symint[11], ++ symint[12], symint[13], symint[14], symint[15]); ++ dev_info(ksdev, "caam_sm_test: encrypt cycle with 8 byte key\n"); ++#endif ++ ++ /* DES decrypt using 8 byte clear key */ ++ clear_key_des_dma = dma_map_single(ksdev, clear_key_des, 8, ++ DMA_TO_DEVICE); ++ dma_sync_single_for_device(ksdev, clear_key_des_dma, 8, DMA_TO_DEVICE); ++ symint_dma = dma_map_single(ksdev, symint, 256, DMA_TO_DEVICE); ++ dma_sync_single_for_device(ksdev, symint_dma, 256, DMA_TO_DEVICE); ++ symout_dma = dma_map_single(ksdev, symout, 256, DMA_FROM_DEVICE); ++ ++ jdescsz = mk_job_desc(jdesc, clear_key_des_dma, 8, symint_dma, ++ symout_dma, 256, ++ OP_ALG_DECRYPT | OP_ALG_ALGSEL_DES, 0); ++ ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "jobdesc:\n"); ++ dev_info(ksdev, "0x%08x\n", jdesc[0]); ++ dev_info(ksdev, "0x%08x\n", jdesc[1]); ++ dev_info(ksdev, "0x%08x\n", jdesc[2]); ++ dev_info(ksdev, "0x%08x\n", jdesc[3]); ++ dev_info(ksdev, "0x%08x\n", jdesc[4]); ++ dev_info(ksdev, "0x%08x\n", jdesc[5]); ++ dev_info(ksdev, "0x%08x\n", jdesc[6]); ++ dev_info(ksdev, "0x%08x\n", jdesc[7]); ++#endif ++ ++ jstat = exec_test_job(ksdev, jdesc); ++ ++ dma_sync_single_for_cpu(ksdev, symout_dma, 256, DMA_FROM_DEVICE); ++ dma_unmap_single(ksdev, symout_dma, 256, DMA_FROM_DEVICE); ++ dma_unmap_single(ksdev, symint_dma, 256, DMA_TO_DEVICE); ++ dma_unmap_single(ksdev, clear_key_des_dma, 8, DMA_TO_DEVICE); ++ 
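++	/*
++	 * The same round trip is repeated below for the 16 and 32 byte
++	 * AES keys: map the key and input buffers DMA_TO_DEVICE and the
++	 * output buffer DMA_FROM_DEVICE, build the job descriptor with
++	 * mk_job_desc(), run it synchronously through exec_test_job(),
++	 * then unmap everything (resyncing the output for the CPU)
++	 * before comparing the result.
++	 */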
++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "intermediate block:\n"); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symint[0], symint[1], symint[2], symint[3], ++ symint[4], symint[5], symint[6], symint[7]); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symint[8], symint[9], symint[10], symint[11], ++ symint[12], symint[13], symint[14], symint[15]); ++ dev_info(ksdev, "decrypted block:\n"); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symout[0], symout[1], symout[2], symout[3], ++ symout[4], symout[5], symout[6], symout[7]); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symout[8], symout[9], symout[10], symout[11], ++ symout[12], symout[13], symout[14], symout[15]); ++ dev_info(ksdev, "caam_sm_test: decrypt cycle with 8 byte key\n"); ++#endif ++ ++ /* Check result */ ++ if (memcmp(symout, syminp, 256)) { ++ dev_info(ksdev, "caam_sm_test: 8-byte key test mismatch\n"); ++ rtnval = -1; ++ goto freekeys; ++ } else ++ dev_info(ksdev, "caam_sm_test: 8-byte key test match OK\n"); ++ ++ /* AES-128 encrypt using 16 byte black key */ ++ black_key_aes128_dma = dma_map_single(ksdev, black_key_aes128, 16, ++ DMA_TO_DEVICE); ++ dma_sync_single_for_device(ksdev, black_key_aes128_dma, 16, ++ DMA_TO_DEVICE); ++ syminp_dma = dma_map_single(ksdev, syminp, 256, DMA_TO_DEVICE); ++ dma_sync_single_for_device(ksdev, syminp_dma, 256, DMA_TO_DEVICE); ++ symint_dma = dma_map_single(ksdev, symint, 256, DMA_FROM_DEVICE); ++ ++ jdescsz = mk_job_desc(jdesc, black_key_aes128_dma, 16, syminp_dma, ++ symint_dma, 256, ++ OP_ALG_ENCRYPT | OP_ALG_ALGSEL_AES, 0); ++ ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "jobdesc:\n"); ++ dev_info(ksdev, "0x%08x\n", jdesc[0]); ++ dev_info(ksdev, "0x%08x\n", jdesc[1]); ++ dev_info(ksdev, "0x%08x\n", jdesc[2]); ++ dev_info(ksdev, "0x%08x\n", jdesc[3]); ++ dev_info(ksdev, "0x%08x\n", jdesc[4]); ++ dev_info(ksdev, "0x%08x\n", jdesc[5]); ++ dev_info(ksdev, "0x%08x\n", jdesc[6]); ++ dev_info(ksdev, "0x%08x\n", jdesc[7]); ++#endif ++ ++ jstat = exec_test_job(ksdev, jdesc); ++ ++ dma_sync_single_for_cpu(ksdev, symint_dma, 256, DMA_FROM_DEVICE); ++ dma_unmap_single(ksdev, symint_dma, 256, DMA_FROM_DEVICE); ++ dma_unmap_single(ksdev, syminp_dma, 256, DMA_TO_DEVICE); ++ dma_unmap_single(ksdev, black_key_aes128_dma, 16, DMA_TO_DEVICE); ++ ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "input block:\n"); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ syminp[0], syminp[1], syminp[2], syminp[3], ++ syminp[4], syminp[5], syminp[6], syminp[7]); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ syminp[8], syminp[9], syminp[10], syminp[11], ++ syminp[12], syminp[13], syminp[14], syminp[15]); ++ dev_info(ksdev, "intermediate block:\n"); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symint[0], symint[1], symint[2], symint[3], ++ symint[4], symint[5], symint[6], symint[7]); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symint[8], symint[9], symint[10], symint[11], ++ symint[12], symint[13], symint[14], symint[15]); ++ dev_info(ksdev, "caam_sm_test: encrypt cycle with 16 byte key\n"); ++#endif ++ ++ /* AES-128 decrypt using 16 byte clear key */ ++ clear_key_aes128_dma = dma_map_single(ksdev, clear_key_aes128, 16, ++ DMA_TO_DEVICE); ++ 
dma_sync_single_for_device(ksdev, clear_key_aes128_dma, 16, ++ DMA_TO_DEVICE); ++ symint_dma = dma_map_single(ksdev, symint, 256, DMA_TO_DEVICE); ++ dma_sync_single_for_device(ksdev, symint_dma, 256, DMA_TO_DEVICE); ++ symout_dma = dma_map_single(ksdev, symout, 256, DMA_FROM_DEVICE); ++ ++ jdescsz = mk_job_desc(jdesc, clear_key_aes128_dma, 16, symint_dma, ++ symout_dma, 256, ++ OP_ALG_DECRYPT | OP_ALG_ALGSEL_AES, 0); ++ ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "jobdesc:\n"); ++ dev_info(ksdev, "0x%08x\n", jdesc[0]); ++ dev_info(ksdev, "0x%08x\n", jdesc[1]); ++ dev_info(ksdev, "0x%08x\n", jdesc[2]); ++ dev_info(ksdev, "0x%08x\n", jdesc[3]); ++ dev_info(ksdev, "0x%08x\n", jdesc[4]); ++ dev_info(ksdev, "0x%08x\n", jdesc[5]); ++ dev_info(ksdev, "0x%08x\n", jdesc[6]); ++ dev_info(ksdev, "0x%08x\n", jdesc[7]); ++#endif ++ jstat = exec_test_job(ksdev, jdesc); ++ ++ dma_sync_single_for_cpu(ksdev, symout_dma, 256, DMA_FROM_DEVICE); ++ dma_unmap_single(ksdev, symout_dma, 256, DMA_FROM_DEVICE); ++ dma_unmap_single(ksdev, symint_dma, 256, DMA_TO_DEVICE); ++ dma_unmap_single(ksdev, clear_key_aes128_dma, 16, DMA_TO_DEVICE); ++ ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "intermediate block:\n"); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symint[0], symint[1], symint[2], symint[3], ++ symint[4], symint[5], symint[6], symint[7]); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symint[8], symint[9], symint[10], symint[11], ++ symint[12], symint[13], symint[14], symint[15]); ++ dev_info(ksdev, "decrypted block:\n"); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symout[0], symout[1], symout[2], symout[3], ++ symout[4], symout[5], symout[6], symout[7]); ++ dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \ ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ symout[8], symout[9], symout[10], symout[11], ++ symout[12], symout[13], symout[14], symout[15]); ++ dev_info(ksdev, "caam_sm_test: decrypt cycle with 16 byte key\n"); ++#endif ++ ++ /* Check result */ ++ if (memcmp(symout, syminp, 256)) { ++ dev_info(ksdev, "caam_sm_test: 16-byte key test mismatch\n"); ++ rtnval = -1; ++ goto freekeys; ++ } else ++ dev_info(ksdev, "caam_sm_test: 16-byte key test match OK\n"); ++ ++ /* AES-256 encrypt using 32 byte black key */ ++ black_key_aes256_dma = dma_map_single(ksdev, black_key_aes256, 32, ++ DMA_TO_DEVICE); ++ dma_sync_single_for_device(ksdev, black_key_aes256_dma, 32, ++ DMA_TO_DEVICE); ++ syminp_dma = dma_map_single(ksdev, syminp, 256, DMA_TO_DEVICE); ++ dma_sync_single_for_device(ksdev, syminp_dma, 256, DMA_TO_DEVICE); ++ symint_dma = dma_map_single(ksdev, symint, 256, DMA_FROM_DEVICE); ++ ++ jdescsz = mk_job_desc(jdesc, black_key_aes256_dma, 32, syminp_dma, ++ symint_dma, 256, ++ OP_ALG_ENCRYPT | OP_ALG_ALGSEL_AES, 0); ++ ++#ifdef SM_TEST_DETAIL ++ dev_info(ksdev, "jobdesc:\n"); ++ dev_info(ksdev, "0x%08x\n", jdesc[0]); ++ dev_info(ksdev, "0x%08x\n", jdesc[1]); ++ dev_info(ksdev, "0x%08x\n", jdesc[2]); ++ dev_info(ksdev, "0x%08x\n", jdesc[3]); ++ dev_info(ksdev, "0x%08x\n", jdesc[4]); ++ dev_info(ksdev, "0x%08x\n", jdesc[5]); ++ dev_info(ksdev, "0x%08x\n", jdesc[6]); ++ dev_info(ksdev, "0x%08x\n", jdesc[7]); ++#endif ++ ++ jstat = exec_test_job(ksdev, jdesc); ++ ++ dma_sync_single_for_cpu(ksdev, symint_dma, 256, DMA_FROM_DEVICE); ++ dma_unmap_single(ksdev, symint_dma, 256, DMA_FROM_DEVICE); ++ dma_unmap_single(ksdev, syminp_dma, 256, DMA_TO_DEVICE); ++ dma_unmap_single(ksdev, 
black_key_aes256_dma, 32, DMA_TO_DEVICE);
++
++#ifdef SM_TEST_DETAIL
++	dev_info(ksdev, "input block:\n");
++	dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \
++			"0x%02x 0x%02x 0x%02x 0x%02x\n",
++		 syminp[0], syminp[1], syminp[2], syminp[3],
++		 syminp[4], syminp[5], syminp[6], syminp[7]);
++	dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \
++			"0x%02x 0x%02x 0x%02x 0x%02x\n",
++		 syminp[8], syminp[9], syminp[10], syminp[11],
++		 syminp[12], syminp[13], syminp[14], syminp[15]);
++	dev_info(ksdev, "intermediate block:\n");
++	dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \
++			"0x%02x 0x%02x 0x%02x 0x%02x\n",
++		 symint[0], symint[1], symint[2], symint[3],
++		 symint[4], symint[5], symint[6], symint[7]);
++	dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \
++			"0x%02x 0x%02x 0x%02x 0x%02x\n",
++		 symint[8], symint[9], symint[10], symint[11],
++		 symint[12], symint[13], symint[14], symint[15]);
++	dev_info(ksdev, "caam_sm_test: encrypt cycle with 32 byte key\n");
++#endif
++
++	/* AES-256 decrypt using 32 byte clear key */
++	clear_key_aes256_dma = dma_map_single(ksdev, clear_key_aes256, 32,
++					      DMA_TO_DEVICE);
++	dma_sync_single_for_device(ksdev, clear_key_aes256_dma, 32,
++				   DMA_TO_DEVICE);
++	symint_dma = dma_map_single(ksdev, symint, 256, DMA_TO_DEVICE);
++	dma_sync_single_for_device(ksdev, symint_dma, 256, DMA_TO_DEVICE);
++	symout_dma = dma_map_single(ksdev, symout, 256, DMA_FROM_DEVICE);
++
++	jdescsz = mk_job_desc(jdesc, clear_key_aes256_dma, 32, symint_dma,
++			      symout_dma, 256,
++			      OP_ALG_DECRYPT | OP_ALG_ALGSEL_AES, 0);
++
++#ifdef SM_TEST_DETAIL
++	dev_info(ksdev, "jobdesc:\n");
++	dev_info(ksdev, "0x%08x\n", jdesc[0]);
++	dev_info(ksdev, "0x%08x\n", jdesc[1]);
++	dev_info(ksdev, "0x%08x\n", jdesc[2]);
++	dev_info(ksdev, "0x%08x\n", jdesc[3]);
++	dev_info(ksdev, "0x%08x\n", jdesc[4]);
++	dev_info(ksdev, "0x%08x\n", jdesc[5]);
++	dev_info(ksdev, "0x%08x\n", jdesc[6]);
++	dev_info(ksdev, "0x%08x\n", jdesc[7]);
++#endif
++
++	jstat = exec_test_job(ksdev, jdesc);
++
++	dma_sync_single_for_cpu(ksdev, symout_dma, 256, DMA_FROM_DEVICE);
++	dma_unmap_single(ksdev, symout_dma, 256, DMA_FROM_DEVICE);
++	dma_unmap_single(ksdev, symint_dma, 256, DMA_TO_DEVICE);
++	dma_unmap_single(ksdev, clear_key_aes256_dma, 32, DMA_TO_DEVICE);
++
++#ifdef SM_TEST_DETAIL
++	dev_info(ksdev, "intermediate block:\n");
++	dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \
++			"0x%02x 0x%02x 0x%02x 0x%02x\n",
++		 symint[0], symint[1], symint[2], symint[3],
++		 symint[4], symint[5], symint[6], symint[7]);
++	dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \
++			"0x%02x 0x%02x 0x%02x 0x%02x\n",
++		 symint[8], symint[9], symint[10], symint[11],
++		 symint[12], symint[13], symint[14], symint[15]);
++	dev_info(ksdev, "decrypted block:\n");
++	dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \
++			"0x%02x 0x%02x 0x%02x 0x%02x\n",
++		 symout[0], symout[1], symout[2], symout[3],
++		 symout[4], symout[5], symout[6], symout[7]);
++	dev_info(ksdev, "0x%02x 0x%02x 0x%02x 0x%02x " \
++			"0x%02x 0x%02x 0x%02x 0x%02x\n",
++		 symout[8], symout[9], symout[10], symout[11],
++		 symout[12], symout[13], symout[14], symout[15]);
++	dev_info(ksdev, "caam_sm_test: decrypt cycle with 32 byte key\n");
++#endif
++
++	/* Check result */
++	if (memcmp(symout, syminp, 256)) {
++		dev_info(ksdev, "caam_sm_test: 32-byte key test mismatch\n");
++		rtnval = -1;
++		goto freekeys;
++	} else
++		dev_info(ksdev, "caam_sm_test: 32-byte key test match OK\n");
++
++
++	/* Remove 8/16/32 byte keys from keystore */
++freekeys:
++	stat = sm_keystore_slot_dealloc(ksdev, unit, keyslot_des);
++	if (stat)
++		dev_info(ksdev, "caam_sm_test: can't release slot %d\n",
++			 keyslot_des);
++
++	stat = sm_keystore_slot_dealloc(ksdev, unit, keyslot_aes128);
++	if (stat)
++		dev_info(ksdev, "caam_sm_test: can't release slot %d\n",
++			 keyslot_aes128);
++
++	stat = sm_keystore_slot_dealloc(ksdev, unit, keyslot_aes256);
++	if (stat)
++		dev_info(ksdev, "caam_sm_test: can't release slot %d\n",
++			 keyslot_aes256);
++
++
++	/* Free resources */
++freemem:
++#ifdef SM_TEST_DETAIL
++	dev_info(ksdev, "caam_sm_test: cleaning up\n");
++#endif
++	kfree(syminp);
++	kfree(symint);
++	kfree(symout);
++	kfree(clear_key_des);
++	kfree(clear_key_aes128);
++	kfree(clear_key_aes256);
++	kfree(black_key_des);
++	kfree(black_key_aes128);
++	kfree(black_key_aes256);
++	kfree(jdesc);
++
++	/* Disconnect from keystore and leave */
++	sm_release_keystore(ksdev, unit);
++
++	return rtnval;
++}
++EXPORT_SYMBOL(caam_sm_example_init);
++
++void caam_sm_example_shutdown(void)
++{
++	/* unused in present version */
++	struct device_node *dev_node;
++	struct platform_device *pdev;
++
++	/*
++	 * Do of_find_compatible_node() then of_find_device_by_node()
++	 * once a functional device tree is available
++	 */
++	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
++	if (!dev_node) {
++		dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
++		if (!dev_node)
++			return;
++	}
++
++	pdev = of_find_device_by_node(dev_node);
++	if (!pdev)
++		return;
++
++	of_node_put(dev_node);
++
++}
++
++static int __init caam_sm_test_init(void)
++{
++	struct device_node *dev_node;
++	struct platform_device *pdev;
++
++	/*
++	 * Do of_find_compatible_node() then of_find_device_by_node()
++	 * once a functional device tree is available
++	 */
++	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
++	if (!dev_node) {
++		dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
++		if (!dev_node)
++			return -ENODEV;
++	}
++
++	pdev = of_find_device_by_node(dev_node);
++	if (!pdev)
++		return -ENODEV;
++
++	of_node_put(dev_node);
++
++	caam_sm_example_init(pdev);
++
++	return 0;
++}
++
++
++/* Module-based initialization needs to wait for dev tree */
++#ifdef CONFIG_OF
++module_init(caam_sm_test_init);
++module_exit(caam_sm_example_shutdown);
++
++MODULE_LICENSE("Dual BSD/GPL");
++MODULE_DESCRIPTION("FSL CAAM Keystore Usage Example");
++MODULE_AUTHOR("Freescale Semiconductor - NMSG/MAD");
++#endif
+diff -Nur linux-4.1.3/drivers/crypto/caam/snvsregs.h linux-xbian-imx6/drivers/crypto/caam/snvsregs.h
+--- linux-4.1.3/drivers/crypto/caam/snvsregs.h	1970-01-01 01:00:00.000000000 +0100
++++ linux-xbian-imx6/drivers/crypto/caam/snvsregs.h	2015-07-27 23:13:04.213947410 +0200
+@@ -0,0 +1,237 @@
++/*
++ * SNVS hardware register-level view
++ *
++ * Copyright (C) 2012-2014 Freescale Semiconductor, Inc., All Rights Reserved
++ */
++
++#ifndef SNVSREGS_H
++#define SNVSREGS_H
++
++#include <linux/types.h>
++#include <linux/io.h>
++
++/*
++ * SNVS High Power Domain
++ * Includes security violations, HA counter, RTC, alarm
++ */
++struct snvs_hp {
++	u32 lock;		/* HPLR - HP Lock */
++	u32 cmd;		/* HPCOMR - HP Command */
++	u32 ctl;		/* HPCR - HP Control */
++	u32 secvio_intcfg;	/* HPSICR - Security Violation Int Config */
++	u32 secvio_ctl;		/* HPSVCR - Security Violation Control */
++	u32 status;		/* HPSR - HP Status */
++	u32 secvio_status;	/* HPSVSR - Security Violation Status */
++	u32 ha_counteriv;	/* High Assurance Counter IV */
++	u32 ha_counter;		/* High Assurance Counter */
++	u32 rtc_msb;		/* Real Time Clock/Counter MSB */
++	u32 rtc_lsb;		/* Real Time Counter LSB */
++	u32 time_alarm_msb;	/* Time Alarm MSB */
++	u32 time_alarm_lsb;	/* Time Alarm LSB */
++};
++
++#define HP_LOCK_HAC_LCK		0x00040000
++#define HP_LOCK_HPSICR_LCK	0x00020000
++#define HP_LOCK_HPSVCR_LCK	0x00010000
++#define HP_LOCK_MKEYSEL_LCK	0x00000200
++#define HP_LOCK_TAMPCFG_LCK	0x00000100
++#define HP_LOCK_TAMPFLT_LCK	0x00000080
++#define HP_LOCK_SECVIO_LCK	0x00000040
++#define HP_LOCK_GENP_LCK	0x00000020
++#define HP_LOCK_MONOCTR_LCK	0x00000010
++#define HP_LOCK_CALIB_LCK	0x00000008
++#define HP_LOCK_SRTC_LCK	0x00000004
++#define HP_LOCK_ZMK_RD_LCK	0x00000002
++#define HP_LOCK_ZMK_WT_LCK	0x00000001
++
++#define HP_CMD_NONPRIV_AXS	0x80000000
++#define HP_CMD_HAC_STOP		0x00080000
++#define HP_CMD_HAC_CLEAR	0x00040000
++#define HP_CMD_HAC_LOAD		0x00020000
++#define HP_CMD_HAC_CFG_EN	0x00010000
++#define HP_CMD_SNVS_MSTR_KEY	0x00002000
++#define HP_CMD_PROG_ZMK		0x00001000
++#define HP_CMD_SW_LPSV		0x00000400
++#define HP_CMD_SW_FSV		0x00000200
++#define HP_CMD_SW_SV		0x00000100
++#define HP_CMD_LP_SWR_DIS	0x00000020
++#define HP_CMD_LP_SWR		0x00000010
++#define HP_CMD_SSM_SFNS_DIS	0x00000004
++#define HP_CMD_SSM_ST_DIS	0x00000002
++#define HP_CMD_SMM_ST		0x00000001
++
++#define HP_CTL_TIME_SYNC	0x00010000
++#define HP_CTL_CAL_VAL_SHIFT	10
++#define HP_CTL_CAL_VAL_MASK	(0x1f << HP_CTL_CAL_VAL_SHIFT)
++#define HP_CTL_CALIB_EN		0x00000100
++#define HP_CTL_PI_FREQ_SHIFT	4
++#define HP_CTL_PI_FREQ_MASK	(0xf << HP_CTL_PI_FREQ_SHIFT)
++#define HP_CTL_PI_EN		0x00000008
++#define HP_CTL_TIMEALARM_EN	0x00000002
++#define HP_CTL_RTC_EN		0x00000001
++
++#define HP_SECVIO_INTEN_EN	0x10000000
++#define HP_SECVIO_INTEN_SRC5	0x00000020
++#define HP_SECVIO_INTEN_SRC4	0x00000010
++#define HP_SECVIO_INTEN_SRC3	0x00000008
++#define HP_SECVIO_INTEN_SRC2	0x00000004
++#define HP_SECVIO_INTEN_SRC1	0x00000002
++#define HP_SECVIO_INTEN_SRC0	0x00000001
++#define HP_SECVIO_INTEN_ALL	0x8000003f
++
++#define HP_SECVIO_ICTL_CFG_SHIFT	30
++#define HP_SECVIO_ICTL_CFG_MASK		(0x3 << HP_SECVIO_ICTL_CFG_SHIFT)
++#define HP_SECVIO_ICTL_CFG5_SHIFT	5
++#define HP_SECVIO_ICTL_CFG5_MASK	(0x3 << HP_SECVIO_ICTL_CFG5_SHIFT)
++#define HP_SECVIO_ICTL_CFG_DISABLE	0
++#define HP_SECVIO_ICTL_CFG_NONFATAL	1
++#define HP_SECVIO_ICTL_CFG_FATAL	2
++#define HP_SECVIO_ICTL_CFG4_FATAL	0x00000010
++#define HP_SECVIO_ICTL_CFG3_FATAL	0x00000008
++#define HP_SECVIO_ICTL_CFG2_FATAL	0x00000004
++#define HP_SECVIO_ICTL_CFG1_FATAL	0x00000002
++#define HP_SECVIO_ICTL_CFG0_FATAL	0x00000001
++
++#define HP_STATUS_ZMK_ZERO	0x80000000
++#define HP_STATUS_OTPMK_ZERO	0x08000000
++#define HP_STATUS_OTPMK_SYN_SHIFT	16
++#define HP_STATUS_OTPMK_SYN_MASK	(0x1ff << HP_STATUS_OTPMK_SYN_SHIFT)
++#define HP_STATUS_SSM_ST_SHIFT	8
++#define HP_STATUS_SSM_ST_MASK	(0xf << HP_STATUS_SSM_ST_SHIFT)
++#define HP_STATUS_SSM_ST_INIT	0
++#define HP_STATUS_SSM_ST_HARDFAIL	1
++#define HP_STATUS_SSM_ST_SOFTFAIL	3
++#define HP_STATUS_SSM_ST_INITINT	8
++#define HP_STATUS_SSM_ST_CHECK	9
++#define HP_STATUS_SSM_ST_NONSECURE	11
++#define HP_STATUS_SSM_ST_TRUSTED	13
++#define HP_STATUS_SSM_ST_SECURE	15
++
++#define HP_SECVIOST_ZMK_ECC_FAIL	0x08000000	/* write to clear */
++#define HP_SECVIOST_ZMK_SYN_SHIFT	16
++#define HP_SECVIOST_ZMK_SYN_MASK	(0x1ff << HP_SECVIOST_ZMK_SYN_SHIFT)
++#define HP_SECVIOST_SECVIO5		0x00000020
++#define HP_SECVIOST_SECVIO4		0x00000010
++#define HP_SECVIOST_SECVIO3		0x00000008
++#define HP_SECVIOST_SECVIO2		0x00000004
++#define HP_SECVIOST_SECVIO1		0x00000002
++#define HP_SECVIOST_SECVIO0		0x00000001
++#define HP_SECVIOST_SECVIOMASK		0x0000003f
++
++/*
++ * SNVS Low Power Domain
++ * Includes glitch detector, SRTC, alarm, monotonic counter, ZMK
++ */
++struct snvs_lp {
++	u32 lock;
++	u32 ctl;
++	u32 mstr_key_ctl;	/* Master Key Control */
++	u32 secvio_ctl;		/* Security Violation Control */
++	u32 tamper_filt_cfg;	/* Tamper Glitch Filters Configuration */
++	u32 tamper_det_cfg;	/* Tamper Detectors Configuration */
++	u32 status;
++	u32 srtc_msb;		/* Secure Real Time Clock/Counter MSB */
++	u32 srtc_lsb;		/* Secure Real Time Clock/Counter LSB */
++	u32 time_alarm;		/* Time Alarm */
++	u32 smc_msb;		/* Secure Monotonic Counter MSB */
++	u32 smc_lsb;		/* Secure Monotonic Counter LSB */
++	u32 pwr_glitch_det;	/* Power Glitch Detector */
++	u32 gen_purpose;
++	u32 zmk[8];		/* Zeroizable Master Key */
++};
++
++#define LP_LOCK_MKEYSEL_LCK	0x00000200
++#define LP_LOCK_TAMPDET_LCK	0x00000100
++#define LP_LOCK_TAMPFLT_LCK	0x00000080
++#define LP_LOCK_SECVIO_LCK	0x00000040
++#define LP_LOCK_GENP_LCK	0x00000020
++#define LP_LOCK_MONOCTR_LCK	0x00000010
++#define LP_LOCK_CALIB_LCK	0x00000008
++#define LP_LOCK_SRTC_LCK	0x00000004
++#define LP_LOCK_ZMK_RD_LCK	0x00000002
++#define LP_LOCK_ZMK_WT_LCK	0x00000001
++
++#define LP_CTL_CAL_VAL_SHIFT	10
++#define LP_CTL_CAL_VAL_MASK	(0x1f << LP_CTL_CAL_VAL_SHIFT)
++#define LP_CTL_CALIB_EN		0x00000100
++#define LP_CTL_SRTC_INVAL_EN	0x00000010
++#define LP_CTL_WAKE_INT_EN	0x00000008
++#define LP_CTL_MONOCTR_EN	0x00000004
++#define LP_CTL_TIMEALARM_EN	0x00000002
++#define LP_CTL_SRTC_EN		0x00000001
++
++#define LP_MKEYCTL_ZMKECC_SHIFT	8
++#define LP_MKEYCTL_ZMKECC_MASK	(0xff << LP_MKEYCTL_ZMKECC_SHIFT)
++#define LP_MKEYCTL_ZMKECC_EN	0x00000010
++#define LP_MKEYCTL_ZMKECC_VAL	0x00000008
++#define LP_MKEYCTL_ZMKECC_PROG	0x00000004
++#define LP_MKEYCTL_MKSEL_SHIFT	0
++#define LP_MKEYCTL_MKSEL_MASK	(3 << LP_MKEYCTL_MKSEL_SHIFT)
++#define LP_MKEYCTL_MK_OTP	0
++#define LP_MKEYCTL_MK_ZMK	2
++#define LP_MKEYCTL_MK_COMB	3
++
++#define LP_SECVIO_CTL_SRC5	0x20
++#define LP_SECVIO_CTL_SRC4	0x10
++#define LP_SECVIO_CTL_SRC3	0x08
++#define LP_SECVIO_CTL_SRC2	0x04
++#define LP_SECVIO_CTL_SRC1	0x02
++#define LP_SECVIO_CTL_SRC0	0x01
++
++#define LP_TAMPFILT_EXT2_EN	0x80000000
++#define LP_TAMPFILT_EXT2_SHIFT	24
++#define LP_TAMPFILT_EXT2_MASK	(0x1f << LP_TAMPFILT_EXT2_SHIFT)
++#define LP_TAMPFILT_EXT1_EN	0x00800000
++#define LP_TAMPFILT_EXT1_SHIFT	16
++#define LP_TAMPFILT_EXT1_MASK	(0x1f << LP_TAMPFILT_EXT1_SHIFT)
++#define LP_TAMPFILT_WM_EN	0x00000080
++#define LP_TAMPFILT_WM_SHIFT	0
++#define LP_TAMPFILT_WM_MASK	(0x1f << LP_TAMPFILT_WM_SHIFT)
++
++#define LP_TAMPDET_OSC_BPS	0x10000000
++#define LP_TAMPDET_VRC_SHIFT	24
++#define LP_TAMPDET_VRC_MASK	(3 << LP_TAMPDET_VRC_SHIFT)
++#define LP_TAMPDET_HTDC_SHIFT	20
++#define LP_TAMPDET_HTDC_MASK	(3 << LP_TAMPDET_HTDC_SHIFT)
++#define LP_TAMPDET_LTDC_SHIFT	16
++#define LP_TAMPDET_LTDC_MASK	(3 << LP_TAMPDET_LTDC_SHIFT)
++#define LP_TAMPDET_POR_OBS	0x00008000
++#define LP_TAMPDET_PFD_OBS	0x00004000
++#define LP_TAMPDET_ET2_EN	0x00000400
++#define LP_TAMPDET_ET1_EN	0x00000200
++#define LP_TAMPDET_WMT2_EN	0x00000100
++#define LP_TAMPDET_WMT1_EN	0x00000080
++#define LP_TAMPDET_VT_EN	0x00000040
++#define LP_TAMPDET_TT_EN	0x00000020
++#define LP_TAMPDET_CT_EN	0x00000010
++#define LP_TAMPDET_MCR_EN	0x00000004
++#define LP_TAMPDET_SRTCR_EN	0x00000002
++
++#define LP_STATUS_SECURE
++#define LP_STATUS_NONSECURE
++#define LP_STATUS_SCANEXIT	0x00100000	/* all write 1 clear here on */
++#define LP_STATUS_EXT_SECVIO	0x00010000
++#define LP_STATUS_ET2		0x00000400
++#define LP_STATUS_ET1		0x00000200
++#define LP_STATUS_WMT2		0x00000100
++#define LP_STATUS_WMT1		0x00000080
++#define LP_STATUS_VTD		0x00000040
++#define LP_STATUS_TTD		0x00000020
++#define LP_STATUS_CTD		0x00000010
++#define LP_STATUS_PGD		0x00000008
++#define LP_STATUS_MCR		0x00000004
++#define LP_STATUS_SRTCR		0x00000002
++#define LP_STATUS_LPTA		0x00000001
++
++/* Full SNVS register page, including version/options */
++struct snvs_full {
++	struct snvs_hp hp;
++	struct snvs_lp lp;
++	u32 rsvd[731];		/* deadspace 0x08c-0xbf7 */
++
++	/* Version / Revision / Option ID space - end of register page */
++	u32 vid;		/* 0xbf8 HP Version ID (VID 1) */
++	u32 opt_rev;		/* 0xbfc HP Options / Revision (VID 2) */
++};
++
++#endif /* SNVSREGS_H */
+diff -Nur linux-4.1.3/drivers/dma/imx-sdma.c linux-xbian-imx6/drivers/dma/imx-sdma.c
+--- linux-4.1.3/drivers/dma/imx-sdma.c	2015-07-21 19:10:33.000000000 +0200
++++ linux-xbian-imx6/drivers/dma/imx-sdma.c	2015-07-27 23:13:04.285691435 +0200
+@@ -29,6 +29,7 @@
+ #include <linux/semaphore.h>
+ #include <linux/spinlock.h>
+ #include <linux/device.h>
++#include <linux/genalloc.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/firmware.h>
+ #include <linux/slab.h>
+@@ -232,6 +233,14 @@
+ 
+ struct sdma_engine;
+ 
++enum sdma_mode {
++	SDMA_MODE_INVALID = 0,
++	SDMA_MODE_LOOP,
++	SDMA_MODE_NORMAL,
++	SDMA_MODE_P2P,
++	SDMA_MODE_NO_BD,
++};
++
+ /**
+  * struct sdma_channel - housekeeping for a SDMA channel
+  *
+@@ -244,6 +253,7 @@
+  * @word_size		peripheral access size
+  * @buf_tail		ID of the buffer that was processed
+  * @num_bd		max NUM_BD. number of descriptors currently handling
++ * @bd_iram		flag indicating the memory location of buffer descriptor
+  */
+ struct sdma_channel {
+ 	struct sdma_engine	*sdma;
+@@ -258,12 +268,16 @@
+ 	unsigned int			period_len;
+ 	struct sdma_buffer_descriptor	*bd;
+ 	dma_addr_t			bd_phys;
++	bool				bd_iram;
+ 	unsigned int			pc_from_device, pc_to_device;
+-	unsigned long			flags;
+-	dma_addr_t			per_address;
++	unsigned int			device_to_device;
++	unsigned int			other_script;
++	enum sdma_mode			mode;
++	dma_addr_t			per_address, per_address2;
+ 	unsigned long			event_mask[2];
+ 	unsigned long			watermark_level;
+ 	u32				shp_addr, per_addr;
++	u32				data_addr1, data_addr2;
+ 	struct dma_chan			chan;
+ 	spinlock_t			lock;
+ 	struct dma_async_tx_descriptor	desc;
+@@ -271,11 +285,8 @@
+ 	unsigned int			chn_count;
+ 	unsigned int			chn_real_count;
+ 	struct tasklet_struct		tasklet;
+-	struct imx_dma_data		data;
+ };
+ 
+-#define IMX_DMA_SG_LOOP		BIT(0)
+-
+ #define MAX_DMA_CHANNELS 32
+ #define MXC_SDMA_DEFAULT_PRIORITY 1
+ #define MXC_SDMA_MIN_PRIORITY 1
+@@ -327,6 +338,7 @@
+ 	spinlock_t		channel_0_lock;
+ 	u32			script_number;
+ 	struct sdma_script_start_addrs	*script_addrs;
++	struct gen_pool		*iram_pool;
+ 	const struct sdma_driver_data	*drvdata;
+ };
+ 
+@@ -546,12 +558,14 @@
+ 	dma_addr_t buf_phys;
+ 	int ret;
+ 	unsigned long flags;
++	bool use_iram = true;
+ 
+-	buf_virt = dma_alloc_coherent(NULL,
+-			size,
+-			&buf_phys, GFP_KERNEL);
++	buf_virt = gen_pool_dma_alloc(sdma->iram_pool, size, &buf_phys);
+ 	if (!buf_virt) {
+-		return -ENOMEM;
++		use_iram = false;
++		buf_virt = dma_alloc_coherent(NULL, size, &buf_phys, GFP_KERNEL);
++		if (!buf_virt)
++			return -ENOMEM;
+ 	}
+ 
+ 	spin_lock_irqsave(&sdma->channel_0_lock, flags);
+@@ -568,7 +582,10 @@
+ 
+ 	spin_unlock_irqrestore(&sdma->channel_0_lock, flags);
+ 
+-	dma_free_coherent(NULL, size, buf_virt, buf_phys);
++	if (use_iram)
++		gen_pool_free(sdma->iram_pool, (unsigned long)buf_virt, size);
++	else
++		dma_free_coherent(NULL, size, buf_virt, buf_phys);
+ 
+ 	return ret;
+ }
+@@ -654,14 +671,31 @@
+ 		sdmac->desc.callback(sdmac->desc.callback_param);
+ }
+ 
++static void sdma_handle_other_intr(struct sdma_channel *sdmac)
++{
++	if
(sdmac->desc.callback) ++ sdmac->desc.callback(sdmac->desc.callback_param); ++} ++ + static void sdma_tasklet(unsigned long data) + { + struct sdma_channel *sdmac = (struct sdma_channel *) data; ++ struct sdma_engine *sdma = sdmac->sdma; + +- if (sdmac->flags & IMX_DMA_SG_LOOP) ++ switch (sdmac->mode) { ++ case SDMA_MODE_LOOP: + sdma_handle_channel_loop(sdmac); +- else ++ break; ++ case SDMA_MODE_NORMAL: + mxc_sdma_handle_channel_normal(sdmac); ++ break; ++ case SDMA_MODE_NO_BD: ++ sdma_handle_other_intr(sdmac); ++ break; ++ default: ++ dev_err(sdma->dev, "invalid SDMA MODE!\n"); ++ break; ++ } + } + + static irqreturn_t sdma_int_handler(int irq, void *dev_id) +@@ -678,7 +712,7 @@ + int channel = fls(stat) - 1; + struct sdma_channel *sdmac = &sdma->channel[channel]; + +- if (sdmac->flags & IMX_DMA_SG_LOOP) ++ if (sdmac->mode & SDMA_MODE_LOOP) + sdma_update_channel_loop(sdmac); + + tasklet_schedule(&sdmac->tasklet); +@@ -702,9 +736,12 @@ + * two peripherals or memory-to-memory transfers + */ + int per_2_per = 0, emi_2_emi = 0; ++ int other = 0; + + sdmac->pc_from_device = 0; + sdmac->pc_to_device = 0; ++ sdmac->device_to_device = 0; ++ sdmac->other_script = 0; + + switch (peripheral_type) { + case IMX_DMATYPE_MEMORY: +@@ -733,7 +770,6 @@ + case IMX_DMATYPE_CSPI: + case IMX_DMATYPE_EXT: + case IMX_DMATYPE_SSI: +- case IMX_DMATYPE_SAI: + per_2_emi = sdma->script_addrs->app_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_app_addr; + break; +@@ -751,11 +787,6 @@ + emi_2_per = sdma->script_addrs->mcu_2_shp_addr; + break; + case IMX_DMATYPE_ASRC: +- per_2_emi = sdma->script_addrs->asrc_2_mcu_addr; +- emi_2_per = sdma->script_addrs->asrc_2_mcu_addr; +- per_2_per = sdma->script_addrs->per_2_per_addr; +- break; +- case IMX_DMATYPE_ASRC_SP: + per_2_emi = sdma->script_addrs->shp_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_shp_addr; + per_2_per = sdma->script_addrs->per_2_per_addr; +@@ -774,12 +805,17 @@ + case IMX_DMATYPE_IPU_MEMORY: + emi_2_per = sdma->script_addrs->ext_mem_2_ipu_addr; + break; ++ case IMX_DMATYPE_HDMI: ++ other = sdma->script_addrs->hdmi_dma_addr; ++ break; + default: + break; + } + + sdmac->pc_from_device = per_2_emi; + sdmac->pc_to_device = emi_2_per; ++ sdmac->device_to_device = per_2_per; ++ sdmac->other_script = other; + } + + static int sdma_load_context(struct sdma_channel *sdmac) +@@ -792,11 +828,14 @@ + int ret; + unsigned long flags; + +- if (sdmac->direction == DMA_DEV_TO_MEM) { ++ if (sdmac->direction == DMA_DEV_TO_MEM) + load_address = sdmac->pc_from_device; +- } else { ++ else if (sdmac->direction == DMA_DEV_TO_DEV) ++ load_address = sdmac->device_to_device; ++ else if (sdmac->direction == DMA_MEM_TO_DEV) + load_address = sdmac->pc_to_device; +- } ++ else ++ load_address = sdmac->other_script; + + if (load_address < 0) + return load_address; +@@ -816,11 +855,16 @@ + /* Send by context the event mask,base address for peripheral + * and watermark level + */ +- context->gReg[0] = sdmac->event_mask[1]; +- context->gReg[1] = sdmac->event_mask[0]; +- context->gReg[2] = sdmac->per_addr; +- context->gReg[6] = sdmac->shp_addr; +- context->gReg[7] = sdmac->watermark_level; ++ if (sdmac->peripheral_type == IMX_DMATYPE_HDMI) { ++ context->gReg[4] = sdmac->data_addr1; ++ context->gReg[6] = sdmac->data_addr2; ++ } else { ++ context->gReg[0] = sdmac->event_mask[1]; ++ context->gReg[1] = sdmac->event_mask[0]; ++ context->gReg[2] = sdmac->per_addr; ++ context->gReg[6] = sdmac->shp_addr; ++ context->gReg[7] = sdmac->watermark_level; ++ } + + bd0->mode.command = C0_SETDM; + 
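++	/*
++	 * C0_SETDM is a channel-0 control command: the bd0 descriptor
++	 * set up here copies the context image built above (gReg[]
++	 * event masks, addresses and watermark) into the engine's
++	 * internal RAM area holding this channel's context.
++	 */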
bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD; +@@ -854,6 +898,7 @@ + static int sdma_config_channel(struct dma_chan *chan) + { + struct sdma_channel *sdmac = to_sdma_chan(chan); ++ struct imx_dma_data *data = sdmac->chan.private; + int ret; + + sdma_disable_channel(chan); +@@ -862,12 +907,19 @@ + sdmac->event_mask[1] = 0; + sdmac->shp_addr = 0; + sdmac->per_addr = 0; ++ sdmac->data_addr1 = 0; ++ sdmac->data_addr2 = 0; + +- if (sdmac->event_id0) { ++ if (sdmac->event_id0 >= 0) { + if (sdmac->event_id0 >= sdmac->sdma->drvdata->num_events) + return -EINVAL; + sdma_event_enable(sdmac, sdmac->event_id0); + } ++ if (sdmac->event_id1) { ++ if (sdmac->event_id1 >= sdmac->sdma->drvdata->num_events) ++ return -EINVAL; ++ sdma_event_enable(sdmac, sdmac->event_id1); ++ } + + switch (sdmac->peripheral_type) { + case IMX_DMATYPE_DSP: +@@ -887,19 +939,75 @@ + (sdmac->peripheral_type != IMX_DMATYPE_DSP)) { + /* Handle multiple event channels differently */ + if (sdmac->event_id1) { +- sdmac->event_mask[1] = BIT(sdmac->event_id1 % 32); +- if (sdmac->event_id1 > 31) +- __set_bit(31, &sdmac->watermark_level); +- sdmac->event_mask[0] = BIT(sdmac->event_id0 % 32); +- if (sdmac->event_id0 > 31) +- __set_bit(30, &sdmac->watermark_level); ++ if (sdmac->event_id0 > 31) { ++ sdmac->event_mask[0] |= 0; ++ __set_bit(28, &sdmac->watermark_level); ++ sdmac->event_mask[1] |= ++ BIT(sdmac->event_id0 % 32); ++ } else { ++ sdmac->event_mask[1] |= 0; ++ sdmac->event_mask[0] |= ++ BIT(sdmac->event_id0 % 32); ++ } ++ if (sdmac->event_id1 > 31) { ++ sdmac->event_mask[0] |= 0; ++ __set_bit(29, &sdmac->watermark_level); ++ sdmac->event_mask[1] |= ++ BIT(sdmac->event_id1 % 32); ++ } else { ++ sdmac->event_mask[1] |= 0; ++ sdmac->event_mask[0] |= ++ BIT(sdmac->event_id1 % 32); ++ } ++ /* BIT 11: ++ * 1 : Source on SPBA ++ * 0 : Source on AIPS ++ */ ++ __set_bit(11, &sdmac->watermark_level); ++ /* BIT 12: ++ * 1 : Destination on SPBA ++ * 0 : Destination on AIPS ++ */ ++ __set_bit(12, &sdmac->watermark_level); ++ __set_bit(31, &sdmac->watermark_level); ++ /* BIT 31: ++ * 1 : Amount of samples to be transferred is ++ * unknown and script will keep on transferring ++ * samples as long as both events are detected ++ * and script must be manually stopped by the ++ * application. 
++				 * 0 : The amount of samples to be transferred
++				 * is equal to the count field of mode word
++				 */
++			__set_bit(25, &sdmac->watermark_level);
++			__clear_bit(24, &sdmac->watermark_level);
+ 		} else {
+-			__set_bit(sdmac->event_id0, sdmac->event_mask);
++			if (sdmac->event_id0 > 31) {
++				sdmac->event_mask[0] = 0;
++				sdmac->event_mask[1] |=
++					BIT(sdmac->event_id0 % 32);
++			} else {
++				sdmac->event_mask[0] |=
++					BIT(sdmac->event_id0 % 32);
++				sdmac->event_mask[1] = 0;
++			}
+ 		}
+ 		/* Watermark Level */
+ 		sdmac->watermark_level |= sdmac->watermark_level;
+ 		/* Address */
+-		sdmac->shp_addr = sdmac->per_address;
++		if (sdmac->direction == DMA_DEV_TO_DEV) {
++			sdmac->shp_addr = sdmac->per_address2;
++			sdmac->per_addr = sdmac->per_address;
++		} else if (sdmac->direction == DMA_TRANS_NONE) {
++			if (sdmac->peripheral_type != IMX_DMATYPE_HDMI ||
++			    !data->data_addr1 || !data->data_addr2)
++				return -EINVAL;
++			sdmac->data_addr1 = *(u32 *)data->data_addr1;
++			sdmac->data_addr2 = *(u32 *)data->data_addr2;
++			sdmac->watermark_level = 0;
++		} else {
++			sdmac->shp_addr = sdmac->per_address;
++		}
+ 	} else {
+ 		sdmac->watermark_level = 0; /* FIXME: M3_BASE_ADDRESS */
+ 	}
+@@ -931,13 +1039,19 @@
+ 	int channel = sdmac->channel;
+ 	int ret = -EBUSY;
+ 
+-	sdmac->bd = dma_zalloc_coherent(NULL, PAGE_SIZE, &sdmac->bd_phys,
+-					GFP_KERNEL);
++	sdmac->bd_iram = true;
++	sdmac->bd = gen_pool_dma_alloc(sdma->iram_pool, PAGE_SIZE, &sdmac->bd_phys);
+ 	if (!sdmac->bd) {
+-		ret = -ENOMEM;
+-		goto out;
++		sdmac->bd_iram = false;
++		sdmac->bd = dma_alloc_coherent(NULL, PAGE_SIZE, &sdmac->bd_phys, GFP_KERNEL);
++		if (!sdmac->bd) {
++			ret = -ENOMEM;
++			goto out;
++		}
+ 	}
+ 
++	memset(sdmac->bd, 0, PAGE_SIZE);
++
+ 	sdma->channel_control[channel].base_bd_ptr = sdmac->bd_phys;
+ 	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
+ 
+@@ -987,6 +1101,7 @@
+ 
+ 	sdmac->peripheral_type = data->peripheral_type;
+ 	sdmac->event_id0 = data->dma_request;
++	sdmac->event_id1 = data->dma_request2;
+ 
+ 	clk_enable(sdmac->sdma->clk_ipg);
+ 	clk_enable(sdmac->sdma->clk_ahb);
+@@ -1004,6 +1119,9 @@
+ 	/* txd.flags will be overwritten in prep funcs */
+ 	sdmac->desc.flags = DMA_CTRL_ACK;
+ 
++	/* Set SDMA channel mode to invalid to avoid misconfig */
++	sdmac->mode = SDMA_MODE_INVALID;
++
+ 	return 0;
+ }
+ 
+@@ -1014,7 +1132,7 @@
+ 
+ 	sdma_disable_channel(chan);
+ 
+-	if (sdmac->event_id0)
++	if (sdmac->event_id0 >= 0)
+ 		sdma_event_disable(sdmac, sdmac->event_id0);
+ 	if (sdmac->event_id1)
+ 		sdma_event_disable(sdmac, sdmac->event_id1);
+@@ -1024,7 +1142,10 @@
+ 
+ 	sdma_set_channel_priority(sdmac, 0);
+ 
+-	dma_free_coherent(NULL, PAGE_SIZE, sdmac->bd, sdmac->bd_phys);
++	if (sdmac->bd_iram)
++		gen_pool_free(sdma->iram_pool, (unsigned long)sdmac->bd, PAGE_SIZE);
++	else
++		dma_free_coherent(NULL, PAGE_SIZE, sdmac->bd, sdmac->bd_phys);
+ 
+ 	clk_disable(sdma->clk_ipg);
+ 	clk_disable(sdma->clk_ahb);
+@@ -1045,7 +1166,7 @@
+ 		return NULL;
+ 	sdmac->status = DMA_IN_PROGRESS;
+ 
+-	sdmac->flags = 0;
++	sdmac->mode = SDMA_MODE_NORMAL;
+ 
+ 	sdmac->buf_tail = 0;
+ 
+@@ -1134,13 +1255,13 @@
+ static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
+ 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+ 		size_t period_len, enum dma_transfer_direction direction,
+-		unsigned long flags)
++		unsigned long flags, void *context)
+ {
+ 	struct sdma_channel *sdmac = to_sdma_chan(chan);
+ 	struct sdma_engine *sdma = sdmac->sdma;
+-	int num_periods = buf_len / period_len;
+ 	int channel = sdmac->channel;
+ 	int ret, i = 0, buf = 0;
++	int num_periods;
+ 
+ 	dev_dbg(sdma->dev, "%s
channel: %d\n", __func__, channel); + +@@ -1152,12 +1273,33 @@ + sdmac->buf_tail = 0; + sdmac->period_len = period_len; + +- sdmac->flags |= IMX_DMA_SG_LOOP; + sdmac->direction = direction; ++ ++ switch (sdmac->direction) { ++ case DMA_DEV_TO_DEV: ++ sdmac->mode = SDMA_MODE_P2P; ++ break; ++ case DMA_TRANS_NONE: ++ sdmac->mode = SDMA_MODE_NO_BD; ++ break; ++ case DMA_MEM_TO_DEV: ++ case DMA_DEV_TO_MEM: ++ sdmac->mode = SDMA_MODE_LOOP; ++ break; ++ default: ++ dev_err(sdma->dev, "invalid SDMA direction %d\n", direction); ++ return NULL; ++ } ++ + ret = sdma_load_context(sdmac); + if (ret) + goto err_out; + ++ if (period_len) ++ num_periods = buf_len / period_len; ++ else ++ return &sdmac->desc; ++ + if (num_periods > NUM_BD) { + dev_err(sdma->dev, "SDMA channel %d: maximum number of sg exceeded: %d > %d\n", + channel, num_periods, NUM_BD); +@@ -1216,7 +1358,16 @@ + { + struct sdma_channel *sdmac = to_sdma_chan(chan); + +- if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) { ++ if (dmaengine_cfg->direction == DMA_DEV_TO_DEV) { ++ sdmac->per_address = dmaengine_cfg->src_addr; ++ sdmac->per_address2 = dmaengine_cfg->dst_addr; ++ sdmac->watermark_level = 0; ++ sdmac->watermark_level |= ++ dmaengine_cfg->src_maxburst; ++ sdmac->watermark_level |= ++ dmaengine_cfg->dst_maxburst << 16; ++ sdmac->word_size = dmaengine_cfg->dst_addr_width; ++ } else if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) { + sdmac->per_address = dmaengine_cfg->src_addr; + sdmac->watermark_level = dmaengine_cfg->src_maxburst * + dmaengine_cfg->src_addr_width; +@@ -1238,7 +1389,7 @@ + struct sdma_channel *sdmac = to_sdma_chan(chan); + u32 residue; + +- if (sdmac->flags & IMX_DMA_SG_LOOP) ++ if (sdmac->mode & SDMA_MODE_LOOP) + residue = (sdmac->num_bd - sdmac->buf_tail) * sdmac->period_len; + else + residue = sdmac->chn_count - sdmac->chn_real_count; +@@ -1286,8 +1437,7 @@ + unsigned short *ram_code; + + if (!fw) { +- dev_info(sdma->dev, "external firmware not found, using ROM firmware\n"); +- /* In this case we just use the ROM firmware. 
*/ ++ dev_err(sdma->dev, "firmware not found\n"); + return; + } + +@@ -1302,7 +1452,10 @@ + goto err_firmware; + switch (header->version_major) { + case 1: +- sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; ++ if (header->version_minor > 0) ++ sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V2; ++ else ++ sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; + break; + case 2: + sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V2; +@@ -1337,7 +1490,7 @@ + release_firmware(fw); + } + +-static int sdma_get_firmware(struct sdma_engine *sdma, ++static int __init sdma_get_firmware(struct sdma_engine *sdma, + const char *fw_name) + { + int ret; +@@ -1349,9 +1502,9 @@ + return ret; + } + +-static int sdma_init(struct sdma_engine *sdma) ++static int __init sdma_init(struct sdma_engine *sdma) + { +- int i, ret; ++ int i, ret, ccbsize; + dma_addr_t ccb_phys; + + clk_enable(sdma->clk_ipg); +@@ -1360,14 +1513,17 @@ + /* Be sure SDMA has not started yet */ + writel_relaxed(0, sdma->regs + SDMA_H_C0PTR); + +- sdma->channel_control = dma_alloc_coherent(NULL, +- MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control) + +- sizeof(struct sdma_context_data), +- &ccb_phys, GFP_KERNEL); ++ ccbsize = MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control) ++ + sizeof(struct sdma_context_data); + ++ sdma->channel_control = gen_pool_dma_alloc(sdma->iram_pool, ccbsize, &ccb_phys); + if (!sdma->channel_control) { +- ret = -ENOMEM; +- goto err_dma_alloc; ++ sdma->channel_control = dma_alloc_coherent(NULL, ccbsize, ++ &ccb_phys, GFP_KERNEL); ++ if (!sdma->channel_control) { ++ ret = -ENOMEM; ++ goto err_dma_alloc; ++ } + } + + sdma->context = (void *)sdma->channel_control + +@@ -1419,14 +1575,12 @@ + + static bool sdma_filter_fn(struct dma_chan *chan, void *fn_param) + { +- struct sdma_channel *sdmac = to_sdma_chan(chan); + struct imx_dma_data *data = fn_param; + + if (!imx_dma_is_general_purpose(chan)) + return false; + +- sdmac->data = *data; +- chan->private = &sdmac->data; ++ chan->private = data; + + return true; + } +@@ -1444,11 +1598,12 @@ + data.dma_request = dma_spec->args[0]; + data.peripheral_type = dma_spec->args[1]; + data.priority = dma_spec->args[2]; ++ data.dma_request2 = 0; + + return dma_request_channel(mask, sdma_filter_fn, &data); + } + +-static int sdma_probe(struct platform_device *pdev) ++static int __init sdma_probe(struct platform_device *pdev) + { + const struct of_device_id *of_id = + of_match_device(sdma_dt_ids, &pdev->dev); +@@ -1547,6 +1702,11 @@ + &sdma->dma_device.channels); + } + ++ if (np) ++ sdma->iram_pool = of_get_named_gen_pool(np, "iram", 0); ++ if (!sdma->iram_pool) ++ dev_warn(&pdev->dev, "no iram assigned, using external mem\n"); ++ + ret = sdma_init(sdma); + if (ret) + goto err_init; +@@ -1583,7 +1743,7 @@ + sdma->dma_device.device_free_chan_resources = sdma_free_chan_resources; + sdma->dma_device.device_tx_status = sdma_tx_status; + sdma->dma_device.device_prep_slave_sg = sdma_prep_slave_sg; +- sdma->dma_device.device_prep_dma_cyclic = sdma_prep_dma_cyclic; ++ sdma->dma_device.device_prep_dma_cyclic = (void*)sdma_prep_dma_cyclic; + sdma->dma_device.device_config = sdma_config; + sdma->dma_device.device_terminate_all = sdma_disable_channel; + sdma->dma_device.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); +@@ -1594,8 +1754,6 @@ + sdma->dma_device.dev->dma_parms = &sdma->dma_parms; + dma_set_max_seg_size(sdma->dma_device.dev, 65535); + +- platform_set_drvdata(pdev, sdma); +- + ret = dma_async_device_register(&sdma->dma_device); + if (ret) { + 
dev_err(&pdev->dev, "unable to register\n");
+@@ -1647,10 +1805,13 @@
+ 	},
+ 	.id_table	= sdma_devtypes,
+ 	.remove		= sdma_remove,
+-	.probe		= sdma_probe,
+ };
+ 
+-module_platform_driver(sdma_driver);
++static int __init sdma_module_init(void)
++{
++	return platform_driver_probe(&sdma_driver, sdma_probe);
++}
++module_init(sdma_module_init);
+ 
+ MODULE_AUTHOR("Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>");
+ MODULE_DESCRIPTION("i.MX SDMA driver");
+diff -Nur linux-4.1.3/drivers/gpu/drm/Kconfig linux-xbian-imx6/drivers/gpu/drm/Kconfig
+--- linux-4.1.3/drivers/gpu/drm/Kconfig	2015-07-21 19:10:33.000000000 +0200
++++ linux-xbian-imx6/drivers/gpu/drm/Kconfig	2015-07-27 23:13:04.353449682 +0200
+@@ -217,3 +217,5 @@
+ source "drivers/gpu/drm/amd/amdkfd/Kconfig"
+ 
+ source "drivers/gpu/drm/imx/Kconfig"
++
++source "drivers/gpu/drm/vivante/Kconfig"
+diff -Nur linux-4.1.3/drivers/gpu/drm/Makefile linux-xbian-imx6/drivers/gpu/drm/Makefile
+--- linux-4.1.3/drivers/gpu/drm/Makefile	2015-07-21 19:10:33.000000000 +0200
++++ linux-xbian-imx6/drivers/gpu/drm/Makefile	2015-07-27 23:13:04.353449682 +0200
+@@ -67,6 +67,7 @@
+ obj-$(CONFIG_DRM_TEGRA) += tegra/
+ obj-$(CONFIG_DRM_STI) += sti/
+ obj-$(CONFIG_DRM_IMX) += imx/
++obj-$(CONFIG_DRM_VIVANTE) += vivante/
+ obj-y			+= i2c/
+ obj-y			+= panel/
+ obj-y			+= bridge/
+diff -Nur linux-4.1.3/drivers/gpu/drm/vivante/Kconfig linux-xbian-imx6/drivers/gpu/drm/vivante/Kconfig
+--- linux-4.1.3/drivers/gpu/drm/vivante/Kconfig	1970-01-01 01:00:00.000000000 +0100
++++ linux-xbian-imx6/drivers/gpu/drm/vivante/Kconfig	2015-07-27 23:13:04.744056042 +0200
+@@ -0,0 +1,6 @@
++config DRM_VIVANTE
++	tristate "Vivante GCCore"
++	depends on DRM
++	help
++	  Choose this option if you have a Vivante graphics card.
++	  If M is selected, the module will be called vivante.
+diff -Nur linux-4.1.3/drivers/gpu/drm/vivante/Makefile linux-xbian-imx6/drivers/gpu/drm/vivante/Makefile
+--- linux-4.1.3/drivers/gpu/drm/vivante/Makefile	1970-01-01 01:00:00.000000000 +0100
++++ linux-xbian-imx6/drivers/gpu/drm/vivante/Makefile	2015-07-27 23:13:04.744056042 +0200
+@@ -0,0 +1,29 @@
++##############################################################################
++#
++# Copyright (C) 2005 - 2013 by Vivante Corp.
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2 of the license, or
++# (at your option) any later version.
++#
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not write to the Free Software
++# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
++#
++##############################################################################
++
++
++#
++# Makefile for the drm device driver. This driver provides support for the
++# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
++ ++ccflags-y := -Iinclude/drm ++vivante-y := vivante_drv.o ++ ++obj-$(CONFIG_DRM_VIVANTE) += vivante.o +diff -Nur linux-4.1.3/drivers/gpu/drm/vivante/vivante_drv.c linux-xbian-imx6/drivers/gpu/drm/vivante/vivante_drv.c +--- linux-4.1.3/drivers/gpu/drm/vivante/vivante_drv.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/drivers/gpu/drm/vivante/vivante_drv.c 2015-07-27 23:13:04.744056042 +0200 +@@ -0,0 +1,112 @@ ++/**************************************************************************** ++* ++* Copyright (C) 2005 - 2013 by Vivante Corp. ++* ++* This program is free software; you can redistribute it and/or modify ++* it under the terms of the GNU General Public License as published by ++* the Free Software Foundation; either version 2 of the license, or ++* (at your option) any later version. ++* ++* This program is distributed in the hope that it will be useful, ++* but WITHOUT ANY WARRANTY; without even the implied warranty of ++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++* GNU General Public License for more details. ++* ++* You should have received a copy of the GNU General Public License ++* along with this program; if not write to the Free Software ++* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++* ++*****************************************************************************/ ++ ++ ++/* vivante_drv.c -- vivante driver -*- linux-c -*- ++ * ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: ++ * Rickard E. 
(Rik) Faith <faith@valinux.com>
++ *    Daryll Strauss <daryll@valinux.com>
++ *    Gareth Hughes <gareth@valinux.com>
++ */
++
++#include <linux/version.h>
++#include <linux/module.h>
++
++#include "drmP.h"
++#include "vivante_drv.h"
++
++#include "drm_pciids.h"
++
++static char platformdevicename[] = "Vivante GCCore";
++static struct platform_device *pplatformdev;
++
++static const struct file_operations viv_driver_fops = {
++	.owner = THIS_MODULE,
++	.open = drm_open,
++	.release = drm_release,
++	.unlocked_ioctl = drm_ioctl,
++	.mmap = drm_legacy_mmap,
++	.poll = drm_poll,
++	.llseek = noop_llseek,
++};
++
++static struct drm_driver driver = {
++//	.driver_features = DRIVER_RENDER,
++	.fops = &viv_driver_fops,
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
++	.set_busid = drm_platform_set_busid,
++#endif
++	.name = DRIVER_NAME,
++	.desc = DRIVER_DESC,
++	.date = DRIVER_DATE,
++	.major = DRIVER_MAJOR,
++	.minor = DRIVER_MINOR,
++	.patchlevel = DRIVER_PATCHLEVEL,
++};
++
++static int __init vivante_init(void)
++{
++	int retcode;
++
++	pplatformdev = platform_device_register_simple(platformdevicename,
++						       -1, NULL, 0);
++	if (pplatformdev == NULL)
++		printk(KERN_ERR "Platform device is null\n");
++
++	retcode = drm_platform_init(&driver, pplatformdev);
++
++	return retcode;
++}
++
++static void __exit vivante_exit(void)
++{
++	if (pplatformdev) {
++		platform_device_unregister(pplatformdev);
++		pplatformdev = NULL;
++	}
++}
++
++module_init(vivante_init);
++module_exit(vivante_exit);
++
++MODULE_AUTHOR(DRIVER_AUTHOR);
++MODULE_DESCRIPTION(DRIVER_DESC);
++MODULE_LICENSE("GPL and additional rights");
+diff -Nur linux-4.1.3/drivers/gpu/drm/vivante/vivante_drv.h linux-xbian-imx6/drivers/gpu/drm/vivante/vivante_drv.h
+--- linux-4.1.3/drivers/gpu/drm/vivante/vivante_drv.h	1970-01-01 01:00:00.000000000 +0100
++++ linux-xbian-imx6/drivers/gpu/drm/vivante/vivante_drv.h	2015-07-27 23:13:04.744056042 +0200
+@@ -0,0 +1,69 @@
++/****************************************************************************
++*
++*    Copyright (C) 2005 - 2013 by Vivante Corp.
++*
++*    This program is free software; you can redistribute it and/or modify
++*    it under the terms of the GNU General Public License as published by
++*    the Free Software Foundation; either version 2 of the license, or
++*    (at your option) any later version.
++*
++*    This program is distributed in the hope that it will be useful,
++*    but WITHOUT ANY WARRANTY; without even the implied warranty of
++*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++*    GNU General Public License for more details.
++*
++*    You should have received a copy of the GNU General Public License
++*    along with this program; if not write to the Free Software
++*    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
++* ++*****************************************************************************/ ++ ++ ++/* vivante_drv.h -- Vivante DRM template customization -*- linux-c -*- ++ * Created: Wed Feb 14 12:32:32 2012 by John Zhao ++ */ ++/* ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: ++ * Gareth Hughes ++ */ ++ ++#ifndef __VIVANTE_DRV_H__ ++#define __VIVANTE_DRV_H__ ++ ++/* General customization: ++ */ ++ ++#include ++#include ++ ++#define DRIVER_AUTHOR "Vivante Inc." ++ ++#define DRIVER_NAME "vivante" ++#define DRIVER_DESC "Vivante GCCore" ++#define DRIVER_DATE "20120216" ++ ++#define DRIVER_MAJOR 1 ++#define DRIVER_MINOR 0 ++#define DRIVER_PATCHLEVEL 0 ++ ++#endif +diff -Nur linux-4.1.3/drivers/Kconfig linux-xbian-imx6/drivers/Kconfig +--- linux-4.1.3/drivers/Kconfig 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/Kconfig 2015-07-27 23:13:03.699781896 +0200 +@@ -182,4 +182,6 @@ + + source "drivers/android/Kconfig" + ++source "drivers/mxc/Kconfig" ++ + endmenu +diff -Nur linux-4.1.3/drivers/Makefile linux-xbian-imx6/drivers/Makefile +--- linux-4.1.3/drivers/Makefile 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/Makefile 2015-07-27 23:13:03.699781896 +0200 +@@ -165,3 +165,4 @@ + obj-$(CONFIG_THUNDERBOLT) += thunderbolt/ + obj-$(CONFIG_CORESIGHT) += hwtracing/coresight/ + obj-$(CONFIG_ANDROID) += android/ ++obj-y += mxc/ +diff -Nur linux-4.1.3/drivers/mfd/Kconfig linux-xbian-imx6/drivers/mfd/Kconfig +--- linux-4.1.3/drivers/mfd/Kconfig 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/mfd/Kconfig 2015-07-27 23:13:06.023491162 +0200 +@@ -207,6 +207,13 @@ + Additional drivers must be enabled in order to use the specific + features of the device. + ++config MFD_MXC_HDMI ++ tristate "Freescale HDMI Core" ++ select MFD_CORE ++ help ++ This is the core driver for the Freescale i.MX6 on-chip HDMI. ++ This MFD driver connects with the video and audio drivers for HDMI. 
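++	  The separate HDMI video and audio drivers build on this core
++	  driver, so enable it before enabling those.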
++ + config MFD_DLN2 + tristate "Diolan DLN2 support" + select MFD_CORE +diff -Nur linux-4.1.3/drivers/mfd/Makefile linux-xbian-imx6/drivers/mfd/Makefile +--- linux-4.1.3/drivers/mfd/Makefile 2015-07-21 19:10:33.000000000 +0200 ++++ linux-xbian-imx6/drivers/mfd/Makefile 2015-07-27 23:13:06.023491162 +0200 +@@ -185,3 +185,4 @@ + intel-soc-pmic-objs := intel_soc_pmic_core.o intel_soc_pmic_crc.o + obj-$(CONFIG_INTEL_SOC_PMIC) += intel-soc-pmic.o + obj-$(CONFIG_MFD_MT6397) += mt6397-core.o ++obj-$(CONFIG_MFD_MXC_HDMI) += mxc-hdmi-core.o +diff -Nur linux-4.1.3/drivers/mfd/mxc-hdmi-core.c linux-xbian-imx6/drivers/mfd/mxc-hdmi-core.c +--- linux-4.1.3/drivers/mfd/mxc-hdmi-core.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-xbian-imx6/drivers/mfd/mxc-hdmi-core.c 2015-07-27 23:13:06.051391618 +0200 +@@ -0,0 +1,723 @@ ++/* ++ * Copyright (C) 2011-2014 Freescale Semiconductor, Inc. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include
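++
++/*
++ * Core of the i.MX6 on-chip HDMI block: per the Kconfig help text
++ * above, the separate HDMI video and audio drivers attach to this
++ * MFD device for access to the shared HDMI hardware.
++ */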