# compiler/nativeGen/X86/CodeGen.hs

639 | | otherwise -> trivialUFCode FF80 (GNEG FF80) x | 639 | | otherwise -> trivialUFCode FF80 (GNEG FF80) x | ||

641 | MO_S_Neg w -> triv_ucode NEGI (intFormat w) | 641 | MO_S_Neg w -> triv_ucode NEGI (intFormat w) | ||

642 | MO_Not w -> triv_ucode NOT (intFormat w) | 642 | MO_Not w -> triv_ucode NOT (intFormat w) | ||

644 | -- Nop conversions | 644 | -- Nop conversions | ||

645 | MO_UU_Conv W32 W8 -> toI8Reg W32 x | 645 | MO_UU_Conv W32 W8 -> toI8Reg W32 x | ||

646 | MO_SS_Conv W32 W8 -> toI8Reg W32 x | 646 | MO_SS_Conv W32 W8 -> toI8Reg W32 x | ||

647 | MO_XX_Conv W32 W8 -> toI8Reg W32 x | ||||

647 | MO_UU_Conv W16 W8 -> toI8Reg W16 x | 648 | MO_UU_Conv W16 W8 -> toI8Reg W16 x | ||

648 | MO_SS_Conv W16 W8 -> toI8Reg W16 x | 649 | MO_SS_Conv W16 W8 -> toI8Reg W16 x | ||

650 | MO_XX_Conv W16 W8 -> toI8Reg W16 x | ||||

649 | MO_UU_Conv W32 W16 -> toI16Reg W32 x | 651 | MO_UU_Conv W32 W16 -> toI16Reg W32 x | ||

650 | MO_SS_Conv W32 W16 -> toI16Reg W32 x | 652 | MO_SS_Conv W32 W16 -> toI16Reg W32 x | ||

653 | MO_XX_Conv W32 W16 -> toI16Reg W32 x | ||||

652 | MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x | 655 | MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x | ||

653 | MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x | 656 | MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x | ||

657 | MO_XX_Conv W64 W32 | not is32Bit -> conversionNop II64 x | ||||

654 | MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x | 658 | MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x | ||

655 | MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x | 659 | MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x | ||

660 | MO_XX_Conv W64 W16 | not is32Bit -> toI16Reg W64 x | ||||

656 | MO_UU_Conv W64 W8 | not is32Bit -> toI8Reg W64 x | 661 | MO_UU_Conv W64 W8 | not is32Bit -> toI8Reg W64 x | ||

657 | MO_SS_Conv W64 W8 | not is32Bit -> toI8Reg W64 x | 662 | MO_SS_Conv W64 W8 | not is32Bit -> toI8Reg W64 x | ||

663 | MO_XX_Conv W64 W8 | not is32Bit -> toI8Reg W64 x | ||||

659 | MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x | 665 | MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x | ||

660 | MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x | 666 | MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x | ||

667 | MO_XX_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x | ||||

662 | -- widenings | 669 | -- widenings | ||

663 | MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x | 670 | MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x | ||

664 | MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x | 671 | MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x | ||

665 | MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x | 672 | MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x | ||

667 | MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x | 674 | MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x | ||

668 | MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x | 675 | MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x | ||

669 | MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x | 676 | MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x | ||

678 | -- We don't care about the upper bits for MO_XX_Conv, so MOV is enough. | ||||

679 | MO_XX_Conv W8 W32 -> integerExtend W8 W32 MOV x | ||||

680 | MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x | ||||

681 | MO_XX_Conv W8 W16 -> integerExtend W8 W16 MOV x | ||||

671 | MO_UU_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVZxL x | 683 | MO_UU_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVZxL x | ||

672 | MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x | 684 | MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x | ||

673 | MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x | 685 | MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x | ||

674 | MO_SS_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVSxL x | 686 | MO_SS_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVSxL x | ||

675 | MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x | 687 | MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x | ||

676 | MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x | 688 | MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x | ||

677 | -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl. | 689 | -- For 32-to-64 bit zero extension, amd64 uses an ordinary movl. | ||

678 | -- However, we don't want the register allocator to throw it | 690 | -- However, we don't want the register allocator to throw it | ||

679 | -- away as an unnecessary reg-to-reg move, so we keep it in | 691 | -- away as an unnecessary reg-to-reg move, so we keep it in | ||

680 | -- the form of a movzl and print it as a movl later. | 692 | -- the form of a movzl and print it as a movl later. | ||

693 | -- This doesn't apply to MO_XX_Conv since in this case we don't care about | ||||

694 | -- the upper bits. So we can just use MOV. | ||||

695 | MO_XX_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOV x | ||||

696 | MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x | ||||

697 | MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x | ||||

682 | MO_FF_Conv W32 W64 | 699 | MO_FF_Conv W32 W64 | ||

683 | | sse2 -> coerceFP2FP W64 x | 700 | | sse2 -> coerceFP2FP W64 x | ||

684 | | otherwise -> conversionNop FF80 x | 701 | | otherwise -> conversionNop FF80 x | ||

686 | MO_FF_Conv W64 W32 -> coerceFP2FP W32 x | 703 | MO_FF_Conv W64 W32 -> coerceFP2FP W32 x | ||

688 | MO_FS_Conv from to -> coerceFP2Int from to x | 705 | MO_FS_Conv from to -> coerceFP2Int from to x | ||

781 | MO_S_Quot rep -> div_code rep True True x y | 798 | MO_S_Quot rep -> div_code rep True True x y | ||

782 | MO_S_Rem rep -> div_code rep True False x y | 799 | MO_S_Rem rep -> div_code rep True False x y | ||

783 | MO_U_Quot rep -> div_code rep False True x y | 800 | MO_U_Quot rep -> div_code rep False True x y | ||

784 | MO_U_Rem rep -> div_code rep False False x y | 801 | MO_U_Rem rep -> div_code rep False False x y | ||

786 | MO_S_MulMayOflo rep -> imulMayOflo rep x y | 803 | MO_S_MulMayOflo rep -> imulMayOflo rep x y | ||

805 | MO_Mul W8 -> imulW8 x y | ||||

788 | MO_Mul rep -> triv_op rep IMUL | 806 | MO_Mul rep -> triv_op rep IMUL | ||

789 | MO_And rep -> triv_op rep AND | 807 | MO_And rep -> triv_op rep AND | ||

790 | MO_Or rep -> triv_op rep OR | 808 | MO_Or rep -> triv_op rep OR | ||

791 | MO_Xor rep -> triv_op rep XOR | 809 | MO_Xor rep -> triv_op rep XOR | ||

793 | {- Shift ops on x86s have constraints on their source: it | 811 | {- Shift ops on x86s have constraints on their source: it | ||

794 | either has to be Imm, CL or 1 | 812 | either has to be Imm, CL or 1 | ||

795 | => trivialCode is not restrictive enough (sigh.) | 813 | => trivialCode is not restrictive enough (sigh.) | ||

815 | MO_VF_Neg {} -> needLlvm | 833 | MO_VF_Neg {} -> needLlvm | ||

817 | _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop) | 835 | _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop) | ||

818 | where | 836 | where | ||

819 | -------------------- | 837 | -------------------- | ||

820 | triv_op width instr = trivialCode width op (Just op) x y | 838 | triv_op width instr = trivialCode width op (Just op) x y | ||

821 | where op = instr (intFormat width) | 839 | where op = instr (intFormat width) | ||

841 | -- Special case for IMUL on bytes: the result of IMULB is placed in %ax; | ||||

842 | -- the split into %dx/%edx/%rdx and %ax/%eax/%rax happens only for wider | ||||

843 | -- values. | ||||

844 | imulW8 :: CmmExpr -> CmmExpr -> NatM Register | ||||

845 | imulW8 arg_a arg_b = do | ||||

846 | (a_reg, a_code) <- getNonClobberedReg arg_a | ||||

847 | b_code <- getAnyReg arg_b | ||||

849 | let code = a_code `appOL` b_code eax `appOL` | ||||

850 | toOL [ IMUL2 format (OpReg a_reg) ] | ||||

851 | format = intFormat W8 | ||||

852 | | ||||

853 | return (Fixed format eax code) | ||||

854 | | ||||

855 | | ||||

823 | imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register | 856 | imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register | ||

824 | imulMayOflo rep a b = do | 857 | imulMayOflo rep a b = do | ||

825 | (a_reg, a_code) <- getNonClobberedReg a | 858 | (a_reg, a_code) <- getNonClobberedReg a | ||

826 | b_code <- getAnyReg b | 859 | b_code <- getAnyReg b | ||

827 | let | 860 | let | ||

828 | shift_amt = case rep of | 861 | shift_amt = case rep of | ||

829 | W32 -> 31 | 862 | W32 -> 31 | ||

830 | W64 -> 63 | 863 | W64 -> 63 | ||

909 | = x_code `snocOL` | 942 | = x_code `snocOL` | ||

910 | LEA format | 943 | LEA format | ||

911 | (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm)) | 944 | (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm)) | ||

912 | (OpReg dst) | 945 | (OpReg dst) | ||

913 | -- | 946 | -- | ||

914 | return (Any format code) | 947 | return (Any format code) | ||

916 | ---------------------- | 949 | ---------------------- | ||

951 | -- See Note [DIV/IDIV for bytes] | ||||

952 | div_code W8 signed quotient x y = do | ||||

953 | let widen | signed = MO_SS_Conv W8 W16 | ||||

954 | | otherwise = MO_UU_Conv W8 W16 | ||||

955 | div_code | ||||

956 | W16 | ||||

957 | signed | ||||

958 | quotient | ||||

959 | (CmmMachOp widen [x]) | ||||

960 | (CmmMachOp widen [y]) | ||||

961 | | ||||

917 | div_code width signed quotient x y = do | 962 | div_code width signed quotient x y = do | ||

918 | (y_op, y_code) <- getRegOrMem y -- cannot be clobbered | 963 | (y_op, y_code) <- getRegOrMem y -- cannot be clobbered | ||

919 | x_code <- getAnyReg x | 964 | x_code <- getAnyReg x | ||

920 | let | 965 | let | ||

921 | format = intFormat width | 966 | format = intFormat width | ||

922 | widen | signed = CLTD format | 967 | widen | signed = CLTD format | ||

923 | | otherwise = XOR format (OpReg edx) (OpReg edx) | 968 | | otherwise = XOR format (OpReg edx) (OpReg edx) | ||

2260 | where divOp1 platform signed width results [arg_x, arg_y] | 2305 | where divOp1 platform signed width results [arg_x, arg_y] | ||

2261 | = divOp platform signed width results Nothing arg_x arg_y | 2306 | = divOp platform signed width results Nothing arg_x arg_y | ||

2262 | divOp1 _ _ _ _ _ | 2307 | divOp1 _ _ _ _ _ | ||

2263 | = panic "genCCall: Wrong number of arguments for divOp1" | 2308 | = panic "genCCall: Wrong number of arguments for divOp1" | ||

2264 | divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y] | 2309 | divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y] | ||

2265 | = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y | 2310 | = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y | ||

2266 | divOp2 _ _ _ _ _ | 2311 | divOp2 _ _ _ _ _ | ||

2267 | = panic "genCCall: Wrong number of arguments for divOp2" | 2312 | = panic "genCCall: Wrong number of arguments for divOp2" | ||

2314 | -- See Note [DIV/IDIV for bytes] | ||||

2315 | divOp platform signed W8 [res_q, res_r] m_arg_x_high arg_x_low arg_y = | ||||

2316 | let widen | signed = MO_SS_Conv W8 W16 | ||||

2317 | | otherwise = MO_UU_Conv W8 W16 | ||||

2318 | arg_x_low_16 = CmmMachOp widen [arg_x_low] | ||||

2319 | arg_y_16 = CmmMachOp widen [arg_y] | ||||

2320 | m_arg_x_high_16 = (\p -> CmmMachOp widen [p]) <$> m_arg_x_high | ||||

2321 | in divOp | ||||

2322 | platform signed W16 [res_q, res_r] | ||||

2323 | m_arg_x_high_16 arg_x_low_16 arg_y_16 | ||||

2324 | | ||||

2268 | divOp platform signed width [res_q, res_r] | 2325 | divOp platform signed width [res_q, res_r] | ||

2269 | m_arg_x_high arg_x_low arg_y | 2326 | m_arg_x_high arg_x_low arg_y | ||

2270 | = do let format = intFormat width | 2327 | = do let format = intFormat width | ||

2271 | reg_q = getRegisterReg platform True (CmmLocal res_q) | 2328 | reg_q = getRegisterReg platform True (CmmLocal res_q) | ||

2272 | reg_r = getRegisterReg platform True (CmmLocal res_r) | 2329 | reg_r = getRegisterReg platform True (CmmLocal res_r) | ||

2273 | widen | signed = CLTD format | 2330 | widen | signed = CLTD format | ||

2274 | | otherwise = XOR format (OpReg rdx) (OpReg rdx) | 2331 | | otherwise = XOR format (OpReg rdx) (OpReg rdx) | ||

2275 | instr | signed = IDIV | 2332 | instr | signed = IDIV | ||

2301 | code = rCode reg_r `snocOL` | 2358 | code = rCode reg_r `snocOL` | ||

2302 | SETCC cond (OpReg reg_tmp) `snocOL` | 2359 | SETCC cond (OpReg reg_tmp) `snocOL` | ||

2303 | MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c) | 2360 | MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c) | ||

2305 | return code | 2362 | return code | ||

2306 | addSubIntC _ _ _ _ _ _ _ _ | 2363 | addSubIntC _ _ _ _ _ _ _ _ | ||

2307 | = panic "genCCall: Wrong number of arguments/results for addSubIntC" | 2364 | = panic "genCCall: Wrong number of arguments/results for addSubIntC" | ||

2366 | -- Note [DIV/IDIV for bytes] | ||||

2367 | -- | ||||

2368 | -- IDIV reminder: | ||||

2369 | -- Size Dividend Divisor Quotient Remainder | ||||

2370 | -- byte %ax r/m8 %al %ah | ||||

2371 | -- word %dx:%ax r/m16 %ax %dx | ||||

2372 | -- dword %edx:%eax r/m32 %eax %edx | ||||

2373 | -- qword %rdx:%rax r/m64 %rax %rdx | ||||

2374 | -- | ||||

2375 | -- We do a special case for the byte division because the current | ||||

2376 | -- codegen doesn't deal well with accessing the %ah register (also, | ||||

2377 | -- accessing %ah in 64-bit mode is complicated because it cannot be an | ||||

2378 | -- operand of many instructions). So we just widen operands to 16 bits | ||||

2379 | -- and get the results from %al, %dl. This is not optimal, but a few | ||||

2380 | -- register moves are probably not a huge deal when doing division. | ||||

2381 | | ||||

2309 | genCCall32' :: DynFlags | 2382 | genCCall32' :: DynFlags | ||

2310 | -> ForeignTarget -- function to call | 2383 | -> ForeignTarget -- function to call | ||

2311 | -> [CmmFormal] -- where to put the result | 2384 | -> [CmmFormal] -- where to put the result | ||

2312 | -> [CmmActual] -- arguments (of mixed type) | 2385 | -> [CmmActual] -- arguments (of mixed type) | ||

2313 | -> NatM InstrBlock | 2386 | -> NatM InstrBlock | ||

2314 | genCCall32' dflags target dest_regs args = do | 2387 | genCCall32' dflags target dest_regs args = do | ||

2315 | let | 2388 | let | ||

2316 | prom_args = map (maybePromoteCArg dflags W32) args | 2389 | prom_args = map (maybePromoteCArg dflags W32) args | ||

2444 | in | 2517 | in | ||

2445 | if use_sse2 | 2518 | if use_sse2 | ||

2446 | then MOV format (OpReg reg) (OpAddr addr) | 2519 | then MOV format (OpReg reg) (OpAddr addr) | ||

2447 | else GST format reg addr | 2520 | else GST format reg addr | ||

2448 | ] | 2521 | ] | ||

2449 | ) | 2522 | ) | ||

2450 | 2523 | | |||

2451 | | otherwise = do | 2524 | | otherwise = do | ||

2525 | -- Arguments can be smaller than 32-bit, but we still use @PUSH | ||||

2526 | -- II32@ - the usual calling conventions expect integers to be | ||||

2527 | -- 4-byte aligned. | ||||

2528 | ASSERT((typeWidth arg_ty) <= W32) return () | ||||

2452 | (operand, code) <- getOperand arg | 2529 | (operand, code) <- getOperand arg | ||

2453 | delta <- getDeltaNat | 2530 | delta <- getDeltaNat | ||

2454 | setDeltaNat (delta-size) | 2531 | setDeltaNat (delta-size) | ||

2455 | return (code `snocOL` | 2532 | return (code `snocOL` | ||

2456 | PUSH II32 operand `snocOL` | 2533 | PUSH II32 operand `snocOL` | ||

2457 | DELTA (delta-size)) | 2534 | DELTA (delta-size)) | ||

2459 | where | 2536 | where | ||

2683 | setDeltaNat (delta-arg_size) | 2760 | setDeltaNat (delta-arg_size) | ||

2684 | let code' = code `appOL` arg_code `appOL` toOL [ | 2761 | let code' = code `appOL` arg_code `appOL` toOL [ | ||

2685 | SUB (intFormat (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp), | 2762 | SUB (intFormat (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp), | ||

2686 | DELTA (delta-arg_size), | 2763 | DELTA (delta-arg_size), | ||

2687 | MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel dflags 0))] | 2764 | MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel dflags 0))] | ||

2688 | push_args rest code' | 2765 | push_args rest code' | ||

2689 | 2766 | | |||

2690 | | otherwise = do | 2767 | | otherwise = do | ||

2691 | ASSERT(width == W64) return () | 2768 | -- Arguments can be smaller than 64-bit, but we still use @PUSH | ||

2769 | -- II64@ - the usual calling conventions expect integers to be | ||||

2770 | -- 8-byte aligned. | ||||

2771 | ASSERT(width <= W64) return () | ||||

2692 | (arg_op, arg_code) <- getOperand arg | 2772 | (arg_op, arg_code) <- getOperand arg | ||

2693 | delta <- getDeltaNat | 2773 | delta <- getDeltaNat | ||

2694 | setDeltaNat (delta-arg_size) | 2774 | setDeltaNat (delta-arg_size) | ||

2695 | let code' = code `appOL` arg_code `appOL` toOL [ | 2775 | let code' = code `appOL` arg_code `appOL` toOL [ | ||

2696 | PUSH II64 arg_op, | 2776 | PUSH II64 arg_op, | ||

2697 | DELTA (delta-arg_size)] | 2777 | DELTA (delta-arg_size)] | ||

2698 | push_args rest code' | 2778 | push_args rest code' | ||

2699 | where | 2779 | where | ||

