# Changeset View

# Standalone View

# compiler/cmm/CmmExpr.hs

1 | {-# LANGUAGE BangPatterns #-} | 1 | {-# LANGUAGE BangPatterns #-} | ||
---|---|---|---|---|---|

2 | {-# LANGUAGE FlexibleContexts #-} | 2 | {-# LANGUAGE FlexibleContexts #-} | ||

3 | {-# LANGUAGE FlexibleInstances #-} | 3 | {-# LANGUAGE FlexibleInstances #-} | ||

4 | {-# LANGUAGE MultiParamTypeClasses #-} | 4 | {-# LANGUAGE MultiParamTypeClasses #-} | ||

5 | {-# LANGUAGE UndecidableInstances #-} | 5 | {-# LANGUAGE UndecidableInstances #-} | ||

6 | 6 | | |||

7 | module CmmExpr | 7 | module CmmExpr | ||

8 | ( CmmExpr(..), cmmExprType, cmmExprWidth, maybeInvertCmmExpr | 8 | ( CmmExpr(..), cmmExprType, cmmExprWidth, maybeInvertCmmExpr | ||

9 | , CmmReg(..), cmmRegType, cmmRegWidth | 9 | , CmmReg(..), cmmRegType, cmmRegWidth | ||

10 | , CmmLit(..), cmmLitType | 10 | , CmmLit(..), cmmLitType | ||

11 | , LocalReg(..), localRegType | 11 | , LocalReg(..), localRegType | ||

12 | , GlobalReg(..), isArgReg, globalRegType | 12 | , GlobalReg(..), isArgReg, globalRegType | ||

13 | , spReg, hpReg, spLimReg, hpLimReg, nodeReg | 13 | , spReg, hpReg, spLimReg, hpLimReg, nodeReg | ||

14 | , currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg | 14 | , currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg | ||

15 | , node, baseReg | 15 | , node, baseReg | ||

16 | , VGcPtr(..) | 16 | , VGcPtr(..) | ||

17 | , GlobalVecRegTy(..) | ||||

17 | 18 | | |||

18 | , DefinerOfRegs, UserOfRegs | 19 | , DefinerOfRegs, UserOfRegs | ||

19 | , foldRegsDefd, foldRegsUsed | 20 | , foldRegsDefd, foldRegsUsed | ||

20 | , foldLocalRegsDefd, foldLocalRegsUsed | 21 | , foldLocalRegsDefd, foldLocalRegsUsed | ||

21 | 22 | | |||

22 | , RegSet, LocalRegSet, GlobalRegSet | 23 | , RegSet, LocalRegSet, GlobalRegSet | ||

23 | , emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet | 24 | , emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet | ||

24 | , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet | 25 | , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet | ||

Show All 11 Lines | |||||

36 | import CLabel | 37 | import CLabel | ||

37 | import CmmMachOp | 38 | import CmmMachOp | ||

38 | import CmmType | 39 | import CmmType | ||

39 | import DynFlags | 40 | import DynFlags | ||

40 | import Outputable (panic) | 41 | import Outputable (panic) | ||

41 | import Unique | 42 | import Unique | ||

42 | 43 | | |||

43 | import Data.Set (Set) | 44 | import Data.Set (Set) | ||

45 | import Data.Monoid ((<>)) | ||||

44 | import qualified Data.Set as Set | 46 | import qualified Data.Set as Set | ||

45 | 47 | | |||

46 | ----------------------------------------------------------------------------- | 48 | ----------------------------------------------------------------------------- | ||

47 | -- CmmExpr | 49 | -- CmmExpr | ||

48 | -- An expression. Expressions have no side effects. | 50 | -- An expression. Expressions have no side effects. | ||

49 | ----------------------------------------------------------------------------- | 51 | ----------------------------------------------------------------------------- | ||

50 | 52 | | |||

51 | data CmmExpr | 53 | data CmmExpr | ||

▲ Show 20 Lines • Show All 326 Lines • ▼ Show 20 Line(s) | |||||

378 | 380 | | |||

379 | data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show ) | 381 | data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show ) | ||

380 | 382 | | |||

381 | ----------------------------------------------------------------------------- | 383 | ----------------------------------------------------------------------------- | ||

382 | -- Global STG registers | 384 | -- Global STG registers | ||

383 | ----------------------------------------------------------------------------- | 385 | ----------------------------------------------------------------------------- | ||

384 | {- | 386 | {- | ||

385 | Note [Overlapping global registers] | 387 | Note [Overlapping global registers] | ||

388 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||

386 | 389 | | |||

387 | The backend might not faithfully implement the abstraction of the STG | 390 | The backend might not faithfully implement the abstraction of the STG | ||

388 | machine with independent registers for different values of type | 391 | machine with independent registers for different values of type | ||

389 | GlobalReg. Specifically, certain pairs of registers (r1, r2) may | 392 | GlobalReg. Specifically, certain pairs of registers (r1, r2) may | ||

390 | overlap in the sense that a store to r1 invalidates the value in r2, | 393 | overlap in the sense that a store to r1 invalidates the value in r2, | ||

391 | and vice versa. | 394 | and vice versa. | ||

392 | 395 | | |||

393 | Currently this occurs only on the x86_64 architecture where FloatReg n | 396 | Currently this occurs only on the x86_64 architecture where FloatReg n | ||

394 | and DoubleReg n are assigned the same microarchitectural register, in | 397 | and DoubleReg n are assigned the same microarchitectural register, in | ||

395 | order to allow functions to receive more Float# or Double# arguments | 398 | order to allow functions to receive more Float# or Double# arguments | ||

396 | in registers (as opposed to on the stack). | 399 | in registers (as opposed to on the stack). | ||

397 | 400 | | |||

398 | There are no specific rules about which registers might overlap with | 401 | There are no specific rules about which registers might overlap with | ||

399 | which other registers, but presumably it's safe to assume that nothing | 402 | which other registers, but presumably it's safe to assume that nothing | ||

400 | will overlap with special registers like Sp or BaseReg. | 403 | will overlap with special registers like Sp or BaseReg. | ||

401 | 404 | | |||

402 | Use CmmUtils.regsOverlap to determine whether two GlobalRegs overlap | 405 | Use CmmUtils.regsOverlap to determine whether two GlobalRegs overlap | ||

403 | on a particular platform. The instance Eq GlobalReg is syntactic | 406 | on a particular platform. The instance Eq GlobalReg is syntactic | ||

404 | equality of STG registers and does not take overlap into | 407 | equality of STG registers and does not take overlap into | ||

405 | account. However it is still used in UserOfRegs/DefinerOfRegs and | 408 | account. However it is still used in UserOfRegs/DefinerOfRegs and | ||

406 | there are likely still bugs there, beware! | 409 | there are likely still bugs there, beware! | ||

410 | | ||||

411 | | ||||

412 | Note [SIMD registers] | ||||

413 | ~~~~~~~~~~~~~~~~~~~~~ | ||||

414 | | ||||

415 | GHC's treatment of SIMD registers is heavily modelled after the x86_64 | ||||

416 | architecture. Namely we have 128- (XMM), 256- (YMM), and 512-bit (ZMM) | ||||

417 | registers. Furthermore, we treat each possible format in these registers as a | ||||

418 | distinct register which overlaps with the others. For instance, XMM1 used as a | ||||

419 | 2xI64 register is distinct from but overlaps with (in the sense defined in Note | ||||

420 | [Overlapping global registers]) its use as a 4xI32 register. | ||||

421 | | ||||

422 | This model makes it easier to fit SIMD registers into the NCG, which generally | ||||

423 | expects that each global register has a single, known CmmType. | ||||

424 | | ||||

425 | In the future we could consider further refactoring this to eliminate the | ||||

426 | XMM, YMM, and ZMM register names (which are quite x86-specific) and instead just | ||||

427 | having a set of NxM-bit vector registers (e.g. Vec2x64A, Vec2x64B, ..., | ||||

428 | Vec4x32A, ..., Vec4x64A). | ||||

429 | | ||||

407 | -} | 430 | -} | ||

408 | 431 | | |||

409 | data GlobalReg | 432 | data GlobalReg | ||

410 | -- Argument and return registers | 433 | -- Argument and return registers | ||

411 | = VanillaReg -- pointers, unboxed ints and chars | 434 | = VanillaReg -- pointers, unboxed ints and chars | ||

412 | {-# UNPACK #-} !Int -- its number | 435 | {-# UNPACK #-} !Int -- its number | ||

413 | VGcPtr | 436 | VGcPtr | ||

414 | 437 | | |||

415 | | FloatReg -- single-precision floating-point registers | 438 | | FloatReg -- single-precision floating-point registers | ||

416 | {-# UNPACK #-} !Int -- its number | 439 | {-# UNPACK #-} !Int -- its number | ||

417 | 440 | | |||

418 | | DoubleReg -- double-precision floating-point registers | 441 | | DoubleReg -- double-precision floating-point registers | ||

419 | {-# UNPACK #-} !Int -- its number | 442 | {-# UNPACK #-} !Int -- its number | ||

420 | 443 | | |||

421 | | LongReg -- long int registers (64-bit, really) | 444 | | LongReg -- long int registers (64-bit, really) | ||

422 | {-# UNPACK #-} !Int -- its number | 445 | {-# UNPACK #-} !Int -- its number | ||

423 | 446 | | |||

424 | | XmmReg -- 128-bit SIMD vector register | 447 | | XmmReg -- 128-bit SIMD vector register | ||

425 | {-# UNPACK #-} !Int -- its number | 448 | {-# UNPACK #-} !Int -- its number | ||

449 | !Length !Width !GlobalVecRegTy | ||||

carter: same issue applies to YMM and ZMM,
after all, XMM registers are just the lower 128 bits of… | |||||

As I was experimenting with this representation, I wanted to keep the surface area of the experiment as small as possible. But yes I will eventually extend this to YMM and ZMM registers. Abhiroop: As I was experimenting with this representation, I wanted to keep the surface area of the… | |||||

426 | 450 | | |||

I agree with @AndreasK: It's quite unclear why this is split across two `Maybe`s. I'm also generally skeptical of this notion of attaching further information to bgamari: I agree with @AndreasK: It's quite unclear why this is split across two `Maybe`s.
I'm also… | |||||

@AndreasK, @carter, and I discussed this at ICFP. The design is certainly a bit of a compromise but I've largely come to peace with this design; ultimately I don't see a better way. I have, however, cleaned up the story a bit and have tried to put the scheme on a more principled footing. Essentially, we view SIMD registers like we view floating point registers: Every possible format the register might hold (e.g. length/width/number type in the case of a SIMD register; float/double in the case of an FP register) is considered to be a distinct register. That is to say that the machine's The reason that this is necessary is that the NCG generally assumes that each bgamari: @AndreasK, @carter, and I discussed this at ICFP. The design is certainly a bit of a compromise… | |||||

427 | | YmmReg -- 256-bit SIMD vector register | 451 | | YmmReg -- 256-bit SIMD vector register | ||

428 | {-# UNPACK #-} !Int -- its number | 452 | {-# UNPACK #-} !Int -- its number | ||

429 | 453 | | |||

430 | | ZmmReg -- 512-bit SIMD vector register | 454 | | ZmmReg -- 512-bit SIMD vector register | ||

431 | {-# UNPACK #-} !Int -- its number | 455 | {-# UNPACK #-} !Int -- its number | ||

432 | 456 | | |||

433 | -- STG registers | 457 | -- STG registers | ||

434 | | Sp -- Stack ptr; points to last occupied stack location. | 458 | | Sp -- Stack ptr; points to last occupied stack location. | ||

Show All 29 Lines | |||||

464 | 488 | | |||

465 | -- Base Register for PIC (position-independent code) calculations | 489 | -- Base Register for PIC (position-independent code) calculations | ||

466 | -- Only used inside the native code generator. Its exact meaning differs | 490 | -- Only used inside the native code generator. Its exact meaning differs | ||

467 | -- from platform to platform (see module PositionIndependentCode). | 491 | -- from platform to platform (see module PositionIndependentCode). | ||

468 | | PicBaseReg | 492 | | PicBaseReg | ||

469 | 493 | | |||

470 | deriving( Show ) | 494 | deriving( Show ) | ||

471 | 495 | | |||

496 | data GlobalVecRegTy = Integer | Float | ||||

497 | deriving (Show, Eq, Ord) | ||||

498 | | ||||

472 | instance Eq GlobalReg where | 499 | instance Eq GlobalReg where | ||

473 | VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes | 500 | VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes | ||

474 | FloatReg i == FloatReg j = i==j | 501 | FloatReg i == FloatReg j = i==j | ||

475 | DoubleReg i == DoubleReg j = i==j | 502 | DoubleReg i == DoubleReg j = i==j | ||

476 | LongReg i == LongReg j = i==j | 503 | LongReg i == LongReg j = i==j | ||

477 | XmmReg i == XmmReg j = i==j | 504 | XmmReg i l w grt == XmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt' | ||

Lint: Line Too Long: This line is 86 characters long, but the convention is 80 characters. | |||||

The bgamari: The `Ord` instance ignores the two `Maybe`s whereas the `Eq` does not. This is really… | |||||

478 | YmmReg i == YmmReg j = i==j | 505 | YmmReg i == YmmReg j = i==j | ||

479 | ZmmReg i == ZmmReg j = i==j | 506 | ZmmReg i == ZmmReg j = i==j | ||

480 | Sp == Sp = True | 507 | Sp == Sp = True | ||

481 | SpLim == SpLim = True | 508 | SpLim == SpLim = True | ||

482 | Hp == Hp = True | 509 | Hp == Hp = True | ||

483 | HpLim == HpLim = True | 510 | HpLim == HpLim = True | ||

484 | CCCS == CCCS = True | 511 | CCCS == CCCS = True | ||

485 | CurrentTSO == CurrentTSO = True | 512 | CurrentTSO == CurrentTSO = True | ||

Show All 9 Lines | |||||

495 | _r1 == _r2 = False | 522 | _r1 == _r2 = False | ||

496 | 523 | | |||

497 | instance Ord GlobalReg where | 524 | instance Ord GlobalReg where | ||

498 | compare (VanillaReg i _) (VanillaReg j _) = compare i j | 525 | compare (VanillaReg i _) (VanillaReg j _) = compare i j | ||

499 | -- Ignore type when seeking clashes | 526 | -- Ignore type when seeking clashes | ||

500 | compare (FloatReg i) (FloatReg j) = compare i j | 527 | compare (FloatReg i) (FloatReg j) = compare i j | ||

501 | compare (DoubleReg i) (DoubleReg j) = compare i j | 528 | compare (DoubleReg i) (DoubleReg j) = compare i j | ||

502 | compare (LongReg i) (LongReg j) = compare i j | 529 | compare (LongReg i) (LongReg j) = compare i j | ||

503 | compare (XmmReg i) (XmmReg j) = compare i j | 530 | compare (XmmReg i l w grt) | ||

531 | (XmmReg j l' w' grt') = compare i j | ||||

532 | <> compare l l' | ||||

533 | <> compare w w' | ||||

534 | <> compare grt grt' | ||||

504 | compare (YmmReg i) (YmmReg j) = compare i j | 535 | compare (YmmReg i) (YmmReg j) = compare i j | ||

505 | compare (ZmmReg i) (ZmmReg j) = compare i j | 536 | compare (ZmmReg i) (ZmmReg j) = compare i j | ||

506 | compare Sp Sp = EQ | 537 | compare Sp Sp = EQ | ||

507 | compare SpLim SpLim = EQ | 538 | compare SpLim SpLim = EQ | ||

508 | compare Hp Hp = EQ | 539 | compare Hp Hp = EQ | ||

509 | compare HpLim HpLim = EQ | 540 | compare HpLim HpLim = EQ | ||

510 | compare CCCS CCCS = EQ | 541 | compare CCCS CCCS = EQ | ||

511 | compare CurrentTSO CurrentTSO = EQ | 542 | compare CurrentTSO CurrentTSO = EQ | ||

Show All 9 Lines | |||||

521 | compare (VanillaReg _ _) _ = LT | 552 | compare (VanillaReg _ _) _ = LT | ||

522 | compare _ (VanillaReg _ _) = GT | 553 | compare _ (VanillaReg _ _) = GT | ||

523 | compare (FloatReg _) _ = LT | 554 | compare (FloatReg _) _ = LT | ||

524 | compare _ (FloatReg _) = GT | 555 | compare _ (FloatReg _) = GT | ||

525 | compare (DoubleReg _) _ = LT | 556 | compare (DoubleReg _) _ = LT | ||

526 | compare _ (DoubleReg _) = GT | 557 | compare _ (DoubleReg _) = GT | ||

527 | compare (LongReg _) _ = LT | 558 | compare (LongReg _) _ = LT | ||

528 | compare _ (LongReg _) = GT | 559 | compare _ (LongReg _) = GT | ||

529 | compare (XmmReg _) _ = LT | 560 | compare (XmmReg _ _ _ _) _ = LT | ||

530 | compare _ (XmmReg _) = GT | 561 | compare _ (XmmReg _ _ _ _) = GT | ||

carter: THIS LOOKS WRONG | |||||

Abhiroop: Yes, I didn't understand what this comparison means! | |||||

531 | compare (YmmReg _) _ = LT | 562 | compare (YmmReg _) _ = LT | ||

532 | compare _ (YmmReg _) = GT | 563 | compare _ (YmmReg _) = GT | ||

533 | compare (ZmmReg _) _ = LT | 564 | compare (ZmmReg _) _ = LT | ||

534 | compare _ (ZmmReg _) = GT | 565 | compare _ (ZmmReg _) = GT | ||

535 | compare Sp _ = LT | 566 | compare Sp _ = LT | ||

536 | compare _ Sp = GT | 567 | compare _ Sp = GT | ||

537 | compare SpLim _ = LT | 568 | compare SpLim _ = LT | ||

538 | compare _ SpLim = GT | 569 | compare _ SpLim = GT | ||

Show All 40 Lines | |||||

579 | node = VanillaReg 1 VGcPtr | 610 | node = VanillaReg 1 VGcPtr | ||

580 | 611 | | |||

581 | globalRegType :: DynFlags -> GlobalReg -> CmmType | 612 | globalRegType :: DynFlags -> GlobalReg -> CmmType | ||

582 | globalRegType dflags (VanillaReg _ VGcPtr) = gcWord dflags | 613 | globalRegType dflags (VanillaReg _ VGcPtr) = gcWord dflags | ||

583 | globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags | 614 | globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags | ||

584 | globalRegType _ (FloatReg _) = cmmFloat W32 | 615 | globalRegType _ (FloatReg _) = cmmFloat W32 | ||

585 | globalRegType _ (DoubleReg _) = cmmFloat W64 | 616 | globalRegType _ (DoubleReg _) = cmmFloat W64 | ||

586 | globalRegType _ (LongReg _) = cmmBits W64 | 617 | globalRegType _ (LongReg _) = cmmBits W64 | ||

587 | globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32) | 618 | -- NOTE: | ||

619 | -- The below XMM, YMM, ZMM CmmTypes are not fully correct because an | ||||

620 | -- XMM can also hold 2 doubles or 16 Int8s etc, similarly for YMM, ZMM | ||||

621 | globalRegType _ (XmmReg _ l w ty) = case ty of | ||||

This part currently works but this is not fully correct. This function maps an STG register to the respective CmmType. While this initially stated And even now it is stating that an XMM can hold only 2 64 bit doubles. Although this change makes this code run, we need to somehow modify the Abhiroop: This part currently works but this is not fully correct. This function maps an STG register to… | |||||

622 | Integer -> cmmVec l (cmmBits w) | ||||

This was a quite hard to track minor change. This function is called in a number of other places, so this change might potentially break something important. I ran the Abhiroop: This was a quite hard to track minor change. This function is called in a number of other… | |||||

As a general hint use grep (or similar) to find out where functions you change are used. People are more likely to check something if you can point at a function and say "I'm not sure if this will work here" instead of leaving it to them to check where it could break. I think the code path is currently only used to determine the size of the register. So this should work. What I noticed is AndreasK: As a general hint use grep (or similar) to find out where functions you change are used.
From… | |||||

Thanks for the feedback @AndreasK
If you notice the call graph
The Abhiroop: Thanks for the feedback @AndreasK
> I think the code path is currently only used to determine… | |||||

Fair enough. Just make sure that there is no code which assumes AndreasK: > So there is a separate helper function isVecType for inspecting vector of floats or ints etc… | |||||

how is register size used? it seems questionable to me that we're providing individual types rather than a set? or perhaps i dont understand this well. XMM is also ALWAYS used by scalar floating point code, so this is DEFINITELY wrong, because single precision "Float" is 32bit, and double precision "Double" is 64 bit ... what happens if xmm has one of those? carter: how is register size used? it seems questionable to me that we're providing individual types… | |||||

I mentioned the use case in the comments above. For Cmm like this _B1::Fx4V128 = XMM1; //CmmAssign Before my change, the above would break in the Cmm linter. To understand why this would break before, we need to see the notion of weak equality defined for the datatype
The level at which the linter is working it is not checking the width. It just takes the width whether The linter works using the weak equality notion mentioned in the previous paragraph; it doesn't actually check the width. Abhiroop: > how is register size used?
I mentioned the use case in the comments above. For Cmm like this… | |||||

carter: wait ... WHY are you setting the register?
what do you mean by XMM1? | |||||

I am not sure what you are asking but I have specifically not added anything extra. For the simple lifting of an unlifted type code below: data FloatX4 = FX4# FloatX4# instance Show FloatX4 where show (FX4# f) = case (unpackFloatX4# f) of (# a, b, c, d #) -> show ((F# a), (F# b), (F# c), (F# d)) main :: IO () main = print (FX4# (plusFloatX4# (broadcastFloatX4# 1.3#) (broadcastFloatX4# 2.2#))) The output cmm looks like this [Main.FX4#_entry() // [XMM1] { info_tbls: [(c1NK, label: Main.FX4#_info rep: HeapRep static { Fun {arity: 1 fun_type: ArgSpec 9} } srt: Nothing)] stack_info: arg_space: 8 updfr_space: Just 8 } {offset c1NK: // global _B1::Fx4V128 = XMM1; // CmmAssign goto c1NM; // CmmBranch c1NM: // global Hp = Hp + 24; // CmmAssign if (Hp > HpLim) (likely: False) goto c1NO; else goto c1NN; // CmmCondBranch c1NO: // global HpAlloc = 24; // CmmAssign goto c1NL; // CmmBranch c1NL: // global XMM1 = _B1::Fx4V128; // CmmAssign R1 = Main.FX4#_closure; // CmmAssign call (stg_gc_fun)(XMM1, R1) args: 8, res: 0, upd: 8; // CmmCall c1NN: // global // allocHeapClosure I64[Hp - 16] = Main.FX4#_con_info; // CmmStore Fx4V128[Hp - 8] = _B1::Fx4V128; // CmmStore _c1NJ::P64 = Hp - 15; // CmmAssign R1 = _c1NJ::P64; // CmmAssign call (P64[Sp])(R1) args: 8, res: 0, upd: 8; // CmmCall } }, section ""data" . Main.FX4#_closure" { Main.FX4#_closure: const Main.FX4#_info; }] As you can see the Cmm generated Abhiroop: I am not sure what you are asking but I have specifically not added anything extra.
For the… | |||||

Ok, you *may* wanna also see about inspecting the CMM before register allocation as well as after. it sounds like you're only looking at the post register allocation CMM, both should be looked at i think carter: Ok, you *may* wanna also see about inspecting the CMM before register allocation as well as… | |||||

623 | Float -> cmmVec l (cmmFloat w) | ||||

588 | globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32) | 624 | globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32) | ||

589 | globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32) | 625 | globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32) | ||

590 | 626 | | |||

591 | globalRegType dflags Hp = gcWord dflags | 627 | globalRegType dflags Hp = gcWord dflags | ||

592 | -- The initialiser for all | 628 | -- The initialiser for all | ||

593 | -- dynamically allocated closures | 629 | -- dynamically allocated closures | ||

594 | globalRegType dflags _ = bWord dflags | 630 | globalRegType dflags _ = bWord dflags | ||

595 | 631 | | |||

Show All 9 Lines |

same issue applies to YMM and ZMM,

after all, XMM registers are just the lower 128 bits of those!