{
  "comments": [
    {
      "unresolved": false,
      "key": {
        "uuid": "6f23531a_fee498dc",
        "filename": "/PATCHSET_LEVEL",
        "patchSetId": 2
      },
      "lineNbr": 0,
      "author": {
        "id": 1115898
      },
      "writtenOn": "2024-12-12T07:45:57Z",
      "side": 1,
      "message": "ideally run sme benchmarks on apple m4 or equivalent.\nconsider doing LDP with int64 to achieve 128 bit load/store",
      "revId": "d5818026ec4bd861a5a8aba6d428c576ba2ba5f5",
      "serverId": "3ce6091f-6c88-37e8-8c75-72f92ae8dfba"
    },
    {
      "unresolved": false,
      "key": {
        "uuid": "aae285fe_0f0426f4",
        "filename": "/PATCHSET_LEVEL",
        "patchSetId": 2
      },
      "lineNbr": 0,
      "author": {
        "id": 1571352
      },
      "writtenOn": "2024-12-12T10:58:31Z",
      "side": 1,
      "message": "SVE and the first version of SME do not have an LDP equivalent instruction. SVE 2.1 and SME 2 introduce multi-vector load/store instructions which are equivalent to LDP/STP however these would need additional compiler features to be enabled (-march\u003d...+sme2 rather than -march\u003d...+sme which we currently use).\n\nOf course if you can guarantee that the vector length is 128 bits then you can use the Neon LDP/STP instructions since they share the same vector length, but this is not portable so I would prefer to avoid hard-coding this anywhere.",
      "parentUuid": "6f23531a_fee498dc",
      "revId": "d5818026ec4bd861a5a8aba6d428c576ba2ba5f5",
      "serverId": "3ce6091f-6c88-37e8-8c75-72f92ae8dfba"
    }
  ]
}