{ "comments": [ { "unresolved": false, "key": { "uuid": "19071804_6b60816d", "filename": "/PATCHSET_LEVEL", "patchSetId": 4 }, "lineNbr": 0, "author": { "id": 1115898 }, "writtenOn": "2024-12-12T11:11:42Z", "side": 1, "message": "All caught up on reviews. I\u0027m on holidays for 2 weeks. If theres an important change, as Justin to review and land it", "revId": "2d8652f3e7ba0427c2e72b086a787b96169e1466", "serverId": "3ce6091f-6c88-37e8-8c75-72f92ae8dfba" }, { "unresolved": true, "key": { "uuid": "0bb4bf5c_de5342c0", "filename": "source/row_sme.cc", "patchSetId": 4 }, "lineNbr": 474, "author": { "id": 1115898 }, "writtenOn": "2024-12-12T11:11:42Z", "side": 1, "message": "could regular sve do the same performance for inner loop as neon, but support predicate for remainder to get a small win?", "fixSuggestions": [ { "fixId": "2da94437_81e45043", "description": "prompt_to_edit API", "replacements": [ { "path": "source/row_sme.cc", "range": { "startLine": 473, "startChar": 0, "endLine": 512, "endChar": 0 }, "replacement": "" } ] } ], "revId": "2d8652f3e7ba0427c2e72b086a787b96169e1466", "serverId": "3ce6091f-6c88-37e8-8c75-72f92ae8dfba" }, { "unresolved": true, "key": { "uuid": "d8b5f72d_2bb0a37f", "filename": "source/row_sme.cc", "patchSetId": 4 }, "lineNbr": 488, "author": { "id": 1115898 }, "writtenOn": "2024-12-12T11:11:42Z", "side": 1, "message": "consider 2 vectors", "fixSuggestions": [ { "fixId": "99524abd_16ff1ed8", "description": "prompt_to_edit API", "replacements": [ { "path": "source/row_sme.cc", "range": { "startLine": 474, "startChar": 0, "endLine": 477, "endChar": 0 }, "replacement": "__arm_locally_streaming void CopyRow_16_SME(const uint16_t* src,\n uint16_t* dst,\n int width) {\n" }, { "path": "source/row_sme.cc", "range": { "startLine": 479, "startChar": 0, "endLine": 479, "endChar": 0 }, "replacement": " asm volatile(\n \"cnth %x[vl] \\n\"\n \"subs %w[width], %w[width], %w[vl] \\n\"\n \"b.lt 2f \\n\"\n\n // Run bulk of computation with an all-true predicate to avoid predicate\n // generation overhead.\n \"ptrue p0.h \\n\"\n \"1: \\n\"\n \"ld1h {z0.h}, p0/z, [%[src]] \\n\"\n \"incb %[src] \\n\"\n \"subs %w[width], %w[width], %w[vl] \\n\"\n \"st1h {z0.h}, p0, [%[dst]] \\n\"\n \"incb %[dst] \\n\"\n \"b.ge 1b \\n\"\n\n \"2: \\n\"\n \"adds %w[width], %w[width], %w[vl] \\n\"\n \"b.eq 99f \\n\"\n\n // Calculate a predicate for the final iteration to deal with the tail.\n \"whilelt p0.h, wzr, %w[width] \\n\"\n \"ld1h {z0.h}, p0/z, [%[src]] \\n\"\n \"st1h {z0.h}, p0, [%[dst]] \\n\"\n\n \"99: \\n\"\n : [src] \"+r\"(src), // %[src]\n [dst] \"+r\"(dst), // %[dst]\n [width] \"+r\"(width),// %[width]\n [vl] \"\u003d\u0026r\"(vl) // %[vl]\n :\n : \"memory\", \"cc\", \"z0\", \"p0\");\n}\n\n__arm_locally_streaming void CopyRow_8_SME(const uint8_t* src,\n uint8_t* dst,\n int width) {\n // Streaming-SVE only, no use of ZA tile.\n int vl;\n" }, { "path": "source/row_sme.cc", "range": { "startLine": 505, "startChar": 0, "endLine": 509, "endChar": 0 }, "replacement": " : [src] \"+r\"(src), // %[src]\n [dst] \"+r\"(dst), // %[dst]\n [width] \"+r\"(width),// %[width]\n [vl] \"\u003d\u0026r\"(vl) // %[vl]\n" } ] } ], "revId": "2d8652f3e7ba0427c2e72b086a787b96169e1466", "serverId": "3ce6091f-6c88-37e8-8c75-72f92ae8dfba" } ] }