Compare commits

..

No commits in common. "criu-dev" and "v4.2" have entirely different histories.

79 changed files with 247 additions and 7537 deletions

View file

@ -1,3 +1,3 @@
[codespell]
skip = ./.git,./test/pki,./tags,./plugins/amdgpu/amdgpu_drm.h,./plugins/amdgpu/drm.h,./plugins/amdgpu/drm_mode.h
skip = ./.git,./test/pki,./tags
ignore-words-list = creat,fpr,fle,ue,bord,parms,nd,te,testng,inh,wronly,renderd,bui,clen,sems

View file

@ -15,7 +15,6 @@ Checkpoint / Restore inside a docker container
Pytorch
Tensorflow
Using CRIU Image Streamer
Parallel Restore
DESCRIPTION
-----------

View file

@ -502,8 +502,8 @@ Restores previously checkpointed processes.
The 'resource' argument can be one of the following:
+
- **tty[**__rdev__**:**__dev__**]**
- **pipe:[**__inode__**]**
- **socket:[**__inode__**]**
- **pipe[**__inode__**]**
- **socket[**__inode__**]**
- **file[**__mnt_id__**:**__inode__**]**
- 'path/to/file'

View file

@ -1,136 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 16.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
width="560px" height="560px" viewBox="0 0 560 560" enable-background="new 0 0 560 560" xml:space="preserve">
<path opacity="0.3" fill="#990000" d="M315.137,360.271c-18.771-7.159-41.548-8.85-68.479-8.85c-16.661,0-46.255,2.939-74.654,3.38
c11.209-4.884,20.734-10.265,24.842-16.87c14.531-23.346,17.645-65.893,17.645-65.893l-20.758,3.114c0,0-2.591,35.8-16.085,47.733
c-5.35,4.736-15.96,7.834-27.916,10.856c2.447-26.071,29.477-57.552,29.477-57.552l-14.874-3.966l-5.88-7.448
c0,0-3.011,1.761-7.588,5.315c-18.298,4.208-75.946,20.443-75.946,57.983c0,15.292,5.77,26.308,14.768,34.244
c-22.858,26.966-20.755,61.618-20.755,61.618s-8.945,16.61-8.021,31.254c2.083,32.973,34.931,25.097,44.313,26.374
c9.644,1.313,34.313-4.18,34.313-4.18s-16.276-2.639-15.329-18.562c0.5-8.369-0.947-27.628-21.404-37.307
c-1.13-10.066,2.111-18.309,6.379-28.015c18.452,45.263,92.601,53.97,92.601,53.97c0.393-0.097-10.269,20.047,0.221,35.632
c4.652,6.915,18.284,10.019,22.436,19.356c4.151,9.341,2.199,30.354,2.199,30.354s21.267-16.864,27.239-30.18
c3.334-7.432,25.989,0.926,25.989-34.047c0-14.077-12.26-26.841-13.675-29.815c-20.858-20.334-5.427-4.743,2.677-8.236
c12.758-5.499,35.412,11.657,35.412,11.657s-10.402-20.119-11.437-31.013c-0.795-8.335-4.537-16.816-16.624-30.042
c7.166-0.752,20.362,2.327,20.362,2.327s-5.202,11.251-0.879,25.515c3.588,11.84,7.193,7.193,14.736,14.737
c6.599,6.598,3.146,26.284,3.146,26.284s4.674-4.513,18.081-18.235c9.072-9.29,23.645-16.717,23.645-47.86
C355.312,365.969,334.97,360.979,315.137,360.271z M134.108,285.901c-11.5,13.048-23.667,32.329-28.23,58.293
c-4.821-3.519-7.613-8.1-7.613-14.043C98.265,309.699,117.078,295.016,134.108,285.901z"/>
<path fill="#990000" d="M382.184,115.435c3.654,1.208,7.327,2.37,10.968,3.444c14.16,4.183,26.745-9.798,26.745-9.798
s-8.785-2.243-17.857-3.497c12.173-2.653,21.085-18.66,21.085-18.66s-17.366,4.819-27.224,5.087
c-2.042,0.057-4.107,0.118-6.189,0.186c2.464-0.37,4.925-0.847,7.361-1.485c14.201-3.714,21.505-23.382,21.505-23.382
s-15.411,6.743-24.951,9.239c-2.694,0.703-5.438,1.437-8.197,2.185c3.038-1.071,6.008-2.306,8.815-3.82
c12.922-6.965,12.241-29.347,12.241-29.347s-10.162,11.926-18.844,16.605c-3.557,1.916-7.199,3.904-10.846,5.911
c3.798-2.277,7.45-4.743,10.596-7.569c10.918-9.814,7.722-29.605,7.722-29.605s-9.801,12.54-17.135,19.131
c-8.939,8.037-18.775,14.104-27.014,21.81c-6.427,6.011-25.14,35.236-36.812,46.283c-11.671,11.047-18.301,12.476-19.159,14.388
c-0.863,1.913,1.006,30.46-14.078,39.145c-16.476-21.583-50.565-44.007-53.101-72.033c-2.079-22.959,5.209-34.055,19.149-35.316
c14.994-1.359,15.998,24.507,15.998,24.507s-1.379,1.064-1.708,6.391c-0.097,0.629-0.145,1.272-0.083,1.934
c0.004,0.031,0.008,0.06,0.011,0.091c-0.014,1.674,0.065,3.664,0.278,6.039c1.131,12.474,4.53,14.574,4.53,14.574l2.075-0.722
c0,0-2.24-4.079-2.554-7.529c-0.172-1.917-0.187-3.556-0.079-4.977c0.45,0.067,0.949,0.081,1.506,0.031
c4.398-0.399,6.049-4.141,5.65-8.539c-0.042-0.45-0.069-0.885-0.094-1.316c2.485-26.032-1.756-29.637,4.788-41.391
c9.032-16.218,17.279-16.015,17.279-16.015l1.402-8.155c0,0-6.817,2.462-14.819,13.652c-8.833,12.354-8.983,26.229-9.066,47.958
c-0.188-0.761-0.502-1.37-1.017-1.784c-2.457-11.192-9.087-32.13-24.112-30.77c-16.72,1.514-29.419,14.974-26.773,44.171
c3.609,39.832,26.186,52.701,29.829,80.84c-13.47-2.349-23.883-10.656-30.866-20.282c-7.803-10.749-7.297-22.949-8.324-24.779
c-1.027-1.829-7.761-2.662-20.367-12.627c-12.605-9.965-33.845-37.41-40.78-42.824c-8.895-6.942-19.229-12.111-28.848-19.32
c-7.892-5.915-18.769-17.531-18.769-17.531s-1.419,19.995,10.323,28.8c3.386,2.536,7.246,4.665,11.229,6.597
c-3.808-1.674-7.616-3.33-11.327-4.925c-9.062-3.887-20.246-14.861-20.246-14.861s1.31,22.353,14.803,28.143
c2.931,1.257,6,2.223,9.12,3.019c-2.818-0.5-5.615-0.985-8.357-1.447c-9.728-1.636-25.677-6.981-25.677-6.981
s9.025,18.94,23.5,21.376c2.485,0.417,4.975,0.674,7.466,0.822c-2.08,0.118-4.148,0.242-6.183,0.368
c-9.843,0.61-27.566-2.645-27.566-2.645S85.667,120.333,110,120c-8.922,2.057-25.678,6.008-25.678,6.008s13.778,12.806,27.508,7.38
c3.533-1.394,7.087-2.876,10.62-4.404c-3.726,1.804-7.424,3.581-11.005,5.273c-8.963,4.243-19.428,10.176-19.428,10.176
s15.069,9.759,27.305,1.497c0.558-0.378,3.121-1.76,3.678-2.143c-7.904,5.808-19.754,14.937-19.754,14.937
s15.802,6.027,27.092-3.354c4.663-3.875,8.104-7.185,12.238-11.618c-3.773,4.55-6.699,8.018-10.634,12.106
c-6.839,7.104-13.06,19.791-13.06,19.791s15.597,0.39,24.359-11.388c4.488-6.035,7.482-11.633,10.974-18.191
c-3.113,6.479-5.468,11.95-8.911,17.788c-5.018,8.49-7.574,22.624-7.574,22.624s15.342-3.655,21.07-17.17
c2.231-5.266,2.107-9.783,3.694-15.291c-1.257,5.272-0.666,9.475-2.24,14.319c-3.045,9.379,0.011,25.554,0.011,25.554
s9.713-5.855,10.359-20.52c0.006-0.153,0.5-8.47,0.5-8.625L171,171.496c0,9.917,6.295,23.276,6.295,23.276
s11.459-10.649,9.369-25.266c-0.188-1.31-0.1-2.627-0.305-3.947c0.408,1.507,0.998,3.016,1.493,4.524
c3.075,9.429,3.5,15.957,3.5,15.957s6.483,1.251,8.73-1.594c0.764,5.625-0.843,10.2-0.843,10.2s5.471-1.1,8.893-3.756
c0.705,5.331,0.155,8.789,0.155,8.789s5.106-1.603,8.419-4.323c0.611,4.642,1.764,7.542,1.764,7.542s6.398-0.88,9.021-5.393
c0.199,0.038,0.395,0.079,0.59,0.117c2.269,4.875,1.438,8.517,1.438,8.517s7.492-2.14,9.492-6.14c0.003,0,0.007,0,0.01,0
c1.798,4,2.727,6.102,2.727,6.102s4.853-2.349,7.093-6.064c0.189,0.009,0.364-0.093,0.547-0.086
c-4.702,19.629-23.62,29.658-42.207,42.764c-1.392,0.981-2.712,1.925-3.97,2.884c-2.891,1.512-6.788,3.495-11.311,5.724
c-9.829,3.363-23.7,6.057-41.038,4.084c-9.798-1.115-21.037,10.02-21.037,10.02s6.87,4.843,16.565,5.028
c-8.819,3.621-17.438,12.632-17.438,12.632s0.045,0.019,0.069,0.029c-27.096,11.688-51.621,29.917-47.651,57.105
c2.375,16.27,14.692,25.475,31.704,30.254c-17.81,14.742-32.921,36.129-30.707,60.59c0.134,1.487,0.309,2.916,0.508,4.311
c-2.209,5.6-3.288,17.842-2.674,24.886c0.949,10.838,13.686,8.662,18.219,6.729c14.139,12.202,32.258,10.252,32.258,10.252
s-17.301,1.211-30.306-11.156c5.551-2.659,6.424-3.925,6.788-11.579c0.36-7.61-9.104-20.759-20.57-21.966
c-1.25-20.07,9.861-43.32,30.603-60.203c0.02,0.249,0.023,0.491,0.048,0.742c4.248,46.957,30.584,54.634,81.148,63.26
c12.603,2.15,22.04,5.821,29.042,10.457c-3.844,5.388-5.706,21.559-2.895,32.325c3.045,11.655,12.647,14.53,19.429,14.955
c-3.304,16.035-11.235,29.024-11.235,29.024s10.015-11.628,15.04-29.016c0.48-0.031,0.928-0.069,1.319-0.114
c10.922-1.262,16.17-11.338,14.743-23.071c-1.195-9.826-13.974-24.54-28.598-25.992c-33.117-21.52-109.104-9.05-113.877-61.769
c-0.341-3.746-0.517-7.367-0.571-10.888c5.709,1.111,11.782,1.844,18.104,2.244c14.111,28.517,62.158,22.269,95.818,20.694
c1.764,3.09,7.043,7.064,13.929,9.779c11.751,4.633,14.889,3.742,18.869,1.502c1.484-0.835,2.828-1.92,3.979-3.155
c10.822,10.456,25.37,30.251,25.37,30.251s-12.29-22.284-22.733-33.97c2.601-4.923,2.433-10.619-2.559-13.297
c-6.956-3.732-31.321,1.581-36.316,4.981c-30.811,1.668-71.853,6.551-89.576-16.474c41.005,1.192,88.786-9.133,102.385-10.365
c21.726-1.966,47.319,1.367,64.887,8.228c-0.783,5.681,1.867,18.47,4.641,25.318c3.316,8.197,11.561,5.887,16.562,3.028
c-0.588,13.3-4.495,22.638-4.495,22.638s7.86-14.125,9.117-26.183c4.354-4.041,4.774-5.562,2.904-12.887
c-1.849-7.24-14.317-16.821-25.47-15.096c-21.855-8.906-54.594-11.087-75.74-9.175c-18.253,1.653-61.404,10.802-97.611,10.237
c-1.895-3.338-3.402-7.122-4.412-11.479c5.113-2.364,10.551-4.388,16.307-5.975c30.999-8.551,40.97-29.258,42.943-48.579
c1.127,1.303,1.938,2.069,1.938,2.069s7.087-12.679,5.522-27.275c-0.264-2.469-0.429-4.737-0.553-6.911
c2.499,6.741,7.778,13.001,7.778,13.001s16.438-20.208,5.846-27.268c-11.583-7.714-6.836-13.283-4.31-15.299
c3.354-1.984,6.973-3.94,10.859-5.817c26.561-12.817,59.903-20.002,64.443-40.039c0.265-1.172,0.388-2.34,0.443-3.507
c3.701,2.396,9.165,2.053,9.165,2.053s-0.367-2.88-0.601-7.556c3.747,2.081,8.874,1.758,8.874,1.758s-0.986-2.319-1.255-7.689
c3.846,1.998,8.434,2.278,8.434,2.278s-0.725-2.246-1.24-5.573c3.788,0.719,8.84,0.419,8.84,0.419s-3.543-7.302-1.316-16.965
c0.357-1.547,0.666-3.09,0.938-4.626c-0.087,1.332-0.169,2.662-0.238,3.985c-0.783,14.742,10.85,24.47,10.85,24.47
S337,172.178,337,162.303c0-0.021,0-0.042,0-0.061c0,0.153-0.804,0.309-0.782,0.46c1.951,14.548,13.499,20.839,13.499,20.839
s2.388-16.471-1.478-25.542c-1.998-4.686-3.966-9.742-5.688-14.881c2.068,5.344,4.374,10.673,7.067,15.72
c6.909,12.952,20.498,15.406,20.498,15.406s-1.832-14.029-7.581-22.041c-3.952-5.505-7.874-11.654-11.551-17.83
c4.059,6.22,8.622,12.438,13.631,18.048c9.774,10.953,25.27,9.178,25.27,9.178s-7.323-12.085-14.767-18.552
c-4.283-3.722-8.589-7.824-12.754-12.019c4.513,4.047,9.319,7.944,14.31,11.39c12.077,8.341,27.281,0.931,27.281,0.931
s-10.533-7.219-18.926-12.302c0.595,0.332,1.186,0.662,1.777,0.988c12.922,7.14,28.146-3.013,28.146-3.013
s-12.036-5.887-21.343-9.313C389.896,118.341,386.055,116.903,382.184,115.435z M116.917,367.418
c-0.172,0.131-0.344,0.268-0.516,0.398c-17.301-3.899-29.646-12.415-31.124-28.752c-2.244-24.777,21.669-42.631,47.562-54.59
c3.553,1,9.203,1.919,15.541,0.503c-4.694,4.817-7.998,9.859-7.998,9.859s2.076,0.564,5.3,0.733
C133.582,308.673,115.917,333.715,116.917,367.418z M146.295,295.598c1.834,0.062,3.979-0.014,6.326-0.386
c-0.141,0.365-0.274,0.72-0.401,1.069c-10.511,14.57-18.745,34.363-17.404,59.912c-4.522,2.267-9.248,5.074-13.939,8.343
C122.237,330.3,136.218,307.613,146.295,295.598z M121.776,368.86c4.131-2.979,8.589-5.697,13.361-8.115
c0.358,3.527,1.032,6.741,2.025,9.634C131.805,370.131,126.629,369.657,121.776,368.86z M150.478,350.278
c-3.791,0.864-8.16,2.403-12.812,4.546c-0.062-0.425-0.168-0.803-0.224-1.236c-2.557-19.875,3.873-37.276,13.005-51.347
c0,0.005-0.007,0.032-0.007,0.032s13.533-3.395,23.088-14.017c-1.715,7.205,0.158,14.79,0.158,14.79s9.774-5.185,16.654-15.216
c-0.131,5.548,2.84,10.803,5.451,14.331C193.303,321.731,182.711,342.934,150.478,350.278z M259.516,275.357
c0.846-4.127,1.649-8.135,2.42-12.012c2.199-4.002,5.203-6.524,9.011-7.55c3.808-1.04,7.78-1.559,11.919-1.559l1.739-17.042
c-5.942,0.378-11.657,1.419-17.144,3.105c-5.492,1.672-10.946,3.611-16.369,5.8c-4.526,4.131-7.915,8.875-10.169,14.237
c-2.262,5.359-3.755,11.051-4.655,17.055c-0.906,6.007-1.268,12.17-1.268,18.489v18.209c0,3.23,0.201,6.368,0.779,9.393
c0.584,3.045,1.728,5.66,3.543,7.85c3.614,2.588,7.203,3.85,10.822,3.771c3.619-0.066,7.224-0.712,10.842-1.925
c3.611-1.23,7.162-2.757,10.647-4.558c3.484-1.811,6.904-3.293,10.266-4.457l7.159-14.521c-2.066,0.505-4.2,1.23-6.394,2.127
c-2.199,0.9-4.453,1.643-6.777,2.224c-2.322,0.585-4.649,0.773-6.977,0.585c-2.322-0.189-4.649-1.2-6.976-2.994
c-2.063-3.626-3.355-7.475-3.87-11.541c-0.519-4.065-0.612-8.165-0.289-12.296C258.1,283.619,258.674,279.488,259.516,275.357z
M367.6,320.582c-0.196-3.025-1.001-5.908-2.42-8.623c-1.031-3.608-2.649-6.588-4.846-8.905c-2.193-2.333-4.682-4.162-7.458-5.516
c-2.773-1.358-5.712-2.364-8.812-3.014c-3.098-0.643-6.004-1.056-8.717-1.259c-2.711-0.188-5.101-0.285-7.166-0.285
s-3.419-0.062-4.064-0.189c0.25-1.037,0.449-2.302,0.574-3.783c0.133-1.481,0.322-2.866,0.584-4.162
c0.258-1.419,0.512-2.977,0.773-4.65c6.326,0,12.073-0.581,17.242-1.749c5.165-1.148,9.688-3.059,13.558-5.705
c3.876-2.646,7.135-6.131,9.781-10.469c2.649-4.318,4.558-9.583,5.715-15.776c-5.684,0-11.596,0.029-17.727,0.093
s-12.328,0.158-18.593,0.284c-6.266,0.143-12.431,0.332-18.5,0.583c-6.066,0.27-11.812,0.584-17.236,0.979
c0.128,0,0.221,1.387,0.293,4.161c0.062,2.775,0.062,6.465,0,11.035c-0.072,4.588-0.2,9.788-0.386,15.589
c-0.199,5.819-0.49,11.73-0.875,17.734c-0.386,6.007-0.878,11.901-1.451,17.72c-0.584,5.815-1.262,10.908-2.035,15.304
c5.552-0.268,11.432-0.488,17.624-0.677c2.162-0.065,4.33-0.127,6.503-0.176l1.247-5.547c0.385-2.192,0.708-4.776,0.969-7.739
c0.259-2.979,0.513-5.754,0.773-8.338c0.259-3.093,0.386-6.196,0.386-9.286c0.646-0.127,1.677-0.206,3.103-0.206
c1.547,0,3.225,0.269,5.039,0.773c1.804,0.519,3.68,1.292,5.612,2.334c1.938,1.041,3.615,2.522,5.034,4.46
c1.42,1.925,2.45,4.352,3.104,7.252c0.638,2.914,0.638,6.495,0,10.75l0.631,5.39c1.609,0.033,3.207,0.079,4.796,0.144
c6.068,0.189,11.812,0.471,17.234,0.866C367.891,326.747,367.795,323.609,367.6,320.582z M327.506,263.345
c0.707-4.397,1.323-8.133,1.835-11.238c1.168-0.521,2.522-0.835,4.069-0.962c1.549-0.125,3.103-0.205,4.65-0.205
c1.677,0,3.291,0.031,4.845,0.112c1.547,0.062,2.901,0.093,4.069,0.093c0,1.151-0.041,2.586-0.103,4.256
c-0.066,1.688-0.189,3.42-0.389,5.232c-0.189,1.815-0.512,3.578-0.97,5.331c-0.446,1.732-1.127,3.182-2.034,4.347
c-0.896,0.918-2.128,1.657-3.681,2.224c-1.543,0.584-3.159,1.042-4.84,1.357c-1.677,0.33-3.291,0.55-4.838,0.677
c-1.555,0.141-2.78,0.207-3.682,0.207C326.439,271.542,326.798,267.727,327.506,263.345z M393.035,246.385
c-2.517,0.33-4.84,0.584-6.97,0.773c-2.135,0.205-3.781,0.172-4.939-0.096l3.678,2.711c0.899,5.423,1.356,11.051,1.356,16.851
c0,5.818-0.195,11.695-0.584,17.642c-0.385,5.941-0.872,11.805-1.45,17.624c-0.581,5.801-1,11.427-1.261,16.85
c-0.907,4.522-1.519,9.238-1.835,14.139c-0.331,4.901-0.843,9.713-1.554,14.425c-0.708,4.712-1.812,9.3-3.297,13.761
c-1.48,4.443-3.773,8.481-6.869,12.107l-2.908,1.543c0.513,0.52,1.323,0.993,2.42,1.45c1.093,0.457,1.842,0.678,2.23,0.678
c2.708-3.23,4.712-6.558,6.004-9.978c1.286-3.419,2.64-6.746,4.069-9.963c1.544-2.711,2.969-5.626,4.261-8.716
c1.286-3.107,2.774-6.008,4.455-8.719c1.671-2.708,3.681-5.045,6.008-6.984c2.322-1.938,5.285-3.15,8.903-3.67
c0.386-6.319,0.836-13.114,1.354-20.335c0.517-7.235,1.001-14.534,1.451-21.896c0.457-7.361,0.846-14.596,1.168-21.689
c0.323-7.111,0.482-13.684,0.482-19.769c-2.713,0-5.458,0.143-8.229,0.394C398.196,245.785,395.553,246.07,393.035,246.385z
M483.002,245c0,4-0.061,5.618-0.188,7.038c-0.135,1.419-0.323,3.525-0.581,5.259c-0.261,1.751-0.584,4.166-0.972,6.752
c-0.386,2.584-0.843,6.388-1.354,11.165c-0.519,4.791-1.135,11.551-1.839,19.167c-0.715,7.612-1.519,18.619-2.427,29.619h-32.15
c0-15,1.065-26.686,3.192-39.535c2.138-12.847,4.101-25.911,5.911-38.695c-5.034,0.52-9.85,1.042-14.427,1.812
c-4.589,0.773-9.136,0.898-13.662,0.52c-0.513,13.682-1.543,27.507-3.097,41.521c-1.553,13.998-3.23,27.586-5.038,40.749
c4.52,0,9.396-0.166,14.631-0.496c5.224-0.316,10.292-0.479,15.2-0.479c0.649,1.152,1.285,2.776,1.942,4.838
c0.638,2.065,1.22,4.318,1.738,6.779c0.517,2.457,0.997,5.027,1.454,7.753c0.447,2.715,0.873,5.424,1.258,8.135
c0.9,6.32,1.681,13.102,2.327,20.336c2.192-6.196,4.454-12.28,6.777-18.209c1.938-5.045,4.004-10.262,6.196-15.699
c2.199-5.423,4.327-10.073,6.393-13.936c2.323,0.254,4.649,0.316,6.974,0.188c2.326-0.124,4.681-0.25,7.071-0.392
c2.386-0.127,4.775-0.127,7.163,0c2.389,0.142,4.681,0.52,6.88,1.165c-0.257-6.716-0.164-13.619,0.293-20.728
c0.449-7.093,1.096-14.204,1.932-21.297c0.841-7.111,1.707-15.14,2.615-22.062c0.907-6.901,1.742-13.27,2.522-21.27H483.002z"/>
</svg>

Before

Width:  |  Height:  |  Size: 15 KiB

View file

@ -7,7 +7,7 @@
[![CircleCI](https://circleci.com/gh/checkpoint-restore/criu.svg?style=svg)](
https://circleci.com/gh/checkpoint-restore/criu)
<p align="center"><img src="Documentation/logo.svg" width="256px"/></p>
<p align="center"><img src="https://criu.org/w/images/1/1c/CRIU.svg" width="256px"/></p>
## CRIU -- A project to implement checkpoint/restore functionality for Linux

View file

@ -1,47 +0,0 @@
#ifndef __UAPI_ASM_GCS_TYPES_H__
#define __UAPI_ASM_GCS_TYPES_H__
#ifndef NT_ARM_GCS
#define NT_ARM_GCS 0x410 /* ARM GCS state */
#endif
/* Shadow Stack/Guarded Control Stack interface */
#define PR_GET_SHADOW_STACK_STATUS 74
#define PR_SET_SHADOW_STACK_STATUS 75
#define PR_LOCK_SHADOW_STACK_STATUS 76
/* When set PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack */
#ifndef PR_SHADOW_STACK_ENABLE
#define PR_SHADOW_STACK_ENABLE (1UL << 0)
#endif
/* Allows explicit GCS stores (eg. using GCSSTR) */
#ifndef PR_SHADOW_STACK_WRITE
#define PR_SHADOW_STACK_WRITE (1UL << 1)
#endif
/* Allows explicit GCS pushes (eg. using GCSPUSHM) */
#ifndef PR_SHADOW_STACK_PUSH
#define PR_SHADOW_STACK_PUSH (1UL << 2)
#endif
#ifndef SHADOW_STACK_SET_TOKEN
#define SHADOW_STACK_SET_TOKEN 0x1 /* Set up a restore token in the shadow stack */
#endif
#define PR_SHADOW_STACK_ALL_MODES \
PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH
/* copied from: arch/arm64/include/asm/sysreg.h */
#define GCS_CAP_VALID_TOKEN 0x1
#define GCS_CAP_ADDR_MASK 0xFFFFFFFFFFFFF000ULL
#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | GCS_CAP_VALID_TOKEN)
#define GCS_SIGNAL_CAP(addr) (((unsigned long)addr) & GCS_CAP_ADDR_MASK)
#include <asm/hwcap.h>
#ifndef HWCAP_GCS
#define HWCAP_GCS (1UL << 32)
#endif
#endif /* __UAPI_ASM_GCS_TYPES_H__ */

View file

@ -2,7 +2,6 @@
#define UAPI_COMPEL_ASM_TYPES_H__
#include <stdint.h>
#include <stdbool.h>
#include <signal.h>
#include <sys/mman.h>
#include <asm/ptrace.h>
@ -17,24 +16,7 @@
*/
typedef struct user_pt_regs user_regs_struct_t;
/*
* GCS (Guarded Control Stack)
*
* This mirrors the kernel definition but renamed to cr_user_gcs
* to avoid conflict with kernel headers (/usr/include/asm/ptrace.h).
*/
struct cr_user_gcs {
__u64 features_enabled;
__u64 features_locked;
__u64 gcspr_el0;
};
struct user_fpregs_struct {
struct user_fpsimd_state fpstate;
struct cr_user_gcs gcs;
};
typedef struct user_fpregs_struct user_fpregs_struct_t;
typedef struct user_fpsimd_state user_fpregs_struct_t;
#define __compel_arch_fetch_thread_area(tid, th) 0
#define compel_arch_fetch_thread_area(tctl) 0
@ -57,12 +39,4 @@ typedef struct user_fpregs_struct user_fpregs_struct_t;
__NR_##syscall; \
})
extern bool __compel_host_supports_gcs(void);
#define compel_host_supports_gcs __compel_host_supports_gcs
struct parasite_ctl;
extern int __parasite_setup_shstk(struct parasite_ctl *ctl,
user_fpregs_struct_t *ext_regs);
#define parasite_setup_shstk __parasite_setup_shstk
#endif /* UAPI_COMPEL_ASM_TYPES_H__ */

View file

@ -10,20 +10,11 @@
/* Copied from the kernel header arch/arm64/include/uapi/asm/sigcontext.h */
#define FPSIMD_MAGIC 0x46508001
#define GCS_MAGIC 0x47435300
typedef struct fpsimd_context fpu_state_t;
struct gcs_context {
struct _aarch64_ctx head;
__u64 gcspr;
__u64 features_enabled;
__u64 reserved;
};
struct aux_context {
struct fpsimd_context fpsimd;
struct gcs_context gcs;
/* additional context to be added before "end" */
struct _aarch64_ctx end;
};
@ -72,7 +63,6 @@ struct cr_sigcontext {
#define RT_SIGFRAME_AUX_CONTEXT(rt_sigframe) ((struct aux_context *)&(RT_SIGFRAME_SIGCONTEXT(rt_sigframe)->__reserved))
#define RT_SIGFRAME_FPU(rt_sigframe) (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->fpsimd)
#define RT_SIGFRAME_OFFSET(rt_sigframe) 0
#define RT_SIGFRAME_GCS(rt_sigframe) (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->gcs)
#define rt_sigframe_erase_sigset(sigframe) memset(&sigframe->uc.uc_sigmask, 0, sizeof(k_rtsigset_t))
#define rt_sigframe_copy_sigset(sigframe, from) memcpy(&sigframe->uc.uc_sigmask, from, sizeof(k_rtsigset_t))

View file

@ -2,8 +2,8 @@
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/auxv.h>
#include <asm/ptrace.h>
#include <linux/elf.h>
#include <compel/plugins/std/syscall-codes.h>
#include "common/page.h"
@ -13,8 +13,6 @@
#include "infect.h"
#include "infect-priv.h"
#include "asm/breakpoints.h"
#include "asm/gcs-types.h"
#include <linux/prctl.h>
unsigned __page_size = 0;
unsigned __page_shift = 0;
@ -35,54 +33,24 @@ static inline void __always_unused __check_code_syscall(void)
BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
}
bool __compel_host_supports_gcs(void)
{
unsigned long hwcap = getauxval(AT_HWCAP);
return (hwcap & HWCAP_GCS) != 0;
}
static bool __compel_gcs_enabled(struct cr_user_gcs *gcs)
{
if (!compel_host_supports_gcs())
return false;
return gcs && (gcs->features_enabled & PR_SHADOW_STACK_ENABLE) != 0;
}
int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
{
struct fpsimd_context *fpsimd = RT_SIGFRAME_FPU(sigframe);
struct gcs_context *gcs = RT_SIGFRAME_GCS(sigframe);
memcpy(sigframe->uc.uc_mcontext.regs, regs->regs, sizeof(regs->regs));
pr_debug("sigreturn_prep_regs_plain: sp %lx pc %lx\n", (long)regs->sp, (long)regs->pc);
sigframe->uc.uc_mcontext.sp = regs->sp;
sigframe->uc.uc_mcontext.pc = regs->pc;
sigframe->uc.uc_mcontext.pstate = regs->pstate;
memcpy(fpsimd->vregs, fpregs->fpstate.vregs, 32 * sizeof(__uint128_t));
memcpy(fpsimd->vregs, fpregs->vregs, 32 * sizeof(__uint128_t));
fpsimd->fpsr = fpregs->fpstate.fpsr;
fpsimd->fpcr = fpregs->fpstate.fpcr;
fpsimd->fpsr = fpregs->fpsr;
fpsimd->fpcr = fpregs->fpcr;
fpsimd->head.magic = FPSIMD_MAGIC;
fpsimd->head.size = sizeof(*fpsimd);
if (__compel_gcs_enabled(&fpregs->gcs)) {
gcs->head.magic = GCS_MAGIC;
gcs->head.size = sizeof(*gcs);
gcs->reserved = 0;
gcs->gcspr = fpregs->gcs.gcspr_el0 - 8;
gcs->features_enabled = fpregs->gcs.features_enabled;
pr_debug("sigframe gcspr=%llx features_enabled=%llx\n", fpregs->gcs.gcspr_el0 - 8, fpregs->gcs.features_enabled);
} else {
pr_debug("sigframe gcspr=[disabled]\n");
memset(gcs, 0, sizeof(*gcs));
}
return 0;
}
@ -91,7 +59,7 @@ int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigfr
return 0;
}
int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *fpsimd, save_regs_t save,
void *arg, __maybe_unused unsigned long flags)
{
struct iovec iov;
@ -106,28 +74,14 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct
goto err;
}
iov.iov_base = &ext_regs->fpstate;
iov.iov_len = sizeof(ext_regs->fpstate);
iov.iov_base = fpsimd;
iov.iov_len = sizeof(*fpsimd);
if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov))) {
pr_perror("Failed to obtain FPU registers for %d", pid);
goto err;
}
memset(&ext_regs->gcs, 0, sizeof(ext_regs->gcs));
iov.iov_base = &ext_regs->gcs;
iov.iov_len = sizeof(ext_regs->gcs);
if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &iov) == 0) {
pr_info("gcs: GCSPR_EL0 for %d: 0x%llx, features: 0x%llx\n",
pid, ext_regs->gcs.gcspr_el0, ext_regs->gcs.features_enabled);
if (!__compel_gcs_enabled(&ext_regs->gcs))
pr_info("gcs: GCS is NOT enabled\n");
} else {
pr_info("gcs: GCS state not available for %d\n", pid);
}
ret = save(pid, arg, regs, ext_regs);
ret = save(pid, arg, regs, fpsimd);
err:
return ret;
}
@ -136,44 +90,14 @@ int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
{
struct iovec iov;
struct cr_user_gcs gcs;
struct iovec gcs_iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) };
pr_info("Restoring GP/FPU registers for %d\n", pid);
iov.iov_base = &ext_regs->fpstate;
iov.iov_len = sizeof(ext_regs->fpstate);
iov.iov_base = ext_regs;
iov.iov_len = sizeof(*ext_regs);
if (ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov)) {
pr_perror("Failed to set FPU registers for %d", pid);
return -1;
}
if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) < 0) {
pr_warn("gcs: Failed to get GCS for %d\n", pid);
} else {
ext_regs->gcs = gcs;
compel_set_task_gcs_regs(pid, ext_regs);
}
return 0;
}
int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
{
struct iovec iov;
pr_info("gcs: restoring GCS registers for %d\n", pid);
pr_info("gcs: restoring GCS: gcspr=%llx features=%llx\n",
ext_regs->gcs.gcspr_el0, ext_regs->gcs.features_enabled);
iov.iov_base = &ext_regs->gcs;
iov.iov_len = sizeof(ext_regs->gcs);
if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_GCS, &iov)) {
pr_perror("gcs: Failed to set GCS registers for %d", pid);
return -1;
}
return 0;
}
@ -362,68 +286,3 @@ int ptrace_flush_breakpoints(pid_t pid)
return 0;
}
int inject_gcs_cap_token(struct parasite_ctl *ctl, pid_t pid, struct cr_user_gcs *gcs)
{
struct iovec gcs_iov = { .iov_base = gcs, .iov_len = sizeof(*gcs) };
uint64_t token_addr = gcs->gcspr_el0 - 8;
uint64_t sigtramp_addr = gcs->gcspr_el0 - 16;
uint64_t cap_token = ALIGN_DOWN(GCS_SIGNAL_CAP(token_addr), 8);
unsigned long restorer_addr;
pr_info("gcs: (setup) CAP token: 0x%lx at addr: 0x%lx\n", cap_token, token_addr);
/* Inject capability token at gcspr_el0 - 8 */
if (ptrace(PTRACE_POKEDATA, pid, (void *)token_addr, cap_token)) {
pr_perror("gcs: (setup) Inject GCS cap token failed");
return -1;
}
/* Inject restorer trampoline address (gcspr_el0 - 16) */
restorer_addr = ctl->parasite_ip;
if (ptrace(PTRACE_POKEDATA, pid, (void *)sigtramp_addr, restorer_addr)) {
pr_perror("gcs: (setup) Inject GCS restorer failed");
return -1;
}
/* Update GCSPR_EL0 */
gcs->gcspr_el0 = token_addr;
if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_GCS, &gcs_iov)) {
pr_perror("gcs: PTRACE_SETREGS FAILED");
return -1;
}
pr_debug("gcs: parasite_ip=%#lx sp=%#llx gcspr_el0=%#llx\n",
ctl->parasite_ip, ctl->orig.regs.sp, gcs->gcspr_el0);
return 0;
}
int parasite_setup_shstk(struct parasite_ctl *ctl, user_fpregs_struct_t *ext_regs)
{
struct cr_user_gcs gcs;
struct iovec gcs_iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) };
pid_t pid = ctl->rpid;
if(!__compel_host_supports_gcs())
return 0;
if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) != 0) {
pr_perror("GCS state not available for %d", pid);
return -1;
}
if (!__compel_gcs_enabled(&gcs))
return 0;
if (inject_gcs_cap_token(ctl, pid, &gcs)) {
pr_perror("Failed to inject GCS cap token for %d", pid);
return -1;
}
pr_info("gcs: GCS enabled for %d\n", pid);
return 0;
}

View file

@ -85,7 +85,7 @@ timer_settime 110 258 (kernel_timer_t timer_id, int flags, const struct itimer
timer_gettime 108 259 (int timer_id, const struct itimerspec *setting)
timer_getoverrun 109 260 (int timer_id)
timer_delete 111 261 (kernel_timer_t timer_id)
clock_gettime 113 263 (clockid_t which_clock, struct timespec *tp)
clock_gettime 113 263 (const clockid_t which_clock, const struct timespec *tp)
exit_group 94 248 (int error_code)
set_robust_list 99 338 (struct robust_list_head *head, size_t len)
get_robust_list 100 339 (int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
@ -124,4 +124,3 @@ openat2 437 437 (int dirfd, char *pathname, struct open_how *how, size_t size
pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags)
rseq 293 398 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
membarrier 283 389 (int cmd, unsigned int flags, int cpu_id)
map_shadow_stack 453 ! (unsigned long addr, unsigned long size, unsigned int flags)

View file

@ -46,7 +46,7 @@ __NR_sys_timer_gettime 108 sys_timer_gettime (int timer_id, const struct itimer
__NR_sys_timer_getoverrun 109 sys_timer_getoverrun (int timer_id)
__NR_sys_timer_settime 110 sys_timer_settime (kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting)
__NR_sys_timer_delete 111 sys_timer_delete (kernel_timer_t timer_id)
__NR_clock_gettime 113 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
__NR_clock_gettime 113 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp)
__NR_sched_setscheduler 119 sys_sched_setscheduler (int pid, int policy, struct sched_param *p)
__NR_restart_syscall 128 sys_restart_syscall (void)
__NR_kill 129 sys_kill (long pid, int sig)

View file

@ -84,7 +84,7 @@ __NR_sys_timer_settime 5217 sys_timer_settime (kernel_timer_t timer_id, int fl
__NR_sys_timer_gettime 5218 sys_timer_gettime (int timer_id, const struct itimerspec *setting)
__NR_sys_timer_getoverrun 5219 sys_timer_getoverrun (int timer_id)
__NR_sys_timer_delete 5220 sys_timer_delete (kernel_timer_t timer_id)
__NR_clock_gettime 5222 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
__NR_clock_gettime 5222 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp)
__NR_exit_group 5205 sys_exit_group (int error_code)
__NR_set_thread_area 5242 sys_set_thread_area (unsigned long *addr)
__NR_openat 5247 sys_openat (int dfd, const char *filename, int flags, int mode)

View file

@ -82,7 +82,7 @@ __NR_sys_timer_settime 241 sys_timer_settime (kernel_timer_t timer_id, int flag
__NR_sys_timer_gettime 242 sys_timer_gettime (int timer_id, const struct itimerspec *setting)
__NR_sys_timer_getoverrun 243 sys_timer_getoverrun (int timer_id)
__NR_sys_timer_delete 244 sys_timer_delete (kernel_timer_t timer_id)
__NR_clock_gettime 246 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
__NR_clock_gettime 246 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp)
__NR_exit_group 234 sys_exit_group (int error_code)
__NR_waitid 272 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
__NR_set_robust_list 300 sys_set_robust_list (struct robust_list_head *head, size_t len)

View file

@ -85,7 +85,7 @@ timer_settime 110 258 (kernel_timer_t timer_id, int flags, const struct itimer
timer_gettime 108 259 (int timer_id, const struct itimerspec *setting)
timer_getoverrun 109 260 (int timer_id)
timer_delete 111 261 (kernel_timer_t timer_id)
clock_gettime 113 263 (clockid_t which_clock, struct timespec *tp)
clock_gettime 113 263 (const clockid_t which_clock, const struct timespec *tp)
exit_group 94 248 (int error_code)
set_robust_list 99 338 (struct robust_list_head *head, size_t len)
get_robust_list 100 339 (int pid, struct robust_list_head **head_ptr, size_t *len_ptr)

View file

@ -82,7 +82,7 @@ __NR_sys_timer_settime 255 sys_timer_settime (kernel_timer_t timer_id, int flag
__NR_sys_timer_gettime 256 sys_timer_gettime (int timer_id, const struct itimerspec *setting)
__NR_sys_timer_getoverrun 257 sys_timer_getoverrun (int timer_id)
__NR_sys_timer_delete 258 sys_timer_delete (kernel_timer_t timer_id)
__NR_clock_gettime 260 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
__NR_clock_gettime 260 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp)
__NR_exit_group 248 sys_exit_group (int error_code)
__NR_waitid 281 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
__NR_set_robust_list 304 sys_set_robust_list (struct robust_list_head *head, size_t len)

View file

@ -85,7 +85,7 @@ __NR_sys_timer_settime 223 sys_timer_settime (kernel_timer_t timer_id, int fla
__NR_sys_timer_gettime 224 sys_timer_gettime (int timer_id, const struct itimerspec *setting)
__NR_sys_timer_getoverrun 225 sys_timer_getoverrun (int timer_id)
__NR_sys_timer_delete 226 sys_timer_delete (kernel_timer_t timer_id)
__NR_clock_gettime 228 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
__NR_clock_gettime 228 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp)
__NR_exit_group 231 sys_exit_group (int error_code)
__NR_openat 257 sys_openat (int dfd, const char *filename, int flags, int mode)
__NR_waitid 247 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)

View file

@ -761,7 +761,7 @@ bool __compel_shstk_enabled(user_fpregs_struct_t *ext_regs)
return false;
}
int parasite_setup_shstk(struct parasite_ctl *ctl, __maybe_unused user_fpregs_struct_t *ext_regs)
int parasite_setup_shstk(struct parasite_ctl *ctl, user_fpregs_struct_t *ext_regs)
{
pid_t pid = ctl->rpid;
unsigned long sa_restorer = ctl->parasite_ip;

View file

@ -72,7 +72,6 @@ extern bool arch_can_dump_task(struct parasite_ctl *ctl);
extern int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
void *arg, unsigned long flags);
extern int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs);
extern int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs);
extern int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s);
extern int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs,
user_fpregs_struct_t *fpregs);

View file

@ -192,14 +192,6 @@ void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v);
extern void compel_get_stack(struct parasite_ctl *ctl, void **rstack, void **r_thread_stack);
#ifndef compel_host_supports_gcs
static inline bool compel_host_supports_gcs(void)
{
return false;
}
#define compel_host_supports_gcs
#endif
#ifndef compel_shstk_enabled
static inline bool compel_shstk_enabled(user_fpregs_struct_t *ext_regs)
{

View file

@ -3,11 +3,6 @@ CFLAGS ?= -O2 -g -Wall -Werror
COMPEL := ../../../compel/compel-host
ifeq ($(GCS_ENABLE),1)
CFLAGS += -mbranch-protection=standard -DGCS_TEST_ENABLE=1
LDFLAGS += -z experimental-gcs=check
endif
all: victim spy
run:
@ -22,7 +17,7 @@ clean:
rm -f parasite.o
victim: victim.c
$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
$(CC) $(CFLAGS) -o $@ $^
spy: spy.c parasite.h
$(CC) $(CFLAGS) $(shell $(COMPEL) includes) -o $@ $< $(shell $(COMPEL) --static libs)

View file

@ -112,9 +112,6 @@ int main(int argc, char **argv)
return -1;
}
#ifdef GCS_TEST_ENABLE
setenv("GLIBC_TUNABLES", "glibc.cpu.aarch64_gcs=1:glibc.cpu.aarch64_gcs_policy=2", 1);
#endif
pid = vfork();
if (pid == 0) {
close(p_in[1]);

View file

@ -13,7 +13,6 @@ fi
build-essential \
gdb \
git-core \
iproute2 \
iptables \
kmod \
libaio-dev \

View file

@ -6,4 +6,3 @@ obj-y += cpu.o
obj-y += crtools.o
obj-y += sigframe.o
obj-y += bitops.o
obj-y += gcs.o

View file

@ -12,7 +12,6 @@
#include "common/compiler.h"
#include <compel/ptrace.h>
#include "asm/dump.h"
#include "asm/gcs-types.h"
#include "protobuf.h"
#include "images/core.pb-c.h"
#include "images/creds.pb-c.h"
@ -23,7 +22,6 @@
#include "restorer.h"
#include "compel/infect.h"
#include "pstree.h"
#include <stdbool.h>
/*
* cr_user_pac_* are a copy of the corresponding uapi structs
@ -148,11 +146,6 @@ static int save_pac_keys(int pid, CoreEntry *core)
int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpsimd)
{
int i;
struct cr_user_gcs gcs_live;
struct iovec gcs_iov = {
.iov_base = &gcs_live,
.iov_len = sizeof(gcs_live),
};
CoreEntry *core = x;
// Save the Aarch64 CPU state
@ -164,25 +157,14 @@ int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_str
// Save the FP/SIMD state
for (i = 0; i < 32; ++i) {
core->ti_aarch64->fpsimd->vregs[2 * i] = fpsimd->fpstate.vregs[i];
core->ti_aarch64->fpsimd->vregs[2 * i + 1] = fpsimd->fpstate.vregs[i] >> 64;
core->ti_aarch64->fpsimd->vregs[2 * i] = fpsimd->vregs[i];
core->ti_aarch64->fpsimd->vregs[2 * i + 1] = fpsimd->vregs[i] >> 64;
}
assign_reg(core->ti_aarch64->fpsimd, &fpsimd->fpstate, fpsr);
assign_reg(core->ti_aarch64->fpsimd, &fpsimd->fpstate, fpcr);
assign_reg(core->ti_aarch64->fpsimd, fpsimd, fpsr);
assign_reg(core->ti_aarch64->fpsimd, fpsimd, fpcr);
if (save_pac_keys(pid, core))
return -1;
/* Save the GCS state */
if (compel_host_supports_gcs()) {
if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) < 0) {
pr_perror("Failed to get GCS for %d", pid);
return -1;
}
core->ti_aarch64->gcs->gcspr_el0 = gcs_live.gcspr_el0;
core->ti_aarch64->gcs->features_enabled = gcs_live.features_enabled;
}
return 0;
}
@ -191,7 +173,6 @@ int arch_alloc_thread_info(CoreEntry *core)
ThreadInfoAarch64 *ti_aarch64;
UserAarch64RegsEntry *gpregs;
UserAarch64FpsimdContextEntry *fpsimd;
UserAarch64GcsEntry *gcs;
ti_aarch64 = xmalloc(sizeof(*ti_aarch64));
if (!ti_aarch64)
@ -221,15 +202,6 @@ int arch_alloc_thread_info(CoreEntry *core)
if (!fpsimd->vregs)
goto err;
/* Allocate & init GCS */
if (compel_host_supports_gcs()) {
gcs = xmalloc(sizeof(*gcs));
if (!gcs)
goto err;
user_aarch64_gcs_entry__init(gcs);
ti_aarch64->gcs = gcs;
}
return 0;
err:
return -1;
@ -259,7 +231,6 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core)
{
int i;
struct fpsimd_context *fpsimd = RT_SIGFRAME_FPU(sigframe);
struct gcs_context *gcs;
if (core->ti_aarch64->fpsimd->n_vregs != 64)
return 1;
@ -273,18 +244,6 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core)
fpsimd->head.magic = FPSIMD_MAGIC;
fpsimd->head.size = sizeof(*fpsimd);
if (compel_host_supports_gcs()) {
gcs = RT_SIGFRAME_GCS(sigframe);
pr_debug("sigframe gcspr %llx enabled %llx\n", gcs->gcspr, gcs->features_enabled);
gcs->head.magic = GCS_MAGIC;
gcs->head.size = sizeof(*gcs);
gcs->reserved = 0;
gcs->gcspr = core->ti_aarch64->gcs->gcspr_el0 - 8;
gcs->features_enabled = core->ti_aarch64->gcs->features_enabled;
}
return 0;
}

View file

@ -1,157 +0,0 @@
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <common/list.h>
#include <compel/cpu.h>
#include "asm/gcs-types.h"
#include "pstree.h"
#include "restorer.h"
#include "rst-malloc.h"
#include "vma.h"
#include <sys/auxv.h>
#include <stdbool.h>
static bool task_has_gcs_enabled(UserAarch64GcsEntry *gcs)
{
return gcs && (gcs->features_enabled & PR_SHADOW_STACK_ENABLE) != 0;
}
static bool host_supports_gcs(void)
{
unsigned long hwcap = getauxval(AT_HWCAP);
return (hwcap & HWCAP_GCS) != 0;
}
static bool task_needs_gcs(struct pstree_item *item, CoreEntry *core)
{
UserAarch64GcsEntry *gcs;
if (!task_alive(item))
return false;
gcs = core->ti_aarch64->gcs;
if (task_has_gcs_enabled(gcs)) {
if (!host_supports_gcs()) {
pr_warn_once("Restoring task with GCS on non-GCS host\n");
return false;
}
pr_info("Restoring task with GCS\n");
return true;
}
pr_info("Restoring a task without GCS\n");
return false;
}
static int gcs_prepare_task(struct vm_area_list *vmas,
struct rst_shstk_info *gcs)
{
struct vma_area *vma;
list_for_each_entry(vma, &vmas->h, list) {
if (vma_area_is(vma, VMA_AREA_SHSTK) &&
in_vma_area(vma, gcs->gcspr_el0)) {
unsigned long premapped_addr = vma->premmaped_addr;
unsigned long size = vma_area_len(vma);
gcs->vma_start = vma->e->start;
gcs->vma_size = size;
gcs->premapped_addr = premapped_addr;
return 0;
}
}
pr_err("Unable to find a shadow stack vma: %lx\n", gcs->gcspr_el0);
return -1;
}
int arch_gcs_prepare(struct pstree_item *item, CoreEntry *core,
struct task_restore_args *ta)
{
int i;
struct thread_restore_args *args_array = (struct thread_restore_args *)(&ta[1]);
struct vm_area_list *vmas = &rsti(item)->vmas;
struct rst_shstk_info *gcs = &ta->shstk;
if (!task_needs_gcs(item, core))
return 0;
gcs->gcspr_el0 = core->ti_aarch64->gcs->gcspr_el0;
gcs->features_enabled = core->ti_aarch64->gcs->features_enabled;
if (gcs_prepare_task(vmas, gcs)) {
pr_err("gcs: failed to prepare shadow stack memory\n");
return -1;
}
for (i = 0; i < item->nr_threads; i++) {
struct thread_restore_args *thread_args = &args_array[i];
core = item->core[i];
gcs = &thread_args->shstk;
gcs->gcspr_el0 = core->ti_aarch64->gcs->gcspr_el0;
gcs->features_enabled = core->ti_aarch64->gcs->features_enabled;
if (gcs_prepare_task(vmas, gcs)) {
pr_err("gcs: failed to prepare GCS memory\n");
return -1;
}
}
return 0;
}
int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
int (*func)(void *arg), void *arg)
{
int fret;
unsigned long flags = PR_SHADOW_STACK_ENABLE |
PR_SHADOW_STACK_PUSH |
PR_SHADOW_STACK_WRITE;
long ret, x1_after, x8_after;
/* If task doesn't need GCS, just call func */
if (!task_needs_gcs(item, core)) {
return func(arg);
}
pr_debug("gcs: GCS enable SVC about to fire: x8=%d x0=%d x1=0x%lx\n",
__NR_prctl, PR_SET_SHADOW_STACK_STATUS, flags);
asm volatile(
"mov x0, %3\n" // x0 = PR_SET_SHADOW_STACK_STATUS (75)
"mov x1, %4\n" // x1 = flags
"mov x2, xzr\n" // x2 = 0
"mov x3, xzr\n" // x3 = 0
"mov x4, xzr\n" // x4 = 0
"mov x8, %5\n" // x8 = __NR_prctl (167)
"svc #0\n" // Invoke syscall
"mov %0, x0\n" // Capture return value
"mov %1, x1\n" // Capture x1 after
"mov %2, x8\n" // Capture x8 after
: "=r"(ret), "=r"(x1_after), "=r"(x8_after)
: "i"(PR_SET_SHADOW_STACK_STATUS), // x0 - %3rd
"r"(flags), // x1 - %4th
"i"(__NR_prctl) // x8 - %5th
: "x0", "x1", "x2", "x3", "x4", "x8", "memory", "cc");
pr_info("gcs: after SVC: ret=%ld x1=%ld x8=%ld\n", ret, x1_after, x8_after);
if (ret != 0) {
int err = errno;
pr_err("gcs: failed to enable GCS: ret=%ld errno=%d (%s)\n", ret, err, strerror(err));
return -1;
}
fret = func(arg);
exit(fret);
return -1;
}

View file

@ -1,196 +0,0 @@
#ifndef __CR_ASM_GCS_H__
#define __CR_ASM_GCS_H__
#include <asm/gcs-types.h>
struct rst_shstk_info {
unsigned long vma_start; /* start of GCS VMA */
unsigned long vma_size; /* size of GCS VMA */
unsigned long premapped_addr; /* premapped buffer */
unsigned long tmp_gcs; /* temp area for GCS if needed */
u64 gcspr_el0; /* GCS pointer */
u64 features_enabled; /* GCS flags */
};
#define rst_shstk_info rst_shstk_info
struct task_restore_args;
struct pstree_item;
int arch_gcs_prepare(struct pstree_item *item, CoreEntry *core,
struct task_restore_args *ta);
#define arch_shstk_prepare arch_gcs_prepare
int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
int (*func)(void *arg), void *arg);
#define arch_shstk_trampoline arch_shstk_trampoline
static always_inline void shstk_set_restorer_stack(struct rst_shstk_info *gcs, void *ptr)
{
gcs->tmp_gcs = (long unsigned)ptr;
}
#define shstk_set_restorer_stack shstk_set_restorer_stack
static always_inline long shstk_restorer_stack_size(void)
{
return PAGE_SIZE;
}
#define shstk_restorer_stack_size shstk_restorer_stack_size
#ifdef CR_NOGLIBC
#include <compel/plugins/std/syscall.h>
#include <compel/cpu.h>
#include "vma.h"
static inline unsigned long gcs_map(unsigned long addr, unsigned long size, unsigned int flags)
{
long gcspr = sys_map_shadow_stack(addr, size, flags);
pr_info("gcs: syscall: map_shadow_stack at=%lx size=%ld\n", addr, size);
if (gcspr < 0) {
pr_err("gcs: failed to map GCS at %lx: %ld\n", addr, gcspr);
return -1;
}
if (addr && gcspr != addr) {
pr_err("gcs: address mismatch: need %lx, got %lx\n", addr, gcspr);
return -1;
}
pr_info("gcs: mmapped GCS at %lx\n", gcspr);
return gcspr;
}
/* clang-format off */
static always_inline void gcsss1(unsigned long *Xt)
{
asm volatile (
"sys #3, C7, C7, #2, %0\n"
:
: "rZ" (Xt)
: "memory");
}
static always_inline unsigned long *gcsss2(void)
{
unsigned long *Xt;
asm volatile (
"SYSL %0, #3, C7, C7, #3\n"
: "=r" (Xt)
:
: "memory");
return Xt;
}
static inline void gcsstr(unsigned long addr, unsigned long val)
{
asm volatile(
"mov x0, %0\n"
"mov x1, %1\n"
".inst 0xd91f1c01\n" // GCSSTR x1, [x0]
"mov x0, #0\n"
:
: "r"(addr), "r"(val)
: "x0", "x1", "memory");
}
/* clang-format on */
static always_inline int gcs_restore(struct rst_shstk_info *gcs)
{
unsigned long gcspr, val;
if (!(gcs && gcs->features_enabled & PR_SHADOW_STACK_ENABLE)) {
return 0;
}
gcspr = gcs->gcspr_el0 - 8;
val = ALIGN_DOWN(GCS_SIGNAL_CAP(gcspr), 8);
pr_debug("gcs: [0] GCSSTR VAL=%lx write at GCSPR=%lx\n", val, gcspr);
gcsstr(gcspr, val);
val = ALIGN_DOWN(GCS_SIGNAL_CAP(gcspr), 8) | GCS_CAP_VALID_TOKEN;
gcspr -= 8;
pr_debug("gcs: [1] GCSSTR VAL=%lx write at GCSPR=%lx\n", val, gcspr);
gcsstr(gcspr, val);
pr_debug("gcs: about to switch stacks via GCSSS1 to: %lx\n", gcspr);
gcsss1((unsigned long *)gcspr);
return 0;
}
#define arch_shstk_restore gcs_restore
static always_inline int gcs_vma_restore(VmaEntry *vma_entry)
{
unsigned long shstk, i, ret;
unsigned long *gcs_data = (void *)vma_premmaped_start(vma_entry);
unsigned long vma_size = vma_entry_len(vma_entry);
shstk = gcs_map(0, vma_size, SHADOW_STACK_SET_TOKEN);
if (shstk < 0) {
pr_err("Failed to map shadow stack at %lx: %ld\n", shstk, shstk);
}
/* restore shadow stack contents */
for (i = 0; i < vma_size / 8; i++)
gcsstr(shstk + i * 8, gcs_data[i]);
pr_debug("unmap %lx %ld\n", (unsigned long)gcs_data, vma_size);
ret = sys_munmap(gcs_data, vma_size);
if (ret < 0) {
pr_err("Failed to unmap premmaped shadow stack\n");
return ret;
}
vma_premmaped_start(vma_entry) = shstk;
return 0;
}
#define shstk_vma_restore gcs_vma_restore
static always_inline int gcs_switch_to_restorer(struct rst_shstk_info *gcs)
{
int ret;
unsigned long *ssp;
unsigned long addr;
unsigned long gcspr;
if (!(gcs && gcs->features_enabled & PR_SHADOW_STACK_ENABLE)) {
return 0;
}
pr_debug("gcs->premapped_addr + gcs->vma_size = %lx\n", gcs->premapped_addr + gcs->vma_size);
pr_debug("gcs->tmp_gcs = %lx\n", gcs->tmp_gcs);
addr = gcs->tmp_gcs;
if (addr % PAGE_SIZE != 0) {
pr_err("gcs: 0x%lx not page-aligned to size 0x%lx\n", addr, PAGE_SIZE);
return -1;
}
ret = sys_munmap((void *)addr, PAGE_SIZE);
if (ret < 0) {
pr_err("gcs: Failed to unmap aarea for dumpee GCS VMAs\n");
return -1;
}
gcspr = gcs_map(addr, PAGE_SIZE, SHADOW_STACK_SET_TOKEN);
if (gcspr == -1) {
pr_err("gcs: failed to gcs_map(%lx, %lx)\n", (unsigned long)addr, PAGE_SIZE);
return -1;
}
ssp = (unsigned long *)(addr + PAGE_SIZE - 8);
gcsss1(ssp);
return 0;
}
#define arch_shstk_switch_to_restorer gcs_switch_to_restorer
#endif /* CR_NOGLIBC */
#endif /* __CR_ASM_GCS_H__ */

View file

@ -5,7 +5,6 @@
#include <sys/ucontext.h>
#include "asm/types.h"
#include "asm/gcs.h"
#include "images/core.pb-c.h"
#include <compel/asm/sigframe.h>

View file

@ -2138,8 +2138,8 @@ int cr_dump_tasks(pid_t pid)
InventoryEntry he = INVENTORY_ENTRY__INIT;
InventoryEntry *parent_ie = NULL;
struct pstree_item *item;
int ret;
int exit_code = -1;
int pre_dump_ret = 0;
int ret = -1;
kerndat_warn_about_madv_guards();
@ -2159,9 +2159,9 @@ int cr_dump_tasks(pid_t pid)
goto err;
root_item->pid->real = pid;
ret = run_scripts(ACT_PRE_DUMP);
if (ret != 0) {
pr_err("Pre dump script failed with %d!\n", ret);
pre_dump_ret = run_scripts(ACT_PRE_DUMP);
if (pre_dump_ret != 0) {
pr_err("Pre dump script failed with %d!\n", pre_dump_ret);
goto err;
}
if (init_stats(DUMP_STATS))
@ -2247,10 +2247,6 @@ int cr_dump_tasks(pid_t pid)
goto err;
}
ret = run_plugins(DUMP_DEVICES_LATE, pid);
if (ret && ret != -ENOTSUP)
goto err;
if (parent_ie) {
inventory_entry__free_unpacked(parent_ie, NULL);
parent_ie = NULL;
@ -2287,32 +2283,39 @@ int cr_dump_tasks(pid_t pid)
* ipc shared memory, but an ipc namespace is dumped in a child
* process.
*/
if (cr_dump_shmem())
ret = cr_dump_shmem();
if (ret)
goto err;
if (root_ns_mask) {
if (dump_namespaces(root_item, root_ns_mask))
ret = dump_namespaces(root_item, root_ns_mask);
if (ret)
goto err;
}
if ((root_ns_mask & CLONE_NEWTIME) == 0) {
if (dump_time_ns(0))
ret = dump_time_ns(0);
if (ret)
goto err;
}
if (dump_aa_namespaces() < 0)
goto err;
if (dump_cgroups())
ret = dump_cgroups();
if (ret)
goto err;
if (fix_external_unix_sockets())
ret = fix_external_unix_sockets();
if (ret)
goto err;
if (tty_post_actions())
ret = tty_post_actions();
if (ret)
goto err;
if (inventory_save_uptime(&he))
ret = inventory_save_uptime(&he);
if (ret)
goto err;
he.has_pre_dump_mode = false;
@ -2321,10 +2324,12 @@ int cr_dump_tasks(pid_t pid)
he.allow_uprobes = true;
}
exit_code = write_img_inventory(&he);
ret = write_img_inventory(&he);
if (ret)
goto err;
err:
if (parent_ie)
inventory_entry__free_unpacked(parent_ie, NULL);
return cr_dump_finish(exit_code);
return cr_dump_finish(ret);
}

View file

@ -1238,7 +1238,7 @@ static inline int fork_with_pid(struct pstree_item *item)
pr_debug("PID: real %d virt %d\n", item->pid->real, vpid(item));
}
arch_shstk_unlock(item, ca.core, ret);
arch_shstk_unlock(item, ca.core, pid);
err_unlock:
if (!(ca.clone_flags & CLONE_NEWPID))
@ -2440,7 +2440,6 @@ static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_he
end_vma.e = &end_e;
end_e.start = end_e.end = kdat.task_size;
INIT_LIST_HEAD(&end_vma.list);
s_vma = list_first_entry(self_vma_list, struct vma_area, list);
t_vma = list_first_entry(tgt_vma_list, struct vma_area, list);

View file

@ -503,8 +503,8 @@ usage:
" Inherit file descriptors, treating fd NUM as being\n"
" already opened via an existing RES, which can be:\n"
" tty[rdev:dev]\n"
" pipe:[inode]\n"
" socket:[inode]\n"
" pipe[inode]\n"
" socket[inode]\n"
" file[mnt_id:inode]\n"
" /memfd:name\n"
" path/to/file\n"

View file

@ -45,11 +45,10 @@ static int open_fd(struct file_desc *d, int *new_fd)
{
struct ext_file_info *xfi;
int fd;
bool retry_needed;
xfi = container_of(d, struct ext_file_info, d);
fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id, &retry_needed);
fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id);
if (fd < 0) {
pr_err("Unable to restore %#x\n", xfi->xfe->id);
return -1;
@ -58,11 +57,8 @@ static int open_fd(struct file_desc *d, int *new_fd)
if (restore_fown(fd, xfi->xfe->fown))
return -1;
if (!retry_needed)
*new_fd = fd;
else
*new_fd = -1;
return retry_needed;
*new_fd = fd;
return 0;
}
static struct file_desc_ops ext_desc_ops = {

View file

@ -62,10 +62,6 @@ enum {
CR_PLUGIN_HOOK__POST_FORKING = 12,
CR_PLUGIN_HOOK__RESTORE_INIT = 13,
CR_PLUGIN_HOOK__DUMP_DEVICES_LATE = 14,
CR_PLUGIN_HOOK__MAX
};
@ -74,7 +70,7 @@ enum {
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_UNIX_SK, int fd, int id);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_UNIX_SK, int id);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_FILE, int fd, int id);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id, bool *retry_needed);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_MOUNT, char *mountpoint, int id);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_MOUNT, int id, char *mountpoint, char *old_root, int *is_file);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_LINK, int index, int type, char *kind);
@ -85,8 +81,6 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__PAUSE_DEVICES, int pid);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, int pid);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__POST_FORKING, void);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_INIT, void);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_DEVICES_LATE, int id);
enum {
CR_PLUGIN_STAGE__DUMP,

View file

@ -2116,7 +2116,6 @@ int kerndat_init(void)
}
if (!ret && kerndat_has_timer_cr_ids()) {
pr_err("kerndat_has_timer_cr_ids has failed when initializing kerndat.\n");
ret = -1;
}
if (!ret && kerndat_breakpoints()) {
pr_err("kerndat_breakpoints has failed when initializing kerndat.\n");

View file

@ -1363,19 +1363,13 @@ __visible void __export_unmap(void)
sys_munmap(bootstrap_start, bootstrap_len - vdso_rt_size);
}
static int unregister_libc_rseq(struct rst_rseq_param *rseq)
static void unregister_libc_rseq(struct rst_rseq_param *rseq)
{
long ret;
if (!rseq->rseq_abi_pointer)
return 0;
return;
ret = sys_rseq(decode_pointer(rseq->rseq_abi_pointer), rseq->rseq_abi_size, 1, rseq->signature);
if (ret) {
pr_err("Failed to unregister libc rseq %ld\n", ret);
return -1;
}
return 0;
/* can't fail if rseq is registered */
sys_rseq(decode_pointer(rseq->rseq_abi_pointer), rseq->rseq_abi_size, 1, rseq->signature);
}
/*
@ -1809,8 +1803,7 @@ __visible long __export_restore_task(struct task_restore_args *args)
* for instance once the kernel will want to update (struct rseq).cpu_id field:
* https://github.com/torvalds/linux/blob/ce522ba9ef7e/kernel/rseq.c#L89
*/
if (unregister_libc_rseq(&args->libc_rseq))
goto core_restore_end;
unregister_libc_rseq(&args->libc_rseq);
if (unmap_old_vmas((void *)args->premmapped_addr, args->premmapped_len, bootstrap_start, bootstrap_len,
args->task_size))
@ -1996,9 +1989,6 @@ __visible long __export_restore_task(struct task_restore_args *args)
for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) {
if (vma_entry->madv & (1ul << m)) {
if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR)))
continue;
ret = sys_madvise(vma_entry->start, vma_entry_len(vma_entry), m);
if (ret) {
pr_err("madvise(%" PRIx64 ", %" PRIu64 ", %ld) "

View file

@ -60,8 +60,6 @@ static cr_plugin_desc_t *cr_gen_plugin_desc(void *h, char *path)
__assign_hook(PAUSE_DEVICES, "cr_plugin_pause_devices");
__assign_hook(CHECKPOINT_DEVICES, "cr_plugin_checkpoint_devices");
__assign_hook(POST_FORKING, "cr_plugin_post_forking");
__assign_hook(RESTORE_INIT, "cr_plugin_restore_init");
__assign_hook(DUMP_DEVICES_LATE, "cr_plugin_dump_devices_late");
#undef __assign_hook
@ -259,16 +257,8 @@ int cr_plugin_init(int stage)
goto err;
}
if (stage == CR_PLUGIN_STAGE__RESTORE) {
int ret;
if (check_inventory_plugins())
goto err;
ret = run_plugins(RESTORE_INIT);
if (ret < 0 && ret != -ENOTSUP)
goto err;
}
if (stage == CR_PLUGIN_STAGE__RESTORE && check_inventory_plugins())
goto err;
exit_code = 0;
err:

View file

@ -1477,7 +1477,7 @@ static int parse_mountinfo_ent(char *str, struct mount_info *new, char **fsname)
goto err;
new->mountpoint[0] = '.';
ret = sscanf(str, "%i %i %u:%u %ms %4094s %ms %n", &new->mnt_id, &new->parent_mnt_id, &kmaj, &kmin, &new->root,
ret = sscanf(str, "%i %i %u:%u %ms %s %ms %n", &new->mnt_id, &new->parent_mnt_id, &kmaj, &kmin, &new->root,
new->mountpoint + 1, &opt, &n);
if (ret != 7)
goto err;
@ -2302,10 +2302,10 @@ static int parse_file_lock_buf(char *buf, struct file_lock *fl, bool is_blocked)
char fl_flag[10], fl_type[15], fl_option[10];
if (is_blocked) {
num = sscanf(buf, "%lld: -> %9s %14s %9s %d %x:%x:%ld %lld %31s", &fl->fl_id, fl_flag, fl_type, fl_option,
num = sscanf(buf, "%lld: -> %s %s %s %d %x:%x:%ld %lld %s", &fl->fl_id, fl_flag, fl_type, fl_option,
&fl->fl_owner, &fl->maj, &fl->min, &fl->i_no, &fl->start, fl->end);
} else {
num = sscanf(buf, "%lld:%9s %14s %9s %d %x:%x:%ld %lld %31s", &fl->fl_id, fl_flag, fl_type, fl_option,
num = sscanf(buf, "%lld:%s %s %s %d %x:%x:%ld %lld %s", &fl->fl_id, fl_flag, fl_type, fl_option,
&fl->fl_owner, &fl->maj, &fl->min, &fl->i_no, &fl->start, fl->end);
}

View file

@ -313,4 +313,4 @@ int clone_service_fd(struct pstree_item *me)
ret = 0;
return ret;
}
}

View file

@ -259,7 +259,7 @@ static int pts_fd_get_index(int fd, const struct fd_parms *p)
{
int index;
const struct fd_link *link = p->link;
const char *pos = strrchr(link->name, '/');
char *pos = strrchr(link->name, '/');
if (!pos || pos == (link->name + link->len - 1)) {
pr_err("Unexpected format on path %s\n", link->name + 1);

View file

@ -222,9 +222,10 @@ int close_safe(int *fd)
if (*fd > -1) {
ret = close(*fd);
if (ret)
pr_perror("Failed closing fd %d", *fd);
*fd = -1;
if (!ret)
*fd = -1;
else
pr_perror("Unable to close fd %d", *fd);
}
return ret;

View file

@ -17,11 +17,6 @@ message user_aarch64_fpsimd_context_entry {
required uint32 fpcr = 3;
}
message user_aarch64_gcs_entry {
required uint64 gcspr_el0 = 1 [(criu).hex = true];
required uint64 features_enabled = 2 [(criu).hex = true];
}
message pac_address_keys {
required uint64 apiakey_lo = 1;
required uint64 apiakey_hi = 2;
@ -50,5 +45,4 @@ message thread_info_aarch64 {
required user_aarch64_regs_entry gpregs = 3[(criu).hex = true];
required user_aarch64_fpsimd_context_entry fpsimd = 4;
optional pac_keys pac_keys = 5;
optional user_aarch64_gcs_entry gcs = 6;
}

View file

@ -242,7 +242,7 @@ class criu:
# process resources from its own if criu is located in a same
# process tree it is trying to dump.
daemon = False
if req.type == rpc.DUMP and (not req.opts.HasField('pid') or req.opts.pid == os.getpid()):
if req.type == rpc.DUMP and not req.opts.HasField('pid'):
daemon = True
try:

View file

@ -154,9 +154,8 @@ flags_maps = {
gen_maps = {
'task_state': {
1: 'Alive',
2: 'Dead',
3: 'Stopped',
6: 'Zombie',
3: 'Zombie',
6: 'Stopped'
},
}

View file

@ -27,8 +27,8 @@ endif
criu-amdgpu.pb-c.c: criu-amdgpu.proto
protoc --proto_path=. --c_out=. criu-amdgpu.proto
amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_dmabuf.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c amdgpu_socket_utils.c
$(CC) $(PLUGIN_CFLAGS) $(DEFINES) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC)
amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c
$(CC) $(PLUGIN_CFLAGS) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC)
amdgpu_plugin_clean:
$(call msg-clean, $@)

View file

@ -3,8 +3,7 @@ Supporting ROCm with CRIU
_Felix Kuehling <Felix.Kuehling@amd.com>_<br>
_Rajneesh Bardwaj <Rajneesh.Bhardwaj@amd.com>_<br>
_David Yat Sin <David.YatSin@amd.com>_<br>
_Yanning Yang <yangyanning@sjtu.edu.cn>_
_David Yat Sin <David.YatSin@amd.com>_
# Introduction
@ -225,26 +224,6 @@ to resume execution on the GPUs.
*This new plugin is enabled by the new hook `__RESUME_DEVICES_LATE` in our RFC
patch series.*
## Restoring BO content in parallel
Restoring the BO content is an important part in the restore of GPU state and
usually takes a significant amount of time. A possible location for this
procedure is the `cr_plugin_restore_file` hook. However, restoring in this hook
blocks the target process from performing other restore operations, which
hinders further optimization of the restore process.
Therefore, a new plugin hook that runs in the master restore process is
introduced, and it interacts with the `cr_plugin_restore_file` hook to complete
the restore of BO content. Specifically, the target process only needs to send
the relevant BOs to the master restore process, while this new hook handles all
the restore of buffer objects. Through this method, during the restore of the BO
content, the target process can perform other restore operations, thus
accelerating the restore procedure. This is an implementation of the gCROP
method proposed in the ACM SoCC'24 paper: [On-demand and Parallel
Checkpoint/Restore for GPU Applications](https://dl.acm.org/doi/10.1145/3698038.3698510).
*This optimization technique is enabled by the `__POST_FORKING` hook.*
## Other CRIU changes
In addition to the new plugins, we need to make some changes to CRIU itself to

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,197 +0,0 @@
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <linux/limits.h>
#include "common/list.h"
#include "criu-amdgpu.pb-c.h"
#include "xmalloc.h"
#include "criu-log.h"
#include "amdgpu_plugin_drm.h"
#include "amdgpu_plugin_util.h"
#include "amdgpu_plugin_dmabuf.h"
#include "fdstore.h"
#include "util.h"
#include "common/scm.h"
struct dmabuf {
int id;
int dmabuf_fd;
struct list_head node;
};
static LIST_HEAD(dmabuf_list);
/* Return < 0 for error, > 0 for "not a dmabuf" and 0 "is a dmabuf" */
int get_dmabuf_info(int fd, struct stat *st)
{
char path[PATH_MAX];
if (read_fd_link(fd, path, sizeof(path)) < 0)
return -1;
if (strncmp(path, DMABUF_LINK, strlen(DMABUF_LINK)) != 0)
return 1;
return 0;
}
int __amdgpu_plugin_dmabuf_dump(int dmabuf_fd, int id)
{
int ret = 0;
char path[PATH_MAX];
size_t len = 0;
unsigned char *buf = NULL;
int gem_handle;
gem_handle = handle_for_shared_bo_fd(dmabuf_fd);
if (gem_handle < 0) {
pr_err("Failed to get handle for dmabuf_fd = %d\n", dmabuf_fd);
return -EAGAIN; /* Retry needed */
}
CriuDmabufNode *node = xmalloc(sizeof(*node));
if (!node) {
pr_err("Failed to allocate memory for dmabuf node\n");
return -ENOMEM;
}
criu_dmabuf_node__init(node);
node->gem_handle = gem_handle;
if (node->gem_handle < 0) {
pr_err("Failed to get handle for dmabuf_fd\n");
xfree(node);
return -EINVAL;
}
/* Serialize metadata to a file */
snprintf(path, sizeof(path), IMG_DMABUF_FILE, id);
len = criu_dmabuf_node__get_packed_size(node);
buf = xmalloc(len);
if (!buf) {
pr_err("Failed to allocate buffer for dmabuf metadata\n");
xfree(node);
return -ENOMEM;
}
criu_dmabuf_node__pack(node, buf);
ret = write_img_file(path, buf, len);
xfree(buf);
xfree(node);
return ret;
}
int amdgpu_plugin_dmabuf_restore(int id)
{
char path[PATH_MAX];
size_t img_size;
FILE *img_fp = NULL;
int ret = 0;
CriuDmabufNode *rd = NULL;
unsigned char *buf = NULL;
int fd_id;
snprintf(path, sizeof(path), IMG_DMABUF_FILE, id);
/* Read serialized metadata */
img_fp = open_img_file(path, false, &img_size);
if (!img_fp) {
pr_err("Failed to open dmabuf metadata file: %s\n", path);
return -EINVAL;
}
pr_debug("dmabuf Image file size:%ld\n", img_size);
buf = xmalloc(img_size);
if (!buf) {
pr_perror("Failed to allocate memory");
return -ENOMEM;
}
ret = read_fp(img_fp, buf, img_size);
if (ret) {
pr_perror("Unable to read from %s", path);
xfree(buf);
return ret;
}
rd = criu_dmabuf_node__unpack(NULL, img_size, buf);
if (rd == NULL) {
pr_perror("Unable to parse the dmabuf message %d", id);
xfree(buf);
fclose(img_fp);
return -1;
}
fclose(img_fp);
/* Match GEM handle with shared_dmabuf list */
fd_id = amdgpu_id_for_handle(rd->gem_handle);
if (fd_id == -1) {
pr_err("Failed to find dmabuf_fd for GEM handle = %d\n", rd->gem_handle);
return 1;
}
int dmabuf_fd = fdstore_get(fd_id);
if (dmabuf_fd == -1) {
pr_err("Failed to find dmabuf_fd for GEM handle = %d\n", rd->gem_handle);
return 1; /* Retry needed */
}
pr_info("Restored dmabuf_fd = %d for GEM handle = %d\n", dmabuf_fd, rd->gem_handle);
ret = dmabuf_fd;
pr_info("Successfully restored dmabuf_fd %d\n", dmabuf_fd);
criu_dmabuf_node__free_unpacked(rd, NULL);
xfree(buf);
return ret;
}
int amdgpu_plugin_dmabuf_dump(int dmabuf_fd, int id)
{
int ret;
ret = __amdgpu_plugin_dmabuf_dump(dmabuf_fd, id);
if (ret == -EAGAIN) {
struct dmabuf *b = xmalloc(sizeof(*b));
b->id = id;
b->dmabuf_fd = dmabuf_fd;
list_add(&b->node, &dmabuf_list);
return 0;
}
return ret;
}
int try_dump_dmabuf_list()
{
struct dmabuf *b, *t;
list_for_each_entry_safe(b, t, &dmabuf_list, node) {
int ret = __amdgpu_plugin_dmabuf_dump(b->dmabuf_fd, b->id);
if (ret == -EAGAIN)
continue;
if (ret)
return ret;
list_del(&b->node);
xfree(b);
}
return 0;
}
int post_dump_dmabuf_check()
{
if (!list_empty(&dmabuf_list)) {
pr_err("Not all dma buffers have been dumped\n");
return -1;
}
return 0;
}

View file

@ -1,16 +0,0 @@
#ifndef __AMDGPU_PLUGIN_DMABUF_H__
#define __AMDGPU_PLUGIN_DMABUF_H__
#include "amdgpu_plugin_util.h"
#include "criu-amdgpu.pb-c.h"
int amdgpu_plugin_dmabuf_dump(int fd, int id);
int amdgpu_plugin_dmabuf_restore(int id);
int try_dump_dmabuf_list();
int post_dump_dmabuf_check();
int get_dmabuf_info(int fd, struct stat *st);
#endif /* __AMDGPU_PLUGIN_DMABUF_H__ */

View file

@ -19,115 +19,19 @@
#include <dirent.h>
#include "common/list.h"
#include "files.h"
#include "fdstore.h"
#include "criu-amdgpu.pb-c.h"
/* Define __user as empty for kernel headers in user-space */
#define __user
#include "drm.h"
#include <xf86drm.h>
#include <libdrm/amdgpu.h>
#include "xmalloc.h"
#include "amdgpu_drm.h"
#include "criu-log.h"
#include "kfd_ioctl.h"
#include "amdgpu_plugin_drm.h"
#include "amdgpu_plugin_util.h"
#include "amdgpu_plugin_topology.h"
#include "util.h"
#include "common/scm.h"
int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd)
{
uint32_t handle;
int fd = amdgpu_device_get_fd(h_dev);
if (dmabuf_fd == -1) {
return -1;
}
if (drmPrimeFDToHandle(fd, dmabuf_fd, &handle))
return -1;
return handle;
}
int drmIoctl(int fd, unsigned long request, void *arg)
{
int ret, max_retries = 200;
do {
ret = ioctl(fd, request, arg);
} while (ret == -1 && max_retries-- > 0 && (errno == EINTR || errno == EAGAIN));
if (ret == -1 && errno == EBADF)
/* In case pthread_atfork didn't catch it, this will
* make any subsequent hsaKmt calls fail in CHECK_KFD_OPEN.
*/
pr_perror("KFD file descriptor not valid in this process");
return ret;
}
static int allocate_bo_entries(CriuRenderNode *e, int num_bos)
{
e->bo_entries = xmalloc(sizeof(DrmBoEntry *) * num_bos);
if (!e->bo_entries) {
pr_err("Failed to allocate bo_info\n");
return -ENOMEM;
}
for (int i = 0; i < num_bos; i++) {
DrmBoEntry *entry = xzalloc(sizeof(*entry));
if (!entry) {
pr_err("Failed to allocate botest\n");
return -ENOMEM;
}
drm_bo_entry__init(entry);
e->bo_entries[i] = entry;
e->n_bo_entries++;
}
return 0;
}
static int allocate_vm_entries(DrmBoEntry *e, int num_vms)
{
e->vm_entries = xmalloc(sizeof(DrmVmEntry *) * num_vms);
if (!e->vm_entries) {
pr_err("Failed to allocate bo_info\n");
return -ENOMEM;
}
for (int i = 0; i < num_vms; i++) {
DrmVmEntry *entry = xzalloc(sizeof(*entry));
if (!entry) {
pr_err("Failed to allocate botest\n");
return -ENOMEM;
}
drm_vm_entry__init(entry);
e->vm_entries[i] = entry;
e->n_vm_entries++;
}
return 0;
}
static void free_e(CriuRenderNode *e)
{
for (int i = 0; i < e->n_bo_entries; i++) {
if (e->bo_entries[i])
xfree(e->bo_entries[i]);
}
xfree(e);
}
int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st)
{
@ -156,257 +60,19 @@ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st)
return 0;
}
static int restore_bo_contents_drm(int drm_render_minor, CriuRenderNode *rd, int drm_fd, int *dmabufs)
{
size_t image_size = 0, max_bo_size = 0, buffer_size;
struct amdgpu_gpu_info gpu_info = { 0 };
amdgpu_device_handle h_dev;
uint64_t max_copy_size;
uint32_t major, minor;
FILE *bo_contents_fp = NULL;
void *buffer = NULL;
char img_path[40];
int i, ret = 0;
ret = amdgpu_device_initialize(drm_fd, &major, &minor, &h_dev);
if (ret) {
pr_perror("failed to initialize device");
goto exit;
}
plugin_log_msg("libdrm initialized successfully\n");
ret = amdgpu_query_gpu_info(h_dev, &gpu_info);
if (ret) {
pr_perror("failed to query gpuinfo via libdrm");
goto exit;
}
max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? SDMA_LINEAR_COPY_MAX_SIZE :
SDMA_LINEAR_COPY_MAX_SIZE - 1;
for (i = 0; i < rd->num_of_bos; i++) {
if (rd->bo_entries[i]->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
if (rd->bo_entries[i]->size > max_bo_size)
max_bo_size = rd->bo_entries[i]->size;
}
}
buffer_size = max_bo_size;
posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size);
if (!buffer) {
pr_perror("Failed to alloc aligned memory. Consider setting KFD_MAX_BUFFER_SIZE.");
ret = -ENOMEM;
goto exit;
}
for (i = 0; i < rd->num_of_bos; i++) {
if (!(rd->bo_entries[i]->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)))
continue;
if (rd->bo_entries[i]->num_of_vms == 0)
continue;
snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, rd->id, drm_render_minor, i);
bo_contents_fp = open_img_file(img_path, false, &image_size);
ret = sdma_copy_bo(dmabufs[i], rd->bo_entries[i]->size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size,
SDMA_OP_VRAM_WRITE, true);
if (ret) {
pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i);
break;
}
plugin_log_msg("** Successfully filled the BO using sDMA: bo_buckets[%d] **\n", i);
if (bo_contents_fp)
fclose(bo_contents_fp);
}
exit:
for (int i = 0; i < rd->num_of_bos; i++) {
if (dmabufs[i] != KFD_INVALID_FD)
close(dmabufs[i]);
}
xfree(buffer);
amdgpu_device_deinitialize(h_dev);
return ret;
}
int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm)
{
CriuRenderNode *rd = NULL;
CriuRenderNode rd = CRIU_RENDER_NODE__INIT;
struct tp_node *tp_node;
char path[PATH_MAX];
unsigned char *buf;
int minor;
int len;
int ret;
size_t image_size;
struct tp_node *tp_node;
struct drm_amdgpu_gem_list_handles list_handles_args = { 0 };
struct drm_amdgpu_gem_list_handles_entry *list_handles_entries;
int num_bos;
rd = xmalloc(sizeof(*rd));
if (!rd) {
ret = -ENOMEM;
goto exit;
}
criu_render_node__init(rd);
/* Get the topology node of the DRM device */
minor = minor(drm->st_rdev);
rd->drm_render_minor = minor;
rd->id = id;
num_bos = 8;
list_handles_entries = xzalloc(sizeof(struct drm_amdgpu_gem_list_handles_entry) * num_bos);
list_handles_args.num_entries = num_bos;
list_handles_args.entries = (uintptr_t)list_handles_entries;
ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES, &list_handles_args);
if (ret && errno == EINVAL) {
pr_info("This kernel appears not to have AMDGPU_GEM_LIST_HANDLES ioctl. Consider disabling Dmabuf IPC or updating your kernel.\n");
list_handles_args.num_entries = 0;
} else if (ret) {
pr_perror("Failed to call bo info ioctl");
goto exit;
}
if (list_handles_args.num_entries > num_bos) {
num_bos = list_handles_args.num_entries;
xfree(list_handles_entries);
list_handles_entries = xzalloc(sizeof(struct drm_amdgpu_gem_list_handles_entry) * num_bos);
list_handles_args.num_entries = num_bos;
list_handles_args.entries = (uintptr_t)list_handles_entries;
ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES, &list_handles_args);
if (ret) {
pr_perror("Failed to call bo info ioctl");
goto exit;
}
} else {
num_bos = list_handles_args.num_entries;
}
rd->num_of_bos = num_bos;
ret = allocate_bo_entries(rd, num_bos);
if (ret)
goto exit;
for (int i = 0; i < num_bos; i++) {
int num_vm_entries = 8;
struct drm_amdgpu_gem_vm_entry *vm_info_entries;
struct drm_amdgpu_gem_op vm_info_args = { 0 };
DrmBoEntry *boinfo = rd->bo_entries[i];
struct drm_amdgpu_gem_list_handles_entry handle_entry = list_handles_entries[i];
union drm_amdgpu_gem_mmap mmap_args = { 0 };
int dmabuf_fd;
uint32_t major, minor;
amdgpu_device_handle h_dev;
void *buffer = NULL;
char img_path[40];
FILE *bo_contents_fp = NULL;
int device_fd;
boinfo->size = handle_entry.size;
boinfo->alloc_flags = handle_entry.alloc_flags;
boinfo->preferred_domains = handle_entry.preferred_domains;
boinfo->alignment = handle_entry.alignment;
boinfo->handle = handle_entry.gem_handle;
boinfo->is_import = (handle_entry.flags & AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT) || shared_bo_has_exporter(boinfo->handle);
mmap_args.in.handle = boinfo->handle;
if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &mmap_args) == -1) {
pr_perror("Error Failed to call mmap ioctl");
ret = -1;
goto exit;
}
boinfo->offset = mmap_args.out.addr_ptr;
vm_info_entries = xzalloc(sizeof(struct drm_amdgpu_gem_vm_entry) * num_vm_entries);
vm_info_args.handle = handle_entry.gem_handle;
vm_info_args.num_entries = num_vm_entries;
vm_info_args.value = (uintptr_t)vm_info_entries;
vm_info_args.op = AMDGPU_GEM_OP_GET_MAPPING_INFO;
ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_OP, &vm_info_args);
if (ret) {
pr_perror("Failed to call vm info ioctl");
goto exit;
}
if (vm_info_args.num_entries > num_vm_entries) {
num_vm_entries = vm_info_args.num_entries;
xfree(vm_info_entries);
vm_info_entries = xzalloc(sizeof(struct drm_amdgpu_gem_vm_entry) * num_vm_entries);
vm_info_args.handle = handle_entry.gem_handle;
vm_info_args.num_entries = num_vm_entries;
vm_info_args.value = (uintptr_t)vm_info_entries;
vm_info_args.op = AMDGPU_GEM_OP_GET_MAPPING_INFO;
ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_OP, &vm_info_args);
if (ret) {
pr_perror("Failed to call vm info ioctl");
goto exit;
}
} else {
num_vm_entries = vm_info_args.num_entries;
}
boinfo->num_of_vms = num_vm_entries;
ret = allocate_vm_entries(boinfo, num_vm_entries);
if (ret)
goto exit;
for (int j = 0; j < num_vm_entries; j++) {
DrmVmEntry *vminfo = boinfo->vm_entries[j];
boinfo->addr = vm_info_entries[j].addr;
vminfo->addr = vm_info_entries[j].addr;
vminfo->size = vm_info_entries[j].size;
vminfo->offset = vm_info_entries[j].offset;
vminfo->flags = vm_info_entries[j].flags;
}
ret = amdgpu_device_initialize(fd, &major, &minor, &h_dev);
device_fd = amdgpu_device_get_fd(h_dev);
drmPrimeHandleToFD(device_fd, boinfo->handle, 0, &dmabuf_fd);
snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, rd->id, rd->drm_render_minor, i);
bo_contents_fp = open_img_file(img_path, true, &image_size);
posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), handle_entry.size);
ret = sdma_copy_bo(dmabuf_fd, handle_entry.size, bo_contents_fp, buffer, handle_entry.size, h_dev, 0x1000,
SDMA_OP_VRAM_READ, false);
if (dmabuf_fd != KFD_INVALID_FD)
close(dmabuf_fd);
if (bo_contents_fp)
fclose(bo_contents_fp);
ret = amdgpu_device_deinitialize(h_dev);
if (ret)
goto exit;
xfree(vm_info_entries);
}
xfree(list_handles_entries);
for (int i = 0; i < num_bos; i++) {
DrmBoEntry *boinfo = rd->bo_entries[i];
ret = record_shared_bo(boinfo->handle, boinfo->is_import);
if (ret)
goto exit;
}
tp_node = sys_get_node_by_render_minor(&src_topology, minor);
if (!tp_node) {
pr_err("Failed to find a device with minor number = %d\n", minor);
@ -414,156 +80,21 @@ int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm)
}
/* Get the GPU_ID of the DRM device */
rd->gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id);
if (!rd->gpu_id) {
pr_err("Failed to find valid gpu_id for the device = %d\n", rd->gpu_id);
rd.gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id);
if (!rd.gpu_id) {
pr_err("Failed to find valid gpu_id for the device = %d\n", rd.gpu_id);
return -ENODEV;
}
len = criu_render_node__get_packed_size(rd);
len = criu_render_node__get_packed_size(&rd);
buf = xmalloc(len);
if (!buf)
return -ENOMEM;
criu_render_node__pack(rd, buf);
criu_render_node__pack(&rd, buf);
snprintf(path, sizeof(path), IMG_DRM_FILE, id);
ret = write_img_file(path, buf, len);
xfree(buf);
exit:
free_e(rd);
return ret;
}
int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd)
{
int ret = 0;
bool retry_needed = false;
uint32_t major, minor;
amdgpu_device_handle h_dev;
int device_fd;
int *dmabufs = xzalloc(sizeof(int) * rd->num_of_bos);
ret = amdgpu_device_initialize(fd, &major, &minor, &h_dev);
if (ret) {
pr_info("Error in init amdgpu device\n");
goto exit;
}
device_fd = amdgpu_device_get_fd(h_dev);
for (int i = 0; i < rd->num_of_bos; i++) {
DrmBoEntry *boinfo = rd->bo_entries[i];
int dmabuf_fd = -1;
uint32_t handle;
struct drm_gem_change_handle change_args = { 0 };
union drm_amdgpu_gem_mmap mmap_args = { 0 };
struct drm_amdgpu_gem_va va_args = { 0 };
int fd_id;
if (work_already_completed(boinfo->handle, rd->drm_render_minor)) {
continue;
} else if (boinfo->handle != -1) {
if (boinfo->is_import) {
fd_id = amdgpu_id_for_handle(boinfo->handle);
if (fd_id == -1) {
retry_needed = true;
continue;
}
dmabuf_fd = fdstore_get(fd_id);
}
}
if (boinfo->is_import) {
drmPrimeFDToHandle(device_fd, dmabuf_fd, &handle);
} else {
union drm_amdgpu_gem_create create_args = { 0 };
create_args.in.bo_size = boinfo->size;
create_args.in.alignment = boinfo->alignment;
create_args.in.domains = boinfo->preferred_domains;
create_args.in.domain_flags = boinfo->alloc_flags;
if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &create_args) == -1) {
pr_perror("Error Failed to call create ioctl");
ret = -1;
goto exit;
}
handle = create_args.out.handle;
drmPrimeHandleToFD(device_fd, handle, 0, &dmabuf_fd);
}
change_args.handle = handle;
change_args.new_handle = boinfo->handle;
if (drmIoctl(fd, DRM_IOCTL_GEM_CHANGE_HANDLE, &change_args) == -1) {
pr_perror("Error Failed to call change ioctl; check if the kernel has DRM_IOCTL_GEM_CHANGE_HANDLE support");
ret = -1;
goto exit;
}
if (!boinfo->is_import)
store_dmabuf_fd(boinfo->handle, dmabuf_fd);
dmabufs[i] = dmabuf_fd;
ret = record_completed_work(boinfo->handle, rd->drm_render_minor);
if (ret)
goto exit;
mmap_args.in.handle = boinfo->handle;
if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &mmap_args) == -1) {
pr_perror("Error Failed to call mmap ioctl");
ret = -1;
goto exit;
}
for (int j = 0; j < boinfo->num_of_vms; j++) {
DrmVmEntry *vminfo = boinfo->vm_entries[j];
va_args.handle = boinfo->handle;
va_args.operation = AMDGPU_VA_OP_MAP;
va_args.flags = vminfo->flags;
va_args.va_address = vminfo->addr;
va_args.offset_in_bo = vminfo->offset;
va_args.map_size = vminfo->size;
if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_VA, &va_args) == -1) {
pr_perror("Error Failed to call gem va ioctl");
ret = -1;
goto exit;
}
}
ret = save_vma_updates(boinfo->offset, boinfo->addr, mmap_args.out.addr_ptr, fd);
if (ret < 0)
goto exit;
}
if (ret) {
pr_info("Error in deinit amdgpu device\n");
goto exit;
}
ret = record_completed_work(-1, rd->drm_render_minor);
if (ret)
goto exit;
ret = amdgpu_device_deinitialize(h_dev);
if (rd->num_of_bos > 0) {
ret = restore_bo_contents_drm(rd->drm_render_minor, rd, fd, dmabufs);
if (ret)
goto exit;
}
exit:
if (ret < 0)
return ret;
xfree(dmabufs);
return retry_needed;
}

View file

@ -24,17 +24,5 @@ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *drm);
*/
int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm);
int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd);
int amdgpu_plugin_drm_unpause_file(int fd);
int amdgpu_id_for_handle(int handle);
int store_dmabuf_fd(int handle, int fd);
int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd);
int save_vma_updates(uint64_t offset, uint64_t addr, uint64_t restored_offset, int gpu_id);
#endif /* __AMDGPU_PLUGIN_DRM_H__ */

View file

@ -45,7 +45,7 @@ bool kfd_capability_check = true;
*/
int fd_next = -1;
int open_drm_render_device(int minor)
static int open_drm_render_device(int minor)
{
char path[128];
int fd, ret_fd;

View file

@ -118,7 +118,6 @@ struct tp_node *sys_get_node_by_gpu_id(const struct tp_system *sys, const uint32
struct tp_node *sys_get_node_by_render_minor(const struct tp_system *sys, const int drm_render_minor);
struct tp_node *sys_get_node_by_index(const struct tp_system *sys, uint32_t index);
int open_drm_render_device(int minor);
int node_get_drm_render_device(struct tp_node *node);
void sys_close_drm_render_devices(struct tp_system *sys);

View file

@ -37,11 +37,9 @@
#include "amdgpu_drm.h"
#include "amdgpu_plugin_util.h"
#include "amdgpu_plugin_topology.h"
#include "amdgpu_plugin_drm.h"
static LIST_HEAD(dumped_fds);
static LIST_HEAD(shared_bos);
static LIST_HEAD(completed_work);
/* Tracks number of device files that need to be checkpointed */
static int dev_file_cnt = 0;
/* Helper structures to encode device topology of SRC and DEST platforms */
struct tp_system src_topology;
@ -51,145 +49,23 @@ struct tp_system dest_topology;
struct device_maps checkpoint_maps;
struct device_maps restore_maps;
int record_dumped_fd(int fd, bool is_drm)
bool checkpoint_is_complete()
{
int newfd = dup(fd);
if (newfd < 0)
return newfd;
struct dumped_fd *st = malloc(sizeof(struct dumped_fd));
if (!st)
return -1;
st->fd = newfd;
st->is_drm = is_drm;
list_add(&st->l, &dumped_fds);
return 0;
return (dev_file_cnt == 0);
}
struct list_head *get_dumped_fds()
void decrement_checkpoint_count()
{
return &dumped_fds;
dev_file_cnt--;
}
bool shared_bo_has_exporter(int handle)
void init_gpu_count(struct tp_system *topo)
{
struct shared_bo *bo;
if (dev_file_cnt != 0)
return;
if (handle == -1)
return false;
list_for_each_entry(bo, &shared_bos, l) {
if (bo->handle == handle) {
return bo->has_exporter;
}
}
return false;
}
int record_shared_bo(int handle, bool is_imported)
{
struct shared_bo *bo;
if (handle == -1)
return 0;
list_for_each_entry(bo, &shared_bos, l) {
if (bo->handle == handle) {
return 0;
}
}
bo = malloc(sizeof(struct shared_bo));
if (!bo)
return -1;
bo->handle = handle;
bo->has_exporter = !is_imported;
list_add(&bo->l, &shared_bos);
return 0;
}
int handle_for_shared_bo_fd(int fd)
{
struct dumped_fd *df;
int trial_handle;
amdgpu_device_handle h_dev;
uint32_t major, minor;
struct shared_bo *bo;
list_for_each_entry(df, &dumped_fds, l) {
/* see if the gem handle for fd using the hdev for df->fd is the
same as bo->handle. */
if (!df->is_drm) {
continue;
}
if (amdgpu_device_initialize(df->fd, &major, &minor, &h_dev)) {
pr_err("Failed to initialize amdgpu device\n");
continue;
}
trial_handle = get_gem_handle(h_dev, fd);
if (trial_handle < 0)
continue;
list_for_each_entry(bo, &shared_bos, l) {
if (bo->handle == trial_handle)
return trial_handle;
}
amdgpu_device_deinitialize(h_dev);
}
return -1;
}
int record_completed_work(int handle, int id)
{
struct restore_completed_work *work;
work = malloc(sizeof(struct restore_completed_work));
if (!work)
return -1;
work->handle = handle;
work->id = id;
list_add(&work->l, &completed_work);
return 0;
}
bool work_already_completed(int handle, int id)
{
struct restore_completed_work *work;
list_for_each_entry(work, &completed_work, l) {
if (work->handle == handle && work->id == id) {
return true;
}
}
return false;
}
void clear_restore_state()
{
while (!list_empty(&completed_work)) {
struct restore_completed_work *st = list_first_entry(&completed_work, struct restore_completed_work, l);
list_del(&st->l);
free(st);
}
}
void clear_dumped_fds()
{
while (!list_empty(&dumped_fds)) {
struct dumped_fd *st = list_first_entry(&dumped_fds, struct dumped_fd, l);
list_del(&st->l);
close(st->fd);
free(st);
}
/* We add ONE to include checkpointing of KFD device */
dev_file_cnt = 1 + topology_gpu_count(topo);
}
int read_fp(FILE *fp, void *buf, const size_t buf_len)

View file

@ -1,8 +1,6 @@
#ifndef __AMDGPU_PLUGIN_UTIL_H__
#define __AMDGPU_PLUGIN_UTIL_H__
#include <libdrm/amdgpu.h>
#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
#endif
@ -53,18 +51,14 @@
/* Name of file having serialized data of DRM device */
#define IMG_DRM_FILE "amdgpu-renderD-%d.img"
/* Name of file having serialized data of dmabuf meta */
#define IMG_DMABUF_FILE "amdgpu-dmabuf_%d.img"
/* Name of file having serialized data of DRM device buffer objects (BOs) */
#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%d-%04x.img"
#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%04x.img"
/* Helper macros to Checkpoint and Restore a ROCm file */
#define HSAKMT_SHM_PATH "/dev/shm/hsakmt_shared_mem"
#define HSAKMT_SHM "/hsakmt_shared_mem"
#define HSAKMT_SEM_PATH "/dev/shm/sem.hsakmt_semaphore"
#define HSAKMT_SEM "hsakmt_semaphore"
#define DMABUF_LINK "/dmabuf"
/* Help macros to build sDMA command packets */
#define SDMA_PACKET(op, sub_op, e) ((((e)&0xFFFF) << 16) | (((sub_op)&0xFF) << 8) | (((op)&0xFF) << 0))
@ -79,24 +73,6 @@ enum sdma_op_type {
SDMA_OP_VRAM_WRITE,
};
struct dumped_fd {
struct list_head l;
int fd;
bool is_drm;
};
struct shared_bo {
struct list_head l;
int handle;
bool has_exporter;
};
struct restore_completed_work {
struct list_head l;
int handle;
int id;
};
/* Helper structures to encode device topology of SRC and DEST platforms */
extern struct tp_system src_topology;
extern struct tp_system dest_topology;
@ -121,25 +97,10 @@ int read_file(const char *file_path, void *buf, const size_t buf_len);
int write_img_file(char *path, const void *buf, const size_t buf_len);
FILE *open_img_file(char *path, bool write, size_t *size);
int record_dumped_fd(int fd, bool is_drm);
struct list_head *get_dumped_fds();
void clear_dumped_fds();
bool shared_bo_has_exporter(int handle);
int record_shared_bo(int handle, bool is_imported);
int handle_for_shared_bo_fd(int dmabuf_fd);
int record_completed_work(int handle, int id);
bool work_already_completed(int handle, int id);
void clear_restore_state();
bool checkpoint_is_complete();
void decrement_checkpoint_count();
void init_gpu_count(struct tp_system *topology);
void print_kfd_bo_stat(int bo_cnt, struct kfd_criu_bo_bucket *bo_list);
int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp,
void *buffer, size_t buffer_size, amdgpu_device_handle h_dev,
uint64_t max_copy_size, enum sdma_op_type type, bool do_not_free);
int serve_out_dmabuf_fd(int handle, int fd);
#endif /* __AMDGPU_PLUGIN_UTIL_H__ */

View file

@ -1,320 +0,0 @@
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include "amdgpu_socket_utils.h"
#include "criu-log.h"
#include "common/scm.h"
#include "fdstore.h"
#include "util-pie.h"
#include "util.h"
int parallel_socket_addr_len;
struct sockaddr_un parallel_socket_addr;
int parallel_socket_id = 0;
static void amdgpu_socket_name_gen(struct sockaddr_un *addr, int *len)
{
addr->sun_family = AF_UNIX;
snprintf(addr->sun_path, UNIX_PATH_MAX, "x/criu-amdgpu-parallel-%s", criu_run_id);
*len = SUN_LEN(addr);
*addr->sun_path = '\0';
}
int install_parallel_sock(void)
{
int ret = 0;
int sock_fd;
sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
if (sock_fd < 0) {
pr_perror("socket creation failed");
return -1;
}
amdgpu_socket_name_gen(&parallel_socket_addr, &parallel_socket_addr_len);
ret = bind(sock_fd, (struct sockaddr *)&parallel_socket_addr, parallel_socket_addr_len);
if (ret < 0) {
pr_perror("bind failed");
goto err;
}
ret = listen(sock_fd, SOMAXCONN);
if (ret < 0) {
pr_perror("listen failed");
goto err;
}
parallel_socket_id = fdstore_add(sock_fd);
if (parallel_socket_id < 0) {
ret = -1;
goto err;
}
err:
close(sock_fd);
return ret;
}
void parallel_restore_bo_add(int dmabuf_fd, int gpu_id, uint64_t size, uint64_t offset,
parallel_restore_cmd *restore_cmd)
{
parallel_restore_entry *restore_entry = &restore_cmd->entries[restore_cmd->cmd_head.entry_num];
restore_entry->gpu_id = gpu_id;
restore_entry->write_id = restore_cmd->cmd_head.fd_write_num;
restore_entry->write_offset = 0;
restore_entry->read_offset = offset;
restore_entry->size = size;
restore_cmd->fds_write[restore_cmd->cmd_head.fd_write_num] = dmabuf_fd;
restore_cmd->cmd_head.entry_num += 1;
restore_cmd->cmd_head.fd_write_num += 1;
}
void parallel_restore_gpu_id_add(int gpu_id, int minor, parallel_restore_cmd *restore_cmd)
{
restore_cmd->gpu_ids[restore_cmd->cmd_head.gpu_num] = (parallel_gpu_info){ gpu_id, minor };
restore_cmd->cmd_head.gpu_num += 1;
}
static int send_metadata(int sock_fd, parallel_restore_cmd *restore_cmd)
{
if (send(sock_fd, &restore_cmd->cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) {
pr_perror("Send parallel restore command head fail");
return -1;
}
return 0;
}
static int send_gpu_ids(int sock_fd, parallel_restore_cmd *restore_cmd)
{
if (send(sock_fd, restore_cmd->gpu_ids, restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info), 0) < 0) {
pr_perror("Send GPU ids of parallel restore command fail");
return -1;
}
return 0;
}
static int send_cmds(int sock_fd, parallel_restore_cmd *restore_cmd)
{
if (send(sock_fd, restore_cmd->entries, restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry), 0) < 0) {
pr_perror("Send parallel restore command fail");
return -1;
}
return 0;
}
static int send_dmabuf_fds(int sock_fd, parallel_restore_cmd *restore_cmd)
{
if (send_fds(sock_fd, NULL, 0, restore_cmd->fds_write, restore_cmd->cmd_head.fd_write_num, 0, 0) < 0) {
pr_perror("Send dmabuf fds fail");
return -1;
}
return 0;
}
int send_parallel_restore_cmd(parallel_restore_cmd *restore_cmd)
{
int sock_fd;
int ret = 0;
sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
if (sock_fd < 0) {
pr_perror("Socket creation failed");
return -1;
}
ret = connect(sock_fd, (struct sockaddr *)&parallel_socket_addr, parallel_socket_addr_len);
if (ret < 0) {
pr_perror("Connect failed");
goto err;
}
ret = send_metadata(sock_fd, restore_cmd);
if (ret) {
goto err;
}
ret = send_gpu_ids(sock_fd, restore_cmd);
if (ret) {
goto err;
}
ret = send_cmds(sock_fd, restore_cmd);
if (ret) {
goto err;
}
ret = send_dmabuf_fds(sock_fd, restore_cmd);
err:
close(sock_fd);
return ret;
}
int init_parallel_restore_cmd(int num, int id, int gpu_num, parallel_restore_cmd *restore_cmd)
{
restore_cmd->cmd_head.id = id;
restore_cmd->cmd_head.fd_write_num = 0;
restore_cmd->cmd_head.entry_num = 0;
restore_cmd->cmd_head.gpu_num = 0;
restore_cmd->gpu_ids = xzalloc(gpu_num * sizeof(parallel_gpu_info));
if (!restore_cmd->gpu_ids)
return -ENOMEM;
restore_cmd->fds_write = xzalloc(num * sizeof(int));
if (!restore_cmd->fds_write)
return -ENOMEM;
restore_cmd->entries = xzalloc(num * sizeof(parallel_restore_entry));
if (!restore_cmd->entries)
return -ENOMEM;
return 0;
}
void free_parallel_restore_cmd(parallel_restore_cmd *restore_cmd)
{
if (restore_cmd->gpu_ids)
xfree(restore_cmd->gpu_ids);
if (restore_cmd->fds_write)
xfree(restore_cmd->fds_write);
if (restore_cmd->entries)
xfree(restore_cmd->entries);
}
static int init_parallel_restore_cmd_by_head(parallel_restore_cmd *restore_cmd)
{
restore_cmd->gpu_ids = xzalloc(restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info));
if (!restore_cmd->gpu_ids)
return -ENOMEM;
restore_cmd->fds_write = xzalloc(restore_cmd->cmd_head.fd_write_num * sizeof(int));
if (!restore_cmd->fds_write)
return -ENOMEM;
restore_cmd->entries = xzalloc(restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry));
if (!restore_cmd->entries)
return -ENOMEM;
return 0;
}
static int check_quit_cmd(parallel_restore_cmd *restore_cmd)
{
return restore_cmd->cmd_head.fd_write_num == 0;
}
static int recv_metadata(int client_fd, parallel_restore_cmd *restore_cmd)
{
if (recv(client_fd, &restore_cmd->cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) {
pr_perror("Recv parallel restore command head fail");
return -1;
}
return 0;
}
static int recv_cmds(int client_fd, parallel_restore_cmd *restore_cmd)
{
if (recv(client_fd, restore_cmd->entries, restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry), 0) < 0) {
pr_perror("Recv parallel restore command fail");
return -1;
}
return 0;
}
static int recv_gpu_ids(int sock_fd, parallel_restore_cmd *restore_cmd)
{
if (recv(sock_fd, restore_cmd->gpu_ids, restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info), 0) < 0) {
pr_perror("Send GPU ids of parallel restore command fail");
return -1;
}
return 0;
}
static int recv_dmabuf_fds(int client_fd, parallel_restore_cmd *restore_cmd)
{
if (recv_fds(client_fd, restore_cmd->fds_write, restore_cmd->cmd_head.fd_write_num, 0, 0) < 0) {
pr_perror("Recv dmabuf fds fail");
return -1;
}
return 0;
}
int recv_parallel_restore_cmd(parallel_restore_cmd *restore_cmd)
{
int sock_fd, client_fd;
int ret = 0;
sock_fd = fdstore_get(parallel_socket_id);
if (sock_fd < 0)
return -1;
client_fd = accept(sock_fd, NULL, NULL);
if (client_fd < 0) {
ret = client_fd;
goto err_accept;
}
ret = recv_metadata(client_fd, restore_cmd);
if (ret) {
goto err;
}
// Return 1 to quit
if (check_quit_cmd(restore_cmd)) {
ret = 1;
goto err;
}
ret = init_parallel_restore_cmd_by_head(restore_cmd);
if (ret) {
goto err;
}
ret = recv_gpu_ids(client_fd, restore_cmd);
if (ret) {
goto err;
}
ret = recv_cmds(client_fd, restore_cmd);
if (ret) {
goto err;
}
ret = recv_dmabuf_fds(client_fd, restore_cmd);
err:
close(client_fd);
err_accept:
close(sock_fd);
return ret;
}
int close_parallel_restore_server(void)
{
int sock_fd;
int ret = 0;
parallel_restore_cmd_head cmd_head;
sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
if (sock_fd < 0) {
pr_perror("Socket creation failed");
return -1;
}
ret = connect(sock_fd, (struct sockaddr *)&parallel_socket_addr, parallel_socket_addr_len);
if (ret < 0) {
pr_perror("Connect failed");
goto err;
}
memset(&cmd_head, 0, sizeof(parallel_restore_cmd_head));
if (send(sock_fd, &cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) {
pr_perror("Send parallel restore command head fail");
return -1;
}
err:
close(sock_fd);
return ret;
}

View file

@ -1,54 +0,0 @@
#ifndef __KFD_PLUGIN_AMDGPU_SOCKET_UTILS_H__
#define __KFD_PLUGIN_AMDGPU_SOCKET_UTILS_H__
typedef struct {
int id;
int fd_write_num; /* The number of buffer objects to be restored. */
int entry_num; /* The number of restore commands.*/
int gpu_num;
} parallel_restore_cmd_head;
typedef struct {
int gpu_id;
int minor;
} parallel_gpu_info;
typedef struct {
int gpu_id;
int write_id;
uint64_t read_offset;
uint64_t write_offset;
uint64_t size;
} parallel_restore_entry;
typedef struct {
parallel_restore_cmd_head cmd_head;
int *fds_write;
parallel_gpu_info *gpu_ids;
parallel_restore_entry *entries;
} parallel_restore_cmd;
/*
* For parallel_restore, a background thread in the main CRIU process is used to restore the GPU
* buffer object. However, initially, the ownership of these buffer objects and the metadata for
* restoration are all with the target process. Therefore, we introduce a series of functions to
* help the target process send these tasks to the main CRIU process.
*/
int init_parallel_restore_cmd(int num, int id, int gpu_num, parallel_restore_cmd *restore_cmd);
void free_parallel_restore_cmd(parallel_restore_cmd *restore_cmd);
int install_parallel_sock(void);
int send_parallel_restore_cmd(parallel_restore_cmd *restore_cmd);
int recv_parallel_restore_cmd(parallel_restore_cmd *restore_cmd);
void parallel_restore_bo_add(int dmabuf_fd, int gpu_id, uint64_t size, uint64_t offset,
parallel_restore_cmd *restore_cmd);
void parallel_restore_gpu_id_add(int gpu_id, int minor, parallel_restore_cmd *restore_cmd);
int close_parallel_restore_server(void);
#endif

View file

@ -46,7 +46,6 @@ message kfd_bo_entry {
required uint64 offset = 3;
required uint32 alloc_flags = 4;
required uint32 gpu_id = 5;
required uint32 handle = 6;
}
message criu_kfd {
@ -62,34 +61,6 @@ message criu_kfd {
required bytes priv_data = 10;
}
message drm_bo_entry {
required uint64 addr = 1;
required uint64 size = 2;
required uint64 offset = 3;
required uint64 alloc_flags = 4;
required uint64 alignment = 5;
required uint32 preferred_domains = 6;
required uint32 handle = 7;
required uint32 is_import = 8;
required uint32 num_of_vms = 9;
repeated drm_vm_entry vm_entries = 10;
}
message drm_vm_entry {
required uint64 addr = 1;
required uint64 size = 2;
required uint64 offset = 3;
required uint64 flags = 4;
}
message criu_render_node {
required uint32 gpu_id = 1;
required uint32 id = 2;
required uint32 drm_render_minor = 3;
required uint64 num_of_bos = 4;
repeated drm_bo_entry bo_entries = 5;
}
message criu_dmabuf_node {
required uint32 gem_handle = 1;
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -23,12 +23,9 @@
#ifndef KFD_IOCTL_H_INCLUDED
#define KFD_IOCTL_H_INCLUDED
#include <libdrm/drm.h>
#include <linux/ioctl.h>
/* Define __user as empty for kernel headers in user-space */
#define __user
#include "drm.h"
/*
* - 1.1 - initial version
* - 1.3 - Add SMI events support

View file

@ -19,7 +19,7 @@ all: $(DEPS_CUDA)
cuda_plugin.so: cuda_plugin.c
$(call msg-gen, $@)
$(Q) $(CC) $(PLUGIN_CFLAGS) $(DEFINES) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS)
$(Q) $(CC) $(PLUGIN_CFLAGS) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS)
clean:
$(call msg-clean, $@)

View file

@ -2,7 +2,7 @@
set -x -e -o pipefail
# Workaround: Docker 28.x and 29.x has a known regression that breaks the checkpoint and
# Workaround: Docker 28.x has a known regression that breaks the checkpoint and
# restore (C/R) feature. Let's install previous, or next major version. See
# https://github.com/moby/moby/issues/50750 for details on the bug.
export DEBIAN_FRONTEND=noninteractive
@ -17,7 +17,7 @@ echo \
$(. /etc/os-release && echo "${UBUNTU_CODENAME:-$VERSION_CODENAME}") stable" > /etc/apt/sources.list.d/docker.list
apt update -y
apt-cache madison docker-ce | awk '{ print $3 }'
verstr="$(apt-cache madison docker-ce | awk '{ print $3 }' | sort | grep -Ev ':(28|29)\.'| tail -n 1)"
verstr="$(apt-cache madison docker-ce | awk '{ print $3 }' | sort | grep -v ':28\.'| tail -n 1)"
../../contrib/apt-install -y "docker-ce=$verstr" "docker-ce-cli=$verstr"
# docker checkpoint and restore is an experimental feature

View file

@ -79,14 +79,6 @@ define pkg-cflags
$(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(PKG_CONFIG) --cflags $(1))
endef
ifeq ($(GCS_ENABLE),1)
CFLAGS += -mbranch-protection=standard
LDFLAGS += -z experimental-gcs=check
TEST_ENV = GLIBC_TUNABLES=glibc.cpu.aarch64_gcs=1:glibc.cpu.aarch64_gcs_policy=2
else
TEST_ENV =
endif
%.d: %.c
$(E) " DEP " $@
$(Q)$(CC) $(CFLAGS) $(CPPFLAGS) -MM -MP $< -o $@

View file

@ -54,7 +54,7 @@ mnt_info_t *get_cwd_mnt_info(void)
while (fgets(str, sizeof(str), f)) {
char *hyphen = strchr(str, '-');
ret = sscanf(str, "%i %i %u:%u %4095s %4095s", &mnt_id, &parent_mnt_id, &kmaj, &kmin, root, mountpoint);
ret = sscanf(str, "%i %i %u:%u %s %s", &mnt_id, &parent_mnt_id, &kmaj, &kmin, root, mountpoint);
if (ret != 6 || !hyphen)
goto err;
ret = sscanf(hyphen + 1, " %ms", &fsname);

View file

@ -520,30 +520,30 @@ install: all
.PHONY: all install
$(TST_NOFILE:%=%.pid): %.pid: %
$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out
$(<D)/$(<F) --pidfile=$@ --outfile=$<.out
$(TST_FILE:%=%.pid): %.pid: %
$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --filename=$<.test
$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --filename=$<.test
$(TST_DIR:%=%.pid): %.pid: %
$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --dirname=$<.test
$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --dirname=$<.test
$(TST_DIR_FILE:%=%.pid): %.pid: %
$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --dirname=$<.dir.test --filename=$<.test
$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --dirname=$<.dir.test --filename=$<.test
cmdlinenv00.pid: cmdlinenv00
$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --arg1=arg1 --arg2=arg2 --arg3=arg3
$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --arg1=arg1 --arg2=arg2 --arg3=arg3
shm-unaligned.pid: shm-unaligned
$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --shmem_size=5000
$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --shmem_size=5000
shm-hugetlb.pid: shm-hugetlb
$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --shmem_size=4194304
$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --shmem_size=4194304
env00.pid: env00
$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --envname=ENV_00_TEST
$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --envname=ENV_00_TEST
umask00.pid: umask00
$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --mask=0345
$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --mask=0345
fifo-rowo-pair.pid: fifo-rowo-pair
$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --name_master=$<.master.test --name_slave=$<.slave.test

View file

@ -59,7 +59,7 @@ int checkprofile(void)
return -1;
}
len = fscanf(f, "%1023[^ \n]s", profile);
len = fscanf(f, "%[^ \n]s", profile);
fclose(f);
if (len != 1) {
fail("wrong number of items scanned %d", len);

View file

@ -56,7 +56,7 @@ static int checkprofile(pid_t pid, char *expected)
return -1;
}
len = fscanf(f, "%1023[^ \n]s", profile);
len = fscanf(f, "%[^ \n]s", profile);
fclose(f);
if (len != 1) {
fail("wrong number of items scanned %d", len);

View file

@ -79,7 +79,7 @@ int main(int argc, char **argv)
if (!s)
continue;
sscanf(paux, "%*d %*d %*d:%*d %*s %1023s", aux);
sscanf(paux, "%*d %*d %*d:%*d %*s %s", aux);
test_msg("found cgroup at %s\n", aux);
for (i = 0; i < 2; i++) {

View file

@ -75,7 +75,7 @@ bool test_exists(char *mountinfo_line, char *path)
char aux[1024], paux[1024];
struct stat st;
sscanf(mountinfo_line, "%*d %*d %*d:%*d %*s %1023s", aux);
sscanf(mountinfo_line, "%*d %*d %*d:%*d %*s %s", aux);
test_msg("found cgroup at %s\n", aux);
ssprintf(paux, "%s/%s", aux, path);

View file

@ -135,7 +135,7 @@ out:
int main(int argc, char **argv)
{
int ret = -1, sk_pair[2], sk, status;
char path[PATH_MAX], c = 0;
char path[PATH_MAX], c;
pid_t pid = 0;
test_init(argc, argv);

View file

@ -46,7 +46,7 @@ int main(int argc, char **argv)
if (!pos)
continue;
result = sscanf(pos, " - %*s %*s %1023s", opts);
result = sscanf(pos, " - %*s %*s %s", opts);
if (result != 1) {
fail("Not able to sscanf line from mountinfo");
goto out;

View file

@ -107,7 +107,7 @@ static int check_file_lock(int fd, char *expected_type, char *expected_option, u
memset(fl_type, 0, sizeof(fl_type));
memset(fl_option, 0, sizeof(fl_option));
num = sscanf(buf, "%*s %*d:%15s %15s %15s %d %x:%x:%ld %*d %*s", fl_flag, fl_type, fl_option, &fl_owner, &maj,
num = sscanf(buf, "%*s %*d:%s %s %s %d %x:%x:%ld %*d %*s", fl_flag, fl_type, fl_option, &fl_owner, &maj,
&min, &i_no);
if (num < 7) {
pr_err("Invalid lock info\n");

View file

@ -41,7 +41,7 @@ static int check_file_lock(pid_t pid, pid_t child, int fd, char *expected_type,
memset(fl_type, 0, sizeof(fl_type));
memset(fl_option, 0, sizeof(fl_option));
num = sscanf(buf, "%*s %*d:%15s %15s %15s %d", fl_flag, fl_type, fl_option, &fl_owner);
num = sscanf(buf, "%*s %*d:%s %s %s %d", fl_flag, fl_type, fl_option, &fl_owner);
if (num < 4) {
pr_perror("Invalid lock info.");
break;

View file

@ -41,7 +41,7 @@ static int check_file_lock(pid_t pid, pid_t child, int fd, char *expected_type,
memset(fl_type, 0, sizeof(fl_type));
memset(fl_option, 0, sizeof(fl_option));
num = sscanf(buf, "%*s %*d:%15s %15s %15s %d", fl_flag, fl_type, fl_option, &fl_owner);
num = sscanf(buf, "%*s %*d:%s %s %s %d", fl_flag, fl_type, fl_option, &fl_owner);
if (num < 4) {
pr_perror("Invalid lock info.");
break;

View file

@ -34,7 +34,7 @@ static int check_file_locks(pid_t child_pid, int fd, int child_fd)
continue;
test_msg("c: %s", buf);
num = sscanf(buf, "%*s %*d:%15s %15s %15s %d %*02x:%*02x:%*d %*d %*s", fl_flag, fl_type, fl_option,
num = sscanf(buf, "%*s %*d:%s %s %s %d %*02x:%*02x:%*d %*d %*s", fl_flag, fl_type, fl_option,
&fl_owner);
if (num < 4) {

View file

@ -414,7 +414,7 @@ static int check_stable_secret(struct test_conf *tc)
return -1;
}
ret = fscanf(fp, "%200s", val);
ret = fscanf(fp, "%s", val);
if (ret != 1) {
pr_perror("fscanf");
fclose(fp);

View file

@ -16,7 +16,7 @@ static int parse_ofd_lock(char *buf, struct flock *lck)
if (strncmp(buf, "lock:\t", 6) != 0)
return 1; /* isn't lock, skip record */
num = sscanf(buf, "%*s %*d: %9s %14s %9s %*d %*x:%*x:%*d %lld %31s", fl_flag, fl_type, fl_option, &start, fl_end);
num = sscanf(buf, "%*s %*d: %s %s %s %*d %*x:%*x:%*d %lld %s", fl_flag, fl_type, fl_option, &start, fl_end);
if (num < 4) {
pr_err("Invalid lock info %s\n", buf);

View file

@ -20,7 +20,7 @@ int main(int argc, char **argv)
{
int fds[2], i;
pid_t pid;
int status, fd = -1;
int fd, status;
test_init(argc, argv);

View file

@ -31,7 +31,7 @@ const char *test_author = "Shashank Balaji <shashank.mahadasyam@sony.com>";
* compiler optimization) and use it (to prevent "unused variable" warning)
*/
void UPROBED_FUNCTION(void) {
volatile int dummy __maybe_unused = 0;
volatile int dummy = 0;
dummy += 1;
}
/* Calling via volatile function pointer ensures noinline at callsite */