data-scs-contenttype="Blog-Post" data-asset-operation="view:CORE8B88E20204C04A0DADCEBC0499683C49"> 493 <style> 494 .title { 495 background-color: #fff; 496 border: 1px solid #F1EFED; 497 border-radius: 22px; 498 max-width: 940px; 499 margin: 0 auto; 500 padding: 5px 25px; 501 } 502 </style> 503 <!-- RC81v1 --> 504 505 <section class="rc81 rc81v1 cpad"> 506 507 <div class="rc81w1 bwidth"> 508 509 <div class="rc81"> 510 <ul> 511 <li class="post-categories"><a href="../category/lnx-technologies" class="rc81accent"> Technologies<span>, </span> </a></li> 512 <li class="post-categories"><a href="../category/lnx-linux-kernel-development" class="rc81accent"> Linux Kernel Development<span>, </span> </a></li> 513 </ul> 514 515 </div> 516 <p class="rc81accent" id="categories"></p> 517 <h1>Syscall latency... and some uses of speculative execution</h1> 518 <span id="publishdate">September 12, 2023 |</span><span id="publishdate"> 23 minute read</span> 519 520 <div class="rc81sub "> 521 <img src="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/img/ui_defaultuserimage.jpg" alt=""> 522 523 <span><a id="postAuthorName" href="/authors/ankur-arora">Ankur Arora</a> 524 <div><span class="rc81title rw-neutral-200txt"></span> 525 526 </div> 527 </span></div> 528 529 530 <!-- 531 <div class="rc81photo"> 532 <img src=""> 533 </div> 534 --> 535 536 </div> 537 </section> 538 <!-- /RC81v1 --><!-- RC82v0 --> 539 <!-- /RC82v0 --><!-- RC86v0 --> 540 <section class="rc86 rc86v0 cpad"> 541 <div class="rc86w1 bwidth"> 542 <div class="rc86social"> 543 <a href="https://www.facebook.com/dialog/share?app_id=209650819625026&href=../post/syscall-latency" class="sharelink icn-img icn-facebook" aria-label="Share post on Facebook" data-sharetype="facebook"> 544 <!-- <span>Facebook</span> --> 545 </a> 546 <a href="https://twitter.com/share?url=../post/syscall-latency" class="sharelink icn-img icn-twitter" aria-label="Share post on Twitter" data-sharetype="twitter"> 547 <!-- <span>Twitter</span> --> 548 </a> 549 <a href="https://www.linkedin.com/shareArticle?url=../post/syscall-latency" aria-label="Share post on Linkedin" class="sharelink icn-img icn-linkedin" data-sharetype="linked-in"> 550 <!-- <span>LinkedIn</span> --> 551 </a> 552 <a href="placeholder.html" class="sharelink icn-img icn-email" aria-label="Share post on Email" data-sharetype="email"> 553 <!-- <span>Email</span> --> 554 </a> 555 </div> 556 </div> 557 </section> 558 <!-- /RC86v0 --> 559 560 <!-- RC84v0 --> 561 <section class="rc84v0 rc84zoom "> 562 <div class="rc84w1 bwidth"> 563 <div class="rc84zoomui"> 564 <b>Text Size <span id="rc84fs">100%</span>:</b> 565 <div> 566 <a href="#smaller-text" class="rc84-smaller" aria-label="decrease font size to 90%">-</a> 567 <a href="#larger-text" class="rc84-larger" aria-label="increase font size to 110%">+</a> 568 </div> 569 </div> 570 571 572 <div class="rc84post"> 573 574 <!-- RC84v1 --> 575 <section class="rc84 rc84v1"> 576 577 <h2 id="introduction">Introduction</h2> 578 579 <p>Moving from UEK5 to UEK6 brought about an unwelcome surprise: an increase in syscall latency on some x86 systems. The root cause, as we will see, was slightly slower evaluation of audit rules, which, given that they are evaluated for every syscall, is not great.</p> 580 581 <p>In this post we start off by exploring the root cause which turns out to not be UEK specific, it also impacts upstream kernels as well. 
<p>The changes, even though they target low-level optimizations, are quite straightforward, almost trivial.</p>

<h3 id="background">Background</h3>

<p>Execution latency of <code>getpid()</code>[1] increased by about 15% (measured on an Intel Skylake-X system), from 191ns on UEK5 to 217ns on UEK6.</p>

<p>This was measured in the usual way:</p>

<pre class="brush: bash;">clock_gettime(CLOCK_MONOTONIC, &start);
for (i = 0; i < large_number; i++)
        syscall(SYS_getpid);
clock_gettime(CLOCK_MONOTONIC, &stop);</pre>
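<p>For reference, a self-contained version of this microbenchmark might look like the sketch below; the iteration count and the ns/call conversion are illustrative assumptions, not the exact harness behind the numbers in this post:</p>

<pre class="brush: bash;">/* getpid-latency sketch: build with "gcc -O2 -o getpid getpid.c" */
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	const long iters = 10 * 1000 * 1000;	/* assumed iteration count */
	struct timespec start, stop;
	double ns;
	long i;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (i = 0; i < iters; i++)
		syscall(SYS_getpid);
	clock_gettime(CLOCK_MONOTONIC, &stop);

	ns = (stop.tv_sec - start.tv_sec) * 1e9 + (stop.tv_nsec - start.tv_nsec);
	printf("%.1f ns/call\n", ns / iters);
	return 0;
}</pre>

<p>Calling through <code>syscall(SYS_getpid)</code> rather than the <code>getpid()</code> wrapper makes sure every iteration actually enters the kernel (older glibc versions cached the pid in userspace).</p>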
<p>A quick <code>perf record</code> showed that almost all of the increased latency was in <code>audit_filter_syscall()</code>, which was more expensive in UEK6.</p>

<p>Oracle Exadata, where this problem was seen, has 37 audit rules that are evaluated in the syscall path. Since audit only wants to log unusual or exceptional events, the benchmark would evaluate these rules in every iteration but never generate any output. Essentially, purely local computation that became slower without there having been any material changes to the audit code or to the audit rules.</p>

<h3 id="cpu-parameters">CPU parameters</h3>

<p>Some Intel Skylake-X parameters that we'll make use of later:</p>

<pre class="brush: bash;">L1-load-latency:  4-6 cycles
L2-load-latency:  14 cycles
L1-cache-size:    32K (512 cachelines: 64 sets, 8 ways each)

ROB size:         224 micro-ops</pre>

<p>The parameters are taken from the <a href="https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html">Intel SDM</a>.</p>

<p><strong>Note:</strong> L1/L2 are the respective data-cache levels, and ROB is the Reorder Buffer, where instructions are staged for in-order retirement.</p>

<h2 id="root-cause-analysis">Root cause analysis</h2>

<p>Drilling down with <code>perf stat -d</code>:</p>

<p>UEK5 (191 ns):</p>

<pre class="brush: bash;"># perf stat -d -r 5 ./getpid
# output normalized for a single getpid() call

 677.9 cycles                 # 3.542 GHz
1635.0 instructions           # 2.40 insn per cycle
 325.0 branches
   0.5 branch-misses          # 0.16% of all branches
 404.0 L1-dcache-loads
   0.4 L1-dcache-load-misses  # 0.10% of all L1-dcache accesses</pre>

<p>UEK6 (217 ns):</p>

<pre class="brush: bash;"># perf stat -d -r 5 ./getpid
# output normalized for a single getpid() call

 770.4 cycles                 # 3.545 GHz
1652.0 instructions           # 2.14 insn per cycle
 332.2 branches
   1.5 branch-misses          # 0.45% of all branches
 407.3 L1-dcache-loads
   8.6 L1-dcache-load-misses  # 2.13% of all L1-dcache accesses</pre>

<p>Comparing the two, this is an increase of ~100 cycles, with the L1d-load and instruction counts being almost identical across UEK5 and UEK6. This underscores the fact that the audit code, which forms the bulk of the instructions executed, hasn't changed all that much.</p>

<p>The IPC is commensurately lower[2]. The proximal cause seems to be the increased L1d-load-misses and the one extra branch-miss.</p>

<p>These observations were confirmed over enough non-correlated runs (with an intervening reboot for each) to be statistically significant. The L1d-load-miss numbers are somewhat variable across boot cycles, but the trend is close to what we see above.</p>

<h3 id="audit_filter_syscall"><code>audit_filter_syscall()</code></h3>

<p>From <code>perf record</code> we know that the bulk of the increased runtime went to <code>audit_filter_syscall()</code>. The procedure itself is primarily a loop that walks the list of rules, calling <code>audit_in_mask()</code> for each rule to check if it needs to be evaluated for the current syscall. For <code>getpid()</code> the answer will be <code>false</code> most of the time (32 of 37 times.)</p>

<pre class="brush: bash;">audit_filter_syscall(...) {
	struct audit_entry *e;
	struct audit_context *ctx;

	list = audit_filter_list[AUDIT_FILTER_EXIT];

	list_for_each_entry_rcu(e, list, list) {

		if (audit_in_mask(&e->rule, ctx->major) &&
		    audit_filter_rules(tsk, &e->rule, ctx, NULL,
				       &state, false)) {
			rcu_read_unlock();
			ctx->current_state = state;
			return state;
		}
	}
}

audit_in_mask(const struct audit_krule *rule, unsigned long val) {
	if (val > 0xffffffff)
		return false;

	/*
	 * val contains the current syscall number. AUDIT_WORD does
	 * some bit shifting on it.
	 */
	word = AUDIT_WORD(val);
	if (word >= AUDIT_BITMASK_SIZE)
		return false;

	bit = AUDIT_BIT(val);

	/*
	 * The load in rule->mask[word] depends on the audit_krule (which
	 * hangs off the current rule entry) and the syscall number.
	 */
	return rule->mask[word] & bit;
}

audit_filter_rules(...) {
	/*
	 * Large switch statement which we ignore for the rest of this
	 * analysis because, as we will see later, loads executed in it don't
	 * have an "interesting" alignment and so their latency should be easy
	 * enough to hide.
	 */
}</pre>
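<p><code>audit_in_mask()</code> is essentially a bitmap test: each <code>audit_krule</code> carries a per-syscall bitmask, and <code>AUDIT_WORD()</code>/<code>AUDIT_BIT()</code> pick the 32-bit word and the bit within it. Paraphrased (a sketch, not a verbatim copy) from <code>include/uapi/linux/audit.h</code>:</p>

<pre class="brush: bash;">#define AUDIT_BITMASK_SIZE  64
#define AUDIT_WORD(nr)      ((__u32)((nr) / 32))
#define AUDIT_BIT(nr)       (1 << ((nr) - AUDIT_WORD(nr) * 32))</pre>

<p>For <code>getpid()</code> (syscall number 39 on x86-64) the check therefore reduces to <code>rule->mask[1] & (1 << 7)</code>: one load from the rule's cacheline plus a handful of ALU operations.</p>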
<h3 id="memory-accesses">Memory accesses</h3>

<p>Next let's look at the data structures accessed in the <code>audit_filter_syscall()</code> loop and where the L1d-load-misses might be coming from.</p>

<pre class="brush: bash;">/* Data structure layout annotated with size and cacheline occupancy
 * information using pahole. */

struct audit_entry {      /* via audit_filter_list[AUDIT_FILTER_EXIT] */

	struct list_head     list;    /*   0   16 */
	struct callback_head rcu;     /*  16   16 */
	struct audit_krule   rule;    /*  32  376 */
	...
	/* size: 408, cachelines: 7, members: 3 */
	/* last cacheline: 24 bytes */
};

struct audit_krule {      /* inlined in struct audit_entry */
	...
	u32 mask[64];                 /*  16  256 */
	...
	/* size: 376, cachelines: 6, members: 17 */
	/* last cacheline: 56 bytes */
};

struct audit_context {
	...
	int major;                    /*  20    4 */
	...
	/* size: 920, cachelines: 15, members: 46 (slightly larger on UEK6) */
	/* sum members: 912, holes: 2, sum holes: 8 */
	/* last cacheline: 24 bytes */
};</pre>
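<p>(If you want to reproduce the layout annotations above, pahole can print them from a kernel image with debug info; the vmlinux path below is a placeholder:)</p>

<pre class="brush: bash;">pahole -C audit_entry   /path/to/vmlinux
pahole -C audit_context /path/to/vmlinux</pre>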
<p>The effective execution loop in <code>audit_filter_syscall()</code> (with cacheline access annotations):</p>

<pre class="brush: bash;">struct audit_entry *e = &audit_filter_list[AUDIT_FILTER_EXIT];

for_each_iteration {
	e = e->next;                     /* cacheline-0 of audit_entry */
	if (e == list)
		jmp out;
	if (audit_in_mask(e->rule.mask,  /* cacheline-0 of audit_entry */
			  ctx->major))   /* cacheline-0 of audit_context */
		audit_filter_rules(e->rule);
}
out:</pre>

<p>As the annotations above mention, there are a total of three loads:</p>

<ol type="1">
<li>Pointer chasing in <code>e->next</code>: the first cacheline of <code>struct audit_entry</code>.</li>
<li><code>e->rule.mask[]</code>: accesses the same cacheline as load (1) above.</li>
<li><code>ctx->major</code>: accesses the first cacheline of <code>struct audit_context</code>.</li>
</ol>

<p>Loads (1) and (2) will access a total of 37 cachelines, corresponding to one rule per iteration. Also notice that every single basic block in the rest of the iteration (apart from some error checking in <code>audit_in_mask()</code>) has a data dependence on the evaluation of <code>e = e->next</code>. Worse, this is a loop-carried dependency, so each iteration depends on the previous one.</p>

<p>The cacheline for load (3) is accessed once every iteration. This load is unnecessary: <code>ctx->major</code> contains the syscall number, which is a constant for the duration of the syscall. However, because the compiler's alias analysis cannot prove that <code>ctx->major</code> is not modified, it does not get cached in a register. This also means that <code>audit_in_mask()</code> will do the out-of-bounds validation checks related to <code>ctx->major</code> over and over.</p>

<p>Recalling the <code>perf stat -d</code> output above, there are a total of around 400 L1d-loads for each <code>getpid()</code> call. Of those, the loop does a total of 37*3 loads, which map to a total of 38 unique cachelines.</p>

<p>Alright, I hear you think: granted, walking linked-lists is difficult, there are a lot of cachelines in a lot of iterations or whatever, life is hard and the compiler doesn't know what it is doing[3]. Even given all of that, nothing here has changed from UEK5 to UEK6, so none of this explains why UEK6 would incur more L1d-load-misses[4].</p>

<p>Which is true, so that's next.</p>

<h3 id="theory-of-the-case">Theory of the case</h3>

<p>From the background above, we know that the loop is pure computation, and purely local computation at that, so code changes elsewhere should have no effect. And there were no significant code changes from UEK5 to UEK6, so the loop is unchanged (which also applies to the generated assembly.)</p>

<p>Now, insofar as L1d-load-misses are concerned: the number of cachelines accessed (from about 400 L1d-loads per <code>getpid()</code> call, not all of which are to unique cachelines) amounts to a number comfortably below the Skylake-X L1d-cache capacity of 512 cachelines. So this loop should not incur any capacity misses.</p>

<p>Which leaves conflict misses as the probable cause[5]. Skylake-X has an 8-way associative L1: if more than 8 loads in the loop map to the same cache-set, some accesses would incur conflict misses.</p>

<p>Accesses in the loop and how they map to cache-sets:</p>

<ul>
<li><code>struct audit_entry</code>: aligns at a 512B boundary, which limits it to cache-sets <code>{0, 8, 16, ... 56}</code>, for a total of 8*8 cache-slots.</li>
<li><code>struct audit_context</code>: aligns at a 1024B boundary, which resolves to cache-sets <code>{0, 16, 32, 48}</code>, for a total of 4*8 cache-slots. As described earlier, this is a single cacheline which competes with a subset of the <code>struct audit_entry</code> cachelines.</li>
</ul>

<p>Even then, this is 37 cachelines slotted into 64 slots, and another one slotting into 32 of those 64. This should be easy enough to satisfy, assuming that the kernel allocator has a reasonably sane distribution and isn't skewed towards a particular set of cachelines (or is similarly skewed on both UEK5 and UEK6.)</p>

<h3 id="allocation-skew">Allocation skew</h3>

<p>If allocations for <code>struct audit_entry</code> were distributed uniformly, they would map into cache-sets uniformly, ending up with similar populations across the cache-sets. This would give a cacheline-spread metric of ~0 (obtained by calculating the standard deviation of the populations across cache-sets.)</p>

<p>What we see:</p>

<pre class="brush: bash;">cacheline-spread on UEK5: 1.58
cacheline-spread on UEK6: 1.91</pre>

<p>(These results are from a large number (> 100) of non-correlated runs. <code>auditd</code> allocates at boot, so this was done by rebooting between each run.)</p>
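<p>To make this concrete, here is a sketch of how an allocation's address resolves to an L1d cache-set, and of a spread metric computed from the per-set populations. The list of <code>audit_entry</code> addresses is an assumed input (e.g. dumped from a live kernel), and the exact normalization behind the numbers above isn't spelled out here, so treat it as illustrative:</p>

<pre class="brush: bash;">#include <math.h>

/* Skylake-X L1d: 64B cachelines, 64 sets, 8 ways. */
#define CACHELINE_SHIFT	6
#define NR_SETS		64
#define CANDIDATE_SETS	8	/* 512B alignment / 64B cachelines */

/* A 512B-aligned address has (addr >> 6) % 8 == 0, so its first cacheline
 * can only land in sets {0, 8, 16, ..., 56}. */
static unsigned int cache_set(unsigned long addr)
{
	return (addr >> CACHELINE_SHIFT) & (NR_SETS - 1);
}

/* cacheline-spread: standard deviation of the populations of the candidate
 * cache-sets, over the first cacheline of each audit_entry allocation. */
static double cacheline_spread(const unsigned long *entry_addrs, int n)
{
	int population[CANDIDATE_SETS] = { 0 };
	double mean = (double)n / CANDIDATE_SETS, var = 0;
	int i, s;

	for (i = 0; i < n; i++)
		/* entries are 512B aligned, so the set is a multiple of 8 */
		population[cache_set(entry_addrs[i]) / 8]++;

	for (s = 0; s < CANDIDATE_SETS; s++)
		var += (population[s] - mean) * (population[s] - mean);

	return sqrt(var / CANDIDATE_SETS);
}</pre>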
<p>From these numbers, UEK5 is far from a flat distribution, and UEK6 is somewhat worse, but not dispositively so. Additionally, a slight imbalance will not cause performance degradation: that happens only after cache conflicts kick in, which is after the cache-set population crosses the associativity threshold.</p>

<p>To validate this, we measure how well cycles correlate[6] with 1) L1d-misses, and 2) cacheline-spread:</p>

<table>
<tr><th>Kernel</th><th>cycles:L1d-misses</th><th>cycles:cacheline-spread</th></tr>
<tr><td>UEK5</td><td>0.74</td><td>0.22</td></tr>
<tr><td>UEK6</td><td>0.74</td><td>0.61</td></tr>
</table>

<p>For both UEK5 and UEK6, “cycles:L1d-misses” is tightly correlated (though the value of 0.74 for both is happenstance), which makes sense. “cycles:cacheline-spread”, however, is well correlated only on UEK6, not UEK5. This suggests that the UEK6 allocator skew is meaningfully worse, enough to cause lower performance.</p>
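<p>The correlation here is computed over per-boot samples; a plain Pearson estimator along the following lines would do (the exact estimator behind the table is an assumption):</p>

<pre class="brush: bash;">#include <math.h>

/* Pearson correlation coefficient of two per-run series, e.g. cycles[] vs
 * L1d-misses[] (or cacheline-spread[]) collected over n independent boots. */
static double correlate(const double *x, const double *y, int n)
{
	double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0;
	int i;

	for (i = 0; i < n; i++) {
		sx  += x[i];
		sy  += y[i];
		sxx += x[i] * x[i];
		syy += y[i] * y[i];
		sxy += x[i] * y[i];
	}
	return (n * sxy - sx * sy) /
	       sqrt((n * sxx - sx * sx) * (n * syy - sy * sy));
}</pre>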
<p>Alright, having beaten this dead horse enough, let's figure out how to fix it next[7].</p>

<h2 id="speeding-it-up">Speeding it up</h2>

<p>To get back our lost performance, our task is simple: optimize a hot loop[8] which is itself executed in the hot syscall path. Compounding the problem, the critical load in the loop is reached via a linked list.</p>

<p>Stated like that, it sounds pretty bad. But, as we will see, the structure of the problem helps quite a bit:</p>

<ol type="1">
<li>On a sane system, the common case is extremely common: syscalls are frequent, and audit logging is unusual. This means that low branch-mispredict rates are not unusual and something we might even depend on.</li>
<li>We are optimizing a no-op loop: the loop walks a bunch of rules, does error checking, and decides if it needs to log. In the common case, it will conclude that it doesn't. (This is really (1) restated to stress the no-op nature of the loop.)</li>
</ol>

<p>A no-op loop implies that the code does not actually care about most of the values it computes. It just inches towards a foregone conclusion.</p>

<p>This it does (as all code does) by means of dependency chains that transform the input state to output. Here, most dependency chains are short and are really <em>only used to predict the control flow</em>. The only long dependency chain, woven through all the loop iterations, is the one walking the linked list.</p>

<p>Now, critically, since the branches are predicted perfectly or almost so, the control flow can run quite a bit further ahead than any loads and dependent computation. The control flow thus essentially feeds these loads and other instructions to the ROB, where they wait until resources/dependencies become available and compute the output of their chain which, to reiterate, will only be used to predict the control flow.</p>

<p>Given that the control flow is already feeding instructions from the correct direction, these are in effect orphan chains that eventually retire without anyone having cared for the output they compute or how long that took.</p>

<p>Except: this happy state continues only until we run into a resource constraint. For instance, the ROB on Skylake-X has 224 entries and each loop iteration is ~20 instructions. This means instructions worth around 10 loop iterations can be present in the ROB. Now, given that instructions retire in-order on x86, long-running instructions (L1d-load-misses of course, but also L1d-load hits[9]) with long dependence chains would slow retirement down, even were the control flow to be predicted perfectly.</p>

<p>Bearing these observations in mind, our fixes will try to reduce the amount and cost of work per loop iteration. This allows the loop to retire as close as possible to the gating latency of any long-running instructions in the loop.</p>

<h3 id="cache-ctx-major-in-audit_filter_syscall">Cache <code>ctx->major</code> in <code>audit_filter_syscall()</code></h3>

<pre class="brush: bash;">@@ -785,13 +785,14 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
 {
 	struct audit_entry *e;
 	enum audit_state state;
+	unsigned long major = ctx->major;

 	if (auditd_test_task(tsk))
 		return AUDIT_DISABLED;

 	rcu_read_lock();
 	list_for_each_entry_rcu(e, list, list) {
-		if (audit_in_mask(&e->rule, ctx->major) &&
+		if (audit_in_mask(&e->rule, major) &&
 		    audit_filter_rules(tsk, &e->rule, ctx, NULL,
 				       &state, false)) {
 			rcu_read_unlock();</pre>

<p>Caching <code>ctx->major</code> in a local variable helps in two ways:</p>

<ul>
<li>It explicitly indicates to the compiler that there are no stores to the cached value. <code>audit_in_mask()</code> operates on <code>ctx->major</code>, doing some bit-shifting and error checking. Now that the compiler knows that <code>major</code> is not modified, it can hoist most of that logic out of the loop so it is not reevaluated over and over in every loop iteration (see the sketch below.)</li>
<li>As described earlier, <code>struct audit_context</code> has similar natural alignment concerns as <code>struct audit_entry</code>. Allowing the compiler to cache <code>ctx->major</code> in a register (or on the stack) removes one potential source of contention.</li>
</ul>
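<p>Conceptually, with <code>major</code> in a local, the loop behaves as if it were written as below. This is only an illustration of the loop-invariant hoisting the compiler can now do, not the actual generated code:</p>

<pre class="brush: bash;">/* Illustration only: the word/bit computation and the range checks from
 * audit_in_mask() become loop-invariant and can be hoisted, leaving just the
 * mask test inside the loop. */
unsigned long major = ctx->major;
u32  word  = AUDIT_WORD(major);
u32  bit   = AUDIT_BIT(major);
bool valid = (major <= 0xffffffff) && (word < AUDIT_BITMASK_SIZE);

list_for_each_entry_rcu(e, list, list) {
	if (valid && (e->rule.mask[word] & bit) &&
	    audit_filter_rules(tsk, &e->rule, ctx, NULL, &state, false)) {
		...
	}
}</pre>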
<p>With this change, the number of instructions executed per loop iteration drops by 8 (of 20.) Note that most of those were almost-free ALU instructions.</p>

<p>L1d-loads: we removed one L1d-load but added two (due to the compiler now spilling and reloading some state to/from the stack.) However, given that stack accesses are much less likely to have conflicting alignment constraints, the added loads are less of a concern than the one we got rid of.</p>

<p>cycles: improve by about 40 cycles. This is because the greater room in the ROB allows our almost-perfect branch prediction to speculatively run even further ahead of other instructions.</p>

<p>Change in latency for UEK6:</p>

<table>
<tr><th>Version</th><th>Min (ns)</th><th>Mean (ns)</th><th>Median (ns)</th><th>Max (ns)</th></tr>
<tr><td>baseline</td><td>196.26</td><td>212.00</td><td>207.80</td><td>240.52</td></tr>
<tr><td>ctx->major</td><td>183.50</td><td>201.41</td><td>198.80</td><td>226.93</td></tr>
</table>

<p>From the min-max range, there is a rather large variation in latency, caused by variations in allocation that result in high or low cacheline-spread. In almost all cases though, the latency improves by ~10ns or thereabouts.</p>

<p>That said, after removing 8 instructions and one load (and adding two less consequential loads), the performance gain is rather minuscule: ~1 cycle/iteration. Just that the loop executes 37 times, so we make it up in volume.</p>

<p>More details (<code>perf stat</code> and the before/after versions of the generated code) in <a href="https://github.com/oracle/linux-uek/commit/87a39a3d2ca9a5c7e4d35e4cf4b839c53cc0678d">UEK6 commit-1</a> and in <a href="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/kernel/auditsc.c?id=069545997510833281f45f83e097017b9fef19b7">Upstream commit-1</a>.</p>

<h3 id="annotate-branch-direction-for-audit_in_mask">Annotate branch direction for <code>audit_in_mask()</code></h3>

<pre class="brush: bash;">@@ -790,12 +790,13 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
 	rcu_read_lock();
 	list_for_each_entry_rcu(e, list, list) {
-		if (audit_in_mask(&e->rule, major) &&
-		    audit_filter_rules(tsk, &e->rule, ctx, NULL,
-				       &state, false)) {
...
+		if (unlikely(audit_in_mask(&e->rule, major))) {
+			if (audit_filter_rules(tsk, &e->rule, ctx, NULL,
+					       &state, false)) {</pre>

<p>Annotating <code>audit_in_mask()</code> as <code>unlikely()</code> allows the compiler to pessimize the call to <code>audit_filter_rules()</code>. Two reasons for this change:</p>

<ul>
<li>The primary motivation was to get rid of the extra branch mispredict. This change succeeds in that task, but it is unclear why: there's no significant change in the basic-block structure. The only change is a branch inversion due to the unlikely clause.</li>
<li>The branch inversion means that the not-taken direction is chosen more often: 32/37 times (up from 5/37 earlier.) The issue latency for not-taken branches (0.5-1 cycles) is slightly cheaper than for taken branches (1-2 cycles)[10].</li>
</ul>
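<p>For reference, <code>unlikely()</code> is just a static branch-prediction hint; in the kernel it boils down to <code>__builtin_expect()</code> (paraphrased from <code>include/linux/compiler.h</code>):</p>

<pre class="brush: bash;">#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)</pre>

<p>With the hint, the compiler can lay the <code>audit_filter_rules()</code> path out of line, so the common path through the loop falls through as a not-taken branch.</p>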
<p>L1d-loads: we removed one L1d-load but added two (due to the compiler now spilling and reloading some state to/from the stack). However, given that stack accesses are much less likely to have conflicting alignment constraints, the increased loads are less of a concern than the one we got rid of.</p>

<p>cycles: improve by about 40 cycles. This is because the greater room in the ROB allows our almost-perfect branch prediction to speculatively run even further ahead of other instructions.</p>

<p>Change in latency for UEK6:</p>

<div class="divTable">
<div class="divTableHeading">
<div class="divTableCell">Version</div>

<div class="divTableCell">Min<br>
(ns)</div>

<div class="divTableCell">Mean<br>
(ns)</div>

<div class="divTableCell">Median<br>
(ns)</div>

<div class="divTableCell">Max<br>
(ns)</div>
</div>

<div class="divTableRow">
<div class="divTableCell">baseline</div>

<div class="divTableCell">196.26</div>

<div class="divTableCell">212.00</div>

<div class="divTableCell">207.80</div>

<div class="divTableCell">240.52</div>
</div>

<div class="divTableRow">
<div class="divTableCell">ctx->major</div>

<div class="divTableCell">183.50</div>

<div class="divTableCell">201.41</div>

<div class="divTableCell">198.80</div>

<div class="divTableCell">226.93</div>
</div>
</div>

<p> </p>

<p>From the min-max range, there is a rather large variation in latency that’s caused by variations in allocation resulting in high or low cacheline-spread. In almost all cases though, the latency improves by ~10ns or thereabouts.</p>

<p>That said, after removing 8 instructions and one load (and adding two less consequential loads), the performance gain is rather minuscule: ~1 cycle/iteration. Just that the loop executes 37 times, so we make it up in volume.</p>

<p>More details (<code style="background:#eeeeee;border:1px solid #cccccc;">perf-stat</code> and the before/after versions of the generated code) in <a href="https://github.com/oracle/linux-uek/commit/87a39a3d2ca9a5c7e4d35e4cf4b839c53cc0678d">UEK6 commit-1</a> and in <a href="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/kernel/auditsc.c?id=069545997510833281f45f83e097017b9fef19b7">Upstream commit-1</a>.</p>

<h3 id="annotate-branch-direction-for-audit_in_mask">Annotate branch direction for <code style="background:#eeeeee;border:1px solid #cccccc;">audit_in_mask()</code></h3>

<pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">@@ -790,12 +790,13 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
rcu_read_lock();
list_for_each_entry_rcu(e, list, list) {
- if (audit_in_mask(&e->rule, major) &&
- audit_filter_rules(tsk, &e->rule, ctx, NULL,
- &state, false)) {
...
+ if (unlikely(audit_in_mask(&e->rule, major))) {
+ if (audit_filter_rules(tsk, &e->rule, ctx, NULL,
+ &state, false)) {</pre>

<p>Annotate <code style="background:#eeeeee;border:1px solid #cccccc;">audit_in_mask()</code> as <code style="background:#eeeeee;border:1px solid #cccccc;">unlikely()</code> to allow the compiler to pessimize the call to <code style="background:#eeeeee;border:1px solid #cccccc;">audit_filter_rules()</code>. Two reasons for this change (a short sketch of the annotation's effect follows the list):</p>

<ul>
<li>The primary motivation was to get rid of the extra branch mispred. This change succeeds at that, but it is unclear why: there’s no significant change in the basic-block structure. The only change is from a branch inversion due to the unlikely clause.</li>
<li>The branch inversion means that the not-taken direction is chosen more often: 32/37 times (up from 5/37 earlier). The issue-latency for not-taken branches is 0.5-1 cycles versus 1-2 cycles for taken branches[10], so the new arrangement is slightly cheaper.</li>
</ul>
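<p>For illustration, a minimal sketch of what the annotation does. The kernel's <code style="background:#eeeeee;border:1px solid #cccccc;">unlikely()</code> boils down to <code style="background:#eeeeee;border:1px solid #cccccc;">__builtin_expect()</code>; the function below is a stand-in, not the audit code:</p>

<pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">#define unlikely(x) __builtin_expect(!!(x), 0)   /* same idea as the kernel macro */

int filter_one(int match, int (*slow_path)(void))
{
        /*
         * The hint lets the compiler lay out the common case as the
         * straight-line fall-through (a not-taken branch) and push the
         * rarely executed call, along with its spills, reloads and call
         * overhead, out of the hot path.
         */
        if (unlikely(match))
                return slow_path();
        return 0;
}</pre>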
<p>L1d-loads: reduce by 2 for each loop iteration. This is because the spills and reloads introduced in the “Cache <code style="background:#eeeeee;border:1px solid #cccccc;">ctx->major</code>…” patch have now been shifted to the unlikely path (the prologue and epilogue of the <code style="background:#eeeeee;border:1px solid #cccccc;">audit_filter_rules()</code> call).</p>

<p>cycles: performance improves on average by ~30 cycles/call.</p>

<p>Change in latency for UEK6:</p>

<div class="divTable">
<div class="divTableHeading">
<div class="divTableCell">Version</div>

<div class="divTableCell">Min<br>
(ns)</div>

<div class="divTableCell">Mean<br>
(ns)</div>

<div class="divTableCell">Median<br>
(ns)</div>

<div class="divTableCell">Max<br>
(ns)</div>
</div>

<div class="divTableRow">
<div class="divTableCell">ctx->major</div>

<div class="divTableCell">183.50</div>

<div class="divTableCell">201.41</div>

<div class="divTableCell">198.80</div>

<div class="divTableCell">226.93</div>
</div>

<div class="divTableRow">
<div class="divTableCell">ctx->major+annot</div>

<div class="divTableCell">165.26</div>

<div class="divTableCell">188.72</div>

<div class="divTableCell">184.25</div>

<div class="divTableCell">230.34</div>
</div>
</div>

<p> </p>

<p>More details (<code style="background:#eeeeee;border:1px solid #cccccc;">perf-stat</code> and the before/after versions of the generated code) in <a href="https://github.com/oracle/linux-uek/commit/0288dbdbfb5768ad8ae8a445c72f523bcb99eca0">UEK6 commit-2</a>.</p>

<h3 id="remove-static-linkage-from-audit_filter_syscall">Remove static linkage from <code style="background:#eeeeee;border:1px solid #cccccc;">audit_filter_syscall()</code></h3>

<pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">@@ -777,7 +777,7 @@ static bool audit_in_mask(const struct audit_krule *rule, unsigned long
* also not high enough that we already know we have to write an audit
* record (i.e., the state is AUDIT_SETUP_CONTEXT or AUDIT_BUILD_CONTEXT).
*/
-static enum audit_state audit_filter_syscall(struct task_struct *tsk,
+enum audit_state audit_filter_syscall(struct task_struct *tsk,
struct audit_context *ctx,
struct list_head *list)</pre>

<p><code style="background:#eeeeee;border:1px solid #cccccc;">audit_filter_syscall()</code> is only used locally in the file and so is marked <code style="background:#eeeeee;border:1px solid #cccccc;">static</code>. Additionally, it’s only ever called with a fixed <code style="background:#eeeeee;border:1px solid #cccccc;">list</code> value of <code style="background:#eeeeee;border:1px solid #cccccc;">&audit_filter_list[AUDIT_FILTER_EXIT]</code>.</p>

<p>GCC’s constant propagation pass makes use of these two things to, quite reasonably, const-propagate the third argument to the point of use.</p>

<p>This causes the exit check in the <code style="background:#eeeeee;border:1px solid #cccccc;">list_for_each</code> loop to look like this:</p>

<pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">audit_filter_syscall.constprop.18(task, ctx):
0: 48 8b 1b mov (%rbx),%rbx
3: 48 81 fb e0 67 ac 82 cmp $0xffffffff82ac67e0,%rbx
ffffffff8118b5ed: R_X86_64_32S audit_filter_list+0x40
10: 75 e2 jne start_iter</pre>

<p>while, without const-propagation, it would have looked like this:</p>

<pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">audit_filter_syscall(task, ctx, list):
0: 48 8b 1b mov (%rbx),%rbx
3: 4c 39 e3 cmp %r12,%rbx
6: 75 e6 jne start_iter</pre>

<p>Now, either one ought to be alright: both the <code style="background:#eeeeee;border:1px solid #cccccc;">cmp imm32,r</code> and <code style="background:#eeeeee;border:1px solid #cccccc;">cmp r,r</code> forms have a latency of 1 cycle, and both are a single micro-op each.</p>

<p>The second form of the <code style="background:#eeeeee;border:1px solid #cccccc;">cmp</code>, however, can be macro-op fused with the <code style="background:#eeeeee;border:1px solid #cccccc;">jne</code>; I'm not entirely sure if the first form can be[11]. The second form is also denser, though that’s not a concern here.</p>
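<p>To see the two forms side by side, here is a small userspace sketch (hypothetical names; compile with something like <code style="background:#eeeeee;border:1px solid #cccccc;">gcc -O2 -S</code> and compare the generated assembly with and without <code style="background:#eeeeee;border:1px solid #cccccc;">static</code>; whether GCC actually emits a constprop clone depends on the optimizer):</p>

<pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">struct node { struct node *next; };

/*
 * With 'static' (and a constant argument at every call site), GCC's IPA
 * constant propagation may emit a walk.constprop.0 clone in which 'stop'
 * has been folded into an immediate, giving a cmp $imm32,%reg exit check.
 * Dropping 'static' forces 'stop' to arrive in a register, so the exit
 * check becomes the reg-reg cmp that can macro-fuse with the jne.
 */
static __attribute__((noinline))
int walk(struct node *head, const struct node *stop)
{
        int n = 0;

        for (struct node *p = head; p != stop; p = p->next)
                n++;
        return n;
}

static struct node sentinel = { .next = &sentinel };

int count_all(void)
{
        /* fixed second argument, like &audit_filter_list[AUDIT_FILTER_EXIT] */
        return walk(sentinel.next, &sentinel);
}</pre>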
<p>Disallowing GCC from making assumptions about calling contexts by removing the <code style="background:#eeeeee;border:1px solid #cccccc;">static</code> linkage from <code style="background:#eeeeee;border:1px solid #cccccc;">audit_filter_syscall()</code> forces it to pass the <code style="background:#eeeeee;border:1px solid #cccccc;">list</code> parameter in a register, which results in a small performance improvement: ~20 cycles (about 0.5 cycles per loop iteration).</p>

<p>Change in latency for UEK6:</p>

<div class="divTable">
<div class="divTableHeading">
<div class="divTableCell">Version</div>

<div class="divTableCell">Min<br>
(ns)</div>

<div class="divTableCell">Mean<br>
(ns)</div>

<div class="divTableCell">Median<br>
(ns)</div>

<div class="divTableCell">Max<br>
(ns)</div>
</div>

<div class="divTableRow">
<div class="divTableCell">ctx->major+annot</div>

<div class="divTableCell">165.26</div>

<div class="divTableCell">188.72</div>

<div class="divTableCell">184.25</div>

<div class="divTableCell">230.34</div>
</div>

<div class="divTableRow">
<div class="divTableCell">ctx->major+annot+extern</div>

<div class="divTableCell">159.88</div>

<div class="divTableCell">184.35</div>

<div class="divTableCell">177.62</div>

<div class="divTableCell">250.82</div>
</div>
</div>

<p> </p>

<p>More details (<code style="background:#eeeeee;border:1px solid #cccccc;">perf-stat</code> and the before/after versions of the generated code) in <a href="https://github.com/oracle/linux-uek/commit/5a74015e20bff63d1052359fbc2c3418e0f6bc4e">UEK6 commit-3</a> and in <a href="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/kernel/auditsc.c?id=50979953c0c41e929e5f955800da68e1bb24c7ab">Upstream commit-3</a>.</p>

<h2 id="summary">Summary</h2>

<p>The audit subsystem is fairly stable in the Linux kernel, not given to frequent changes.
So it was puzzling when it became slower in recent kernels, and, because a primary user is the syscall path, concerning[12].</p>

<p>The cause turned out to be higher skew in the allocated buffers, which results in a more lopsided cache-set distribution.</p>

<p>The fixes compensate for the higher costs in the loop by taking advantage of the peculiarities of the execution path and optimizing for the speculative nature of the CPU pipeline.</p>

<p>The three patches, in sum, reduce the overhead by about 30ns (~100 cycles).</p>

<p>The final <code style="background:#eeeeee;border:1px solid #cccccc;">perf stat -d -r 5</code> numbers go from:</p>

<pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;"># perf stat -d -r 5 ./getpid
# output normalized for a single getpid() call

cycles 761.65 ( +- 5.22% )
instructions 1639.17 ( +- 0.00% )
IPC 2.18 ( +- 5.50% )
branches 328.21 ( +- 0.00% )
branch-misses 1.37 ( +- 6.56% )
L1-dcache-loads 404.35 ( +- 0.00% )
L1-dcache-load-misses 7.99 ( +- 70.71% )</pre>

<p>to:</p>

<pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;"># perf stat -d -r 5 ./getpid
# output normalized for a single getpid() call

cycles 669.09 ( +- 11.23% )
instructions 1342.04 ( +- 0.00% )
IPC 2.03 ( +- 9.85% )
branches 328.19 ( +- 0.00% )
branch-misses 0.56 ( +- 5.35% )
L1-dcache-loads 384.31 ( +- 0.00% )
L1-dcache-load-misses 5.77 ( +- 84.57% )</pre>

<p>This compares quite well to the UEK5-baseline:</p>

<pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;"># perf stat -d -r 5 ./getpid
# output normalized for a single getpid() call

cycles 672.90 ( +- 1.65% )
instructions 1622.08 ( +- 0.00% )
IPC 2.41 ( +- 1.65% )
branches 321.20 ( +- 0.00% )
branch-misses 0.51 ( +- 0.00% )
L1-dcache-loads 401.32 ( +- 0.00% )
L1-dcache-load-misses 2.28 ( +- 59.62% )</pre>

<p>Note for non-Skylake-X architectures: Intel Icelake and AMD Milan (the other architectures tested) cope with L1d-load-misses much better, so their baseline performance is much better.</p>

<p>With these patches, they only show a small improvement (~10ns): Icelake has a bigger L1d-cache (48K), and a much bigger ROB.
Milan also has a bigger ROB and does memory renaming and a bunch of other pipeline optimizations that limit the effect of these changes.</p>

<p><strong>Endnote:</strong> what I found personally instructive was how much C really is “a portable assembler” and the significant codegen (and performance) changes that can result from minimal changes to the code.</p>

<h2 id="references">References</h2>

<ol type="1">
<li>
<p><code style="background:#eeeeee;border:1px solid #cccccc;">getpid()</code> has a minimal kernel execution path (only does a PID lookup), and so is generally used to measure the overhead of the syscall path.</p>
</li>
<li>
<p>Comparing the IPC for the audit-only portion shows a starker drop:</p>

<pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">UEK5: 1427.0 instructions # 3.41 insn per cycle
UEK6: 1432.0 instructions # 2.84 insn per cycle</pre>
</li>
<li>
<p>Alas no, alias analysis is an undecidable problem.</p>
</li>
<li>
<p>Or for that matter, what causes the extra branch-miss.</p>
</li>
<li>
<p>Another possibility is out-of-line code -- frequent interrupts, vmexits etc. -- thrashing the cache, but from profiling these were a non-issue.</p>
</li>
<li>
<p>Measured using pearson-quotient(x, y): the correlation coefficient between quantities x and y.</p>
</li>
<li>
<p>You might notice that this analysis does not address the extra branch-miss. That's because I still have no clue what causes it.</p>
</li>
<li>
<p>The correct fix would be to fix whatever ails the allocator. However, from a quick look at the changes that have gone into related code, it seems non-trivial to find a particular commit which points to the root cause of the skew (especially given that the skew is not constant, but varies from run-to-run). Also, notably, the fixes described below also apply to UEK5, which means that even if UEK6 becomes faster, UEK5 will also improve somewhat.</p>
</li>
<li>
<p>As mentioned in <a href="#cpu-parameters">CPU-parameters</a>, L1d-loads take 4-6 cycles on Skylake-X. We also know that in the good case (UEK5), this loop is capable of an IPC of 3.41 insn per cycle.
So, hiding L1d-load latency is critical for good performance.</p>
</li>
<li>
<p><a href="https://www.agner.org/optimize/instruction_tables.pdf">https://www.agner.org/optimize/instruction_tables.pdf</a>, pg 298 (Skylake-X)</p>
</li>
<li>
<p>The first form, if fused, needs three inputs: <code style="background:#eeeeee;border:1px solid #cccccc;">%rbx</code>, an <code style="background:#eeeeee;border:1px solid #cccccc;">imm32</code> encoding the distance to the address being compared, and an <code style="background:#eeeeee;border:1px solid #cccccc;">imm8</code> encoding the distance to the branch-dest; the second needs two registers, <code style="background:#eeeeee;border:1px solid #cccccc;">%rbx</code> and <code style="background:#eeeeee;border:1px solid #cccccc;">%r12</code>, and only the <code style="background:#eeeeee;border:1px solid #cccccc;">imm8</code>.</p>
</li>
<li>
<p>Just for context, a kernel build (x86-defconfig) makes an aggregate of 27M syscalls, with a syscall every 44us.</p>
</li>
</ol>


</section>
<!-- /RC84v1 -->

<!-- RC84v2 -->
<section class="rc84v2 cpad">
<div class="rc84w1 cwidth">

<div class="rc84bio">
<div class="rc84img">
<img src="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/img/ui_defaultuserimage.jpg" alt="">
</div>
<div class="rc84blurb">
<div class="blogtile-w2-inner text-wrap">
<h4>Ankur Arora</h4>

<p></p>
</div>
</div>
</div>

</div>
</section>
<!-- /RC84v2 -->


</div>

</div>
</section>
<!-- /RC84v0 -->

<!-- /RC83v0 -->
class="u10-links u10w10"> 1446 <li><a href="https://www.oracle.com/legal/copyright.html" data-lbl="copyright">© 2022 Oracle</a></li> 1447 1448 <li><a data-lbl="privacy" 1449 href="https://www.oracle.com/legal/privacy/">Privacy</a><span>/</span><a 1450 data-lbl="do-not-sell-my-info" 1451 href="https://www.oracle.com/legal/privacy/privacy-choices.html">Do Not Sell My Info</a> 1452 </li> 1453 <li> 1454 <div id='teconsent'> </div> 1455 </li> 1456 <li><a href="https://www.oracle.com/legal/privacy/privacy-policy.html#advertising" 1457 data-lbl="ad-choices">Ad Choices</a></li> 1458 <li><a href="https://www.oracle.com/corporate/careers/" data-lbl="careers">Careers</a></li> 1459 </ul> 1460 1461 </div> 1462 </div> 1463 1464 </div> 1465 <!-- /U10v6 --> 1466 1467 1468 </div> 1469 <script type="text/javascript" src="https://www.oracle.com/us/assets/metrics/ora_compendiumblogs.js"></script> 1470 <script type="text/javascript" src="https://www.oracle.com/assets/truste-oraclelib.js"></script> 1471 <script async="async" type="text/javascript" 1472 src="//consent.trustarc.com/notice?domain=oracle.com&c=teconsent&js=bb¬iceType=bb&text=true>m=1&cdn=1&pcookie" 1473 crossorigin=""></script> 1474 <script type="text/javascript" src="../_cache_8b25/siteinfo-common.js" charset="utf-8"></script><script type="text/javascript" src="../siteinfo-dynamic.js"></script> 1475 <script src="../_cache_8b25/_sitesclouddelivery/renderer/renderer.js"></script> 1476 1477 1478 1479 1480 <script src="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/js/blogs-script.js"></script> 1481 <script src="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/js/oracle-script.js"></script> 1482 1483 <!-- Avoid FOUC issue in FF with async loading of style sheets --> 1484 <style> 1485 body { 1486 opacity: 1; 1487 } 1488 </style> 1489 1490 <script type="text/javascript"> 1491 $(document).ready(function () { 1492 $('a[data-lbl="copyright"]').html("© " + new Date().getFullYear() +" Oracle " ); 1493 1494 }); 1495 </script> 1496 <!--DTM/Launch embed code - Footer --> 1497 1498 </body> 1499 1500 </html>