设为首页 收藏本站
查看: 790|回复: 0

[经验分享] Linux page cache

[复制链接]

尚未签到

发表于 2018-5-23 08:45:52 | 显示全部楼层 |阅读模式
  Overview
Block device layer
page cache
IO scheduler
  

  Page cache contains all file I/O data, direct I/O bypasses the page cache.
  

  Page cache helps Linux to economize I/O
  – Read requests can be made faster by adding a read ahead quantity, depending on the
  historical behavior of file system accesses by applications
  – Write requests are delayed and data in the page cache can have multiple updates before
  being written to disk.
  – Write requests in the page cache can be merged into larger I/O requests
  But page cache...
      – Requires Linux memory pages
      – Is not useful when cached data is not exploited
  Data needed only once
  Application buffers data itself
      – Linux does not know which data the application really needs next. It only makes a guess
  No alternatives if application cannot handle direct I/O
  

  Consider using...
  direct I/O:
    – bypasses the page cache
    – is a good choice in all cases where the application does not want Linux to economize I/O and/or where the application itself buffers larger amounts of file contents
async I/O:
    – prevents the application from being blocked in the I/O system call until the I/O completes
    – allows read merging by Linux in case of using page cache
    – can be combined with direct I/O

  temporary files:
    – should not reside on real disks, a ram disk or tmpfs allows fastest access to these files
    – they don't need to survive a crash, don't place them on a journaling file system
file system:
    – use ext3 and select the appropriate journaling mode (journal, ordered, writeback)
    – turning off atime is only suitable if no application makes decisions based on the "last read" time; consider relatime instead


  Direct I/O versus Page cache
  

  Direct I/O
      – Preferable if application caches itself
   Application knows best which data is needed again
   Application knows which data is most likely needed next
      Example: database management systems (DBMS)
      – Preferable if caching makes no sense
   Data only needed once
  Backup and restore
  Page cache
      – Optimizes re-read / write but can be critical
   Data written to the page cache but not yet to disk can get lost (e.g. in a crash); this is critical if data loss cannot easily be handled
      – If application cannot handle direct I/O
   Typical example is a file server

/*
* Each physical page in the system has a struct page associated with
* it to keep track of whatever it is we are using the page for at the
* moment. Note that we have no way to track which tasks are using
* a page, though if it is a pagecache page, rmap structures can tell us
* who is mapping it.
*
* The objects in struct page are organized in double word blocks in
* order to allow us to use atomic double word operations on portions
* of struct page. That is currently only used by slub but the arrangement
* allows the use of atomic double word operations on the flags/mapping
* and lru list pointers also.
*/
struct page {
	/* First double word block */
	unsigned long flags;		/* Atomic flags, some possibly
					 * updated asynchronously */
	struct address_space *mapping;	/* If low bit clear, points to
					 * inode address_space, or NULL.
					 * If page mapped as anonymous
					 * memory, low bit is set, and
					 * it points to anon_vma object:
					 * see PAGE_MAPPING_ANON below.
					 */

	/* Second double word */
	struct {
		/* First word of the pair: one of several overlapping uses. */
		union {
			pgoff_t index;		/* Our offset within mapping. */
			void *freelist;		/* slub/slob first free object */
			bool pfmemalloc;	/* If set by the page allocator,
						 * ALLOC_NO_WATERMARKS was set
						 * and the low watermark was not
						 * met implying that the system
						 * is under some pressure. The
						 * caller should try ensure
						 * this page is only used to
						 * free other pages.
						 */
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
			pgtable_t pmd_huge_pte;	/* protected by page->ptl */
#endif
		};

		/*
		 * Second word of the pair: "counters" overlays the
		 * anonymous struct holding the _mapcount union and _count.
		 */
		union {
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
	defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
			/* Used for cmpxchg_double in slub */
			unsigned long counters;
#else
			/*
			 * Keep _count separate from slub cmpxchg_double data.
			 * As the rest of the double word is protected by
			 * slab_lock but _count is not.
			 */
			unsigned counters;
#endif

			struct {
				union {
					/*
					 * Count of ptes mapped in
					 * mms, to show when page is
					 * mapped & limit reverse map
					 * searches.
					 *
					 * Used also for tail pages
					 * refcounting instead of
					 * _count. Tail pages cannot
					 * be mapped and keeping the
					 * tail page _count zero at
					 * all times guarantees
					 * get_page_unless_zero() will
					 * never succeed on tail
					 * pages.
					 */
					atomic_t _mapcount;

					struct { /* SLUB */
						unsigned inuse:16;
						unsigned objects:15;
						unsigned frozen:1;
					};
					int units;	/* SLOB */
				};
				atomic_t _count;	/* Usage count, see below. */
			};
		};
	};

	/* Third double word block */
	union {
		struct list_head lru;	/* Pageout list, eg. active_list
					 * protected by zone->lru_lock !
					 */
		struct {		/* slub per cpu partial pages */
			struct page *next;	/* Next partial slab */
#ifdef CONFIG_64BIT
			int pages;	/* Nr of partial slabs left */
			int pobjects;	/* Approximate # of objects */
#else
			/* Same two members as the 64-bit branch, declared short. */
			short int pages;
			short int pobjects;
#endif
		};

		struct list_head list;	/* slobs list of pages */
		struct slab *slab_page;	/* slab fields */
	};

	/* Remainder is not double word aligned */
	union {
		unsigned long private;	/* Mapping-private opaque data:
					 * usually used for buffer_heads
					 * if PagePrivate set; used for
					 * swp_entry_t if PageSwapCache;
					 * indicates order in the buddy
					 * system if PG_buddy is set.
					 */
#if USE_SPLIT_PTE_PTLOCKS
#if BLOATED_SPINLOCKS
		/* Lock is stored out of line; only a pointer lives here. */
		spinlock_t *ptl;
#else
		/* Lock is embedded directly in the union. */
		spinlock_t ptl;
#endif
#endif
		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
		struct page *first_page;	/* Compound tail pages */
	};

	/*
	 * On machines where all RAM is mapped into kernel address space,
	 * we can simply calculate the virtual address. On machines with
	 * highmem some memory is mapped into kernel virtual memory
	 * dynamically, so we need a place to store that address.
	 * Note that this field could be 16 bits on x86 ... ;)
	 *
	 * Architectures with slow multiplication can define
	 * WANT_PAGE_VIRTUAL in asm/page.h
	 */
#if defined(WANT_PAGE_VIRTUAL)
	void *virtual;			/* Kernel virtual address (NULL if
					   not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
	unsigned long debug_flags;	/* Use atomic bitops on this */
#endif

#ifdef CONFIG_KMEMCHECK
	/*
	 * kmemcheck wants to track the status of each byte in a page; this
	 * is a pointer to such a status block. NULL if not tracked.
	 */
	void *shadow;
#endif

#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
	int _last_cpupid;
#endif
};	/* terminating ';' is required to end the struct declaration */
struct address_space {
struct inode*host;/* owner: inode, block_device */
struct radix_tree_rootpage_tree;/* radix tree of all pages */
spinlock_ttree_lock;/* and lock protecting it */
unsigned inti_mmap_writable;/* count VM_SHARED mappings */
struct rb_rooti_mmap;/* tree of private and shared mappings */
struct list_headi_mmap_nonlinear;/*list VM_NONLINEAR mappings */
struct mutexi_mmap_mutex;/* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */
unsigned longnrpages;/* number of total pages */
unsigned longnrshadows;/* number of shadow entries */
pgoff_twriteback_index;/* writeback starts here */
const struct address_space_operations *a_ops;/* methods */
unsigned longflags;/* error bits/gfp mask */
struct backing_dev_info *backing_dev_info; /* device readahead, etc */
spinlock_tprivate_lock;/* for use by the address_space */
struct list_headprivate_list;/* ditto */
void*private_data;/* ditto */
} __attribute__((aligned(sizeof(long))));struct radix_tree_rootpage_tree; page tree根节点  

运维网声明 1、欢迎大家加入本站运维交流群:群②:261659950 群⑤:202807635 群⑦870801961 群⑧679858003
2、本站所有主题由该帖子作者发表,该帖子作者与运维网享有帖子相关版权
3、所有作品的著作权均归原作者享有,请您和我们一样尊重他人的著作权等合法权益。如果您对作品感到满意,请购买正版
4、禁止制作、复制、发布和传播具有反动、淫秽、色情、暴力、凶杀等内容的信息,一经发现立即删除。若您因此触犯法律,一切后果自负,我们对此不承担任何责任
5、所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其内容的准确性、可靠性、正当性、安全性、合法性等负责,亦不承担任何法律责任
6、所有作品仅供您个人学习、研究或欣赏,不得用于商业或者其他用途,否则,一切后果均由您自己承担,我们对此不承担任何法律责任
7、如涉及侵犯版权等问题,请您及时通知我们,我们将立即采取措施予以解决
8、联系人Email:admin@iyunv.com 网址:www.yunweiku.com

所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其承担任何法律责任,如涉及侵犯版权等问题,请您及时通知我们,我们将立即处理,联系人Email:kefu@iyunv.com,QQ:1061981298 本贴地址:https://www.yunweiku.com/thread-480069-1-1.html 上篇帖子: Linux Linux grep命令用法以及正则表达 下篇帖子: 修改linux编码
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

扫码加入运维网微信交流群X

扫码加入运维网微信交流群

扫描二维码加入运维网微信交流群,最新一手资源尽在官方微信交流群!快快加入我们吧...

扫描微信二维码查看详情

客服E-mail:kefu@iyunv.com 客服QQ:1061981298


QQ群⑦:运维网交流群⑦ QQ群⑧:运维网交流群⑧ k8s群:运维网kubernetes交流群


提醒:禁止发布任何违反国家法律、法规的言论与图片等内容;本站内容均来自个人观点与网络等信息,非本站认同之观点.


本站大部分资源是网友从网上搜集分享而来,其版权均归原作者及其网站所有,我们尊重他人的合法权益,如有内容侵犯您的合法权益,请及时与我们联系进行核实删除!



合作伙伴: 青云cloud

快速回复 返回顶部 返回列表