pax_global_header00006660000000000000000000000064147707233660014531gustar00rootroot0000000000000052 comment=393c5d374f52b6a1d849c60f3c1ff77c6ae721ba intel-lpmd-0.0.9/000077500000000000000000000000001477072336600136045ustar00rootroot00000000000000intel-lpmd-0.0.9/AUTHORS000066400000000000000000000001321477072336600146500ustar00rootroot00000000000000Zhang Rui Srinivas Pandruvada intel-lpmd-0.0.9/CODE_OF_CONDUCT.md000066400000000000000000000124761477072336600164150ustar00rootroot00000000000000# Contributor Covenant Code of Conduct ## Our Pledge We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. ## Our Standards Examples of behavior that contributes to a positive environment for our community include: * Demonstrating empathy and kindness toward other people * Being respectful of differing opinions, viewpoints, and experiences * Giving and gracefully accepting constructive feedback * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience * Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: * The use of sexualized language or imagery, and sexual attention or advances of any kind * Trolling, insulting or derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or email address, without their explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Enforcement Responsibilities Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. ## Scope This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at CommunityCodeOfConduct AT intel DOT com. All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the reporter of any incident. ## Enforcement Guidelines Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: ### 1. Correction **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. ### 2. Warning **Community Impact**: A violation through a single incident or series of actions. **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. ### 3. Temporary Ban **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. ### 4. Permanent Ban **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. **Consequence**: A permanent ban from any sort of public interaction within the community. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.1, available at [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. For answers to common questions about this code of conduct, see the FAQ at [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at [https://www.contributor-covenant.org/translations][translations]. [homepage]: https://www.contributor-covenant.org [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html [Mozilla CoC]: https://github.com/mozilla/diversity [FAQ]: https://www.contributor-covenant.org/faq intel-lpmd-0.0.9/CONTRIBUTING.md000066400000000000000000000043711477072336600160420ustar00rootroot00000000000000# Contributing ### License is licensed under the terms in [LICENSE]. By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. ### Sign your work Please use the sign-off line at the end of the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify the below (from [developercertificate.org](http://developercertificate.org/)): ``` Developer Certificate of Origin Version 1.1 Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 660 York Street, Suite 102, San Francisco, CA 94110 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Developer's Certificate of Origin 1.1 By making a contribution to this project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. ``` Then you just add a line to every git commit message: Signed-off-by: Joe Smith Use your real name (sorry, no pseudonyms or anonymous contributions.) If you set your `user.name` and `user.email` git configs, you can sign your commit automatically with `git commit -s`. intel-lpmd-0.0.9/COPYING000066400000000000000000000431001477072336600146350ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. intel-lpmd-0.0.9/ChangeLog000066400000000000000000000000001477072336600153440ustar00rootroot00000000000000intel-lpmd-0.0.9/Makefile.am000066400000000000000000000031321477072336600156370ustar00rootroot00000000000000## Process this file with automake to generate Makefile.in include $(GLIB_MAKEFILE) SUBDIRS = . data tools ACLOCAL_AMFLAGS = -I m4 # Global C Flags AM_CFLAGS = \ ${GLIB_CFLAGS} \ $(XML_CFLAGS) \ -DTDRUNDIR=\"$(lpmd_rundir)\" \ -DTDCONFDIR=\"$(lpmd_confdir)\" \ $(UPOWER_CFLAGS) \ $(CFLAGS) \ $(libnl30_CFLAGS)\ $(libnlgenl30_CFLAGS) \ $(SYSTEMD_CFLAGS) \ -I src EXTRA_DIST=Makefile.glib \ intel_lpmd.pc.in # Programs to build sbin_PROGRAMS = intel_lpmd intel_lpmd_CPPFLAGS = \ -I@top_srcdir@/src \ -I@top_srcdir@/src/include \ -I@top_srcdir@/src/wlt_proxy/include \ -DTDLOCALEDIR=\"$(datadir)/locale\" \ -DGLIB_SUPPORT intel_lpmd_includedir = @top_srcdir@ intel_lpmd_LDADD = \ $(GLIB_LIBS) \ $(LIBNL_LIBS) \ $(LIBM) \ -lm \ $(LIBDL) \ $(XML_LIBS) \ $(UPOWER_LIBS) \ $(libnlgenl30_LIBS) \ $(SYSTEMD_LIBS) BUILT_SOURCES = \ lpmd-resource.c intel_lpmd_SOURCES = \ src/lpmd_main.c \ src/lpmd_proc.c \ src/lpmd_dbus_server.c \ src/lpmd_config.c \ src/lpmd_cpu.c \ src/lpmd_helpers.c \ src/lpmd_hfi.c \ src/lpmd_irq.c \ src/lpmd_socket.c \ src/lpmd_util.c \ src/wlt_proxy/wlt_proxy.c \ src/wlt_proxy/spike_mgmt.c \ src/wlt_proxy/state_machine.c \ src/wlt_proxy/state_util.c \ src/wlt_proxy/state_manager.c \ lpmd-resource.c man8_MANS = man/intel_lpmd.8 man/intel_lpmd_control.8 man5_MANS = man/intel_lpmd_config.xml.5 lpmd-resource.c: $(top_srcdir)/lpmd-resource.gresource.xml $(AM_V_GEN) glib-compile-resources --generate-source lpmd-resource.gresource.xml install-data-hook: gzip -f -k "$(DESTDIR)$(mandir)/man8/intel_lpmd.8"; mandb || true CLEANFILES = $(BUILT_SOURCES) intel-lpmd-0.0.9/NEWS000066400000000000000000000000041477072336600142750ustar00rootroot00000000000000TBD intel-lpmd-0.0.9/README.md000066400000000000000000000112521477072336600150640ustar00rootroot00000000000000# Intel Low Power Mode Daemon Intel Low Power Mode Daemon (lpmd) is a Linux daemon designed to optimize active idle power. It selects the most power-efficient CPUs based on a configuration file or CPU topology. Depending on system utilization and other hints, it puts the system into Low Power Mode by activating the power-efficient CPUs and disabling the rest, and restores the system from Low Power Mode by activating all CPUs. ## Usage Refer to the man pages for command line arguments and XML configurations: ```sh man intel_lpmd man intel_lpmd_control man intel_lpmd_config.xml ``` ## Install Dependencies ### Fedora ```sh dnf install automake autoconf-archive gcc glib2-devel dbus-glib-devel libxml2-devel libnl3-devel systemd-devel gtk-doc upower-devel ``` ### Ubuntu ```sh sudo apt install autoconf autoconf-archive gcc libglib2.0-dev libdbus-1-dev libdbus-glib-1-dev libxml2-dev libnl-3-dev libnl-genl-3-dev libsystemd-dev gtk-doc-tools libupower-glib-dev ``` ### OpenSUSE ```sh zypper in automake gcc ``` ## Build and Install ```sh ./autogen.sh make sudo make install ``` The generated artifacts are copied to respective directories under `/usr/local`. If a custom install path is preferred other than system default, make sure `--localstatedir` and `--sysconfdir` are set to the right path that the system can understand. If installed via RPM then artifacts would be under `/usr`. Example command for installation using prefix under `/opt/lpmd_install` dir with `--localstatedir` and `--sysconfdir` set to system default ```sh ./autogen.sh prefix=/opt/lpmd_install --localstatedir=/var --sysconfdir=/etc ``` ## Run ### Start Service ```sh sudo systemctl start intel_lpmd.service ``` ### Get Status ```sh sudo systemctl status intel_lpmd.service ``` ### Stop Service ```sh sudo systemctl stop intel_lpmd.service ``` ### Terminate using DBUS Interface ```sh sudo tests/lpm_test_interface.sh 1 ``` ## Testing Installation from Source Launch `lpmd` in no-daemon mode: ```sh ./intel_lpmd --no-daemon --dbus-enable --loglevel=debug ``` Start `lpmd` using: ```sh sudo sh tests/lpm_test_interface.sh 4 ``` Run a workload and monitor `lpmd` to ensure it puts the system in the appropriate state based on the load. ## Releases ### Release 0.0.9 - Fix lpmd from processing HFI/WLT updates when it is not in auto mode. - Improve README and other documents. - Add support for graphics utilization detection. - Add support for config states based on both WLT and graphics utilization. - Introduce LunarLake platform specific config file. - Minor fixes and cleanups. ### Release 0.0.8 - Introduce workload type proxy support. - Add support for model/sku specific config file. - Add detection for AC/DC status. - Honor power profile daemon default EPP when restoring. - Introduce MeteorLake-P platform specific config file. - Minor fixes and cleanups. ### Release 0.0.7 - Change lpmd description from "Low Power Mode Daemon" to "Energy Optimizer (lpmd)" because it covers more scenarios. - Fix invalid cgroup setting during probe, in case lpmd doesn't quit smoothly and cleanups are not done properly in the previous run. - Introduce a new parameter `--ignore-platform-check`. - Provide more detailed information when lpmd fails to probe on an unvalidated platform. - Various fixes for array bound check, potential memory leak, etc. - Autotool improvements. ### Release 0.0.6 - Remove automake and autoconf improvements due to a regression. - Deprecate the dbus-glib dependency. ### Release 0.0.5 - Fix compiling errors with `-Wall`. - Remove unintended default config file change to keep it unchanged since v0.0.3. ### Release 0.0.4 - Enhance HFI monitor to handle back-to-back HFI LPM hints. - Enhance HFI monitor to handle HFI hints for banned CPUs. - Introduce support for multiple Low Power states. - Introduce support for workload type hint. - Allow change EPP during Low Power modes transition. - Minor fixes and cleanups. ### Release 0.0.3 - Convert from glib-dbus to GDBus. - Add handling for CPU hotplug. - Use strict CPU model check to allow intel_lpmd to run on validated platforms only, including ADL/RPL/MTL for now. - CPUID.7 Hybrid bit is set - /sys/firmware/acpi/pm_profile returns 2 (mobile platform) - Use `cpuid()` to detect Lcores instead of using cache sysfs. - Enhance Ecore module detection. - Fix pthread error handling, suggested by ColinIanKing. - Werror fixes from aekoroglu. ### Release 0.0.2 - Various fixes and cleanups. ### Release 0.0.1 - Add initial lpmd support. ## Security See Intel's [Security Center](https://www.intel.com/content/www/us/en/security-center/default.html) for information on how to report a potential security issue or vulnerability. See also: [Security Policy](security.md) intel-lpmd-0.0.9/autogen.sh000077500000000000000000000005221477072336600156040ustar00rootroot00000000000000#!/bin/sh srcdir=`dirname $0` test -z "$srcdir" && srcdir=. olddir=`pwd` cd "$srcdir" aclocal --install || exit 1 gtkdocize --copy --flavour no-tmpl || exit 1 autoreconf --install --verbose || exit 1 cd "$olddir" if test -z "$NO_CONFIGURE"; then $srcdir/configure "$@" && echo "Now type 'make' to compile `basename $srcdir`." fi intel-lpmd-0.0.9/configure.ac000066400000000000000000000060531477072336600160760ustar00rootroot00000000000000AC_PREREQ(1.0) m4_define([lpmd_major_version], [0]) m4_define([lpmd_minor_version], [0.9]) m4_define([lpmd_version], [lpmd_major_version.lpmd_minor_version]) AC_INIT([intel_lpmd], [lpmd_version], [], [intel_lpmd]) m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR(build-aux) AC_CONFIG_HEADERS([config.h]) AM_INIT_AUTOMAKE([1.11 foreign no-define subdir-objects]) AM_MAINTAINER_MODE([enable]) GTK_DOC_CHECK([1.11],[--flavour no-tmpl]) AC_ARG_WITH(dbus-sys-dir, AS_HELP_STRING([--with-dbus-sys-dir=DIR], [where D-BUS system.d directory is])) if test -n "$with_dbus_sys_dir" ; then DBUS_SYS_DIR="$with_dbus_sys_dir" else DBUS_SYS_DIR="/etc/dbus-1/system.d" fi AC_SUBST(DBUS_SYS_DIR) # paths AC_SUBST(lpmd_binary, "$sbindir/$PACKAGE", [Binary executable]) AC_SUBST(lpmd_confdir, "$sysconfdir/$PACKAGE", [Configuration directory]) AC_SUBST(lpmd_rundir, "$localstatedir/run/$PACKAGE", [Runtime state directory]) PKG_PROG_PKG_CONFIG AC_ARG_WITH([systemdsystemunitdir], AS_HELP_STRING([--with-systemdsystemunitdir=DIR], [Directory for systemd service files]), [], [with_systemdsystemunitdir=$($PKG_CONFIG --variable=systemdsystemunitdir systemd)]) if test "x$with_systemdsystemunitdir" != xno; then AC_SUBST([systemdsystemunitdir], [$with_systemdsystemunitdir]) fi AM_CONDITIONAL(HAVE_SYSTEMD, [test -n "$with_systemdsystemunitdir" -a "x$with_systemdsystemunitdir" != xno ]) # print configuration echo echo "System paths:" echo " prefix: $prefix" echo " exec_prefix: $exec_prefix" echo " systemdunitdir: $with_systemdsystemunitdir" echo " lpmd_binary: $lpmd_binary" echo " lpmd_confdir: $lpmd_confdir" echo " lpmd_rundir: $lpmd_rundir" echo GETTEXT_PACKAGE=intel_lpmd AC_SUBST(GETTEXT_PACKAGE) AC_DEFINE_UNQUOTED(GETTEXT_PACKAGE,"$GETTEXT_PACKAGE", [Gettext package]) GLIB_VERSION_DEFINES="-DGLIB_VERSION_MIN_REQUIRED=GLIB_VERSION_2_26" PKG_CHECK_MODULES(GLIB, gio-unix-2.0 >= 2.22 gmodule-2.0 glib-2.0 gobject-2.0) GLIB_CFLAGS="$GLIB_CFLAGS $GLIB_VERSION_DEFINES" AC_SUBST(GLIB_CFLAGS) AC_SUBST(GLIB_LIBS) PKG_CHECK_MODULES(XML, libxml-2.0 >= 2.4) PKG_CHECK_MODULES(libnl30, [libnl-3.0], libnl30=yes, libnl30=no) PKG_CHECK_MODULES(libnlgenl30, [libnl-genl-3.0], libnlgenl30=yes, libnlgenl30=no) PKG_CHECK_MODULES([SYSTEMD], [libsystemd], [], [PKG_CHECK_MODULES([SYSTEMD], [libsystemd-daemon], [], AC_MSG_ERROR([libsystemd support requested but found]))]) PKG_CHECK_MODULES(UPOWER, upower-glib) AC_PATH_PROG([GDBUS_CODEGEN],[gdbus-codegen]) AC_PROG_CC AC_PROG_INSTALL AC_C_CONST AC_C_INLINE AC_TYPE_SIZE_T AC_ARG_ENABLE(werror, AS_HELP_STRING([--disable-werror], [Disable -Werror])) AS_IF([test "x$enable_werror" != "xno"], [CFLAGS="$CFLAGS -Werror"]) AC_CONFIG_FILES([Makefile data/Makefile tools/Makefile]) AC_ARG_ENABLE(gdbus, [AS_HELP_STRING([--disable-gdbus], [Switch DBus backend to glib-dbus. (Default: GDBus)])], [], [AC_DEFINE([GDBUS], [1], [Enable GDBus support])]) AC_OUTPUT intel-lpmd-0.0.9/data/000077500000000000000000000000001477072336600145155ustar00rootroot00000000000000intel-lpmd-0.0.9/data/Makefile.am000066400000000000000000000017661477072336600165630ustar00rootroot00000000000000## Process this file with automake to generate Makefile.in include $(GLIB_MAKEFILE) if HAVE_SYSTEMD systemdsystemunit_DATA = \ intel_lpmd.service intel_lpmd.service: intel_lpmd.service.in @$(edit) $< >$@ servicedir = $(datadir)/dbus-1/system-services service_in_files = org.freedesktop.intel_lpmd.service.in service_DATA = $(service_in_files:.service.in=.service) $(service_DATA): $(service_in_files) Makefile @$(edit) $< >$@ endif edit = sed \ -e 's|@bindir[@]|$(bindir)|g' \ -e 's|@sbindir[@]|$(sbindir)|g' \ -e 's|@sysconfdir[@]|$(sysconfdir)|g' \ -e 's|@localstatedir[@]|$(localstatedir)|g' dbusservicedir = $(DBUS_SYS_DIR) dbusservice_DATA = org.freedesktop.intel_lpmd.conf lpmd_configdir = $(lpmd_confdir) lpmd_config_DATA = \ intel_lpmd_config.xml \ intel_lpmd_config_F6_M170.xml \ intel_lpmd_config_F6_M189.xml EXTRA_DIST = \ intel_lpmd.service.in \ org.freedesktop.intel_lpmd.service.in \ $(dbusservice_DATA) CLEANFILES = intel_lpmd.service org.freedesktop.intel_lpmd.service intel-lpmd-0.0.9/data/intel_lpmd.service.in000066400000000000000000000006271477072336600206400ustar00rootroot00000000000000[Unit] Description= Intel Linux Energy Optimizer (lpmd) Service Documentation=man:intel_lpmd(8) ConditionVirtualization=no StartLimitInterval=200 StartLimitBurst=5 [Service] Type=dbus SuccessExitStatus=2 BusName=org.freedesktop.intel_lpmd ExecStart=@sbindir@/intel_lpmd --systemd --dbus-enable Restart=on-failure RestartSec=30 [Install] WantedBy=multi-user.target Alias=org.freedesktop.intel_lpmd.service intel-lpmd-0.0.9/data/intel_lpmd_config.xml000066400000000000000000000045171477072336600207220ustar00rootroot00000000000000 0 -1 -1 -1 0 0 10 95 0 0 0 0 0 intel-lpmd-0.0.9/data/intel_lpmd_config_F6_M170.xml000066400000000000000000000104151477072336600217530ustar00rootroot00000000000000 1 -1 -1 -1 0 1 1 0 0 0 0 0 0 6 170 * 1 WLT_IDLE 0 255 15 2000 -1 -1 2 WLT_BATTERY_LIFE 1 178 6 2000 -1 -1 3 WLT_SUSTAINED 2 64 6 2000 -1 -1 4 WLT_BURSTY 3 64 4 2000 -1 -1 intel-lpmd-0.0.9/data/intel_lpmd_config_F6_M189.xml000066400000000000000000000106761477072336600217760ustar00rootroot00000000000000 1 -1 -1 -1 0 1 1 0 10 95 0 0 0 0 0 6 189 * 1 UTIL_IDLE 1 50 -1 192 8 -1 1000 500 2000 2 UTIL_IDLE_SUSTAIN 75 2 -1 64 8 -1 1000 500 2000 3 UTIL_IDLE_BURSTY 3 75 -1 64 8 -1 1000 500 2000 4 UTIL_IDLE_GFX_BUSY 100 -1 128 8 -1 1000 500 2000 intel-lpmd-0.0.9/data/intel_lpmd_config_examples.xml000066400000000000000000000121321477072336600226100ustar00rootroot00000000000000 -1 0 -1 -1 -1 0 0 0 10 95 0 0 0 0 0 6 170 12P8E2L-28W 1 WLT_IDLE 0 255 -1 -1 2 WLT_BATTERY_LIFE 1 192 -1 -1 3 WLT_SUSTAINED 2 64 -1 -1 4 WLT_BURSTY 3 64 -1 -1 6 170 4P8E2L-15W 1 LPM_DEEP 2 50 -1 -1 -1 -1 16,17 500 500 2000 2 LPM_LOW 10 3 -1 -1 -1 -1 12,13,14,15 1000 1000 3000 3 FULL_POWER -1 -1 -1 -1 0-17 500 -1 2000 intel-lpmd-0.0.9/data/org.freedesktop.intel_lpmd.conf000066400000000000000000000016601477072336600226160ustar00rootroot00000000000000 intel-lpmd-0.0.9/data/org.freedesktop.intel_lpmd.service.in000066400000000000000000000001741477072336600237350ustar00rootroot00000000000000[D-BUS Service] Name=org.freedesktop.intel_lpmd Exec=/bin/false User=root SystemdService=org.freedesktop.intel_lpmd.service intel-lpmd-0.0.9/doc/000077500000000000000000000000001477072336600143515ustar00rootroot00000000000000intel-lpmd-0.0.9/doc/WLT_proxy.md000066400000000000000000000122501477072336600166020ustar00rootroot00000000000000 WLT (workload type) proxy hints use predefined CPU utilization thresholds and software algorithm to retrieve and detect WLT (same as WLT hints from hardware (WLTEnable)). When proxy hints detection (WLTProxyEnable) is enabled through config option, the hardware WLT hints will be ignored. On detecting workload type, framework takes predefined action with values in config file. # Pre-requisite |Workload Type (WLT) |Description |Internal states |EPP/EPB| | :---: | :---: |:---: |:---: | |Idle |Very low system usage and low power consumption |DEEP_MODE|PS/BAT | |BL(Battery Life)|Continuous light system usage with low power consumption |NORM_MODE, RESP_MODE |PS/BAT (platform optimal value)| |Sustained |Continuous heavy tasks without idleness|MDRT_MODE INIT_MODE |Perf/AC (platform optimal value)| |Bursty |Heavy short tasks with idleness in between |PERF_MODE |Perf/AC| PS - Power saver; Perf - performance; BAT - Battery bias; AC - AC bias. # Enabling and leveraging wlt proxy hints for dynamic energy optimization. * Software based WLT Proxy hint overwrites hardware based WLT Hint. To enable proxy hints, both WLT Hint and WLTProxy has to be enabled. ** WLTHintEnable set to Yes 1 ** WLTProxyEnable set to Yes 1 * WLT Proxy hints are calculated and dynamic energy optimizations are applied only in balanced power profile. Set to auto or force on. ** BalancedDef set to AUTO 0 # Value add with Dynamic EPP [based on workload] enabled on Core Ultra Gen 1 [Meteor Lake H], HP Baymax14W, CDB, PV SKU8W - N15479-021, in Balanced power profile, 10% performance improvement observed on Crossmark and Speedometer benchmarks. ## Known issues * Performance may suffer on some use cases. ** As algorithm currently takes average of all p-cores and e-cores utilization to identify workload type, solution has limitation of identifying single threaded workloads and memory related workloads. Benchmarks like Geekbench ST, WebXprt, Stream may not show improvement compared to Geekbench MT, Speedometer and Crossmark. # Workload detection algorithm - pseudo code * System CPU utilization thresholds and conditions are predefined and mapped to workload type. * Operating CPU frequency, Spike count rate, state stay time counter and operating frequency-voltage points makes up state switch conditions. * Enabling WLT proxy through config file calls WLT proxy handler. * On WLT Proxy handler, ** CPU load retrieved from system through perf MSR registers (system snapshot) ** State machine, switch state when predefined conditions are met ** Apply state actions ** Calculate/set new timer interval * When timer expires, WLT Proxy handler called again # WLT proxy states | | Init | Perf | Mod4e | Mod3e | Mod2e | Resp | Normal | deep | | :---: |:---: |:---: | :---: | :---: | :---: | :---: | :---: | :---: | | init | x | [1 cpu].lo < 10 utilization| -| -| -| -| -| - | | Perf | [all cpu].lo > 10 utilization| x| -| C0 max < 10%| -| sum_c0 util < 20% && sma avg < 70 %| -| - | | MOD4E | - | C0_max > 90%| x| -| -| worst_stall < 70%| sma_avg1 < 25 AND sma_avg2 < 25 AND sum_c0 < 50%| - | | MOD3E | - | C0_max > 90%| sma_avg1 > 25 AND sma_avg2 > 20| x| sma_avg1 b.w 4 and 25 AND sma_avg2 b/w 4 and 25| worst_stall < 70%| sma_avg1 < 4 AND sma_avg2 < 2 AND sma_avg3 < 2| - | | MOD2E | - | - | -| C0_max > 90% OR sma_avg1 > 25 AND sma_avg2 > 15| x| sorst_stall < 70%| sma_avg1 b/w 4 and 25 AND sma_avg2 < 25 countdown and switch| - | | Resp | - | C0_max > 70% && sma_avg1 > 40%| -| worst stall > 70%| -| x| -| - | | Normal| - | - | -| -| C0_max > 50% OR sma_avg1 > 40| worst stall < 70%| x| C0_max < 10% AND C0_2ndMax < 1% OR sma_avg1 < 2%; countdown and switch | | Deep | - | - | -| -| -| worst_stall < 70%| C0_max > 35%| x | x – invalid/same state; - not allowed state Variables used in state switch condition: * Multithreaded workload: all applicable CPUs for state utilized more that 10%. Not multithread workload: at least one CPU is utilized < 10%. * CPU.L0= 100 * perf_stats[t].mperf_diff / perf_stats[t].tsc_diff * Also following values will be calculated based on L0. C0_max , Min_load[C0.min]; max_load, max_2nd, max_3rd * sum_c0 = grp.c0_max + grp.c0_2nd_max + grp.c0_3rd_max * sma - simple moving average [tracks 3 max utilizations] * worst stall - perf_stats[t].pperf_diff / perf_stats[t].aperf_diff. cpu in wait due to memory of other dependency. # Files & functionality ## wlt_proxy.c * wlt proxy detection interface file * handles wlt_proxy enable/disable; entry/exit, timer expiry handler ## state_machine.c * handle current state * determine state change ## spike_mgmt.c * counts CPU utilization spike counts in given period * handles bursty workload type detection entry and exit ## state_util.c * retrieval of pref, HFM and SMA * calculations for state switch ## state_manager.c * state definition & management * state initialization/deinitialization * mapping state to workload type * polling frequency calculations intel-lpmd-0.0.9/lpmd-resource.gresource.xml000066400000000000000000000003241477072336600211030ustar00rootroot00000000000000 src/intel_lpmd_dbus_interface.xml intel-lpmd-0.0.9/m4/000077500000000000000000000000001477072336600141245ustar00rootroot00000000000000intel-lpmd-0.0.9/m4/.keep000066400000000000000000000000001477072336600150370ustar00rootroot00000000000000intel-lpmd-0.0.9/man/000077500000000000000000000000001477072336600143575ustar00rootroot00000000000000intel-lpmd-0.0.9/man/intel_lpmd.8000066400000000000000000000054151477072336600166040ustar00rootroot00000000000000.\" intel_lpmd (8) manual page .\" .\" This is free documentation; you can redistribute it and/or .\" modify it under the terms of the GNU General Public License as .\" published by the Free Software Foundation; either version 2 of .\" the License, or (at your option) any later version. .\" .\" The GNU General Public License's references to "object code" .\" and "executables" are to be interpreted as the output of any .\" document formatting or typesetting system, including .\" intermediate and printed output. .\" .\" This manual is distributed in the hope that it will be useful, .\" but WITHOUT ANY WARRANTY; without even the implied warranty of .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the .\" GNU General Public License for more details. .\" .\" You should have received a copy of the GNU General Public Licence along .\" with this manual; if not, write to the Free Software Foundation, Inc., .\" 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. .\" .\" Copyright (C) 2012 Intel Corporation. All rights reserved. .\" .TH intel_lpmd "8" "1 Jun 2023" .SH NAME intel_lpmd \- Intel Energy Optimizer (LPMD) Daemon .SH SYNOPSIS .B intel_lpmd .RI " [ " OPTIONS " ] .SH DESCRIPTION .B intel_lpmd is a Linux daemon used for energy optimization on Intel hybrid systems. This daemon uses a configuration file "intel_lpmd_config.xml". Based on the configuration, it will choose right set of CPUs to enable. For example, this daemon can monitor system utilization and choose a set of low power CPUs to enable and disable the rest. This enable disable of CPUs are done using Linux cpuset feature of intel power clamp driver. There is a control utility distributed along with this daemon. This control utility is called "intel_lpmd_control". This utility can be used to set different modes for this daemon. For example: intel_lpmd_control ON To turn on low power mode operation. intel_lpmd_control OFF To turn off low power mode operation. intel_lpmd_control AUTO To turn on low power mode operation in auto mode, which allows low power mode based on system utilization. .SH OPTIONS .TP .B -h --help Print the help message .TP .B --version Print intel_lpmd version and exit .TP .B --no-daemon Don't run as a daemon: Default is daemon mode .TP .B --systemd Assume daemon is started by systemd .TP .B --loglevel= log severity: can be info or debug .TP .B --dbus-enable Enable Dbus server to receive requests via Dbus .TP .B --ignore-platform-check Ignore platform check .SH EXAMPLES .TP .B intel_lpmd --loglevel=info --no-daemon --dbus-enable Run intel_lpmd with log directed to stdout .TP .B intel_lpmd --systemd --dbus-enable Run intel_lpmd as a service with logs directed to system journal .SH SEE ALSO intel_lpmd_config.xml(5) .SH AUTHOR Written by Zhang Rui intel-lpmd-0.0.9/man/intel_lpmd_config.xml.5000066400000000000000000000320101477072336600207140ustar00rootroot00000000000000.\" intel_lpmd_config.xml(5) manual page .\" .\" This is free documentation; you can redistribute it and/or .\" modify it under the terms of the GNU General Public License as .\" published by the Free Software Foundation; either version 2 of .\" the License, or (at your option) any later version. .\" .\" The GNU General Public License's references to "object code" .\" and "executables" are to be interpreted as the output of any .\" document formatting or typesetting system, including .\" intermediate and printed output. .\" .\" This manual is distributed in the hope that it will be useful, .\" but WITHOUT ANY WARRANTY; without even the implied warranty of .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the .\" GNU General Public License for more details. .\" .\" You should have received a copy of the GNU General Public Licence along .\" with this manual; if not, write to the Free Software Foundation, Inc., .\" 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. .\" .\" Copyright (C) 2012 Intel Corporation. All rights reserved. .\" .TH intel_lpmd_config.xml "5" "1 Jun 2023" .SH NAME intel_lpmd_config.xml \- Configuration file for intel_lpmd .SH SYNOPSIS $(TDCONFDIR)/etc/intel_lpmd/intel_lpmd_config.xml .SH DESCRIPTION .B intel_lpmd_config.xml is a configuration file for the Intel Low Power Mode Daemon. It is used to describe the lp_mode_cpus to use in Low Power Mode, as well as the way to restrict work to those CPUs. It also describes if and how the HFI monitor and utilization monitor works. The location of this file depends on the configuration option used during build time. .PP .B lp_mode_cpus is a set of active CPUs when system is in Low Power Mode. This usually equals a group of most power efficient CPUs on a platform to achieve best power saving. When not specified, intel_lpmd tool can detect this automatically. E.g. it uses an E-core Module on Intel Alderlake platform, and it uses the Low Power E-cores on SoC Die on Intel Meteorlake platform. .PP .B Mode specifies the way to migrate the tasks to the lp_mode_cpus. .IP \(bu 2 Mode 0: set cpuset to the lp_mode_cpus for systemd. All tasks created by systemd will run on these CPUs only. This is supported for cgroup v2 based systemd only. .IP \(bu 2 Mode 1: Isolate the non-lp_mode_cpus so that tasks are scheduled to the lp_mode_cpus only. .IP \(bu 2 Mode 2: Force idle injection to the non-lp_mode_cpus and leverage the scheduler to schedule the other tasks to the lp_mode_cpus. .PP .B PerformanceDef specifies the default behavior when power setting is set to Performance. .IP \(bu 2 -1 : Never enter Low Power Mode. .IP \(bu 2 0 : opportunistic Low Power Mode enter/exit based on HFI/Utilization request. .IP \(bu 2 1 : Always stay in Low Power Mode. .PP .B BalancedDef specifies the default behavior when power setting is set to Balanced. .PP .B PowersaverDef specifies the default behavior when power setting is set to Power saver. .PP .B HfiLpmEnable specifies if the HFI monitor can capture the HFI hints for Low Power Mode. .PP .B HfiSuvEnable specifies if the HFI monitor can capture the HFI hints for survivability mode. .PP .B WLTHintEnable Enable use of hardware Workload type hints. .PP .B WLTProxyEnable Enable use of Proxy Workload type hints. .PP .B util_entry_threshold specifies the system utilization threshold for entering Low Power Mode. The system workload is considered to fit the lp_mode_cpus capacity when system utilization is under this threshold. Setting to 0 or leaving this empty disables the utilization monitor. .PP .B util_exit_threshold specifies the CPU utilization threshold for exiting Low Power Mode. The system workload is considered to not fit the lp_mode_cpus capacity when the utilization of the busiest lp_mode_cpus is above this threshold. Setting to 0 or leaving this empty disables the utilization monitor. .PP .B EntryDelayMS specifies the sample interval used by the utilization Monitor when system wants to enter Low Power Mode based on system utilization. Setting to 0 or leaving this empty will cause the utilization Monitor to use the default interval, 1000 milli seconds. .PP .B ExitDelayMS specifies the sample interval used by the utilization Monitor when system wants to exit Low Power Mode based on CPU utilization. Setting to 0 or leaving this empty will cause the utilization Monitor to use the adaptive value. The adaptive interval is based on CPU utilization. The busier the CPU is, the shorter interval the utilization monitor uses. .PP .B EntryHystMS specifies a hysteresis threshold when system is in Low Power Mode. If set, when the previous average time stayed in Low Power Mode is lower than this value, the current enter Low Power Mode request will be ignored because it is expected that the system will exit Low Power Mode soon. Setting to 0 or leaving this empty disables this hysteresis algorithm. .PP .B ExitHystMS specifies a hysteresis threshold when system is not in Low Power Mode. If set, when the previous average time stayed out of Low-Power-Mode is lower than this value, the current exit Low Power Mode request will be ignored because it is expected that the system will enter Low Power Mode soon. Setting to 0 or leaving this empty disables this hysteresis algorithm. .PP .B IgnoreITMT Avoid changing scheduler ITMT flag. This means that during transition to low power mode, ITMT flag is not changed. This reduces latency during switching. This flag is not used when configuration uses "State" based configuration, where this flag can be defined per state. .PP .B States Allows to define per platform low power states. Each state defines has an entry condition and set of parameters to use. .SH State Definition There can be multiple State configuration can be present. Each configuration is valid for a platform. A State header defines parameters, which are used to match a platform. .B CPUFamily CPU generation to match. .PP .B CPUModel CPU model to match. .PP .B CPUConfig Define a configuration of CPUs and TDP to match different skews for the same CPU model and family. CPU configuration string format is: xPyEzL-tdpW. For example: 12P8E2L-28W, defines a platform with 6 P-cores with hyper threading enabled, 8 E cores, 2 LPE cores and the TDP is 28W. This configuration allows wildcard "*" to match any combination. .SH Per State Definition Each "State" defines entry criteria and parameters to use. .B ID A unique ID for the state. .PP .B Name A name for the state. .PP .B EntrySystemLoadThres System Entry load threshold in percent. System utilization is different based on the number of CPUs are active in a configuration. This value is calculated from /proc/stat sysfs. To enter into this state, the system utilization must be less or equal to this value. .PP .B EnterCPULoadThres CPU Entry load threshold in percent. Per CPU utilization is also obtained from /proc/stat. To enter into this state any active CPU utilization must be less or equal to this value. EnterCPULoadThres is checked before EntrySystemLoadThres to match a state. .PP .B WLTType Workload type value to enter into this state. If this value is defined then utilization based entry triggers are not used. To use this WLTHintEnable must be enabled, so that hardware notifications are enabled. .PP .B ActiveCPUs Active CPUs in this state. The list can be comma separated or use "-" for a range. This is optional to have active CPUs in a state. .PP .B EPP EPP to apply for this state. -1 to ignore. .PP .B EPB EPB to apply for this state. -1 to ignore. .PP .B ITMTState Set the state of ITMT flag. -1 to ignore. .PP .B IRQMigrate Migrate IRQs to the active CPUs in this state. -1 to ignore. .PP .B MinPollInterval Minimum polling interval in milli seconds. .PP .B MaxPollInterval Maximum polling interval in milli seconds. This is optional, if there is no maximum is desired. .PP .B PollIntervalIncrement Polling interval increment in milli seconds. If this value is -1, then polling increment is adaptive based on the utilization. .SH FILE FORMAT The configuration file format conforms to XML specifications. .sp 1 .EX Example CPUs 0|1|2 -1|0|1 -1|0|1 -1|0|1 0|1 0|1 Example threshold Example threshold Example delay Example delay Example hyst Example hyst -1 | EPP value .EE .SH EXAMPLE CONFIGURATIONS .PP .B Example 1: This is the minimum configuration. .IP \(bu 2 lp_mode_cpus: not set. Detects the lp_mode_cpus automatically. .IP \(bu 2 Mode: 0. Use cgroup-v2 systemd for task migration. .IP \(bu 2 HfiLpmEnable: 0. Ignore HFI Low Power mode hints. .IP \(bu 2 HfiSuvEnable: 0. Ignore HFI Survivability mode hints. With both HfiLpmEnable and HfiSuvEnable cleared, the HFI monitor will be disabled. .IP \(bu 2 util_entry_threshold: 0. Disable utilization monitor. .IP \(bu 2 util_exit_threshold: 0. Disable utilization monitor. .IP \(bu 2 EntryDelayMS: 0. Do not take effect when utilization monitor is disabled. .IP \(bu 2 ExitDelayMS: 0. Do not take effect when utilization monitor is disabled. .IP \(bu 2 EntryHystMS: 0. Do not take effect when utilization monitor is disabled. .IP \(bu 2 ExitHystMS: 0. Do not take effect when utilization monitor is disabled. .IP \(bu 2 lp_mode_epp: -1. Do not change EPP when entering Low Power Mode. .sp 1 .EX 0 0 0 0 0 0 0 0 0 -1 .PP .B Example 2: This is the typical configuration. The utilization thresholds and delays may be different based on requirement. .IP \(bu 2 lp_mode_cpus: not set. Detects the lp_mode_cpus automatically. .IP \(bu 2 Mode: 0. Use cgroup-v2 systemd for task migration. .IP \(bu 2 HfiLpmEnable: 1. Enter/Exit Low Power Mode based on HFI hints. .IP \(bu 2 HfiSuvEnable: 1. Enter/Exit Survivability mode based on HFI hints. .IP \(bu 2 util_entry_threshold: 5. Enter Low Power Mode when system utilization is lower than 5%. .IP \(bu 2 util_exit_threshold: 95. Exit Low Power Mode when the utilization of any of the lp_mode_cpus is higher than 95%. .IP \(bu 2 EntryDelayMS: 0. Resample every 1000ms when system is out of Low Power Mode. .IP \(bu 2 ExitDelayMS: 0. Resample adaptively based on the utilization of lp_mode_cpus when system is in Low Power Mode. .IP \(bu 2 EntryHystMS: 2000. Ignore the current Enter Low Power Mode request when the previous average time stayed in Low Power Mode is lower than 2000ms. .IP \(bu 2 ExitHystMS: 3000. Ignore the current Exit Low Power Mode request when the previous average time stayed out of Low Power Mode is lower than 3000ms. .IP \(bu 2 lp_mode_epp: -1. Do not change EPP when entering Low Power Mode. .sp 1 .EX 0 1 1 5 95 0 0 2000 3000 -1 .EE intel-lpmd-0.0.9/man/intel_lpmd_control.8000066400000000000000000000037631477072336600203500ustar00rootroot00000000000000.\" intel_lpmd_control (8) manual page .\" .\" This is free documentation; you can redistribute it and/or .\" modify it under the terms of the GNU General Public License as .\" published by the Free Software Foundation; either version 2 of .\" the License, or (at your option) any later version. .\" .\" The GNU General Public License's references to "object code" .\" and "executables" are to be interpreted as the output of any .\" document formatting or typesetting system, including .\" intermediate and printed output. .\" .\" This manual is distributed in the hope that it will be useful, .\" but WITHOUT ANY WARRANTY; without even the implied warranty of .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the .\" GNU General Public License for more details. .\" .\" You should have received a copy of the GNU General Public Licence along .\" with this manual; if not, write to the Free Software Foundation, Inc., .\" 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. .\" .\" Copyright (C) 2025 Intel Corporation. All rights reserved. .\" .TH intel_lpmd_control "8" "7 Mar 2025" .SH NAME intel_lpmd_control \- Control utility for the Intel Low Power Mode Daemon (LPMD) .SH SYNOPSIS .B intel_lpmd_control .RI " [ " OPTIONS " ] .SH DESCRIPTION .B intel_lpmd_control is a command-line utility used to control the Intel Low Power Mode Daemon (LPMD). It allows users to enable, disable, or set the daemon to automatic mode based on system utilization. .SH OPTIONS .TP .B ON Enables low power mode operation. .TP .B OFF Disables low power mode operation. .TP .B AUTO Enables low power mode operation in automatic mode, allowing system utilization to determine low power state activation. .SH EXAMPLES .TP .B intel_lpmd_control ON Turns on low power mode operation. .TP .B intel_lpmd_control OFF Turns off low power mode operation. .TP .B intel_lpmd_control AUTO Turns on low power mode operation in automatic mode. .SH SEE ALSO .B intel_lpmd(8) .SH AUTHOR Written by Deepak Sundar intel-lpmd-0.0.9/security.md000066400000000000000000000006261477072336600160010ustar00rootroot00000000000000# Security Policy Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation. ## Reporting a Vulnerability Please report any security vulnerabilities in this project utilizing the guidelines [here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). intel-lpmd-0.0.9/src/000077500000000000000000000000001477072336600143735ustar00rootroot00000000000000intel-lpmd-0.0.9/src/include/000077500000000000000000000000001477072336600160165ustar00rootroot00000000000000intel-lpmd-0.0.9/src/include/lpmd.h000066400000000000000000000232001477072336600171200ustar00rootroot00000000000000/* * intel_lpmd.h: Intel Low Power Daemon common header file * * Copyright (C) 2023 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef LPMD_INTEL_LPMD_H #define LPMD_INTEL_LPMD_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "config.h" #include "thermal.h" #define LOG_DEBUG_INFO 1 #define LOCKF_SUPPORT #ifdef GLIB_SUPPORT #include #include #include // Log macros enum log_level { LPMD_LOG_NONE, LPMD_LOG_INFO, LPMD_LOG_DEBUG, LPMD_LOG_MSG, LPMD_LOG_WARN, LPMD_LOG_ERROR, LPMD_LOG_FATAL, }; #define lpmd_log_fatal g_error // Print error and terminate #define lpmd_log_error g_critical #define lpmd_log_warn g_warning #define lpmd_log_msg g_message #define lpmd_log_debug g_debug #define lpmd_log_info(...) g_log(NULL, G_LOG_LEVEL_INFO, __VA_ARGS__) #else static int dummy_printf(const char *__restrict __format, ...) { return 0; } #define lpmd_log_fatal printf #define lpmd_log_error printf #define lpmd_log_warn printf #define lpmd_log_msg printf #define lpmd_log_debug dummy_printf #define lpmd_log_info printf #endif // Common return value defines #define LPMD_SUCCESS 0 #define LPMD_ERROR -1 #define LPMD_FATAL_ERROR -2 // Dbus related /* Well-known name for this service. */ #define INTEL_LPMD_SERVICE_NAME "org.freedesktop.intel_lpmd" #define INTEL_LPMD_SERVICE_OBJECT_PATH "/org/freedesktop/intel_lpmd" #define INTEL_LPMD_SERVICE_INTERFACE "org.freedesktop.intel_lpmd" typedef enum { TERMINATE, LPM_FORCE_ON, LPM_FORCE_OFF, LPM_AUTO, SUV_MODE_ENTER, SUV_MODE_EXIT, HFI_EVENT, } message_name_t; #define MAX_MSG_SIZE 512 typedef struct { message_name_t msg_id; int msg_size; unsigned long msg[MAX_MSG_SIZE]; } message_capsul_t; #define MAX_STR_LENGTH 256 #define MAX_CONFIG_STATES 10 #define MAX_STATE_NAME 32 #define MAX_CONFIG_LEN 64 typedef struct { int id; int valid; char name[MAX_STATE_NAME]; int wlt_type; int entry_system_load_thres; int exit_system_load_thres; int exit_system_load_hyst; int enter_cpu_load_thres; int exit_cpu_load_thres; int enter_gfx_load_thres; int exit_gfx_load_thres; int min_poll_interval; int max_poll_interval; int poll_interval_increment; int epp; int epb; char active_cpus[MAX_STR_LENGTH]; // If active CPUs are specified then // the below counts don't matter int island_0_number_p_cores; int island_0_number_e_cores; int island_1_number_p_cores; int island_1_number_e_cores; int island_2_number_p_cores; int island_2_number_e_cores; int itmt_state; int irq_migrate; // Private state variables, not configurable int entry_load_sys; int entry_load_cpu; }lpmd_config_state_t; // lpmd config data typedef struct { int mode; int performance_def; int balanced_def; int powersaver_def; int hfi_lpm_enable; int hfi_suv_enable; int wlt_hint_enable; int wlt_hint_poll_enable; int wlt_proxy_enable; int util_enable; int util_entry_threshold; int util_exit_threshold; int util_entry_delay; int util_exit_delay; int util_entry_hyst; int util_exit_hyst; int ignore_itmt; int lp_mode_epp; char lp_mode_cpus[MAX_STR_LENGTH]; int cpu_family; int cpu_model; char cpu_config[MAX_CONFIG_LEN]; int config_state_count; int tdp; lpmd_config_state_t config_states[MAX_CONFIG_STATES]; } lpmd_config_t; enum lpm_cpu_process_mode { LPM_CPU_CGROUPV2, LPM_CPU_ISOLATE, LPM_CPU_POWERCLAMP, LPM_CPU_OFFLINE, LPM_CPU_MODE_MAX = LPM_CPU_POWERCLAMP, }; enum lpm_command { USER_ENTER, /* Force enter LPM and always stay in LPM */ USER_AUTO, /* Allow oppotunistic LPM based on util/hfi request */ USER_EXIT, /* Force exit LPM and never enter LPM */ HFI_ENTER, HFI_EXIT, HFI_SUV_ENTER, HFI_SUV_EXIT, DBUS_SUV_ENTER, DBUS_SUV_EXIT, UTIL_ENTER, UTIL_EXIT, LPM_CMD_MAX, }; enum cpumask_idx { CPUMASK_LPM_DEFAULT, CPUMASK_ONLINE, CPUMASK_HFI, CPUMASK_HFI_BANNED, CPUMASK_HFI_SUV, /* HFI Survivability mode */ CPUMASK_HFI_LAST, CPUMASK_UTIL, CPUMASK_MAX, }; #define UTIL_DELAY_MAX 5000 #define UTIL_HYST_MAX 10000 #define cpuid(leaf, eax, ebx, ecx, edx) \ __cpuid(leaf, eax, ebx, ecx, edx); \ lpmd_log_debug("CPUID 0x%08x: eax = 0x%08x ebx = 0x%08x ecx = 0x%08x edx = 0x%08x\n", \ leaf, eax, ebx, ecx, edx); #define cpuid_count(leaf, subleaf, eax, ebx, ecx, edx) \ __cpuid_count(leaf, subleaf, eax, ebx, ecx, edx); \ lpmd_log_debug("CPUID 0x%08x subleaf 0x%08x: eax = 0x%08x ebx = 0x%08x ecx = 0x%08x" \ "edx = 0x%08x\n", leaf, subleaf, eax, ebx, ecx, edx); #define SETTING_RESTORE -2 #define SETTING_IGNORE -1 /* WLT hints parsing */ typedef enum { WLT_IDLE = 0, WLT_BATTERY_LIFE = 1, WLT_SUSTAINED = 2, WLT_BURSTY = 3, WLT_INVALID = 4, } wlt_type_t; enum power_profile_daemon_mode { PPD_PERFORMANCE, PPD_BALANCED, PPD_POWERSAVER, PPD_INVALID }; /* Helpers for entering LPMode */ void set_lpm_epp(int val); int get_lpm_epp(void); void set_lpm_epb(int val); int get_lpm_epb(void); int get_epp_epb(int *epp, char *epp_string, int size, int *epb); void set_lpm_itmt(int val); int get_lpm_itmt(void); int get_itmt(void); int set_lpm_irq(cpu_set_t *cpumask, int action); int set_lpm_cpus(enum cpumask_idx idx); /* lpmd_main.c */ int in_debug_mode(void); int do_platform_check(void); /* lpmd_proc.c: interfaces */ int lpmd_lock(void); int lpmd_unlock(void); int in_lpm(void); int in_hfi_lpm(void); int in_suv_lpm(void); int in_auto_mode(void); int get_idle_percentage(void); int get_idle_duration(void); int get_cpu_mode(void); int has_hfi_lpm_monitor(void); int has_hfi_suv_monitor(void); int has_util_monitor(void); int get_util_entry_interval(void); int get_util_exit_interval(void); int get_util_entry_threshold(void); int get_util_exit_threshold(void); int get_util_entry_hyst(void); int get_util_exit_hyst(void); void set_ignore_itmt(void); int process_lpm(enum lpm_command cmd); int process_lpm_unlock(enum lpm_command cmd); int freeze_lpm(void); int restore_lpm(void); void lpmd_terminate(void); void lpmd_force_on(void); void lpmd_force_off(void); void lpmd_set_auto(void); void lpmd_suv_enter(void); void lpmd_suv_exit(void); void lpmd_notify_hfi_event(void); int is_on_battery(void); int get_ppd_mode(void); /* lpmd_proc.c: init func */ int lpmd_main(void); /* lpmd_dbus_server.c */ int intel_dbus_server_init(gboolean (*exit_handler)(void)); /* lpmd_config.c */ int lpmd_get_config(lpmd_config_t *lpmd_config); /* util.c */ int periodic_util_update(lpmd_config_t *lpmd_config, int wlt_index); int util_init(lpmd_config_t *lpmd_config); int use_config_states(void); void reset_config_state(void); /* cpu.c */ int check_cpu_capability(lpmd_config_t *lpmd_config); int init_cpu(char *cmd_cpus, enum lpm_cpu_process_mode mode, int lp_mode_epp); int process_cpus(int enter, enum lpm_cpu_process_mode mode); int parse_cpu_str(char *buf, enum cpumask_idx idx); int is_cpu_lcore(int cpu); int is_cpu_ecore(int cpu); int is_cpu_pcore(int cpu); /* cpu.c: helpers */ int is_cpu_online(int cpu); int is_cpu_for_lpm(int cpu); int get_max_cpus(void); int get_max_online_cpu(void); char* get_cpus_str(enum cpumask_idx idx); char* get_lpm_cpus_hexstr(void); int has_lpm_cpus(void); int has_cpus(enum cpumask_idx idx); void copy_cpu_mask_exclude(enum cpumask_idx source, enum cpumask_idx dest, enum cpumask_idx exlude); void copy_cpu_mask(enum cpumask_idx source, enum cpumask_idx dest); void copy_cpu_mask_exclude(enum cpumask_idx source, enum cpumask_idx dest, enum cpumask_idx exlude); cpu_set_t *get_cpumask(enum cpumask_idx source); int cpumask_to_str(cpu_set_t *cpumask, char *buf, int size); int cpumask_to_hexstr(cpu_set_t *cpumask, char *buf, int size); int cpumask_to_str_reverse(cpu_set_t *mask, char *buf, int size); int is_equal(enum cpumask_idx idx1, enum cpumask_idx idx2); int add_cpu(int cpu, enum cpumask_idx idx); void reset_cpus(enum cpumask_idx idx); int set_lpm_cpus(enum cpumask_idx new); int uevent_init(void); int check_cpu_hotplug(void); /* cpu.c : APIs for SUV mode support */ int process_suv_mode(enum lpm_command cmd); int has_suv_support(void); /* irq.c */ int init_irq(void); int process_irqs(int enter, enum lpm_cpu_process_mode mode); int update_lpm_irq(cpu_set_t *cpumask, int action); /* hfi.c */ int hfi_init(void); int hfi_kill(void); void hfi_receive(void); /* socket.c */ int socket_init_connection(char *name); int socket_send_cmd(char *name, char *data); /* helper */ int lpmd_write_str(const char *name, char *str, int print_level); int lpmd_write_str_verbose(const char *name, char *str, int print_level); int lpmd_write_str_append(const char *name, char *str, int print_level); int lpmd_write_int(const char *name, int val, int print_level); int lpmd_open(const char *name, int print_level); int lpmd_read_int(const char *name, int *val, int print_level); char* get_time(void); void time_start(void); char* time_delta(void); uint64_t read_msr(int cpu, uint32_t msr); #endif intel-lpmd-0.0.9/src/include/thermal.h000066400000000000000000000077641477072336600176410ustar00rootroot00000000000000/* * thermal.c: thermal netlink event header * * Copyright (C) 2023 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _UAPI_LINUX_THERMAL_H #define _UAPI_LINUX_THERMAL_H #define THERMAL_NAME_LENGTH 20 enum thermal_device_mode { THERMAL_DEVICE_DISABLED = 0, THERMAL_DEVICE_ENABLED, }; enum thermal_trip_type { THERMAL_TRIP_ACTIVE = 0, THERMAL_TRIP_PASSIVE, THERMAL_TRIP_HOT, THERMAL_TRIP_CRITICAL, }; /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal" #define THERMAL_GENL_VERSION 0x01 #define THERMAL_GENL_SAMPLING_GROUP_NAME "sampling" #define THERMAL_GENL_EVENT_GROUP_NAME "event" /* Attributes of thermal_genl_family */ enum thermal_genl_attr { THERMAL_GENL_ATTR_UNSPEC, THERMAL_GENL_ATTR_TZ, THERMAL_GENL_ATTR_TZ_ID, THERMAL_GENL_ATTR_TZ_TEMP, THERMAL_GENL_ATTR_TZ_TRIP, THERMAL_GENL_ATTR_TZ_TRIP_ID, THERMAL_GENL_ATTR_TZ_TRIP_TYPE, THERMAL_GENL_ATTR_TZ_TRIP_TEMP, THERMAL_GENL_ATTR_TZ_TRIP_HYST, THERMAL_GENL_ATTR_TZ_MODE, THERMAL_GENL_ATTR_TZ_NAME, THERMAL_GENL_ATTR_TZ_CDEV_WEIGHT, THERMAL_GENL_ATTR_TZ_GOV, THERMAL_GENL_ATTR_TZ_GOV_NAME, THERMAL_GENL_ATTR_CDEV, THERMAL_GENL_ATTR_CDEV_ID, THERMAL_GENL_ATTR_CDEV_CUR_STATE, THERMAL_GENL_ATTR_CDEV_MAX_STATE, THERMAL_GENL_ATTR_CDEV_NAME, THERMAL_GENL_ATTR_GOV_NAME, THERMAL_GENL_ATTR_CAPACITY, THERMAL_GENL_ATTR_CAPACITY_CPU_COUNT, THERMAL_GENL_ATTR_CAPACITY_CPU_ID, THERMAL_GENL_ATTR_CAPACITY_CPU_PERF, THERMAL_GENL_ATTR_CAPACITY_CPU_EFF, __THERMAL_GENL_ATTR_MAX, }; #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) enum thermal_genl_sampling { THERMAL_GENL_SAMPLING_TEMP, __THERMAL_GENL_SAMPLING_MAX, }; #define THERMAL_GENL_SAMPLING_MAX (__THERMAL_GENL_SAMPLING_MAX - 1) /* Events of thermal_genl_family */ enum thermal_genl_event { THERMAL_GENL_EVENT_UNSPEC, THERMAL_GENL_EVENT_TZ_CREATE, /* Thermal zone creation */ THERMAL_GENL_EVENT_TZ_DELETE, /* Thermal zone deletion */ THERMAL_GENL_EVENT_TZ_DISABLE, /* Thermal zone disabled */ THERMAL_GENL_EVENT_TZ_ENABLE, /* Thermal zone enabled */ THERMAL_GENL_EVENT_TZ_TRIP_UP, /* Trip point crossed the way up */ THERMAL_GENL_EVENT_TZ_TRIP_DOWN, /* Trip point crossed the way down */ THERMAL_GENL_EVENT_TZ_TRIP_CHANGE, /* Trip point changed */ THERMAL_GENL_EVENT_TZ_TRIP_ADD, /* Trip point added */ THERMAL_GENL_EVENT_TZ_TRIP_DELETE, /* Trip point deleted */ THERMAL_GENL_EVENT_CDEV_ADD, /* Cdev bound to the thermal zone */ THERMAL_GENL_EVENT_CDEV_DELETE, /* Cdev unbound */ THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, /* Cdev state updated */ THERMAL_GENL_EVENT_TZ_GOV_CHANGE, /* Governor policy changed */ THERMAL_GENL_EVENT_CAPACITY_CHANGE, /* CPU capacity changed */ __THERMAL_GENL_EVENT_MAX, }; #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1) /* Commands supported by the thermal_genl_family */ enum thermal_genl_cmd { THERMAL_GENL_CMD_UNSPEC, THERMAL_GENL_CMD_TZ_GET_ID, /* List of thermal zones id */ THERMAL_GENL_CMD_TZ_GET_TRIP, /* List of thermal trips */ THERMAL_GENL_CMD_TZ_GET_TEMP, /* Get the thermal zone temperature */ THERMAL_GENL_CMD_TZ_GET_GOV, /* Get the thermal zone governor */ THERMAL_GENL_CMD_TZ_GET_MODE, /* Get the thermal zone mode */ THERMAL_GENL_CMD_CDEV_GET, /* List of cdev id */ __THERMAL_GENL_CMD_MAX, }; #define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1) #endif /* _UAPI_LINUX_THERMAL_H */ intel-lpmd-0.0.9/src/intel_lpmd_dbus_interface.xml000066400000000000000000000010131477072336600222740ustar00rootroot00000000000000 intel-lpmd-0.0.9/src/lpmd_config.c000066400000000000000000000524171477072336600170310ustar00rootroot00000000000000/* * lpmd_config.c: xml config file parser * * Copyright (C) 2023 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "lpmd.h" #include #include #define CONFIG_FILE_NAME "intel_lpmd_config.xml" #define MAX_FILE_NAME_PATH 128 static void lpmd_dump_config(lpmd_config_t *lpmd_config) { int i; lpmd_config_state_t *state; if (!lpmd_config) return; lpmd_log_info ("Mode:%d\n", lpmd_config->mode); lpmd_log_info ("HFI LPM Enable:%d\n", lpmd_config->hfi_lpm_enable); lpmd_log_info ("HFI SUV Enable:%d\n", lpmd_config->hfi_suv_enable); lpmd_log_info ("WLT Hint Enable:%d\n", lpmd_config->wlt_hint_enable); lpmd_log_info ("WLT Proxy Enable:%d\n", lpmd_config->wlt_proxy_enable); lpmd_log_info ("WLT Proxy Enable:%d\n", lpmd_config->wlt_hint_poll_enable); lpmd_log_info ("Util entry threshold:%d\n", lpmd_config->util_entry_threshold); lpmd_log_info ("Util exit threshold:%d\n", lpmd_config->util_exit_threshold); lpmd_log_info ("Util LP Mode CPUs:%s\n", lpmd_config->lp_mode_cpus); lpmd_log_info ("EPP in LP Mode:%d\n", lpmd_config->lp_mode_epp); if (!lpmd_config->config_state_count) return; lpmd_log_info ("CPU Family:%d\n", lpmd_config->cpu_family); lpmd_log_info ("CPU Model:%d\n", lpmd_config->cpu_model); lpmd_log_info ("CPU Config:%s\n", lpmd_config->cpu_config); for (i = 0; i < lpmd_config->config_state_count; ++i) { state = &lpmd_config->config_states[i]; lpmd_log_info ("ID:%d\n", state->id); lpmd_log_info ("\tName:%s\n", state->name); lpmd_log_info ("\tentry_system_load_thres:%d\n", state->entry_system_load_thres); lpmd_log_info ("\texit_system_load_thres:%d\n", state->exit_system_load_thres); lpmd_log_info ("\texit_system_load_hyst:%d\n", state->exit_system_load_hyst); lpmd_log_info ("\tentry_cpu_load_thres:%d\n", state->enter_cpu_load_thres); lpmd_log_info ("\texit_cpu_load_thres:%d\n", state->exit_cpu_load_thres); lpmd_log_info ("\tentry_gfx_load_thres:%d\n", state->enter_gfx_load_thres); lpmd_log_info ("\texit_gfx_load_thres:%d\n", state->exit_gfx_load_thres); lpmd_log_info ("\tWLT Type:%d\n", state->wlt_type); lpmd_log_info ("\tmin_poll_interval:%d\n", state->min_poll_interval); lpmd_log_info ("\tmax_poll_interval:%d\n", state->max_poll_interval); lpmd_log_info ("\tpoll_interval_increment:%d\n", state->poll_interval_increment); lpmd_log_info ("\tEPP:%d\n", state->epp); lpmd_log_info ("\tEPB:%d\n", state->epb); lpmd_log_info ("\tITMTState:%d\n", state->itmt_state); lpmd_log_info ("\tIRQMigrate:%d\n", state->irq_migrate); if (state->active_cpus[0] != '\0') lpmd_log_info ("\tactive_cpus:%s\n", state->active_cpus); lpmd_log_info ("\tisland_0_number_p_cores:%d\n", state->island_0_number_p_cores); lpmd_log_info ("\tisland_0_number_e_cores:%d\n", state->island_0_number_e_cores); lpmd_log_info ("\tisland_1_number_p_cores:%d\n", state->island_1_number_p_cores); lpmd_log_info ("\tisland_1_number_e_cores:%d\n", state->island_1_number_e_cores); lpmd_log_info ("\tisland_2_number_p_cores:%d\n", state->island_2_number_p_cores); lpmd_log_info ("\tisland_2_number_e_cores:%d\n", state->island_2_number_e_cores); } } /* Set all of them Some of the operations are redundant, but it is useful to*/ static void lpmd_init_config_state(lpmd_config_state_t *state) { state->id = -1; state->valid = 0; state->name[0] = '\0'; state->wlt_type = -1; state->entry_system_load_thres = 0; state->exit_system_load_thres = 0; state->exit_system_load_hyst = 0; state->enter_cpu_load_thres = 0; state->exit_cpu_load_thres = 0; state->enter_gfx_load_thres = 0; state->exit_gfx_load_thres = 0; state->min_poll_interval = 0; state->max_poll_interval = 0; state->poll_interval_increment = 0; state->epp = SETTING_IGNORE; state->epb = SETTING_IGNORE; state->active_cpus[0] = '\0'; state->island_0_number_p_cores = 0; state->island_0_number_e_cores = 0; state->island_1_number_p_cores = 0; state->island_1_number_e_cores = 0; state->island_2_number_p_cores = 0; state->island_2_number_e_cores = 0; state->itmt_state = SETTING_IGNORE; state->irq_migrate = SETTING_IGNORE; state->entry_load_sys = 0; state->entry_load_cpu = 0; } static void lpmd_parse_state(xmlDoc *doc, xmlNode *a_node, lpmd_config_state_t *state) { xmlNode *cur_node = NULL; char *tmp_value; char *pos; if (!doc || !a_node || !state) return; lpmd_init_config_state(state); for (cur_node = a_node; cur_node; cur_node = cur_node->next) { if (cur_node->type == XML_ELEMENT_NODE) { tmp_value = (char*) xmlNodeListGetString (doc, cur_node->xmlChildrenNode, 1); if (tmp_value) { if (!strncmp((const char*)cur_node->name, "ID", strlen("ID"))) state->id = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "Name", strlen("Name"))) { snprintf(state->name, MAX_STATE_NAME - 1, "%s", tmp_value); state->name[MAX_STATE_NAME - 1] = '\0'; } if (!strncmp((const char*)cur_node->name, "WLTType", strlen("WLTType"))) state->wlt_type = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "EntrySystemLoadThres", strlen("EntrySystemLoadThres"))) state->entry_system_load_thres = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "ExitSystemLoadThres", strlen("ExitSystemLoadThres"))) state->exit_system_load_thres = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "ExitSystemLoadhysteresis", strlen("ExitSystemLoadhysteresis"))) state->exit_system_load_hyst = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "EnterCPULoadThres", strlen("EnterCPULoadThres"))) state->enter_cpu_load_thres = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "ExitCPULoadThres", strlen("ExitCPULoadThres"))) state->exit_cpu_load_thres = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "EnterGFXLoadThres", strlen("EnterGFXLoadThres"))) state->enter_gfx_load_thres = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "ExitGFXLoadThres", strlen("ExitGFXLoadThres"))) state->exit_gfx_load_thres = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "MinPollInterval", strlen("MinPollInterval"))) state->min_poll_interval = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "MaxPollInterval", strlen("MaxPollInterval"))) state->max_poll_interval = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "PollIntervalIncrement", strlen("PollIntervalIncrement"))) state->poll_interval_increment = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "EPP", strlen("EPP"))) state->epp = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "EPB", strlen("EPB"))) state->epb = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "ITMTState", strlen("ITMTState"))) state->itmt_state = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "IRQMigrate", strlen("IRQMigrate"))) state->irq_migrate = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "Island0Pcores", strlen("Island0Pcores"))) state->island_0_number_p_cores = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "Island0Ecores", strlen("Island0Ecores"))) state->island_0_number_e_cores = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "Island1Pcores", strlen("Island1Pcores"))) state->island_1_number_p_cores = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "Island1Ecores", strlen("Island1Ecores"))) state->island_1_number_e_cores = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "Island2Pcores", strlen("Island2Pcores"))) state->island_2_number_p_cores = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "Island2Ecores", strlen("Island2Ecores"))) state->island_2_number_e_cores = strtol (tmp_value, &pos, 10); if (!strncmp((const char*)cur_node->name, "ActiveCPUs", strlen("ActiveCPUs"))) { if (!strncmp (tmp_value, "-1", strlen ("-1"))) state->active_cpus[0] = '\0'; else snprintf (state->active_cpus, sizeof(state->active_cpus), "%s", tmp_value); } xmlFree(tmp_value); } } } } static int validate_config_state(lpmd_config_t *lpmd_config, lpmd_config_state_t *state) { if (!state->enter_gfx_load_thres && (lpmd_config->wlt_hint_enable || lpmd_config->wlt_proxy_enable)) { if (state->wlt_type >=0 && state->wlt_type < WLT_INVALID) state->valid = 1; } else { if ((state->enter_cpu_load_thres > 0 && state->enter_cpu_load_thres <= 100) || (state->entry_system_load_thres > 0 && state->entry_system_load_thres <= 100) || (state->enter_gfx_load_thres > 0 && state->enter_gfx_load_thres <= 100)) state->valid = 1; } return 0; } static int is_wildcard(char *str) { if (!str) return 1; if (strncmp(str, "*", strlen("*"))) return 1; if (strncmp(str, " * ", strlen(" * "))) return 1; return 0; } static void lpmd_parse_states(xmlDoc *doc, xmlNode *a_node, lpmd_config_t *lpmd_config) { xmlNode *cur_node = NULL; char *tmp_value; char *pos; int config_state_count = 0; int cpu_family = -1, cpu_model = -1; char cpu_config[MAX_CONFIG_LEN]; if (!doc || !a_node || !lpmd_config) return; /* A valid states table has been parsed */ if (lpmd_config->config_state_count) return; cpu_config[0] = '\0'; for (cur_node = a_node; cur_node; cur_node = cur_node->next) { if (cur_node->type == XML_ELEMENT_NODE) { if (cur_node->name) { tmp_value = (char*) xmlNodeListGetString (doc, cur_node->xmlChildrenNode, 1); if (!strncmp ((const char*) cur_node->name, "CPUFamily", strlen ("CPUFamily"))) { if (is_wildcard(tmp_value)) cpu_family = lpmd_config->cpu_family; else cpu_family = strtol (tmp_value, &pos, 10); } if (!strncmp ((const char*) cur_node->name, "CPUModel", strlen ("CPUModel"))) { if (is_wildcard(tmp_value)) cpu_model = lpmd_config->cpu_model; else cpu_model = strtol (tmp_value, &pos, 10); } if (!strncmp ((const char*) cur_node->name, "CPUConfig", strlen ("CPUConfig"))) { if (is_wildcard(tmp_value)) { strncpy(cpu_config, lpmd_config->cpu_config, MAX_CONFIG_LEN); } else { snprintf (cpu_config, MAX_CONFIG_LEN - 1, "%s", tmp_value); } cpu_config[MAX_CONFIG_LEN - 1] = '\0'; } if (tmp_value) xmlFree (tmp_value); if (strncmp ((const char*) cur_node->name, "State", strlen ("State"))) continue; /* Must check cpu family/model/config first to make sure the states applies */ if (cpu_family != lpmd_config->cpu_family || cpu_model != lpmd_config->cpu_model || strncmp(cpu_config, lpmd_config->cpu_config, MAX_CONFIG_LEN)) { lpmd_log_info("Ignore unsupported states for CPU family:%d,model%d,config:%s\n", cpu_family, cpu_model, cpu_config); return; } if (lpmd_config->config_state_count >= MAX_CONFIG_STATES) break; lpmd_parse_state (doc, cur_node->children, &lpmd_config->config_states[config_state_count]); validate_config_state(lpmd_config, &lpmd_config->config_states[config_state_count]); config_state_count += lpmd_config->config_states[config_state_count].valid; } } } lpmd_config->config_state_count = config_state_count; } static int lpmd_fill_config(xmlDoc *doc, xmlNode *a_node, lpmd_config_t *lpmd_config) { xmlNode *cur_node = NULL; char *tmp_value; char *pos; if (!doc || !a_node || !lpmd_config) return LPMD_ERROR; lpmd_config->performance_def = lpmd_config->balanced_def = lpmd_config->powersaver_def = LPM_FORCE_OFF; lpmd_config->lp_mode_epp = -1; for (cur_node = a_node; cur_node; cur_node = cur_node->next) { if (cur_node->type == XML_ELEMENT_NODE) { tmp_value = (char*) xmlNodeListGetString (doc, cur_node->xmlChildrenNode, 1); if (tmp_value) { if (!strncmp((const char*)cur_node->name, "Mode", strlen("Mode"))) { errno = 0; lpmd_config->mode = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0' || lpmd_config->mode > LPM_CPU_MODE_MAX || lpmd_config->mode < 0) goto err; } else if (!strncmp((const char*)cur_node->name, "HfiLpmEnable", strlen("HfiLpmEnable"))) { errno = 0; lpmd_config->hfi_lpm_enable = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0' || (lpmd_config->hfi_lpm_enable != 1 && lpmd_config->hfi_lpm_enable != 0)) goto err; } else if (!strncmp((const char*)cur_node->name, "HfiSuvEnable", strlen("HfiSuvEnable"))) { errno = 0; lpmd_config->hfi_suv_enable = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0' || (lpmd_config->hfi_suv_enable != 1 && lpmd_config->hfi_suv_enable != 0)) goto err; } else if (!strncmp((const char*)cur_node->name, "WLTHintEnable", strlen("WLtHintEnable"))) { errno = 0; lpmd_config->wlt_hint_enable = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0' || (lpmd_config->wlt_hint_enable != 1 && lpmd_config->wlt_hint_enable != 0)) goto err; } else if (!strncmp((const char*)cur_node->name, "WLTHintPollEnable", strlen("WLtHintPollEnable"))) { errno = 0; lpmd_config->wlt_hint_poll_enable = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0' || (lpmd_config->wlt_hint_poll_enable != 1 && lpmd_config->wlt_hint_poll_enable != 0)) goto err; } else if (!strncmp((const char*)cur_node->name, "WLTProxyEnable", strlen("WLTProxyEnable"))) { errno = 0; lpmd_config->wlt_proxy_enable = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0' || (lpmd_config->wlt_proxy_enable != 1 && lpmd_config->wlt_proxy_enable != 0)) goto err; } else if (!strncmp((const char*)cur_node->name, "EntryDelayMS", strlen ("EntryDelayMS"))) { errno = 0; lpmd_config->util_entry_delay = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0'|| lpmd_config->util_entry_delay < 0 || lpmd_config->util_entry_delay > UTIL_DELAY_MAX) goto err; } else if (!strncmp((const char*)cur_node->name, "ExitDelayMS", strlen ("ExitDelayMS"))) { errno = 0; lpmd_config->util_exit_delay = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0'|| lpmd_config->util_exit_delay < 0 || lpmd_config->util_exit_delay > UTIL_DELAY_MAX) goto err; } else if (!strncmp((const char*)cur_node->name, "util_entry_threshold", strlen ("util_entry_threshold"))) { errno = 0; lpmd_config->util_entry_threshold = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0' || lpmd_config->util_entry_threshold < 0 || lpmd_config->util_entry_threshold > 100) goto err; } else if (!strncmp((const char*)cur_node->name, "util_exit_threshold", strlen ("util_exit_threshold"))) { errno = 0; lpmd_config->util_exit_threshold = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0' || lpmd_config->util_exit_threshold < 0 || lpmd_config->util_exit_threshold > 100) goto err; } else if (!strncmp((const char*)cur_node->name, "EntryHystMS", strlen ("EntryHystMS"))) { errno = 0; lpmd_config->util_entry_hyst = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0'|| lpmd_config->util_entry_hyst < 0 || lpmd_config->util_entry_hyst > UTIL_HYST_MAX) goto err; } else if (!strncmp((const char*)cur_node->name, "ExitHystMS", strlen ("ExitHystMS"))) { errno = 0; lpmd_config->util_exit_hyst = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0'|| lpmd_config->util_exit_hyst < 0 || lpmd_config->util_exit_hyst > UTIL_HYST_MAX) goto err; } else if (!strncmp((const char*)cur_node->name, "lp_mode_epp", strlen ("lp_mode_epp"))) { errno = 0; lpmd_config->lp_mode_epp = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0'|| lpmd_config->lp_mode_epp > 255 || lpmd_config->lp_mode_epp < -1) goto err; if (lpmd_config->lp_mode_epp < 0) lpmd_config->lp_mode_epp = -1; } else if (!strncmp((const char*)cur_node->name, "IgnoreITMT", strlen ("IgnoreITMT"))) { errno = 0; lpmd_config->ignore_itmt = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0'|| lpmd_config->ignore_itmt < 0 || lpmd_config->ignore_itmt > 1) goto err; } else if (!strncmp((const char*)cur_node->name, "lp_mode_cpus", strlen ("lp_mode_cpus"))) { if (!strncmp (tmp_value, "-1", strlen ("-1"))) lpmd_config->lp_mode_cpus[0] = '\0'; else snprintf (lpmd_config->lp_mode_cpus, sizeof(lpmd_config->lp_mode_cpus), "%s", tmp_value); } else if (!strncmp((const char*)cur_node->name, "PerformanceDef", strlen ("PerformanceDef"))) { errno = 0; lpmd_config->performance_def = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0') goto err; if (lpmd_config->performance_def == -1) lpmd_config->performance_def = LPM_FORCE_OFF; else if (lpmd_config->performance_def == 1) lpmd_config->performance_def = LPM_FORCE_ON; else if (!lpmd_config->performance_def) lpmd_config->performance_def = LPM_AUTO; else goto err; } else if (!strncmp((const char*)cur_node->name, "BalancedDef", strlen ("BalancedDef"))) { errno = 0; lpmd_config->balanced_def = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0') goto err; if (lpmd_config->balanced_def == -1) lpmd_config->balanced_def = LPM_FORCE_OFF; else if (lpmd_config->balanced_def == 1) lpmd_config->balanced_def = LPM_FORCE_ON; else if (!lpmd_config->balanced_def) lpmd_config->balanced_def = LPM_AUTO; else goto err; } else if (!strncmp((const char*)cur_node->name, "PowersaverDef", strlen ("PowersaverDef"))) { errno = 0; lpmd_config->powersaver_def = strtol (tmp_value, &pos, 10); if (errno || *pos != '\0') goto err; if (lpmd_config->powersaver_def == -1) lpmd_config->powersaver_def = LPM_FORCE_OFF; else if (lpmd_config->powersaver_def == 1) lpmd_config->powersaver_def = LPM_FORCE_ON; else if (!lpmd_config->powersaver_def) lpmd_config->powersaver_def = LPM_AUTO; else goto err; } else if (!strncmp((const char*)cur_node->name, "States", strlen ("States"))) { errno = 0; lpmd_parse_states(doc, cur_node->children, lpmd_config); } else { lpmd_log_info ("Invalid configuration data\n"); goto err; } xmlFree (tmp_value); continue; err: xmlFree (tmp_value); lpmd_log_error ("node type: Element, name: %s value: %s\n", cur_node->name, tmp_value); return LPMD_ERROR; } } } /* use entry_threshold == 0 or exit_threshold == 0 to effectively disable util monitor */ if (lpmd_config->util_entry_threshold && lpmd_config->util_exit_threshold) lpmd_config->util_enable = 1; else lpmd_config->util_enable = 0; return LPMD_SUCCESS; } int lpmd_get_config(lpmd_config_t *lpmd_config) { char file_name[MAX_FILE_NAME_PATH]; xmlNode *root_element; xmlNode *cur_node; struct stat s; xmlDoc *doc; if (!lpmd_config) return LPMD_ERROR; snprintf(file_name, MAX_FILE_NAME_PATH, "%s/intel_lpmd_config_F%d_M%d_T%d.xml", TDCONFDIR, lpmd_config->cpu_family, lpmd_config->cpu_model, lpmd_config->tdp); lpmd_log_msg ("Looking for config file %s\n", file_name); if (!stat (file_name, &s)) goto process_xml; snprintf(file_name, MAX_FILE_NAME_PATH, "%s/intel_lpmd_config_F%d_M%d.xml", TDCONFDIR, lpmd_config->cpu_family, lpmd_config->cpu_model); lpmd_log_msg ("Looking for config file %s\n", file_name); if (!stat (file_name, &s)) goto process_xml; snprintf (file_name, MAX_FILE_NAME_PATH, "%s/%s", TDCONFDIR, CONFIG_FILE_NAME); lpmd_log_msg ("Reading configuration file %s\n", file_name); if (stat (file_name, &s)) { lpmd_log_msg ("error: could not find file %s\n", file_name); return LPMD_ERROR; } process_xml: doc = xmlReadFile (file_name, NULL, 0); if (doc == NULL) { lpmd_log_msg ("error: could not parse file %s\n", file_name); return LPMD_ERROR; } root_element = xmlDocGetRootElement (doc); if (root_element == NULL) { lpmd_log_warn ("error: could not get root element\n"); return LPMD_ERROR; } cur_node = NULL; for (cur_node = root_element; cur_node; cur_node = cur_node->next) { if (cur_node->type == XML_ELEMENT_NODE) { if (!strncmp ((const char*) cur_node->name, "Configuration", strlen ("Configuration"))) { if (lpmd_fill_config (doc, cur_node->children, lpmd_config) != LPMD_SUCCESS) { xmlFreeDoc (doc); return LPMD_ERROR; } } } } xmlFreeDoc (doc); lpmd_dump_config (lpmd_config); return LPMD_SUCCESS; } intel-lpmd-0.0.9/src/lpmd_cpu.c000066400000000000000000001234611477072336600163510ustar00rootroot00000000000000/* * lpmd_cpu.c: CPU related processing * * Copyright (C) 2023 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * This file contain functions to manage Linux cpuset for LP CPUs. Also using * power clamp in lieu of Linux cpuset. There are helper functions to format * cpuset strings based on the which cpuset method is used or power clamp low * power cpumask. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "lpmd.h" static int topo_max_cpus; static int max_online_cpu; static size_t size_cpumask; struct lpm_cpus { cpu_set_t *mask; char *name; char *str; char *str_reverse; char *hexstr; char *hexstr_reverse; }; static struct lpm_cpus cpumasks[CPUMASK_MAX] = { [CPUMASK_LPM_DEFAULT] = { .name = "Low Power", }, [CPUMASK_ONLINE] = { .name = "Online", }, [CPUMASK_HFI] = { .name = "HFI Low Power", }, [CPUMASK_HFI_BANNED] = { .name = "HFI BANNED", }, [CPUMASK_HFI_SUV] = { .name = "HFI SUV", }, [CPUMASK_HFI_LAST] = { .name = "HFI LAST", }, }; static enum cpumask_idx lpm_cpus_cur = CPUMASK_MAX; int is_cpu_online(int cpu) { if (cpu < 0 || cpu >= topo_max_cpus) return 0; if (!cpumasks[CPUMASK_ONLINE].mask) return 0; return CPU_ISSET_S(cpu, size_cpumask, cpumasks[CPUMASK_ONLINE].mask); } int is_cpu_for_lpm(int cpu) { if (cpu < 0 || cpu >= topo_max_cpus) return 0; if (lpm_cpus_cur == CPUMASK_MAX) return 0; if (!cpumasks[lpm_cpus_cur].mask) return 0; return !!CPU_ISSET_S(cpu, size_cpumask, cpumasks[lpm_cpus_cur].mask); } int get_max_cpus(void) { return topo_max_cpus; } int get_max_online_cpu(void) { return max_online_cpu; } static size_t alloc_cpu_set(cpu_set_t **cpu_set) { cpu_set_t *_cpu_set; size_t size; _cpu_set = CPU_ALLOC((topo_max_cpus + 1)); if (_cpu_set == NULL) err (3, "CPU_ALLOC"); size = CPU_ALLOC_SIZE((topo_max_cpus + 1)); CPU_ZERO_S(size, _cpu_set); *cpu_set = _cpu_set; if (!size_cpumask) size_cpumask = size; if (size_cpumask && size_cpumask != size) { lpmd_log_error ("Conflict cpumask size %zu vs. %zu\n", size, size_cpumask); exit (-1); } return size; } static int cpu_migrate(int cpu) { cpu_set_t *mask; int ret; alloc_cpu_set (&mask); CPU_SET_S(cpu, size_cpumask, mask); ret = sched_setaffinity(0, size_cpumask, mask); CPU_FREE(mask); if (ret == -1) return -1; else return 0; } int cpumask_to_str(cpu_set_t *mask, char *buf, int length) { int i; int offset = 0; buf[0] = '\0'; for (i = 0; i < topo_max_cpus; i++) { if (!CPU_ISSET_S(i, size_cpumask, mask)) continue; if (length - 1 < offset) { lpmd_log_debug ("cpumask_to_str: Too many cpus\n"); return 1; } offset += snprintf (buf + offset, length - 1 - offset, "%d,", i); } if (offset) buf[offset - 1] = '\0'; return 0; } static char to_hexchar(int val) { if (val <= 9) return val + '0'; if (val >= 16) return -1; return val - 10 + 'a'; } int cpumask_to_hexstr(cpu_set_t *mask, char *str, int size) { int cpu; int i; int pos = 0; char c = 0; for (cpu = 0; cpu < topo_max_cpus; cpu++) { i = cpu % 4; if (!i) c = 0; if (CPU_ISSET_S(cpu, size_cpumask, mask)) c += (1 << i); if (i == 3) { str[pos] = to_hexchar (c); pos++; if (pos >= size) return -1; } } str[pos] = '\0'; pos--; for (i = 0; i <= pos / 2; i++) { c = str[i]; str[i] = str[pos - i]; str[pos - i] = c; } return 0; } char* get_cpus_str(enum cpumask_idx idx) { if (!cpumasks[idx].mask) return NULL; if (!CPU_COUNT_S(size_cpumask, cpumasks[idx].mask)) return NULL; if (cpumasks[idx].str) return cpumasks[idx].str; cpumasks[idx].str = calloc (MAX_STR_LENGTH, 1); if (!cpumasks[idx].str) err (3, "STR_ALLOC"); cpumask_to_str (cpumasks[idx].mask, cpumasks[idx].str, MAX_STR_LENGTH); return cpumasks[idx].str; } static char* get_cpus_hexstr(enum cpumask_idx idx) { if (!cpumasks[idx].mask) return NULL; if (!CPU_COUNT_S(size_cpumask, cpumasks[idx].mask)) return NULL; if (cpumasks[idx].hexstr) return cpumasks[idx].hexstr; cpumasks[idx].hexstr = calloc (MAX_STR_LENGTH, 1); if (!cpumasks[idx].hexstr) err (3, "STR_ALLOC"); cpumask_to_hexstr (cpumasks[idx].mask, cpumasks[idx].hexstr, MAX_STR_LENGTH); return cpumasks[idx].hexstr; } char* get_lpm_cpus_hexstr(void) { return get_cpus_hexstr (lpm_cpus_cur); } static char* get_cpus_hexstr_reverse(enum cpumask_idx idx) { cpu_set_t *mask; if (!cpumasks[idx].mask) return NULL; if (!CPU_COUNT_S(size_cpumask, cpumasks[idx].mask)) return NULL; if (cpumasks[idx].hexstr_reverse) return cpumasks[idx].hexstr_reverse; cpumasks[idx].hexstr_reverse = calloc (MAX_STR_LENGTH, 1); if (!cpumasks[idx].hexstr_reverse) err (3, "STR_ALLOC"); alloc_cpu_set (&mask); CPU_XOR_S(size_cpumask, mask, cpumasks[idx].mask, cpumasks[CPUMASK_ONLINE].mask); cpumask_to_hexstr (mask, cpumasks[idx].hexstr_reverse, MAX_STR_LENGTH); CPU_FREE(mask); return cpumasks[idx].hexstr_reverse; } int cpumask_to_str_reverse(cpu_set_t *mask, char *buf, int size) { cpu_set_t *tmp; alloc_cpu_set (&tmp); CPU_XOR_S(size_cpumask, tmp, mask, cpumasks[CPUMASK_ONLINE].mask); cpumask_to_str (tmp, buf, size); CPU_FREE(tmp); return 0; } static char* get_cpus_str_reverse(enum cpumask_idx idx) { cpu_set_t *mask; if (!cpumasks[idx].mask) return NULL; if (!CPU_COUNT_S(size_cpumask, cpumasks[idx].mask)) return NULL; if (cpumasks[idx].str_reverse) return cpumasks[idx].str_reverse; cpumasks[idx].str_reverse = calloc (MAX_STR_LENGTH, 1); if (!cpumasks[idx].str_reverse) err (3, "STR_ALLOC"); alloc_cpu_set (&mask); CPU_XOR_S(size_cpumask, mask, cpumasks[idx].mask, cpumasks[CPUMASK_ONLINE].mask); cpumask_to_str (mask, cpumasks[idx].str_reverse, MAX_STR_LENGTH); CPU_FREE(mask); return cpumasks[idx].str_reverse; } static int get_cpus_hexvals(enum cpumask_idx idx, uint8_t *vals, int size) { int i, j, k; uint8_t v = 0; if (!cpumasks[idx].mask) return -1; for (i = 0; i < topo_max_cpus; i++) { j = i % 8; k = i / 8; if (k >= size) { lpmd_log_error ("size too big\n"); return -1; } if (!CPU_ISSET_S(i, size_cpumask, cpumasks[idx].mask)) goto set_val; v |= 1 << j; set_val: if (j == 7) { vals[k] = v; v = 0; } } return 0; } int is_equal(enum cpumask_idx idx1, enum cpumask_idx idx2) { if (!cpumasks[idx1].mask || !cpumasks[idx2].mask) return 0; if (CPU_EQUAL_S(size_cpumask, cpumasks[idx1].mask, cpumasks[idx2].mask)) return 1; return 0; } int has_cpus(enum cpumask_idx idx) { if (idx == CPUMASK_MAX) return 0; if (!cpumasks[idx].mask) return 0; return CPU_COUNT_S(size_cpumask, cpumasks[idx].mask); } int has_lpm_cpus(void) { return has_cpus (lpm_cpus_cur); } cpu_set_t *get_cpumask(enum cpumask_idx idx) { return cpumasks[idx].mask; } static int _add_cpu(int cpu, enum cpumask_idx idx) { if (idx != CPUMASK_ONLINE && !is_cpu_online (cpu)) return 0; if (!cpumasks[idx].mask) alloc_cpu_set (&cpumasks[idx].mask); CPU_SET_S(cpu, size_cpumask, cpumasks[idx].mask); return LPMD_SUCCESS; } int add_cpu(int cpu, enum cpumask_idx idx) { if (cpu < 0 || cpu >= topo_max_cpus) return 0; _add_cpu (cpu, idx); if (idx & (CPUMASK_HFI | CPUMASK_HFI_SUV | CPUMASK_HFI_BANNED)) return 0; if (idx == CPUMASK_LPM_DEFAULT) { lpmd_log_info ("\tDetected %s CPU%d\n", cpumasks[idx].name, cpu); } else { if (idx < CPUMASK_MAX) lpmd_log_debug ("\tDetected %s CPU%d\n", cpumasks[idx].name, cpu); else lpmd_log_debug ("\tIncorrect CPU ID for CPU%d\n", cpu); } return 0; } void reset_cpus(enum cpumask_idx idx) { if (cpumasks[idx].mask) CPU_ZERO_S(size_cpumask, cpumasks[idx].mask); free (cpumasks[idx].str); free (cpumasks[idx].str_reverse); free (cpumasks[idx].hexstr); free (cpumasks[idx].hexstr_reverse); cpumasks[idx].str = NULL; cpumasks[idx].str_reverse = NULL; cpumasks[idx].hexstr = NULL; cpumasks[idx].hexstr_reverse = NULL; lpm_cpus_cur = CPUMASK_LPM_DEFAULT; } void copy_cpu_mask(enum cpumask_idx source, enum cpumask_idx dest) { int i; for (i = 0; i < topo_max_cpus; i++) { if (!CPU_ISSET_S(i, size_cpumask, cpumasks[source].mask)) continue; _add_cpu(i, dest); } } void copy_cpu_mask_exclude(enum cpumask_idx source, enum cpumask_idx dest, enum cpumask_idx exlude) { int i; for (i = 0; i < topo_max_cpus; i++) { if (!CPU_ISSET_S(i, size_cpumask, cpumasks[source].mask)) continue; if (CPU_ISSET_S(i, size_cpumask, cpumasks[exlude].mask)) continue; _add_cpu(i, dest); } } int set_lpm_cpus(enum cpumask_idx new) { if (lpm_cpus_cur == new) return 0; if (new == CPUMASK_HFI_SUV) CPU_XOR_S(size_cpumask, cpumasks[new].mask, cpumasks[CPUMASK_ONLINE].mask, cpumasks[new].mask); lpm_cpus_cur = new; return 0; } #define BITMASK_SIZE 32 static int set_max_cpu_num(void) { FILE *filep; unsigned long dummy; int i; topo_max_cpus = 0; for (i = 0; i < 256; ++i) { char path[MAX_STR_LENGTH]; snprintf (path, sizeof(path), "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", i); filep = fopen (path, "r"); if (filep) break; } if (!filep) { lpmd_log_error ("Can't get max cpu number\n"); return -1; } while (fscanf (filep, "%lx,", &dummy) == 1) topo_max_cpus += BITMASK_SIZE; fclose (filep); lpmd_log_debug ("\t%d CPUs supported in maximum\n", topo_max_cpus); return 0; } /* Handling EPP */ #define MAX_EPP_STRING_LENGTH 32 struct cpu_info { char epp_str[MAX_EPP_STRING_LENGTH]; int epp; int epb; }; static struct cpu_info *saved_cpu_info; static int lp_mode_epp = SETTING_IGNORE; int get_lpm_epp(void) { return lp_mode_epp; } void set_lpm_epp(int val) { lp_mode_epp = val; } static int lp_mode_epb = SETTING_IGNORE; int get_lpm_epb(void) { return lp_mode_epb; } void set_lpm_epb(int val) { lp_mode_epb = val; } int get_epp(char *path, int *val, char *str, int size) { FILE *filep; int epp; int ret; filep = fopen (path, "r"); if (!filep) return 1; ret = fscanf (filep, "%d", &epp); if (ret == 1) { *val = epp; ret = 0; goto end; } ret = fread (str, 1, size, filep); if (ret <= 0) ret = 1; else { if (ret >= size) ret = size - 1; str[ret - 1] = '\0'; ret = 0; } end: fclose (filep); return ret; } int set_epp(char *path, int val, char *str) { FILE *filep; int ret; filep = fopen (path, "r+"); if (!filep) return 1; if (val >= 0) ret = fprintf (filep, "%d", val); else if (str && str[0] != '\0') ret = fprintf (filep, "%s", str); else { fclose (filep); return 1; } fclose (filep); if (ret <= 0) { if (val >= 0) lpmd_log_error ("Write \"%d\" to %s failed, ret %d\n", val, path, ret); else lpmd_log_error ("Write \"%s\" to %s failed, ret %d\n", str, path, ret); } return !(ret > 0); } static char *get_ppd_default_epp(void) { int ppd_mode = get_ppd_mode(); if (ppd_mode == PPD_INVALID) return NULL; if (ppd_mode == PPD_PERFORMANCE) return "performance"; if (ppd_mode == PPD_POWERSAVER) return "power"; if (is_on_battery()) return "balance_power"; return "balance_performance"; } int get_epp_epb(int *epp, char *epp_str, int size, int *epb) { int c; char path[MAX_STR_LENGTH]; for (c = 0; c < max_online_cpu; c++) { if (!is_cpu_online (c)) continue; *epp = -1; epp_str[0] = '\0'; snprintf (path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/energy_performance_preference", c); get_epp (path, epp, epp_str, size); snprintf(path, MAX_STR_LENGTH, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", c); lpmd_read_int(path, epb, -1); return 0; } return 1; } int init_epp_epb(void) { int max_cpus = get_max_cpus (); int c; int ret; char path[MAX_STR_LENGTH]; saved_cpu_info = calloc (max_cpus, sizeof(struct cpu_info)); for (c = 0; c < max_cpus; c++) { saved_cpu_info[c].epp_str[0] = '\0'; saved_cpu_info[c].epp = -1; if (!is_cpu_online (c)) continue; snprintf (path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/energy_performance_preference", c); ret = get_epp (path, &saved_cpu_info[c].epp, saved_cpu_info[c].epp_str, MAX_EPP_STRING_LENGTH); if (!ret) { if (saved_cpu_info[c].epp != -1) lpmd_log_debug ("CPU%d EPP: 0x%x\n", c, saved_cpu_info[c].epp); else lpmd_log_debug ("CPU%d EPP: %s\n", c, saved_cpu_info[c].epp_str); } snprintf(path, MAX_STR_LENGTH, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", c); ret = lpmd_read_int(path, &saved_cpu_info[c].epb, -1); if (ret) { saved_cpu_info[c].epb = -1; continue; } lpmd_log_debug ("CPU%d EPB: 0x%x\n", c, saved_cpu_info[c].epb); } return 0; } int process_epp_epb(void) { int max_cpus = get_max_cpus (); int c; int ret; char path[MAX_STR_LENGTH]; if (lp_mode_epp == SETTING_IGNORE) lpmd_log_info ("Ignore EPP\n"); if (lp_mode_epb == SETTING_IGNORE) lpmd_log_info ("Ignore EPB\n"); if (lp_mode_epp == SETTING_IGNORE && lp_mode_epb == SETTING_IGNORE) return 0; for (c = 0; c < max_cpus; c++) { int val; char *str = NULL; if (!is_cpu_online (c)) continue; if (lp_mode_epp != SETTING_IGNORE) { if (lp_mode_epp == SETTING_RESTORE) { val = -1; str = get_ppd_default_epp(); if (!str) { /* Fallback to cached EPP */ val = saved_cpu_info[c].epp; str = saved_cpu_info[c].epp_str; } } else { val = lp_mode_epp; } snprintf (path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/energy_performance_preference", c); ret = set_epp (path, val, str); if (!ret) { if (val != -1) lpmd_log_debug ("Set CPU%d EPP to 0x%x\n", c, val); else lpmd_log_debug ("Set CPU%d EPP to %s\n", c, saved_cpu_info[c].epp_str); } } if (lp_mode_epb != SETTING_IGNORE) { if (lp_mode_epb == SETTING_RESTORE) val = saved_cpu_info[c].epb; else val = lp_mode_epb; snprintf (path, MAX_STR_LENGTH, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", c); ret = lpmd_write_int(path, val, -1); if (!ret) lpmd_log_debug ("Set CPU%d EPB to 0x%x\n", c, val); } } return 0; } static int uevent_fd = -1; int uevent_init(void) { struct sockaddr_nl nls; memset (&nls, 0, sizeof(struct sockaddr_nl)); nls.nl_family = AF_NETLINK; nls.nl_pid = getpid(); nls.nl_groups = -1; uevent_fd = socket (PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT); if (uevent_fd < 0) return uevent_fd; if (bind (uevent_fd, (struct sockaddr*) &nls, sizeof(struct sockaddr_nl))) { lpmd_log_warn ("kob_uevent bind failed\n"); close (uevent_fd); return -1; } lpmd_log_debug ("Uevent binded\n"); return uevent_fd; } static int has_cpu_uevent(void) { ssize_t i = 0; ssize_t len; const char *dev_path = "DEVPATH="; unsigned int dev_path_len = strlen(dev_path); const char *cpu_path = "/devices/system/cpu/cpu"; char buffer[MAX_STR_LENGTH]; len = recv (uevent_fd, buffer, sizeof(buffer) - 1, MSG_DONTWAIT); if (len <= 0) return 0; buffer[len] = '\0'; lpmd_log_debug ("Receive uevent: %s\n", buffer); while (i < len) { if (strlen (buffer + i) > dev_path_len && !strncmp (buffer + i, dev_path, dev_path_len)) { if (!strncmp (buffer + i + dev_path_len, cpu_path, strlen (cpu_path))) { lpmd_log_debug ("\tMatches: %s\n", buffer + i + dev_path_len); return 1; } } i += strlen (buffer + i) + 1; } return 0; } #define PATH_PROC_STAT "/proc/stat" int check_cpu_hotplug(void) { FILE *filep; static cpu_set_t *curr; static cpu_set_t *prev; cpu_set_t *tmp; if (!has_cpu_uevent ()) return 0; if (!curr) { alloc_cpu_set (&curr); alloc_cpu_set (&prev); CPU_OR_S (size_cpumask, curr, cpumasks[CPUMASK_ONLINE].mask, cpumasks[CPUMASK_ONLINE].mask); } tmp = prev; prev = curr; curr = tmp; CPU_ZERO_S (size_cpumask, curr); filep = fopen (PATH_PROC_STAT, "r"); if (!filep) return 0; while (!feof (filep)) { char *tmpline = NULL; size_t size = 0; char *line; int cpu; char *p; int ret; tmpline = NULL; size = 0; if (getline (&tmpline, &size, filep) <= 0) { free (tmpline); break; } line = strdup (tmpline); p = strtok (line, " "); ret = sscanf (p, "cpu%d", &cpu); if (ret != 1) goto free; CPU_SET_S (cpu, size_cpumask, curr); free: free (tmpline); free (line); } fclose (filep); /* CPU Hotplug detected, should freeze lpmd */ if (!CPU_EQUAL_S (size_cpumask, curr, cpumasks[CPUMASK_ONLINE].mask)) { lpmd_log_debug ("check_cpu_hotplug: CPU Hotplug detected, freeze lpmd\n"); return freeze_lpm (); } /* CPU restored to original state, should restore lpmd */ if (CPU_EQUAL_S (size_cpumask, curr, cpumasks[CPUMASK_ONLINE].mask) && !CPU_EQUAL_S (size_cpumask, curr, prev)) { lpmd_log_debug ("check_cpu_hotplug: CPU Hotplug restored, restore lpmd\n"); return restore_lpm (); } /* No update since last change */ return 0; } /* Bit 15 of CPUID.7 EDX stands for Hybrid support */ #define CPUFEATURE_HYBRID (1 << 15) #define PATH_PM_PROFILE "/sys/firmware/acpi/pm_profile" struct cpu_model_entry { unsigned int family; unsigned int model; }; static struct cpu_model_entry id_table[] = { { 6, 0x97 }, // Alderlake { 6, 0x9a }, // Alderlake { 6, 0xb7 }, // Raptorlake { 6, 0xba }, // Raptorlake { 6, 0xbf }, // Raptorlake S { 6, 0xaa }, // Meteorlake { 6, 0xac }, // Meteorlake { 6, 0xbd }, // Lunarlake { 0, 0 } // Last Invalid entry }; static int detect_supported_cpu(lpmd_config_t *lpmd_config) { unsigned int eax, ebx, ecx, edx; unsigned int max_level, family, model, stepping; int val; cpuid(0, eax, ebx, ecx, edx); /* Unsupported vendor */ if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) { lpmd_log_info("Unsupported vendor\n"); return -1; } max_level = eax; cpuid(1, eax, ebx, ecx, edx); family = (eax >> 8) & 0xf; model = (eax >> 4) & 0xf; stepping = eax & 0xf; if (family == 6) model += ((eax >> 16) & 0xf) << 4; lpmd_log_info("%u CPUID levels; family:model:stepping 0x%x:%x:%x (%u:%u:%u)\n", max_level, family, model, stepping, family, model, stepping); if (!do_platform_check()) { lpmd_log_info("Ignore platform check\n"); goto end; } /* Need CPUID.1A to detect CPU core type */ if (max_level < 0x1a) { lpmd_log_info("CPUID leaf 0x1a not supported, unable to detect CPU type\n"); return -1; } cpuid_count(7, 0, eax, ebx, ecx, edx); /* Run on Hybrid platforms only */ if (!(edx & CPUFEATURE_HYBRID)) { lpmd_log_info("Non-Hybrid platform detected\n"); return -1; } /* /sys/firmware/acpi/pm_profile is mandatory */ if (lpmd_read_int(PATH_PM_PROFILE, &val, -1)) { lpmd_log_info("Failed to read PM profile %s\n", PATH_PM_PROFILE); return -1; } if (val != 2) { lpmd_log_info("Non-Mobile PM profile detected. %s returns %d\n", PATH_PM_PROFILE, val); return -1; } /* Platform meets all the criteria for lpmd to run, check the allow list */ val = 0; while (id_table[val].family) { if (id_table[val].family == family && id_table[val].model == model) break; val++; } /* Unsupported model */ if (!id_table[val].family) { lpmd_log_info("Platform not supported yet.\n"); lpmd_log_debug("Supported platforms:\n"); val = 0; while (id_table[val].family) { lpmd_log_debug("\tfamily %d model %d\n", id_table[val].family, id_table[val].model); val++; } return -1; } end: lpmd_config->cpu_family = family; lpmd_config->cpu_model = model; return 0; } /* Run intel_lpmd on the LP-Mode CPUs only */ static void lpmd_set_cpu_affinity(void) { if (!cpumasks[CPUMASK_LPM_DEFAULT].mask) return; if (!CPU_COUNT_S (size_cpumask, cpumasks[CPUMASK_LPM_DEFAULT].mask)) return; if (!sched_setaffinity (0, size_cpumask, cpumasks[CPUMASK_LPM_DEFAULT].mask)) lpmd_log_info ("\tSet intel_lpmd cpu affinity to CPU %s\n", get_cpus_str (CPUMASK_LPM_DEFAULT)); else lpmd_log_warn ("\tFailed to set intel_lpmd cpu affinity\n"); } /* * Detect LPM cpus * parse cpuset with following syntax * 1,2,4..6,8-10 and set bits in cpu_subset */ int parse_cpu_str(char *buf, enum cpumask_idx idx) { unsigned int start, end; char *next; int nr_cpus = 0; if (buf[0] == '\0') return 0; next = buf; while (next && *next) { if (*next == '\n') *next = '\0'; next++; } next = buf; while (next && *next) { if (*next == '\n') *next = '\0'; if (*next == '-') /* no negative cpu numbers */ goto error; start = strtoul (next, &next, 10); _add_cpu (start, idx); nr_cpus++; if (*next == '\0') break; if (*next == ',') { next += 1; continue; } if (*next == '-') { next += 1; /* start range */ } else if (*next == '.') { next += 1; if (*next == '.') next += 1; /* start range */ else goto error; } end = strtoul (next, &next, 10); if (end <= start) goto error; while (++start <= end) { _add_cpu (start, idx); nr_cpus++; } if (*next == ',') next += 1; else if (*next != '\0') goto error; } return nr_cpus; error: lpmd_log_error ("CPU string malformed: %s\n", buf); return -1; } static int detect_lpm_cpus_cmd(char *cmd) { int ret; ret = parse_cpu_str (cmd, CPUMASK_LPM_DEFAULT); if (ret <= 0) reset_cpus (CPUMASK_LPM_DEFAULT); return ret; } /* * Use one Ecore Module as LPM CPUs. * Applies on Hybrid platforms like AlderLake/RaptorLake. */ static int is_cpu_atom(int cpu) { unsigned int eax, ebx, ecx, edx; unsigned int type; if (cpu_migrate(cpu) < 0) { lpmd_log_error("Failed to migrated to cpu%d\n", cpu); return -1; } cpuid(0x1a, eax, ebx, ecx, edx); type = (eax >> 24) & 0xFF; return type == 0x20; } static int is_cpu_in_l3(int cpu) { unsigned int eax, ebx, ecx, edx, subleaf; if (cpu_migrate(cpu) < 0) { lpmd_log_error("Failed to migrated to cpu%d\n", cpu); err (1, "cpu migrate"); } for(subleaf = 0;; subleaf++) { unsigned int type, level; cpuid_count(4, subleaf, eax, ebx, ecx, edx); type = eax & 0x1f; level = (eax >> 5) & 0x7; /* No more caches */ if (!type) break; /* Unified Cache */ if (type !=3 ) continue; /* L3 */ if (level != 3) continue; return 1; } return 0; } int is_cpu_pcore(int cpu) { return !is_cpu_atom(cpu); } int is_cpu_ecore(int cpu) { if (!is_cpu_atom(cpu)) return 0; return is_cpu_in_l3(cpu); } int is_cpu_lcore(int cpu) { if (!is_cpu_atom(cpu)) return 0; return !is_cpu_in_l3(cpu); } static int detect_lpm_cpus_cluster(void) { FILE *filep; char path[MAX_STR_LENGTH]; char str[MAX_STR_LENGTH]; int i, ret; for (i = topo_max_cpus; i >= 0; i--) { if (!is_cpu_online (i)) continue; snprintf (path, sizeof(path), "/sys/devices/system/cpu/cpu%d/topology/cluster_cpus_list", i); path[MAX_STR_LENGTH - 1] = '\0'; filep = fopen (path, "r"); if (!filep) continue; ret = fread (str, 1, MAX_STR_LENGTH - 1, filep); fclose (filep); if (ret <= 0) continue; str[ret] = '\0'; if (parse_cpu_str (str, CPUMASK_LPM_DEFAULT) <= 0) continue; /* An Ecore module contains 4 Atom cores */ if (CPU_COUNT_S(size_cpumask, cpumasks[CPUMASK_LPM_DEFAULT].mask) == 4 && is_cpu_atom(i)) break; reset_cpus (CPUMASK_LPM_DEFAULT); } if (!has_cpus (CPUMASK_LPM_DEFAULT)) { reset_cpus (CPUMASK_LPM_DEFAULT); return 0; } return CPU_COUNT_S(size_cpumask, cpumasks[CPUMASK_LPM_DEFAULT].mask); } static int detect_cpu_lcore(int cpu) { if (is_cpu_lcore(cpu)) _add_cpu (cpu, CPUMASK_LPM_DEFAULT); return 0; } /* * Use Lcore CPUs as LPM CPUs. * Applies on platforms like MeteorLake. */ static int detect_lpm_cpus_lcore(void) { int i; for (i = 0; i < topo_max_cpus; i++) { if (!is_cpu_online (i)) continue; if (detect_cpu_lcore(i) < 0) return -1; } /* All cpus has L3 */ if (!has_cpus (CPUMASK_LPM_DEFAULT)) return 0; /* All online cpus don't have L3 */ if (CPU_EQUAL_S(size_cpumask, cpumasks[CPUMASK_LPM_DEFAULT].mask, cpumasks[CPUMASK_ONLINE].mask)) goto err; return CPU_COUNT_S(size_cpumask, cpumasks[CPUMASK_LPM_DEFAULT].mask); err: reset_cpus (CPUMASK_LPM_DEFAULT); return 0; } static int detect_lpm_cpus(char *cmd_cpus) { int ret; char *str; if (cmd_cpus && cmd_cpus[0] != '\0') { ret = detect_lpm_cpus_cmd (cmd_cpus); if (ret <= 0) { lpmd_log_error ("\tInvalid -c parameter: %s\n", cmd_cpus); exit (-1); } str = "CommandLine"; goto end; } ret = detect_lpm_cpus_lcore (); if (ret < 0) return ret; if (ret > 0) { str = "Lcores"; goto end; } if (detect_lpm_cpus_cluster ()) { str = "Ecores"; goto end; } if (has_hfi_lpm_monitor () || has_hfi_suv_monitor ()) { lpmd_log_info ( "\tNo valid Low Power CPUs detected, use dynamic Low Power CPUs from HFI hints\n"); return 0; } else { lpmd_log_error ("\tNo valid Low Power CPUs detected, exit\n"); exit (1); } end: if (has_cpus (CPUMASK_LPM_DEFAULT)) lpmd_log_info ("\tUse CPU %s as Default Low Power CPUs (%s)\n", get_cpus_str (CPUMASK_LPM_DEFAULT), str); lpmd_set_cpu_affinity (); return 0; } static int check_cpu_offline_support(void) { return lpmd_open ("/sys/devices/system/cpu/cpu0/online", 1); } static int online_cpu(int cpu, int val) { char path[MAX_STR_LENGTH]; snprintf (path, sizeof(path), "/sys/devices/system/cpu/cpu%d/online", cpu); return lpmd_write_int (path, val, LPMD_LOG_INFO); } static int process_cpu_offline(int enter) { int cpu; lpmd_log_info ("\t%s CPUs\n", enter ? "Offline" : "Online"); for (cpu = 0; cpu < topo_max_cpus; cpu++) { if (!is_cpu_online (cpu)) continue; if (!is_cpu_for_lpm (cpu)) { if (enter) online_cpu (cpu, 0); else online_cpu (cpu, 1); } else { online_cpu (cpu, 1); } } return 0; } /* Support for LPM_CPU_CGROUPV2 */ #define PATH_CGROUP "/sys/fs/cgroup" #define PATH_CG2_SUBTREE_CONTROL PATH_CGROUP "/cgroup.subtree_control" static int update_allowed_cpus(const char *unit, uint8_t *vals, int size) { sd_bus_error error = SD_BUS_ERROR_NULL; sd_bus_message *m = NULL; sd_bus *bus = NULL; char buf[MAX_STR_LENGTH]; int offset; int ret; int i; // creates a new, independent bus connection to the system bus ret = sd_bus_open_system (&bus); if (ret < 0) { fprintf (stderr, "Failed to connect to system bus: %s\n", strerror (-ret)); goto finish; } /* * creates a new bus message object that encapsulates a D-Bus method call, * and returns it in the m output parameter. * The call will be made on the destination, path, on the interface, member. */ /* Issue the method call and store the response message in m */ ret = sd_bus_message_new_method_call (bus, &m, "org.freedesktop.systemd1", "/org/freedesktop/systemd1", "org.freedesktop.systemd1.Manager", "SetUnitProperties"); if (ret < 0) { fprintf (stderr, "Failed to issue method call: %s\n", error.message); goto finish; } // Attach fields to a D-Bus message based on a type string ret = sd_bus_message_append (m, "sb", unit, 1); if (ret < 0) { fprintf (stderr, "Failed to append unit: %s\n", error.message); goto finish; } /* * appends a new container to the message m. * After opening a new container, it can be filled with content using * sd_bus_message_append(3) and similar functions. * Containers behave like a stack. To nest containers inside each other, * call sd_bus_message_open_container() multiple times without calling * sd_bus_message_close_container() in between. Each container will be * nested inside the previous container. * Instead of literals, the corresponding constants SD_BUS_TYPE_STRUCT, * SD_BUS_TYPE_ARRAY, SD_BUS_TYPE_VARIANT or SD_BUS_TYPE_DICT_ENTRY can also be used. */ ret = sd_bus_message_open_container (m, SD_BUS_TYPE_ARRAY, "(sv)"); if (ret < 0) { fprintf (stderr, "Failed to append array: %s\n", error.message); goto finish; } ret = sd_bus_message_open_container (m, SD_BUS_TYPE_STRUCT, "sv"); if (ret < 0) { fprintf (stderr, "Failed to open container struct: %s\n", error.message); goto finish; } /* * appends a single field to the message m. * The parameter type determines how the pointer p is interpreted. */ ret = sd_bus_message_append_basic (m, SD_BUS_TYPE_STRING, "AllowedCPUs"); if (ret < 0) { fprintf (stderr, "Failed to append string: %s\n", error.message); goto finish_1; } ret = sd_bus_message_open_container (m, 'v', "ay"); if (ret < 0) { fprintf (stderr, "Failed to open container: %s\n", error.message); goto finish_2; } /* * appends an array to a D-Bus message m. A container will be opened, * the array contents appended, and the container closed. */ ret = sd_bus_message_append_array (m, 'y', vals, size); if (ret < 0) { fprintf (stderr, "Failed to append allowed_cpus: %s\n", error.message); goto finish_2; } offset = snprintf (buf, MAX_STR_LENGTH, "\tSending Dbus message to systemd: %s: ", unit); for (i = 0; i < size; i++) { if (offset < MAX_STR_LENGTH) offset += snprintf (buf + offset, MAX_STR_LENGTH - offset, "0x%02x ", vals[i]); } buf[MAX_STR_LENGTH - 1] = '\0'; lpmd_log_info ("%s\n", buf); sd_bus_message_close_container (m); finish_2: sd_bus_message_close_container (m); finish_1: sd_bus_message_close_container (m); finish: if (ret >= 0) { ret = sd_bus_call (bus, m, 0, &error, NULL); if (ret < 0) { fprintf (stderr, "Failed to call: %s\n", error.message); } } sd_bus_error_free (&error); sd_bus_message_unref (m); sd_bus_unref (bus); return ret < 0 ? -1 : 0; } static int restore_systemd_cgroup() { int size = topo_max_cpus / 8; uint8_t *vals; vals = calloc (size, 1); if (!vals) return -1; get_cpus_hexvals (CPUMASK_ONLINE, vals, size); update_allowed_cpus ("system.slice", vals, size); update_allowed_cpus ("user.slice", vals, size); update_allowed_cpus ("machine.slice", vals, size); free (vals); return 0; } static int update_systemd_cgroup() { int size = topo_max_cpus / 8; uint8_t *vals; int ret; vals = calloc (size, 1); if (!vals) return -1; get_cpus_hexvals (lpm_cpus_cur, vals, size); ret = update_allowed_cpus ("system.slice", vals, size); if (ret) goto restore; ret = update_allowed_cpus ("user.slice", vals, size); if (ret) goto restore; ret = update_allowed_cpus ("machine.slice", vals, size); if (ret) goto restore; free (vals); return 0; restore: free (vals); restore_systemd_cgroup (); return ret; } static int check_cpu_cgroupv2_support(void) { if (lpmd_write_str (PATH_CG2_SUBTREE_CONTROL, "+cpuset", LPMD_LOG_DEBUG)) return 1; return 0; } static int process_cpu_cgroupv2_enter(void) { if (lpmd_write_str (PATH_CG2_SUBTREE_CONTROL, "+cpuset", LPMD_LOG_DEBUG)) return 1; return update_systemd_cgroup (); } static int process_cpu_cgroupv2_exit(void) { restore_systemd_cgroup (); return lpmd_write_str (PATH_CG2_SUBTREE_CONTROL, "-cpuset", LPMD_LOG_DEBUG); } static int process_cpu_cgroupv2(int enter) { if (enter) return process_cpu_cgroupv2_enter (); else return process_cpu_cgroupv2_exit (); } /* * Support for LPM_CPU_POWERCLAMP: * /sys/module/intel_powerclamp/parameters/cpumask * /sys/module/intel_powerclamp/parameters/max_idle */ #define PATH_CPUMASK "/sys/module/intel_powerclamp/parameters/cpumask" #define PATH_MAXIDLE "/sys/module/intel_powerclamp/parameters/max_idle" #define PATH_DURATION "/sys/module/intel_powerclamp/parameters/duration" #define PATH_THERMAL "/sys/class/thermal" static char path_powerclamp[MAX_STR_LENGTH * 2]; static int check_cpu_powerclamp_support(void) { FILE *filep; DIR *dir; struct dirent *entry; char *name = "intel_powerclamp"; char str[20]; int ret; if (lpmd_open (PATH_CPUMASK, 0)) return 1; if ((dir = opendir (PATH_THERMAL)) == NULL) { perror ("opendir() error"); return 1; } while ((entry = readdir (dir)) != NULL) { if (strlen (entry->d_name) > 100) continue; snprintf (path_powerclamp, MAX_STR_LENGTH * 2, "%s/%s/type", PATH_THERMAL, entry->d_name); filep = fopen (path_powerclamp, "r"); if (!filep) continue; ret = fread (str, strlen (name), 1, filep); fclose (filep); if (ret != 1) continue; if (!strncmp (str, name, strlen (name))) { snprintf (path_powerclamp, MAX_STR_LENGTH * 2, "%s/%s/cur_state", PATH_THERMAL, entry->d_name); lpmd_log_info ("\tFound %s device at %s/%s\n", name, PATH_THERMAL, entry->d_name); break; } } closedir (dir); if (path_powerclamp[0] == '\0') return 1; return 0; } static int default_dur = -1; static int _process_cpu_powerclamp_enter(char *cpumask_str, int pct, int dur) { if (lpmd_write_str (PATH_CPUMASK, cpumask_str, LPMD_LOG_DEBUG)) return 1; if (dur > 0) { if (lpmd_read_int (PATH_DURATION, &default_dur, LPMD_LOG_DEBUG)) return 1; if (lpmd_write_int (PATH_DURATION, dur, LPMD_LOG_DEBUG)) return 1; } if (lpmd_write_int (PATH_MAXIDLE, pct, LPMD_LOG_DEBUG)) return 1; if (lpmd_write_int (path_powerclamp, pct, LPMD_LOG_DEBUG)) return 1; return 0; } static int process_cpu_powerclamp_enter(void) { int pct = get_idle_percentage (); int dur = get_idle_duration (); return _process_cpu_powerclamp_enter (get_cpus_hexstr_reverse (lpm_cpus_cur), pct, dur); } static int process_cpu_powerclamp_exit() { if (lpmd_write_int (PATH_DURATION, default_dur, LPMD_LOG_DEBUG)) return 1; return lpmd_write_int (path_powerclamp, 0, LPMD_LOG_DEBUG); } static int process_cpu_powerclamp(int enter) { if (enter) return process_cpu_powerclamp_enter (); else return process_cpu_powerclamp_exit (); } // Support for SUV mode, which uses powerclamp #define SUV_IDLE_PCT 50 static int in_suv; static int enter_suv_mode(enum lpm_command cmd) { int ret; char *cpumask_str; char *name; // in_suv can either be HFI_SUV or DBUS_SUV, can not be both if (in_suv) return 0; if (cmd == HFI_SUV_ENTER) { cpumask_str = get_cpus_hexstr (CPUMASK_HFI_SUV); name = "HFI"; } else { cpumask_str = get_cpus_hexstr (CPUMASK_ONLINE); name = "DBUS"; } /* * When system is in LPM and it uses idle injection for LPM, * we need to exit LPM first because we need to reset the cpumask * of the intel_powerclamp sysfs I/F. * * In order to make the logic simpler, always exit LPM when Idle * injection is used for LPM. * The downside is that we need to do an extra LPM exit but this * should be rare because it is abnormal to get an SUV request when * system is already in LPM. */ if (get_cpu_mode () == LPM_CPU_POWERCLAMP) process_lpm_unlock (cmd); lpmd_log_info ("------ Enter %s Survivability Mode ---\n", name); ret = _process_cpu_powerclamp_enter (cpumask_str, SUV_IDLE_PCT, -1); if (!ret) in_suv = cmd; return ret; } static int exit_suv_mode(enum lpm_command cmd) { int cmd_enter; char *name; // If SUV mode is disabled or exited if (in_suv == -1 || in_suv == 0) return 0; if (cmd == HFI_SUV_EXIT) { cmd_enter = HFI_SUV_ENTER; name = "HFI"; } else { cmd_enter = DBUS_SUV_ENTER; name = "DBUS"; } if (in_suv != cmd_enter) return 0; lpmd_log_info ("------ Exit %s Survivability Mode ---\n", name); process_cpu_powerclamp_exit (); // Try to re-enter in case it was FORCED ON if (get_cpu_mode () == LPM_CPU_POWERCLAMP) process_lpm_unlock (cmd); in_suv = 0; return LPMD_SUCCESS; } int process_suv_mode(enum lpm_command cmd) { int ret; lpmd_lock (); if (cmd == HFI_SUV_ENTER || cmd == DBUS_SUV_ENTER) ret = enter_suv_mode (cmd); else if (cmd == HFI_SUV_EXIT || cmd == DBUS_SUV_EXIT) ret = exit_suv_mode (cmd); else ret = -1; lpmd_unlock (); return ret; } int has_suv_support(void) { return !(in_suv == -1); } static int __process_cpu_isolate_exit(char *name) { char path[MAX_STR_LENGTH]; DIR *dir; snprintf(path, MAX_STR_LENGTH, "/sys/fs/cgroup/%s", name); dir = opendir(path); if (!dir) return 1; closedir(dir); snprintf(path, MAX_STR_LENGTH, "/sys/fs/cgroup/%s/cpuset.cpus.partition", name); if (lpmd_write_str (path, "member", LPMD_LOG_DEBUG)) return 1; if (!get_cpus_str (CPUMASK_ONLINE)) return 0; snprintf(path, MAX_STR_LENGTH, "/sys/fs/cgroup/%s/cpuset.cpus", name); if (lpmd_write_str (path, get_cpus_str (CPUMASK_ONLINE), LPMD_LOG_DEBUG)) return 1; return 0; } static int check_cpu_isolate_support(void) { return check_cpu_cgroupv2_support (); } static int process_cpu_isolate_enter(void) { DIR *dir; int ret; dir = opendir ("/sys/fs/cgroup/lpm"); if (!dir) { ret = mkdir ("/sys/fs/cgroup/lpm", 0744); if (ret) { printf ("Can't create dir:%s errno:%d\n", "/sys/fs/cgroup/lpm", errno); return ret; } lpmd_log_info ("\tCreate %s\n", "/sys/fs/cgroup/lpm"); } else { closedir (dir); } if (lpmd_write_str ("/sys/fs/cgroup/lpm/cpuset.cpus.partition", "member", LPMD_LOG_DEBUG)) return 1; if (!CPU_EQUAL_S(size_cpumask, cpumasks[lpm_cpus_cur].mask, cpumasks[CPUMASK_ONLINE].mask)) { if (lpmd_write_str ("/sys/fs/cgroup/lpm/cpuset.cpus", get_cpus_str_reverse (lpm_cpus_cur), LPMD_LOG_DEBUG)) return 1; if (lpmd_write_str ("/sys/fs/cgroup/lpm/cpuset.cpus.partition", "isolated", LPMD_LOG_DEBUG)) return 1; } else { if (lpmd_write_str ("/sys/fs/cgroup/lpm/cpuset.cpus", get_cpus_str (CPUMASK_ONLINE), LPMD_LOG_DEBUG)) return 1; } return 0; } static int process_cpu_isolate_exit(void) { return __process_cpu_isolate_exit("lpm"); } static int process_cpu_isolate(int enter) { if (enter) return process_cpu_isolate_enter (); else return process_cpu_isolate_exit (); } static int check_cpu_mode_support(enum lpm_cpu_process_mode mode) { int ret; switch (mode) { case LPM_CPU_OFFLINE: ret = check_cpu_offline_support (); break; case LPM_CPU_CGROUPV2: ret = check_cpu_cgroupv2_support (); break; case LPM_CPU_POWERCLAMP: ret = check_cpu_powerclamp_support (); break; case LPM_CPU_ISOLATE: ret = check_cpu_isolate_support (); break; default: lpmd_log_error ("Invalid CPU process mode %d\n", mode); exit (-1); } if (ret) { lpmd_log_error ("Mode %d not supported\n", mode); return ret; } // Extra checks for SUV mode support if (mode != LPM_CPU_POWERCLAMP) { if (check_cpu_powerclamp_support ()) { in_suv = -1; lpmd_log_info ("Idle injection interface not detected, disable SUV mode support\n"); } } return ret; } #define PATH_RAPL "/sys/class/powercap" static int get_tdp(void) { FILE *filep; DIR *dir; struct dirent *entry; int ret; char path[MAX_STR_LENGTH * 2]; char str[MAX_STR_LENGTH]; char *pos; int tdp = 0; if ((dir = opendir (PATH_RAPL)) == NULL) { perror ("opendir() error"); return 1; } while ((entry = readdir (dir)) != NULL) { if (strlen (entry->d_name) > 100) continue; if (strncmp(entry->d_name, "intel-rapl", strlen("intel-rapl"))) continue; snprintf (path, MAX_STR_LENGTH * 2, "%s/%s/name", PATH_RAPL, entry->d_name); filep = fopen (path, "r"); if (!filep) continue; ret = fread (str, 1, MAX_STR_LENGTH, filep); fclose (filep); if (ret <= 0) continue; if (strncmp(str, "package", strlen("package"))) continue; snprintf (path, MAX_STR_LENGTH * 2, "%s/%s/constraint_0_max_power_uw", PATH_RAPL, entry->d_name); filep = fopen (path, "r"); if (!filep) continue; ret = fread (str, 1, MAX_STR_LENGTH, filep); fclose (filep); if (ret <= 0) continue; if (ret >= MAX_STR_LENGTH) ret = MAX_STR_LENGTH - 1; str[ret] = '\0'; tdp = strtol(str, &pos, 10); break; } closedir (dir); return tdp / 1000000; } static void cpu_cleanup(void) { /* leanup previous cgroup setting in case service quits unexpectedly last time */ __process_cpu_isolate_exit("lpm"); } int check_cpu_capability(lpmd_config_t *lpmd_config) { FILE *filep; int i; char path[MAX_STR_LENGTH]; int ret; int pcores, ecores, lcores; int tdp; /* Must be called before migrating any tasks */ cpu_cleanup(); ret = detect_supported_cpu(lpmd_config); if (ret) { lpmd_log_info("Unsupported CPU type\n"); return ret; } ret = set_max_cpu_num (); if (ret) return ret; reset_cpus (CPUMASK_ONLINE); pcores = ecores = lcores = 0; for (i = 0; i < topo_max_cpus; i++) { unsigned int online = 0; snprintf (path, sizeof(path), "/sys/devices/system/cpu/cpu%d/online", i); filep = fopen (path, "r"); if (filep) { if (fscanf (filep, "%u", &online) != 1) lpmd_log_warn ("fread failed for %s\n", path); fclose (filep); } else if (!i) online = 1; else break; if (!online) continue; add_cpu (i, CPUMASK_ONLINE); if (is_cpu_pcore(i)) pcores++; else if (is_cpu_ecore(i)) ecores++; else if (is_cpu_lcore(i)) lcores++; } max_online_cpu = i; tdp = get_tdp(); lpmd_log_info("Detected %d Pcores, %d Ecores, %d Lcores, TDP %dW\n", pcores, ecores, lcores, tdp); ret = snprintf(lpmd_config->cpu_config, MAX_CONFIG_LEN - 1, " %dP%dE%dL-%dW ", pcores, ecores, lcores, tdp); lpmd_config->tdp = tdp; return 0; } int init_cpu(char *cmd_cpus, enum lpm_cpu_process_mode mode, int epp) { int ret; ret = detect_lpm_cpus (cmd_cpus); if (ret) return ret; ret = check_cpu_mode_support (mode); if (ret) return ret; init_epp_epb (); return 0; } int process_cpus(int enter, enum lpm_cpu_process_mode mode) { int ret; if (enter != 1 && enter != 0) return LPMD_ERROR; process_epp_epb (); if (lpm_cpus_cur == CPUMASK_MAX) { lpmd_log_info ("Ignore Task migration\n"); return 0; } lpmd_log_info ("Process CPUs ...\n"); switch (mode) { case LPM_CPU_OFFLINE: ret = process_cpu_offline (enter); break; case LPM_CPU_CGROUPV2: ret = process_cpu_cgroupv2 (enter); break; case LPM_CPU_POWERCLAMP: ret = process_cpu_powerclamp (enter); break; case LPM_CPU_ISOLATE: ret = process_cpu_isolate (enter); break; default: exit (-1); } return ret; } intel-lpmd-0.0.9/src/lpmd_dbus_server.c000066400000000000000000000205141477072336600201000ustar00rootroot00000000000000/* * lpmd_dbus_server.c: Dbus server for intel_lpmd * * Copyright (C) 2023 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * This file contains function to start dbus server and provide callbacks for * dbus messages. */ #include #include #include #include #include "lpmd.h" struct _PrefObject { GObject parent; }; #define PREF_TYPE_OBJECT (pref_object_get_type()) G_DECLARE_FINAL_TYPE(PrefObject, pref_object, PREF, OBJECT, GObject) #define MAX_DBUS_REPLY_STR_LEN 100 G_DEFINE_TYPE(PrefObject, pref_object, G_TYPE_OBJECT) static gboolean dbus_interface_terminate(PrefObject *obj, GError **error); static gboolean dbus_interface_l_pm__fo_rc_e__on(PrefObject *obj, GError **error); static gboolean dbus_interface_l_pm__fo_rc_e__of_f(PrefObject *obj, GError **error); static gboolean dbus_interface_l_pm__au_to(PrefObject *obj, GError **error); static gboolean dbus_interface_s_uv__mo_de__en_te_r(PrefObject *obj, GError **error); static gboolean dbus_interface_s_uv__mo_de__ex_it(PrefObject *obj, GError **error); static gboolean (*intel_lpmd_dbus_exit_callback)(void); // Dbus object initialization static void pref_object_init(PrefObject *obj) { g_assert (obj != NULL); } // Dbus object class initialization static void pref_object_class_init(PrefObjectClass *_class) { g_assert (_class != NULL); } static gboolean dbus_interface_terminate(PrefObject *obj, GError **error) { lpmd_log_debug ("intel_lpmd_dbus_interface_terminate\n"); lpmd_terminate (); if (intel_lpmd_dbus_exit_callback) intel_lpmd_dbus_exit_callback (); return TRUE; } static gboolean dbus_interface_l_pm__fo_rc_e__on(PrefObject *obj, GError **error) { lpmd_log_debug ("intel_lpmd_dbus_interface_lpm_enter\n"); lpmd_force_on (); return TRUE; } static gboolean dbus_interface_l_pm__fo_rc_e__of_f(PrefObject *obj, GError **error) { lpmd_log_debug ("intel_lpmd_dbus_interface_lpm_exit\n"); lpmd_force_off (); return TRUE; } static gboolean dbus_interface_l_pm__au_to(PrefObject *obj, GError **error) { lpmd_set_auto (); return TRUE; } static gboolean dbus_interface_s_uv__mo_de__en_te_r(PrefObject *obj, GError **error) { lpmd_log_debug ("intel_lpmd_dbus_interface_suv_enter\n"); if (!has_suv_support ()) return FALSE; lpmd_suv_enter (); return TRUE; } static gboolean dbus_interface_s_uv__mo_de__ex_it(PrefObject *obj, GError **error) { if (!has_suv_support ()) return FALSE; lpmd_log_debug ("intel_lpmd_dbus_interface_suv_exit\n"); lpmd_suv_exit (); return TRUE; } #pragma GCC diagnostic push static GDBusInterfaceVTable interface_vtable; extern gint watcher_id; static GDBusNodeInfo * lpmd_dbus_load_introspection(const gchar *filename, GError **error) { g_autoptr(GBytes) data = NULL; g_autofree gchar *path = NULL; path = g_build_filename("/org/freedesktop/intel_lpmd", filename, NULL); data = g_resources_lookup_data(path, G_RESOURCE_LOOKUP_FLAGS_NONE, error); if (data == NULL) return NULL; return g_dbus_node_info_new_for_xml((gchar *)g_bytes_get_data(data, NULL), error); } static void lpmd_dbus_handle_method_call(GDBusConnection *connection, const gchar *sender, const gchar *object_path, const gchar *interface_name, const gchar *method_name, GVariant *parameters, GDBusMethodInvocation *invocation, gpointer user_data) { PrefObject *obj = PREF_OBJECT(user_data); g_autoptr(GError) error = NULL; lpmd_log_debug("Dbus method called %s %s.\n", interface_name, method_name); if (g_strcmp0(method_name, "Terminate") == 0) { g_dbus_method_invocation_return_value(invocation, NULL); dbus_interface_terminate(obj, &error); return; } if (g_strcmp0(method_name, "LPM_FORCE_ON") == 0) { g_dbus_method_invocation_return_value(invocation, NULL); dbus_interface_l_pm__fo_rc_e__on(obj, &error); return; } if (g_strcmp0(method_name, "LPM_FORCE_OFF") == 0) { g_dbus_method_invocation_return_value(invocation, NULL); dbus_interface_l_pm__fo_rc_e__of_f(obj, &error); return; } if (g_strcmp0(method_name, "LPM_AUTO") == 0) { g_dbus_method_invocation_return_value(invocation, NULL); dbus_interface_l_pm__au_to(obj, &error); return; } if (g_strcmp0(method_name, "SUV_MODE_ENTER") == 0) { g_dbus_method_invocation_return_value(invocation, NULL); dbus_interface_s_uv__mo_de__en_te_r(obj, &error); return; } if (g_strcmp0(method_name, "SUV_MODE_EXIT") == 0) { g_dbus_method_invocation_return_value(invocation, NULL); dbus_interface_s_uv__mo_de__ex_it(obj, &error); return; } g_set_error(&error, G_DBUS_ERROR, G_DBUS_ERROR_UNKNOWN_METHOD, "no such method %s", method_name); g_dbus_method_invocation_return_gerror(invocation, error); } static GVariant * lpmd_dbus_handle_get_property(GDBusConnection *connection, const gchar *sender, const gchar *object_path, const gchar *interface_name, const gchar *property_name, GError **error, gpointer user_data) { return NULL; } static gboolean lpmd_dbus_handle_set_property(GDBusConnection *connection, const gchar *sender, const gchar *object_path, const gchar *interface_name, const gchar *property_name, GVariant *value, GError **error, gpointer user_data) { return TRUE; } static void lpmd_dbus_on_bus_acquired(GDBusConnection *connection, const gchar *name, gpointer user_data) { guint registration_id; GDBusProxy *proxy_id = NULL; GError *error = NULL; GDBusNodeInfo *introspection_data = NULL; if (user_data == NULL) { lpmd_log_error("user_data is NULL\n"); return; } introspection_data = lpmd_dbus_load_introspection("src/intel_lpmd_dbus_interface.xml", &error); if (introspection_data == NULL || error != NULL) { lpmd_log_error("Couldn't create introspection data: %s:\n", error->message); return; } registration_id = g_dbus_connection_register_object(connection, "/org/freedesktop/intel_lpmd", introspection_data->interfaces[0], &interface_vtable, user_data, NULL, &error); proxy_id = g_dbus_proxy_new_sync(connection, G_DBUS_PROXY_FLAGS_DO_NOT_LOAD_PROPERTIES, NULL, "org.freedesktop.DBus", "/org/freedesktop/DBus", "org.freedesktop.DBus", NULL, &error); g_assert(registration_id > 0); g_assert(proxy_id != NULL); } static void lpmd_dbus_on_name_acquired(GDBusConnection *connection, const gchar *name, gpointer user_data) { } static void lpmd_dbus_on_name_lost(GDBusConnection *connection, const gchar *name, gpointer user_data) { g_warning("Lost the name %s\n", name); exit(1); } // Set up Dbus server with GDBus int intel_dbus_server_init(gboolean (*exit_handler)(void)) { PrefObject *value_obj; intel_lpmd_dbus_exit_callback = exit_handler; value_obj = PREF_OBJECT(g_object_new(PREF_TYPE_OBJECT, NULL)); if (value_obj == NULL) { lpmd_log_error("Failed to create one Value instance:\n"); return LPMD_FATAL_ERROR; } interface_vtable.method_call = lpmd_dbus_handle_method_call; interface_vtable.get_property = lpmd_dbus_handle_get_property; interface_vtable.set_property = lpmd_dbus_handle_set_property; watcher_id = g_bus_own_name(G_BUS_TYPE_SYSTEM, "org.freedesktop.intel_lpmd", G_BUS_NAME_OWNER_FLAGS_REPLACE, lpmd_dbus_on_bus_acquired, lpmd_dbus_on_name_acquired, lpmd_dbus_on_name_lost, g_object_ref(value_obj), NULL); return LPMD_SUCCESS; } #pragma GCC diagnostic pop intel-lpmd-0.0.9/src/lpmd_helpers.c000066400000000000000000000135461477072336600172260ustar00rootroot00000000000000/* * lpmd_helper.c: helper functions * * Copyright (C) 2023 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE #include #include #include #include #include #include #include "lpmd.h" static int _write_str(const char *name, char *str, int print_level, int log_level, const char *mode) { FILE *filep; char prefix[16]; int i, ret; if (print_level >= 15) return 1; if (print_level <= 0) { prefix[0] = '\0'; } else { for (i = 0; i < print_level; i++) prefix[i] = '\t'; prefix[i] = '\0'; } filep = fopen (name, mode); if (!filep) { lpmd_log_error ("%sOpen %s failed\n", prefix, name); return 1; } ret = fprintf (filep, "%s", str); if (ret <= 0) { lpmd_log_error ("%sWrite \"%s\" to %s failed, strlen %zu, ret %d\n", prefix, str, name, strlen (str), ret); fclose (filep); return 1; } switch (print_level) { case LPMD_LOG_INFO: lpmd_log_info ("%sWrite \"%s\" to %s\n", prefix, str, name); break; case LPMD_LOG_DEBUG: lpmd_log_debug ("%sWrite \"%s\" to %s\n", prefix, str, name); break; case LPMD_LOG_MSG: lpmd_log_msg ("%sWrite \"%s\" to %s\n", prefix, str, name); break; default: break; } fclose (filep); return 0; } int lpmd_write_str(const char *name, char *str, int print_level) { if (!name || !str) return 0; return _write_str (name, str, print_level, 2, "r+"); } int lpmd_write_str_append(const char *name, char *str, int print_level) { if (!name || !str) return 0; return _write_str (name, str, print_level, 2, "a+"); } int lpmd_write_str_verbose(const char *name, char *str, int print_level) { if (!name || !str) return 0; return _write_str (name, str, print_level, 3, "r+"); } int lpmd_write_int(const char *name, int val, int print_level) { FILE *filep; char prefix[16]; int i, ret; struct timespec tp1 = { }, tp2 = { }; if (!name) return 1; clock_gettime (CLOCK_MONOTONIC, &tp1); if (print_level >= 15) return 1; if (print_level < 0) { prefix[0] = '\0'; } else { for (i = 0; i < print_level; i++) prefix[i] = '\t'; prefix[i] = '\0'; } filep = fopen (name, "r+"); if (!filep) { lpmd_log_error ("%sOpen %s failed\n", prefix, name); return 1; } ret = fprintf (filep, "%d", val); if (ret <= 0) { lpmd_log_error ("%sWrite \"%d\" to %s failed, ret %d\n", prefix, val, name, ret); fclose (filep); return 1; } clock_gettime (CLOCK_MONOTONIC, &tp2); switch (print_level) { case LPMD_LOG_INFO: lpmd_log_info ("%sWrite \"%d\" to %s (%lu ns)\n", prefix, val, name, 1000000000 * (tp2.tv_sec - tp1.tv_sec) + tp2.tv_nsec - tp1.tv_nsec); break; case LPMD_LOG_DEBUG: lpmd_log_debug ("%sWrite \"%d\" to %s (%lu ns)\n", prefix, val, name, 1000000000 * (tp2.tv_sec - tp1.tv_sec) + tp2.tv_nsec - tp1.tv_nsec); break; case LPMD_LOG_MSG: lpmd_log_msg ("%sWrite \"%d\" to %s (%lu ns)\n", prefix, val, name, 1000000000 * (tp2.tv_sec - tp1.tv_sec) + tp2.tv_nsec - tp1.tv_nsec); break; default: break; } fclose (filep); return 0; } int lpmd_read_int(const char *name, int *val, int print_level) { FILE *filep; char prefix[16]; int i, t, ret; if (!name || !val) return 1; if (print_level >= 15) return 1; if (print_level < 0) { prefix[0] = '\0'; } else { for (i = 0; i < print_level; i++) prefix[i] = '\t'; prefix[i] = '\0'; } filep = fopen (name, "r"); if (!filep) { lpmd_log_error ("%sOpen %s failed\n", prefix, name); return 1; } ret = fscanf (filep, "%d", &t); if (ret != 1) { lpmd_log_error ("%sRead %s failed, ret %d\n", prefix, name, ret); fclose (filep); return 1; } fclose (filep); *val = t; if (print_level >= 0) lpmd_log_debug ("%sRead \"%d\" from %s\n", prefix, *val, name); return 0; } /* * lpmd_open does not require print on success * print_level: -1: don't print on error */ int lpmd_open(const char *name, int print_level) { FILE *filep; char prefix[16]; int i; if (!name) return 1; if (print_level >= 15) return 1; if (print_level < 0) { prefix[0] = '\0'; } else { for (i = 0; i < print_level; i++) prefix[i] = '\t'; prefix[i] = '\0'; } filep = fopen (name, "r"); if (!filep) { if (print_level >= 0) lpmd_log_error ("%sOpen %s failed\n", prefix, name); return 1; } fclose (filep); return 0; } char* get_time(void) { static time_t time_cur; time_cur = time (NULL); return ctime (&time_cur); } static struct timespec timespec; static char time_buf[MAX_STR_LENGTH]; void time_start(void) { clock_gettime (CLOCK_MONOTONIC, ×pec); } char* time_delta(void) { static struct timespec tp1; clock_gettime (CLOCK_MONOTONIC, &tp1); snprintf (time_buf, MAX_STR_LENGTH, "%ld ns", 1000000000 * (tp1.tv_sec - timespec.tv_sec) + tp1.tv_nsec - timespec.tv_nsec); memset (×pec, 0, sizeof(timespec)); return time_buf; } uint64_t read_msr(int cpu, uint32_t msr) { char msr_file_name[64]; int fd; uint64_t value; snprintf(msr_file_name, sizeof(msr_file_name), "/dev/cpu/%d/msr", cpu); fd = open(msr_file_name, O_RDONLY); if (fd < 0) return UINT64_MAX; if (pread(fd, &value, sizeof(value), msr) != sizeof(value)) { close(fd); return UINT64_MAX; } close(fd); return value; } intel-lpmd-0.0.9/src/lpmd_hfi.c000066400000000000000000000234461477072336600163320ustar00rootroot00000000000000/* * lpmd_hfi.c: intel_lpmd HFI monitor * * Copyright (C) 2023 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * This file processes HFI messages from the firmware. When the EE column for * a CPU is 255, that CPU will be in allowed list to run all thread. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "thermal.h" #include "lpmd.h" struct hfi_event_data { struct nl_sock *nl_handle; struct nl_cb *nl_cb; }; struct hfi_event_data drv; static int ack_handler(struct nl_msg *msg, void *arg) { int *err = arg; *err = 0; return NL_STOP; } static int finish_handler(struct nl_msg *msg, void *arg) { int *ret = arg; *ret = 0; return NL_SKIP; } static int error_handler(struct sockaddr_nl *nla, struct nlmsgerr *err, void *arg) { int *ret = arg; *ret = err->error; return NL_SKIP; } static int seq_check_handler(struct nl_msg *msg, void *arg) { return NL_OK; } static int send_and_recv_msgs(struct hfi_event_data *drv, struct nl_msg *msg, int (*valid_handler)(struct nl_msg*, void*), void *valid_data) { struct nl_cb *cb; int err = -ENOMEM; cb = nl_cb_clone (drv->nl_cb); if (!cb) goto out; err = nl_send_auto_complete (drv->nl_handle, msg); if (err < 0) goto out; err = 1; nl_cb_err (cb, NL_CB_CUSTOM, error_handler, &err); nl_cb_set (cb, NL_CB_FINISH, NL_CB_CUSTOM, finish_handler, &err); nl_cb_set (cb, NL_CB_ACK, NL_CB_CUSTOM, ack_handler, &err); if (valid_handler) nl_cb_set (cb, NL_CB_VALID, NL_CB_CUSTOM, valid_handler, valid_data); while (err > 0) err = nl_recvmsgs (drv->nl_handle, cb); out: nl_cb_put (cb); nlmsg_free (msg); return err; } struct family_data { const char *group; int id; }; static int family_handler(struct nl_msg *msg, void *arg) { struct family_data *res = arg; struct nlattr *tb[CTRL_ATTR_MAX + 1]; struct genlmsghdr *gnlh = nlmsg_data (nlmsg_hdr (msg)); struct nlattr *mcgrp; int i; nla_parse (tb, CTRL_ATTR_MAX, genlmsg_attrdata (gnlh, 0), genlmsg_attrlen (gnlh, 0), NULL); if (!tb[CTRL_ATTR_MCAST_GROUPS]) return NL_SKIP; nla_for_each_nested (mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], i) { struct nlattr *tb2[CTRL_ATTR_MCAST_GRP_MAX + 1]; nla_parse (tb2, CTRL_ATTR_MCAST_GRP_MAX, nla_data (mcgrp), nla_len (mcgrp), NULL); if (!tb2[CTRL_ATTR_MCAST_GRP_NAME] || !tb2[CTRL_ATTR_MCAST_GRP_ID] || strncmp (nla_data (tb2[CTRL_ATTR_MCAST_GRP_NAME]), res->group, nla_len (tb2[CTRL_ATTR_MCAST_GRP_NAME])) != 0) continue; res->id = nla_get_u32 (tb2[CTRL_ATTR_MCAST_GRP_ID]); break; }; return 0; } static int nl_get_multicast_id(struct hfi_event_data *drv, const char *family, const char *group) { struct nl_msg *msg; int ret = -1; struct family_data res = { group, -ENOENT }; msg = nlmsg_alloc (); if (!msg) return -ENOMEM; genlmsg_put (msg, 0, 0, genl_ctrl_resolve (drv->nl_handle, "nlctrl"), 0, 0, CTRL_CMD_GETFAMILY, 0); NLA_PUT_STRING (msg, CTRL_ATTR_FAMILY_NAME, family); ret = send_and_recv_msgs (drv, msg, family_handler, &res); msg = NULL; if (ret == 0) ret = res.id; nla_put_failure: nlmsg_free (msg); return ret; } /* Process HFI event */ struct perf_cap { int cpu; int perf; int eff; }; static int suv_bit_set(void) { // Depends on kernel patch to export kernel knobs for this return 0; } /* * Detect different kinds of CPU HFI hint * "LPM". EFF == 255 * "SUV". PERF == EFF == 0, suv bit set. * "BAN". PERF == EFF == 0, suv bit not set. * "NOR". */ static char *update_one_cpu(struct perf_cap *perf_cap) { if (perf_cap->cpu < 0) return NULL; if (!perf_cap->cpu) { reset_cpus (CPUMASK_HFI); reset_cpus (CPUMASK_HFI_BANNED); } if (perf_cap->eff == 255 * 4 && has_hfi_lpm_monitor ()) { add_cpu (perf_cap->cpu, CPUMASK_HFI); return "LPM"; } if (!perf_cap->perf && !perf_cap->eff && has_hfi_suv_monitor () && suv_bit_set ()) { add_cpu (perf_cap->cpu, CPUMASK_HFI_SUV); return "SUV"; } if (!perf_cap->perf && !perf_cap->eff) { add_cpu (perf_cap->cpu, CPUMASK_HFI_BANNED); return "BAN"; } return "NOR"; } static void process_one_event(int first, int last, int nr) { /* Need to update more CPUs */ if (nr == 16 && last != get_max_online_cpu ()) return; if (has_cpus (CPUMASK_HFI)) { /* Ignore duplicate event */ if (is_equal (CPUMASK_HFI_LAST, CPUMASK_HFI )) { lpmd_log_debug ("\tDuplicated HFI LPM hints ignored\n\n"); return; } if (in_hfi_lpm ()) { lpmd_log_debug ("\tUpdate HFI LPM event\n\n"); } else { lpmd_log_debug ("\tDetect HFI LPM event\n"); } process_lpm (HFI_ENTER); reset_cpus (CPUMASK_HFI_LAST); copy_cpu_mask(CPUMASK_HFI, CPUMASK_HFI_LAST); } else if (has_cpus (CPUMASK_HFI_SUV)) { if (in_suv_lpm ()) { lpmd_log_debug ("\tUpdate HFI SUV event\n\n"); } else { lpmd_log_debug ("\tDetect HFI SUV event\n"); } // TODO: SUV re-enter is not supported for now process_suv_mode (HFI_SUV_ENTER); } else if (has_cpus (CPUMASK_HFI_BANNED)) { copy_cpu_mask_exclude(CPUMASK_ONLINE, CPUMASK_HFI, CPUMASK_HFI_BANNED); /* Ignore duplicate event */ if (is_equal (CPUMASK_HFI_LAST, CPUMASK_HFI )) { lpmd_log_debug ("\tDuplicated HFI BANNED hints ignored\n\n"); return; } if (in_hfi_lpm ()) { lpmd_log_debug ("\tUpdate HFI LPM event with banned CPUs\n\n"); } else { lpmd_log_debug ("\tDetect HFI LPM event with banned CPUs\n"); } process_lpm (HFI_ENTER); reset_cpus (CPUMASK_HFI_LAST); copy_cpu_mask(CPUMASK_HFI, CPUMASK_HFI_LAST); } else if (in_hfi_lpm ()) { lpmd_log_debug ("\tHFI LPM recover\n"); // Don't override the DETECT_LPM_CPU_DEFAULT so it is auto recovered process_lpm (HFI_EXIT); reset_cpus (CPUMASK_HFI_LAST); } else if (in_suv_lpm ()) { lpmd_log_debug ("\tHFI SUV recover\n"); // Don't override the DETECT_LPM_CPU_DEFAULT so it is auto recovered process_suv_mode (HFI_SUV_EXIT); } else { lpmd_log_info ("\t\t\tUnsupported HFI event ignored\n"); } } static int handle_event(struct nl_msg *n, void *arg) { struct nlmsghdr *nlh = nlmsg_hdr (n); struct genlmsghdr *genlhdr = genlmsg_hdr (nlh); struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1]; struct nlattr *cap; struct perf_cap perf_cap; int first_cpu = -1, last_cpu = -1, nr_cpus = 0; int j, index = 0, offset = 0; char buf[MAX_STR_LENGTH]; if (!in_auto_mode()) return 0; if (genlhdr->cmd != THERMAL_GENL_EVENT_CAPACITY_CHANGE) return 0; if (genlmsg_parse (nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL)) return -1; perf_cap.cpu = perf_cap.perf = perf_cap.eff = -1; nla_for_each_nested (cap, attrs[THERMAL_GENL_ATTR_CAPACITY], j) { switch (index) { case 0: offset += snprintf (buf + offset, MAX_STR_LENGTH - offset, "\tCPU %3d: ", nla_get_u32 (cap)); perf_cap.cpu = nla_get_u32 (cap); break; case 1: offset += snprintf (buf + offset, MAX_STR_LENGTH - offset, " PERF [%4d] ", nla_get_u32 (cap)); perf_cap.perf = nla_get_u32 (cap); break; case 2: offset += snprintf (buf + offset, MAX_STR_LENGTH - offset, " EFF [%4d] ", nla_get_u32 (cap)); perf_cap.eff = nla_get_u32 (cap); break; default: break; } index++; if (index == 3) { char *str; str = update_one_cpu (&perf_cap); offset += snprintf (buf + offset, MAX_STR_LENGTH - offset, " TYPE [%s]", str); buf[MAX_STR_LENGTH - 1] = '\0'; lpmd_log_debug ("\t\t\t%s\n", buf); index = 0; offset = 0; if (first_cpu == -1) first_cpu = perf_cap.cpu; last_cpu = perf_cap.cpu; nr_cpus++; } } process_one_event (first_cpu, last_cpu, nr_cpus); return 0; } static int done = 0; int hfi_kill(void) { nl_socket_free (drv.nl_handle); done = 1; return 0; } void hfi_receive(void) { int err = 0; while (!err) err = nl_recvmsgs (drv.nl_handle, drv.nl_cb); } int hfi_init(void) { struct nl_sock *sock; struct nl_cb *cb; int mcast_id; reset_cpus (CPUMASK_HFI_LAST); signal (SIGPIPE, SIG_IGN); sock = nl_socket_alloc (); if (!sock) { lpmd_log_error ("nl_socket_alloc failed\n"); goto err_proc; } if (genl_connect (sock)) { lpmd_log_error ("genl_connect(sk_event) failed\n"); goto err_proc; } drv.nl_handle = sock; drv.nl_cb = cb = nl_cb_alloc (NL_CB_DEFAULT); if (drv.nl_cb == NULL) { lpmd_log_error ("Failed to allocate netlink callbacks"); goto err_proc; } mcast_id = nl_get_multicast_id (&drv, THERMAL_GENL_FAMILY_NAME, THERMAL_GENL_EVENT_GROUP_NAME); if (mcast_id < 0) { lpmd_log_error ("nl_get_multicast_id failed\n"); goto err_proc; } if (nl_socket_add_membership (sock, mcast_id)) { lpmd_log_error ("nl_socket_add_membership failed"); goto err_proc; } nl_cb_set (cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, seq_check_handler, &done); nl_cb_set (cb, NL_CB_VALID, NL_CB_CUSTOM, handle_event, NULL); nl_socket_set_nonblocking (sock); if (drv.nl_handle) return nl_socket_get_fd (drv.nl_handle); err_proc: return -1; } intel-lpmd-0.0.9/src/lpmd_irq.c000066400000000000000000000155021477072336600163510ustar00rootroot00000000000000/* * irq.c: irq related processing * * Copyright (C) 2023 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "lpmd.h" char *lp_mode_irq_str; static char irq_socket_name[64]; static int irqbalance_pid = -1; #define MAX_IRQS 128 struct info_irq { int irq; char affinity[MAX_STR_LENGTH]; }; struct info_irqs { /* Cached IRQ smp_affinity info */ int nr_irqs; struct info_irq irq[MAX_IRQS]; }; struct info_irqs info_irqs; struct info_irqs *info = &info_irqs; static char irq_str[MAX_STR_LENGTH]; static int lp_mode_irq; int set_lpm_irq(cpu_set_t *cpumask, int action) { lp_mode_irq = action; if (lp_mode_irq == SETTING_IGNORE) return 0; if (irqbalance_pid > 0) { if (lp_mode_irq == SETTING_RESTORE) snprintf(irq_str, sizeof("NULL"), "NULL"); else { cpumask_to_str_reverse(cpumask, irq_str, MAX_STR_LENGTH); if (irq_str[0] == '\0') snprintf(irq_str, sizeof("NULL"), "NULL"); } } else { if (lp_mode_irq != SETTING_RESTORE) cpumask_to_hexstr(cpumask, irq_str, MAX_STR_LENGTH); } return 0; } /* Interrupt Management */ #define SOCKET_PATH "irqbalance" #define SOCKET_TMPFS "/run/irqbalance" static int irqbalance_ban_cpus(int enter) { char socket_cmd[MAX_STR_LENGTH]; struct timespec tp1, tp2; int offset; if (lp_mode_irq == SETTING_RESTORE) lpmd_log_debug ("\tRestore IRQ affinity (irqbalance)\n"); else lpmd_log_debug ("\tUpdate IRQ affinity (irqbalance)\n"); offset = snprintf (socket_cmd, MAX_STR_LENGTH, "settings cpus %s", irq_str); if (offset >= MAX_STR_LENGTH) offset = MAX_STR_LENGTH - 1; socket_cmd[offset] = '\0'; clock_gettime (CLOCK_MONOTONIC, &tp1); socket_send_cmd (irq_socket_name, socket_cmd); clock_gettime (CLOCK_MONOTONIC, &tp2); lpmd_log_debug ("\tSend socket command %s (%lu ns)\n", socket_cmd, 1000000000UL * (tp2.tv_sec - tp1.tv_sec) + tp2.tv_nsec - tp1.tv_nsec); return 0; } static int native_restore_irqs(void) { char path[MAX_STR_LENGTH]; int i; lpmd_log_debug ("\tRestore IRQ affinity (native)\n"); for (i = 0; i < info->nr_irqs; i++) { char *str = info->irq[i].affinity; snprintf (path, MAX_STR_LENGTH, "/proc/irq/%i/smp_affinity", info->irq[i].irq); lpmd_write_str (path, str, LPMD_LOG_DEBUG); } memset (info, 0, sizeof(*info)); return 0; } static int irq_updated; static int update_one_irq(int irq) { FILE *filep; size_t size = 0; char path[MAX_STR_LENGTH]; char *str = NULL; if (info->nr_irqs >= (MAX_IRQS - 1)) { lpmd_log_error ("Too many IRQs\n"); return -1; } snprintf (path, MAX_STR_LENGTH, "/proc/irq/%i/smp_affinity", irq); if (!irq_updated) { info->irq[info->nr_irqs].irq = irq; filep = fopen (path, "r"); if (!filep) return -1; if (getline (&str, &size, filep) <= 0) { lpmd_log_error ("Failed to get IRQ%d smp_affinity\n", irq); free (str); fclose (filep); return -1; } fclose (filep); snprintf (info->irq[info->nr_irqs].affinity, MAX_STR_LENGTH, "%s", str); free (str); /* Remove the Newline */ size = strnlen (info->irq[info->nr_irqs].affinity, MAX_STR_LENGTH); info->irq[info->nr_irqs].affinity[size - 1] = '\0'; info->nr_irqs++; } return lpmd_write_str (path, irq_str, LPMD_LOG_DEBUG); } static int native_update_irqs(void) { FILE *filep; char *line = NULL; size_t size = 0; lpmd_log_debug ("\tUpdate IRQ affinity (native)\n"); filep = fopen ("/proc/interrupts", "r"); if (!filep) { perror ("Error open /proc/interrupts\n"); return -1; } /* first line is the header we don't need; nuke it */ if (getline (&line, &size, filep) <= 0) { perror ("Error getline\n"); free (line); fclose (filep); return -1; } free (line); while (!feof (filep)) { int number; char *c; line = NULL; size = 0; if (getline (&line, &size, filep) <= 0) { free (line); break; } /* lines with letters in front are special, like NMI count. Ignore */ c = line; while (isblank(*(c))) c++; if (!isdigit(*c)) { free (line); break; } c = strchr (line, ':'); if (!c) { free (line); continue; } *c = 0; number = strtoul (line, NULL, 10); update_one_irq (number); free (line); } fclose (filep); irq_updated = 1; return 0; } static int native_process_irqs(int enter) { if (lp_mode_irq == SETTING_RESTORE) return native_restore_irqs (); else return native_update_irqs (); } int process_irqs(int enter, enum lpm_cpu_process_mode mode) { /* No need to handle IRQs in offline mode */ if (mode == LPM_CPU_OFFLINE) return 0; if (lp_mode_irq == SETTING_IGNORE) { lpmd_log_info ("Ignore IRQ migration\n"); return 0; } lpmd_log_info ("Process IRQs ...\n"); if (irqbalance_pid == -1) return native_process_irqs (enter); else return irqbalance_ban_cpus (enter); } int init_irq(void) { DIR *dir; int socket_fd; int ret; lpmd_log_info ("Detecting IRQs ...\n"); dir = opendir ("/run/irqbalance"); if (dir) { struct dirent *entry; do { entry = readdir (dir); if (entry) { if (!strncmp (entry->d_name, "irqbalance", 10)) { ret = sscanf (entry->d_name, "irqbalance%d.sock", &irqbalance_pid); if (!ret) irqbalance_pid = -1; } } } while ((entry) && (irqbalance_pid == -1)); closedir (dir); } if (irqbalance_pid == -1) { lpmd_log_info ("\tirqbalance not running, run in native mode\n"); return LPMD_SUCCESS; } snprintf (irq_socket_name, 64, "%s/%s%d.sock", SOCKET_TMPFS, SOCKET_PATH, irqbalance_pid); socket_fd = socket_init_connection (irq_socket_name); if (socket_fd < 0) { lpmd_log_error ("Can not connect to irqbalance socket /run/irqbalance/irqbalance%d.sock\n", irqbalance_pid); return LPMD_ERROR; } close (socket_fd); lpmd_log_info ("\tFind irqbalance socket %s\n", irq_socket_name); return LPMD_SUCCESS; } intel-lpmd-0.0.9/src/lpmd_main.c000066400000000000000000000212721477072336600165030ustar00rootroot00000000000000/* * lpmd_main.c: Intel Low Power Daemon main entry point * * Copyright (C) 2023 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * This source file contains main() function, which parses command line * option. Call lpmd init function. Provide logging support. * Also allow to daemonize. */ #include #include #include #include #include "lpmd.h" #if !defined(INTEL_LPMD_DIST_VERSION) #define INTEL_LPMD_DIST_VERSION PACKAGE_VERSION #endif #define EXIT_UNSUPPORTED 2 extern int intel_lpmd_dbus_server_init(gboolean (*exit_handler)(void)); // Lock file static int lock_file_handle = -1; static const char *lock_file = TDRUNDIR "/intel_lpmd.pid"; // Default log level static int lpmd_log_level = G_LOG_LEVEL_ERROR | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE; int in_debug_mode(void) { return !!(lpmd_log_level & G_LOG_LEVEL_DEBUG); } // Daemonize or not static gboolean intel_lpmd_daemonize; static gboolean use_syslog; // Disable dbus static gboolean dbus_enable; static gboolean ignore_platform_check = FALSE; int do_platform_check(void) { if (ignore_platform_check) return 0; return 1; } static GMainLoop *g_main_loop; #ifdef GDBUS gint watcher_id = 0; #endif // g_log handler. All logs will be directed here static void intel_lpmd_logger(const gchar *log_domain, GLogLevelFlags log_level, const gchar *message, gpointer user_data) { if (!(lpmd_log_level & log_level)) return; int syslog_priority; const char *prefix; time_t seconds; switch (log_level) { case G_LOG_LEVEL_ERROR: prefix = "[CRIT]"; syslog_priority = LOG_CRIT; break; case G_LOG_LEVEL_CRITICAL: prefix = "[ERR]"; syslog_priority = LOG_ERR; break; case G_LOG_LEVEL_WARNING: prefix = "[WARN]"; syslog_priority = LOG_WARNING; break; case G_LOG_LEVEL_MESSAGE: prefix = "[MSG]"; syslog_priority = LOG_NOTICE; break; case G_LOG_LEVEL_DEBUG: prefix = "[DEBUG]"; syslog_priority = LOG_DEBUG; break; case G_LOG_LEVEL_INFO: default: prefix = "[INFO]"; syslog_priority = LOG_INFO; break; } seconds = time (NULL); if (use_syslog) syslog (syslog_priority, "%s", message); else g_print ("[%lld]%s%s", (long long) seconds, prefix, message); } static void clean_up_lockfile(void) { if (lock_file_handle != -1) { (void) close (lock_file_handle); (void) unlink (lock_file); } } static bool check_intel_lpmd_running(void) { lock_file_handle = open (lock_file, O_RDWR | O_CREAT, 0600); if (lock_file_handle == -1) { // Couldn't open lock file lpmd_log_error ("Could not open PID lock file %s, exiting\n", lock_file); return false; } // Try to lock file if (lockf (lock_file_handle, F_TLOCK, 0) == -1) { // Couldn't get lock on lock file lpmd_log_error ("Couldn't get lock file %d\n", getpid ()); close (lock_file_handle); return true; } return false; } // SIGTERM & SIGINT handler static gboolean sig_int_handler(void) { // Call terminate function lpmd_terminate (); sleep (1); if (g_main_loop) g_main_loop_quit (g_main_loop); // Clean up if any clean_up_lockfile (); exit (EXIT_SUCCESS); return FALSE; } int main(int argc, char *argv[]) { gboolean show_version = FALSE; gboolean log_info = FALSE; gboolean log_debug = FALSE; gboolean no_daemon = FALSE; gboolean systemd = FALSE; gboolean success; GOptionContext *opt_ctx; int ret; intel_lpmd_daemonize = TRUE; use_syslog = TRUE; dbus_enable = FALSE; GOptionEntry options[] = { { "version", 0, 0, G_OPTION_ARG_NONE, &show_version, N_ ("Print intel_lpmd version and exit"), NULL }, { "no-daemon", 0, 0, G_OPTION_ARG_NONE, &no_daemon, N_ ("Don't become a daemon: Default is daemon mode"), NULL }, { "systemd", 0, 0, G_OPTION_ARG_NONE, &systemd, N_ ("Assume daemon is started by systemd, always run in non-daemon mode when using this parameter"), NULL }, { "loglevel=info", 0, 0, G_OPTION_ARG_NONE, &log_info, N_ ("Log severity: info level and up"), NULL }, { "loglevel=debug", 0, 0, G_OPTION_ARG_NONE, &log_debug, N_ ("Log severity: debug level and up: Max logging"), NULL }, { "dbus-enable", 0, 0, G_OPTION_ARG_NONE, &dbus_enable, N_ ( "Enable Dbus"), NULL }, { "ignore-platform-check", 0, 0, G_OPTION_ARG_NONE, &ignore_platform_check, N_ ( "Ignore platform check"), NULL }, { NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL } }; if (!g_module_supported ()) { fprintf (stderr, "GModules are not supported on your platform!\n"); exit (EXIT_FAILURE); } // Set locale to be able to use environment variables setlocale (LC_ALL, ""); bindtextdomain (GETTEXT_PACKAGE, TDLOCALEDIR); bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8"); textdomain (GETTEXT_PACKAGE); // Parse options opt_ctx = g_option_context_new (NULL); g_option_context_set_translation_domain (opt_ctx, GETTEXT_PACKAGE); g_option_context_set_ignore_unknown_options (opt_ctx, FALSE); g_option_context_set_help_enabled (opt_ctx, TRUE); g_option_context_add_main_entries (opt_ctx, options, NULL); g_option_context_set_summary (opt_ctx, "Intel Energy Optimizer (LPMD) Daemon based on system usage takes action " "to improve energy efficiency the system.\n\n" "Copyright (c) 2024, Intel Corporation\n" "This program comes with ABSOLUTELY NO WARRANTY.\n" "This work is licensed under GPL v2.\n\n" "Use \"man intel_lpmd\" to get more details."); success = g_option_context_parse (opt_ctx, &argc, &argv, NULL); g_option_context_free (opt_ctx); if (!success) { fprintf (stderr, "Invalid option. Please use --help to see a list of valid options.\n"); exit (EXIT_FAILURE); } if (show_version) { fprintf (stdout, INTEL_LPMD_DIST_VERSION "\n"); exit (EXIT_SUCCESS); } if (getuid () != 0) { fprintf (stderr, "You must be root to run intel_lpmd!\n"); exit (EXIT_FAILURE); } if (g_mkdir_with_parents (TDRUNDIR, 0755) != 0) { fprintf (stderr, "Cannot create '%s': %s", TDRUNDIR, strerror (errno)); exit (EXIT_FAILURE); } if (g_mkdir_with_parents (TDCONFDIR, 0755) != 0) { fprintf (stderr, "Cannot create '%s': %s", TDCONFDIR, strerror (errno)); exit (EXIT_FAILURE); } if (log_info) { lpmd_log_level |= G_LOG_LEVEL_INFO; } if (log_debug) { lpmd_log_level |= G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG; } openlog ("intel_lpmd", LOG_PID, LOG_USER | LOG_DAEMON | LOG_SYSLOG); // Don't care return val intel_lpmd_daemonize = !no_daemon && !systemd; use_syslog = !no_daemon || systemd; g_log_set_handler (NULL, G_LOG_LEVEL_MASK, intel_lpmd_logger, NULL); if (check_intel_lpmd_running ()) { lpmd_log_error ("An instance of intel_lpmd is already running, exiting ...\n"); exit (EXIT_FAILURE); } if (!intel_lpmd_daemonize) { g_unix_signal_add (SIGINT, G_SOURCE_FUNC (sig_int_handler), NULL); g_unix_signal_add (SIGTERM, G_SOURCE_FUNC (sig_int_handler), NULL); } /* * Initialize the GType/GObject system * Since GLib 2.36, the type system is initialised automatically and this function * does nothing. Deprecated since: 2.36 */ g_type_init (); // Create a main loop that will dispatch callbacks g_main_loop = g_main_loop_new (NULL, FALSE); if (g_main_loop == NULL) { clean_up_lockfile (); lpmd_log_error ("Couldn't create GMainLoop:\n"); return LPMD_FATAL_ERROR; } if (intel_lpmd_daemonize) { printf ("Ready to serve requests: Daemonizing..\n"); lpmd_log_info ("intel_lpmd ver %s: Ready to serve requests: Daemonizing..\n", INTEL_LPMD_DIST_VERSION); if (daemon (0, 0) != 0) { clean_up_lockfile (); lpmd_log_error ("Failed to daemonize.\n"); return LPMD_FATAL_ERROR; } } if (dbus_enable) intel_dbus_server_init (sig_int_handler); ret = lpmd_main (); if (ret != LPMD_SUCCESS) { clean_up_lockfile (); closelog (); if (ret == LPMD_ERROR) exit (EXIT_UNSUPPORTED); else exit (EXIT_FAILURE); } // Start service requests on the D-Bus lpmd_log_debug ("Start main loop\n"); g_main_loop_run (g_main_loop); lpmd_log_warn ("Oops g main loop exit..\n"); #ifdef GDBUS g_bus_unwatch_name (watcher_id); #endif fprintf (stdout, "Exiting ..\n"); clean_up_lockfile (); closelog (); } intel-lpmd-0.0.9/src/lpmd_proc.c000066400000000000000000000626211477072336600165250ustar00rootroot00000000000000/* * lpmd_proc.c: Intel Low Power Daemon core processing * * Copyright (C) 2023 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * This file contains the main LPMD thread and poll loop. Call correct * processing function on receiving user or system command. */ #include "lpmd.h" #include #include "wlt_proxy.h" static lpmd_config_t lpmd_config; char *lpm_cmd_str[LPM_CMD_MAX] = { [USER_ENTER] = "usr enter", [USER_EXIT] = "usr exit", [USER_AUTO] = "usr auto", [HFI_ENTER] = "hfi enter", [HFI_EXIT] = "hfi exit", [UTIL_ENTER] = "utl enter", [UTIL_EXIT] = "utl exit", }; static int in_low_power_mode = 0; static UpClient *upower_client; static pthread_mutex_t lpmd_mutex; int lpmd_lock(void) { return pthread_mutex_lock (&lpmd_mutex); } int lpmd_unlock(void) { return pthread_mutex_unlock (&lpmd_mutex); } /* * It may take a relatively long time to enter/exit low power mode. * Hold lpmd_lock to make sure there is no state change ongoing. */ int in_lpm(void) { int ret; lpmd_lock (); ret = in_low_power_mode; lpmd_unlock (); return !!ret; } /* Can be configurable */ int get_idle_percentage(void) { return 90; } /* Can be configurable */ int get_idle_duration(void) { return -1; } int get_cpu_mode(void) { return lpmd_config.mode; } static int has_hfi_capability(void) { unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; cpuid(6, eax, ebx, ecx, edx); if (eax & (1 << 19)) { lpmd_log_info("HFI capability detected\n"); return 1; } return 0; } int has_hfi_lpm_monitor(void) { return !!lpmd_config.hfi_lpm_enable; } int has_hfi_suv_monitor(void) { return !!lpmd_config.hfi_suv_enable; } int has_util_monitor(void) { return !!lpmd_config.util_enable; } int get_util_entry_interval(void) { return lpmd_config.util_entry_delay; } int get_util_exit_interval(void) { return lpmd_config.util_exit_delay; } int get_util_entry_threshold(void) { return lpmd_config.util_entry_threshold; } int get_util_exit_threshold(void) { return lpmd_config.util_exit_threshold; } int get_util_entry_hyst(void) { return lpmd_config.util_entry_hyst; } int get_util_exit_hyst(void) { return lpmd_config.util_exit_hyst; } /* ITMT Management */ #define PATH_ITMT_CONTROL "/proc/sys/kernel/sched_itmt_enabled" static int saved_itmt = SETTING_IGNORE; static int lp_mode_itmt = SETTING_IGNORE; int get_lpm_itmt(void) { return lp_mode_itmt; } void set_lpm_itmt(int val) { lp_mode_itmt = val; } int get_itmt(void) { int val; lpmd_read_int(PATH_ITMT_CONTROL, &val, -1); return val; } static int init_itmt(void) { return lpmd_read_int(PATH_ITMT_CONTROL, &saved_itmt, -1); } static int process_itmt(void) { if (lp_mode_itmt == SETTING_RESTORE) lp_mode_itmt = saved_itmt; if (lp_mode_itmt == SETTING_IGNORE) { lpmd_log_debug("Ignore ITMT\n"); return 0; } lpmd_log_debug ("%s ITMT\n", lp_mode_itmt ? "Enable" : "Disable"); return lpmd_write_int(PATH_ITMT_CONTROL, lp_mode_itmt, -1); } /* Main functions */ enum lpm_state { LPM_USER_ON = 1 << 0, LPM_USER_OFF = 1 << 1, LPM_SUV_ON = 1 << 2, LPM_HFI_ON = 1 << 3, LPM_UTIL_ON = 1 << 4, }; /* Force off by default */ int lpm_state = LPM_USER_OFF; int in_hfi_lpm(void) { return lpm_state & LPM_HFI_ON; } int in_suv_lpm(void) { return lpm_state & LPM_SUV_ON; } int in_auto_mode() { return !(lpm_state & (LPM_USER_ON | LPM_USER_OFF)); } /* * 1: request valid and already satisfied. 0: respond valid and need to continue to process. -1: request invalid */ static int lpm_can_process(enum lpm_command cmd) { switch (cmd) { case USER_ENTER: lpm_state &= ~LPM_USER_OFF; lpm_state |= LPM_USER_ON; /* Set the flag but do not proceed when in SUV mode */ if (lpm_state & LPM_SUV_ON) return 0; return 1; case USER_EXIT: lpm_state &= ~LPM_USER_ON; lpm_state |= LPM_USER_OFF; /* Set the flag but do not proceed when in SUV mode */ if (lpm_state & LPM_SUV_ON) return 0; return 1; case USER_AUTO: lpm_state &= ~LPM_USER_ON; lpm_state &= ~LPM_USER_OFF; /* Assume the system is already in HFI_LPM so that we can handle next HFI update whatever it is */ if (has_hfi_lpm_monitor()) { lpmd_log_info("Use HFI\n"); lpm_state |= LPM_HFI_ON; } return 0; case HFI_ENTER: if (lpm_state & (LPM_USER_OFF | LPM_USER_ON)) return 0; /* Ignore HFI LPM hints when in SUV mode */ if (lpm_state & LPM_SUV_ON) return 0; lpm_state |= LPM_HFI_ON; return 1; case HFI_EXIT: lpm_state &= ~LPM_HFI_ON; if (lpm_state & LPM_USER_ON) return 0; /* Do not proceed when in SUV mode */ if (lpm_state & LPM_SUV_ON) return 0; return 1; case UTIL_ENTER: if (lpm_state & (LPM_USER_OFF)) return 0; /* Do not proceed when in SUV mode */ if (lpm_state & LPM_SUV_ON) return 0; return 1; case UTIL_EXIT: if (lpm_state & LPM_USER_ON) return 0; /* Do not proceed when in SUV mode */ if (lpm_state & LPM_SUV_ON) return 0; /* Trust HFI LPM hints over utilization monitor */ if (lpm_state & LPM_HFI_ON) return 0; return 1; /* Quit LPM because of SUV mode */ case HFI_SUV_ENTER: lpm_state &= ~LPM_HFI_ON; /* HFI SUV hints means LPM hints is invalid */ /* Fallthrough */ case DBUS_SUV_ENTER: lpm_state |= LPM_SUV_ON; return 1; /* Re-enter LPM when quitting SUV mode */ case HFI_SUV_EXIT: case DBUS_SUV_EXIT: lpm_state &= ~LPM_SUV_ON; /* Re-enter LPM because it is forced by user */ if (lpm_state & LPM_USER_ON) return 1; /* Do oppoturnistic LPM based on util/hfi requests */ return 0; default: return 1; } } static int dry_run = 0; /* Must be invoked with lpmd_lock held */ int enter_lpm(enum lpm_command cmd) { lpmd_log_debug ("Request %d (%10s). lpm_state 0x%x\n", cmd, lpm_cmd_str[cmd], lpm_state); if (!lpm_can_process (cmd)) { lpmd_log_debug ("Request stopped. lpm_state 0x%x\n", lpm_state); return 1; } if (in_low_power_mode && cmd != HFI_ENTER && cmd != UTIL_ENTER) { lpmd_log_debug ("Request skipped because the system is already in Low Power Mode ---\n"); return 0; } time_start (); if (cmd != USER_ENTER && cmd != UTIL_ENTER && cmd != HFI_ENTER) { lpmd_log_info ("Unsupported LPM reason %d\n", cmd); return 1; } lpmd_log_msg ("------ Enter Low Power Mode (%10s) --- %s", lpm_cmd_str[cmd], get_time ()); if (dry_run) { lpmd_log_debug ("----- Dry Run -----\n"); goto end; } process_itmt (); process_irqs (1, get_cpu_mode ()); process_cpus (1, get_cpu_mode ()); end: lpmd_log_info ("----- Done (%s) ---\n", time_delta ()); in_low_power_mode = 1; return 0; } /* Must be invoked with lpmd_lock held */ int exit_lpm(enum lpm_command cmd) { lpmd_log_debug ("Request %d (%10s). lpm_state 0x%x\n", cmd, lpm_cmd_str[cmd], lpm_state); if (!lpm_can_process (cmd)) { lpmd_log_debug ("Request stopped. lpm_state 0x%x\n", lpm_state); return 1; } if (!in_low_power_mode) { lpmd_log_debug ( "Request skipped because the system is already out of Low Power Mode ---\n"); return 0; } time_start (); lpmd_log_msg ("------ Exit Low Power Mode (%10s) --- %s", lpm_cmd_str[cmd], get_time ()); if (dry_run) { lpmd_log_debug ("----- Dry Run -----\n"); goto end; } process_cpus (0, get_cpu_mode ()); process_irqs (0, get_cpu_mode ()); process_itmt (); end: lpmd_log_info ("----- Done (%s) ---\n", time_delta ()); in_low_power_mode = 0; return 0; } static int lpmd_freezed = 0; /* should be invoked without lock held */ int process_lpm_unlock(enum lpm_command cmd) { int ret; if (lpmd_freezed) { lpmd_log_error("lpmd freezed, command (%s) ignored\n", lpm_cmd_str[cmd]); return 0; } switch (cmd) { case UTIL_ENTER: if (!use_config_states()) { set_lpm_epp (lpmd_config.lp_mode_epp); set_lpm_epb (SETTING_IGNORE); set_lpm_itmt (lpmd_config.ignore_itmt ? SETTING_IGNORE : 0); /* Disable ITMT */ set_lpm_irq(get_cpumask(CPUMASK_LPM_DEFAULT), 1); set_lpm_cpus (CPUMASK_LPM_DEFAULT); } ret = enter_lpm (cmd); break; case USER_ENTER: case USER_AUTO: reset_config_state(); set_lpm_epp (lpmd_config.lp_mode_epp); set_lpm_epb (SETTING_IGNORE); set_lpm_itmt (lpmd_config.ignore_itmt ? SETTING_IGNORE : 0); /* Disable ITMT */ set_lpm_irq(get_cpumask(CPUMASK_LPM_DEFAULT), 1); set_lpm_cpus (CPUMASK_LPM_DEFAULT); ret = enter_lpm (cmd); break; case HFI_SUV_EXIT: case DBUS_SUV_EXIT: set_lpm_epp (SETTING_IGNORE); set_lpm_epb (SETTING_IGNORE); set_lpm_itmt (SETTING_IGNORE); set_lpm_irq(NULL, SETTING_IGNORE); /* SUV ignores IRQ */ set_lpm_cpus (CPUMASK_HFI_SUV); ret = enter_lpm (cmd); break; case HFI_ENTER: set_lpm_epp (lpmd_config.lp_mode_epp); set_lpm_epb (SETTING_IGNORE); set_lpm_itmt (0); /* HFI always disables ITMT */ set_lpm_irq(NULL, SETTING_IGNORE); /* HFI ignores IRQ */ set_lpm_cpus (CPUMASK_HFI); ret = enter_lpm (cmd); break; /* exit_lpm does not require to invoke set_lpm_cpus() */ case USER_EXIT: case UTIL_EXIT: reset_config_state(); set_lpm_epp (SETTING_RESTORE); set_lpm_epb (SETTING_RESTORE); set_lpm_itmt (SETTING_RESTORE); set_lpm_irq(NULL, SETTING_RESTORE); ret = exit_lpm (cmd); break; case HFI_SUV_ENTER: case DBUS_SUV_ENTER: set_lpm_epp (SETTING_IGNORE); set_lpm_epb (SETTING_IGNORE); set_lpm_itmt (SETTING_IGNORE); set_lpm_irq(NULL, SETTING_IGNORE); ret = exit_lpm (cmd); break; case HFI_EXIT: set_lpm_epp (lpmd_config.lp_mode_epp == SETTING_IGNORE ? SETTING_IGNORE : SETTING_RESTORE); set_lpm_epb (SETTING_IGNORE); set_lpm_itmt (SETTING_RESTORE); /* Restore ITMT */ set_lpm_irq(NULL, SETTING_IGNORE); /* HFI ignores IRQ */ ret = exit_lpm (cmd); break; default: ret = -1; break; } return ret; } int process_lpm(enum lpm_command cmd) { int ret; lpmd_lock (); ret = process_lpm_unlock (cmd); lpmd_unlock (); return ret; } static int saved_lpm_state = -1; int freeze_lpm(void) { lpmd_lock (); if (lpmd_freezed) goto end; if (saved_lpm_state < 0) saved_lpm_state = lpm_state & (LPM_USER_ON | LPM_USER_OFF); process_lpm_unlock (USER_EXIT); /* Set lpmd_freezed later to allow process_lpm () */ lpmd_freezed = 1; end: lpmd_unlock (); return 0; } int restore_lpm(void) { lpmd_lock (); if (!lpmd_freezed) goto end; if (saved_lpm_state >= 0) { lpm_state = saved_lpm_state; saved_lpm_state = -1; } /* Clear lpmd_freezed to allow process_lpm () */ lpmd_freezed = 0; /* Restore previous USER_* cmd */ if (lpm_state & LPM_USER_ON) { process_lpm_unlock (USER_ENTER); goto end; } if (lpm_state & LPM_USER_OFF) { process_lpm_unlock (USER_EXIT); goto end; } process_lpm_unlock (USER_AUTO); end: lpmd_unlock (); return 0; } static int proc_message(message_capsul_t *msg); static int write_pipe_fd; static void lpmd_send_message(message_name_t msg_id, int size, unsigned char *msg) { message_capsul_t msg_cap; int result; memset (&msg_cap, 0, sizeof(message_capsul_t)); msg_cap.msg_id = msg_id; msg_cap.msg_size = (size > MAX_MSG_SIZE) ? MAX_MSG_SIZE : size; if (msg) memcpy (msg_cap.msg, msg, msg_cap.msg_size); result = write (write_pipe_fd, &msg_cap, sizeof(message_capsul_t)); if (result < 0) lpmd_log_warn ("Write to pipe failed\n"); } void lpmd_terminate(void) { if (lpmd_config.wlt_proxy_enable) { wlt_proxy_uninit(); } lpmd_send_message (TERMINATE, 0, NULL); sleep (1); if (upower_client) g_clear_object(&upower_client); } void lpmd_force_on(void) { lpmd_send_message (LPM_FORCE_ON, 0, NULL); } void lpmd_force_off(void) { lpmd_send_message (LPM_FORCE_OFF, 0, NULL); } void lpmd_set_auto(void) { lpmd_send_message (LPM_AUTO, 0, NULL); } void lpmd_suv_enter(void) { lpmd_send_message (SUV_MODE_ENTER, 0, NULL); } void lpmd_suv_exit(void) { lpmd_send_message (SUV_MODE_EXIT, 0, NULL); } void lpmd_notify_hfi_event(void) { lpmd_send_message (HFI_EVENT, 0, NULL); sleep (1); } #define LPMD_NUM_OF_POLL_FDS 5 static pthread_t lpmd_core_main; static pthread_attr_t lpmd_attr; static struct pollfd poll_fds[LPMD_NUM_OF_POLL_FDS]; static int poll_fd_cnt; static int idx_pipe_fd = -1; static int idx_uevent_fd = -1; static int idx_hfi_fd = -1; static int wlt_fd; static int idx_wlt_fd = -1; // Workload type classification #define WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/notification_delay_ms" #define WORKLOAD_ENABLE_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_hint_enable" #define WORKLOAD_TYPE_INDEX_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_type_index" #define NOTIFICATION_DELAY 100 // Clear workload type notifications static void exit_wlt() { int fd; /* Disable feature via sysfs knob */ fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR); if (fd < 0) return; // Disable WLT notification if (write(fd, "0\n", 2) < 0) { close (fd); return; } close(fd); } // Initialize Workload type notifications static int init_wlt() { char delay_str[64]; int fd; lpmd_log_debug ("init_wlt begin\n"); // Set notification delay fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR); if (fd < 0) return fd; sprintf(delay_str, "%d\n", NOTIFICATION_DELAY); if (write(fd, delay_str, strlen(delay_str)) < 0) { close(fd); return -1; } close(fd); // Enable WLT notification fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR); if (fd < 0) return fd; if (write(fd, "1\n", 2) < 0) { close(fd); return -1; } close(fd); // Open FD for workload type attribute fd = open(WORKLOAD_TYPE_INDEX_ATTRIBUTE, O_RDONLY); if (fd < 0) { exit_wlt(); return fd; } lpmd_log_debug ("init_wlt end wlt fd:%d\n", fd); return fd; } // Read current Workload type static int read_wlt(int fd) { char index_str[4]; int index, ret; if (fd < 0) return WLT_INVALID; if ((lseek(fd, 0L, SEEK_SET)) < 0) return WLT_INVALID; ret = read(fd, index_str, sizeof(index_str)); if (ret <= 0) return WLT_INVALID; ret = sscanf(index_str, "%d", &index); if (ret < 0) return WLT_INVALID; lpmd_log_debug("wlt:%d\n", index); return index; } static void poll_for_wlt(int enable) { static int wlt_enabled_once = 0; lpmd_log_info("%s enable:%d\n", __func__, enable); if (wlt_fd <= 0) { if (enable) { wlt_fd = init_wlt(); if (wlt_fd < 0) return; } else { return; } } if (enable) { idx_wlt_fd = poll_fd_cnt; poll_fds[idx_wlt_fd].fd = wlt_fd; poll_fds[idx_wlt_fd].events = POLLPRI; poll_fds[idx_wlt_fd].revents = 0; if (!wlt_enabled_once) poll_fd_cnt++; wlt_enabled_once = 1; } else if (idx_wlt_fd >= 0) { poll_fds[idx_wlt_fd].fd = -1; idx_wlt_fd = -1; } } #include static GDBusProxy *power_profiles_daemon; static enum power_profile_daemon_mode ppd_mode = PPD_INVALID; int get_ppd_mode(void) { return ppd_mode; } static void power_profiles_changed_cb(void) { g_autoptr (GVariant) active_profile_v = NULL; active_profile_v = g_dbus_proxy_get_cached_property (power_profiles_daemon, "ActiveProfile"); if (active_profile_v && g_variant_is_of_type (active_profile_v, G_VARIANT_TYPE_STRING)) { const char *active_profile = g_variant_get_string (active_profile_v, NULL); lpmd_log_debug ("power_profiles_changed_cb: %s\n", active_profile); if (strcmp (active_profile, "power-saver") == 0) { ppd_mode = PPD_POWERSAVER; lpmd_send_message (lpmd_config.powersaver_def, 0, NULL); } else if (strcmp (active_profile, "performance") == 0) { ppd_mode = PPD_PERFORMANCE; lpmd_send_message (lpmd_config.performance_def, 0, NULL); } else if (strcmp (active_profile, "balanced") == 0) { ppd_mode = PPD_BALANCED; lpmd_send_message (lpmd_config.balanced_def, 0, NULL); } else { lpmd_log_warn("Ignore unsupported power profile: %s\n", active_profile); } } } static void connect_to_power_profile_daemon(void) { g_autoptr (GDBusConnection) bus = NULL; bus = g_bus_get_sync (G_BUS_TYPE_SYSTEM, NULL, NULL); if (bus) { power_profiles_daemon = g_dbus_proxy_new_sync (bus, G_DBUS_PROXY_FLAGS_DO_NOT_AUTO_START, NULL, "net.hadess.PowerProfiles", "/net/hadess/PowerProfiles", "net.hadess.PowerProfiles", NULL, NULL); if (power_profiles_daemon) { g_signal_connect_swapped (power_profiles_daemon, "g-properties-changed", (GCallback) power_profiles_changed_cb, NULL); power_profiles_changed_cb (); } else { lpmd_log_info ("Could not setup DBus watch for power-profiles-daemon"); } } } static int battery_mode; int is_on_battery(void) { return battery_mode; } static void upower_daemon_cb (UpClient *client, GParamSpec *pspec, gpointer user_data) { battery_mode = up_client_get_on_battery(upower_client); lpmd_log_info("upower event: on-battery: %d\n", battery_mode); } static void connect_to_upower_daemon(void) { GError *error = NULL; GPtrArray *devices; int i; upower_client = up_client_new_full (NULL, &error); if (upower_client == NULL) { g_warning ("Cannot connect to upowerd: %s", error->message); g_error_free (error); return; } lpmd_log_info("connected to upower daemon\n"); g_signal_connect (upower_client, "notify", G_CALLBACK (upower_daemon_cb), NULL); devices = up_client_get_devices2 (upower_client); for (i=0; i < devices->len; i++) { UpDevice *device; device = g_ptr_array_index (devices, i); g_signal_connect (device, "notify", G_CALLBACK (upower_daemon_cb), NULL); } } /* Poll time out default */ #define POLL_TIMEOUT_DEFAULT_SECONDS 1 static bool main_loop_terminate; // called from LPMD main thread to process user and system messages static int proc_message(message_capsul_t *msg) { int ret = 0; lpmd_log_debug ("Received message %d\n", msg->msg_id); switch (msg->msg_id) { case TERMINATE: lpmd_log_msg ("Terminating ...\n"); ret = -1; main_loop_terminate = true; hfi_kill (); process_lpm (USER_EXIT); break; case LPM_FORCE_ON: // Always stay in LPM mode process_lpm (USER_ENTER); break; case LPM_FORCE_OFF: // Never enter LPM mode process_lpm (USER_EXIT); break; case LPM_AUTO: // Enable oppotunistic LPM process_lpm (USER_AUTO); break; case SUV_MODE_ENTER: // Call function to enter SUV mode process_suv_mode (DBUS_SUV_ENTER); break; case SUV_MODE_EXIT: // Call function to exit SUV mode process_suv_mode (DBUS_SUV_EXIT); break; case HFI_EVENT: // Call the HFI callback from here break; default: break; } return ret; } static void dump_poll_results(int ret) { int i; lpmd_log_debug("poll_fds[]: ret %d, pipe %d, uevent %d, hfi %d, wlt %d\n", ret, idx_pipe_fd, idx_uevent_fd, idx_hfi_fd, idx_wlt_fd); for (i = 0; i < poll_fd_cnt; i++) lpmd_log_debug("poll_fds[%d]: event %d, revent %d\n", i, poll_fds[i].events, poll_fds[i].revents); } // LPMD processing thread. This is callback to pthread lpmd_core_main static void* lpmd_core_main_loop(void *arg) { int interval = -1, n; for (;;) { if (main_loop_terminate) break; // Opportunistic LPM is disabled in below cases if ((lpm_state & (LPM_USER_ON | LPM_USER_OFF | LPM_SUV_ON)) | has_hfi_lpm_monitor ()) interval = -1; else if (interval == -1) interval = 100; lpmd_log_debug("Poll with interval %d\n", interval); n = poll (poll_fds, poll_fd_cnt, interval); if (n < 0) { lpmd_log_warn ("Write to pipe failed\n"); continue; } dump_poll_results(n); /* Time out, need to choose next util state and interval */ if (n == 0 && interval > 0) { if (lpmd_config.wlt_proxy_enable) { int wlt_proxy_type = read_wlt_proxy(&interval); periodic_util_update (&lpmd_config, wlt_proxy_type); } else if (lpmd_config.wlt_hint_enable && lpmd_config.wlt_hint_poll_enable) { int wlt_type = read_wlt(wlt_fd); interval = periodic_util_update (&lpmd_config, wlt_type); } else { interval = periodic_util_update (&lpmd_config, -1); } } if (idx_pipe_fd >= 0 && (poll_fds[idx_pipe_fd].revents & POLLIN)) { // process message written on pipe here message_capsul_t msg; int result = read (poll_fds[idx_pipe_fd].fd, &msg, sizeof(message_capsul_t)); if (result < 0) { lpmd_log_warn ("read on wakeup fd failed\n"); poll_fds[idx_pipe_fd].revents = 0; continue; } if (proc_message (&msg) < 0) { lpmd_log_debug ("Terminating thread..\n"); } } if (idx_uevent_fd >= 0 && (poll_fds[idx_uevent_fd].revents & POLLIN)) { check_cpu_hotplug (); } if (idx_hfi_fd >= 0 && (poll_fds[idx_hfi_fd].revents & POLLIN)) { hfi_receive (); } if (idx_wlt_fd >= 0 && (poll_fds[idx_wlt_fd].revents & POLLPRI)) { int wlt_index; wlt_index = read_wlt(poll_fds[idx_wlt_fd].fd); if (in_auto_mode()) interval = periodic_util_update (&lpmd_config, wlt_index); } } return NULL; } static void build_default_config_state(void) { lpmd_config_state_t *state; if (lpmd_config.config_state_count) return; state = &lpmd_config.config_states[0]; state->id = 1; snprintf(state->name, MAX_STATE_NAME, "LPM_DEEP"); state->entry_system_load_thres = lpmd_config.util_entry_threshold; state->enter_cpu_load_thres = lpmd_config.util_exit_threshold; state->itmt_state = lpmd_config.ignore_itmt ? SETTING_IGNORE : 0; state->irq_migrate = 1; state->min_poll_interval = 100; state->max_poll_interval = 1000; state->poll_interval_increment = -1; state->epp = lpmd_config.lp_mode_epp; state->epb = SETTING_IGNORE; state->valid = 1; state->wlt_type = -1; snprintf(state->active_cpus, MAX_STR_LENGTH, "%s", get_cpus_str(CPUMASK_LPM_DEFAULT)); state = &lpmd_config.config_states[1]; state->id = 2; snprintf(state->name, MAX_STATE_NAME, "FULL_POWER"); state->entry_system_load_thres = 100; state->enter_cpu_load_thres = 100; state->itmt_state = lpmd_config.ignore_itmt ? SETTING_IGNORE : SETTING_RESTORE; state->irq_migrate = 1; state->min_poll_interval = 1000; state->max_poll_interval = 1000; state->epp = lpmd_config.lp_mode_epp == SETTING_IGNORE ? SETTING_IGNORE : SETTING_RESTORE; state->epb = SETTING_IGNORE; state->valid = 1; state->wlt_type = -1; snprintf(state->active_cpus, MAX_STR_LENGTH, "%s", get_cpus_str(CPUMASK_ONLINE)); lpmd_config.config_state_count = 2; } int lpmd_main(void) { int wake_fds[2]; int ret; lpmd_log_debug ("lpmd_main begin\n"); ret = check_cpu_capability(&lpmd_config); if (ret) return ret; // Call all lpmd related functions here ret = lpmd_get_config (&lpmd_config); if (ret) return ret; pthread_mutex_init (&lpmd_mutex, NULL); ret = init_cpu (lpmd_config.lp_mode_cpus, lpmd_config.mode, lpmd_config.lp_mode_epp); if (ret) return ret; init_itmt(); if (!has_suv_support () && lpmd_config.hfi_suv_enable) lpmd_config.hfi_suv_enable = 0; if (!has_hfi_capability ()) lpmd_config.hfi_lpm_enable = 0; /* Must done after init_cpu() */ build_default_config_state(); util_init(&lpmd_config); ret = init_irq (); if (ret) return ret; connect_to_upower_daemon(); // Pipe is used for communication between two processes ret = pipe (wake_fds); if (ret) { lpmd_log_error ("pipe creation failed %d:\n", ret); return LPMD_FATAL_ERROR; } if (fcntl (wake_fds[0], F_SETFL, O_NONBLOCK) < 0) { lpmd_log_error ("Cannot set non-blocking on pipe: %s\n", strerror (errno)); (void)close(wake_fds[0]); (void)close(wake_fds[1]); return LPMD_FATAL_ERROR; } if (fcntl (wake_fds[1], F_SETFL, O_NONBLOCK) < 0) { lpmd_log_error ("Cannot set non-blocking on pipe: %s\n", strerror (errno)); (void)close(wake_fds[0]); (void)close(wake_fds[1]); return LPMD_FATAL_ERROR; } write_pipe_fd = wake_fds[1]; memset (poll_fds, 0, sizeof(poll_fds)); idx_pipe_fd = poll_fd_cnt; poll_fds[idx_pipe_fd].fd = wake_fds[0]; poll_fds[idx_pipe_fd].events = POLLIN; poll_fds[idx_pipe_fd].revents = 0; poll_fd_cnt++; poll_fds[poll_fd_cnt].fd = uevent_init (); if (poll_fds[poll_fd_cnt].fd > 0) { idx_uevent_fd = poll_fd_cnt; poll_fds[idx_uevent_fd].events = POLLIN; poll_fds[idx_uevent_fd].revents = 0; poll_fd_cnt++; } if (lpmd_config.hfi_lpm_enable || lpmd_config.hfi_suv_enable) { poll_fds[poll_fd_cnt].fd = hfi_init (); if (poll_fds[poll_fd_cnt].fd > 0) { idx_hfi_fd = poll_fd_cnt; poll_fds[idx_hfi_fd].events = POLLIN; poll_fds[idx_hfi_fd].revents = 0; poll_fd_cnt++; } } if (lpmd_config.wlt_hint_enable) { if (lpmd_config.wlt_proxy_enable) { if (wlt_proxy_init() != LPMD_SUCCESS) { lpmd_config.wlt_proxy_enable = 0; lpmd_log_error ("Error setting up WLT Proxy. wlt_proxy_enable disabled\n"); } } if (!lpmd_config.hfi_lpm_enable && !lpmd_config.hfi_suv_enable) { lpmd_config.util_enable = 0; if (!lpmd_config.wlt_proxy_enable) { poll_for_wlt(1); } } } pthread_attr_init (&lpmd_attr); pthread_attr_setdetachstate (&lpmd_attr, PTHREAD_CREATE_DETACHED); connect_to_power_profile_daemon (); /* * lpmd_core_main_loop: is the thread where all LPMD actions take place. * All other thread send message via pipe to trigger processing */ ret = pthread_create (&lpmd_core_main, &lpmd_attr, lpmd_core_main_loop, NULL); if (ret) return LPMD_FATAL_ERROR; lpmd_log_debug ("lpmd_init succeeds\n"); return LPMD_SUCCESS; } intel-lpmd-0.0.9/src/lpmd_socket.c000066400000000000000000000073631477072336600170540ustar00rootroot00000000000000/* * lpmd_socket.c: Intel Low Power Daemon socket helpers * * Copyright (C) 2023 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * This file is used to send messages to IRQ daemon via sockets. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "lpmd.h" /* socket helpers */ int socket_init_connection(char *name) { struct sockaddr_un addr; static int socket_fd; if (!name) return 0; memset (&addr, 0, sizeof(struct sockaddr_un)); socket_fd = socket (AF_LOCAL, SOCK_STREAM, 0); if (socket_fd < 0) { perror ("Error opening socket"); return 0; } addr.sun_family = AF_UNIX; snprintf (addr.sun_path, sizeof(addr.sun_path), "%s", name); if (connect (socket_fd, (struct sockaddr*) &addr, sizeof(addr)) < 0) { /* Try connect to abstract */ memset (&addr, 0, sizeof(struct sockaddr_un)); addr.sun_family = AF_UNIX; if (connect (socket_fd, (struct sockaddr*) &addr, sizeof(addr)) < 0) { close (socket_fd); return 0; } } return socket_fd; } static struct msghdr* create_credentials_msg() { struct ucred *credentials; struct msghdr *msg; struct cmsghdr *cmsg; credentials = malloc (sizeof(struct ucred)); if (!credentials) return NULL; credentials->pid = getpid (); credentials->uid = geteuid (); credentials->gid = getegid (); msg = malloc (sizeof(struct msghdr)); if (!msg) { free (credentials); return msg; } memset (msg, 0, sizeof(struct msghdr)); msg->msg_iovlen = 1; msg->msg_control = malloc (CMSG_SPACE(sizeof(struct ucred))); if (!msg->msg_control) { free (credentials); free (msg); return NULL; } msg->msg_controllen = CMSG_SPACE(sizeof(struct ucred)); cmsg = CMSG_FIRSTHDR(msg); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_CREDENTIALS; cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred)); memcpy (CMSG_DATA(cmsg), credentials, sizeof(struct ucred)); free (credentials); return msg; } int socket_send_cmd(char *name, char *data) { int socket_fd; struct msghdr *msg; struct iovec iov; char buf[MAX_STR_LENGTH]; int ret; if (!name || !data) return LPMD_ERROR; socket_fd = socket_init_connection (name); if (!socket_fd) return LPMD_ERROR; msg = create_credentials_msg (); if (!msg) return LPMD_ERROR; iov.iov_base = (void*) data; iov.iov_len = strlen (data); msg->msg_iov = &iov; if (sendmsg (socket_fd, msg, 0) < 0) { free (msg->msg_control); free (msg); return LPMD_ERROR; } ret = read (socket_fd, buf, MAX_STR_LENGTH); if (ret < 0) lpmd_log_debug ("read failed\n"); close (socket_fd); free (msg->msg_control); free (msg); return LPMD_SUCCESS; } intel-lpmd-0.0.9/src/lpmd_util.c000066400000000000000000000455071477072336600165430ustar00rootroot00000000000000/* * util.c: intel_lpmd utilization monitor * * Copyright (C) 2023 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * This file contains logic similar to "top" program to get utilization from * /proc/sys kernel interface. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "lpmd.h" /* System should quit Low Power Mode when it is overloaded */ #define PATH_PROC_STAT "/proc/stat" static lpmd_config_state_t *current_state; void reset_config_state(void) { current_state = NULL; } enum type_stat { STAT_CPU, STAT_USER, STAT_NICE, STAT_SYSTEM, STAT_IDLE, STAT_IOWAIT, STAT_IRQ, STAT_SOFTIRQ, STAT_STEAL, STAT_GUEST, STAT_GUEST_NICE, STAT_MAX, }; struct proc_stat_info { int cpu; int valid; unsigned long long stat[STAT_MAX]; }; struct proc_stat_info *proc_stat_prev; struct proc_stat_info *proc_stat_cur; static int busy_sys = -1; static int busy_cpu = -1; static int busy_gfx = -1; char *path_gfx_rc6; char *path_sam_mc6; static int probe_gfx_util_sysfs(void) { FILE *fp; char buf[8]; bool gt0_is_gt; if (access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) return 1; fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r"); if (!fp) return 1; if (!fread(buf, sizeof(char), 7, fp)) { fclose(fp); return 1; } fclose(fp); if (!strncmp(buf, "gt0-rc", strlen("gt0-rc"))) { if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) path_gfx_rc6 = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms"; if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK)) path_sam_mc6 = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms"; } else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc"))) { if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK)) path_gfx_rc6 = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms"; if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) path_sam_mc6 = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms"; } lpmd_log_debug("Use %s for gfx rc6\n", path_gfx_rc6); lpmd_log_debug("Use %s for sam mc6\n", path_sam_mc6); return 0; } static int get_gfx_util_sysfs(unsigned long long time_ms) { static unsigned long long gfx_rc6_prev = ULLONG_MAX, sam_mc6_prev = ULLONG_MAX; unsigned long long gfx_rc6, sam_mc6; unsigned long long val; FILE *fp; int gfx_util, sam_util; int ret; int i; gfx_util = sam_util = -1; fp = fopen(path_gfx_rc6, "r"); if (fp) { ret = fscanf(fp, "%lld", &gfx_rc6); if (ret != 1) gfx_rc6 = ULLONG_MAX; fclose(fp); } fp = fopen(path_sam_mc6, "r"); if (fp) { ret = fscanf(fp, "%lld", &sam_mc6); if (ret != 1) sam_mc6 = ULLONG_MAX; fclose(fp); } if (gfx_rc6 == ULLONG_MAX && sam_mc6 == ULLONG_MAX) return -1; if (gfx_rc6 != ULLONG_MAX) { if (gfx_rc6_prev != ULLONG_MAX) gfx_util = 10000 - (gfx_rc6 - gfx_rc6_prev) * 10000 / time_ms; gfx_rc6_prev = gfx_rc6; lpmd_log_debug("GFX Utilization: %d.%d\n", gfx_util / 100, gfx_util % 100); } if (sam_mc6 != ULLONG_MAX) { if (sam_mc6_prev != ULLONG_MAX) sam_util = 10000 - (sam_mc6 - sam_mc6_prev) * 10000 / time_ms; sam_mc6_prev = sam_mc6; lpmd_log_debug("SAM Utilization: %d.%d\n", sam_util / 100, sam_util % 100); } return gfx_util > sam_util ? gfx_util : sam_util; } /* Get GFX_RC6 and SAM_MC6 from sysfs and calculate gfx util based on this */ static int parse_gfx_util_sysfs(void) { static int gfx_sysfs_available = 1; static struct timespec ts_prev; struct timespec ts_cur; unsigned long time_ms; int ret; busy_gfx = -1; if (!gfx_sysfs_available) return 1; clock_gettime (CLOCK_MONOTONIC, &ts_cur); if (!ts_prev.tv_sec && !ts_prev.tv_nsec) { ret = probe_gfx_util_sysfs(); if (ret) { gfx_sysfs_available = 0; return 1; } ts_prev = ts_cur; return 0; } time_ms = (ts_cur.tv_sec - ts_prev.tv_sec) * 1000 + (ts_cur.tv_nsec - ts_prev.tv_nsec) / 1000000; ts_prev = ts_cur; busy_gfx = get_gfx_util_sysfs(time_ms); return 0; } #define MSR_TSC 0x10 #define MSR_PKG_ANY_GFXE_C0_RES 0x65A static int parse_gfx_util_msr(void) { static uint64_t val_prev; uint64_t val; static uint64_t tsc_prev; uint64_t tsc; int cpu; cpu = sched_getcpu(); tsc = read_msr(cpu, MSR_TSC); if (tsc == UINT64_MAX) goto err; val = read_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES); if (val == UINT64_MAX) goto err; if (!tsc_prev || !val_prev) { tsc_prev = tsc; val_prev = val; busy_gfx = -1; return 0; } busy_gfx = (val - val_prev) * 10000 / (tsc - tsc_prev); tsc_prev = tsc; val_prev = val; return 0; err: lpmd_log_debug("parse_gfx_util_msr failed\n"); busy_gfx = -1; return 1; } static int parse_gfx_util(void) { int ret; /* Prefer to get graphics utilization from GFX/SAM RC6 sysfs */ ret = parse_gfx_util_sysfs(); if (!ret) return 0; /* Fallback to MSR */ return parse_gfx_util_msr(); } static int calculate_busypct(struct proc_stat_info *cur, struct proc_stat_info *prev) { int idx; unsigned long long busy = 0, total = 0; for (idx = STAT_USER; idx < STAT_MAX; idx++) { total += (cur->stat[idx] - prev->stat[idx]); // Align with the "top" utility logic if (idx != STAT_IDLE && idx != STAT_IOWAIT) busy += (cur->stat[idx] - prev->stat[idx]); } if (total) return busy * 10000 / total; else return 0; } static int parse_proc_stat(void) { FILE *filep; int i; int val; int count = get_max_online_cpu() + 1; int sys_idx = count - 1; int size = sizeof(struct proc_stat_info) * count; filep = fopen (PATH_PROC_STAT, "r"); if (!filep) return 1; if (!proc_stat_prev) proc_stat_prev = calloc(sizeof(struct proc_stat_info), count); if (!proc_stat_prev) { fclose (filep); return 1; } if (!proc_stat_cur) proc_stat_cur = calloc(sizeof(struct proc_stat_info), count); if (!proc_stat_cur) { free(proc_stat_prev); fclose (filep); proc_stat_prev = NULL; return 1; } memcpy (proc_stat_prev, proc_stat_cur, size); memset (proc_stat_cur, 0, size); while (!feof (filep)) { int idx; char *tmpline = NULL; struct proc_stat_info *info; size_t size = 0; char *line; int cpu; char *p; int ret; tmpline = NULL; size = 0; if (getline (&tmpline, &size, filep) <= 0) { free (tmpline); break; } line = strdup (tmpline); p = strtok (line, " "); if (strncmp (p, "cpu", 3)) { free (tmpline); free (line); continue; } ret = sscanf (p, "cpu%d", &cpu); if (ret == -1 && !(strncmp (p, "cpu", 3))) { /* Read system line */ info = &proc_stat_cur[sys_idx]; } else if (ret == 1) { info = &proc_stat_cur[cpu]; } else { free (tmpline); free (line); continue; } info->valid = 1; idx = STAT_CPU; while (p != NULL) { if (idx >= STAT_MAX) break; if (idx == STAT_CPU) { idx++; p = strtok (NULL, " "); continue; } if (sscanf (p, "%llu", &info->stat[idx]) <= 0) lpmd_log_debug("Failed to parse /proc/stat, defer update in next snapshot."); p = strtok (NULL, " "); idx++; } free (tmpline); free (line); } fclose (filep); busy_sys = calculate_busypct (&proc_stat_cur[sys_idx], &proc_stat_prev[sys_idx]); busy_cpu = 0; for (i = 1; i <= get_max_online_cpu(); i++) { if (!proc_stat_cur[i].valid) continue; val = calculate_busypct (&proc_stat_cur[i], &proc_stat_prev[i]); if (busy_cpu < val) busy_cpu = val; } return 0; } enum system_status { SYS_IDLE, SYS_NORMAL, SYS_OVERLOAD, SYS_UNKNOWN, }; static enum system_status sys_stat = SYS_NORMAL; static int first_run = 1; static enum system_status get_sys_stat(void) { if (first_run) return SYS_NORMAL; if (!in_lpm () && busy_sys <= (get_util_entry_threshold () * 100)) return SYS_IDLE; else if (in_lpm () && busy_cpu > (get_util_exit_threshold () * 100)) return SYS_OVERLOAD; return SYS_NORMAL; } /* * Support for hyst statistics * Ignore the current request if: * a. stay in current state too short * b. average time of the target state is too low * Note: This is not well tuned yet, set either util_in_hyst or util_out_hyst to 0 * to avoid the hyst algorithm. */ #define DECAY_PERIOD 5 static struct timespec tp_last_in, tp_last_out; static unsigned long util_out_hyst, util_in_hyst; static unsigned long util_in_min, util_out_min; static unsigned long avg_in, avg_out; static int util_should_proceed(enum system_status status) { struct timespec tp_now; unsigned long cur_in, cur_out; if (!util_out_hyst && !util_in_hyst) return 1; clock_gettime (CLOCK_MONOTONIC, &tp_now); if (status == SYS_IDLE) { cur_out = (tp_now.tv_sec - tp_last_out.tv_sec) * 1000000000 + tp_now.tv_nsec - tp_last_out.tv_nsec; // in msec cur_out /= 1000000; avg_out = avg_out * (DECAY_PERIOD - 1) / DECAY_PERIOD + cur_out / DECAY_PERIOD; if (avg_in >= util_in_hyst && cur_out >= util_out_min) return 1; lpmd_log_info ("\t\t\tIgnore SYS_IDLE: avg_in %lu, avg_out %lu, cur_out %lu\n", avg_in, avg_out, cur_out); avg_in = avg_in * (DECAY_PERIOD + 1) / DECAY_PERIOD; return 0; } else if (status == SYS_OVERLOAD) { cur_in = (tp_now.tv_sec - tp_last_in.tv_sec) * 1000000000 + tp_now.tv_nsec - tp_last_in.tv_nsec; cur_in /= 1000000; avg_in = avg_in * (DECAY_PERIOD - 1) / DECAY_PERIOD + cur_in / DECAY_PERIOD; if (avg_out >= util_out_hyst && cur_in >= util_in_min) return 1; lpmd_log_info ("\t\t\tIgnore SYS_OVERLOAD: avg_in %lu, avg_out %lu, cur_in %lu\n", avg_in, avg_out, cur_in); avg_out = avg_out * (DECAY_PERIOD + 1) / DECAY_PERIOD; return 0; } return 0; } static int get_util_interval(void) { int interval; if (in_lpm ()) { interval = get_util_exit_interval (); if (interval || busy_cpu < 0) return interval; if (first_run) return 1000; interval = 1000 * (10000 - busy_cpu) / 10000; } else { interval = get_util_entry_interval (); if (interval) return interval; interval = 1000; } interval = (interval / 100) * 100; if (!interval) interval = 100; return interval; } static int state_match(lpmd_config_state_t *state, int bsys, int bcpu, int bgfx, int wlt_index) { if (!state->valid) return 0; if (state->wlt_type != -1) { /* wlt hint must match */ if (state->wlt_type != wlt_index) return 0; /* return match directly if no util threshold specified */ if (!state->enter_gfx_load_thres) return 1; /* leverage below logic to handle util threshold */ } /* No need to dump utilization info if no threshold specified */ if (!state->enter_cpu_load_thres && !state->entry_system_load_thres && !state->enter_gfx_load_thres) return 1; if (state->enter_cpu_load_thres) { if (bcpu > state->enter_cpu_load_thres) goto unmatch; } if (state->enter_gfx_load_thres) { if (bgfx == -1) lpmd_log_debug("Graphics utilization not available, ignore graphics threshold\n"); else if (bgfx > state->enter_gfx_load_thres) goto unmatch; } if (state->entry_system_load_thres) { if (bsys > state->entry_system_load_thres) { if (!state->exit_system_load_hyst || state != current_state) goto unmatch; if (bsys > state->entry_load_sys + state->exit_system_load_hyst || bsys > state->entry_system_load_thres + state->exit_system_load_hyst) goto unmatch; } } lpmd_log_debug("Match %12s: sys_thres %3d cpu_thres %3d gfx_thres %3d hyst %3d\n", state->name, state->entry_system_load_thres, state->enter_cpu_load_thres, state->enter_gfx_load_thres, state->exit_system_load_hyst); return 1; unmatch: lpmd_log_debug("Ignore %12s: sys_thres %3d cpu_thres %3d gfx_thres %3d hyst %3d\n", state->name, state->entry_system_load_thres, state->enter_cpu_load_thres, state->enter_gfx_load_thres, state->exit_system_load_hyst); return 0; } #define DEFAULT_POLL_RATE_MS 1000 static int enter_state(lpmd_config_state_t *state, int bsys, int bcpu) { static int interval = DEFAULT_POLL_RATE_MS; state->entry_load_sys = bsys; state->entry_load_cpu = bcpu; /* Adjust polling interval only */ if (state == current_state) { if (state->poll_interval_increment > 0) { interval += state->poll_interval_increment; } /* Adaptive polling interval based on cpu utilization */ if (state->poll_interval_increment == -1) { interval = state->max_poll_interval * (10000 - bcpu) / 10000; interval /= 100; interval *= 100; } if (state->min_poll_interval && interval < state->min_poll_interval) interval = state->min_poll_interval; if (state->max_poll_interval && interval > state->max_poll_interval) interval = state->max_poll_interval; return interval; } set_lpm_epp(state->epp); set_lpm_epb(state->epb); set_lpm_itmt(state->itmt_state); if (state->active_cpus[0] != '\0') { reset_cpus(CPUMASK_UTIL); parse_cpu_str(state->active_cpus, CPUMASK_UTIL); if (state->irq_migrate != SETTING_IGNORE) set_lpm_irq(get_cpumask(CPUMASK_UTIL), 1); else set_lpm_irq(NULL, SETTING_IGNORE); set_lpm_cpus(CPUMASK_UTIL); } else { set_lpm_irq(NULL, SETTING_IGNORE); set_lpm_cpus(CPUMASK_MAX); /* Ignore Task migration */ } process_lpm(UTIL_ENTER); if (state->min_poll_interval) interval = state->min_poll_interval; else interval = DEFAULT_POLL_RATE_MS; current_state = state; return interval; } static void dump_system_status(lpmd_config_t *config, int interval) { int epp, epb; char epp_str[32] = ""; char buf[MAX_STR_LENGTH * 2]; int offset; int size; offset = 0; size = MAX_STR_LENGTH * 2; offset += snprintf(buf, size, "[%d/%d] %12s: ", current_state->id, config->config_state_count, current_state->name); size = MAX_STR_LENGTH * 2 - offset; if (busy_sys == -1) offset += snprintf(buf + offset, size, "bsys na, "); else offset += snprintf(buf + offset, size, "bsys %3d.%02d, ", busy_sys / 100, busy_sys % 100); size = MAX_STR_LENGTH * 2 - offset; if (busy_cpu == -1) offset += snprintf(buf + offset, size, "bcpu na, "); else offset += snprintf(buf + offset, size, "bcpu %3d.%02d, ", busy_cpu / 100, busy_cpu % 100); size = MAX_STR_LENGTH * 2 - offset; if (busy_gfx == -1) offset += snprintf(buf + offset, size, "bgfx na, "); else offset += snprintf(buf + offset, size, "bgfx %3d.%02d, ", busy_gfx / 100, busy_gfx % 100); size = MAX_STR_LENGTH * 2 - offset; get_epp_epb(&epp, epp_str, 32, &epb); if (epp >= 0) offset += snprintf(buf + offset, size, "epp %3d, ", epp); else offset += snprintf(buf + offset, size, "epp %s, ", epp_str); size = MAX_STR_LENGTH * 2 - offset; offset += snprintf(buf + offset, size, "epb %3d, ", epb); size = MAX_STR_LENGTH * 2 - offset; if (current_state->itmt_state != SETTING_IGNORE) offset += snprintf(buf + offset, size, "itmt %2d, ", get_itmt()); size = MAX_STR_LENGTH * 2 - offset; snprintf(buf + offset, size, "interval %4d", interval); lpmd_log_info("%s\n", buf); } static int process_next_config_state(lpmd_config_t *config, int wlt_index) { lpmd_config_state_t *state = NULL; int i = 0; int interval = -1; // Check for new state for (i = 0; i < config->config_state_count; ++i) { state = &config->config_states[i]; if (state_match(state, busy_sys, busy_cpu, busy_gfx, wlt_index)) { interval = enter_state(state, busy_sys, busy_cpu); break; } } if (!current_state) return interval; dump_system_status(config, interval); return interval; } static int use_config_state = 1; int use_config_states(void) { return use_config_state; } int periodic_util_update(lpmd_config_t *lpmd_config, int wlt_index) { int interval; static int initialized; if (wlt_index >= 0) { if (lpmd_config->wlt_hint_poll_enable) { parse_gfx_util(); interval = process_next_config_state(lpmd_config, wlt_index); } else { process_next_config_state(lpmd_config, wlt_index); interval = -1; } return interval; } // poll() timeout should be -1 when util monitor not enabled if (!has_util_monitor ()) return -1; if (!initialized) { clock_gettime (CLOCK_MONOTONIC, &tp_last_in); clock_gettime (CLOCK_MONOTONIC, &tp_last_out); avg_in = util_in_hyst = get_util_entry_hyst (); avg_out = util_out_hyst = get_util_exit_hyst (); util_in_min = util_in_hyst / 2; util_out_min = util_out_hyst / 2; initialized = 1; } parse_proc_stat (); parse_gfx_util(); if (!lpmd_config->config_state_count || !use_config_state) { sys_stat = get_sys_stat (); interval = get_util_interval (); lpmd_log_info ( "\t\tSYS util %3d.%02d (Entry threshold : %3d )," " CPU util %3d.%02d ( Exit threshold : %3d ), resample after" " %4d ms\n", busy_sys / 100, busy_sys % 100, get_util_entry_threshold (), busy_cpu / 100, busy_cpu % 100, get_util_exit_threshold (), interval); first_run = 0; if (!util_should_proceed (sys_stat)) return interval; switch (sys_stat) { case SYS_IDLE: process_lpm (UTIL_ENTER); first_run = 1; clock_gettime (CLOCK_MONOTONIC, &tp_last_in); interval = 1000; break; case SYS_OVERLOAD: process_lpm (UTIL_EXIT); first_run = 1; clock_gettime (CLOCK_MONOTONIC, &tp_last_out); break; default: break; } } else interval = process_next_config_state(lpmd_config, wlt_index); return interval; } int util_init(lpmd_config_t *lpmd_config) { lpmd_config_state_t *state; int nr_state = 0; int i, ret; for (i = 0; i < lpmd_config->config_state_count; i++) { state = &lpmd_config->config_states[i]; if (state->active_cpus[0] != '\0') { ret = parse_cpu_str(state->active_cpus, CPUMASK_UTIL); if (ret <= 0) { state->valid = 0; continue; } } if (!state->min_poll_interval) state->min_poll_interval = state->max_poll_interval > DEFAULT_POLL_RATE_MS ? DEFAULT_POLL_RATE_MS : state->max_poll_interval; if (!state->max_poll_interval) state->max_poll_interval = state->min_poll_interval > DEFAULT_POLL_RATE_MS ? state->min_poll_interval : DEFAULT_POLL_RATE_MS; if (!state->poll_interval_increment) state->poll_interval_increment = -1; state->entry_system_load_thres *= 100; state->enter_cpu_load_thres *= 100; state->exit_cpu_load_thres *= 100; state->enter_gfx_load_thres *= 100; nr_state++; } if (nr_state < 2) { lpmd_log_info("%d valid config states found\n", nr_state); use_config_state = 0; return 1; } return 0; } intel-lpmd-0.0.9/src/wlt_proxy/000077500000000000000000000000001477072336600164425ustar00rootroot00000000000000intel-lpmd-0.0.9/src/wlt_proxy/include/000077500000000000000000000000001477072336600200655ustar00rootroot00000000000000intel-lpmd-0.0.9/src/wlt_proxy/include/state_common.h000066400000000000000000000060711477072336600227320ustar00rootroot00000000000000/* * state_common.h: Intel Linux Energy Optimizer proxy detection common header file * * Copyright (C) 2024 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _WLT_PROXY_COMMON_H_ #define _WLT_PROXY_COMMON_H_ /* threshold (%) for instantaneous utilizations */ #define UTIL_LOWEST 1 #define UTIL_LOWER 2 #define UTIL_LOW 10 #define UTIL_FILL_START 35 #define UTIL_BELOW_HALF 40 #define UTIL_HALF 50 #define UTIL_ABOVE_HALF 70 #define UTIL_NEAR_FULL 90 /* floating point comparison */ #define EPSILON (0.01) #define A_LTE_B(A,B) (((B-A) >= EPSILON) ? 1 : 0 ) #define A_GTE_B(A,B) (((A-B) >= EPSILON) ? 1 : 0 ) #define A_GT_B(A,B) (((A-B) > EPSILON) ? 1 : 0 ) /* state indexes for WLT proxy detection based cpu usage high to low */ enum state_idx { INIT_MODE, PERF_MODE, MDRT4E_MODE, MDRT3E_MODE, MDRT2E_MODE, RESP_MODE, NORM_MODE, DEEP_MODE }; #define MAX_MODE 8 struct group_util { /* top 3 max utils and last (min) util */ float c0_max; float c0_min; float worst_stall; int worst_stall_cpu; float c0_2nd_max; float c0_3rd_max; int delta; /* simple moving average for top 3 utils */ int sma_sum[3]; int sma_avg1; int sma_avg2; int sma_avg3; int sma_pos; }; /* feature states */ #define DEACTIVATED (-1) #define UNDEFINED (0) #define RUNNING (1) #define ACTIVATED (2) #define PAUSE (3) /* state_manager.c */ void uninit_state_manager(void); enum state_idx get_cur_state(void); int get_last_poll(void); int get_poll_ms(enum state_idx); int get_state_poll(int, enum state_idx); int set_stay_count(enum state_idx, int); int get_stay_count(enum state_idx); int staytime_to_staycount(enum state_idx state); int prep_state_change(enum state_idx, enum state_idx, int); int do_countdown(enum state_idx); /* state_util.c */ int util_init_proxy(void); void util_uninit_proxy(void); int state_max_avg(); int update_perf_diffs(float *, int); int max_mt_detected(enum state_idx); /* state_machine.c */ int state_machine_auto(); /* spike_mgmt.c */ int add_spike_time(int); int add_non_spike_time(int); int get_spike_rate(void); int get_burst_rate_per_min(void); int fresh_burst_response(int initial_val); int burst_rate_breach(void); int strikeout_once(int); #endif /* _WLT_PROXY_COMMON_H_ */ intel-lpmd-0.0.9/src/wlt_proxy/include/wlt_proxy.h000066400000000000000000000017761477072336600223200ustar00rootroot00000000000000/* * wlt_proxy.h: Intel Linux Energy Optimizer proxy detection header file * * Copyright (C) 2024 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _WLT_PROXY_H_ #define _WLT_PROXY_H_ int read_wlt_proxy(int* interval); int wlt_proxy_init(); void wlt_proxy_uninit(void); #endif/* _WLT_PROXY_H_ */ intel-lpmd-0.0.9/src/wlt_proxy/spike_mgmt.c000066400000000000000000000141571477072336600207550ustar00rootroot00000000000000/* * spike_mgmt.c: Intel Low Power Daemon WLT proxy spike detection in CPU usage. * * Copyright (C) 2024 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * This file contains condition checks for CPU utilization spikes and idleness count/detection. */ #define _GNU_SOURCE #include #include //clockid_t #include //perror #include #include //bool #include "state_common.h" /* * spike burst refers to continuous spikes in a series of back to back samples. * burt count and strength (as %) are good indicators to segregate random noise * (that doesn't deserve performance) from bursty workload needing performance. * * Example of spike burst (|) and non-spike (.) sampling: * ...||..||||...|...|||..... * - here, first burst has two spikes. * - second and third burst have 4 and 3 spikes respectively * - the single spike in between is not considered as burst * * Using this a few indicators are derived: * spike rate = total_spike_time * 100/ MAX_TRACKED_SPIKE_TIME * spike_rate is defined as spike-time % of some MAX_TRACKED_SPIKE_TIME * * spike rate avg = spike_rate_total / spike_rate_samples * spike rate avg is used to control how long "1 min" in bc_reset_min appears to the algo. * * bc_rest_min is used to control how long to wait before reset of past spike burst count * SPIKE_TIME_BIAS macro will bias this to be longer or shorter based on * recent history (i.e more prominent the spiking, the longer it will be remembered) */ #define MAX_TRACKED_SPIKE_TIME 1000 #define MAX_BURST_COUNT 1000 #define BURST_COUNT_THRESHOLD 3 //shorten time by 50% if spike rate was as low as 0. No change if spike rate was 100 #define SPIKE_TIME_BIAS(avg, min) ((100 - avg) * min/(2 * 100)) extern int state_demote; int burst_count = 0; /*local variables*/ static int total_spike_time; static int spike_sec_prev = 0; static int spike_rate_total; static int spike_rate_samples; static int burst_rate_per_min; static bool spike_burst_flag = false; static float bc_reset_min = 90.0; static int once_flag; static int strike_count; /** increment avg spike rate */ int update_spike_rate_avg(int sr) { spike_rate_total += sr; spike_rate_samples++; return 1; } /** reset avg spike rate */ int clear_spike_rate_avg() { spike_rate_samples = spike_rate_total = 0; return 1; } /** * burst count determines number of bursts occurred in recent past (1 min) * arg real_spike specifies if invoked to update actual spike (1) or * just a refresh to burst_count (0) * burst count is decremented if no spikes in last 1 min */ static int update_burst_count(int real_spike_burst) { float minutes = 1.0; clockid_t clk = CLOCK_MONOTONIC; struct timespec ts; if (clock_gettime(clk, &ts)) { perror("clock_gettime1"); return -1; } if (spike_sec_prev) { minutes = (float)(ts.tv_sec - spike_sec_prev) / bc_reset_min; } else { spike_sec_prev = ts.tv_sec; return 0; } if (real_spike_burst && (get_cur_state() <= MDRT4E_MODE)) { burst_count++; spike_sec_prev = ts.tv_sec; } else if ((minutes > 1.0) || (burst_count > MAX_BURST_COUNT)) { burst_count = 0; spike_sec_prev = ts.tv_sec; } if (minutes < 1.0) { burst_rate_per_min = burst_count; } else if (minutes && (minutes > 1.0)) { burst_rate_per_min = (int)((float)burst_count / minutes); } return burst_rate_per_min; } int get_burst_rate_per_min(void) { return burst_rate_per_min; } int fresh_burst_response(int initial_burst_rate) { if (!initial_burst_rate) return 0; if ((initial_burst_rate >= BURST_COUNT_THRESHOLD) || (get_burst_rate_per_min() > initial_burst_rate)) return 1; return 0; } int burst_rate_breach(void) { return (get_burst_rate_per_min() >= BURST_COUNT_THRESHOLD) ? 1 : 0; } /* Calculate spike rate */ int get_spike_rate() { int spike_pct = total_spike_time * 100 / MAX_TRACKED_SPIKE_TIME; return (spike_pct > 100) ? 100 : spike_pct; } /* count spikes */ int add_spike_time(int duration) { int spike_rate; if (total_spike_time < MAX_TRACKED_SPIKE_TIME) total_spike_time += duration; /* spike burst has more than 1 spike */ if (!spike_burst_flag) { /* rising edge of spike burst */ spike_burst_flag = true; } else if (state_demote && !once_flag) { update_burst_count(1); once_flag = 1; } spike_rate = get_spike_rate(); update_spike_rate_avg(spike_rate); return 1; } /* count idleness / non spike times */ int add_non_spike_time(int duration) { float avg; int sr; if (total_spike_time > 0) total_spike_time -= duration; total_spike_time = (total_spike_time < 0) ? 0 : total_spike_time; sr = get_spike_rate(); if (!sr && spike_burst_flag) { /* falling edge of burst */ spike_burst_flag = false; avg = spike_rate_total / spike_rate_samples; if (!once_flag) update_burst_count(1); bc_reset_min = 60.0 - (int)SPIKE_TIME_BIAS(avg, bc_reset_min); clear_spike_rate_avg(); once_flag = 0; } else { update_burst_count(0); once_flag = 0; } return 1; } /* decrement strike count */ int strikeout_once(int n) { if (!strike_count) strike_count = n; else strike_count -= 1; if (strike_count < 0) strike_count = 0; return strike_count; } intel-lpmd-0.0.9/src/wlt_proxy/state_machine.c000066400000000000000000000305161477072336600214170ustar00rootroot00000000000000/* * state_machine.c: Intel Low Power Daemon WLT proxy state change handling. * * Copyright (C) 2024 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * This file contains condition checks for state switch. */ #define _GNU_SOURCE #include #include #include "state_common.h" #include "lpmd.h" //logs /* * stall scalability refer to non-stallable percentage of utilization. * e.g due to memory or other dependency. If work is reasonably scaling well, * values in 80 to 90+% is expected */ #define STALL_SCALE_LOWER_MARK 60 #define N_STRIKE (10) /* threshold (%) for sustained (avg) utilizations */ #define SUS_LOWER 2 #define SUS_LOW_RANGE_START 4 #define SUS_LOW_RANGE_END 25 extern int burst_count; extern struct group_util grp; extern int state_demote; extern int next_proxy_poll; int max_util; static int only_once = 0; /* function checks conditions for state switch */ int state_machine_auto() { float dummy; int present_state = get_cur_state(); update_perf_diffs(&dummy, 0); max_util = (int)round(grp.c0_max); //end /* * we do not want to track avg util for following case: * a) Responsive transit mode (fast poll can flood avg leading to incorrect decisions) */ if (present_state != RESP_MODE) state_max_avg(); int completed_poll = get_last_poll(); float sum_c0 = grp.c0_max + grp.c0_2nd_max + grp.c0_3rd_max; int mdrt_count; int perf_count, initial_burst_count; initial_burst_count = get_burst_rate_per_min(); mdrt_count = get_stay_count(MDRT3E_MODE); int sr = get_spike_rate(); if (A_LTE_B(grp.c0_max, UTIL_NEAR_FULL)) { add_non_spike_time(completed_poll); } else if (A_GT_B(grp.c0_max, UTIL_NEAR_FULL) || sr) { add_spike_time(completed_poll); } /* should we reset perf-count due to new burst? */ if (fresh_burst_response(initial_burst_count)) { set_stay_count(PERF_MODE, staytime_to_staycount(PERF_MODE)); set_stay_count(MDRT3E_MODE, 0); } perf_count = get_stay_count(PERF_MODE); if (!perf_count && !mdrt_count) set_stay_count(MDRT3E_MODE, staytime_to_staycount(MDRT3E_MODE)); state_demote = 0; int isMT = !max_mt_detected(INIT_MODE); if(only_once == 0) { lpmd_log_debug("present_state, isMT, C0_max, C0_2ndMax, sum_c0, sma avg1, sma avg2, sma avg3, worst_stall, next_proxy_poll\n"); only_once = 1; } lpmd_log_debug("%d, %d, %.2f, %.2f, %.2f, %d, %d, %d, %.2f, %d\n", \ present_state, \ isMT, \ grp.c0_max, \ grp.c0_2nd_max, \ sum_c0, \ grp.sma_avg1, \ grp.sma_avg2, \ grp.sma_avg3, \ grp.worst_stall, \ next_proxy_poll); switch (present_state) { case INIT_MODE: /* * init mode is super-set of all default/available cpu on the system. */ /* promote -- if not high multi-thread trend */ if (!max_mt_detected(INIT_MODE)) { lpmd_log_debug("INIT_MODE to PERF_MODE\n"); prep_state_change(INIT_MODE, PERF_MODE, 0); break; } // stay -- full MT break; case PERF_MODE: // Demote -- if highly MT if (max_mt_detected(PERF_MODE)) { lpmd_log_debug("PERF_MODE to INIT_MODE = mt detected.\n"); prep_state_change(PERF_MODE, INIT_MODE, 0); break; } // Stay -- if there was recent perf/resp bursts if (burst_count > 0 && !do_countdown(PERF_MODE)){ lpmd_log_debug("PERF_MODE: burst_count is %d > 0 && !do_countdown\n", burst_count); break; } // Promote but through responsive watch -- if top sampled util and their avg are receding. if (A_LTE_B(sum_c0, (2 * UTIL_LOW)) && A_LTE_B(grp.sma_avg1, UTIL_ABOVE_HALF)) { lpmd_log_debug("PERF_MODE to RESP_MODE\n"); prep_state_change(PERF_MODE, RESP_MODE, 0); break; } // Promote -- to moderate (3) MT state if (!burst_rate_breach() && A_LTE_B(grp.c0_max, UTIL_LOW)) // && A_LTE_B(sum_avg, UTIL_BELOW_HALF)) { set_stay_count(MDRT3E_MODE, 0); lpmd_log_debug("PERF_MODE to MDRT3E_MODE\n"); prep_state_change(PERF_MODE, MDRT3E_MODE, 0); break; } //Stay -- all else break; case RESP_MODE: // Demote -- if ST above halfway mark and avg trending higher if (A_GT_B(grp.c0_max, UTIL_ABOVE_HALF) && A_GT_B(grp.sma_avg1, UTIL_BELOW_HALF)) { lpmd_log_debug("RESP_MODE to PERF_MODE\n"); prep_state_change(RESP_MODE, PERF_MODE, 0); break; } // Stay -- if there were recent burst of spikes if (perf_count && burst_rate_breach()) break; // Promote -- all else if (A_LTE_B(grp.worst_stall * 100, STALL_SCALE_LOWER_MARK)) { lpmd_log_debug("worst stall is less than STALL_SCALE_LOWER_MARK -- stay here.\n"); } else { lpmd_log_debug("RESP_MODE to MDRT3E_MODE\n"); prep_state_change(RESP_MODE, MDRT3E_MODE, 0); } break; case MDRT4E_MODE: if (A_LTE_B(grp.worst_stall * 100, STALL_SCALE_LOWER_MARK)) { lpmd_log_debug("MDRT4E_MODE to RESP_MODE\n"); prep_state_change(MDRT4E_MODE, RESP_MODE, 0); break; } // Demote if (A_GT_B(grp.c0_max, UTIL_NEAR_FULL)) { if (!burst_rate_breach() && strikeout_once(N_STRIKE)) break; lpmd_log_debug("MDRT4E_MODE to PERF_MODE\n"); prep_state_change(MDRT4E_MODE, PERF_MODE, 0); break; } // promote if (A_LTE_B(grp.sma_avg1, SUS_LOW_RANGE_END) && A_LTE_B(grp.sma_avg2, SUS_LOW_RANGE_END) && A_LTE_B(sum_c0, UTIL_HALF)) { if (!do_countdown(MDRT4E_MODE)) break; lpmd_log_debug("MDRT4E_MODE to NORM_MODE\n"); prep_state_change(MDRT4E_MODE, NORM_MODE, 0); break; } // stay break; case MDRT3E_MODE: // Demote -- if mem bound work is stalling but didn't show higher utilization if (A_LTE_B(grp.worst_stall * 100, STALL_SCALE_LOWER_MARK)) { lpmd_log_debug("MDRT3E_MODE to RESP_MODE %.2f < %d\n", grp.worst_stall, STALL_SCALE_LOWER_MARK); prep_state_change(MDRT3E_MODE, RESP_MODE, 0); break; } // Demote to perf if (A_GT_B(grp.c0_max, UTIL_NEAR_FULL)) { if (!burst_rate_breach() && strikeout_once(N_STRIKE)) { lpmd_log_debug("MDRT3E_MODE: burst_rate_breach AND strikeout_once - not met\n"); break; } lpmd_log_debug("MDRT3E_MODE to PERF_MODE\n"); prep_state_change(MDRT3E_MODE, PERF_MODE, 0); break; } // Demote to 4 thread sustained if (A_GTE_B(grp.sma_avg1, SUS_LOW_RANGE_END) && A_GTE_B(grp.sma_avg2, (SUS_LOW_RANGE_END - 5))) { lpmd_log_debug("MDRT3E_MODE to MDRT4E_MODE %d > %d\n", grp.sma_avg1, SUS_LOW_RANGE_END); prep_state_change(MDRT3E_MODE, MDRT4E_MODE, 0); break; } // promote if ((A_GT_B(grp.sma_avg1, SUS_LOW_RANGE_START) && A_LTE_B(grp.sma_avg1, SUS_LOW_RANGE_END)) && (A_GT_B(grp.sma_avg2, SUS_LOW_RANGE_START) && A_LTE_B(grp.sma_avg2, SUS_LOW_RANGE_END))) { if (!do_countdown(MDRT3E_MODE)) { lpmd_log_debug("MDRT3E_MODE: to MDRT2E_MODE - do countdown not met\n"); break; } lpmd_log_debug("MDRT3E_MODE to MDRT2E_MODE %d < %d\n", grp.sma_avg1, MDRT2E_MODE); prep_state_change(MDRT3E_MODE, MDRT2E_MODE, 0); break; } // Promote -- if top three avg util are trending lower. if (A_LTE_B(grp.sma_avg1, SUS_LOW_RANGE_END) && (A_LTE_B(grp.sma_avg2, SUS_LOWER) && A_LTE_B(grp.sma_avg3, SUS_LOWER))) { if (!do_countdown(MDRT3E_MODE)) { lpmd_log_debug("MDRT3E_MODE: to NORM_MODE - do countdown not met\n"); break; } lpmd_log_debug("MDRT3E_MODE to NORM_MODE\n"); prep_state_change(MDRT3E_MODE, NORM_MODE, 0); break; } lpmd_log_debug("MDRT3E_MODE: stay\n"); break; case MDRT2E_MODE: // Demote -- if mem bound work is stalling but didn't show higher utilization if (A_LTE_B(grp.worst_stall * 100, STALL_SCALE_LOWER_MARK)) { lpmd_log_debug("MDRT2E_MODE to RESP_MODE\n"); prep_state_change(MDRT2E_MODE, RESP_MODE, 0); break; } // Demote -- if instant util nearing full or sustained moderate avg1 trend with avg2 trailing closeby if (A_GT_B(grp.c0_max, UTIL_NEAR_FULL) || (A_GTE_B(grp.sma_avg1, SUS_LOW_RANGE_END) && A_GTE_B(grp.sma_avg2, SUS_LOW_RANGE_END - 10))) { if (!burst_rate_breach() && strikeout_once(N_STRIKE)) break; lpmd_log_debug("MDRT2E_MODE to MDRT3E_MODE\n"); prep_state_change(MDRT2E_MODE, MDRT3E_MODE, 0); break; } // Promote -- if top two avg util are trending lower. if ((A_GT_B(grp.sma_avg1, SUS_LOW_RANGE_START) && A_LTE_B(grp.sma_avg1, SUS_LOW_RANGE_END)) && A_LTE_B(grp.sma_avg2, SUS_LOW_RANGE_END)) { if (!do_countdown(MDRT2E_MODE)) { break; } lpmd_log_debug("MDRT2E_MODE to NORM_MODE\n"); prep_state_change(MDRT2E_MODE, NORM_MODE, 0); break; } // stay break; case NORM_MODE: // Demote -- if mem bound work is stalling but didn't show higher utilization if (A_LTE_B(grp.worst_stall * 100, STALL_SCALE_LOWER_MARK)) { lpmd_log_debug("NORM_MODE to RESP_MODE\n"); prep_state_change(NORM_MODE, RESP_MODE, 0); break; } // Demote -- if instant util more than half or if signs of sustained ST activity. if (A_GT_B(grp.c0_max, UTIL_HALF) || (A_GT_B(grp.sma_avg1, UTIL_BELOW_HALF))) { /* In this state its better to absorb few spike (noise) before reacting */ if (!burst_rate_breach() && strikeout_once(N_STRIKE)) break; lpmd_log_debug("NORM_MODE to MDRT2E_MODE\n"); prep_state_change(NORM_MODE, MDRT2E_MODE, 0); break; } // Promote -- if top few instant util or top avg is trending lower. if ((A_LTE_B(grp.c0_max, UTIL_LOW) && A_LTE_B(grp.c0_2nd_max, UTIL_LOWEST)) || A_LTE_B(grp.sma_avg1, SUS_LOWER)) { /* its better to absorb few dips before reacting out of a steady-state */ if (!do_countdown(NORM_MODE)) break; lpmd_log_debug("NORM_MODE to DEEP_MODE\n"); prep_state_change(NORM_MODE, DEEP_MODE, 0); break; } break; case DEEP_MODE: // Demote -- if mem bound work is stalling but didn't show higher util. if (A_LTE_B(grp.worst_stall * 100, STALL_SCALE_LOWER_MARK)) { lpmd_log_debug("DEEP_MODE to RESP_MODE\n"); prep_state_change(DEEP_MODE, RESP_MODE, 0); break; } // Demote -- if there are early signs of instantaneous utilization build-up. if (A_GT_B(grp.c0_max, UTIL_FILL_START)) { lpmd_log_debug("DEEP_MODE to NORM_MODE\n"); prep_state_change(DEEP_MODE, NORM_MODE, 0); break; } break; } return 1; } intel-lpmd-0.0.9/src/wlt_proxy/state_manager.c000066400000000000000000000171431477072336600214260ustar00rootroot00000000000000/* * state_manager.c: Intel Linux Energy Optimizer WLT proxy detection state manager * * Copyright (C) 2024 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * This file contains the proxy workload type detection - state definition, initialization and set/get functions. */ #define _GNU_SOURCE #include #include "lpmd.h" //logs #include "state_common.h" #include "wlt_proxy.h" //set_workload_hint #ifdef __GNUC__ #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) #else #define likely(x) (x) #define unlikely(x) (x) #endif /* * If polling is too fast some of the stats (such as util) * could be momentarily high owing to state change disturbances. * avoid unexpected decision due to this as it may not be tied to workload per-se. * any setting below, say 100ms, needs careful assessment. */ #define MIN_POLL_PERIOD 100 #define BASE_POLL_RESP 96 #define BASE_POLL_MT 100 #define BASE_POLL_PERF 280 #define BASE_POLL_MDRT4E 600 // e.g., 4E cores of a module #define BASE_POLL_MDRT3E 800 // e.g., 3E cores of a module #define BASE_POLL_MDRT2E 1000 // e.g., 2E cores of a module #define BASE_POLL_NORM 1200 #define BASE_POLL_DEEP 1800 /* hold period (ms) before moving to deeper state */ #define MDRT_MODE_STAY (4000) #define PERF_MODE_STAY (10000) /* poll interval type */ enum elastic_poll { ZEROTH, LINEAR, QUADRATIC, CUBIC, }; /* state properties */ struct _stState { bool disabled; char *name; char *str; char *str_reverse; char *hexstr; char *hexstr_reverse; int poll; enum elastic_poll poll_order; int stay_count; int stay_count_update_sec; int stay_count_update_sec_prev; int spike_type; float stay_scalar; int ppw_enabled; int last_max_util; int last_poll; }; static struct _stState state_info[MAX_MODE] = { [INIT_MODE] = {.name = "Avail cpu: P/E/L",.poll = BASE_POLL_MT,.poll_order = ZEROTH}, [PERF_MODE] = {.name = "Perf:non-soc cpu",.poll = BASE_POLL_PERF,.poll_order = ZEROTH}, [MDRT2E_MODE] = {.name = "Moderate 2E",.poll = BASE_POLL_MDRT2E,.poll_order = LINEAR}, [MDRT3E_MODE] = {.name = "Moderate 3E",.poll = BASE_POLL_MDRT3E,.poll_order = LINEAR}, [MDRT4E_MODE] = {.name = "Moderate 4E",.poll = BASE_POLL_MDRT4E, .poll_order = LINEAR}, [RESP_MODE] = {.name = "Responsive 2L",.poll = BASE_POLL_RESP, .poll_order = CUBIC}, [NORM_MODE] = {.name = "Normal LP 2L",.poll = BASE_POLL_NORM, .poll_order = QUADRATIC}, [DEEP_MODE] = {.name = "Deep LP 1L",.poll = BASE_POLL_DEEP, .poll_order = CUBIC}, }; static enum state_idx cur_state = NORM_MODE; static int needs_state_reset = 1; extern int wlt_type; int state_demote = 0; extern int next_proxy_poll; extern int max_util; static void set_state_reset(void) { needs_state_reset = 1; } enum state_idx get_cur_state(void) { return cur_state; } static void set_cur_state(enum state_idx state) { cur_state = state; } static int is_state_valid(enum state_idx state) { return ((state >= INIT_MODE) && (state < MAX_MODE) && !state_info[state].disabled); } int get_poll_ms(enum state_idx state) { return state_info[state].poll; } int get_stay_count(enum state_idx state) { return (state_info[state].stay_count); } int set_stay_count(enum state_idx state, int count) { return (state_info[state].stay_count = count); } /* return 1 if stay count reaches 0 */ int do_countdown(enum state_idx state) { state_info[state].stay_count -= 1; if (state_info[state].stay_count <= 0) { state_info[state].stay_count = 0; return 1; } return 0; } /* get poll value in microsec */ int get_state_poll(int util, enum state_idx state) { int poll, scale = (100 - util); float scale2; int order = (int)state_info[state].poll_order; /* avoiding fpow() overhead */ switch (order) { case ZEROTH: scale2 = (float)1; break; case LINEAR: scale2 = (float)scale / 100; break; case QUADRATIC: scale2 = (float)scale *scale / 10000; break; case CUBIC: scale2 = (float)scale *scale * scale / 1000000; break; default: scale2 = (float)scale / 100; break; } poll = (int)(state_info[cur_state].poll * scale2); /* limiting min poll to MIN_POLL_PERIOD ms */ if (poll < MIN_POLL_PERIOD) return MIN_POLL_PERIOD; return poll; } int get_last_maxutil(void) { return state_info[cur_state].last_max_util; } static int set_last_maxutil(int v) { state_info[cur_state].last_max_util = v; return 1; } int set_last_poll(int v) { state_info[cur_state].last_poll = v; return 1; } int get_last_poll(void) { return state_info[cur_state].last_poll; } /* initiate state change */ static int apply_state_change(void) { float test; if (!needs_state_reset) { return 0; } update_perf_diffs(&test, 1); needs_state_reset = 0; return 1; } /* Internal state to WLT mapping*/ static int get_state_mapping(enum state_idx state){ switch(state) { case PERF_MODE: return WLT_BURSTY; case RESP_MODE: case NORM_MODE: return WLT_BATTERY_LIFE; case DEEP_MODE: return WLT_IDLE; case INIT_MODE: case MDRT4E_MODE: case MDRT3E_MODE: case MDRT2E_MODE: return WLT_SUSTAINED; default: return WLT_IDLE; } } /* prepare for state change */ int prep_state_change(enum state_idx from_state, enum state_idx to_state, int reset) { set_cur_state(to_state); set_state_reset(); set_last_maxutil(DEACTIVATED); if (to_state < from_state) state_demote = 1; //proxy: apply state change and get poll interval apply_state_change(); if (likely(is_state_valid(to_state))) { next_proxy_poll = get_state_poll(max_util, to_state); } wlt_type = get_state_mapping(to_state); return 1; } /* return staycount for the state */ int staytime_to_staycount(enum state_idx state) { int stay_count = 0; switch (state) { case MDRT2E_MODE: case MDRT3E_MODE: case MDRT4E_MODE: stay_count = (int)MDRT_MODE_STAY/get_poll_ms(MDRT3E_MODE); break; case PERF_MODE: stay_count = (int)PERF_MODE_STAY/get_poll_ms(PERF_MODE); break; } return stay_count; } /* cleanup */ void uninit_state_manager() { for (int idx = INIT_MODE; idx < MAX_MODE; idx++) { if(state_info[idx].str != NULL) free(state_info[idx].str); if(state_info[idx].str_reverse != NULL) free(state_info[idx].str_reverse); if(state_info[idx].hexstr != NULL) free(state_info[idx].hexstr); if(state_info[idx].hexstr_reverse != NULL) free(state_info[idx].hexstr_reverse); } } intel-lpmd-0.0.9/src/wlt_proxy/state_util.c000066400000000000000000000414371477072336600207740ustar00rootroot00000000000000/* * state_util.c: Intel Linux Energy Optimizer WLT calculations * * Copyright (C) 2024 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * This file contains the proxy workload type detection data collection and calculations [perf, sma]. */ #include //perf_event_attr #include //syscall __NR_perf_event_open #include #include //uint64_t #include //round #include "lpmd.h" #include "state_common.h" /* * simple moving average (sma), event count based - not time. * updated for upto top 3 max util streams. * exact cpu # is not tracked; only the max since continuum of task * keeps switching cpus anyway. * array implementation with SMA_LENGTH number of values. */ #define SMA_LENGTH (25) #define SMA_CPU_COUNT (3) #define SCALE_DECIMAL (100) static int sample[3][SMA_LENGTH]; enum core_type { P_CORE = 1, E_CORE = 2, L_CORE = 3 }; typedef struct { int cpu; enum core_type cpu_type; int aperf_fd; int mperf_fd; int pperf_fd; uint64_t aperf_diff; uint64_t mperf_diff; uint64_t pperf_diff; uint64_t tsc_diff; uint64_t nperf; /* * As initial freq f0 changes to some other value * in the next cycle, it influences the initial * load l0 and associated stall-factor (1-s0) * track them for perf-per-watt evaluation. */ float f0; float l0; float s0; } perf_stats_t; perf_stats_t *perf_stats; struct group_util grp; struct thread_data { unsigned long long tsc; unsigned long long aperf; unsigned long long mperf; unsigned long long pperf; } *thread_even, *thread_odd; static uint64_t *last_aperf = NULL; static uint64_t *last_mperf = NULL; static uint64_t *last_pperf = NULL; static uint64_t *last_tsc = NULL; /* * Intel Alderlake hardware errata #ADL026: pperf bits 31:64 could be incorrect. * https://edc.intel.com/content/www/us/en/design/ipla/software-development-plat * forms/client/platforms/alder-lake-desktop/682436/007/errata-details/#ADL026 * u644diff() implements a workaround. Assuming real diffs less than MAX(uint32) */ #define u64diff(b, a) (((uint64_t)b < (uint64_t)a) ? \ (uint64_t)((uint32_t)~0UL - (uint32_t)a + (uint32_t)b) :\ ((uint64_t)b - (uint64_t)a)) /* routine to evaluate & store a per-cpu msr value's diff */ #define VARI(a, b, i) a##b[i] #define cpu_generate_msr_diff(scope) \ uint64_t cpu_get_diff_##scope(uint64_t cur_value, int instance)\ { \ uint64_t diff; \ diff = (VARI(last_, scope, instance) == 0) ? \ 0 : u64diff(cur_value, VARI(last_, scope, instance)); \ VARI(last_, scope, instance) = cur_value; \ return diff; \ } /********************Perf calculation - begin *****************************************/ cpu_generate_msr_diff(aperf); cpu_generate_msr_diff(mperf); cpu_generate_msr_diff(pperf); cpu_generate_msr_diff(tsc); /* initialize perf_stat structure */ static int perf_stat_init(void) { int max_cpus = get_max_cpus(); perf_stats = NULL; perf_stats = calloc(sizeof(perf_stats_t), max_cpus); if ( !perf_stats ) { lpmd_log_error("WLT_Proxy: memory failure\n"); return 0; } for (int t = 0; t < max_cpus; t++) { if(!is_cpu_online(t)) { continue; } perf_stats[t].cpu = t; if (is_cpu_pcore(t)) { perf_stats[t].cpu_type = P_CORE; } else if (is_cpu_ecore(t)) { perf_stats[t].cpu_type = E_CORE; } else { perf_stats[t].cpu_type = L_CORE; } } return 1; } /* is cpu applicable for the given state*/ static int cpu_applicable(int cpu, enum state_idx state) { switch (state) { case INIT_MODE: //for INIT mode need all cores [P,E,L] return 1; case NORM_MODE: // 2 L cores case DEEP_MODE: // 1 L core case RESP_MODE: // all L core case MDRT2E_MODE: // 2 E cores case MDRT3E_MODE: // 3 E cores case MDRT4E_MODE: // 4 E cores case PERF_MODE: if (perf_stats[cpu].cpu_type != L_CORE) { return 1; } default: break; } return 0; } static int init_perf_calculations(int n) { if (!perf_stat_init()) { lpmd_log_error("\nerror initiating cpu proxy\n"); return -1; } last_aperf = calloc(sizeof(uint64_t), n); last_mperf = calloc(sizeof(uint64_t), n); last_pperf = calloc(sizeof(uint64_t), n); last_tsc = calloc(sizeof(uint64_t), n); if (!last_aperf || !last_mperf || !last_mperf || !last_tsc) { lpmd_log_error("calloc failure perf vars\n"); return -2; } return LPMD_SUCCESS; } /*helper - pperf reading */ static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr) { int fdmt; int bytes_read; char buf[64]; int ret = -1; fdmt = open(path, O_RDONLY, 0); if (fdmt == -1) { lpmd_log_error("Failed to parse perf counter info %s\n", path); ret = -1; goto cleanup_and_exit; } bytes_read = read(fdmt, buf, sizeof(buf) - 1); if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) { lpmd_log_error("Failed to parse perf counter info %s\n", path); ret = -1; goto cleanup_and_exit; } buf[bytes_read] = '\0'; if (sscanf(buf, parse_format, value_ptr) != 1) { lpmd_log_error("Failed to parse perf counter info %s\n", path); ret = -1; goto cleanup_and_exit; } ret = 0; cleanup_and_exit: if (fdmt >= 0) close(fdmt); return ret; } /*helper - pperf reading */ static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format) { unsigned int v; int status; status = read_perf_counter_info(path, parse_format, &v); if (status) v = -1; return v; } /*helper - pperf reading */ static int read_pperf_config(void) { const char *const path = "/sys/bus/event_source/devices/msr/events/pperf"; const char *const format = "event=%x"; return read_perf_counter_info_n(path, format); } /*helper - pperf reading */ static unsigned int read_aperf_config(void) { const char *const path = "/sys/bus/event_source/devices/msr/events/aperf"; const char *const format = "event=%x"; return read_perf_counter_info_n(path, format); } /*helper - pperf reading */ static unsigned int read_mperf_config(void) { const char *const path = "/sys/bus/event_source/devices/msr/events/mperf"; const char *const format = "event=%x"; return read_perf_counter_info_n(path, format); } /*helper - pperf reading */ static unsigned int read_tsc_config(void) { const char *const path = "/sys/bus/event_source/devices/msr/events/tsc"; const char *const format = "event=%x"; return read_perf_counter_info_n(path, format); } /*helper - pperf reading */ static unsigned int read_msr_type(void) { const char *const path = "/sys/bus/event_source/devices/msr/type"; const char *const format = "%u"; return read_perf_counter_info_n(path, format); } /*helper - pperf reading */ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); } /*helper - pperf reading */ static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format) { struct perf_event_attr attr; const pid_t pid = -1; const unsigned long flags = 0; memset(&attr, 0, sizeof(struct perf_event_attr)); attr.type = type; attr.size = sizeof(struct perf_event_attr); attr.config = config; attr.disabled = 0; attr.sample_type = PERF_SAMPLE_IDENTIFIER; attr.read_format = read_format; const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags); return fd; } /*helper - pperf reading */ static void open_amperf_fd(int cpu) { const unsigned int msr_type = read_msr_type(); const unsigned int aperf_config = read_aperf_config(); const unsigned int mperf_config = read_mperf_config(); const unsigned int pperf_config = read_pperf_config(); perf_stats[cpu].aperf_fd = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP); perf_stats[cpu].mperf_fd = open_perf_counter(cpu, msr_type, mperf_config, perf_stats[cpu].aperf_fd, PERF_FORMAT_GROUP); perf_stats[cpu].pperf_fd = open_perf_counter(cpu, msr_type, pperf_config, perf_stats[cpu].aperf_fd, PERF_FORMAT_GROUP); } /*helper - pperf reading */ static int get_amperf_fd(int cpu) { if (perf_stats[cpu].aperf_fd) return perf_stats[cpu].aperf_fd; open_amperf_fd(cpu); return perf_stats[cpu].aperf_fd; } /*helper - pperf reading */ static unsigned long long rdtsc(void) { unsigned int low, high; asm volatile ("rdtsc":"=a" (low), "=d"(high)); return low | ((unsigned long long)high) << 32; } /* Helper for - Reading APERF, MPERF and TSC using the perf API. Calc perf [cpu utilization per core] difference from MSR registers */ static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu) { union { struct { unsigned long nr_entries; unsigned long aperf; unsigned long mperf; unsigned long pperf; }; unsigned long as_array[4]; } cnt; const int fd_amperf = get_amperf_fd(cpu); if(fd_amperf == -1) { return LPMD_ERROR; } /* * Read the TSC with rdtsc, because we want the absolute value and not * the offset from the start of the counter. */ t->tsc = rdtsc(); const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array)); if (n != sizeof(cnt.as_array)) return -2; t->aperf = cnt.aperf; t->mperf = cnt.mperf; t->pperf = cnt.pperf; return LPMD_SUCCESS; } /*Calc perf [cpu utilization per core] difference from MSR registers */ int update_perf_diffs(float *sum_norm_perf, int stat_init_only) { int fd, maxed_cpu = -1; float min_load = 100.0, min_s0 = 1.0, next_s0 = 1.0; float max_load = 0, max_2nd_load = 0, max_3rd_load = 0, next_load = 0; uint64_t aperf_raw, mperf_raw, pperf_raw, tsc_raw, poll_cpu_us = 0; int t, min_s0_cpu = 0, first_pass = 1; for (t = 0; t < get_max_online_cpu(); t++) { if (!cpu_applicable(t, get_cur_state())) { continue; } /*reading through perf api*/ struct thread_data tdata; if(read_aperf_mperf_tsc_perf(&tdata , t) != LPMD_SUCCESS) { lpmd_log_error("read_aperf_mperf_tsc_perf failed for cpu = %d\n", t); continue; } perf_stats[t].pperf_diff = cpu_get_diff_pperf(tdata.pperf, t); perf_stats[t].aperf_diff = cpu_get_diff_aperf(tdata.aperf, t); perf_stats[t].mperf_diff = cpu_get_diff_mperf(tdata.mperf, t); perf_stats[t].tsc_diff = cpu_get_diff_tsc(tdata.tsc, t); if (stat_init_only) continue; /* * Normalized perf metric defined as pperf per load per time. * The rationale is detailed here: * github.com/intel/psst >whitepapers >Generic_perf_per_watt.pdf * Given that delta_load = delta_mperf/delta_tsc, we can rewrite * as given below. */ if (perf_stats[t].tsc_diff) { next_load = (float)100 *perf_stats[t].mperf_diff / perf_stats[t].tsc_diff; perf_stats[t].l0 = next_load; } if (A_LTE_B(max_load, next_load)) { max_load = next_load; maxed_cpu = perf_stats[t].cpu; } else if (A_LTE_B(max_2nd_load, next_load)) { max_2nd_load = next_load; } else if (A_LTE_B(max_3rd_load, next_load)) { max_3rd_load = next_load; } /* min scalability */ if (perf_stats[t].aperf_diff) { next_s0 = (float)perf_stats[t].pperf_diff / perf_stats[t].aperf_diff; /* since aperf/pperf are not read oneshot, ratio > 1 is not ruled out */ next_s0 = (next_s0 >= 1) ? (1 - EPSILON) : next_s0; } if (A_LTE_B(next_s0, min_s0) || first_pass) { min_s0 = next_s0; min_s0_cpu = perf_stats[t].cpu; } if (A_GT_B(min_load, next_load)) { min_load = next_load; } first_pass = 0; } if (stat_init_only) return 0; grp.worst_stall = min_s0; grp.worst_stall_cpu = min_s0_cpu; grp.c0_max = max_load; grp.c0_2nd_max = max_2nd_load; grp.c0_3rd_max = max_3rd_load; grp.c0_min = min_load; return maxed_cpu; } /* close perf fd's */ static void close_amperf_fd(int cpu) { if(perf_stats[cpu].aperf_fd) close(perf_stats[cpu].aperf_fd); if(perf_stats[cpu].mperf_fd) close(perf_stats[cpu].mperf_fd); if(perf_stats[cpu].pperf_fd) close(perf_stats[cpu].pperf_fd); } /* cleanup perf_stat structure */ static void perf_stat_uninit() { int max_cpus = get_max_cpus(); if (perf_stats) { for (size_t i = 0; i < max_cpus; ++i) { close_amperf_fd(i); memset( &perf_stats[i], 0, sizeof(perf_stats_t)); } free(perf_stats); } } static void uninit_perf_calculations() { perf_stat_uninit(); if (last_aperf) free(last_aperf); if (last_mperf) free(last_mperf); if (last_pperf) free(last_pperf); if (last_tsc) free(last_tsc); } /********************perf calculation - end *****************************************/ /********************SMA calculation - begin *****************************************/ /* initialize avg calculation variables */ static void init_sma_calculations() { for (int i = 0; i < SMA_CPU_COUNT; i++) { grp.sma_sum[i] = -1; for (int j = 0; j < SMA_LENGTH; j++) sample[i][j] = 0; } grp.sma_pos = -1; } /* Helper avg calculation */ static int do_sum(int *sam, int len) { int sum = 0; for (int i = 0; i < len; i++) sum += sam[i]; return sum; } /* average cpu usage */ int state_max_avg() { grp.sma_pos += 1; int v1 = (int)round(grp.c0_max * SCALE_DECIMAL); int v2 = (int)round(grp.c0_2nd_max * SCALE_DECIMAL); int v3 = (int)round(grp.c0_3rd_max * SCALE_DECIMAL); if (grp.sma_pos == SMA_LENGTH) grp.sma_pos = 0; if (grp.sma_sum[0] == -1) { sample[0][grp.sma_pos] = v1; sample[1][grp.sma_pos] = v2; sample[2][grp.sma_pos] = v3; if (grp.sma_pos == SMA_LENGTH - 1) { grp.sma_sum[0] = do_sum(sample[0], SMA_LENGTH); grp.sma_sum[1] = do_sum(sample[1], SMA_LENGTH); grp.sma_sum[2] = do_sum(sample[2], SMA_LENGTH); } } else { grp.sma_sum[0] = grp.sma_sum[0] - sample[0][grp.sma_pos] + v1; grp.sma_sum[1] = grp.sma_sum[1] - sample[1][grp.sma_pos] + v2; grp.sma_sum[2] = grp.sma_sum[2] - sample[2][grp.sma_pos] + v3; sample[0][grp.sma_pos] = v1; sample[1][grp.sma_pos] = v2; sample[2][grp.sma_pos] = v3; } grp.sma_avg1 = (int)round((double)grp.sma_sum[0] / (double)(SMA_LENGTH * SCALE_DECIMAL)); grp.sma_avg2 = (int)round((double)grp.sma_sum[1] / (double)(SMA_LENGTH * SCALE_DECIMAL)); grp.sma_avg3 = (int)round((double)grp.sma_sum[2] / (double)(SMA_LENGTH * SCALE_DECIMAL)); return 1; } /********************SMA calculation - end *****************************************/ /* return multi threaded false if at least one cpu is under utilizied */ int max_mt_detected(enum state_idx state) { for (int t = 0; t < get_max_online_cpu(); t++) { if (!cpu_applicable(t, state)) continue; if A_LTE_B (perf_stats[t].l0, (UTIL_LOW)) return 0; } return 1; } /* initialize */ int util_init_proxy(void) { float dummy; if(init_perf_calculations(get_max_online_cpu()) < 0) { lpmd_log_error("WLT_Proxy: error initializing perf calculations"); return LPMD_ERROR; } update_perf_diffs(&dummy, 1); init_sma_calculations(); return LPMD_SUCCESS; } /* cleanup */ void util_uninit_proxy(void) { uninit_perf_calculations(); uninit_state_manager(); } intel-lpmd-0.0.9/src/wlt_proxy/wlt_proxy.c000066400000000000000000000031051477072336600206540ustar00rootroot00000000000000/* * wlt_proxy.c: Intel Linux Energy Optimizer WLT proxy * * Copyright (C) 2024 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * This file contains the Workload type detection proxy entry and callback functions. */ #include "lpmd.h" //wlt_type #include "state_common.h" /* wlt_proxy polling interval - updated at every state change */ int next_proxy_poll = 1000; /* wlt_proxy hint - updated at every state change */ int wlt_type = WLT_IDLE; /* called at the configured interval to take action; return next interval and workload type*/ int read_wlt_proxy(int *interval) { state_machine_auto(); *interval = next_proxy_poll; return wlt_type; } /* Returns success if proxy supported on platform */ int wlt_proxy_init() { return util_init_proxy(); } /* make sure all resource are properly released and closed */ void wlt_proxy_uninit(void) { util_uninit_proxy(); } intel-lpmd-0.0.9/tests/000077500000000000000000000000001477072336600147465ustar00rootroot00000000000000intel-lpmd-0.0.9/tests/lpm_test_interface.sh000077500000000000000000000034771477072336600211670ustar00rootroot00000000000000#!/bin/bash if [ "$(whoami)" != "root" ] then echo "This script must be run as root" exit 1 fi opt_no= if [ ! -z "$1" ] then opt_no=$1 fi while true; do if [ -z "$1" ] then echo "****options****" echo "0 : Allow All CPUs" echo "1 : Terminate" echo "2 : LPM force on" echo "3 : LPM force off" echo "4 : LPM auto" echo "5 : SUV_MODE Enter" echo "6 : SUV_MODE Exit" echo "7 : Quit" echo -n " Enter choice: " read opt_no fi case $opt_no in 0) echo "0 : Allow All CPUs" echo -n " Enter Maximum CPU number" read max_cpu sudo systemctl set-property --runtime user.slice AllowedCPUs=0-$max_cpu sudo systemctl set-property --runtime system.slice AllowedCPUs=0-$max_cpu ;; 1) echo "1 : Terminate" dbus-send --system --dest=org.freedesktop.intel_lpmd --print-reply /org/freedesktop/intel_lpmd org.freedesktop.intel_lpmd.Terminate ;; 2) echo "2 : LPM force on" dbus-send --system --dest=org.freedesktop.intel_lpmd --print-reply /org/freedesktop/intel_lpmd org.freedesktop.intel_lpmd.LPM_FORCE_ON ;; 3) echo "3 : LPM force off" dbus-send --system --dest=org.freedesktop.intel_lpmd --print-reply /org/freedesktop/intel_lpmd org.freedesktop.intel_lpmd.LPM_FORCE_OFF ;; 4) echo "4 : LPM auto" dbus-send --system --dest=org.freedesktop.intel_lpmd --print-reply /org/freedesktop/intel_lpmd org.freedesktop.intel_lpmd.LPM_AUTO ;; 5) echo "5 : SUV_MODE Enter" dbus-send --system --dest=org.freedesktop.intel_lpmd --print-reply /org/freedesktop/intel_lpmd org.freedesktop.intel_lpmd.SUV_MODE_ENTER ;; 6) echo "6 : SUV_MODE Exit" dbus-send --system --dest=org.freedesktop.intel_lpmd --print-reply /org/freedesktop/intel_lpmd org.freedesktop.intel_lpmd.SUV_MODE_EXIT ;; 7) exit 0 ;; *) echo "7 : Quit" echo "Invalid option" esac [ ! -z "$1" ] && break done intel-lpmd-0.0.9/tools/000077500000000000000000000000001477072336600147445ustar00rootroot00000000000000intel-lpmd-0.0.9/tools/Makefile.am000077500000000000000000000002171477072336600170030ustar00rootroot00000000000000## Process this file with automake to generate Makefile.in AM_CFLAGS = $(GLIB_CFLAGS) LDADD = $(GLIB_LIBS) bin_PROGRAMS = intel_lpmd_control intel-lpmd-0.0.9/tools/intel_lpmd_control.c000066400000000000000000000050071477072336600210010ustar00rootroot00000000000000/* * intel_lpmd_control.c: Intel Low Power Daemon control utility * * Copyright (C) 2023 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * This program can be used to control modes of Low power mode daemon */ #include #include #include #include #include #include #include #include #include #define INTEL_LPMD_SERVICE_NAME "org.freedesktop.intel_lpmd" #define INTEL_LPMD_SERVICE_OBJECT_PATH "/org/freedesktop/intel_lpmd" #define INTEL_LPMD_SERVICE_INTERFACE "org.freedesktop.intel_lpmd" int main(int argc, char **argv) { g_autoptr(GDBusConnection) connection = NULL; g_autoptr (GString) command = NULL; GError *error = NULL; if (geteuid () != 0) { g_warning ("Must run as root"); exit (1); } if (argc < 2) { fprintf (stderr, "intel_lpmd_control: missing control command\n"); fprintf (stderr, "syntax:\n"); fprintf (stderr, "intel_lpmd_control ON|OFF|AUTO\n"); exit (0); } if (!strncmp (argv[1], "ON", 2)) command = g_string_new ("LPM_FORCE_ON"); else if (!strncmp (argv[1], "OFF", 3)) command = g_string_new ("LPM_FORCE_OFF"); else if (!strncmp (argv[1], "AUTO", 4)) command = g_string_new ("LPM_AUTO"); else { g_warning ("intel_lpmd_control: Invalid command"); exit (1); } connection = g_bus_get_sync(G_BUS_TYPE_SYSTEM, NULL, &error); if (connection == NULL) return FALSE; g_dbus_connection_call_sync (connection, INTEL_LPMD_SERVICE_NAME, INTEL_LPMD_SERVICE_OBJECT_PATH, INTEL_LPMD_SERVICE_INTERFACE, command->str, NULL, NULL, G_DBUS_CALL_FLAGS_NONE, -1, NULL, &error); if (error != NULL) { g_warning ("Fail on connecting lpmd: %s", error->message); exit (1); } return 0; }