本文为阅读 NewRelic Python agent 源代码时的思考,主要讨论如何在不修改用户代码的前提下在 Python 中实现模块级别的 hook。使用到的代码版本为 4.20.1.121
newrelic-admin
调用的是位于 newrelic/admin/__init__.py
中的 main
函数,不过在这之前,有两个在模块被导入时就会执行的函数 load_internal_plugins
和 load_external_plugins
# newrelic/admin/__init__.py
_builtin_plugins = [
# ...
'run_program',
'run_python',
]
def load_internal_plugins():
for name in _builtin_plugins:
module_name = '%s.%s' % (__name__, name)
__import__(module_name)
def load_external_plugins():
try:
import pkg_resources
except ImportError:
return
group = 'newrelic.admin'
for entrypoint in pkg_resources.iter_entry_points(group=group):
__import__(entrypoint.module_name)
load_internal_plugins()
load_external_plugins()
它们分别用于加载内置和外部的插件,比如 run_program
。这些 plugin 可以借助 newrelic.admin.command
装饰器注册自己的命令行参数
当我们使用 run-program
去执行用户程序的时候,agent 会修改掉 PYTHONPATH
# newrelic/admin/run_program.py
@command('run-program', '...', 'Executes the command ...')
def run_program(args):
# 仅摘取关键代码
from newrelic import __file__ as root_directory
root_directory = os.path.dirname(root_directory)
boot_directory = os.path.join(root_directory, 'bootstrap')
python_path = boot_directory
if 'PYTHONPATH' in os.environ:
path = os.environ['PYTHONPATH'].split(os.path.pathsep)
if boot_directory not in path:
python_path = "%s%s%s" % (boot_directory, os.path.pathsep,
os.environ['PYTHONPATH'])
os.environ['PYTHONPATH'] = python_path
newrelic 安装路径下的 bootstrap 目录会被添加到 PYTHONPATH
的最前面,这样便可以通过最高优先级来导入 newrelic
的 module
newrelic 做完这些准备工作后,就去执行用户的命令了(环境变量会被子进程继承,可参考之前写过的文章)
# newrelic/admin/run_program.py
@command('run-program', '...', 'Executes the command ...')
def run_program(args):
# 仅摘取关键代码
program_exe_path = args[0] # args 为用户命令,比如 python app.py
# 根据 PATH 寻找 bin 文件的完整路径
if not os.path.dirname(program_exe_path):
program_search_path = os.environ.get('PATH', '').split(os.path.pathsep)
for path in program_search_path:
path = os.path.join(path, program_exe_path)
if os.path.exists(path) and os.access(path, os.X_OK):
program_exe_path = path
break
log_message('program_exe_path = %r', program_exe_path)
log_message('execl_arguments = %r', [program_exe_path] + args)
# 执行
os.execl(program_exe_path, *args)
我们回头来看 bootstrap 目录下面有什么
bootstrap
├── __init__.py
└── sitecustomize.py
解释器在初始化的时候会自动导入 PYTHONPATH
下存在的 sitecustomize
和 usercustomize
(sitecustomize
优先级更高)。这样 newrelic 便可以在应用本身中做手脚了
首先 newrelic 会去尝试导入原本的 sitecustomize
,因为担心会再次导入自身(因为 bootstrap 路径在最前面),而且 import system 自身可能会存在缓存,所以这里先复制一份 sys.path
,从副本中删除 bootstrap 路径后使用 imp
去导入而不是直接 import
# newrelic/bootstrap/sitecustomize.py
import imp
# We need to import the original sitecustomize.py file if it exists. We
# can't just try and import the existing one as we will pick up
# ourselves again. Even if we remove ourselves from sys.modules and
# remove the bootstrap directory from sys.path, still not sure that the
# import system will not have cached something and return a reference to
# ourselves rather than searching again. What we therefore do is use the
# imp module to find the module, excluding the bootstrap directory from
# the search, and then load what was found.
boot_directory = os.path.dirname(__file__)
root_directory = os.path.dirname(os.path.dirname(boot_directory))
path = list(sys.path)
if boot_directory in path:
del path[path.index(boot_directory)]
try:
(file, pathname, description) = imp.find_module('sitecustomize', path)
except ImportError:
pass
else:
imp.load_module('sitecustomize', file, pathname, description)
然后 newrelic agent 进行初始化
# newrelic/bootstrap/sitecustomize.py
import newrelic.config
newrelic.config.initialize(config_file, environment)
# newrelic/config.py
def initialize(config_file=None, environment=None, ignore_errors=None,
log_file=None, log_level=None):
if config_file is None:
config_file = os.environ.get('NEW_RELIC_CONFIG_FILE', None)
if environment is None:
environment = os.environ.get('NEW_RELIC_ENVIRONMENT', None)
if ignore_errors is None:
ignore_errors = newrelic.core.config._environ_as_bool(
'NEW_RELIC_IGNORE_STARTUP_ERRORS', True)
_load_configuration(config_file, environment, ignore_errors,
log_file, log_level)
if _settings.monitor_mode or _settings.developer_mode:
_settings.enabled = True
_setup_instrumentation()
_setup_data_source()
_setup_extensions()
_setup_agent_console()
else:
_settings.enabled = False
在 sys.meta_path
中添加了自定义的 finder,实现了对 import 行为的拦截(from xx import yy
也会触发 xx
的 finder),参考文档 sys.meta_path
和 PEP-302
sys.meta_path.insert(0, newrelic.api.import_hook.ImportHookFinder())
finder 和 loader 的实现
def _notify_import_hooks(name, module):
# Is assumed that this function is called with the global
# import lock held. This should be the case as should only
# be called from load_module() of the import hook loader.
hooks = _import_hooks.get(name, None)
if hooks is not None:
_import_hooks[name] = None
for callable in hooks:
callable(module)
class _ImportHookChainedLoader:
def __init__(self, loader):
self.loader = loader
def load_module(self, fullname):
module = self.loader.load_module(fullname)
# Call the import hooks on the module being handled.
_notify_import_hooks(fullname, module)
return module
class ImportHookFinder:
def __init__(self):
self._skip = {}
def find_module(self, fullname, path=None):
# If not something we are interested in we can return.
if fullname not in _import_hooks:
return None
# Check whether this is being called on the second time
# through and return.
if fullname in self._skip:
return None
# We are now going to call back into import. We set a
# flag to see we are handling the module so that check
# above drops out on subsequent pass and we don't go
# into an infinite loop.
self._skip[fullname] = True
try:
loader = find_loader(fullname, path) # importlib.find_loader
if loader:
return _ImportHookChainedLoader(loader)
finally:
del self._skip[fullname]
在 Python 3.4 后我们应当在 finder 对象中实现 find_spec
方法,find_module
只是作为 find_spec
不存在时的 fallback。这里同时兼容了 Python 2 和 3
register_import_hook
函数负责将 hook 注册到 _import_hooks
中,那么什么时候注册进来的呢,让我们回到 initialize
中
- _load_configuration 中加载了用户自定义的 trace hook
- _setup_instrumentation 中加载了内置的 trace hook,其中又分为多个维度
- …
这里选择其中一部分来举例子
def _setup_instrumentation():
# ...
_process_module_builtin_defaults()
def _process_module_builtin_defaults():
# ...
_process_module_definition('flask.app',
'newrelic.hooks.framework_flask',
'instrument_flask_app')
_process_module_definition('flask.templating',
'newrelic.hooks.framework_flask',
'instrument_flask_templating')
_process_module_definition('flask.blueprints',
'newrelic.hooks.framework_flask',
'instrument_flask_blueprints')
_process_module_definition('flask.views',
'newrelic.hooks.framework_flask',
'instrument_flask_views')
所有的 hook 都放在 newrelic/hooks 目录下面
# newrelic/hooks/framework_flask.py
def instrument_flask_app(module):
# ...
wrap_function_wrapper(module, 'Flask.handle_http_exception',
_nr_wrapper_Flask_handle_http_exception_)
使用 _nr_wrapper_Flask_handle_http_exception_
去装饰原生的函数,然后记录数据上传到 Server 端
Summary
总结一下:
- 修改 PYTHONPATH,利用 sitecustomize 导入自己的 hook
- 向 sys.meta_path 添加自己的 finder
- 装饰原生函数
- 可能还有其他的魔法?
从 newrelic agent 现有的代码量来看,如果要造一个把应用 metrics 数据推送到 push gateway、再由 Prometheus 来做 APM 的轮子,好像不如直接改代码来得方便一些。