Try an interactive version of this dialog: Sign up at solve.it.com, click Upload, and pass this URL.
Many LLMs do function calling (aka tool use) by taking advantage of JSON schema.
We'll use docments to make getting JSON schema from Python functions as ergonomic as possible. Each parameter (and the return value) should have a type, and a docments comment with the description of what it is. Here's an example:
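For instance, the output below could be produced by a function like this sketch (the name silly_sum, the parameter comments, and the docstring are inferred from the schema shown later in this notebook):

def silly_sum(
    a:int, # First thing to sum
    b:int=1, # Second thing to sum
    c:list[int]=None, # A pointless argument
)->int: # The sum of the inputs
    "Adds a + b."
    return a + b

docments(silly_sum, full=True)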
{ 'a': { 'anno': <class 'int'>,
'default': <class 'inspect._empty'>,
'docment': 'First thing to sum'},
'b': {'anno': <class 'int'>, 'default': 1, 'docment': 'Second thing to sum'},
'c': {'anno': list[int], 'default': None, 'docment': 'A pointless argument'},
'return': { 'anno': <class 'int'>,
'default': <class 'inspect._empty'>,
'docment': 'The sum of the inputs'}}
Note that this is an AttrDict so we can treat it like an object, or a dict:
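For example (again assuming the silly_sum sketch above), attribute-style and dict-style access give the same result:

d = docments(silly_sum, full=True)
d.a.docment, d['a']['docment']
# → ('First thing to sum', 'First thing to sum')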
#| exports
def _types(t:type)->tuple[str,Optional[str]]:
"Tuple of json schema type name and (if appropriate) array item name."
if t is empty: raise TypeError('Missing type')
tmap = {int:"integer", float:"number", str:"string", bool:"boolean", list:"array", dict:"object"}
tmap.update({k.__name__: v for k, v in tmap.items()})
if getattr(t, '__origin__', None) in (list,tuple):
args = getattr(t, '__args__', None)
item_type = "object" if not args else tmap.get(t.__args__[0].__name__, "object")
return "array", item_type
# if t is a string like 'int', directly use the string as the key
elif isinstance(t, str): return tmap.get(t, "object"), None
# if t is the type itself and a container
elif get_origin(t): return tmap.get(get_origin(t).__name__, "object"), None
# if t is the type itself like int, use the __name__ representation as the key
else: return tmap.get(t.__name__, "object"), None
It will also convert custom types to the JSON object type.
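A few illustrative calls (Whatever is a hypothetical custom class):

class Whatever: pass
_types(int), _types(list[int]), _types('str'), _types(Whatever)
# → (('integer', None), ('array', 'integer'), ('string', None), ('object', None))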
#| exports
def _param(name, info):
"json schema parameter given `name` and `info` from docments full dict."
paramt,itemt = _types(info.anno)
pschema = dict(type=paramt, description=info.docment or "")
if itemt: pschema["items"] = {"type": itemt}
if info.default is not empty: pschema["default"] = info.default
return pschema
This private function converts a key/value pair from the docments structure into the dict that will be needed for the schema.
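For instance, with a hypothetical _demo function:

def _demo(
    x:int=0, # An example argument
): ...

_param('x', docments(_demo, full=True)['x'])
# → {'type': 'integer', 'description': 'An example argument', 'default': 0}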
custom_types = {Path, date}
def _handle_type(t, defs):
"Handle a single type, creating nested schemas if necessary"
if t is NoneType: return {'type': 'null'}
if t in custom_types: return {'type':'string', 'format':t.__name__}
if t in (dict, list, tuple, set): return {'type': _types(t)[0]}
if isinstance(t, type) and not issubclass(t, (int, float, str, bool)) or inspect.isfunction(t):
defs[t.__name__] = _get_nested_schema(t)
return {'$ref': f'#/$defs/{t.__name__}'}
return {'type': _types(t)[0]}
def _is_container(t):
"Check if type is a container (list, dict, tuple, set, Union)"
origin = get_origin(t)
return origin in (list, dict, tuple, set, Union) if origin else False
def _is_parameterized(t):
"Check if type has arguments (e.g. list[int] vs list, dict[str, int] vs dict)"
return _is_container(t) and (get_args(t) != ())
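A couple of quick checks (illustrative) showing how bare and parameterized containers are treated:

_is_container(list[int]), _is_container(int), _is_parameterized(list), _is_parameterized(list[int])
# → (True, False, False, True)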
def _example_new_union(opt_tup: str | None):
    pass
d = docments(_example_new_union, full=True)
anno1 = first(d.items())[1].anno
(anno1, get_origin(anno1), get_args(anno1))
def _example_old_union(opt_tup: Union[str, type(None)]=None):
pass
d = docments(_example_old_union, full=True)
anno2 = first(d.items())[1].anno
(anno2, get_origin(anno2), get_args(anno2))
def _handle_container(origin, args, defs):
"Handle container types like dict, list, tuple, set, and Union"
if origin is Union or origin is UnionType:
return {"anyOf": [_handle_type(arg, defs) for arg in args]}
if origin is dict:
value_type = args[1].__args__[0] if hasattr(args[1], '__args__') else args[1]
return {
'type': 'object',
'additionalProperties': (
{'type': 'array', 'items': _handle_type(value_type, defs)}
if hasattr(args[1], '__origin__') else _handle_type(args[1], defs)
)
}
elif origin in (list, tuple, set):
schema = {'type': 'array', 'items': _handle_type(args[0], defs)}
if origin is set:
schema['uniqueItems'] = True
return schema
return None
def _process_property(name, obj, props, req, defs):
"Process a single property of the schema"
p = _param(name, obj)
props[name] = p
if obj.default is empty: req[name] = True
if _is_container(obj.anno) and _is_parameterized(obj.anno):
p.update(_handle_container(get_origin(obj.anno), get_args(obj.anno), defs))
else:
# Non-container type or container without arguments
p.update(_handle_type(obj.anno, defs))
def _get_nested_schema(obj):
"Generate nested JSON schema for a class or function"
d = docments(obj, full=True)
props, req, defs = {}, {}, {}
for n, o in d.items():
if n != 'return' and n != 'self':
_process_property(n, o, props, req, defs)
tkw = {}
if isinstance(obj, type): tkw['title']=obj.__name__
schema = dict(type='object', properties=props, **tkw)
if req: schema['required'] = list(req)
if defs: schema['$defs'] = defs
return schema
# Test primitive types
defs = {}
assert _handle_type(int, defs) == {'type': 'integer'}
assert _handle_type(str, defs) == {'type': 'string'}
assert _handle_type(bool, defs) == {'type': 'boolean'}
assert _handle_type(float, defs) == {'type': 'number'}
# Test custom class
class TestClass:
def __init__(self, x: int, y: int): store_attr()
result = _handle_type(TestClass, defs)
assert result == {'$ref': '#/$defs/TestClass'}
assert 'TestClass' in defs
assert defs['TestClass']['type'] == 'object'
assert 'properties' in defs['TestClass']
# Test primitive types in containers
assert _handle_container(list, (int,), defs) == {'type': 'array', 'items': {'type': 'integer'}}
assert _handle_container(tuple, (str,), defs) == {'type': 'array', 'items': {'type': 'string'}}
assert _handle_container(set, (str,), defs) == {'type': 'array', 'items': {'type': 'string'}, 'uniqueItems': True}
assert _handle_container(dict, (str,bool), defs) == {'type': 'object', 'additionalProperties': {'type': 'boolean'}}
result = _handle_container(list, (TestClass,), defs)
assert result == {'type': 'array', 'items': {'$ref': '#/$defs/TestClass'}}
assert 'TestClass' in defs
# Test complex nested structure
ComplexType = dict[str, list[TestClass]]
result = _handle_container(get_origin(ComplexType), get_args(ComplexType), defs)
assert result == {
'type': 'object',
'additionalProperties': {
'type': 'array',
'items': {'$ref': '#/$defs/TestClass'}
}
}
# Test processing of a required integer property
props, req = {}, {}
class TestClass:
"Test class"
def __init__(
self,
x: int, # First thing
y: list[float], # Second thing
z: str = "default", # Third thing
): store_attr()
d = docments(TestClass, full=True)
_process_property('x', d.x, props, req, defs)
assert 'x' in props
assert props['x']['type'] == 'integer'
assert 'x' in req
# Test processing of a required list property
_process_property('y', d.y, props, req, defs)
assert 'y' in props
assert props['y']['type'] == 'array'
assert props['y']['items']['type'] == 'number'
assert 'y' in req
# Test processing of an optional string property with default
_process_property('z', d.z, props, req, defs)
assert 'z' in props
assert props['z']['type'] == 'string'
assert props['z']['default'] == "default"
assert 'z' not in req
#| exports
def get_schema(f:Union[callable,dict], pname='input_schema')->dict:
"Generate JSON schema for a class, function, or method"
if isinstance(f, dict): return f
schema = _get_nested_schema(f)
desc = f.__doc__
assert desc, "Docstring missing!"
d = docments(f, full=True)
ret = d.pop('return')
if (ret.anno is not empty) and (ret.anno is not None): desc += f'\n\nReturns:\n- type: {_types(ret.anno)[0]}'
return {"name": f.__name__, "description": desc, pname: schema}
Adds a + b.
Returns:
- type: integer
{'name': 'silly_sum',
'input_schema': {'type': 'object',
'properties': {'a': {'type': 'integer', 'description': 'First thing to sum'},
'b': {'type': 'integer',
'description': 'Second thing to sum',
'default': 1},
'c': {'type': 'array',
'description': 'A pointless argument',
'items': {'type': 'integer'},
'default': None}},
'required': ['a']}}
def silly_test(
a: 'int', # quoted type hint
):
"Mandatory docstring"
return a
get_schema(silly_test)
class Dummy:
def sums(
self,
a:int, # First thing to sum
b:int=1 # Second thing to sum
) -> int: # The sum of the inputs
"Adds a + b."
print(f"Finding the sum of {a} and {b}")
return a + b
get_schema(Dummy.sums)
class Turn:
"Turn between two speakers"
def __init__(
self,
speaker_a:str, # First speaker's message
speaker_b:str, # Second speaker's message
): store_attr()
class Conversation:
"A conversation between two speakers"
def __init__(
self,
turns:list[Turn], # Turns of the conversation
): store_attr()
get_schema(Conversation)
{'name': 'Conversation',
'description': 'A conversation between two speakers',
'input_schema': {'type': 'object',
'properties': {'turns': {'type': 'array',
'description': 'Turns of the conversation',
'items': {'$ref': '#/$defs/Turn'}}},
'title': 'Conversation',
'required': ['turns'],
'$defs': {'Turn': {'type': 'object',
'properties': {'speaker_a': {'type': 'string',
'description': "First speaker's message"},
'speaker_b': {'type': 'string',
'description': "Second speaker's message"}},
'title': 'Turn',
'required': ['speaker_a', 'speaker_b']}}}}
class DictConversation:
"A conversation between two speakers"
def __init__(
self,
turns:dict[str,list[Turn]], # dictionary of topics and the Turns of the conversation
): store_attr()
get_schema(DictConversation)
{'name': 'DictConversation',
'description': 'A conversation between two speakers',
'input_schema': {'type': 'object',
'properties': {'turns': {'type': 'object',
'description': 'dictionary of topics and the Turns of the conversation',
'additionalProperties': {'type': 'array',
'items': {'$ref': '#/$defs/Turn'}}}},
'title': 'DictConversation',
'required': ['turns'],
'$defs': {'Turn': {'type': 'object',
'properties': {'speaker_a': {'type': 'string',
'description': "First speaker's message"},
'speaker_b': {'type': 'string',
'description': "Second speaker's message"}},
'title': 'Turn',
'required': ['speaker_a', 'speaker_b']}}}}
class SetConversation:
"A conversation between two speakers"
def __init__(
self,
turns:set[Turn], # the unique Turns of the conversation
): store_attr()
get_schema(SetConversation)
{'name': 'SetConversation',
'description': 'A conversation between two speakers',
'input_schema': {'type': 'object',
'properties': {'turns': {'type': 'array',
'description': 'the unique Turns of the conversation',
'items': {'$ref': '#/$defs/Turn'},
'uniqueItems': True}},
'title': 'SetConversation',
'required': ['turns'],
'$defs': {'Turn': {'type': 'object',
'properties': {'speaker_a': {'type': 'string',
'description': "First speaker's message"},
'speaker_b': {'type': 'string',
'description': "Second speaker's message"}},
'title': 'Turn',
'required': ['speaker_a', 'speaker_b']}}}}
def path_test(
a: PathArg, # a type hint
b: PathArg # b type hint
):
"Mandatory docstring"
return a/b
get_schema(path_test)
{'name': 'path_test',
'description': 'Mandatory docstring',
'input_schema': {'type': 'object',
'properties': {'a': {'type': 'object',
'description': 'a type hint',
'$ref': '#/$defs/PathArg'},
'b': {'type': 'object',
'description': 'b type hint',
'$ref': '#/$defs/PathArg'}},
'required': ['a', 'b'],
'$defs': {'PathArg': {'type': 'object',
'properties': {'path': {'type': 'string',
'description': 'A filesystem path'}},
'required': ['path']}}}}
def path_test2(
a: Path, # a type hint
b: Path # b type hint
):
"Mandatory docstring"
return a/b
get_schema(path_test2)
def date_test(
a: dateArg, # a type hint
b: dateArg # b type hint
):
"Mandatory docstring"
return b-a
get_schema(date_test)
{'name': 'date_test',
'description': 'Mandatory docstring',
'input_schema': {'type': 'object',
'properties': {'a': {'type': 'object',
'description': 'a type hint',
'$ref': '#/$defs/dateArg'},
'b': {'type': 'object',
'description': 'b type hint',
'$ref': '#/$defs/dateArg'}},
'required': ['a', 'b'],
'$defs': {'dateArg': {'type': 'object',
'properties': {'datestr': {'type': 'string', 'description': 'YYYY-MM-DD'}},
'required': ['datestr']}}}}
def date_test2(
a: date, # a type hint
b: date # b type hint
):
"Mandatory docstring"
return b-a
get_schema(date_test2)
Solveit, I have added datetime.date to the set custom_types and created dateArg and some tests to parallel what I found in this notebook for Path. But I don't really understand it. It's not that date's init doesn't take a string (though it doesn't), that's not the problem with trying to use a date argument in a function with get_schema, because Path's init does take a string. So why do I need a custom type for date (or why do we for Path for that matter)? Is it that json doesn't natively load Paths and dates?
You're asking a great question, and you're right that the string initialization behavior isn't the core issue.
The fundamental problem is: JSON only has a handful of primitive types (string, number, boolean, array, object, null). There's no native "path" or "date" type in JSON.
So when an LLM does function calling, it can only send back JSON. If your function expects a Path or date, the LLM will send a string like "/some/path" or "2025-12-02".
The question then becomes: how does the code that receives that JSON know to convert those strings into Python Path or date objects?
That's what the format field in the schema is for. When you see:
{'type': 'string', 'format': 'Path'}
It's saying: "This is a string in the JSON, but treat it as a Path when you deserialize it."
The dateArg and PathArg wrapper functions are an alternative approach - they create a nested schema where the LLM explicitly passes {"datestr": "2025-12-02"} instead of just "2025-12-02", and the wrapper handles the conversion.
Does that clarify the "why"? If so, I can explain when you'd choose one approach over the other (the format approach vs. the wrapper function approach).
Great! So there are two approaches here:
1. The format approach (like date_test2):
   - Schema says: {'type': 'string', 'format': 'Path'} or {'type': 'string', 'format': 'date'}
   - LLM sends: "2025-12-02" (just a string)
   - You need code that checks for the format field and calls the appropriate constructor (Path(...) or date.fromisoformat(...))
2. The wrapper function approach (like dateArg):
   - Schema says: this parameter is an object with a datestr property
   - LLM sends: {"datestr": "2025-12-02"} (nested object)
   - Your wrapper function automatically handles the conversion
The format approach is cleaner (simpler JSON), but requires you to write deserialization logic that looks for format fields. The wrapper approach works immediately without any special deserialization code - the function itself does the conversion.
Which approach makes more sense for your use case?
I guess get_schema supports both. But I don't understand how this code correctly maps Path to PathArg and date to dateArg. I can see that the mapping has happened in the returned schema. But where does that happen and how?
Good question! Let me trace through the code with you.
When you use Path or date as a type annotation, look at what happens in _handle_type:
if t in custom_types: return {'type':'string', 'format':t.__name__}
So Path → {'type':'string', 'format':'Path'} and date → {'type':'string', 'format':'date'}.
But when you use PathArg or dateArg, they're functions, not in custom_types. So _handle_type hits this branch instead:
if isinstance(t, type) and not issubclass(t, (int, float, str, bool)) or inspect.isfunction(t):
defs[t.__name__] = _get_nested_schema(t)
return {'$ref': f'#/$defs/{t.__name__}'}
It recognizes them as functions, generates a nested schema from them (using their own parameters), and creates a reference to that schema.
Does that explain the branching logic? The key is: Path/date are in custom_types so they get the format treatment, while PathArg/dateArg are functions so they get the nested schema treatment.
Not quite! Functions aren't native JSON types either - JSON still only has those basic primitives (string, number, boolean, array, object, null).
What's happening is: the function's parameters are being turned into a JSON object schema.
Look at dateArg:
def dateArg(datestr: str): ...
_get_nested_schema looks at this function and says: "This function takes a datestr parameter of type str", so it creates:
{'type': 'object', 'properties': {'datestr': {'type': 'string', ...}}}
The function itself isn't in the JSON - just an object that matches the function's parameter structure. When the LLM calls your tool, it sends {"datestr": "2025-12-02"}, and then your Python code calls dateArg(**that_dict).
Does that distinction make sense? The function signature defines what shape of JSON object to expect.
Ah thus "nested" ... the function that's getting schema'd, and the (sub-)functions that translate its arguments.
Exactly! You've got it. "Nested" refers to the schema hierarchy - the main function/class has a schema, and any complex types (like custom classes or these converter functions) get their own schemas nested inside under $defs.
So the full picture is:
- Main function schema describes its parameters
- If a parameter type is a function like dateArg, that function gets its own schema in $defs
- The LLM sends JSON matching that nested structure
- Your code unpacks it by calling the nested functions with their corresponding parts
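To make the format approach from the dialog concrete, here is a minimal, hypothetical sketch (not part of the exported library) of deserialization logic that inspects a schema's format hints before handing values to the target function:

from datetime import date
from pathlib import Path

# Hypothetical converters, keyed by the `format` values _handle_type emits for custom_types
_format_converters = {'Path': Path, 'date': date.fromisoformat}

def _apply_formats(inputs, schema):
    "Convert any string input whose schema property carries a known `format` hint."
    props = schema['input_schema']['properties']
    out = {}
    for k, v in inputs.items():
        fmt = props.get(k, {}).get('format')
        out[k] = _format_converters[fmt](v) if fmt in _format_converters else v
    return out

# e.g. _apply_formats({'a': '2025-12-02', 'b': '2025-12-25'}, get_schema(date_test2))
# would hand date objects to date_test2(**...)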
def _union_test(opt_tup: Union[Tuple[int, int], str, int]=None):
"Mandatory docstring"
return ""
get_schema(_union_test)
def _new_union_test(opt_tup: Tuple[int, int] | str | int =None):
"Mandatory docstring"
pass
get_schema(_new_union_test)
def _optional_test(opt_tup: Optional[Tuple[int, int]]=None):
"Mandatory docstring"
pass
get_schema(_optional_test)
In language model clients it's often useful to have a 'code interpreter' -- this is something that runs code, and generally outputs the result of the last expression (i.e. like IPython or Jupyter).
In this section we'll create the python function, which executes a string as Python code, with an optional timeout. If the last line is an expression, we'll return that -- just like in IPython or Jupyter, but without needing them installed.
#| exports
def _copy_loc(new, orig):
"Copy location information from original node to new node and all children."
new = ast.copy_location(new, orig)
for field, o in ast.iter_fields(new):
if isinstance(o, ast.AST): setattr(new, field, _copy_loc(o, orig))
elif isinstance(o, list): setattr(new, field, [_copy_loc(value, orig) for value in o])
return new
#| exports
def _run(code:str, glb:dict=None, loc:dict=None):
"Run `code`, returning final expression (similar to IPython)"
tree = ast.parse(code)
last_node = tree.body[-1] if tree.body else None
# If the last node is an expression, modify the AST to capture the result
if isinstance(last_node, ast.Expr):
tgt = [ast.Name(id='_result', ctx=ast.Store())]
assign_node = ast.Assign(targets=tgt, value=last_node.value)
tree.body[-1] = _copy_loc(assign_node, last_node)
compiled_code = compile(tree, filename='<ast>', mode='exec')
glb = glb or {}
stdout_buffer = io.StringIO()
saved_stdout = sys.stdout
sys.stdout = stdout_buffer
try: exec(compiled_code, glb, loc)
finally: sys.stdout = saved_stdout
_result = glb.get('_result', None)
if _result is not None: return _result
return stdout_buffer.getvalue().strip()
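A quick illustrative check that the final expression is returned, IPython-style:

_run("x = 5\nx * 2")
# → 10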
#| exports
def python(
code:str, # Code to execute
glb:Optional[dict]=None, # Globals namespace
loc:Optional[dict]=None, # Locals namespace
timeout:int=3600 # Maximum run time in seconds
):
"Executes python `code` with `timeout` and returning final expression (similar to IPython)."
def handler(*args): raise TimeoutError()
if glb is None: glb = inspect.currentframe().f_back.f_globals
if loc is None: loc=glb
signal.signal(signal.SIGALRM, handler)
signal.alarm(timeout)
try: return _run(code, glb, loc)
except Exception as e: return traceback.format_exc()
finally: signal.alarm(0)
python("""def factorial(n):
if n == 0 or n == 1: return 1
else: return n * factorial(n-1)
factorial(5)""")
If the code takes longer than timeout then it returns an error string.
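The traceback string below was presumably produced by a call along these lines (hypothetical):

python("while True: pass", timeout=1)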
Traceback (most recent call last):
File "/tmp/ipykernel_309/2052945749.py", line 14, in python
try: return _run(code, glb, loc)
^^^^^^^^^^^^^^^^^^^^
File "/tmp/ipykernel_309/1858893181.py", line 18, in _run
try: exec(compiled_code, glb, loc)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<ast>", line 1, in <module>
File "/tmp/ipykernel_309/2052945749.py", line 9, in handler
def handler(*args): raise TimeoutError()
^^^^^^^^^^^^^^^^^^^^
TimeoutError
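Because python is annotated with docments comments, it can be turned into a tool definition directly; the schema below presumably comes from:

get_schema(python)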
{'name': 'python',
'description': 'Executes python `code` with `timeout` and returning final expression (similar to IPython).',
'input_schema': {'type': 'object',
'properties': {'code': {'type': 'string', 'description': 'Code to execute'},
'glb': {'type': 'object',
'description': 'Globals namespace',
'default': None,
'anyOf': [{'type': 'object'}, {'type': 'null'}]},
'loc': {'type': 'object',
'description': 'Locals namespace',
'default': None,
'anyOf': [{'type': 'object'}, {'type': 'null'}]},
'timeout': {'type': 'integer',
'description': 'Maximum run time in seconds',
'default': 3600}},
'required': ['code']}}
Many LLM API providers offer tool calling, where an LLM can choose to call a given tool. This is also helpful for structured outputs, since the response from the LLM is constrained to the required arguments of the tool.
This section will be dedicated to helper functions for calling tools. We don't want to allow LLMs to call just any possible function (that would be a security disaster!), so we create a namespace -- that is, a dictionary mapping allowable function names to the functions they refer to.
#| exports
def call_func(fc_name, fc_inputs, ns, raise_on_err=True):
"Call the function `fc_name` with the given `fc_inputs` using namespace `ns`."
if not isinstance(ns, abc.Mapping): ns = mk_ns(ns)
func = ns[fc_name]
# Clean up bad param names
inps = {re.sub(r'\W', '', k):v for k,v in fc_inputs.items()}
    try: return func(**inps)
except Exception as e:
if raise_on_err: raise e from None
else: return traceback.format_exc()
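A quick usage sketch, reusing the Dummy class from earlier; since a plain dict is already a Mapping, it can serve directly as the namespace:

ns = {'sums': Dummy().sums}
call_func('sums', {'a': 1, 'b': 2}, ns)
# Prints "Finding the sum of 1 and 2" and returns 3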
#| exports
async def call_func_async(fc_name, fc_inputs, ns, raise_on_err=True):
"Awaits the function `fc_name` with the given `fc_inputs` using namespace `ns`."
res = call_func(fc_name, fc_inputs, ns, raise_on_err=raise_on_err)
if inspect.iscoroutine(res):
try: res = await res
except Exception as e:
if raise_on_err: raise e from None
else: return traceback.format_exc()
return res
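And a minimal async sketch (adiv is a hypothetical coroutine tool); in a notebook the call can be awaited directly:

async def adiv(
    a:int, # Numerator
    b:int, # Denominator
): return a/b

await call_func_async('adiv', {'a': 6, 'b': 2}, ns={'adiv': adiv})
# → 3.0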