Instrumenting with a limit
Both track_context() and @track allow you to pass in a list of limit_ids. Each limit will then track all inference requests made within the scope of these instrumentation APIs.
Example: track_context()
$ pip install payi openai dotenv
from payi import Payi
from payi.lib.instrument import track_context, payi_instrument
from openai import OpenAI
from dotenv import load_dotenv
# Load environment variables from a local .env file before the clients are
# constructed.
load_dotenv()

payi = Payi()
payi_instrument(payi=payi, config={"use_case_name": "limit_example"})

# The Pay-i service will generate a limit_id; you can also specify your own
# limit_id.
limit_response = payi.limits.create(
    limit_name="Example limit",
    max=10.0,
    limit_type="allow",
    threshold=0.80,
)
limit_id = limit_response.limit.limit_id

openai = OpenAI()

# The limit will track both inference calls in the underlying scope.
with track_context(limit_ids=[limit_id]):
    response = openai.chat.completions.create(
        model="gpt-5.5",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello, how can I use the API?"},
        ],
    )

    # Optional: use this if you need to monitor the limit state per inference
    # call. The hasattr() guard keeps the example resilient when Pay-i is
    # disabled or when ingestion was deferred due to a network error, in which
    # case the response carries no xproxy_result.
    if hasattr(response, 'xproxy_result'):
        state = response.xproxy_result.limits[limit_id].state
        if state == 'exceeded':
            ...
        elif state == 'overrun':
            ...

    response = openai.chat.completions.create(
        model="gpt-5.5",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Tell me a short story?"},
        ],
    )

# Read the limit back to see the cost accumulated by the calls above.
limit_response = payi.limits.retrieve(limit_id=limit_id)
total_cost = limit_response.limit.totals.cost.total.base
print(f"limit_id {limit_id}: total cost {total_cost}")