Instrumenting with a limit

Both track_context() and @track allow you to pass in a list of limit_ids. Each listed limit will then track all inference requests scoped under these instrumentation APIs.

Example: track_context()

$ pip install payi openai python-dotenv

from payi import Payi
from payi.lib.instrument import track_context, payi_instrument
from openai import OpenAI
from dotenv import load_dotenv

# Load settings through python-dotenv before any client is constructed
# (presumably the Pay-i and OpenAI API keys -- confirm against your setup).
load_dotenv()

payi = Payi()
payi_instrument(payi=payi, config={"use_case_name": "limit_example"})

# The Pay-i service will generate a limit_id; you can also specify your own.
limit_response = payi.limits.create(
    limit_name="Example limit",
    max=10.0,
    limit_type="allow",
    threshold=0.80,
)
limit_id = limit_response.limit.limit_id

openai = OpenAI()

# The limit will track both inference calls in the underlying scope.
with track_context(limit_ids=[limit_id]):
    response = openai.chat.completions.create(
        model="gpt-5.5",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello, how can I use the API?"},
        ],
    )

    # Optional: monitor the limit state after each inference call.
    # The hasattr() guard keeps this resilient when Pay-i is disabled or when
    # ingestion was deferred due to a network error.
    if hasattr(response, "xproxy_result"):
        state = response.xproxy_result.limits[limit_id].state
        if state == "exceeded":
            ...
        elif state == "overrun":
            ...

    response = openai.chat.completions.create(
        model="gpt-5.5",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Tell me a short story?"},
        ],
    )

# Read the limit back to report the cost accumulated by the calls above.
limit_response = payi.limits.retrieve(limit_id=limit_id)
total_cost = limit_response.limit.totals.cost.total.base
print(f"limit_id {limit_id}: total cost {total_cost}")