# OpenAPI 3.1 contract for the CreatorNode scene-timestamps endpoint.
openapi: 3.1.0
info:
  title: "CreatorNode Postproduction API - Scene Timestamps"
  # Literal block scalar: every line break below is part of the description
  # text exactly as written (one blank line = paragraph break).
  description: |
    Suggest scene transition timestamps from narration audio and ordered scenes.

    Use this endpoint when you already know scene order and need timeline-aligned cut points.
    Each scene must include `startCueText` and `anchorText`. Optional `narrationText` improves
    script localization, and optional top-level `languageCode` improves speech-to-text accuracy.
  # Quoted so the version can never be read as anything but a string.
  version: "1.0.0"
  contact:
    name: "CreatorNode Support"
    url: "https://creatornode.io/support"
  license:
    name: "Proprietary"
    url: "https://creatornode.io/legal"
# Base URL that the operation paths below are resolved against.
servers:
  - url: "https://api.creatornode.io/postproduction"
    description: "Production"
# Tag referenced by the operation's `tags` list.
tags:
  - name: "Postproduction"
    description: "Postproduction processing endpoints"
paths:
  /v1/scene-timestamps:
    # Multipart upload in, JSON timestamp suggestions out.
    post:
      operationId: sceneTimestamps
      tags: [Postproduction]
      summary: >-
        Suggest scene transition timestamps from narration audio and ordered
        scenes
      description: >-
        Upload one narration audio file and ordered scenes to receive
        timeline-aligned scene starts and transition recommendations. Each scene
        must include startCueText and anchorText. Optional narrationText helps
        localize the spoken script, and optional languageCode improves
        speech-to-text. startCueText remains the strongest scene-start hint.
        Response shape stays unchanged.
      # NOTE(review): only ApiKeyAuth is listed, which declares the key as
      # mandatory, and the 401 response mentions a missing key; the ApiKeyAuth
      # scheme description however says key-less requests use free tier limits.
      # Confirm which behavior is intended.
      security:
        - ApiKeyAuth: []
      # Request body: multipart/form-data described by
      # SceneTimestampsMultipartRequest (a JSON `metadata` string part plus an
      # `audio` file part).
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              $ref: "#/components/schemas/SceneTimestampsMultipartRequest"
            # The `metadata` values below are single-quoted scalars wrapped over
            # several lines; YAML folds each line break into one space, so the
            # wraps only occur where the JSON text itself contains a space.
            examples:
              # Cooking walkthrough: three scenes, each with both cue fields,
              # plus narrationText and languageCode.
              cueRequired:
                summary: Multipart request with required per-scene cue fields
                value:
                  metadata: '{"startOffsetMs":2000,"languageCode":"en","scenes":[{"id":"prep","startCueText":"We
                    start by laying out everything","anchorText":"We lay out the
                    tomatoes, basil, garlic, and olive
                    oil."},{"id":"cooking","startCueText":"Everything then hits
                    the hot pan","anchorText":"Everything hits the hot pan with
                    olive oil."},{"id":"plating","startCueText":"Finally, the
                    dish is plated","anchorText":"The dish is plated and
                    finished with herbs."}],"narrationText":"We start by laying
                    out everything we need. Fresh tomatoes, basil, garlic, and
                    olive oil are all ready on the board. Everything then hits
                    the hot pan. Finally, the dish is plated and finished with
                    herbs."}'
                  # Placeholder for the uploaded file part, not a literal value.
                  audio: (cooking-narration.mp3)
              # Cycling story: same request shape with a different offset and
              # scene set.
              upstreamCueHints:
                summary: Multipart request using cue fields carried forward from upstream
                  describe-scenes
                value:
                  metadata: '{"startOffsetMs":1200,"languageCode":"en","scenes":[{"id":"bridge","startCueText":"At
                    daybreak the cyclist crosses the bridge","anchorText":"The
                    cyclist heads across the bridge at
                    daybreak."},{"id":"market","startCueText":"The route pushes
                    into the market","anchorText":"The story moves into the
                    crowded market
                    streets."},{"id":"skyline","startCueText":"The city settles
                    into sunset","anchorText":"The city settles into a warm
                    sunset skyline."}],"narrationText":"At daybreak the cyclist
                    crosses the bridge. The route then pushes into the crowded
                    market streets. The city finally settles into a warm sunset
                    skyline."}'
                  # Placeholder for the uploaded file part, not a literal value.
                  audio: (cycling-narration.mp3)
      responses:
        # Success payload: scene starts, transition suggestions, and request meta.
        "200":
          description: "Scene timestamps generated successfully"
          headers:
            X-Request-Id:
              description: "Unique request identifier"
              schema: {type: string, example: req_scene_123}
            X-Credits-Used:
              description: "Credits consumed by this request (present only for paid tiers)"
              schema: {type: integer, example: 13}
            X-Credits-Remaining:
              description: "Remaining credits on the API key (present only for prepaid tiers)"
              schema: {type: integer, example: 87}
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/SceneTimestampsSuccessResponse"
              # Sample answer for the three-scene cooking request.
              example:
                success: true
                data:
                  startOffsetMs: 2000
                  scenes:
                    - {index: 0, id: prep, label: "Scene 1", startMs: 2000}
                    - {index: 1, id: cooking, label: "Scene 2", startMs: 16500}
                    - {index: 2, id: plating, label: "Scene 3", startMs: 33200}
                  transitions:
                    - index: 0
                      fromSceneIndex: 0
                      toSceneIndex: 1
                      recommendedMs: 16500
                      confidence:
                        score: 0.82
                        intervalMs: {from: 15800, to: 17400}
                    - index: 1
                      fromSceneIndex: 1
                      toSceneIndex: 2
                      recommendedMs: 33200
                      confidence:
                        score: 0.71
                        intervalMs: {from: 32000, to: 35100}
                meta:
                  requestId: req_scene_123
                  processingTimeMs: 842
                  tier: free
                  audio: {mimeType: audio/mpeg, sizeBytes: 724680, durationMs: 45300}
                recommendations:
                  - type: tip
                    title: "Provide narration text"
                    message: >-
                      Providing narrationText improves transcription accuracy for
                      domain-specific wording.
                    priority: medium
        # Validation failures; two representative payloads are shown.
        "400":
          description: >-
            Request validation failed, for example VALIDATION_ERROR,
            TOO_MANY_SCENES, NARRATION_TEXT_TOO_LONG, UPLOAD_SIZE_EXCEEDED,
            AUDIO_PROBE_FAILED, or UNSAFE_PROMPT.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
              examples:
                tooManyScenes:
                  summary: "Tier scene count exceeded"
                  value:
                    success: false
                    error:
                      code: TOO_MANY_SCENES
                      # Must stay quoted: the text contains ": ".
                      message: "Too many scenes: maximum 10 for tier free"
                      details: {sceneCount: 12, maxScenes: 10}
                    meta: {requestId: req_scene_123}
                probeFailed:
                  summary: "Uploaded audio could not be read"
                  value:
                    success: false
                    error:
                      code: AUDIO_PROBE_FAILED
                      message: "Audio file could not be read"
                    meta: {requestId: req_scene_123}
        # NOTE(review): no response body is documented for this status; confirm
        # whether it returns the shared ErrorResponse envelope.
        "401":
          description: Unauthorized - invalid or missing API key
        # Request was well-formed but the cues could not be matched to the audio.
        "422":
          description: "Audio and required scene cues could not be aligned reliably"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
              example:
                success: false
                error:
                  code: CANNOT_ALIGN
                  message: "Audio and scenes could not be reliably aligned"
                  details:
                    reason: "Try more distinct scene anchorText/startCueText or add narrationText"
                meta: {requestId: req_scene_123}
                recommendations:
                  - type: tip
                    title: "Improve input quality"
                    message: >-
                      Try more distinct scene cue text, clearer narration audio, or add
                      narrationText.
                    priority: high
                  - type: tip
                    title: "Manual review may still be needed"
                    message: >-
                      Broad cue text can widen confidence intervals even when the request is
                      otherwise valid.
                    priority: medium
        # NOTE(review): no response body or headers are documented for this
        # status; confirm whether it returns the shared ErrorResponse envelope.
        "429":
          description: Rate limited - too many requests
        # Internal failure during alignment.
        "500":
          description: "Unexpected alignment processing error"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
              example:
                success: false
                error: {code: AI_ALIGNMENT_FAILED, message: "Scene alignment failed"}
                meta: {requestId: req_scene_123}
        # Upstream speech-to-text dependency failed.
        "502":
          description: "Speech-to-text provider temporarily unavailable"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
              example:
                success: false
                error:
                  code: STT_PROVIDER_ERROR
                  message: "Speech-to-text processing failed"
                meta: {requestId: req_scene_123}
        # Safety service could not be reached.
        "503":
          description: "Safety service temporarily unavailable"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
              example:
                success: false
                error: {code: SAFETY_CHECK_UNAVAILABLE, message: "Safety check failed"}
                meta: {requestId: req_scene_123}
        # Timeouts: one example per error code shown for this status.
        "504":
          description: "Processing timed out"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
              examples:
                processingTimeout:
                  summary: "End-to-end processing timed out"
                  value:
                    success: false
                    error:
                      code: PROCESSING_TIMEOUT
                      message: "Processing exceeded time limit"
                    meta: {requestId: req_scene_123}
                safetyTimeout:
                  summary: "Safety pre-check timed out"
                  value:
                    success: false
                    error:
                      code: SAFETY_CHECK_TIMEOUT
                      message: "Safety check failed"
                    meta: {requestId: req_scene_123}
components:
  securitySchemes:
    # API key sent in the X-API-Key request header; referenced by the
    # operation's `security` requirement.
    ApiKeyAuth:
      type: apiKey
      name: X-API-Key
      in: header
      description: >-
        APIM subscription key for authenticated access. Without a key,
        requests use free tier limits.
  schemas:
    # Advisory item carried in the `recommendations` array of both the success
    # and the error envelope.
    Recommendation:
      type: object
      required: [type, title, message]
      properties:
        type:
          type: string
          enum: [upgrade, top_up, feature, tip, warning, fix]
        title: {type: string}
        message: {type: string}
        # Optional call-to-action; when present both fields are required.
        action:
          type: object
          required: [label, url]
          properties:
            label: {type: string}
            url: {type: string, format: uri}
        priority:
          type: string
          enum: [low, medium, high]
          default: low
    # Failure envelope referenced by the error responses that declare a body.
    ErrorResponse:
      type: object
      required: [success, error]
      properties:
        # Single-value enum: always false on this envelope.
        success:
          type: boolean
          enum: [false]
        error:
          type: object
          required: [code, message]
          properties:
            code:
              type: string
              description: >-
                Endpoint-specific error code, for example VALIDATION_ERROR,
                UPLOAD_SIZE_EXCEEDED, AUDIO_PROBE_FAILED, CANNOT_ALIGN, or
                PROCESSING_TIMEOUT.
            message: {type: string}
            # Free-form, code-specific context.
            details:
              type: object
              additionalProperties: true
        meta:
          type: object
          properties:
            requestId: {type: string}
        recommendations:
          type: array
          items:
            $ref: "#/components/schemas/Recommendation"
    # Scene entry used by the `scenes` arrays of SceneTimestampsMetadata and
    # SceneTimestampsDemoRequest. Unknown properties are rejected.
    SceneTimestampsSceneInput:
      type: object
      additionalProperties: false
      required: [anchorText, startCueText]
      properties:
        id:
          type: string
          minLength: 1
          maxLength: 100
          description: "Optional stable client scene identifier echoed back in the response."
          example: intro
        anchorText:
          type: string
          minLength: 1
          description: >-
            Required short script-facing alignment hint for this scene. Prefer
            one concise spoken beat or claim rather than a broad visual summary.
          example: "We lay out the tomatoes, basil, garlic, and olive oil."
        startCueText:
          type: string
          minLength: 1
          description: >-
            Required short scene-entry cue for this scene. Prefer the earliest
            short narration phrase after which the scene should already feel
            active.
          example: "We start by laying out everything"
    # Shape of the JSON carried in the multipart `metadata` part.
    # Only `scenes` is required; unknown properties are rejected.
    SceneTimestampsMetadata:
      type: object
      additionalProperties: false
      required: [scenes]
      description: >-
        Alignment combines transcript timing from uploaded audio with
        required per-scene startCueText and anchorText cues. Optional
        narrationText localizes the spoken script, and optional languageCode
        improves speech-to-text accuracy. startCueText remains the strongest
        scene-start hint.
      properties:
        startOffsetMs:
          type: integer
          minimum: 0
          # Upper bound equals 24 hours expressed in milliseconds.
          maximum: 86400000
          default: 0
          description: "Delay before narration begins on the final video timeline."
          example: 2000
        languageCode:
          type: string
          minLength: 1
          maxLength: 20
          description: "Optional BCP-47-like language hint forwarded to speech-to-text."
          example: en
        scenes:
          type: array
          minItems: 1
          maxItems: 100
          description: "Ordered scenes. Array position defines scene order."
          items:
            $ref: "#/components/schemas/SceneTimestampsSceneInput"
        narrationText:
          type: string
          minLength: 1
          description: >-
            Optional full narration text used to localize the spoken script
            against the transcript before scenes are mapped onto it.
          example: >-
            We start by laying out everything we need. Fresh tomatoes, basil,
            garlic, and olive oil are all ready on the board.
      example:
        startOffsetMs: 2000
        languageCode: en
        scenes:
          - id: prep
            startCueText: "We start by laying out everything"
            anchorText: "We lay out the tomatoes, basil, garlic, and olive oil."
          - id: cooking
            startCueText: "Everything then hits the hot pan"
            anchorText: "Everything hits the hot pan with olive oil."
          - id: plating
            startCueText: "Finally, the dish is plated"
            anchorText: "The dish is plated and finished with herbs."
        narrationText: >-
          We start by laying out everything we need. Fresh tomatoes, basil,
          garlic, and olive oil are all ready on the board.
    # Multipart form body of POST /v1/scene-timestamps: one JSON `metadata`
    # string part plus one binary `audio` part, both required.
    SceneTimestampsMultipartRequest:
      type: object
      description: Multipart request with JSON metadata and one narration audio file.
        Public response shape stays the same while the request requires
        per-scene anchorText and startCueText cues.
      required:
        - metadata
        - audio
      properties:
        metadata:
          type: string
          # JSON Schema 2020-12 content annotations (available in OpenAPI 3.1):
          # declare that the string holds JSON and link the schema of the decoded
          # value, so tooling can reach SceneTimestampsMetadata instead of seeing
          # an opaque string. These are annotations and do not change validation
          # of the string itself.
          contentMediaType: application/json
          contentSchema:
            $ref: "#/components/schemas/SceneTimestampsMetadata"
          description: JSON string containing SceneTimestampsMetadata.
          example: '{"startOffsetMs":2000,"languageCode":"en","scenes":[{"id":"prep","startCueText":"We
            start by laying out everything","anchorText":"We lay out the
            tomatoes, basil, garlic, and olive
            oil."},{"id":"cooking","startCueText":"Everything then hits the hot
            pan","anchorText":"Everything hits the hot pan with olive
            oil."},{"id":"plating","startCueText":"Finally, the dish is
            plated","anchorText":"The dish is plated and finished with
            herbs."}],"narrationText":"We start by laying out everything we
            need. Fresh tomatoes, basil, garlic, and olive oil are all ready on
            the board. Everything then hits the hot pan. Finally, the dish is
            plated and finished with herbs."}'
        audio:
          type: string
          format: binary
          description: One narration audio file. Supported formats in v1 are MP3 and
            AAC/M4A.
    # Bounds object referenced by SceneTimestampConfidence.intervalMs.
    SceneTimestampConfidenceInterval:
      type: object
      description: Millisecond interval reported alongside a confidence score.
      required:
        - from
        - to
      properties:
        from:
          type: integer
          minimum: 0
          description: Start of the interval, in milliseconds.
          example: 15800
        to:
          type: integer
          minimum: 0
          description: End of the interval, in milliseconds.
          example: 17400
    # Confidence attached to each transition: a score bounded to [0, 1] plus
    # an interval object.
    SceneTimestampConfidence:
      type: object
      required: [score, intervalMs]
      properties:
        score: {type: number, minimum: 0, maximum: 1, example: 0.82}
        intervalMs:
          $ref: "#/components/schemas/SceneTimestampConfidenceInterval"
    # One element of `data.scenes` in the success response.
    # `id` and `label` are optional; `index` and `startMs` are always present.
    SceneTimestampItem:
      type: object
      required: [index, startMs]
      properties:
        index:
          type: integer
          minimum: 0
          description: "Zero-based scene index derived from request array order."
          example: 0
        id:
          type: string
          description: "Echoed client-provided scene identifier when present."
          example: prep
        label:
          type: string
          description: "Safe best-effort display label."
          example: "Scene 1"
        startMs:
          type: integer
          minimum: 0
          description: "Recommended start timestamp on the final video timeline."
          example: 2000
    # One element of `data.transitions` in the success response; every field
    # is required.
    SceneTimestampTransition:
      type: object
      required: [index, fromSceneIndex, toSceneIndex, recommendedMs, confidence]
      properties:
        index:
          type: integer
          minimum: 0
          description: "Zero-based transition index."
          example: 0
        fromSceneIndex: {type: integer, minimum: 0, example: 0}
        toSceneIndex: {type: integer, minimum: 0, example: 1}
        recommendedMs:
          type: integer
          minimum: 0
          description: "Preferred transition timestamp on the final video timeline."
          example: 16500
        confidence:
          $ref: "#/components/schemas/SceneTimestampConfidence"
    # Success envelope: `success`, `data`, and `meta` are required;
    # `recommendations` is optional.
    SceneTimestampsSuccessResponse:
      type: object
      required: [success, data, meta]
      properties:
        # Single-value enum: always true on this envelope.
        success:
          type: boolean
          enum: [true]
        data:
          type: object
          required: [startOffsetMs, scenes, transitions]
          properties:
            startOffsetMs: {type: integer, minimum: 0, example: 2000}
            scenes:
              type: array
              items:
                $ref: "#/components/schemas/SceneTimestampItem"
            transitions:
              type: array
              items:
                $ref: "#/components/schemas/SceneTimestampTransition"
        meta:
          type: object
          required: [requestId, processingTimeMs, tier, audio]
          properties:
            requestId: {type: string, example: req_scene_123}
            processingTimeMs: {type: integer, example: 842}
            tier: {type: string, example: free}
            # Facts about the uploaded file; durationMs is not required.
            audio:
              type: object
              required: [mimeType, sizeBytes]
              properties:
                mimeType: {type: string, example: audio/mpeg}
                sizeBytes: {type: integer, minimum: 0, example: 724680}
                durationMs: {type: integer, minimum: 0, example: 45300}
        recommendations:
          type: array
          items:
            $ref: "#/components/schemas/Recommendation"
    # Plain-JSON request body for demo mode, limited to 5 scenes.
    # NOTE(review): described as mirroring SceneTimestampsMetadata, yet
    # startOffsetMs here defaults to 2000 and has no maximum, whereas the
    # metadata schema uses default 0 and maximum 86400000. Confirm the
    # difference is intended.
    SceneTimestampsDemoRequest:
      type: object
      additionalProperties: false
      required: [scenes]
      # Literal block: the line break between the two sentences is part of
      # the description text.
      description: |
        Demo request mirrors the real metadata JSON but uses plain application/json.
        Demo mode accepts 1-5 scenes and does not upload audio.
      properties:
        startOffsetMs: {type: integer, minimum: 0, default: 2000, example: 2000}
        languageCode: {type: string, minLength: 1, maxLength: 20}
        scenes:
          type: array
          minItems: 1
          maxItems: 5
          items:
            $ref: "#/components/schemas/SceneTimestampsSceneInput"
        narrationText: {type: string, minLength: 1}
      example:
        startOffsetMs: 2000
        languageCode: en
        scenes:
          - id: prep
            startCueText: "We start by laying out everything"
            anchorText: "We lay out the tomatoes, basil, garlic, and olive oil."
          - id: cooking
            startCueText: "Everything then hits the hot pan"
            anchorText: "Everything hits the hot pan with olive oil."
          - id: plating
            startCueText: "Finally, the dish is plated"
            anchorText: "The dish is plated and finished with herbs."
        narrationText: >-
          We start by laying out everything we need. Fresh tomatoes, basil,
          garlic, and olive oil are all ready on the board.
    # Success envelope extended with two required demo-only fields.
    SceneTimestampsDemoResponse:
      allOf:
        - $ref: "#/components/schemas/SceneTimestampsSuccessResponse"
        - type: object
          required: [demoMode, warning]
          properties:
            # Single-value enum: always true on a demo response.
            demoMode:
              type: boolean
              enum: [true]
            warning:
              type: string
              description: "Explains that demo response is deterministic sample data."