diff --git a/js/.gitignore b/js/.gitignore
index 4b11d6959..b2563a1d4 100644
--- a/js/.gitignore
+++ b/js/.gitignore
@@ -51,6 +51,10 @@ Chinook_Sqlite.sql
 /evaluation/langchain.js
 /evaluation/langchain.d.ts
 /evaluation/langchain.d.cts
+/evaluation/llm.cjs
+/evaluation/llm.js
+/evaluation/llm.d.ts
+/evaluation/llm.d.cts
 /schemas.cjs
 /schemas.js
 /schemas.d.ts
diff --git a/js/package.json b/js/package.json
index cf7d3acfa..f4269a838 100644
--- a/js/package.json
+++ b/js/package.json
@@ -25,6 +25,10 @@
     "evaluation/langchain.js",
     "evaluation/langchain.d.ts",
     "evaluation/langchain.d.cts",
+    "evaluation/llm.cjs",
+    "evaluation/llm.js",
+    "evaluation/llm.d.ts",
+    "evaluation/llm.d.cts",
     "schemas.cjs",
     "schemas.js",
     "schemas.d.ts",
@@ -113,7 +117,6 @@
     "@babel/preset-env": "^7.22.4",
     "@faker-js/faker": "^8.4.1",
     "@jest/globals": "^29.5.0",
-    "@langchain/core": "^0.3.14",
     "@langchain/langgraph": "^0.2.18",
     "@langchain/openai": "^0.3.11",
     "@opentelemetry/sdk-trace-base": "^1.26.0",
@@ -132,13 +135,14 @@
     "eslint-plugin-no-instanceof": "^1.0.1",
     "eslint-plugin-prettier": "^4.2.1",
     "jest": "^29.5.0",
-    "langchain": "^0.3.3",
     "openai": "^4.67.3",
     "prettier": "^2.8.8",
     "ts-jest": "^29.1.0",
     "ts-node": "^10.9.1",
     "typescript": "^5.4.5",
-    "zod": "^3.23.8"
+    "zod": "^3.23.8",
+    "@langchain/core": "^0.3.14",
+    "langchain": "^0.3.3"
   },
   "peerDependencies": {
     "openai": "*"
   },
@@ -209,6 +213,15 @@
       "import": "./evaluation/langchain.js",
       "require": "./evaluation/langchain.cjs"
     },
+    "./evaluation/llm": {
+      "types": {
+        "import": "./evaluation/llm.d.ts",
+        "require": "./evaluation/llm.d.cts",
+        "default": "./evaluation/llm.d.ts"
+      },
+      "import": "./evaluation/llm.js",
+      "require": "./evaluation/llm.cjs"
+    },
     "./schemas": {
       "types": {
         "import": "./schemas.d.ts",
diff --git a/js/scripts/create-entrypoints.js b/js/scripts/create-entrypoints.js
index 9cce2ab22..b5bcb545f 100644
--- a/js/scripts/create-entrypoints.js
+++ b/js/scripts/create-entrypoints.js
@@ -12,6 +12,7 @@ const entrypoints = {
   traceable: "traceable",
   evaluation: "evaluation/index",
   "evaluation/langchain": "evaluation/langchain",
+  "evaluation/llm": "evaluation/llm_evaluator",
   schemas: "schemas",
   langchain: "langchain",
   vercel: "vercel",
diff --git a/js/src/evaluation/llm.ts b/js/src/evaluation/llm.ts
new file mode 100644
index 000000000..3f8c4cacd
--- /dev/null
+++ b/js/src/evaluation/llm.ts
@@ -0,0 +1,5 @@
+export {
+  CategoricalScoreConfig,
+  ContinuousScoreConfig,
+  LLMEvaluator,
+} from "./llm_evaluator.js";
diff --git a/js/src/evaluation/llm_evaluator.ts b/js/src/evaluation/llm_evaluator.ts
new file mode 100644
index 000000000..3f8bd2f4e
--- /dev/null
+++ b/js/src/evaluation/llm_evaluator.ts
@@ -0,0 +1,290 @@
+// eslint-disable-next-line import/no-extraneous-dependencies
+import { ChatPromptTemplate } from "@langchain/core/prompts";
+import * as uuid from "uuid";
+import {
+  EvaluationResult,
+  EvaluationResults,
+  RunEvaluator,
+} from "./evaluator.js";
+import type { Run, Example } from "../schemas.js";
+// eslint-disable-next-line import/no-extraneous-dependencies
+import { BaseLanguageModel } from "@langchain/core/language_models/base";
+
+/**
+ * Configuration for categorical (enum-based) scoring in evaluations.
+ * Used to define discrete categories or labels for evaluation results.
+ */
+export class CategoricalScoreConfig {
+  /** Feedback key for the evaluator */
+  key: string;
+  /** Array of valid categorical choices/labels that can be assigned */
+  choices: string[];
+  /** Description of what this score measures or represents */
+  description: string;
+  /** Optional key for the LLM reasoning/explanation for the score */
+  reasoningKey?: string;
+  /** Optional description of score reasoning, provided to the LLM in the structured output */
+  reasoningDescription?: string;
+
+  /**
+   * Creates a new categorical score configuration
+   * @param params Configuration parameters
+   * @param params.key Feedback key for the evaluator
+   * @param params.choices Array of valid categorical options
+   * @param params.description Description of the scoring criteria
+   * @param params.reasoningKey Optional key for the LLM reasoning/explanation for the score
+   * @param params.reasoningDescription Optional description of score reasoning, provided to the LLM in the structured output
+   */
+  constructor(params: {
+    key: string;
+    choices: string[];
+    description: string;
+    reasoningKey?: string;
+    reasoningDescription?: string;
+  }) {
+    this.key = params.key;
+    this.choices = params.choices;
+    this.description = params.description;
+    this.reasoningKey = params.reasoningKey;
+    this.reasoningDescription = params.reasoningDescription;
+  }
+}
+
+/**
+ * Configuration for continuous (numeric) scoring in evaluations.
+ * Used to define scores that fall within a numeric range.
+ */
+export class ContinuousScoreConfig {
+  /** Feedback key for the evaluator */
+  key: string;
+  /** Minimum allowed score value (defaults to 0) */
+  min: number;
+  /** Maximum allowed score value (defaults to 1) */
+  max: number;
+  /** Description of the scoring criteria */
+  description: string;
+  /** Optional key for the LLM reasoning/explanation for the score */
+  reasoningKey?: string;
+  /** Optional description of score reasoning, provided to the LLM in the structured output */
+  reasoningDescription?: string;
+
+  /**
+   * Creates a new continuous score configuration
+   * @param params Configuration parameters
+   * @param params.key Feedback key for the evaluator
+   * @param params.description Description of the scoring criteria
+   * @param params.min Optional minimum score value (defaults to 0)
+   * @param params.max Optional maximum score value (defaults to 1)
+   * @param params.reasoningKey Optional key for the LLM reasoning/explanation for the score
+   * @param params.reasoningDescription Optional description of score reasoning, provided to the LLM in the structured output
+   */
+  constructor(params: {
+    key: string;
+    description: string;
+    min?: number;
+    max?: number;
+    reasoningKey?: string;
+    reasoningDescription?: string;
+  }) {
+    this.key = params.key;
+    this.min = params.min ?? 0;
+    this.max = params.max ?? 1;
+    this.description = params.description;
+    this.reasoningKey = params.reasoningKey;
+    this.reasoningDescription = params.reasoningDescription;
+  }
+}
+
+type ScoreConfig = CategoricalScoreConfig | ContinuousScoreConfig;
+
+function createScoreJsonSchema(scoreConfig: ScoreConfig): Record<string, any> {
+  const properties: Record<string, any> = {};
+
+  if (scoreConfig.reasoningKey) {
+    properties[scoreConfig.reasoningKey] = {
+      type: "string",
+      description:
+        scoreConfig.reasoningDescription ||
+        "First, think step by step to explain your score.",
+    };
+  }
+
+  if ("choices" in scoreConfig) {
+    properties.value = {
+      type: "string",
+      enum: scoreConfig.choices,
+      description: `The score for the evaluation, one of ${scoreConfig.choices.join(
+        ", "
+      )}.`,
+    };
+  } else {
+    properties.score = {
+      type: "number",
+      minimum: scoreConfig.min,
+      maximum: scoreConfig.max,
+      description: `The score for the evaluation, between ${scoreConfig.min} and ${scoreConfig.max}, inclusive.`,
+    };
+  }
+
+  return {
+    title: scoreConfig.key,
+    description: scoreConfig.description,
+    type: "object",
+    properties,
+    required: scoreConfig.reasoningKey
+      ? ["choices" in scoreConfig ? "value" : "score", scoreConfig.reasoningKey]
+      : ["choices" in scoreConfig ? "value" : "score"],
+  };
+}
+
+interface LLMEvaluatorParams {
+  promptTemplate: string | [string, string][];
+  scoreConfig: ScoreConfig;
+  chatModel: BaseLanguageModel;
+  mapVariables?: (run: Run, example?: Example) => Record<string, any>;
+}
+
+export class LLMEvaluator implements RunEvaluator {
+  prompt: any;
+  mapVariables?: (run: Run, example?: Example) => Record<string, any>;
+  scoreConfig: ScoreConfig;
+  scoreSchema: Record<string, any>;
+  runnable: any;
+
+  constructor() {}
+
+  static async create(params: LLMEvaluatorParams): Promise<LLMEvaluator> {
+    const evaluator = new LLMEvaluator();
+    await evaluator.initialize(
+      params.promptTemplate,
+      params.scoreConfig,
+      params.chatModel,
+      params.mapVariables
+    );
+    return evaluator;
+  }
+  private async initialize(
+    promptTemplate: string | [string, string][],
+    scoreConfig: ScoreConfig,
+    chatModel: BaseLanguageModel,
+    mapVariables?: (run: Run, example?: Example) => Record<string, any>
+  ) {
+    try {
+      // Store the configuration
+      this.scoreConfig = scoreConfig;
+      this.mapVariables = mapVariables;
+
+      // Create the score schema
+      this.scoreSchema = createScoreJsonSchema(scoreConfig);
+
+      // Create the prompt template
+      if (typeof promptTemplate === "string") {
+        this.prompt = ChatPromptTemplate.fromMessages([
+          { role: "human", content: promptTemplate },
+        ]);
+      } else {
+        this.prompt = ChatPromptTemplate.fromMessages(promptTemplate);
+      }
+
+      const modelWithStructuredOutput = chatModel.withStructuredOutput
+        ? chatModel.withStructuredOutput(this.scoreSchema)
+        : null;
+      if (!modelWithStructuredOutput) {
+        throw new Error("Passed chat model must support structured output");
+      }
+      this.runnable = this.prompt.pipe(modelWithStructuredOutput);
+    } catch (e: unknown) {
+      throw new Error(
+        `Failed to initialize LLMEvaluator: ${(e as Error).message}`
+      );
+    }
+  }
+
+  async evaluateRun(
+    run: Run,
+    example?: Example
+  ): Promise<EvaluationResult | EvaluationResults> {
+    const runId = uuid.v4();
+    const variables = this.prepareVariables(run, example);
+    const output = await this.runnable.invoke(variables, { runId: runId });
+
+    return this.parseOutput(output, runId);
+  }
+
+  private prepareVariables(run: Run, example?: Example): Record<string, any> {
+    if (this.mapVariables) {
+      return this.mapVariables(run, example);
+    }
+
+    const variables: Record<string, any> = {};
+
+    // Input handling
+    if (Object.keys(run.inputs).length === 0) {
+      throw new Error(
+        "No input keys are present in run.inputs but the prompt requires 'input'."
+      );
+    }
+    if (Object.keys(run.inputs).length !== 1) {
+      throw new Error(
+        "Multiple input keys are present in run.inputs. Please provide a mapVariables function."
+      );
+    }
+    variables.input = Object.values(run.inputs)[0];
+
+    // Output handling
+    if (!run.outputs || Object.keys(run.outputs).length === 0) {
+      throw new Error(
+        "No output keys are present in run.outputs but the prompt requires 'output'."
+      );
+    }
+    if (Object.keys(run.outputs).length !== 1) {
+      throw new Error(
+        "Multiple output keys are present in run.outputs. Please provide a mapVariables function."
+      );
+    }
+    variables.output = Object.values(run.outputs)[0];
+
+    // Expected output handling
+    if (example?.outputs) {
+      if (Object.keys(example.outputs).length === 0) {
+        throw new Error(
+          "No output keys are present in example.outputs but the prompt requires 'expected'."
+        );
+      }
+      if (Object.keys(example.outputs).length !== 1) {
+        throw new Error(
+          "Multiple output keys are present in example.outputs. Please provide a mapVariables function."
+        );
+      }
+      variables.expected = Object.values(example.outputs)[0];
+    }
+
+    return variables;
+  }
+
+  private parseOutput(
+    output: Record<string, any>,
+    runId: string
+  ): EvaluationResult {
+    const explanation = this.scoreConfig.reasoningKey
+      ? output[this.scoreConfig.reasoningKey]
+      : undefined;
+    if ("choices" in this.scoreConfig) {
+      const value = output.value;
+      return {
+        key: this.scoreConfig.key,
+        value,
+        comment: explanation,
+        sourceRunId: runId,
+      };
+    } else {
+      const score = output.score;
+      return {
+        key: this.scoreConfig.key,
+        score,
+        comment: explanation,
+        sourceRunId: runId,
+      };
+    }
+  }
+}
diff --git a/js/src/tests/llm_evaluator.int.test.ts b/js/src/tests/llm_evaluator.int.test.ts
new file mode 100644
index 000000000..c5762a60d
--- /dev/null
+++ b/js/src/tests/llm_evaluator.int.test.ts
@@ -0,0 +1,183 @@
+import { expect, test } from "@jest/globals";
+import { Client } from "../index.js";
+import {
+  CategoricalScoreConfig,
+  ContinuousScoreConfig,
+  LLMEvaluator,
+} from "../evaluation/llm.js";
+import { evaluate } from "../evaluation/_runner.js";
+import { ChatOpenAI } from "@langchain/openai";
+
+const CHAT_MODEL = new ChatOpenAI({ model: "gpt-4" });
+
+const TESTING_DATASET_NAME = "LLMEvaluator dataset";
+
+test("llm evaluator initialization with categorical config", async () => {
+  const evaluator = await LLMEvaluator.create({
+    promptTemplate: "Is the response vague? Y/N\n{input}",
+    scoreConfig: new CategoricalScoreConfig({
+      key: "vagueness",
+      choices: ["Y", "N"],
+      description: "Whether the response is vague. Y for yes, N for no.",
+      reasoningKey: "explanation",
+    }),
+    chatModel: CHAT_MODEL,
+  });
+
+  expect(evaluator).toBeDefined();
+  // Check input variables extracted from template
+  expect(evaluator.prompt.inputVariables).toEqual(["input"]);
+  // Verify JSON schema for categorical scoring
+  expect(evaluator.scoreSchema).toEqual({
+    type: "object",
+    description: "Whether the response is vague. Y for yes, N for no.",
+    title: "vagueness",
+    properties: {
+      value: {
+        type: "string",
+        enum: ["Y", "N"],
+        description: "The score for the evaluation, one of Y, N.",
+      },
+      explanation: {
+        type: "string",
+        description: "First, think step by step to explain your score.",
+      },
+    },
+    required: ["value", "explanation"],
+  });
+
+  expect((evaluator.scoreConfig as CategoricalScoreConfig).choices).toEqual([
+    "Y",
+    "N",
+  ]);
+});
+
+test("llm evaluator initialization with continuous config", async () => {
+  const evaluator = await LLMEvaluator.create({
+    promptTemplate: "Rate the response from 0 to 1.\n{input}",
+    scoreConfig: new ContinuousScoreConfig({
+      key: "rating",
+      description: "The rating of the response, from 0 to 1.",
+      min: 0,
+      max: 1,
+    }),
+    chatModel: CHAT_MODEL,
+  });
+
+  expect(evaluator).toBeDefined();
+  // Check input variables extracted from template
+  expect(evaluator.prompt.inputVariables).toEqual(["input"]);
+  // Verify JSON schema for continuous scoring
+  expect(evaluator.scoreSchema).toEqual({
+    type: "object",
+    title: "rating",
+    description: "The rating of the response, from 0 to 1.",
+    properties: {
+      score: {
+        type: "number",
+        minimum: 0,
+        maximum: 1,
+        description:
+          "The score for the evaluation, between 0 and 1, inclusive.",
+      },
+    },
+    required: ["score"],
+  });
+  // Verify score config properties
+  expect(evaluator.scoreConfig.key).toBe("rating");
+  expect((evaluator.scoreConfig as ContinuousScoreConfig).min).toBe(0);
+  expect((evaluator.scoreConfig as ContinuousScoreConfig).max).toBe(1);
+});
+
+test("llm evaluator with custom variable mapping", async () => {
+  const evaluator = await LLMEvaluator.create({
+    promptTemplate: [
+      [
+        "system",
+        "Is the output accurate with respect to the context and question? Y/N",
+      ],
+      ["human", "Context: {context}\nQuestion: {question}\nOutput: {output}"],
+    ],
+    scoreConfig: new CategoricalScoreConfig({
+      key: "accuracy",
+      choices: ["Y", "N"],
+      description:
+        "Whether the output is accurate with respect to the context and question.",
+      reasoningKey: "explanation",
+      reasoningDescription: "First, think step by step to explain your score.",
+    }),
+    chatModel: CHAT_MODEL,
+    mapVariables: (run: any, example?: any) => ({
+      context: example?.inputs?.context || "",
+      question: example?.inputs?.question || "",
+      output: run.outputs?.output || "",
+    }),
+  });
+
+  expect(evaluator).toBeDefined();
+});
+
+test("llm evaluator can evaluate runs", async () => {
+  const client = new Client();
+  await client.clonePublicDataset(
+    "https://beta.smith.langchain.com/public/06785303-0f70-4466-b637-f23d38c0f28e/d",
+    {
+      datasetName: TESTING_DATASET_NAME,
+    }
+  );
+  const evaluator = await LLMEvaluator.create({
+    promptTemplate: "Is the response vague? Y/N\n{response}",
+    scoreConfig: new CategoricalScoreConfig({
+      key: "vagueness",
+      choices: ["Y", "N"],
+      description: "Whether the response is vague. Y for yes, N for no.",
+      reasoningKey: "explanation",
+      reasoningDescription: "First, think step by step to explain your score.",
+    }),
+    chatModel: CHAT_MODEL,
+    mapVariables: (run: any, _example?: any) => ({
+      response: run.outputs?.["output"] ?? "",
+    }),
+  });
+
+  const targetFunc = (input: Record<string, any>) => {
+    return { output: input.question + " This is a test response" };
+  };
+
+  const evalRes = await evaluate(targetFunc, {
+    data: TESTING_DATASET_NAME,
+    evaluators: [evaluator],
+    description: "LLM evaluator test run",
+  });
+
+  expect(evalRes.results).toHaveLength(10);
+  const firstResult = evalRes.results[0];
+
+  const evaluation = firstResult.evaluationResults.results[0];
+  expect(evaluation.key).toBe("vagueness");
+  expect(["Y", "N"]).toContain(evaluation.value);
+  expect(evaluation.comment).toBeDefined();
+
+  await client.deleteDataset({ datasetName: TESTING_DATASET_NAME });
+});
+
+test("llm evaluator with multiple prompt messages", async () => {
+  const evaluator = await LLMEvaluator.create({
+    promptTemplate: [
+      ["system", "You are a helpful assistant evaluating responses."],
+      ["human", "Rate this response from 0 to 1: {response}"],
+    ],
+    scoreConfig: new ContinuousScoreConfig({
+      key: "rating",
+      description: "Quality rating from 0 to 1",
+      min: 0,
+      max: 1,
+    }),
+    chatModel: CHAT_MODEL,
+    mapVariables: (run: any, _example?: any) => ({
+      response: run.outputs?.["output"] ?? "",
+    }),
+  });
+
+  expect(evaluator).toBeDefined();
+});
diff --git a/js/tsconfig.json b/js/tsconfig.json
index b778ed83f..5d3d8b2c5 100644
--- a/js/tsconfig.json
+++ b/js/tsconfig.json
@@ -38,6 +38,7 @@
     "src/traceable.ts",
     "src/evaluation/index.ts",
     "src/evaluation/langchain.ts",
+    "src/evaluation/llm_evaluator.ts",
     "src/schemas.ts",
     "src/langchain.ts",
     "src/vercel.ts",
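
Usage sketch (illustrative only, not part of the patch): with the new "./evaluation/llm" export wired up above, consumers of the published langsmith package should be able to import the evaluator from the `langsmith/evaluation/llm` subpath and plug it into the existing `evaluate()` harness, assuming `evaluate` remains re-exported from the `langsmith/evaluation` entrypoint. The dataset name below is hypothetical; the model and variable mapping mirror the integration test.

  import { CategoricalScoreConfig, LLMEvaluator } from "langsmith/evaluation/llm";
  import { evaluate } from "langsmith/evaluation";
  import { ChatOpenAI } from "@langchain/openai";

  async function main() {
    // LLM-as-judge evaluator that labels each run's output as vague (Y) or not (N),
    // with the judge's reasoning captured under the "explanation" key.
    const vagueness = await LLMEvaluator.create({
      promptTemplate: "Is the response vague? Y/N\n{response}",
      scoreConfig: new CategoricalScoreConfig({
        key: "vagueness",
        choices: ["Y", "N"],
        description: "Whether the response is vague. Y for yes, N for no.",
        reasoningKey: "explanation",
      }),
      chatModel: new ChatOpenAI({ model: "gpt-4" }),
      // Map each run's output onto the {response} prompt variable; without
      // mapVariables, the evaluator binds a single input/output to {input}/{output}.
      mapVariables: (run: any) => ({
        response: run.outputs?.output ?? "",
      }),
    });

    // Run the target function over a dataset and attach the evaluator's feedback.
    await evaluate(
      (input: Record<string, any>) => ({ output: `You asked: ${input.question}` }),
      {
        data: "my-dataset", // hypothetical dataset name
        evaluators: [vagueness],
      }
    );
  }

  main().catch(console.error);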