// Unique constraints.
// Neo4j 5.x syntax (FOR ... REQUIRE): the old `ON ... ASSERT` form was removed in 5.0
// and cannot coexist with CREATE VECTOR INDEX, which requires 5.11+.
// IF NOT EXISTS keeps the whole script idempotent / safe to re-run.
CREATE CONSTRAINT paper_id IF NOT EXISTS FOR (p:Paper) REQUIRE p.id IS UNIQUE;
CREATE CONSTRAINT author_id IF NOT EXISTS FOR (a:Author) REQUIRE a.id IS UNIQUE;
CREATE CONSTRAINT institution_id IF NOT EXISTS FOR (i:Institution) REQUIRE i.id IS UNIQUE;

// Indexes for search
CREATE INDEX paper_title IF NOT EXISTS FOR (p:Paper) ON (p.title);
CREATE INDEX author_name IF NOT EXISTS FOR (a:Author) ON (a.name);
CREATE FULLTEXT INDEX paper_search IF NOT EXISTS FOR (p:Paper) ON EACH [p.title, p.abstract];

// Vector index for embeddings.
// Neo4j expects the backtick-quoted keys `vector.dimensions` and
// `vector.similarity_function` nested under indexConfig; a bare
// {dimension, similarity} map is rejected.
// NOTE(review): the application encodes with all-MiniLM-L6-v2, which emits
// 384-dim vectors — confirm the stored embeddings really are 768-dim,
// otherwise every index insert will fail on the dimension check.
CREATE VECTOR INDEX paper_embedding IF NOT EXISTS
FOR (p:Paper) ON (p.embedding)
OPTIONS {indexConfig: {
    `vector.dimensions`: 768,
    `vector.similarity_function`: 'cosine'
}};
def import_citations(paper_id, cited_ids):
    """Link one paper to every paper it cites with CITES relationships.

    Cited ids that match no Paper node are silently skipped (the inner
    MATCH simply produces no row for them), and MERGE makes the call
    idempotent for citations that already exist.
    """
    cypher = """
        MATCH (citing:Paper {id: $citing_id})
        UNWIND $cited_ids AS cited_id
        MATCH (cited:Paper {id: cited_id})
        MERGE (citing)-[:CITES]->(cited)
    """
    with driver.session() as session:
        session.run(cypher, citing_id=paper_id, cited_ids=cited_ids)
// User searches for "graph neural networks".
// The embedding for the query text is generated in the application layer
// and passed in as $query_embedding.
//
// Query the paper_embedding vector index (approximate top-k) instead of
// evaluating vector.similarity.cosine in a WHERE clause, which scans every
// Paper node and never touches the index.
CALL db.index.vector.queryNodes('paper_embedding', 10, $query_embedding)
YIELD node AS p, score AS similarity
WHERE similarity > 0.7
RETURN p.title, p.abstract, similarity
ORDER BY similarity DESC
LIMIT 10
Python API:
from sentence_transformers import SentenceTransformer

# NOTE(review): all-MiniLM-L6-v2 produces 384-dim embeddings, but the schema
# declares the paper_embedding index with 768 dimensions — confirm which one
# the stored p.embedding vectors actually use.
model = SentenceTransformer('all-MiniLM-L6-v2')


def search_papers(query_text):
    """Semantic search over Paper nodes.

    Encodes query_text with the sentence-transformer model, then retrieves
    the top matches from the paper_embedding vector index, keeping only
    results with cosine score above 0.7.

    Returns a list of dicts with keys: title, abstract, score.
    """
    query_embedding = model.encode(query_text).tolist()
    with driver.session() as session:
        # Use the vector index (approximate top-k) rather than computing
        # vector.similarity.cosine against every Paper node in a WHERE
        # clause — that form is a full graph scan and never uses the index.
        result = session.run("""
            CALL db.index.vector.queryNodes('paper_embedding', 10, $embedding)
            YIELD node AS p, score
            WHERE score > 0.7
            RETURN p.title AS title,
                   p.abstract AS abstract,
                   score
            ORDER BY score DESC
        """, embedding=query_embedding)
        return [dict(record) for record in result]


# Usage
results = search_papers("transformers for natural language processing")
for r in results:
    print(f"{r['title']} (score: {r['score']:.3f})")
// Recommend potential collaborators: other authors ranked by how many
// of my topics they also publish on, with their institution attached.
MATCH (me:Author {id: $author_id})-[:AUTHORED]->(:Paper)-[:ABOUT]->(topic:Topic)
WITH me, collect(DISTINCT topic) AS my_topics
MATCH (other:Author)-[:AUTHORED]->(:Paper)-[:ABOUT]->(topic)
WHERE other <> me AND topic IN my_topics
// DISTINCT is required: count(topic) counts one per matched path, so an
// author with many papers on a single shared topic would be counted once
// per paper and dominate the ranking instead of being counted once.
WITH other, count(DISTINCT topic) AS common_topics
ORDER BY common_topics DESC
LIMIT 10
OPTIONAL MATCH (other)-[:AFFILIATED_WITH]->(inst:Institution)
RETURN other.name AS author, common_topics, inst.name AS institution
"Papers you might be interested in" (collaborative filtering over the citation graph):
// Recommend papers written by authors who have cited my work — a simple
// collaborative-filtering signal over the citation graph: each recommended
// paper is scored by how many distinct citing authors wrote it.
MATCH (me:Author {id: $author_id})-[:AUTHORED]->(mine:Paper)
MATCH (mine)<-[:CITES]-(:Paper)<-[:AUTHORED]-(peer:Author)
MATCH (peer)-[:AUTHORED]->(rec:Paper)
WHERE NOT (me)-[:AUTHORED]->(rec)
WITH rec, count(DISTINCT peer) AS score
ORDER BY score DESC
LIMIT 10
RETURN rec.title, rec.year, score
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from neo4j import GraphDatabase
from sentence_transformers import SentenceTransformer
from typing import List, Optional

app = FastAPI()
# NOTE(review): credentials are hard-coded — move the bolt URI and auth pair
# to environment variables / config before deploying.
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
# NOTE(review): all-MiniLM-L6-v2 emits 384-dim vectors; the schema's vector
# index is declared with 768 dimensions — confirm they agree.
model = SentenceTransformer('all-MiniLM-L6-v2')


class SearchQuery(BaseModel):
    # Free-text query; limit caps the number of results returned.
    query: str
    limit: int = 10


class Paper(BaseModel):
    # Response model for GET /papers/{paper_id}.
    id: str
    title: str
    abstract: str
    year: int
    authors: List[str]
    topics: List[str]


@app.get("/papers/{paper_id}", response_model=Paper)
def get_paper(paper_id: str):
    """Fetch one paper with its authors and topics; 404 if the id is unknown."""
    with driver.session() as session:
        result = session.run("""
            MATCH (p:Paper {id: $id})
            OPTIONAL MATCH (a:Author)-[:AUTHORED]->(p)
            OPTIONAL MATCH (p)-[:ABOUT]->(t:Topic)
            RETURN p.id AS id, p.title AS title, p.abstract AS abstract,
                   p.year AS year,
                   collect(DISTINCT a.name) AS authors,
                   collect(DISTINCT t.name) AS topics
        """, id=paper_id).single()
        # single() returns None when the MATCH produced no row.
        if not result:
            raise HTTPException(status_code=404, detail="Paper not found")
        return Paper(**result)


@app.post("/search")
def search_papers(query: SearchQuery):
    """Semantic search: encode the query, then hit the vector index."""
    embedding = model.encode(query.query).tolist()
    with driver.session() as session:
        # Use db.index.vector.queryNodes (approximate top-k against the
        # paper_embedding index) instead of evaluating
        # vector.similarity.cosine over every Paper node in a WHERE clause,
        # which is a full scan that never uses the index.
        result = session.run("""
            CALL db.index.vector.queryNodes('paper_embedding', $limit, $embedding)
            YIELD node AS p, score
            WHERE score > 0.6
            OPTIONAL MATCH (a:Author)-[:AUTHORED]->(p)
            WITH p, score, collect(a.name) AS authors
            ORDER BY score DESC
            RETURN p.id AS id, p.title AS title, p.year AS year, authors, score
        """, embedding=embedding, limit=query.limit)
        return [dict(record) for record in result]


@app.get("/authors/{author_id}/recommendations")
def recommend_papers(author_id: str, limit: int = 10):
    """Recommend papers sharing topics with the author's own work.

    Ranked by number of shared topics, then by citation count; the author's
    own papers are excluded.
    """
    with driver.session() as session:
        result = session.run("""
            MATCH (me:Author {id: $author_id})-[:AUTHORED]->(my_paper:Paper)
            MATCH (my_paper)-[:ABOUT]->(topic:Topic)<-[:ABOUT]-(rec:Paper)
            WHERE NOT (me)-[:AUTHORED]->(rec)
            WITH rec, count(DISTINCT topic) AS relevance
            ORDER BY relevance DESC, rec.citation_count DESC
            LIMIT $limit
            OPTIONAL MATCH (a:Author)-[:AUTHORED]->(rec)
            RETURN rec.id AS id, rec.title AS title, rec.year AS year,
                   relevance, collect(a.name) AS authors
        """, author_id=author_id, limit=limit)
        return [dict(record) for record in result]


@app.get("/stats")
def get_stats():
    """Global corpus counts: papers, authors, citation edges, topics."""
    with driver.session() as session:
        # Chained MATCH/WITH keeps each count in scope for the final RETURN.
        result = session.run("""
            MATCH (p:Paper) WITH count(p) AS papers
            MATCH (a:Author) WITH papers, count(a) AS authors
            MATCH ()-[c:CITES]->() WITH papers, authors, count(c) AS citations
            MATCH (t:Topic) WITH papers, authors, citations, count(t) AS topics
            RETURN papers, authors, citations, topics
        """).single()
        return dict(result)


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)