>>> import pandas as pd
>>>
>>> def center(pdf):
...     return pdf.assign(v=pdf.v - pdf.v.mean())
...
>>> def summary(key, lpdf, rpdf):
...     (key,) = key
...     return pd.DataFrame({
...         "id": [key],
...         "diff": [lpdf.v.mean() - rpdf.v.mean()],
...         "count": [lpdf.v.count() + rpdf.v.count()],
...     })
...
>>> # df1 holds two rows for id 1 and one row for id 2; df2 only has a row for id 1.
>>> df1 = spark.createDataFrame([(1, 1.0), (1, 2.0), (2, 3.0)], ["id", "v"])
>>> df2 = spark.createDataFrame([(1, 5.0)], ["id", "v"])
>>>
>>> # Apply ``center`` to each ``id`` group of df1; the schema string declares the
>>> # columns of the result. Rows are sorted by id, then v, before being shown.
>>> df1.groupby("id").applyInPandas(center, schema="id long, v double").sort("id", "v").show()
+---+----+
| id|   v|
+---+----+
|  1|-0.5|
|  1| 0.5|
|  2| 0.0|
+---+----+

>>> # Cogroup df1 and df2 on ``id`` and pass ``summary`` the rows of each side
>>> # for a given key. id 2 has no rows in df2, so its ``diff`` is shown as NULL
>>> # and its ``count`` only reflects the single df1 row.
>>> df1.groupby("id").cogroup(df2.groupby("id")).applyInPandas(
...     summary, schema="id long, diff double, count long"
... ).sort("id").show()
+---+----+-----+
| id|diff|count|
+---+----+-----+
|  1|-3.5|    3|
|  2|NULL|    1|
+---+----+-----+
